listpage-next-ai 0.0.273 → 0.0.275
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.d.ts +14 -0
- package/dist/cjs/index.js +482 -7
- package/package.json +1 -1
package/dist/cjs/index.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ import { OpenAIClient } from '@langchain/openai';
|
|
|
11
11
|
import { ReactAgent } from 'langchain';
|
|
12
12
|
import { ReplaySubject } from 'rxjs';
|
|
13
13
|
import { ResponseFormatUndefined } from 'langchain';
|
|
14
|
+
import type { Runnable } from '@langchain/core/runnables';
|
|
14
15
|
import { StreamEvent } from '@langchain/core/tracers/log_stream';
|
|
15
16
|
|
|
16
17
|
export declare interface AgentOptions {
|
|
@@ -18,17 +19,20 @@ export declare interface AgentOptions {
|
|
|
18
19
|
database?: boolean;
|
|
19
20
|
knowledge?: boolean;
|
|
20
21
|
word?: boolean;
|
|
22
|
+
documentSearch?: boolean;
|
|
21
23
|
websearch?: boolean;
|
|
22
24
|
};
|
|
23
25
|
databaseOptions?: DatabaseAgentOptions;
|
|
24
26
|
knowledgeOptions?: KnowledgeAgentOptions;
|
|
25
27
|
wordOptions?: WordToolOptions;
|
|
28
|
+
documentSearchOptions?: DocumentSearchToolOptions;
|
|
26
29
|
websearchOptions?: {};
|
|
27
30
|
model: LanguageModelLike;
|
|
28
31
|
system_prompt: string;
|
|
29
32
|
tools?: any[];
|
|
30
33
|
middleware?: any[];
|
|
31
34
|
name?: string;
|
|
35
|
+
subagents?: CompiledSubAgent[];
|
|
32
36
|
inject_current_time?: boolean;
|
|
33
37
|
max_iterations?: number;
|
|
34
38
|
simple?: boolean;
|
|
@@ -61,6 +65,12 @@ declare interface Column {
|
|
|
61
65
|
}[] | null;
|
|
62
66
|
}
|
|
63
67
|
|
|
68
|
+
/**
 * A pre-built sub-agent that can be handed to `AgentOptions.subagents`.
 *
 * The sub-agent is registered under `name`, advertised to the main agent
 * as `- <name>: <description>` and executed through its `invoke` method.
 */
declare interface CompiledSubAgent {
    /** Key used to select this sub-agent (the `subagent_type` of the task tool). */
    name: string;
    /** Human-readable summary listed in the task tool description. */
    description: string;
    /** The executable agent or runnable that performs the delegated task. */
    runnable: Runnable | ReactAgent<any, any, any, any>;
}
|
|
73
|
+
|
|
64
74
|
export declare function createReactAgent(options: AgentOptions): ReactAgent<ResponseFormatUndefined, undefined, AnyAnnotationRoot, any[]>;
|
|
65
75
|
|
|
66
76
|
declare interface DatabaseAgentOptions extends DatabaseToolOptions {
|
|
@@ -94,6 +104,10 @@ declare interface DatabaseToolOptions {
|
|
|
94
104
|
}) => Promise<string>;
|
|
95
105
|
}
|
|
96
106
|
|
|
107
|
+
/**
 * Options for the local document search tool
 * (`AgentOptions.documentSearchOptions`).
 */
declare interface DocumentSearchToolOptions {
    /** Markdown source text that is split by headers and indexed for search. */
    markdown: string;
}
|
|
110
|
+
|
|
97
111
|
declare interface KnowledgeAgentOptions extends KnowledgeToolOptions {
|
|
98
112
|
}
|
|
99
113
|
|
package/dist/cjs/index.js
CHANGED
|
@@ -351,6 +351,291 @@ const TodoSchema = v3_namespaceObject.z.object({
|
|
|
351
351
|
v3_namespaceObject.z.object({
|
|
352
352
|
todos: v3_namespaceObject.z.array(TodoSchema).default([])
|
|
353
353
|
});
|
|
354
|
+
// System prompt given to the built-in "general-purpose" sub-agent.
const DEFAULT_SUBAGENT_PROMPT = 'In order to complete the objective that the user asks of you, you have access to a number of standard tools.';

// State keys that are never copied between the main agent and a sub-agent
// (in either direction); see filterStateForSubagent.
const EXCLUDED_STATE_KEYS = ['messages', 'todos', 'jumpTo'];

// Description advertised to the main agent for the "general-purpose" sub-agent.
const DEFAULT_GENERAL_PURPOSE_DESCRIPTION = 'General-purpose agent for researching complex questions, searching for files and content, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you. This agent has access to all tools as the main agent.';
|
|
361
|
+
/**
 * Builds the description text of the `task` tool.
 *
 * The available sub-agents are interpolated into a fixed prompt that explains
 * when and how the tool should be used, followed by worked examples.
 *
 * @param {string[]} subagentDescriptions One line per sub-agent (for example
 *   `- name: description`); the lines are joined with newlines.
 * @returns {string} The complete tool description, trimmed of surrounding whitespace.
 */
function getTaskToolDescription(subagentDescriptions) {
    // NOTE: both <example_agent_descriptions> sections must be closed with the
    // matching </example_agent_descriptions> tag so the prompt markup stays balanced.
    return `
Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context windows.

Available agent types and the tools they have access to:
${subagentDescriptions.join('\n')}

When using the Task tool, you must specify a subagent_type parameter to select which agent type to use.

## Usage notes:
1. Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses
2. When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.
3. Each agent invocation is stateless. You will not be able to send additional messages to the agent, nor will the agent be able to communicate with you outside of its final report. Therefore, your prompt should contain a highly detailed task description for the agent to perform autonomously and you should specify exactly what information the agent should return back to you in its final and only message to you.
4. The agent's outputs should generally be trusted
5. Clearly tell the agent whether you expect it to create content, perform analysis, or just do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent
6. If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.
7. When only the general-purpose agent is provided, you should use it for all tasks. It is great for isolating context and token usage, and completing specific, complex tasks, as it has all the same capabilities as the main agent.

### Example usage of the general-purpose agent:

<example_agent_descriptions>
"general-purpose": use this agent for general purpose tasks, it has access to all tools as the main agent.
</example_agent_descriptions>

<example>
User: "I want to conduct research on the accomplishments of Lebron James, Michael Jordan, and Kobe Bryant, and then compare them."
Assistant: *Uses the task tool in parallel to conduct isolated research on each of the three players*
Assistant: *Synthesizes the results of the three isolated research tasks and responds to the User*
<commentary>
Research is a complex, multi-step task in it of itself.
The research of each individual player is not dependent on the research of the other players.
The assistant uses the task tool to break down the complex objective into three isolated tasks.
Each research task only needs to worry about context and tokens about one player, then returns synthesized information about each player as the Tool Result.
This means each research task can dive deep and spend tokens and context deeply researching each player, but the final result is synthesized information, and saves us tokens in the long run when comparing the players to each other.
</commentary>
</example>

<example>
User: "Analyze a single large code repository for security vulnerabilities and generate a report."
Assistant: *Launches a single \`task\` subagent for the repository analysis*
Assistant: *Receives report and integrates results into final summary*
<commentary>
Subagent is used to isolate a large, context-heavy task, even though there is only one. This prevents the main thread from being overloaded with details.
If the user then asks followup questions, we have a concise report to reference instead of the entire history of analysis and tool calls, which is good and saves us time and money.
</commentary>
</example>

<example>
User: "Schedule two meetings for me and prepare agendas for each."
Assistant: *Calls the task tool in parallel to launch two \`task\` subagents (one per meeting) to prepare agendas*
Assistant: *Returns final schedules and agendas*
<commentary>
Tasks are simple individually, but subagents help silo agenda preparation.
Each subagent only needs to worry about the agenda for one meeting.
</commentary>
</example>

<example>
User: "I want to order a pizza from Dominos, order a burger from McDonald's, and order a salad from Subway."
Assistant: *Calls tools directly in parallel to order a pizza from Dominos, a burger from McDonald's, and a salad from Subway*
<commentary>
The assistant did not use the task tool because the objective is super simple and clear and only requires a few trivial tool calls.
It is better to just complete the task directly and NOT use the \`task\`tool.
</commentary>
</example>

### Example usage with custom agents:

<example_agent_descriptions>
"content-reviewer": use this agent after you are done creating significant content or documents
"greeting-responder": use this agent when to respond to user greetings with a friendly joke
"research-analyst": use this agent to conduct thorough research on complex topics
</example_agent_descriptions>

<example>
user: "Please write a function that checks if a number is prime"
assistant: Sure let me write a function that checks if a number is prime
assistant: First let me use the Write tool to write a function that checks if a number is prime
assistant: I'm going to use the Write tool to write the following code:
<code>
function isPrime(n) {
  if (n <= 1) return false
  for (let i = 2; i * i <= n; i++) {
    if (n % i === 0) return false
  }
  return true
}
</code>
<commentary>
Since significant content was created and the task was completed, now use the content-reviewer agent to review the work
</commentary>
assistant: Now let me use the content-reviewer agent to review the code
assistant: Uses the Task tool to launch with the content-reviewer agent
</example>

<example>
user: "Can you help me research the environmental impact of different renewable energy sources and create a comprehensive report?"
<commentary>
This is a complex research task that would benefit from using the research-analyst agent to conduct thorough analysis
</commentary>
assistant: I'll help you research the environmental impact of renewable energy sources. Let me use the research-analyst agent to conduct comprehensive research on this topic.
assistant: Uses the Task tool to launch with the research-analyst agent, providing detailed instructions about what research to conduct and what format the report should take
</example>

<example>
user: "Hello"
<commentary>
Since the user is greeting, use the greeting-responder agent to respond with a friendly joke
</commentary>
assistant: "I'm going to use the Task tool to launch with the greeting-responder agent"
</example>
`.trim();
}
|
|
474
|
+
// System-prompt section appended by the sub-agent middleware; it explains the
// `task` tool to the main agent. Kept as one entry per prompt line.
const TASK_SYSTEM_PROMPT = [
    '## `task` (subagent spawner)',
    '',
    'You have access to a `task` tool to launch short-lived subagents that handle isolated tasks. These agents are ephemeral — they live only for the duration of the task and return a single result.',
    '',
    'When to use the task tool:',
    '- When a task is complex and multi-step, and can be fully delegated in isolation',
    '- When a task is independent of other tasks and can run in parallel',
    '- When a task requires focused reasoning or heavy token/context usage that would bloat the orchestrator thread',
    '- When sandboxing improves reliability (e.g. code execution, structured searches, data formatting)',
    '- When you only care about the output of the subagent, and not the intermediate steps (ex. performing a lot of research and then returned a synthesized report, performing a series of computations or lookups to achieve a concise, relevant answer.)',
    '',
    'Subagent lifecycle:',
    '1. **Spawn** → Provide clear role, instructions, and expected output',
    '2. **Run** → The subagent completes the task autonomously',
    '3. **Return** → The subagent provides a single structured result',
    '4. **Reconcile** → Incorporate or synthesize the result into the main thread',
    '',
    'When NOT to use the task tool:',
    '- If you need to see the intermediate reasoning or steps after the subagent has completed (the task tool hides them)',
    '- If the task is trivial (a few tool calls or simple lookup)',
    '- If delegating does not reduce token usage, complexity, or context switching',
    '- If splitting would add latency without benefit',
    '',
    '## Important Task Tool Usage Notes to Remember',
    '- Whenever possible, parallelize the work that you do. This is true for both tool_calls, and for tasks. Whenever you have independent steps to complete - make tool_calls, or kick off tasks (subagents) in parallel to accomplish them faster. This saves time for the user, which is incredibly important.',
    '- Remember to use the `task` tool to silo independent tasks within a multi-part objective.',
    '- You should use the `task` tool whenever you have a complex task that will take multiple steps, and is independent from other tasks that the agent needs to complete. These agents are highly competent and efficient.',
    '',
    'IMPORTANT: When you decide to use this tool, you MUST wrap your thought process (the explanation of why you are creating the task) in <think> tags.'
].join('\n');
|
|
503
|
+
/**
 * Returns a shallow copy of `state` without the keys listed in
 * EXCLUDED_STATE_KEYS.
 *
 * @param {object} state State object whose own enumerable entries are copied.
 * @returns {object} A new object holding every non-excluded entry.
 */
function filterStateForSubagent(state) {
    const kept = {};
    Object.entries(state).forEach(([field, fieldValue]) => {
        if (EXCLUDED_STATE_KEYS.includes(field)) return;
        kept[field] = fieldValue;
    });
    return kept;
}
|
|
508
|
+
/**
 * Wraps a sub-agent result in a Command that updates the caller's state.
 *
 * The non-excluded state keys of `result` are passed through, and the content
 * of the sub-agent's last message is reported as the `task` tool's ToolMessage
 * (falling back to 'Task completed' when that content is missing or falsy).
 *
 * @param {object} result State returned by the sub-agent invocation.
 * @param {string} toolCallId Id of the tool call being answered.
 * @returns {object} A Command carrying the state update.
 */
function returnCommandWithStateUpdate(result, toolCallId) {
    const passthroughState = filterStateForSubagent(result);
    const history = result.messages;
    const finalMessage = history?.[history.length - 1];
    const toolReply = new external_langchain_namespaceObject.ToolMessage({
        content: finalMessage?.content || 'Task completed',
        tool_call_id: toolCallId,
        name: 'task'
    });
    const update = {
        ...passthroughState,
        messages: [toolReply]
    };
    return new langgraph_namespaceObject.Command({ update });
}
|
|
525
|
+
/**
 * Builds the registry of sub-agents available to the `task` tool.
 *
 * When `generalPurposeAgent` is truthy a "general-purpose" agent is created
 * from the default model, tools and middleware. Every entry of `subagents`
 * is then registered: entries with a `runnable` property are used as-is,
 * all others are compiled with `createAgent`.
 *
 * @param {object} options Default model/tools/middleware/interrupt settings,
 *   the `subagents` list and the `generalPurposeAgent` flag.
 * @returns {{agents: object, descriptions: string[]}} Agents keyed by name and
 *   one `- name: description` line per registered agent.
 */
function getSubagents(options) {
    const { defaultModel, defaultTools, defaultMiddleware, defaultInterruptOn, subagents, generalPurposeAgent } = options;
    const baseMiddleware = defaultMiddleware || [];
    // Appends human-in-the-loop middleware to `stack` when an interrupt config is present.
    const withInterrupts = (stack, interruptOn) => {
        if (interruptOn) stack.push((0, external_langchain_namespaceObject.humanInTheLoopMiddleware)({
            interruptOn
        }));
        return stack;
    };
    const agents = {};
    const descriptions = [];
    if (generalPurposeAgent) {
        agents['general-purpose'] = (0, external_langchain_namespaceObject.createAgent)({
            model: defaultModel,
            systemPrompt: DEFAULT_SUBAGENT_PROMPT,
            tools: defaultTools,
            middleware: withInterrupts([...baseMiddleware], defaultInterruptOn)
        });
        descriptions.push(`- general-purpose: ${DEFAULT_GENERAL_PURPOSE_DESCRIPTION}`);
    }
    for (const spec of subagents) {
        descriptions.push(`- ${spec.name}: ${spec.description}`);
        if ('runnable' in spec) {
            // Already compiled: register it untouched.
            agents[spec.name] = spec.runnable;
            continue;
        }
        const stack = withInterrupts([
            ...baseMiddleware,
            ...(spec.middleware ? spec.middleware : [])
        ], spec.interruptOn || defaultInterruptOn);
        agents[spec.name] = (0, external_langchain_namespaceObject.createAgent)({
            model: spec.model ?? defaultModel,
            systemPrompt: spec.systemPrompt,
            tools: spec.tools ?? defaultTools,
            middleware: stack
        });
    }
    return {
        agents,
        descriptions
    };
}
|
|
573
|
+
/**
 * Creates the `task` tool that delegates work to a named sub-agent.
 *
 * The tool validates the requested `subagent_type`, runs the sub-agent with
 * the current state (minus excluded keys) and a single human message holding
 * the task description, and returns a Command with the resulting state update.
 *
 * @param {object} options Defaults forwarded to getSubagents plus an optional
 *   `taskDescription` that overrides the generated tool description.
 * @returns {object} The tool produced by langchain's `tool` helper.
 * @throws {Error} At call time, when `subagent_type` is not a registered
 *   sub-agent or when the invocation carries no tool call id.
 */
function createTaskTool(options) {
    const { defaultModel, defaultTools, defaultMiddleware, defaultInterruptOn, subagents, generalPurposeAgent, taskDescription } = options;
    const { agents: subagentGraphs, descriptions: subagentDescriptions } = getSubagents({
        defaultModel,
        defaultTools,
        defaultMiddleware,
        defaultInterruptOn,
        subagents,
        generalPurposeAgent
    });
    const finalTaskDescription = taskDescription ? taskDescription : getTaskToolDescription(subagentDescriptions);
    return (0, external_langchain_namespaceObject.tool)(async (input, config)=>{
        const { description, subagent_type } = input;
        // Own-property check: the `in` operator would also accept inherited keys
        // such as "toString" or "constructor", which are not sub-agents.
        if (!Object.prototype.hasOwnProperty.call(subagentGraphs, subagent_type)) {
            const allowedTypes = Object.keys(subagentGraphs).map((k)=>`\`${k}\``).join(', ');
            throw new Error(`Error: invoked agent of type ${subagent_type}, the only allowed types are ${allowedTypes}`);
        }
        // Validate the tool call id before running the sub-agent so that a
        // malformed invocation does not waste a full sub-agent run.
        const toolCallId = config?.toolCall?.id;
        if (!toolCallId) throw new Error('Tool call ID is required for subagent invocation');
        const subagent = subagentGraphs[subagent_type];
        const currentState = (0, langgraph_namespaceObject.getCurrentTaskInput)();
        const subagentState = filterStateForSubagent(currentState);
        // The sub-agent starts from a fresh conversation containing only the task.
        subagentState.messages = [
            new messages_namespaceObject.HumanMessage({
                content: description
            })
        ];
        const result = await subagent.invoke(subagentState, config);
        return returnCommandWithStateUpdate(result, toolCallId);
    }, {
        name: 'task',
        description: finalTaskDescription,
        schema: v3_namespaceObject.z.object({
            description: v3_namespaceObject.z.string().describe('The task to execute with the selected agent'),
            subagent_type: v3_namespaceObject.z.string().describe(`Name of the agent to use. Available: ${Object.keys(subagentGraphs).join(', ')}`)
        })
    });
}
|
|
610
|
+
/**
 * Creates the middleware that exposes the `task` tool and appends the task
 * instructions to the system prompt of every model call.
 *
 * @param {object} options Sub-agent configuration. `systemPrompt` defaults to
 *   TASK_SYSTEM_PROMPT; passing `null` leaves the request prompt untouched.
 * @returns {object} The middleware produced by langchain's `createMiddleware`.
 */
function createSubAgentMiddleware(options) {
    const { defaultModel, defaultTools = [], defaultMiddleware = null, defaultInterruptOn = null, subagents = [], systemPrompt = TASK_SYSTEM_PROMPT, generalPurposeAgent = true, taskDescription = null } = options;
    const taskTool = createTaskTool({
        defaultModel,
        defaultTools,
        defaultMiddleware,
        defaultInterruptOn,
        subagents,
        generalPurposeAgent,
        taskDescription
    });
    // Joins the existing prompt (if any) and the task prompt with a blank line.
    const appendTaskPrompt = (existing) => existing ? `${existing}\n\n${systemPrompt}` : systemPrompt;
    return (0, external_langchain_namespaceObject.createMiddleware)({
        name: 'subAgentMiddleware',
        tools: [taskTool],
        wrapModelCall: async (request, handler)=>{
            if (null === systemPrompt) return handler(request);
            return handler({
                ...request,
                systemPrompt: appendTaskPrompt(request.systemPrompt || '')
            });
        }
    });
}
|
|
354
639
|
const jieba_namespaceObject = require("@node-rs/jieba");
|
|
355
640
|
const dict_namespaceObject = require("@node-rs/jieba/dict");
|
|
356
641
|
class BM25 {
|
|
@@ -437,18 +722,201 @@ function createWordTools(options) {
|
|
|
437
722
|
})
|
|
438
723
|
];
|
|
439
724
|
}
|
|
725
|
+
/**
 * Splits Markdown text into sections at configured header levels.
 *
 * Each section carries the titles of its enclosing headers as metadata and is
 * prefixed with a "Start of the section: A > B" context line.
 */
class SimpleMarkdownHeaderTextSplitter {
    /**
     * @param {{headersToSplitOn: Array<[string, string]>}} options Pairs of
     *   header marker (e.g. '##') and the metadata key to store its title under.
     */
    constructor(options){
        this.headersToSplitOn = options.headersToSplitOn;
    }
    /**
     * Splits `text` into documents, one per header-delimited section.
     *
     * Header-like lines inside fenced code blocks (``` or ~~~) are treated as
     * content, not as section boundaries. Sections with only blank content
     * are dropped.
     *
     * @param {string} text Markdown source.
     * @returns {Array<{pageContent: string, metadata: object}>} The sections in document order.
     */
    splitText(text) {
        const docs = [];
        const headerMap = new Map(this.headersToSplitOn);
        let currentContent = [];
        let currentMetadata = {};
        // Fence character (` or ~) of the code block we are inside, or null.
        let openFence = null;
        // Emits the section collected so far, unless it is blank.
        const flush = ()=>{
            if (!currentContent.some((l)=>l.trim().length > 0)) return;
            const contextHeader = this.getHeaderContext(currentMetadata);
            docs.push({
                pageContent: (contextHeader + currentContent.join('\n')).trim(),
                metadata: {
                    ...currentMetadata
                }
            });
        };
        for (const line of text.split('\n')){
            const fence = line.match(/^\s*(`{3,}|~{3,})/);
            if (fence) {
                const marker = fence[1][0];
                if (null === openFence) openFence = marker;
                else if (openFence === marker) openFence = null;
                currentContent.push(line);
                continue;
            }
            const match = null === openFence ? line.match(/^(#{1,6}) (.*)/) : null;
            if (match && headerMap.has(match[1])) {
                flush();
                const hashes = match[1];
                const title = match[2];
                const currentLevel = hashes.length;
                // Keep only the titles of shallower headers: a new header resets
                // its own level and everything nested below it.
                const newMetadata = {};
                for (const [h, name] of this.headersToSplitOn)if (h.length < currentLevel && currentMetadata[name]) newMetadata[name] = currentMetadata[name];
                newMetadata[headerMap.get(hashes)] = title;
                currentMetadata = newMetadata;
                currentContent = [];
                continue;
            }
            currentContent.push(line);
        }
        flush();
        return docs;
    }
    /**
     * Builds the context line placed in front of a section.
     *
     * Titles are taken from the configured metadata keys, ordered from the
     * shallowest to the deepest header level; the keys 'h1'..'h6' are also
     * honoured when they are not part of the configuration.
     *
     * @param {object} metadata Header titles keyed by metadata name.
     * @returns {string} The context line followed by a blank line, or '' when
     *   no title is present.
     */
    getHeaderContext(metadata) {
        const names = [
            ...this.headersToSplitOn
        ].sort((a, b)=>a[0].length - b[0].length).map(([, name])=>name);
        for (const legacy of [
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6'
        ])if (!names.includes(legacy)) names.push(legacy);
        const headers = [];
        for (const name of names)if (metadata[name]) headers.push(metadata[name]);
        if (0 === headers.length) return '';
        return `Start of the section: ${headers.join(' > ')}\n\n`;
    }
}
|
|
788
|
+
/**
 * Splits text into search tokens.
 *
 * Every CJK ideograph in the range U+4E00..U+9FA5 becomes its own token and
 * every run of ASCII letters/digits becomes one token. Tokens are lower-cased
 * so that indexing and querying are case-insensitive ("BM25" matches "bm25").
 *
 * @param {string} text Text to tokenize.
 * @returns {string[]} The tokens in order of appearance; empty for non-string
 *   input or when nothing matches.
 */
function simpleTokenize(text) {
    if ('string' != typeof text) return [];
    const matches = text.match(/[\u4e00-\u9fa5]|[a-zA-Z0-9]+/g) || [];
    return matches.map((token)=>token.toLowerCase());
}
|
|
792
|
+
class DocumentSearchEngine {
|
|
793
|
+
constructor(){
|
|
794
|
+
this.documents = [];
|
|
795
|
+
this.tokenizedDocs = [];
|
|
796
|
+
this.bm25 = null;
|
|
797
|
+
}
|
|
798
|
+
async loadAndIndex(markdown) {
|
|
799
|
+
const headersToSplitOn = [
|
|
800
|
+
[
|
|
801
|
+
'#',
|
|
802
|
+
'h1'
|
|
803
|
+
],
|
|
804
|
+
[
|
|
805
|
+
'##',
|
|
806
|
+
'h2'
|
|
807
|
+
],
|
|
808
|
+
[
|
|
809
|
+
'###',
|
|
810
|
+
'h3'
|
|
811
|
+
],
|
|
812
|
+
[
|
|
813
|
+
'####',
|
|
814
|
+
'h4'
|
|
815
|
+
]
|
|
816
|
+
];
|
|
817
|
+
const splitter = new SimpleMarkdownHeaderTextSplitter({
|
|
818
|
+
headersToSplitOn
|
|
819
|
+
});
|
|
820
|
+
const splitDocs = splitter.splitText(markdown);
|
|
821
|
+
const newDocs = splitDocs.map((doc)=>({
|
|
822
|
+
pageContent: doc.pageContent,
|
|
823
|
+
metadata: doc.metadata
|
|
824
|
+
}));
|
|
825
|
+
const newTokens = newDocs.map((doc)=>{
|
|
826
|
+
const tokens = simpleTokenize(doc.pageContent);
|
|
827
|
+
return tokens;
|
|
828
|
+
});
|
|
829
|
+
this.documents = [
|
|
830
|
+
...this.documents,
|
|
831
|
+
...newDocs
|
|
832
|
+
];
|
|
833
|
+
this.tokenizedDocs = [
|
|
834
|
+
...this.tokenizedDocs,
|
|
835
|
+
...newTokens
|
|
836
|
+
];
|
|
837
|
+
this.bm25 = new BM25(this.tokenizedDocs);
|
|
838
|
+
}
|
|
839
|
+
search(query, options = {}) {
|
|
840
|
+
if (!this.bm25 || 0 === this.documents.length) return [];
|
|
841
|
+
const { k = 3 } = options;
|
|
842
|
+
const queryTokens = simpleTokenize(query);
|
|
843
|
+
const scores = this.documents.map((_, index)=>{
|
|
844
|
+
const score = this.bm25.getScore(queryTokens, index);
|
|
845
|
+
return {
|
|
846
|
+
index,
|
|
847
|
+
score
|
|
848
|
+
};
|
|
849
|
+
});
|
|
850
|
+
return scores.filter((item)=>item.score > 0).sort((a, b)=>b.score - a.score).slice(0, k).map(({ index, score })=>({
|
|
851
|
+
document: this.documents[index],
|
|
852
|
+
score
|
|
853
|
+
}));
|
|
854
|
+
}
|
|
855
|
+
getDocuments() {
|
|
856
|
+
return this.documents;
|
|
857
|
+
}
|
|
858
|
+
clear() {
|
|
859
|
+
this.documents = [];
|
|
860
|
+
this.tokenizedDocs = [];
|
|
861
|
+
this.bm25 = null;
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
/**
 * Creates the `search_local_documents` tool over a Markdown document.
 *
 * The document is indexed lazily on the first search; concurrent and later
 * calls share the same indexing promise. Search failures are logged and
 * reported to the model as text instead of being thrown.
 *
 * @param {{markdown: string}} options The Markdown source to search.
 * @returns {Array<object>} A single-element array holding the search tool.
 * @throws {TypeError} When `options` is missing.
 */
function createDocumentSearchTools(options) {
    if (null == options) throw new TypeError('createDocumentSearchTools requires options with a `markdown` string (documentSearchOptions)');
    const { markdown } = options;
    const engine = new DocumentSearchEngine();
    // Memoised indexing run: created on first use and reused afterwards, so
    // the document is indexed at most once.
    let indexingPromise = null;
    const ensureIndexed = ()=>{
        if (!indexingPromise) indexingPromise = engine.loadAndIndex(markdown);
        return indexingPromise;
    };
    return [
        (0, external_langchain_namespaceObject.tool)(async ({ query })=>{
            try {
                await ensureIndexed();
                const results = engine.search(query);
                if (0 === results.length) return '未找到相关文档内容。';
                return results.map((res, i)=>{
                    const metadataStr = Object.entries(res.document.metadata).map(([key, value])=>`${key}: ${value}`).join(', ');
                    return `[结果 ${i + 1}] (得分: ${res.score.toFixed(4)})\n元数据: { ${metadataStr} }\n内容: ${res.document.pageContent}\n`;
                }).join('\n---\n');
            } catch (error) {
                console.error('Document search failed:', error);
                return `检索过程中发生错误: ${error instanceof Error ? error.message : String(error)}`;
            }
        }, {
            name: 'search_local_documents',
            description: '当需要回答关于用户提供文档的问题时使用此工具。该工具会基于 BM25 算法在用户提供的 Markdown 文档中检索相关片段。',
            schema: {
                type: 'object',
                properties: {
                    query: {
                        type: 'string',
                        description: '检索关键词或问题'
                    }
                },
                // The tool cannot search without a query, so declare it mandatory.
                required: [
                    'query'
                ]
            }
        })
    ];
}
|
|
440
906
|
function createReactAgent(options) {
|
|
441
|
-
const { name, model, system_prompt, tools = [], middleware = [], features, databaseOptions, knowledgeOptions, wordOptions, inject_current_time, max_iterations = 0, simple = false } = options;
|
|
907
|
+
const { name, model, system_prompt, subagents = [], tools = [], middleware = [], features, databaseOptions, knowledgeOptions, wordOptions, documentSearchOptions, inject_current_time, max_iterations = 0, simple = false } = options;
|
|
908
|
+
const defaultTools = [
|
|
909
|
+
...features.database ? createDatabaseTools(databaseOptions) : [],
|
|
910
|
+
...features.knowledge ? createKnowledgeTools(knowledgeOptions) : [],
|
|
911
|
+
...features.word ? createWordTools(wordOptions) : [],
|
|
912
|
+
...features.documentSearch ? createDocumentSearchTools(documentSearchOptions) : [],
|
|
913
|
+
...tools
|
|
914
|
+
].filter(Boolean);
|
|
442
915
|
const agent = (0, external_langchain_namespaceObject.createAgent)({
|
|
443
916
|
name,
|
|
444
917
|
model,
|
|
445
918
|
systemPrompt: system_prompt,
|
|
446
|
-
tools: simple ? [] :
|
|
447
|
-
...features.database ? createDatabaseTools(databaseOptions) : [],
|
|
448
|
-
...features.knowledge ? createKnowledgeTools(knowledgeOptions) : [],
|
|
449
|
-
...features.word ? createWordTools(wordOptions) : [],
|
|
450
|
-
...tools
|
|
451
|
-
].filter(Boolean),
|
|
919
|
+
tools: simple ? [] : defaultTools,
|
|
452
920
|
middleware: [
|
|
453
921
|
inject_current_time && currentTimeMiddleware(),
|
|
454
922
|
max_iterations > 0 && maxIterationsMiddleware({
|
|
@@ -457,6 +925,13 @@ function createReactAgent(options) {
|
|
|
457
925
|
!simple && (0, external_langchain_namespaceObject.todoListMiddleware)({
|
|
458
926
|
systemPrompt: TODO_LIST_MIDDLEWARE_SYSTEM_PROMPT
|
|
459
927
|
}),
|
|
928
|
+
!simple && Boolean(subagents.length) && createSubAgentMiddleware({
|
|
929
|
+
defaultModel: model,
|
|
930
|
+
defaultTools: [],
|
|
931
|
+
defaultMiddleware: [],
|
|
932
|
+
subagents,
|
|
933
|
+
generalPurposeAgent: true
|
|
934
|
+
}),
|
|
460
935
|
(0, external_langchain_namespaceObject.summarizationMiddleware)({
|
|
461
936
|
model: model,
|
|
462
937
|
trigger: {
|