@librechat/agents 3.1.57 → 3.1.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +326 -62
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/events.cjs +7 -27
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +303 -222
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/init.cjs +60 -0
- package/dist/cjs/llm/init.cjs.map +1 -0
- package/dist/cjs/llm/invoke.cjs +90 -0
- package/dist/cjs/llm/invoke.cjs.map +1 -0
- package/dist/cjs/llm/openai/index.cjs +2 -0
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/request.cjs +41 -0
- package/dist/cjs/llm/request.cjs.map +1 -0
- package/dist/cjs/main.cjs +40 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +76 -89
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/contextPruning.cjs +156 -0
- package/dist/cjs/messages/contextPruning.cjs.map +1 -0
- package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
- package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
- package/dist/cjs/messages/core.cjs +23 -37
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +156 -11
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +1161 -49
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/messages/reducer.cjs +87 -0
- package/dist/cjs/messages/reducer.cjs.map +1 -0
- package/dist/cjs/run.cjs +81 -42
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +54 -7
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/index.cjs +75 -0
- package/dist/cjs/summarization/index.cjs.map +1 -0
- package/dist/cjs/summarization/node.cjs +663 -0
- package/dist/cjs/summarization/node.cjs.map +1 -0
- package/dist/cjs/tools/ToolNode.cjs +16 -8
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +2 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/utils/errors.cjs +115 -0
- package/dist/cjs/utils/errors.cjs.map +1 -0
- package/dist/cjs/utils/events.cjs +17 -0
- package/dist/cjs/utils/events.cjs.map +1 -1
- package/dist/cjs/utils/handlers.cjs +16 -0
- package/dist/cjs/utils/handlers.cjs.map +1 -1
- package/dist/cjs/utils/llm.cjs +10 -0
- package/dist/cjs/utils/llm.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +247 -14
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/truncation.cjs +107 -0
- package/dist/cjs/utils/truncation.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +325 -61
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/events.mjs +8 -28
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +307 -226
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/init.mjs +58 -0
- package/dist/esm/llm/init.mjs.map +1 -0
- package/dist/esm/llm/invoke.mjs +87 -0
- package/dist/esm/llm/invoke.mjs.map +1 -0
- package/dist/esm/llm/openai/index.mjs +2 -0
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/request.mjs +38 -0
- package/dist/esm/llm/request.mjs.map +1 -0
- package/dist/esm/main.mjs +13 -3
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +76 -89
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/contextPruning.mjs +154 -0
- package/dist/esm/messages/contextPruning.mjs.map +1 -0
- package/dist/esm/messages/contextPruningSettings.mjs +50 -0
- package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
- package/dist/esm/messages/core.mjs +23 -37
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +156 -11
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +1158 -52
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/messages/reducer.mjs +83 -0
- package/dist/esm/messages/reducer.mjs.map +1 -0
- package/dist/esm/run.mjs +82 -43
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +54 -7
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/index.mjs +73 -0
- package/dist/esm/summarization/index.mjs.map +1 -0
- package/dist/esm/summarization/node.mjs +659 -0
- package/dist/esm/summarization/node.mjs.map +1 -0
- package/dist/esm/tools/ToolNode.mjs +16 -8
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +2 -0
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/utils/errors.mjs +111 -0
- package/dist/esm/utils/errors.mjs.map +1 -0
- package/dist/esm/utils/events.mjs +17 -1
- package/dist/esm/utils/events.mjs.map +1 -1
- package/dist/esm/utils/handlers.mjs +16 -0
- package/dist/esm/utils/handlers.mjs.map +1 -1
- package/dist/esm/utils/llm.mjs +10 -1
- package/dist/esm/utils/llm.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +245 -15
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/truncation.mjs +102 -0
- package/dist/esm/utils/truncation.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +124 -6
- package/dist/types/common/enum.d.ts +14 -1
- package/dist/types/graphs/Graph.d.ts +22 -27
- package/dist/types/index.d.ts +5 -0
- package/dist/types/llm/init.d.ts +18 -0
- package/dist/types/llm/invoke.d.ts +48 -0
- package/dist/types/llm/request.d.ts +14 -0
- package/dist/types/messages/contextPruning.d.ts +42 -0
- package/dist/types/messages/contextPruningSettings.d.ts +44 -0
- package/dist/types/messages/core.d.ts +1 -1
- package/dist/types/messages/format.d.ts +17 -1
- package/dist/types/messages/index.d.ts +3 -0
- package/dist/types/messages/prune.d.ts +162 -1
- package/dist/types/messages/reducer.d.ts +18 -0
- package/dist/types/run.d.ts +12 -1
- package/dist/types/summarization/index.d.ts +20 -0
- package/dist/types/summarization/node.d.ts +29 -0
- package/dist/types/tools/ToolNode.d.ts +3 -1
- package/dist/types/types/graph.d.ts +44 -6
- package/dist/types/types/index.d.ts +1 -0
- package/dist/types/types/run.d.ts +30 -0
- package/dist/types/types/stream.d.ts +31 -4
- package/dist/types/types/summarize.d.ts +47 -0
- package/dist/types/types/tools.d.ts +7 -0
- package/dist/types/utils/errors.d.ts +28 -0
- package/dist/types/utils/events.d.ts +13 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/llm.d.ts +4 -0
- package/dist/types/utils/tokens.d.ts +14 -1
- package/dist/types/utils/truncation.d.ts +49 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +388 -58
- package/src/agents/__tests__/AgentContext.test.ts +265 -5
- package/src/common/enum.ts +13 -0
- package/src/events.ts +9 -39
- package/src/graphs/Graph.ts +468 -331
- package/src/index.ts +7 -0
- package/src/llm/anthropic/llm.spec.ts +3 -3
- package/src/llm/anthropic/utils/message_inputs.ts +6 -4
- package/src/llm/bedrock/llm.spec.ts +1 -1
- package/src/llm/bedrock/utils/message_inputs.ts +6 -2
- package/src/llm/init.ts +63 -0
- package/src/llm/invoke.ts +144 -0
- package/src/llm/request.ts +55 -0
- package/src/messages/__tests__/observationMasking.test.ts +221 -0
- package/src/messages/cache.ts +77 -102
- package/src/messages/contextPruning.ts +191 -0
- package/src/messages/contextPruningSettings.ts +90 -0
- package/src/messages/core.ts +32 -53
- package/src/messages/ensureThinkingBlock.test.ts +39 -39
- package/src/messages/format.ts +227 -15
- package/src/messages/formatAgentMessages.test.ts +511 -1
- package/src/messages/index.ts +3 -0
- package/src/messages/prune.ts +1548 -62
- package/src/messages/reducer.ts +22 -0
- package/src/run.ts +104 -51
- package/src/scripts/bedrock-merge-test.ts +1 -1
- package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
- package/src/scripts/test-thinking-handoff.ts +1 -1
- package/src/scripts/thinking-bedrock.ts +1 -1
- package/src/scripts/thinking.ts +1 -1
- package/src/specs/anthropic.simple.test.ts +1 -1
- package/src/specs/multi-agent-summarization.test.ts +396 -0
- package/src/specs/prune.test.ts +1196 -23
- package/src/specs/summarization-unit.test.ts +868 -0
- package/src/specs/summarization.test.ts +3810 -0
- package/src/specs/summarize-prune.test.ts +376 -0
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/specs/thinking-prune.test.ts +7 -4
- package/src/specs/token-accounting-e2e.test.ts +1034 -0
- package/src/specs/token-accounting-pipeline.test.ts +882 -0
- package/src/specs/token-distribution-edge-case.test.ts +25 -26
- package/src/splitStream.test.ts +42 -33
- package/src/stream.ts +64 -11
- package/src/summarization/__tests__/aggregator.test.ts +153 -0
- package/src/summarization/__tests__/node.test.ts +708 -0
- package/src/summarization/__tests__/trigger.test.ts +50 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/node.ts +982 -0
- package/src/tools/ToolNode.ts +25 -3
- package/src/types/graph.ts +62 -7
- package/src/types/index.ts +1 -0
- package/src/types/run.ts +32 -0
- package/src/types/stream.ts +45 -5
- package/src/types/summarize.ts +58 -0
- package/src/types/tools.ts +7 -0
- package/src/utils/errors.ts +117 -0
- package/src/utils/events.ts +31 -0
- package/src/utils/handlers.ts +18 -0
- package/src/utils/index.ts +2 -0
- package/src/utils/llm.ts +12 -0
- package/src/utils/tokens.ts +336 -18
- package/src/utils/truncation.ts +124 -0
- package/src/scripts/image.ts +0 -180
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ingestion-time and pre-flight truncation utilities for tool results.
|
|
3
|
+
*
|
|
4
|
+
* Prevents oversized tool outputs from entering the message array and
|
|
5
|
+
* consuming the entire context window.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Absolute hard cap on tool result length (characters).
 * Even if the model has a 1M-token context, a single tool result
 * larger than this is almost certainly a bug (e.g., dumping a binary file).
 */
const HARD_MAX_TOOL_RESULT_CHARS = 400_000;
/**
 * Computes the dynamic max tool result size based on the model's context window.
 * Uses 30% of the context window (in estimated characters, ~4 chars/token)
 * capped at HARD_MAX_TOOL_RESULT_CHARS.
 *
 * A missing, non-positive, or NaN window cannot produce a meaningful budget,
 * so the hard cap is returned in all of those cases. The result is therefore
 * always a finite, non-negative integer.
 *
 * @param contextWindowTokens - The model's max context tokens (optional).
 * @returns Maximum allowed characters for a single tool result.
 */
function calculateMaxToolResultChars(contextWindowTokens) {
    // `!(x > 0)` is true for zero, negatives AND NaN. A plain `x <= 0` check
    // lets NaN through, which would make this function return NaN and break
    // every `length <= maxChars` comparison performed by callers.
    if (contextWindowTokens == null || !(contextWindowTokens > 0)) {
        return HARD_MAX_TOOL_RESULT_CHARS;
    }
    // 30% of the window in tokens, converted to characters at ~4 chars/token.
    const dynamicLimit = Math.floor(contextWindowTokens * 0.3) * 4;
    return Math.min(dynamicLimit, HARD_MAX_TOOL_RESULT_CHARS);
}
|
|
27
|
+
/**
 * Truncates a tool-call input (the arguments/payload of a tool_use block)
 * using head+tail strategy. Returns an object with `_truncated` (the
 * truncated string) and `_originalChars` (for diagnostics).
 *
 * Accepts any type — objects are JSON-serialized before truncation. Values
 * that JSON cannot represent (`undefined`, functions, symbols) fall back to
 * their `String()` form instead of crashing.
 *
 * `_truncated` never exceeds `maxChars` (for a non-negative `maxChars`):
 *  - budget too small for the indicator: plain head cut, no indicator;
 *  - budget under 100 chars after the indicator: head + indicator;
 *  - otherwise: ~70% head + indicator + ~30% tail.
 * A NaN `maxChars` yields no usable budget, so the input is returned
 * untruncated rather than producing output longer than the original.
 *
 * @param input - The tool input (string, object, etc.).
 * @param maxChars - Maximum allowed characters.
 * @returns The serialized (possibly truncated) input and its original length.
 * @throws TypeError if `input` cannot be JSON-serialized at all
 *         (circular structure, BigInt) — propagated from JSON.stringify.
 */
function truncateToolInput(input, maxChars) {
    // JSON.stringify returns `undefined` (not a string) for undefined,
    // functions and symbols; without the fallback `.length` below would throw.
    const serialized = typeof input === 'string'
        ? input
        : (JSON.stringify(input) ?? String(input));
    const originalChars = serialized.length;
    if (Number.isNaN(maxChars) || originalChars <= maxChars) {
        return { _truncated: serialized, _originalChars: originalChars };
    }
    const indicator = `\n… [truncated: ${originalChars} chars exceeded ${maxChars} limit] …\n`;
    const available = maxChars - indicator.length;
    if (available <= 0) {
        // Not even the indicator fits. Clamp at 0 because slice(0, negative)
        // would count from the END and keep almost the whole payload.
        return {
            _truncated: serialized.slice(0, Math.max(0, maxChars)),
            _originalChars: originalChars,
        };
    }
    if (available < 100) {
        // Too small for a meaningful tail: head-only. The head is cut to the
        // space left AFTER the indicator so the total stays within maxChars.
        return {
            _truncated: serialized.slice(0, available) + indicator.trimEnd(),
            _originalChars: originalChars,
        };
    }
    const headSize = Math.ceil(available * 0.7);
    const tailSize = available - headSize;
    return {
        _truncated: serialized.slice(0, headSize) +
            indicator +
            serialized.slice(originalChars - tailSize),
        _originalChars: originalChars,
    };
}
|
|
59
|
+
/**
 * Truncates tool result content that exceeds `maxChars` using a head+tail
 * strategy. Keeps the beginning (structure/headers) and end (return value /
 * conclusion) of the content so the model retains both the opening context
 * and the final outcome.
 *
 * Head gets ~70% of the budget, tail gets ~30%. Falls back to head-only
 * when the budget is too small for a meaningful tail, and to a bare head cut
 * (no indicator) when the indicator itself does not fit.
 *
 * A negative `maxChars` is treated as a zero budget. A NaN `maxChars` gives
 * no usable budget, so the content is returned unchanged rather than being
 * expanded with an indicator.
 *
 * @param content - The tool result string content.
 * @param maxChars - Maximum allowed characters.
 * @returns The (possibly truncated) content string.
 */
function truncateToolResultContent(content, maxChars) {
    if (Number.isNaN(maxChars) || content.length <= maxChars) {
        return content;
    }
    const indicator = `\n\n… [truncated: ${content.length} chars exceeded ${maxChars} limit] …\n\n`;
    const available = maxChars - indicator.length;
    if (available <= 0) {
        // Clamp at 0: slice(0, negative) counts from the END of the string and
        // would return nearly all of the content for a negative limit.
        return content.slice(0, Math.max(0, maxChars));
    }
    // When budget is too small for a meaningful tail, fall back to head-only
    if (available < 200) {
        return content.slice(0, available) + indicator.trimEnd();
    }
    const headSize = Math.ceil(available * 0.7);
    const tailSize = available - headSize;
    // Try to break at newline boundaries for cleaner output. Snapping only
    // ever shrinks the head (moves its end left) and the tail (moves its
    // start right), so the total can never grow past maxChars.
    let headEnd = headSize;
    const headNewline = content.lastIndexOf('\n', headSize);
    if (headNewline > headSize - 200 && headNewline > 0) {
        headEnd = headNewline;
    }
    let tailStart = content.length - tailSize;
    const tailNewline = content.indexOf('\n', tailStart);
    if (tailNewline > 0 && tailNewline < tailStart + 200) {
        // Start the tail on the character after the newline.
        tailStart = tailNewline + 1;
    }
    return content.slice(0, headEnd) + indicator + content.slice(tailStart);
}
|
|
100
|
+
|
|
101
|
+
export { HARD_MAX_TOOL_RESULT_CHARS, calculateMaxToolResultChars, truncateToolInput, truncateToolResultContent };
|
|
102
|
+
//# sourceMappingURL=truncation.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"truncation.mjs","sources":["../../../src/utils/truncation.ts"],"sourcesContent":["/**\n * Ingestion-time and pre-flight truncation utilities for tool results.\n *\n * Prevents oversized tool outputs from entering the message array and\n * consuming the entire context window.\n */\n\n/**\n * Absolute hard cap on tool result length (characters).\n * Even if the model has a 1M-token context, a single tool result\n * larger than this is almost certainly a bug (e.g., dumping a binary file).\n */\nexport const HARD_MAX_TOOL_RESULT_CHARS = 400_000;\n\n/**\n * Computes the dynamic max tool result size based on the model's context window.\n * Uses 30% of the context window (in estimated characters, ~4 chars/token)\n * capped at HARD_MAX_TOOL_RESULT_CHARS.\n *\n * @param contextWindowTokens - The model's max context tokens (optional).\n * @returns Maximum allowed characters for a single tool result.\n */\nexport function calculateMaxToolResultChars(\n contextWindowTokens?: number\n): number {\n if (contextWindowTokens == null || contextWindowTokens <= 0) {\n return HARD_MAX_TOOL_RESULT_CHARS;\n }\n return Math.min(\n Math.floor(contextWindowTokens * 0.3) * 4,\n HARD_MAX_TOOL_RESULT_CHARS\n );\n}\n\n/**\n * Truncates a tool-call input (the arguments/payload of a tool_use block)\n * using head+tail strategy. Returns an object with `_truncated` (the\n * truncated string) and `_originalChars` (for diagnostics).\n *\n * Accepts any type — objects are JSON-serialized before truncation.\n *\n * @param input - The tool input (string, object, etc.).\n * @param maxChars - Maximum allowed characters.\n */\nexport function truncateToolInput(\n input: unknown,\n maxChars: number\n): { _truncated: string; _originalChars: number } {\n const serialized = typeof input === 'string' ? 
input : JSON.stringify(input);\n if (serialized.length <= maxChars) {\n return { _truncated: serialized, _originalChars: serialized.length };\n }\n const indicator = `\\n… [truncated: ${serialized.length} chars exceeded ${maxChars} limit] …\\n`;\n const available = maxChars - indicator.length;\n\n if (available < 100) {\n return {\n _truncated: serialized.slice(0, maxChars) + indicator.trimEnd(),\n _originalChars: serialized.length,\n };\n }\n\n const headSize = Math.ceil(available * 0.7);\n const tailSize = available - headSize;\n\n return {\n _truncated:\n serialized.slice(0, headSize) +\n indicator +\n serialized.slice(serialized.length - tailSize),\n _originalChars: serialized.length,\n };\n}\n\n/**\n * Truncates tool result content that exceeds `maxChars` using a head+tail\n * strategy. Keeps the beginning (structure/headers) and end (return value /\n * conclusion) of the content so the model retains both the opening context\n * and the final outcome.\n *\n * Head gets ~70% of the budget, tail gets ~30%. 
Falls back to head-only\n * when the budget is too small for a meaningful tail.\n *\n * @param content - The tool result string content.\n * @param maxChars - Maximum allowed characters.\n * @returns The (possibly truncated) content string.\n */\nexport function truncateToolResultContent(\n content: string,\n maxChars: number\n): string {\n if (content.length <= maxChars) {\n return content;\n }\n\n const indicator = `\\n\\n… [truncated: ${content.length} chars exceeded ${maxChars} limit] …\\n\\n`;\n const available = maxChars - indicator.length;\n if (available <= 0) {\n return content.slice(0, maxChars);\n }\n\n // When budget is too small for a meaningful tail, fall back to head-only\n if (available < 200) {\n return content.slice(0, available) + indicator.trimEnd();\n }\n\n const headSize = Math.ceil(available * 0.7);\n const tailSize = available - headSize;\n\n // Try to break at newline boundaries for cleaner output\n let headEnd = headSize;\n const headNewline = content.lastIndexOf('\\n', headSize);\n if (headNewline > headSize - 200 && headNewline > 0) {\n headEnd = headNewline;\n }\n\n let tailStart = content.length - tailSize;\n const tailNewline = content.indexOf('\\n', tailStart);\n if (tailNewline > 0 && tailNewline < tailStart + 200) {\n tailStart = tailNewline + 1;\n }\n\n return content.slice(0, headEnd) + indicator + 
content.slice(tailStart);\n}\n"],"names":[],"mappings":"AAAA;;;;;AAKG;AAEH;;;;AAIG;AACI,MAAM,0BAA0B,GAAG;AAE1C;;;;;;;AAOG;AACG,SAAU,2BAA2B,CACzC,mBAA4B,EAAA;IAE5B,IAAI,mBAAmB,IAAI,IAAI,IAAI,mBAAmB,IAAI,CAAC,EAAE;AAC3D,QAAA,OAAO,0BAA0B;IACnC;AACA,IAAA,OAAO,IAAI,CAAC,GAAG,CACb,IAAI,CAAC,KAAK,CAAC,mBAAmB,GAAG,GAAG,CAAC,GAAG,CAAC,EACzC,0BAA0B,CAC3B;AACH;AAEA;;;;;;;;;AASG;AACG,SAAU,iBAAiB,CAC/B,KAAc,EACd,QAAgB,EAAA;AAEhB,IAAA,MAAM,UAAU,GAAG,OAAO,KAAK,KAAK,QAAQ,GAAG,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;AAC5E,IAAA,IAAI,UAAU,CAAC,MAAM,IAAI,QAAQ,EAAE;QACjC,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,CAAC,MAAM,EAAE;IACtE;IACA,MAAM,SAAS,GAAG,CAAA,gBAAA,EAAmB,UAAU,CAAC,MAAM,CAAA,gBAAA,EAAmB,QAAQ,CAAA,WAAA,CAAa;AAC9F,IAAA,MAAM,SAAS,GAAG,QAAQ,GAAG,SAAS,CAAC,MAAM;AAE7C,IAAA,IAAI,SAAS,GAAG,GAAG,EAAE;QACnB,OAAO;AACL,YAAA,UAAU,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE;YAC/D,cAAc,EAAE,UAAU,CAAC,MAAM;SAClC;IACH;IAEA,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,GAAG,CAAC;AAC3C,IAAA,MAAM,QAAQ,GAAG,SAAS,GAAG,QAAQ;IAErC,OAAO;QACL,UAAU,EACR,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;YAC7B,SAAS;YACT,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,QAAQ,CAAC;QAChD,cAAc,EAAE,UAAU,CAAC,MAAM;KAClC;AACH;AAEA;;;;;;;;;;;;AAYG;AACG,SAAU,yBAAyB,CACvC,OAAe,EACf,QAAgB,EAAA;AAEhB,IAAA,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE;AAC9B,QAAA,OAAO,OAAO;IAChB;IAEA,MAAM,SAAS,GAAG,CAAA,kBAAA,EAAqB,OAAO,CAAC,MAAM,CAAA,gBAAA,EAAmB,QAAQ,CAAA,aAAA,CAAe;AAC/F,IAAA,MAAM,SAAS,GAAG,QAAQ,GAAG,SAAS,CAAC,MAAM;AAC7C,IAAA,IAAI,SAAS,IAAI,CAAC,EAAE;QAClB,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;IACnC;;AAGA,IAAA,IAAI,SAAS,GAAG,GAAG,EAAE;AACnB,QAAA,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE;IAC1D;IAEA,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,GAAG,CAAC;AAC3C,IAAA,MAAM,QAAQ,GAAG,SAAS,GAAG,QAAQ;;IAGrC,IAAI,OAAO,GAAG,QAAQ;IACtB,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,CAAC;IACvD,IAAI,WAAW,GAAG,QAAQ,GAAG,GAAG,IAAI,WAAW,GAAG,CAAC,EAAE;QACnD,OAAO,GAAG,WAAW;IACvB;AAEA,IAAA,IAAI,SA
AS,GAAG,OAAO,CAAC,MAAM,GAAG,QAAQ;IACzC,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC;IACpD,IAAI,WAAW,GAAG,CAAC,IAAI,WAAW,GAAG,SAAS,GAAG,GAAG,EAAE;AACpD,QAAA,SAAS,GAAG,WAAW,GAAG,CAAC;IAC7B;AAEA,IAAA,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,GAAG,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;AACzE;;;;"}
|
|
@@ -28,12 +28,42 @@ export declare class AgentContext {
|
|
|
28
28
|
maxContextTokens?: number;
|
|
29
29
|
/** Current usage metadata for this agent */
|
|
30
30
|
currentUsage?: Partial<UsageMetadata>;
|
|
31
|
+
/**
|
|
32
|
+
* Usage from the most recent LLM call only (not accumulated).
|
|
33
|
+
* Used for accurate provider calibration in pruning.
|
|
34
|
+
*/
|
|
35
|
+
lastCallUsage?: {
|
|
36
|
+
inputTokens: number;
|
|
37
|
+
outputTokens: number;
|
|
38
|
+
totalTokens: number;
|
|
39
|
+
cacheRead?: number;
|
|
40
|
+
cacheCreation?: number;
|
|
41
|
+
};
|
|
42
|
+
/**
|
|
43
|
+
* Whether totalTokens data is fresh (set true when provider usage arrives,
|
|
44
|
+
* false at the start of each turn before the LLM responds).
|
|
45
|
+
* Prevents stale token data from driving pruning/trigger decisions.
|
|
46
|
+
*/
|
|
47
|
+
totalTokensFresh: boolean;
|
|
48
|
+
/** Context pruning configuration. */
|
|
49
|
+
contextPruningConfig?: t.ContextPruningConfig;
|
|
50
|
+
maxToolResultChars?: number;
|
|
31
51
|
/** Prune messages function configured for this agent */
|
|
32
52
|
pruneMessages?: ReturnType<typeof createPruneMessages>;
|
|
33
53
|
/** Token counter function for this agent */
|
|
34
54
|
tokenCounter?: t.TokenCounter;
|
|
35
|
-
/**
|
|
36
|
-
|
|
55
|
+
/** Token count for the system message (instructions text). */
|
|
56
|
+
systemMessageTokens: number;
|
|
57
|
+
/** Token count for tool schemas only. */
|
|
58
|
+
toolSchemaTokens: number;
|
|
59
|
+
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
|
|
60
|
+
calibrationRatio: number;
|
|
61
|
+
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
|
|
62
|
+
resolvedInstructionOverhead?: number;
|
|
63
|
+
/** Pre-masking tool content keyed by message index, consumed by the summarize node. */
|
|
64
|
+
pendingOriginalToolContent?: Map<number, string>;
|
|
65
|
+
/** Total instruction overhead: system message + tool schemas + pending summary. */
|
|
66
|
+
get instructionTokens(): number;
|
|
37
67
|
/** The amount of time that should pass before another consecutive API call */
|
|
38
68
|
streamBuffer?: number;
|
|
39
69
|
/** Last stream call timestamp for rate limiting */
|
|
@@ -76,12 +106,41 @@ export declare class AgentContext {
|
|
|
76
106
|
private cachedSystemRunnable?;
|
|
77
107
|
/** Whether system runnable needs rebuild (set when discovered tools change) */
|
|
78
108
|
private systemRunnableStale;
|
|
79
|
-
/** Cached system message token count (separate from tool tokens) */
|
|
80
|
-
private systemMessageTokens;
|
|
81
109
|
/** Promise for token calculation initialization */
|
|
82
110
|
tokenCalculationPromise?: Promise<void>;
|
|
83
111
|
/** Format content blocks as strings (for legacy compatibility) */
|
|
84
112
|
useLegacyContent: boolean;
|
|
113
|
+
/** Enables graph-level summarization for this agent */
|
|
114
|
+
summarizationEnabled?: boolean;
|
|
115
|
+
/** Summarization runtime settings used by graph pruning hooks */
|
|
116
|
+
summarizationConfig?: t.SummarizationConfig;
|
|
117
|
+
/** Current summary text produced by the summarize node, integrated into system message */
|
|
118
|
+
private summaryText?;
|
|
119
|
+
/** Token count of the current summary (tracked for token accounting) */
|
|
120
|
+
private summaryTokenCount;
|
|
121
|
+
/**
|
|
122
|
+
* Where the summary should be injected:
|
|
123
|
+
* - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
|
|
124
|
+
* - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
|
|
125
|
+
* - `'none'`: no summary present
|
|
126
|
+
*/
|
|
127
|
+
private _summaryLocation;
|
|
128
|
+
/**
|
|
129
|
+
* Durable summary that survives reset() calls. Set from initialSummary
|
|
130
|
+
* during fromConfig() and updated by setSummary() so that the latest
|
|
131
|
+
* summary (whether cross-run or intra-run) is always restored after
|
|
132
|
+
* processStream's resetValues() cycle.
|
|
133
|
+
*/
|
|
134
|
+
private _durableSummaryText?;
|
|
135
|
+
private _durableSummaryTokenCount;
|
|
136
|
+
/** Number of summarization cycles that have occurred for this agent context */
|
|
137
|
+
private _summaryVersion;
|
|
138
|
+
/**
|
|
139
|
+
* Message count at the time summarization was last triggered.
|
|
140
|
+
* Used to prevent re-summarizing the same unchanged message set.
|
|
141
|
+
* Summarization is allowed to fire again only when new messages appear.
|
|
142
|
+
*/
|
|
143
|
+
private _lastSummarizationMsgCount;
|
|
85
144
|
/**
|
|
86
145
|
* Handoff context when this agent receives control via handoff.
|
|
87
146
|
* Contains source and parallel execution info for system message context.
|
|
@@ -92,7 +151,7 @@ export declare class AgentContext {
|
|
|
92
151
|
/** Names of sibling agents executing in parallel (empty if sequential) */
|
|
93
152
|
parallelSiblings: string[];
|
|
94
153
|
};
|
|
95
|
-
constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, }: {
|
|
154
|
+
constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, contextPruningConfig, maxToolResultChars, }: {
|
|
96
155
|
agentId: string;
|
|
97
156
|
name?: string;
|
|
98
157
|
provider: Providers;
|
|
@@ -111,6 +170,10 @@ export declare class AgentContext {
|
|
|
111
170
|
instructionTokens?: number;
|
|
112
171
|
useLegacyContent?: boolean;
|
|
113
172
|
discoveredTools?: string[];
|
|
173
|
+
summarizationEnabled?: boolean;
|
|
174
|
+
summarizationConfig?: t.SummarizationConfig;
|
|
175
|
+
contextPruningConfig?: t.ContextPruningConfig;
|
|
176
|
+
maxToolResultChars?: number;
|
|
114
177
|
});
|
|
115
178
|
/**
|
|
116
179
|
* Builds instructions text for tools that are ONLY callable via programmatic code execution.
|
|
@@ -153,7 +216,18 @@ export declare class AgentContext {
|
|
|
153
216
|
*/
|
|
154
217
|
reset(): void;
|
|
155
218
|
/**
|
|
156
|
-
* Update the token count map
|
|
219
|
+
* Update the token count map from a base map.
|
|
220
|
+
*
|
|
221
|
+
* Previously this inflated index 0 with instructionTokens to indirectly
|
|
222
|
+
* reserve budget for the system prompt. That approach was imprecise: with
|
|
223
|
+
* large tool-schema overhead (e.g. 26 MCP tools ~5 000 tokens) the first
|
|
224
|
+
* conversation message appeared enormous and was always pruned, while the
|
|
225
|
+
* real available budget was never explicitly computed.
|
|
226
|
+
*
|
|
227
|
+
* Now instruction tokens are passed to getMessagesWithinTokenLimit via
|
|
228
|
+
* the `getInstructionTokens` factory param so the pruner subtracts them
|
|
229
|
+
* from the budget directly. The token map contains only real per-message
|
|
230
|
+
* token counts.
|
|
157
231
|
*/
|
|
158
232
|
updateTokenMapWithInstructions(baseTokenMap: Record<string, number>): void;
|
|
159
233
|
/**
|
|
@@ -180,6 +254,50 @@ export declare class AgentContext {
|
|
|
180
254
|
* Call this when resetting the agent or when handoff context is no longer relevant.
|
|
181
255
|
*/
|
|
182
256
|
clearHandoffContext(): void;
|
|
257
|
+
setSummary(text: string, tokenCount: number): void;
|
|
258
|
+
/** Sets a cross-run summary that is injected into the system prompt. */
|
|
259
|
+
setInitialSummary(text: string, tokenCount: number): void;
|
|
260
|
+
/**
|
|
261
|
+
* Replaces the indexTokenCountMap with a fresh map keyed to the surviving
|
|
262
|
+
* context messages after summarization. Called by the summarize node after
|
|
263
|
+
* it emits RemoveMessage operations that shift message indices.
|
|
264
|
+
*/
|
|
265
|
+
rebuildTokenMapAfterSummarization(newTokenMap: Record<string, number>): void;
|
|
266
|
+
hasSummary(): boolean;
|
|
267
|
+
/** True when a mid-run compaction summary is ready to be injected as a HumanMessage. */
|
|
268
|
+
hasPendingCompactionSummary(): boolean;
|
|
269
|
+
getSummaryText(): string | undefined;
|
|
270
|
+
get summaryVersion(): number;
|
|
271
|
+
/**
|
|
272
|
+
* Returns true when the message count hasn't changed since the last
|
|
273
|
+
* summarization — re-summarizing would produce an identical result.
|
|
274
|
+
* Oversized individual messages are handled by fit-to-budget truncation
|
|
275
|
+
* in the pruner, which keeps them in context without triggering overflow.
|
|
276
|
+
*/
|
|
277
|
+
shouldSkipSummarization(currentMsgCount: number): boolean;
|
|
278
|
+
/**
|
|
279
|
+
* Records the message count at which summarization was triggered,
|
|
280
|
+
* so subsequent calls with the same count are suppressed.
|
|
281
|
+
*/
|
|
282
|
+
markSummarizationTriggered(msgCount: number): void;
|
|
283
|
+
clearSummary(): void;
|
|
284
|
+
/**
|
|
285
|
+
* Returns a structured breakdown of how the context token budget is consumed.
|
|
286
|
+
* Useful for diagnostics when context overflow or pruning issues occur.
|
|
287
|
+
*/
|
|
288
|
+
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown;
|
|
289
|
+
/**
|
|
290
|
+
* Returns a human-readable string of the token budget breakdown
|
|
291
|
+
* for inclusion in error messages and diagnostics.
|
|
292
|
+
*/
|
|
293
|
+
formatTokenBudgetBreakdown(messages?: BaseMessage[]): string;
|
|
294
|
+
/**
|
|
295
|
+
* Updates the last-call usage with data from the most recent LLM response.
|
|
296
|
+
* Unlike `currentUsage` which accumulates, this captures only the single call.
|
|
297
|
+
*/
|
|
298
|
+
updateLastCallUsage(usage: Partial<UsageMetadata>): void;
|
|
299
|
+
/** Marks token data as stale before a new LLM call. */
|
|
300
|
+
markTokensStale(): void;
|
|
183
301
|
/**
|
|
184
302
|
* Marks tools as discovered via tool search.
|
|
185
303
|
* Discovered tools will be included in the next model binding.
|
|
@@ -19,6 +19,14 @@ export declare enum GraphEvents {
|
|
|
19
19
|
ON_REASONING_DELTA = "on_reasoning_delta",
|
|
20
20
|
/** [Custom] Request to execute tools - dispatched by ToolNode, handled by host */
|
|
21
21
|
ON_TOOL_EXECUTE = "on_tool_execute",
|
|
22
|
+
/** [Custom] Emitted when the summarize node begins generating a summary */
|
|
23
|
+
ON_SUMMARIZE_START = "on_summarize_start",
|
|
24
|
+
/** [Custom] Delta event carrying the completed summary content */
|
|
25
|
+
ON_SUMMARIZE_DELTA = "on_summarize_delta",
|
|
26
|
+
/** [Custom] Emitted when the summarize node completes with the final summary */
|
|
27
|
+
ON_SUMMARIZE_COMPLETE = "on_summarize_complete",
|
|
28
|
+
/** [Custom] Diagnostic logging event for context management observability */
|
|
29
|
+
ON_AGENT_LOG = "on_agent_log",
|
|
22
30
|
/** Custom event, emitted by system */
|
|
23
31
|
ON_CUSTOM_EVENT = "on_custom_event",
|
|
24
32
|
/** Emitted when a chat model starts processing. */
|
|
@@ -69,6 +77,7 @@ export declare enum Providers {
|
|
|
69
77
|
export declare enum GraphNodeKeys {
|
|
70
78
|
TOOLS = "tools=",
|
|
71
79
|
AGENT = "agent=",
|
|
80
|
+
SUMMARIZE = "summarize=",
|
|
72
81
|
ROUTER = "router",
|
|
73
82
|
PRE_TOOLS = "pre_tools",
|
|
74
83
|
POST_TOOLS = "post_tools"
|
|
@@ -98,6 +107,8 @@ export declare enum ContentTypes {
|
|
|
98
107
|
REASONING = "reasoning",
|
|
99
108
|
/** Multi-Agent Switch */
|
|
100
109
|
AGENT_UPDATE = "agent_update",
|
|
110
|
+
/** Framework-level conversation summary block */
|
|
111
|
+
SUMMARY = "summary",
|
|
101
112
|
/** Bedrock */
|
|
102
113
|
REASONING_CONTENT = "reasoning_content"
|
|
103
114
|
}
|
|
@@ -123,7 +134,9 @@ export declare enum Constants {
|
|
|
123
134
|
CONTENT_AND_ARTIFACT = "content_and_artifact",
|
|
124
135
|
LC_TRANSFER_TO_ = "lc_transfer_to_",
|
|
125
136
|
/** Delimiter for MCP tools: toolName_mcp_serverName */
|
|
126
|
-
MCP_DELIMITER = "_mcp_"
|
|
137
|
+
MCP_DELIMITER = "_mcp_",
|
|
138
|
+
/** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
|
|
139
|
+
ANTHROPIC_SERVER_TOOL_PREFIX = "srvtoolu_"
|
|
127
140
|
}
|
|
128
141
|
export declare enum TitleMethod {
|
|
129
142
|
STRUCTURED = "structured",
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import { ToolNode } from '@langchain/langgraph/prebuilt';
|
|
2
|
-
import {
|
|
2
|
+
import { RunnableConfig } from '@langchain/core/runnables';
|
|
3
3
|
import type { UsageMetadata, BaseMessage } from '@langchain/core/messages';
|
|
4
4
|
import type { ToolCall } from '@langchain/core/messages/tool';
|
|
5
5
|
import type * as t from '@/types';
|
|
6
|
-
import { Providers } from '@/common';
|
|
7
6
|
import { ToolNode as CustomToolNode } from '@/tools/ToolNode';
|
|
8
7
|
import { AgentContext } from '@/agents/AgentContext';
|
|
9
8
|
import { HandlerRegistry } from '@/events';
|
|
@@ -13,11 +12,6 @@ export declare abstract class Graph<T extends t.BaseGraphState = t.BaseGraphStat
|
|
|
13
12
|
currentTools?: t.GraphTools;
|
|
14
13
|
currentToolMap?: t.ToolMap;
|
|
15
14
|
}): CustomToolNode<T> | ToolNode<T>;
|
|
16
|
-
abstract initializeModel({ currentModel, tools, clientOptions, }: {
|
|
17
|
-
currentModel?: t.ChatModel;
|
|
18
|
-
tools?: t.GraphTools;
|
|
19
|
-
clientOptions?: t.ClientOptions;
|
|
20
|
-
}): Runnable;
|
|
21
15
|
abstract getRunMessages(): BaseMessage[] | undefined;
|
|
22
16
|
abstract getContentParts(): t.MessageContentComplex[] | undefined;
|
|
23
17
|
abstract generateStepId(stepKey: string): [string, number];
|
|
@@ -30,7 +24,7 @@ export declare abstract class Graph<T extends t.BaseGraphState = t.BaseGraphStat
|
|
|
30
24
|
abstract dispatchRunStepDelta(id: string, delta: t.ToolCallDelta): Promise<void>;
|
|
31
25
|
abstract dispatchMessageDelta(id: string, delta: t.MessageDelta): Promise<void>;
|
|
32
26
|
abstract dispatchReasoningDelta(stepId: string, delta: t.ReasoningDelta): Promise<void>;
|
|
33
|
-
abstract createCallModel(agentId?: string, currentModel?: t.ChatModel): (state:
|
|
27
|
+
abstract createCallModel(agentId?: string, currentModel?: t.ChatModel): (state: t.AgentSubgraphState, config?: RunnableConfig) => Promise<Partial<t.AgentSubgraphState>>;
|
|
34
28
|
messageStepHasToolCalls: Map<string, boolean>;
|
|
35
29
|
messageIdsByStepKey: Map<string, string>;
|
|
36
30
|
prelimMessageIdsByStepKey: Map<string, string>;
|
|
@@ -39,6 +33,12 @@ export declare abstract class Graph<T extends t.BaseGraphState = t.BaseGraphStat
|
|
|
39
33
|
stepKeyIds: Map<string, string[]>;
|
|
40
34
|
contentIndexMap: Map<string, number>;
|
|
41
35
|
toolCallStepIds: Map<string, string>;
|
|
36
|
+
/**
|
|
37
|
+
* Step IDs that have been dispatched via handler registry directly
|
|
38
|
+
* (in dispatchRunStep). Used by the custom event callback to skip
|
|
39
|
+
* duplicate dispatch through the LangGraph callback chain.
|
|
40
|
+
*/
|
|
41
|
+
handlerDispatchedStepIds: Set<string>;
|
|
42
42
|
signal?: AbortSignal;
|
|
43
43
|
/** Set of invoked tool call IDs from non-message run steps completed mid-run, if any */
|
|
44
44
|
invokedToolIds?: Set<string>;
|
|
@@ -61,14 +61,23 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
|
|
|
61
61
|
/** Optional compile options passed into workflow.compile() */
|
|
62
62
|
compileOptions?: t.CompileOptions | undefined;
|
|
63
63
|
messages: BaseMessage[];
|
|
64
|
+
/** Cached run messages preserved before clearHeavyState() so getRunMessages() works after cleanup. */
|
|
65
|
+
private cachedRunMessages?;
|
|
64
66
|
runId: string | undefined;
|
|
67
|
+
/**
|
|
68
|
+
* Boundary between historical messages (loaded from conversation state)
|
|
69
|
+
* and messages produced during the current run. Set once in the state
|
|
70
|
+
* reducer when messages first arrive. Used by `getRunMessages()` and
|
|
71
|
+
* multi-agent message filtering — NOT for pruner token counting (the
|
|
72
|
+
* pruner maintains its own `lastTurnStartIndex` in its closure).
|
|
73
|
+
*/
|
|
65
74
|
startIndex: number;
|
|
66
75
|
signal?: AbortSignal;
|
|
67
76
|
/** Map of agent contexts by agent ID */
|
|
68
77
|
agentContexts: Map<string, AgentContext>;
|
|
69
78
|
/** Default agent ID to use */
|
|
70
79
|
defaultAgentId: string;
|
|
71
|
-
constructor({ runId, signal, agents, tokenCounter, indexTokenCountMap, }: t.StandardGraphInput);
|
|
80
|
+
constructor({ runId, signal, agents, tokenCounter, indexTokenCountMap, calibrationRatio, }: t.StandardGraphInput);
|
|
72
81
|
resetValues(keepContent?: boolean): void;
|
|
73
82
|
clearHeavyState(): void;
|
|
74
83
|
getRunStep(stepId: string): t.RunStep | undefined;
|
|
@@ -80,6 +89,9 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
|
|
|
80
89
|
checkKeyList(keyList: (string | number | undefined)[]): boolean;
|
|
81
90
|
getRunMessages(): BaseMessage[] | undefined;
|
|
82
91
|
getContentParts(): t.MessageContentComplex[] | undefined;
|
|
92
|
+
getCalibrationRatio(): number;
|
|
93
|
+
getResolvedInstructionOverhead(): number | undefined;
|
|
94
|
+
getToolCount(): number;
|
|
83
95
|
/**
|
|
84
96
|
* Get all run steps, optionally filtered by agent ID
|
|
85
97
|
*/
|
|
@@ -97,32 +109,15 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
|
|
|
97
109
|
* Returns a map where key is the contentPart index and value is the agentId
|
|
98
110
|
*/
|
|
99
111
|
getContentPartAgentMap(): Map<number, string>;
|
|
100
|
-
createSystemRunnable({ provider, clientOptions, instructions, additional_instructions, }: {
|
|
101
|
-
provider?: Providers;
|
|
102
|
-
clientOptions?: t.ClientOptions;
|
|
103
|
-
instructions?: string;
|
|
104
|
-
additional_instructions?: string;
|
|
105
|
-
}): t.SystemRunnable | undefined;
|
|
106
112
|
initializeTools({ currentTools, currentToolMap, agentContext, }: {
|
|
107
113
|
currentTools?: t.GraphTools;
|
|
108
114
|
currentToolMap?: t.ToolMap;
|
|
109
115
|
agentContext?: AgentContext;
|
|
110
116
|
}): CustomToolNode<t.BaseGraphState> | ToolNode<t.BaseGraphState>;
|
|
111
|
-
initializeModel({ provider, tools, clientOptions, }: {
|
|
112
|
-
provider: Providers;
|
|
113
|
-
tools?: t.GraphTools;
|
|
114
|
-
clientOptions?: t.ClientOptions;
|
|
115
|
-
}): Runnable;
|
|
116
117
|
overrideTestModel(responses: string[], sleep?: number, toolCalls?: ToolCall[]): void;
|
|
117
|
-
getNewModel({ provider, clientOptions, }: {
|
|
118
|
-
provider: Providers;
|
|
119
|
-
clientOptions?: t.ClientOptions;
|
|
120
|
-
}): t.ChatModelInstance;
|
|
121
118
|
getUsageMetadata(finalMessage?: BaseMessage): Partial<UsageMetadata> | undefined;
|
|
122
|
-
/** Execute model invocation with streaming support */
|
|
123
|
-
private attemptInvoke;
|
|
124
119
|
cleanupSignalListener(currentModel?: t.ChatModel): void;
|
|
125
|
-
createCallModel(agentId?: string): (state: t.
|
|
120
|
+
createCallModel(agentId?: string): (state: t.AgentSubgraphState, config?: RunnableConfig) => Promise<Partial<t.AgentSubgraphState>>;
|
|
126
121
|
createAgentNode(agentId: string): t.CompiledAgentWorfklow;
|
|
127
122
|
createWorkflow(): t.CompiledStateWorkflow;
|
|
128
123
|
/**
|
package/dist/types/index.d.ts
CHANGED
|
@@ -4,6 +4,7 @@ export * from './splitStream';
|
|
|
4
4
|
export * from './events';
|
|
5
5
|
export * from './messages';
|
|
6
6
|
export * from './graphs';
|
|
7
|
+
export * from './summarization';
|
|
7
8
|
export * from './tools/Calculator';
|
|
8
9
|
export * from './tools/CodeExecutor';
|
|
9
10
|
export * from './tools/ProgrammaticToolCalling';
|
|
@@ -18,3 +19,7 @@ export type * from './types';
|
|
|
18
19
|
export { CustomOpenAIClient } from './llm/openai';
|
|
19
20
|
export { ChatOpenRouter } from './llm/openrouter';
|
|
20
21
|
export type { OpenRouterReasoning, OpenRouterReasoningEffort, ChatOpenRouterCallOptions, } from './llm/openrouter';
|
|
22
|
+
export { getChatModelClass } from './llm/providers';
|
|
23
|
+
export { initializeModel } from './llm/init';
|
|
24
|
+
export { attemptInvoke, tryFallbackProviders } from './llm/invoke';
|
|
25
|
+
export { isThinkingEnabled, getMaxOutputTokensKey } from './llm/request';
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { Runnable } from '@langchain/core/runnables';
|
|
2
|
+
import type * as t from '@/types';
|
|
3
|
+
import { Providers } from '@/common';
|
|
4
|
+
/**
|
|
5
|
+
* Creates a chat model instance for a given provider, applies provider-specific
|
|
6
|
+
* field assignments, and optionally binds tools.
|
|
7
|
+
*
|
|
8
|
+
* This is the single entry point for model creation across the codebase — used
|
|
9
|
+
* by both the agent graph (main LLM) and the summarization node (compaction LLM).
|
|
10
|
+
* An optional `override` model can be passed to skip construction entirely
|
|
11
|
+
* (useful for cached/reused model instances or test fakes).
|
|
12
|
+
*/
|
|
13
|
+
export declare function initializeModel({ provider, clientOptions, tools, override, }: {
|
|
14
|
+
provider: Providers;
|
|
15
|
+
clientOptions?: t.ClientOptions;
|
|
16
|
+
tools?: t.GraphTools;
|
|
17
|
+
override?: t.ChatModelInstance;
|
|
18
|
+
}): Runnable;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { AIMessageChunk } from '@langchain/core/messages';
|
|
2
|
+
import type { RunnableConfig } from '@langchain/core/runnables';
|
|
3
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
4
|
+
import type * as t from '@/types';
|
|
5
|
+
import { ChatModelStreamHandler } from '@/stream';
|
|
6
|
+
import { Providers } from '@/common';
|
|
7
|
+
/**
|
|
8
|
+
* Context passed to `attemptInvoke` for the default stream handler.
|
|
9
|
+
* Matches the subset of Graph that `ChatModelStreamHandler.handle` needs.
|
|
10
|
+
*/
|
|
11
|
+
export type InvokeContext = Parameters<ChatModelStreamHandler['handle']>[3];
|
|
12
|
+
/**
|
|
13
|
+
* Per-chunk callback for custom stream processing.
|
|
14
|
+
* When provided, replaces the default `ChatModelStreamHandler`.
|
|
15
|
+
*/
|
|
16
|
+
export type OnChunk = (chunk: AIMessageChunk) => void | Promise<void>;
|
|
17
|
+
/**
|
|
18
|
+
* Invokes a chat model with the given messages, handling both streaming and
|
|
19
|
+
* non-streaming paths.
|
|
20
|
+
*
|
|
21
|
+
* By default, stream chunks are processed through a `ChatModelStreamHandler`
|
|
22
|
+
* that dispatches run steps (MESSAGE_CREATION, TOOL_CALLS) for the graph.
|
|
23
|
+
* Pass an `onChunk` callback to override this with custom chunk processing
|
|
24
|
+
* (e.g. summarization delta events).
|
|
25
|
+
*/
|
|
26
|
+
export declare function attemptInvoke({ model, messages, provider, context, onChunk, }: {
|
|
27
|
+
model: t.ChatModel;
|
|
28
|
+
messages: BaseMessage[];
|
|
29
|
+
provider: Providers;
|
|
30
|
+
context?: InvokeContext;
|
|
31
|
+
onChunk?: OnChunk;
|
|
32
|
+
}, config?: RunnableConfig): Promise<Partial<t.BaseGraphState>>;
|
|
33
|
+
/**
|
|
34
|
+
* Attempts each fallback provider in order until one succeeds.
|
|
35
|
+
* Throws the last error if all fallbacks fail.
|
|
36
|
+
*/
|
|
37
|
+
export declare function tryFallbackProviders({ fallbacks, tools, messages, config, primaryError, context, onChunk, }: {
|
|
38
|
+
fallbacks: Array<{
|
|
39
|
+
provider: Providers;
|
|
40
|
+
clientOptions?: t.ClientOptions;
|
|
41
|
+
}>;
|
|
42
|
+
tools?: t.GraphTools;
|
|
43
|
+
messages: BaseMessage[];
|
|
44
|
+
config?: RunnableConfig;
|
|
45
|
+
primaryError: unknown;
|
|
46
|
+
context?: InvokeContext;
|
|
47
|
+
onChunk?: OnChunk;
|
|
48
|
+
}): Promise<Partial<t.BaseGraphState> | undefined>;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type * as t from '@/types';
|
|
2
|
+
import { Providers } from '@/common';
|
|
3
|
+
/**
|
|
4
|
+
* Returns true when the provider + clientOptions indicate extended thinking
|
|
5
|
+
* is enabled. Works across Anthropic (direct), Bedrock (additionalModelRequestFields),
|
|
6
|
+
* and OpenAI-compat (modelKwargs.thinking).
|
|
7
|
+
*/
|
|
8
|
+
export declare function isThinkingEnabled(provider: Providers, clientOptions?: t.ClientOptions): boolean;
|
|
9
|
+
/**
|
|
10
|
+
* Returns the correct key for setting max output tokens on the model
|
|
11
|
+
* constructor options. Google/Vertex use `maxOutputTokens`, all others
|
|
12
|
+
* use `maxTokens`.
|
|
13
|
+
*/
|
|
14
|
+
export declare function getMaxOutputTokensKey(provider: Providers | string): 'maxOutputTokens' | 'maxTokens';
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Position-based context pruning for tool results.
|
|
3
|
+
*
|
|
4
|
+
* Uses position-based age: the distance of a message
|
|
5
|
+
* from the conversation end as a fraction of total messages.
|
|
6
|
+
*
|
|
7
|
+
* Two degradation levels:
|
|
8
|
+
* - Soft-trim: Keep head + tail of tool result content, drop middle.
|
|
9
|
+
* - Hard-clear: Replace entire content with a placeholder.
|
|
10
|
+
*
|
|
11
|
+
* Messages in the "protected zone" (recent assistant turns, system/pre-first-human
|
|
12
|
+
* messages, and messages with image content) are never pruned.
|
|
13
|
+
*/
|
|
14
|
+
import { type BaseMessage } from '@langchain/core/messages';
|
|
15
|
+
import type { ContextPruningConfig } from '@/types/graph';
|
|
16
|
+
import type { TokenCounter } from '@/types/run';
|
|
17
|
+
import type { ContextPruningSettings } from './contextPruningSettings';
|
|
18
|
+
export interface ContextPruningResult {
|
|
19
|
+
/** Number of messages that were soft-trimmed. */
|
|
20
|
+
softTrimmed: number;
|
|
21
|
+
/** Number of messages that were hard-cleared. */
|
|
22
|
+
hardCleared: number;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Applies position-based context pruning to tool result messages.
|
|
26
|
+
*
|
|
27
|
+
* Modifies messages in-place and updates indexTokenCountMap with recounted
|
|
28
|
+
* token values for modified messages.
|
|
29
|
+
*
|
|
30
|
+
* @param params.messages - The full message array (modified in-place).
|
|
31
|
+
* @param params.indexTokenCountMap - Token count map (updated in-place).
|
|
32
|
+
* @param params.tokenCounter - Function to recount tokens after modification.
|
|
33
|
+
* @param params.config - Partial context pruning config (merged with defaults).
|
|
34
|
+
* @returns Counts of soft-trimmed and hard-cleared messages.
|
|
35
|
+
*/
|
|
36
|
+
export declare function applyContextPruning(params: {
|
|
37
|
+
messages: BaseMessage[];
|
|
38
|
+
indexTokenCountMap: Record<string, number | undefined>;
|
|
39
|
+
tokenCounter: TokenCounter;
|
|
40
|
+
config?: ContextPruningConfig;
|
|
41
|
+
resolvedSettings?: ContextPruningSettings;
|
|
42
|
+
}): ContextPruningResult;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default settings for position-based context pruning.
|
|
3
|
+
*
|
|
4
|
+
* These are merged with user-provided overrides so any subset can be customized.
|
|
5
|
+
*/
|
|
6
|
+
export interface ContextPruningSettings {
|
|
7
|
+
/** Whether position-based pruning is enabled. Default: false (opt-in). */
|
|
8
|
+
enabled: boolean;
|
|
9
|
+
/** Number of recent assistant turns to protect from pruning. Default: 3 */
|
|
10
|
+
keepLastAssistants: number;
|
|
11
|
+
/** Age ratio (0-1) at which soft-trim fires. Default: 0.3 */
|
|
12
|
+
softTrimRatio: number;
|
|
13
|
+
/** Age ratio (0-1) at which hard-clear fires. Default: 0.5 */
|
|
14
|
+
hardClearRatio: number;
|
|
15
|
+
/** Minimum tool result size (chars) before pruning applies. Default: 50000 */
|
|
16
|
+
minPrunableToolChars: number;
|
|
17
|
+
softTrim: {
|
|
18
|
+
/** Maximum total chars after soft-trim. Default: 4000 */
|
|
19
|
+
maxChars: number;
|
|
20
|
+
/** Head portion to keep. Default: 1500 */
|
|
21
|
+
headChars: number;
|
|
22
|
+
/** Tail portion to keep. Default: 1500 */
|
|
23
|
+
tailChars: number;
|
|
24
|
+
};
|
|
25
|
+
hardClear: {
|
|
26
|
+
/** Whether hard-clear is enabled. Default: true */
|
|
27
|
+
enabled: boolean;
|
|
28
|
+
/** Placeholder text for hard-cleared content. */
|
|
29
|
+
placeholder: string;
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
export declare const DEFAULT_CONTEXT_PRUNING_SETTINGS: ContextPruningSettings;
|
|
33
|
+
/**
|
|
34
|
+
* Merges user-provided partial overrides with the defaults.
|
|
35
|
+
*/
|
|
36
|
+
export declare function resolveContextPruningSettings(overrides?: Partial<{
|
|
37
|
+
enabled?: boolean;
|
|
38
|
+
keepLastAssistants?: number;
|
|
39
|
+
softTrimRatio?: number;
|
|
40
|
+
hardClearRatio?: number;
|
|
41
|
+
minPrunableToolChars?: number;
|
|
42
|
+
softTrim?: Partial<ContextPruningSettings['softTrim']>;
|
|
43
|
+
hardClear?: Partial<ContextPruningSettings['hardClear']>;
|
|
44
|
+
}>): ContextPruningSettings;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { AIMessage, BaseMessage, ToolMessage, HumanMessage, AIMessageChunk } from '@langchain/core/messages';
|
|
2
2
|
import type * as t from '@/types';
|
|
3
3
|
import { Providers } from '@/common';
|
|
4
4
|
export declare function getConverseOverrideMessage({ userMessage, lastMessageX, lastMessageY, }: {
|