@librechat/agents 3.1.57 → 3.1.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +326 -62
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/events.cjs +7 -27
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +303 -222
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/init.cjs +60 -0
- package/dist/cjs/llm/init.cjs.map +1 -0
- package/dist/cjs/llm/invoke.cjs +90 -0
- package/dist/cjs/llm/invoke.cjs.map +1 -0
- package/dist/cjs/llm/openai/index.cjs +2 -0
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/request.cjs +41 -0
- package/dist/cjs/llm/request.cjs.map +1 -0
- package/dist/cjs/main.cjs +40 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +76 -89
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/contextPruning.cjs +156 -0
- package/dist/cjs/messages/contextPruning.cjs.map +1 -0
- package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
- package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
- package/dist/cjs/messages/core.cjs +23 -37
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +156 -11
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +1161 -49
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/messages/reducer.cjs +87 -0
- package/dist/cjs/messages/reducer.cjs.map +1 -0
- package/dist/cjs/run.cjs +81 -42
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +54 -7
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/index.cjs +75 -0
- package/dist/cjs/summarization/index.cjs.map +1 -0
- package/dist/cjs/summarization/node.cjs +663 -0
- package/dist/cjs/summarization/node.cjs.map +1 -0
- package/dist/cjs/tools/ToolNode.cjs +16 -8
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +2 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/utils/errors.cjs +115 -0
- package/dist/cjs/utils/errors.cjs.map +1 -0
- package/dist/cjs/utils/events.cjs +17 -0
- package/dist/cjs/utils/events.cjs.map +1 -1
- package/dist/cjs/utils/handlers.cjs +16 -0
- package/dist/cjs/utils/handlers.cjs.map +1 -1
- package/dist/cjs/utils/llm.cjs +10 -0
- package/dist/cjs/utils/llm.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +247 -14
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/truncation.cjs +107 -0
- package/dist/cjs/utils/truncation.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +325 -61
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/events.mjs +8 -28
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +307 -226
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/init.mjs +58 -0
- package/dist/esm/llm/init.mjs.map +1 -0
- package/dist/esm/llm/invoke.mjs +87 -0
- package/dist/esm/llm/invoke.mjs.map +1 -0
- package/dist/esm/llm/openai/index.mjs +2 -0
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/request.mjs +38 -0
- package/dist/esm/llm/request.mjs.map +1 -0
- package/dist/esm/main.mjs +13 -3
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +76 -89
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/contextPruning.mjs +154 -0
- package/dist/esm/messages/contextPruning.mjs.map +1 -0
- package/dist/esm/messages/contextPruningSettings.mjs +50 -0
- package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
- package/dist/esm/messages/core.mjs +23 -37
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +156 -11
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +1158 -52
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/messages/reducer.mjs +83 -0
- package/dist/esm/messages/reducer.mjs.map +1 -0
- package/dist/esm/run.mjs +82 -43
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +54 -7
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/index.mjs +73 -0
- package/dist/esm/summarization/index.mjs.map +1 -0
- package/dist/esm/summarization/node.mjs +659 -0
- package/dist/esm/summarization/node.mjs.map +1 -0
- package/dist/esm/tools/ToolNode.mjs +16 -8
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +2 -0
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/utils/errors.mjs +111 -0
- package/dist/esm/utils/errors.mjs.map +1 -0
- package/dist/esm/utils/events.mjs +17 -1
- package/dist/esm/utils/events.mjs.map +1 -1
- package/dist/esm/utils/handlers.mjs +16 -0
- package/dist/esm/utils/handlers.mjs.map +1 -1
- package/dist/esm/utils/llm.mjs +10 -1
- package/dist/esm/utils/llm.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +245 -15
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/truncation.mjs +102 -0
- package/dist/esm/utils/truncation.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +124 -6
- package/dist/types/common/enum.d.ts +14 -1
- package/dist/types/graphs/Graph.d.ts +22 -27
- package/dist/types/index.d.ts +5 -0
- package/dist/types/llm/init.d.ts +18 -0
- package/dist/types/llm/invoke.d.ts +48 -0
- package/dist/types/llm/request.d.ts +14 -0
- package/dist/types/messages/contextPruning.d.ts +42 -0
- package/dist/types/messages/contextPruningSettings.d.ts +44 -0
- package/dist/types/messages/core.d.ts +1 -1
- package/dist/types/messages/format.d.ts +17 -1
- package/dist/types/messages/index.d.ts +3 -0
- package/dist/types/messages/prune.d.ts +162 -1
- package/dist/types/messages/reducer.d.ts +18 -0
- package/dist/types/run.d.ts +12 -1
- package/dist/types/summarization/index.d.ts +20 -0
- package/dist/types/summarization/node.d.ts +29 -0
- package/dist/types/tools/ToolNode.d.ts +3 -1
- package/dist/types/types/graph.d.ts +44 -6
- package/dist/types/types/index.d.ts +1 -0
- package/dist/types/types/run.d.ts +30 -0
- package/dist/types/types/stream.d.ts +31 -4
- package/dist/types/types/summarize.d.ts +47 -0
- package/dist/types/types/tools.d.ts +7 -0
- package/dist/types/utils/errors.d.ts +28 -0
- package/dist/types/utils/events.d.ts +13 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/llm.d.ts +4 -0
- package/dist/types/utils/tokens.d.ts +14 -1
- package/dist/types/utils/truncation.d.ts +49 -0
- package/package.json +3 -3
- package/src/agents/AgentContext.ts +388 -58
- package/src/agents/__tests__/AgentContext.test.ts +265 -5
- package/src/common/enum.ts +13 -0
- package/src/events.ts +9 -39
- package/src/graphs/Graph.ts +468 -331
- package/src/index.ts +7 -0
- package/src/llm/anthropic/llm.spec.ts +3 -3
- package/src/llm/anthropic/utils/message_inputs.ts +6 -4
- package/src/llm/bedrock/llm.spec.ts +1 -1
- package/src/llm/bedrock/utils/message_inputs.ts +6 -2
- package/src/llm/init.ts +63 -0
- package/src/llm/invoke.ts +144 -0
- package/src/llm/request.ts +55 -0
- package/src/messages/__tests__/observationMasking.test.ts +221 -0
- package/src/messages/cache.ts +77 -102
- package/src/messages/contextPruning.ts +191 -0
- package/src/messages/contextPruningSettings.ts +90 -0
- package/src/messages/core.ts +32 -53
- package/src/messages/ensureThinkingBlock.test.ts +39 -39
- package/src/messages/format.ts +227 -15
- package/src/messages/formatAgentMessages.test.ts +511 -1
- package/src/messages/index.ts +3 -0
- package/src/messages/prune.ts +1548 -62
- package/src/messages/reducer.ts +22 -0
- package/src/run.ts +104 -51
- package/src/scripts/bedrock-merge-test.ts +1 -1
- package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
- package/src/scripts/test-thinking-handoff.ts +1 -1
- package/src/scripts/thinking-bedrock.ts +1 -1
- package/src/scripts/thinking.ts +1 -1
- package/src/specs/anthropic.simple.test.ts +1 -1
- package/src/specs/multi-agent-summarization.test.ts +396 -0
- package/src/specs/prune.test.ts +1196 -23
- package/src/specs/summarization-unit.test.ts +868 -0
- package/src/specs/summarization.test.ts +3827 -0
- package/src/specs/summarize-prune.test.ts +376 -0
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/specs/thinking-prune.test.ts +7 -4
- package/src/specs/token-accounting-e2e.test.ts +1034 -0
- package/src/specs/token-accounting-pipeline.test.ts +882 -0
- package/src/specs/token-distribution-edge-case.test.ts +25 -26
- package/src/splitStream.test.ts +42 -33
- package/src/stream.ts +64 -11
- package/src/summarization/__tests__/aggregator.test.ts +153 -0
- package/src/summarization/__tests__/node.test.ts +708 -0
- package/src/summarization/__tests__/trigger.test.ts +50 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/node.ts +982 -0
- package/src/tools/ToolNode.ts +25 -3
- package/src/types/graph.ts +62 -7
- package/src/types/index.ts +1 -0
- package/src/types/run.ts +32 -0
- package/src/types/stream.ts +45 -5
- package/src/types/summarize.ts +58 -0
- package/src/types/tools.ts +7 -0
- package/src/utils/errors.ts +117 -0
- package/src/utils/events.ts +31 -0
- package/src/utils/handlers.ts +18 -0
- package/src/utils/index.ts +2 -0
- package/src/utils/llm.ts +12 -0
- package/src/utils/tokens.ts +336 -18
- package/src/utils/truncation.ts +124 -0
- package/src/scripts/image.ts +0 -180
|
@@ -3,11 +3,29 @@
|
|
|
3
3
|
var messages = require('@langchain/core/messages');
|
|
4
4
|
var runnables = require('@langchain/core/runnables');
|
|
5
5
|
var schema$1 = require('../tools/schema.cjs');
|
|
6
|
+
var cache = require('../messages/cache.cjs');
|
|
6
7
|
var _enum = require('../common/enum.cjs');
|
|
8
|
+
require('../messages/core.cjs');
|
|
9
|
+
require('nanoid');
|
|
10
|
+
var prune = require('../messages/prune.cjs');
|
|
11
|
+
require('@langchain/core/callbacks/dispatch');
|
|
12
|
+
require('uuid');
|
|
7
13
|
var schema = require('../utils/schema.cjs');
|
|
8
14
|
|
|
9
15
|
/* eslint-disable no-console */
|
|
10
|
-
|
|
16
|
+
/**
|
|
17
|
+
* Anthropic direct API tool schema overhead multiplier.
|
|
18
|
+
* Empirically calibrated against real MCP tool sets (29 tools).
|
|
19
|
+
* Accounts for Anthropic's internal XML-like tool encoding plus
|
|
20
|
+
* a ~300-token hidden tool-system preamble.
|
|
21
|
+
*/
|
|
22
|
+
const ANTHROPIC_TOOL_TOKEN_MULTIPLIER = 2.6;
|
|
23
|
+
/**
|
|
24
|
+
* Default tool schema overhead multiplier for all non-Anthropic providers.
|
|
25
|
+
* Covers OpenAI function-calling format, Bedrock, and other providers.
|
|
26
|
+
* Empirically calibrated at ~1.4× the raw JSON token count.
|
|
27
|
+
*/
|
|
28
|
+
const DEFAULT_TOOL_TOKEN_MULTIPLIER = 1.4;
|
|
11
29
|
/**
|
|
12
30
|
* Encapsulates agent-specific state that can vary between agents in a multi-agent system
|
|
13
31
|
*/
|
|
@@ -16,7 +34,7 @@ class AgentContext {
|
|
|
16
34
|
* Create an AgentContext from configuration with token accounting initialization
|
|
17
35
|
*/
|
|
18
36
|
static fromConfig(agentConfig, tokenCounter, indexTokenCountMap) {
|
|
19
|
-
const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, } = agentConfig;
|
|
37
|
+
const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, initialSummary, contextPruningConfig, maxToolResultChars, } = agentConfig;
|
|
20
38
|
const agentContext = new AgentContext({
|
|
21
39
|
agentId,
|
|
22
40
|
name: name ?? agentId,
|
|
@@ -36,11 +54,15 @@ class AgentContext {
|
|
|
36
54
|
tokenCounter,
|
|
37
55
|
useLegacyContent,
|
|
38
56
|
discoveredTools,
|
|
57
|
+
summarizationEnabled,
|
|
58
|
+
summarizationConfig,
|
|
59
|
+
contextPruningConfig,
|
|
60
|
+
maxToolResultChars,
|
|
39
61
|
});
|
|
62
|
+
if (initialSummary?.text != null && initialSummary.text !== '') {
|
|
63
|
+
agentContext.setInitialSummary(initialSummary.text, initialSummary.tokenCount);
|
|
64
|
+
}
|
|
40
65
|
if (tokenCounter) {
|
|
41
|
-
// Initialize system runnable BEFORE async tool token calculation
|
|
42
|
-
// This ensures system message tokens are in instructionTokens before
|
|
43
|
-
// updateTokenMapWithInstructions is called
|
|
44
66
|
agentContext.initializeSystemRunnable();
|
|
45
67
|
const tokenMap = indexTokenCountMap || {};
|
|
46
68
|
agentContext.baseIndexTokenCountMap = { ...tokenMap };
|
|
@@ -48,7 +70,6 @@ class AgentContext {
|
|
|
48
70
|
agentContext.tokenCalculationPromise = agentContext
|
|
49
71
|
.calculateInstructionTokens(tokenCounter)
|
|
50
72
|
.then(() => {
|
|
51
|
-
// Update token map with instruction tokens (includes system + tool tokens)
|
|
52
73
|
agentContext.updateTokenMapWithInstructions(tokenMap);
|
|
53
74
|
})
|
|
54
75
|
.catch((err) => {
|
|
@@ -77,12 +98,39 @@ class AgentContext {
|
|
|
77
98
|
maxContextTokens;
|
|
78
99
|
/** Current usage metadata for this agent */
|
|
79
100
|
currentUsage;
|
|
101
|
+
/**
|
|
102
|
+
* Usage from the most recent LLM call only (not accumulated).
|
|
103
|
+
* Used for accurate provider calibration in pruning.
|
|
104
|
+
*/
|
|
105
|
+
lastCallUsage;
|
|
106
|
+
/**
|
|
107
|
+
* Whether totalTokens data is fresh (set true when provider usage arrives,
|
|
108
|
+
* false at the start of each turn before the LLM responds).
|
|
109
|
+
* Prevents stale token data from driving pruning/trigger decisions.
|
|
110
|
+
*/
|
|
111
|
+
totalTokensFresh = false;
|
|
112
|
+
/** Context pruning configuration. */
|
|
113
|
+
contextPruningConfig;
|
|
114
|
+
maxToolResultChars;
|
|
80
115
|
/** Prune messages function configured for this agent */
|
|
81
116
|
pruneMessages;
|
|
82
117
|
/** Token counter function for this agent */
|
|
83
118
|
tokenCounter;
|
|
84
|
-
/**
|
|
85
|
-
|
|
119
|
+
/** Token count for the system message (instructions text). */
|
|
120
|
+
systemMessageTokens = 0;
|
|
121
|
+
/** Token count for tool schemas only. */
|
|
122
|
+
toolSchemaTokens = 0;
|
|
123
|
+
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
|
|
124
|
+
calibrationRatio = 1;
|
|
125
|
+
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
|
|
126
|
+
resolvedInstructionOverhead;
|
|
127
|
+
/** Pre-masking tool content keyed by message index, consumed by the summarize node. */
|
|
128
|
+
pendingOriginalToolContent;
|
|
129
|
+
/** Total instruction overhead: system message + tool schemas + pending summary. */
|
|
130
|
+
get instructionTokens() {
|
|
131
|
+
const summaryOverhead = this._summaryLocation === 'user_message' ? this.summaryTokenCount : 0;
|
|
132
|
+
return this.systemMessageTokens + this.toolSchemaTokens + summaryOverhead;
|
|
133
|
+
}
|
|
86
134
|
/** The amount of time that should pass before another consecutive API call */
|
|
87
135
|
streamBuffer;
|
|
88
136
|
/** Last stream call timestamp for rate limiting */
|
|
@@ -125,18 +173,47 @@ class AgentContext {
|
|
|
125
173
|
cachedSystemRunnable;
|
|
126
174
|
/** Whether system runnable needs rebuild (set when discovered tools change) */
|
|
127
175
|
systemRunnableStale = true;
|
|
128
|
-
/** Cached system message token count (separate from tool tokens) */
|
|
129
|
-
systemMessageTokens = 0;
|
|
130
176
|
/** Promise for token calculation initialization */
|
|
131
177
|
tokenCalculationPromise;
|
|
132
178
|
/** Format content blocks as strings (for legacy compatibility) */
|
|
133
179
|
useLegacyContent = false;
|
|
180
|
+
/** Enables graph-level summarization for this agent */
|
|
181
|
+
summarizationEnabled;
|
|
182
|
+
/** Summarization runtime settings used by graph pruning hooks */
|
|
183
|
+
summarizationConfig;
|
|
184
|
+
/** Current summary text produced by the summarize node, integrated into system message */
|
|
185
|
+
summaryText;
|
|
186
|
+
/** Token count of the current summary (tracked for token accounting) */
|
|
187
|
+
summaryTokenCount = 0;
|
|
188
|
+
/**
|
|
189
|
+
* Where the summary should be injected:
|
|
190
|
+
* - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
|
|
191
|
+
* - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
|
|
192
|
+
* - `'none'`: no summary present
|
|
193
|
+
*/
|
|
194
|
+
_summaryLocation = 'none';
|
|
195
|
+
/**
|
|
196
|
+
* Durable summary that survives reset() calls. Set from initialSummary
|
|
197
|
+
* during fromConfig() and updated by setSummary() so that the latest
|
|
198
|
+
* summary (whether cross-run or intra-run) is always restored after
|
|
199
|
+
* processStream's resetValues() cycle.
|
|
200
|
+
*/
|
|
201
|
+
_durableSummaryText;
|
|
202
|
+
_durableSummaryTokenCount = 0;
|
|
203
|
+
/** Number of summarization cycles that have occurred for this agent context */
|
|
204
|
+
_summaryVersion = 0;
|
|
205
|
+
/**
|
|
206
|
+
* Message count at the time summarization was last triggered.
|
|
207
|
+
* Used to prevent re-summarizing the same unchanged message set.
|
|
208
|
+
* Summarization is allowed to fire again only when new messages appear.
|
|
209
|
+
*/
|
|
210
|
+
_lastSummarizationMsgCount = 0;
|
|
134
211
|
/**
|
|
135
212
|
* Handoff context when this agent receives control via handoff.
|
|
136
213
|
* Contains source and parallel execution info for system message context.
|
|
137
214
|
*/
|
|
138
215
|
handoffContext;
|
|
139
|
-
constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, }) {
|
|
216
|
+
constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, contextPruningConfig, maxToolResultChars, }) {
|
|
140
217
|
this.agentId = agentId;
|
|
141
218
|
this.name = name;
|
|
142
219
|
this.provider = provider;
|
|
@@ -157,9 +234,13 @@ class AgentContext {
|
|
|
157
234
|
this.toolEnd = toolEnd;
|
|
158
235
|
}
|
|
159
236
|
if (instructionTokens !== undefined) {
|
|
160
|
-
this.
|
|
237
|
+
this.systemMessageTokens = instructionTokens;
|
|
161
238
|
}
|
|
162
239
|
this.useLegacyContent = useLegacyContent ?? false;
|
|
240
|
+
this.summarizationEnabled = summarizationEnabled;
|
|
241
|
+
this.summarizationConfig = summarizationConfig;
|
|
242
|
+
this.contextPruningConfig = contextPruningConfig;
|
|
243
|
+
this.maxToolResultChars = maxToolResultChars;
|
|
163
244
|
if (discoveredTools && discoveredTools.length > 0) {
|
|
164
245
|
for (const toolName of discoveredTools) {
|
|
165
246
|
this.discoveredToolNames.add(toolName);
|
|
@@ -185,7 +266,6 @@ class AgentContext {
|
|
|
185
266
|
!allowedCallers.includes('direct');
|
|
186
267
|
if (!isCodeExecutionOnly)
|
|
187
268
|
continue;
|
|
188
|
-
// Include if: not deferred OR deferred but discovered
|
|
189
269
|
const isDeferred = toolDef.defer_loading === true;
|
|
190
270
|
const isDiscovered = this.discoveredToolNames.has(name);
|
|
191
271
|
if (!isDeferred || isDiscovered) {
|
|
@@ -217,11 +297,9 @@ class AgentContext {
|
|
|
217
297
|
* Only rebuilds when marked stale (via markToolsAsDiscovered).
|
|
218
298
|
*/
|
|
219
299
|
get systemRunnable() {
|
|
220
|
-
// Return cached if not stale
|
|
221
300
|
if (!this.systemRunnableStale && this.cachedSystemRunnable !== undefined) {
|
|
222
301
|
return this.cachedSystemRunnable;
|
|
223
302
|
}
|
|
224
|
-
// Stale or first access - rebuild
|
|
225
303
|
const instructionsString = this.buildInstructionsString();
|
|
226
304
|
this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
|
|
227
305
|
this.systemRunnableStale = false;
|
|
@@ -244,25 +322,29 @@ class AgentContext {
|
|
|
244
322
|
*/
|
|
245
323
|
buildInstructionsString() {
|
|
246
324
|
const parts = [];
|
|
247
|
-
/** Build agent identity and handoff context preamble */
|
|
248
325
|
const identityPreamble = this.buildIdentityPreamble();
|
|
249
326
|
if (identityPreamble) {
|
|
250
327
|
parts.push(identityPreamble);
|
|
251
328
|
}
|
|
252
|
-
/** Add main instructions */
|
|
253
329
|
if (this.instructions != null && this.instructions !== '') {
|
|
254
330
|
parts.push(this.instructions);
|
|
255
331
|
}
|
|
256
|
-
/** Add additional instructions */
|
|
257
332
|
if (this.additionalInstructions != null &&
|
|
258
333
|
this.additionalInstructions !== '') {
|
|
259
334
|
parts.push(this.additionalInstructions);
|
|
260
335
|
}
|
|
261
|
-
/** Add programmatic tools documentation */
|
|
262
336
|
const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
|
|
263
337
|
if (programmaticToolsDoc) {
|
|
264
338
|
parts.push(programmaticToolsDoc);
|
|
265
339
|
}
|
|
340
|
+
// Cross-run summary: include in system prompt so the model has context
|
|
341
|
+
// from the prior run. Mid-run summaries are injected as a HumanMessage
|
|
342
|
+
// on the post-compaction clean slate instead (see buildSystemRunnable).
|
|
343
|
+
if (this._summaryLocation === 'system_prompt' &&
|
|
344
|
+
this.summaryText != null &&
|
|
345
|
+
this.summaryText !== '') {
|
|
346
|
+
parts.push('## Conversation Summary\n\n' + this.summaryText);
|
|
347
|
+
}
|
|
266
348
|
return parts.join('\n\n');
|
|
267
349
|
}
|
|
268
350
|
/**
|
|
@@ -289,17 +371,19 @@ class AgentContext {
|
|
|
289
371
|
* Only called when content has actually changed.
|
|
290
372
|
*/
|
|
291
373
|
buildSystemRunnable(instructionsString) {
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
this.
|
|
374
|
+
const hasMidRunSummary = this._summaryLocation === 'user_message' &&
|
|
375
|
+
this.summaryText != null &&
|
|
376
|
+
this.summaryText !== '';
|
|
377
|
+
if (!instructionsString && !hasMidRunSummary) {
|
|
295
378
|
this.systemMessageTokens = 0;
|
|
296
379
|
return undefined;
|
|
297
380
|
}
|
|
298
381
|
let finalInstructions = instructionsString;
|
|
299
|
-
|
|
382
|
+
let usePromptCache = false;
|
|
300
383
|
if (this.provider === _enum.Providers.ANTHROPIC) {
|
|
301
384
|
const anthropicOptions = this.clientOptions;
|
|
302
385
|
if (anthropicOptions?.promptCache === true) {
|
|
386
|
+
usePromptCache = true;
|
|
303
387
|
finalInstructions = {
|
|
304
388
|
content: [
|
|
305
389
|
{
|
|
@@ -311,23 +395,56 @@ class AgentContext {
|
|
|
311
395
|
};
|
|
312
396
|
}
|
|
313
397
|
}
|
|
314
|
-
const systemMessage =
|
|
315
|
-
|
|
398
|
+
const systemMessage = instructionsString
|
|
399
|
+
? new messages.SystemMessage(finalInstructions)
|
|
400
|
+
: undefined;
|
|
316
401
|
if (this.tokenCounter) {
|
|
317
|
-
this.
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
}
|
|
321
|
-
return runnables.RunnableLambda.from((messages) => {
|
|
322
|
-
|
|
402
|
+
this.systemMessageTokens = systemMessage
|
|
403
|
+
? this.tokenCounter(systemMessage)
|
|
404
|
+
: 0;
|
|
405
|
+
}
|
|
406
|
+
return runnables.RunnableLambda.from((messages$1) => {
|
|
407
|
+
const prefix = systemMessage ? [systemMessage] : [];
|
|
408
|
+
// Build the non-system portion (summary + conversation), then apply
|
|
409
|
+
// cache markers separately so addCacheControl doesn't strip the
|
|
410
|
+
// SystemMessage's own cache_control breakpoint set above.
|
|
411
|
+
const hasSummaryBody = this._summaryLocation === 'user_message' &&
|
|
412
|
+
this.summaryText != null &&
|
|
413
|
+
this.summaryText !== '';
|
|
414
|
+
let body;
|
|
415
|
+
if (hasSummaryBody) {
|
|
416
|
+
const wrappedSummary = '<summary>\n' +
|
|
417
|
+
this.summaryText +
|
|
418
|
+
'\n</summary>\n\n' +
|
|
419
|
+
'This is your own checkpoint: you wrote it to preserve context after compaction. Pick up where you left off based on the summary above. Do not repeat prior tasks, information or acknowledge this checkpoint message directly.';
|
|
420
|
+
const summaryMsg = usePromptCache
|
|
421
|
+
? new messages.HumanMessage({
|
|
422
|
+
content: [
|
|
423
|
+
{
|
|
424
|
+
type: 'text',
|
|
425
|
+
text: wrappedSummary,
|
|
426
|
+
cache_control: { type: 'ephemeral' },
|
|
427
|
+
},
|
|
428
|
+
],
|
|
429
|
+
})
|
|
430
|
+
: new messages.HumanMessage(wrappedSummary);
|
|
431
|
+
body = [summaryMsg, ...messages$1];
|
|
432
|
+
}
|
|
433
|
+
else {
|
|
434
|
+
body = messages$1;
|
|
435
|
+
}
|
|
436
|
+
if (usePromptCache && body.length >= 2) {
|
|
437
|
+
body = cache.addCacheControl(body);
|
|
438
|
+
}
|
|
439
|
+
return [...prefix, ...body];
|
|
323
440
|
}).withConfig({ runName: 'prompt' });
|
|
324
441
|
}
|
|
325
442
|
/**
|
|
326
443
|
* Reset context for a new run
|
|
327
444
|
*/
|
|
328
445
|
reset() {
|
|
329
|
-
this.instructionTokens = 0;
|
|
330
446
|
this.systemMessageTokens = 0;
|
|
447
|
+
this.toolSchemaTokens = 0;
|
|
331
448
|
this.cachedSystemRunnable = undefined;
|
|
332
449
|
this.systemRunnableStale = true;
|
|
333
450
|
this.lastToken = undefined;
|
|
@@ -340,6 +457,11 @@ class AgentContext {
|
|
|
340
457
|
this.currentTokenType = _enum.ContentTypes.TEXT;
|
|
341
458
|
this.discoveredToolNames.clear();
|
|
342
459
|
this.handoffContext = undefined;
|
|
460
|
+
this.summaryText = this._durableSummaryText;
|
|
461
|
+
this.summaryTokenCount = this._durableSummaryTokenCount;
|
|
462
|
+
this._lastSummarizationMsgCount = 0;
|
|
463
|
+
this.lastCallUsage = undefined;
|
|
464
|
+
this.totalTokensFresh = false;
|
|
343
465
|
if (this.tokenCounter) {
|
|
344
466
|
this.initializeSystemRunnable();
|
|
345
467
|
const baseTokenMap = { ...this.baseIndexTokenCountMap };
|
|
@@ -357,24 +479,21 @@ class AgentContext {
|
|
|
357
479
|
}
|
|
358
480
|
}
|
|
359
481
|
/**
|
|
360
|
-
* Update the token count map
|
|
482
|
+
* Update the token count map from a base map.
|
|
483
|
+
*
|
|
484
|
+
* Previously this inflated index 0 with instructionTokens to indirectly
|
|
485
|
+
* reserve budget for the system prompt. That approach was imprecise: with
|
|
486
|
+
* large tool-schema overhead (e.g. 26 MCP tools ~5 000 tokens) the first
|
|
487
|
+
* conversation message appeared enormous and was always pruned, while the
|
|
488
|
+
* real available budget was never explicitly computed.
|
|
489
|
+
*
|
|
490
|
+
* Now instruction tokens are passed to getMessagesWithinTokenLimit via
|
|
491
|
+
* the `getInstructionTokens` factory param so the pruner subtracts them
|
|
492
|
+
* from the budget directly. The token map contains only real per-message
|
|
493
|
+
* token counts.
|
|
361
494
|
*/
|
|
362
495
|
updateTokenMapWithInstructions(baseTokenMap) {
|
|
363
|
-
|
|
364
|
-
// Shift all indices by the instruction token count
|
|
365
|
-
const shiftedMap = {};
|
|
366
|
-
for (const [key, value] of Object.entries(baseTokenMap)) {
|
|
367
|
-
const index = parseInt(key, 10);
|
|
368
|
-
if (!isNaN(index)) {
|
|
369
|
-
shiftedMap[String(index)] =
|
|
370
|
-
value + (index === 0 ? this.instructionTokens : 0);
|
|
371
|
-
}
|
|
372
|
-
}
|
|
373
|
-
this.indexTokenCountMap = shiftedMap;
|
|
374
|
-
}
|
|
375
|
-
else {
|
|
376
|
-
this.indexTokenCountMap = { ...baseTokenMap };
|
|
377
|
-
}
|
|
496
|
+
this.indexTokenCountMap = { ...baseTokenMap };
|
|
378
497
|
}
|
|
379
498
|
/**
|
|
380
499
|
* Calculate tool tokens and add to instruction tokens
|
|
@@ -382,11 +501,7 @@ class AgentContext {
|
|
|
382
501
|
*/
|
|
383
502
|
async calculateInstructionTokens(tokenCounter) {
|
|
384
503
|
let toolTokens = 0;
|
|
385
|
-
// Track names to avoid double-counting when a tool appears in both
|
|
386
|
-
// this.tools (bound StructuredTool instances) and this.toolDefinitions
|
|
387
|
-
// (MCP / event-driven schemas).
|
|
388
504
|
const countedToolNames = new Set();
|
|
389
|
-
// Count tokens for bound tools (StructuredTool instances with .schema)
|
|
390
505
|
if (this.tools && this.tools.length > 0) {
|
|
391
506
|
for (const tool of this.tools) {
|
|
392
507
|
const genericTool = tool;
|
|
@@ -401,23 +516,29 @@ class AgentContext {
|
|
|
401
516
|
}
|
|
402
517
|
}
|
|
403
518
|
}
|
|
404
|
-
// Count tokens for tool definitions (MCP / event-driven tools).
|
|
405
|
-
// These are sent to the provider API as tool schemas alongside bound tools.
|
|
406
|
-
// Both can be populated simultaneously (graph tools + MCP tools).
|
|
407
519
|
if (this.toolDefinitions && this.toolDefinitions.length > 0) {
|
|
408
520
|
for (const def of this.toolDefinitions) {
|
|
409
521
|
if (countedToolNames.has(def.name)) {
|
|
410
|
-
continue;
|
|
522
|
+
continue;
|
|
411
523
|
}
|
|
412
524
|
const schema = {
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
525
|
+
type: 'function',
|
|
526
|
+
function: {
|
|
527
|
+
name: def.name,
|
|
528
|
+
description: def.description ?? '',
|
|
529
|
+
parameters: def.parameters ?? {},
|
|
530
|
+
},
|
|
416
531
|
};
|
|
417
532
|
toolTokens += tokenCounter(new messages.SystemMessage(JSON.stringify(schema)));
|
|
418
533
|
}
|
|
419
534
|
}
|
|
420
|
-
this.
|
|
535
|
+
const isAnthropic = this.provider !== _enum.Providers.BEDROCK &&
|
|
536
|
+
(this.provider === _enum.Providers.ANTHROPIC ||
|
|
537
|
+
/anthropic|claude/i.test(String(this.clientOptions?.model ?? '')));
|
|
538
|
+
const toolTokenMultiplier = isAnthropic
|
|
539
|
+
? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
|
|
540
|
+
: DEFAULT_TOOL_TOKEN_MULTIPLIER;
|
|
541
|
+
this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
|
|
421
542
|
}
|
|
422
543
|
/**
|
|
423
544
|
* Gets the tool registry for deferred tools (for tool search).
|
|
@@ -457,6 +578,151 @@ class AgentContext {
|
|
|
457
578
|
this.systemRunnableStale = true;
|
|
458
579
|
}
|
|
459
580
|
}
|
|
581
|
+
setSummary(text, tokenCount) {
|
|
582
|
+
this.summaryText = text;
|
|
583
|
+
this.summaryTokenCount = tokenCount;
|
|
584
|
+
this._summaryLocation = 'user_message';
|
|
585
|
+
this._durableSummaryText = text;
|
|
586
|
+
this._durableSummaryTokenCount = tokenCount;
|
|
587
|
+
this._summaryVersion += 1;
|
|
588
|
+
this.systemRunnableStale = true;
|
|
589
|
+
this.pruneMessages = undefined;
|
|
590
|
+
}
|
|
591
|
+
/** Sets a cross-run summary that is injected into the system prompt. */
|
|
592
|
+
setInitialSummary(text, tokenCount) {
|
|
593
|
+
this.summaryText = text;
|
|
594
|
+
this.summaryTokenCount = tokenCount;
|
|
595
|
+
this._summaryLocation = 'system_prompt';
|
|
596
|
+
this._durableSummaryText = text;
|
|
597
|
+
this._durableSummaryTokenCount = tokenCount;
|
|
598
|
+
this._summaryVersion += 1;
|
|
599
|
+
this.systemRunnableStale = true;
|
|
600
|
+
}
|
|
601
|
+
/**
|
|
602
|
+
* Replaces the indexTokenCountMap with a fresh map keyed to the surviving
|
|
603
|
+
* context messages after summarization. Called by the summarize node after
|
|
604
|
+
* it emits RemoveMessage operations that shift message indices.
|
|
605
|
+
*/
|
|
606
|
+
rebuildTokenMapAfterSummarization(newTokenMap) {
|
|
607
|
+
this.indexTokenCountMap = newTokenMap;
|
|
608
|
+
this.baseIndexTokenCountMap = { ...newTokenMap };
|
|
609
|
+
this._lastSummarizationMsgCount = Object.keys(newTokenMap).length;
|
|
610
|
+
this.currentUsage = undefined;
|
|
611
|
+
this.lastCallUsage = undefined;
|
|
612
|
+
this.totalTokensFresh = false;
|
|
613
|
+
}
|
|
614
|
+
hasSummary() {
|
|
615
|
+
return this.summaryText != null && this.summaryText !== '';
|
|
616
|
+
}
|
|
617
|
+
/** True when a mid-run compaction summary is ready to be injected as a HumanMessage. */
|
|
618
|
+
hasPendingCompactionSummary() {
|
|
619
|
+
return this._summaryLocation === 'user_message' && this.hasSummary();
|
|
620
|
+
}
|
|
621
|
+
getSummaryText() {
|
|
622
|
+
return this.summaryText;
|
|
623
|
+
}
|
|
624
|
+
get summaryVersion() {
|
|
625
|
+
return this._summaryVersion;
|
|
626
|
+
}
|
|
627
|
+
/**
|
|
628
|
+
* Returns true when the message count has not grown since the last
|
|
629
|
+
* summarization — re-summarizing would produce an identical result.
|
|
630
|
+
* Oversized individual messages are handled by fit-to-budget truncation
|
|
631
|
+
* in the pruner, which keeps them in context without triggering overflow.
|
|
632
|
+
*/
|
|
633
|
+
shouldSkipSummarization(currentMsgCount) {
|
|
634
|
+
return (this._lastSummarizationMsgCount > 0 &&
|
|
635
|
+
currentMsgCount <= this._lastSummarizationMsgCount);
|
|
636
|
+
}
|
|
637
|
+
/**
|
|
638
|
+
* Records the message count at which summarization was triggered,
|
|
639
|
+
* so subsequent calls with the same (or a smaller) count are suppressed.
|
|
640
|
+
*/
|
|
641
|
+
markSummarizationTriggered(msgCount) {
|
|
642
|
+
this._lastSummarizationMsgCount = msgCount;
|
|
643
|
+
}
|
|
644
|
+
clearSummary() {
|
|
645
|
+
if (this.summaryText != null) {
|
|
646
|
+
this.summaryText = undefined;
|
|
647
|
+
this.summaryTokenCount = 0;
|
|
648
|
+
this._durableSummaryText = undefined;
|
|
649
|
+
this._durableSummaryTokenCount = 0;
|
|
650
|
+
this._summaryLocation = 'none';
|
|
651
|
+
this.systemRunnableStale = true;
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
/**
|
|
655
|
+
* Returns a structured breakdown of how the context token budget is consumed.
|
|
656
|
+
* Useful for diagnostics when context overflow or pruning issues occur.
|
|
657
|
+
*/
|
|
658
|
+
getTokenBudgetBreakdown(messages) {
|
|
659
|
+
const maxContextTokens = this.maxContextTokens ?? 0;
|
|
660
|
+
const toolCount = (this.tools?.length ?? 0) + (this.toolDefinitions?.length ?? 0);
|
|
661
|
+
const messageCount = messages?.length ?? 0;
|
|
662
|
+
let messageTokens = 0;
|
|
663
|
+
if (messages != null) {
|
|
664
|
+
for (let i = 0; i < messages.length; i++) {
|
|
665
|
+
messageTokens +=
|
|
666
|
+
this.indexTokenCountMap[i] ?? 0;
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
const reserveTokens = Math.round(maxContextTokens * prune.DEFAULT_RESERVE_RATIO);
|
|
670
|
+
const availableForMessages = Math.max(0, maxContextTokens - reserveTokens - this.instructionTokens);
|
|
671
|
+
return {
|
|
672
|
+
maxContextTokens,
|
|
673
|
+
instructionTokens: this.instructionTokens,
|
|
674
|
+
systemMessageTokens: this.systemMessageTokens,
|
|
675
|
+
toolSchemaTokens: this.toolSchemaTokens,
|
|
676
|
+
summaryTokens: this.summaryTokenCount,
|
|
677
|
+
toolCount,
|
|
678
|
+
messageCount,
|
|
679
|
+
messageTokens,
|
|
680
|
+
availableForMessages,
|
|
681
|
+
};
|
|
682
|
+
}
|
|
683
|
+
/**
|
|
684
|
+
* Returns a human-readable string of the token budget breakdown
|
|
685
|
+
* for inclusion in error messages and diagnostics.
|
|
686
|
+
*/
|
|
687
|
+
formatTokenBudgetBreakdown(messages) {
|
|
688
|
+
const b = this.getTokenBudgetBreakdown(messages);
|
|
689
|
+
const lines = [
|
|
690
|
+
'Token budget breakdown:',
|
|
691
|
+
` maxContextTokens: ${b.maxContextTokens}`,
|
|
692
|
+
` instructionTokens: ${b.instructionTokens} (system: ${b.systemMessageTokens}, tools: ${b.toolSchemaTokens} [${b.toolCount} tools])`,
|
|
693
|
+
` summaryTokens: ${b.summaryTokens}`,
|
|
694
|
+
` messageTokens: ${b.messageTokens} (${b.messageCount} messages)`,
|
|
695
|
+
` availableForMessages: ${b.availableForMessages}`,
|
|
696
|
+
];
|
|
697
|
+
return lines.join('\n');
|
|
698
|
+
}
|
|
699
|
+
/**
|
|
700
|
+
* Updates the last-call usage with data from the most recent LLM response.
|
|
701
|
+
* Unlike `currentUsage` which accumulates, this captures only the single call.
|
|
702
|
+
*/
|
|
703
|
+
updateLastCallUsage(usage) {
|
|
704
|
+
const baseInputTokens = Number(usage.input_tokens) || 0;
|
|
705
|
+
const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
|
|
706
|
+
const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
|
|
707
|
+
const outputTokens = Number(usage.output_tokens) || 0;
|
|
708
|
+
const cacheSum = cacheCreation + cacheRead;
|
|
709
|
+
const cacheIsAdditive = cacheSum > 0 && cacheSum > baseInputTokens;
|
|
710
|
+
const totalInputTokens = cacheIsAdditive
|
|
711
|
+
? baseInputTokens + cacheSum
|
|
712
|
+
: baseInputTokens;
|
|
713
|
+
this.lastCallUsage = {
|
|
714
|
+
inputTokens: totalInputTokens,
|
|
715
|
+
outputTokens,
|
|
716
|
+
totalTokens: totalInputTokens + outputTokens,
|
|
717
|
+
cacheRead: cacheRead || undefined,
|
|
718
|
+
cacheCreation: cacheCreation || undefined,
|
|
719
|
+
};
|
|
720
|
+
this.totalTokensFresh = true;
|
|
721
|
+
}
|
|
722
|
+
/** Marks token data as stale before a new LLM call. */
|
|
723
|
+
markTokensStale() {
|
|
724
|
+
this.totalTokensFresh = false;
|
|
725
|
+
}
|
|
460
726
|
/**
|
|
461
727
|
* Marks tools as discovered via tool search.
|
|
462
728
|
* Discovered tools will be included in the next model binding.
|
|
@@ -486,11 +752,9 @@ class AgentContext {
|
|
|
486
752
|
* @returns Array of tools to bind to model
|
|
487
753
|
*/
|
|
488
754
|
getToolsForBinding() {
|
|
489
|
-
/** Event-driven mode: create schema-only tools from definitions */
|
|
490
755
|
if (this.toolDefinitions && this.toolDefinitions.length > 0) {
|
|
491
756
|
return this.getEventDrivenToolsForBinding();
|
|
492
757
|
}
|
|
493
|
-
/** Traditional mode: filter actual tool instances */
|
|
494
758
|
const filtered = !this.tools || !this.toolRegistry
|
|
495
759
|
? this.tools
|
|
496
760
|
: this.filterToolsForBinding(this.tools);
|