@librechat/agents 3.1.57 → 3.1.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +326 -62
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/events.cjs +7 -27
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +303 -222
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/init.cjs +60 -0
- package/dist/cjs/llm/init.cjs.map +1 -0
- package/dist/cjs/llm/invoke.cjs +90 -0
- package/dist/cjs/llm/invoke.cjs.map +1 -0
- package/dist/cjs/llm/openai/index.cjs +2 -0
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/request.cjs +41 -0
- package/dist/cjs/llm/request.cjs.map +1 -0
- package/dist/cjs/main.cjs +40 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +76 -89
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/contextPruning.cjs +156 -0
- package/dist/cjs/messages/contextPruning.cjs.map +1 -0
- package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
- package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
- package/dist/cjs/messages/core.cjs +23 -37
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +156 -11
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +1161 -49
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/messages/reducer.cjs +87 -0
- package/dist/cjs/messages/reducer.cjs.map +1 -0
- package/dist/cjs/run.cjs +81 -42
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +54 -7
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/index.cjs +75 -0
- package/dist/cjs/summarization/index.cjs.map +1 -0
- package/dist/cjs/summarization/node.cjs +663 -0
- package/dist/cjs/summarization/node.cjs.map +1 -0
- package/dist/cjs/tools/ToolNode.cjs +16 -8
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +2 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/utils/errors.cjs +115 -0
- package/dist/cjs/utils/errors.cjs.map +1 -0
- package/dist/cjs/utils/events.cjs +17 -0
- package/dist/cjs/utils/events.cjs.map +1 -1
- package/dist/cjs/utils/handlers.cjs +16 -0
- package/dist/cjs/utils/handlers.cjs.map +1 -1
- package/dist/cjs/utils/llm.cjs +10 -0
- package/dist/cjs/utils/llm.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +247 -14
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/truncation.cjs +107 -0
- package/dist/cjs/utils/truncation.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +325 -61
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/events.mjs +8 -28
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +307 -226
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/init.mjs +58 -0
- package/dist/esm/llm/init.mjs.map +1 -0
- package/dist/esm/llm/invoke.mjs +87 -0
- package/dist/esm/llm/invoke.mjs.map +1 -0
- package/dist/esm/llm/openai/index.mjs +2 -0
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/request.mjs +38 -0
- package/dist/esm/llm/request.mjs.map +1 -0
- package/dist/esm/main.mjs +13 -3
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +76 -89
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/contextPruning.mjs +154 -0
- package/dist/esm/messages/contextPruning.mjs.map +1 -0
- package/dist/esm/messages/contextPruningSettings.mjs +50 -0
- package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
- package/dist/esm/messages/core.mjs +23 -37
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +156 -11
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +1158 -52
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/messages/reducer.mjs +83 -0
- package/dist/esm/messages/reducer.mjs.map +1 -0
- package/dist/esm/run.mjs +82 -43
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +54 -7
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/index.mjs +73 -0
- package/dist/esm/summarization/index.mjs.map +1 -0
- package/dist/esm/summarization/node.mjs +659 -0
- package/dist/esm/summarization/node.mjs.map +1 -0
- package/dist/esm/tools/ToolNode.mjs +16 -8
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +2 -0
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/utils/errors.mjs +111 -0
- package/dist/esm/utils/errors.mjs.map +1 -0
- package/dist/esm/utils/events.mjs +17 -1
- package/dist/esm/utils/events.mjs.map +1 -1
- package/dist/esm/utils/handlers.mjs +16 -0
- package/dist/esm/utils/handlers.mjs.map +1 -1
- package/dist/esm/utils/llm.mjs +10 -1
- package/dist/esm/utils/llm.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +245 -15
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/truncation.mjs +102 -0
- package/dist/esm/utils/truncation.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +124 -6
- package/dist/types/common/enum.d.ts +14 -1
- package/dist/types/graphs/Graph.d.ts +22 -27
- package/dist/types/index.d.ts +5 -0
- package/dist/types/llm/init.d.ts +18 -0
- package/dist/types/llm/invoke.d.ts +48 -0
- package/dist/types/llm/request.d.ts +14 -0
- package/dist/types/messages/contextPruning.d.ts +42 -0
- package/dist/types/messages/contextPruningSettings.d.ts +44 -0
- package/dist/types/messages/core.d.ts +1 -1
- package/dist/types/messages/format.d.ts +17 -1
- package/dist/types/messages/index.d.ts +3 -0
- package/dist/types/messages/prune.d.ts +162 -1
- package/dist/types/messages/reducer.d.ts +18 -0
- package/dist/types/run.d.ts +12 -1
- package/dist/types/summarization/index.d.ts +20 -0
- package/dist/types/summarization/node.d.ts +29 -0
- package/dist/types/tools/ToolNode.d.ts +3 -1
- package/dist/types/types/graph.d.ts +44 -6
- package/dist/types/types/index.d.ts +1 -0
- package/dist/types/types/run.d.ts +30 -0
- package/dist/types/types/stream.d.ts +31 -4
- package/dist/types/types/summarize.d.ts +47 -0
- package/dist/types/types/tools.d.ts +7 -0
- package/dist/types/utils/errors.d.ts +28 -0
- package/dist/types/utils/events.d.ts +13 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/llm.d.ts +4 -0
- package/dist/types/utils/tokens.d.ts +14 -1
- package/dist/types/utils/truncation.d.ts +49 -0
- package/package.json +3 -3
- package/src/agents/AgentContext.ts +388 -58
- package/src/agents/__tests__/AgentContext.test.ts +265 -5
- package/src/common/enum.ts +13 -0
- package/src/events.ts +9 -39
- package/src/graphs/Graph.ts +468 -331
- package/src/index.ts +7 -0
- package/src/llm/anthropic/llm.spec.ts +3 -3
- package/src/llm/anthropic/utils/message_inputs.ts +6 -4
- package/src/llm/bedrock/llm.spec.ts +1 -1
- package/src/llm/bedrock/utils/message_inputs.ts +6 -2
- package/src/llm/init.ts +63 -0
- package/src/llm/invoke.ts +144 -0
- package/src/llm/request.ts +55 -0
- package/src/messages/__tests__/observationMasking.test.ts +221 -0
- package/src/messages/cache.ts +77 -102
- package/src/messages/contextPruning.ts +191 -0
- package/src/messages/contextPruningSettings.ts +90 -0
- package/src/messages/core.ts +32 -53
- package/src/messages/ensureThinkingBlock.test.ts +39 -39
- package/src/messages/format.ts +227 -15
- package/src/messages/formatAgentMessages.test.ts +511 -1
- package/src/messages/index.ts +3 -0
- package/src/messages/prune.ts +1548 -62
- package/src/messages/reducer.ts +22 -0
- package/src/run.ts +104 -51
- package/src/scripts/bedrock-merge-test.ts +1 -1
- package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
- package/src/scripts/test-thinking-handoff.ts +1 -1
- package/src/scripts/thinking-bedrock.ts +1 -1
- package/src/scripts/thinking.ts +1 -1
- package/src/specs/anthropic.simple.test.ts +1 -1
- package/src/specs/multi-agent-summarization.test.ts +396 -0
- package/src/specs/prune.test.ts +1196 -23
- package/src/specs/summarization-unit.test.ts +868 -0
- package/src/specs/summarization.test.ts +3827 -0
- package/src/specs/summarize-prune.test.ts +376 -0
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/specs/thinking-prune.test.ts +7 -4
- package/src/specs/token-accounting-e2e.test.ts +1034 -0
- package/src/specs/token-accounting-pipeline.test.ts +882 -0
- package/src/specs/token-distribution-edge-case.test.ts +25 -26
- package/src/splitStream.test.ts +42 -33
- package/src/stream.ts +64 -11
- package/src/summarization/__tests__/aggregator.test.ts +153 -0
- package/src/summarization/__tests__/node.test.ts +708 -0
- package/src/summarization/__tests__/trigger.test.ts +50 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/node.ts +982 -0
- package/src/tools/ToolNode.ts +25 -3
- package/src/types/graph.ts +62 -7
- package/src/types/index.ts +1 -0
- package/src/types/run.ts +32 -0
- package/src/types/stream.ts +45 -5
- package/src/types/summarize.ts +58 -0
- package/src/types/tools.ts +7 -0
- package/src/utils/errors.ts +117 -0
- package/src/utils/events.ts +31 -0
- package/src/utils/handlers.ts +18 -0
- package/src/utils/index.ts +2 -0
- package/src/utils/llm.ts +12 -0
- package/src/utils/tokens.ts +336 -18
- package/src/utils/truncation.ts +124 -0
- package/src/scripts/image.ts +0 -180
|
@@ -1,11 +1,29 @@
|
|
|
1
|
-
import { SystemMessage } from '@langchain/core/messages';
|
|
1
|
+
import { SystemMessage, HumanMessage } from '@langchain/core/messages';
|
|
2
2
|
import { RunnableLambda } from '@langchain/core/runnables';
|
|
3
3
|
import { createSchemaOnlyTools } from '../tools/schema.mjs';
|
|
4
|
+
import { addCacheControl } from '../messages/cache.mjs';
|
|
4
5
|
import { ContentTypes, Providers } from '../common/enum.mjs';
|
|
6
|
+
import '../messages/core.mjs';
|
|
7
|
+
import 'nanoid';
|
|
8
|
+
import { DEFAULT_RESERVE_RATIO } from '../messages/prune.mjs';
|
|
9
|
+
import '@langchain/core/callbacks/dispatch';
|
|
10
|
+
import 'uuid';
|
|
5
11
|
import { toJsonSchema } from '../utils/schema.mjs';
|
|
6
12
|
|
|
7
13
|
/* eslint-disable no-console */
|
|
8
|
-
|
|
14
|
+
/**
|
|
15
|
+
* Anthropic direct API tool schema overhead multiplier.
|
|
16
|
+
* Empirically calibrated against real MCP tool sets (29 tools).
|
|
17
|
+
* Accounts for Anthropic's internal XML-like tool encoding plus
|
|
18
|
+
* a ~300-token hidden tool-system preamble.
|
|
19
|
+
*/
|
|
20
|
+
const ANTHROPIC_TOOL_TOKEN_MULTIPLIER = 2.6;
|
|
21
|
+
/**
|
|
22
|
+
* Default tool schema overhead multiplier for all non-Anthropic providers.
|
|
23
|
+
* Covers OpenAI function-calling format, Bedrock, and other providers.
|
|
24
|
+
* Empirically calibrated at ~1.4× the raw JSON token count.
|
|
25
|
+
*/
|
|
26
|
+
const DEFAULT_TOOL_TOKEN_MULTIPLIER = 1.4;
|
|
9
27
|
/**
|
|
10
28
|
* Encapsulates agent-specific state that can vary between agents in a multi-agent system
|
|
11
29
|
*/
|
|
@@ -14,7 +32,7 @@ class AgentContext {
|
|
|
14
32
|
* Create an AgentContext from configuration with token accounting initialization
|
|
15
33
|
*/
|
|
16
34
|
static fromConfig(agentConfig, tokenCounter, indexTokenCountMap) {
|
|
17
|
-
const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, } = agentConfig;
|
|
35
|
+
const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, initialSummary, contextPruningConfig, maxToolResultChars, } = agentConfig;
|
|
18
36
|
const agentContext = new AgentContext({
|
|
19
37
|
agentId,
|
|
20
38
|
name: name ?? agentId,
|
|
@@ -34,11 +52,15 @@ class AgentContext {
|
|
|
34
52
|
tokenCounter,
|
|
35
53
|
useLegacyContent,
|
|
36
54
|
discoveredTools,
|
|
55
|
+
summarizationEnabled,
|
|
56
|
+
summarizationConfig,
|
|
57
|
+
contextPruningConfig,
|
|
58
|
+
maxToolResultChars,
|
|
37
59
|
});
|
|
60
|
+
if (initialSummary?.text != null && initialSummary.text !== '') {
|
|
61
|
+
agentContext.setInitialSummary(initialSummary.text, initialSummary.tokenCount);
|
|
62
|
+
}
|
|
38
63
|
if (tokenCounter) {
|
|
39
|
-
// Initialize system runnable BEFORE async tool token calculation
|
|
40
|
-
// This ensures system message tokens are in instructionTokens before
|
|
41
|
-
// updateTokenMapWithInstructions is called
|
|
42
64
|
agentContext.initializeSystemRunnable();
|
|
43
65
|
const tokenMap = indexTokenCountMap || {};
|
|
44
66
|
agentContext.baseIndexTokenCountMap = { ...tokenMap };
|
|
@@ -46,7 +68,6 @@ class AgentContext {
|
|
|
46
68
|
agentContext.tokenCalculationPromise = agentContext
|
|
47
69
|
.calculateInstructionTokens(tokenCounter)
|
|
48
70
|
.then(() => {
|
|
49
|
-
// Update token map with instruction tokens (includes system + tool tokens)
|
|
50
71
|
agentContext.updateTokenMapWithInstructions(tokenMap);
|
|
51
72
|
})
|
|
52
73
|
.catch((err) => {
|
|
@@ -75,12 +96,39 @@ class AgentContext {
|
|
|
75
96
|
maxContextTokens;
|
|
76
97
|
/** Current usage metadata for this agent */
|
|
77
98
|
currentUsage;
|
|
99
|
+
/**
|
|
100
|
+
* Usage from the most recent LLM call only (not accumulated).
|
|
101
|
+
* Used for accurate provider calibration in pruning.
|
|
102
|
+
*/
|
|
103
|
+
lastCallUsage;
|
|
104
|
+
/**
|
|
105
|
+
* Whether totalTokens data is fresh (set true when provider usage arrives,
|
|
106
|
+
* false at the start of each turn before the LLM responds).
|
|
107
|
+
* Prevents stale token data from driving pruning/trigger decisions.
|
|
108
|
+
*/
|
|
109
|
+
totalTokensFresh = false;
|
|
110
|
+
/** Context pruning configuration. */
|
|
111
|
+
contextPruningConfig;
|
|
112
|
+
maxToolResultChars;
|
|
78
113
|
/** Prune messages function configured for this agent */
|
|
79
114
|
pruneMessages;
|
|
80
115
|
/** Token counter function for this agent */
|
|
81
116
|
tokenCounter;
|
|
82
|
-
/**
|
|
83
|
-
|
|
117
|
+
/** Token count for the system message (instructions text). */
|
|
118
|
+
systemMessageTokens = 0;
|
|
119
|
+
/** Token count for tool schemas only. */
|
|
120
|
+
toolSchemaTokens = 0;
|
|
121
|
+
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
|
|
122
|
+
calibrationRatio = 1;
|
|
123
|
+
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
|
|
124
|
+
resolvedInstructionOverhead;
|
|
125
|
+
/** Pre-masking tool content keyed by message index, consumed by the summarize node. */
|
|
126
|
+
pendingOriginalToolContent;
|
|
127
|
+
/** Total instruction overhead: system message + tool schemas + pending summary. */
|
|
128
|
+
get instructionTokens() {
|
|
129
|
+
const summaryOverhead = this._summaryLocation === 'user_message' ? this.summaryTokenCount : 0;
|
|
130
|
+
return this.systemMessageTokens + this.toolSchemaTokens + summaryOverhead;
|
|
131
|
+
}
|
|
84
132
|
/** The amount of time that should pass before another consecutive API call */
|
|
85
133
|
streamBuffer;
|
|
86
134
|
/** Last stream call timestamp for rate limiting */
|
|
@@ -123,18 +171,47 @@ class AgentContext {
|
|
|
123
171
|
cachedSystemRunnable;
|
|
124
172
|
/** Whether system runnable needs rebuild (set when discovered tools change) */
|
|
125
173
|
systemRunnableStale = true;
|
|
126
|
-
/** Cached system message token count (separate from tool tokens) */
|
|
127
|
-
systemMessageTokens = 0;
|
|
128
174
|
/** Promise for token calculation initialization */
|
|
129
175
|
tokenCalculationPromise;
|
|
130
176
|
/** Format content blocks as strings (for legacy compatibility) */
|
|
131
177
|
useLegacyContent = false;
|
|
178
|
+
/** Enables graph-level summarization for this agent */
|
|
179
|
+
summarizationEnabled;
|
|
180
|
+
/** Summarization runtime settings used by graph pruning hooks */
|
|
181
|
+
summarizationConfig;
|
|
182
|
+
/** Current summary text produced by the summarize node, integrated into system message */
|
|
183
|
+
summaryText;
|
|
184
|
+
/** Token count of the current summary (tracked for token accounting) */
|
|
185
|
+
summaryTokenCount = 0;
|
|
186
|
+
/**
|
|
187
|
+
* Where the summary should be injected:
|
|
188
|
+
* - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
|
|
189
|
+
* - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
|
|
190
|
+
* - `'none'`: no summary present
|
|
191
|
+
*/
|
|
192
|
+
_summaryLocation = 'none';
|
|
193
|
+
/**
|
|
194
|
+
* Durable summary that survives reset() calls. Set from initialSummary
|
|
195
|
+
* during fromConfig() and updated by setSummary() so that the latest
|
|
196
|
+
* summary (whether cross-run or intra-run) is always restored after
|
|
197
|
+
* processStream's resetValues() cycle.
|
|
198
|
+
*/
|
|
199
|
+
_durableSummaryText;
|
|
200
|
+
_durableSummaryTokenCount = 0;
|
|
201
|
+
/** Number of summarization cycles that have occurred for this agent context */
|
|
202
|
+
_summaryVersion = 0;
|
|
203
|
+
/**
|
|
204
|
+
* Message count at the time summarization was last triggered.
|
|
205
|
+
* Used to prevent re-summarizing the same unchanged message set.
|
|
206
|
+
* Summarization is allowed to fire again only when new messages appear.
|
|
207
|
+
*/
|
|
208
|
+
_lastSummarizationMsgCount = 0;
|
|
132
209
|
/**
|
|
133
210
|
* Handoff context when this agent receives control via handoff.
|
|
134
211
|
* Contains source and parallel execution info for system message context.
|
|
135
212
|
*/
|
|
136
213
|
handoffContext;
|
|
137
|
-
constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, }) {
|
|
214
|
+
constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, contextPruningConfig, maxToolResultChars, }) {
|
|
138
215
|
this.agentId = agentId;
|
|
139
216
|
this.name = name;
|
|
140
217
|
this.provider = provider;
|
|
@@ -155,9 +232,13 @@ class AgentContext {
|
|
|
155
232
|
this.toolEnd = toolEnd;
|
|
156
233
|
}
|
|
157
234
|
if (instructionTokens !== undefined) {
|
|
158
|
-
this.
|
|
235
|
+
this.systemMessageTokens = instructionTokens;
|
|
159
236
|
}
|
|
160
237
|
this.useLegacyContent = useLegacyContent ?? false;
|
|
238
|
+
this.summarizationEnabled = summarizationEnabled;
|
|
239
|
+
this.summarizationConfig = summarizationConfig;
|
|
240
|
+
this.contextPruningConfig = contextPruningConfig;
|
|
241
|
+
this.maxToolResultChars = maxToolResultChars;
|
|
161
242
|
if (discoveredTools && discoveredTools.length > 0) {
|
|
162
243
|
for (const toolName of discoveredTools) {
|
|
163
244
|
this.discoveredToolNames.add(toolName);
|
|
@@ -183,7 +264,6 @@ class AgentContext {
|
|
|
183
264
|
!allowedCallers.includes('direct');
|
|
184
265
|
if (!isCodeExecutionOnly)
|
|
185
266
|
continue;
|
|
186
|
-
// Include if: not deferred OR deferred but discovered
|
|
187
267
|
const isDeferred = toolDef.defer_loading === true;
|
|
188
268
|
const isDiscovered = this.discoveredToolNames.has(name);
|
|
189
269
|
if (!isDeferred || isDiscovered) {
|
|
@@ -215,11 +295,9 @@ class AgentContext {
|
|
|
215
295
|
* Only rebuilds when marked stale (via markToolsAsDiscovered).
|
|
216
296
|
*/
|
|
217
297
|
get systemRunnable() {
|
|
218
|
-
// Return cached if not stale
|
|
219
298
|
if (!this.systemRunnableStale && this.cachedSystemRunnable !== undefined) {
|
|
220
299
|
return this.cachedSystemRunnable;
|
|
221
300
|
}
|
|
222
|
-
// Stale or first access - rebuild
|
|
223
301
|
const instructionsString = this.buildInstructionsString();
|
|
224
302
|
this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
|
|
225
303
|
this.systemRunnableStale = false;
|
|
@@ -242,25 +320,29 @@ class AgentContext {
|
|
|
242
320
|
*/
|
|
243
321
|
buildInstructionsString() {
|
|
244
322
|
const parts = [];
|
|
245
|
-
/** Build agent identity and handoff context preamble */
|
|
246
323
|
const identityPreamble = this.buildIdentityPreamble();
|
|
247
324
|
if (identityPreamble) {
|
|
248
325
|
parts.push(identityPreamble);
|
|
249
326
|
}
|
|
250
|
-
/** Add main instructions */
|
|
251
327
|
if (this.instructions != null && this.instructions !== '') {
|
|
252
328
|
parts.push(this.instructions);
|
|
253
329
|
}
|
|
254
|
-
/** Add additional instructions */
|
|
255
330
|
if (this.additionalInstructions != null &&
|
|
256
331
|
this.additionalInstructions !== '') {
|
|
257
332
|
parts.push(this.additionalInstructions);
|
|
258
333
|
}
|
|
259
|
-
/** Add programmatic tools documentation */
|
|
260
334
|
const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
|
|
261
335
|
if (programmaticToolsDoc) {
|
|
262
336
|
parts.push(programmaticToolsDoc);
|
|
263
337
|
}
|
|
338
|
+
// Cross-run summary: include in system prompt so the model has context
|
|
339
|
+
// from the prior run. Mid-run summaries are injected as a HumanMessage
|
|
340
|
+
// on the post-compaction clean slate instead (see buildSystemRunnable).
|
|
341
|
+
if (this._summaryLocation === 'system_prompt' &&
|
|
342
|
+
this.summaryText != null &&
|
|
343
|
+
this.summaryText !== '') {
|
|
344
|
+
parts.push('## Conversation Summary\n\n' + this.summaryText);
|
|
345
|
+
}
|
|
264
346
|
return parts.join('\n\n');
|
|
265
347
|
}
|
|
266
348
|
/**
|
|
@@ -287,17 +369,19 @@ class AgentContext {
|
|
|
287
369
|
* Only called when content has actually changed.
|
|
288
370
|
*/
|
|
289
371
|
buildSystemRunnable(instructionsString) {
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
this.
|
|
372
|
+
const hasMidRunSummary = this._summaryLocation === 'user_message' &&
|
|
373
|
+
this.summaryText != null &&
|
|
374
|
+
this.summaryText !== '';
|
|
375
|
+
if (!instructionsString && !hasMidRunSummary) {
|
|
293
376
|
this.systemMessageTokens = 0;
|
|
294
377
|
return undefined;
|
|
295
378
|
}
|
|
296
379
|
let finalInstructions = instructionsString;
|
|
297
|
-
|
|
380
|
+
let usePromptCache = false;
|
|
298
381
|
if (this.provider === Providers.ANTHROPIC) {
|
|
299
382
|
const anthropicOptions = this.clientOptions;
|
|
300
383
|
if (anthropicOptions?.promptCache === true) {
|
|
384
|
+
usePromptCache = true;
|
|
301
385
|
finalInstructions = {
|
|
302
386
|
content: [
|
|
303
387
|
{
|
|
@@ -309,23 +393,56 @@ class AgentContext {
|
|
|
309
393
|
};
|
|
310
394
|
}
|
|
311
395
|
}
|
|
312
|
-
const systemMessage =
|
|
313
|
-
|
|
396
|
+
const systemMessage = instructionsString
|
|
397
|
+
? new SystemMessage(finalInstructions)
|
|
398
|
+
: undefined;
|
|
314
399
|
if (this.tokenCounter) {
|
|
315
|
-
this.
|
|
316
|
-
|
|
317
|
-
|
|
400
|
+
this.systemMessageTokens = systemMessage
|
|
401
|
+
? this.tokenCounter(systemMessage)
|
|
402
|
+
: 0;
|
|
318
403
|
}
|
|
319
404
|
return RunnableLambda.from((messages) => {
|
|
320
|
-
|
|
405
|
+
const prefix = systemMessage ? [systemMessage] : [];
|
|
406
|
+
// Build the non-system portion (summary + conversation), then apply
|
|
407
|
+
// cache markers separately so addCacheControl doesn't strip the
|
|
408
|
+
// SystemMessage's own cache_control breakpoint set above.
|
|
409
|
+
const hasSummaryBody = this._summaryLocation === 'user_message' &&
|
|
410
|
+
this.summaryText != null &&
|
|
411
|
+
this.summaryText !== '';
|
|
412
|
+
let body;
|
|
413
|
+
if (hasSummaryBody) {
|
|
414
|
+
const wrappedSummary = '<summary>\n' +
|
|
415
|
+
this.summaryText +
|
|
416
|
+
'\n</summary>\n\n' +
|
|
417
|
+
'This is your own checkpoint: you wrote it to preserve context after compaction. Pick up where you left off based on the summary above. Do not repeat prior tasks, information or acknowledge this checkpoint message directly.';
|
|
418
|
+
const summaryMsg = usePromptCache
|
|
419
|
+
? new HumanMessage({
|
|
420
|
+
content: [
|
|
421
|
+
{
|
|
422
|
+
type: 'text',
|
|
423
|
+
text: wrappedSummary,
|
|
424
|
+
cache_control: { type: 'ephemeral' },
|
|
425
|
+
},
|
|
426
|
+
],
|
|
427
|
+
})
|
|
428
|
+
: new HumanMessage(wrappedSummary);
|
|
429
|
+
body = [summaryMsg, ...messages];
|
|
430
|
+
}
|
|
431
|
+
else {
|
|
432
|
+
body = messages;
|
|
433
|
+
}
|
|
434
|
+
if (usePromptCache && body.length >= 2) {
|
|
435
|
+
body = addCacheControl(body);
|
|
436
|
+
}
|
|
437
|
+
return [...prefix, ...body];
|
|
321
438
|
}).withConfig({ runName: 'prompt' });
|
|
322
439
|
}
|
|
323
440
|
/**
|
|
324
441
|
* Reset context for a new run
|
|
325
442
|
*/
|
|
326
443
|
reset() {
|
|
327
|
-
this.instructionTokens = 0;
|
|
328
444
|
this.systemMessageTokens = 0;
|
|
445
|
+
this.toolSchemaTokens = 0;
|
|
329
446
|
this.cachedSystemRunnable = undefined;
|
|
330
447
|
this.systemRunnableStale = true;
|
|
331
448
|
this.lastToken = undefined;
|
|
@@ -338,6 +455,11 @@ class AgentContext {
|
|
|
338
455
|
this.currentTokenType = ContentTypes.TEXT;
|
|
339
456
|
this.discoveredToolNames.clear();
|
|
340
457
|
this.handoffContext = undefined;
|
|
458
|
+
this.summaryText = this._durableSummaryText;
|
|
459
|
+
this.summaryTokenCount = this._durableSummaryTokenCount;
|
|
460
|
+
this._lastSummarizationMsgCount = 0;
|
|
461
|
+
this.lastCallUsage = undefined;
|
|
462
|
+
this.totalTokensFresh = false;
|
|
341
463
|
if (this.tokenCounter) {
|
|
342
464
|
this.initializeSystemRunnable();
|
|
343
465
|
const baseTokenMap = { ...this.baseIndexTokenCountMap };
|
|
@@ -355,24 +477,21 @@ class AgentContext {
|
|
|
355
477
|
}
|
|
356
478
|
}
|
|
357
479
|
/**
|
|
358
|
-
* Update the token count map
|
|
480
|
+
* Update the token count map from a base map.
|
|
481
|
+
*
|
|
482
|
+
* Previously this inflated index 0 with instructionTokens to indirectly
|
|
483
|
+
* reserve budget for the system prompt. That approach was imprecise: with
|
|
484
|
+
* large tool-schema overhead (e.g. 26 MCP tools ~5 000 tokens) the first
|
|
485
|
+
* conversation message appeared enormous and was always pruned, while the
|
|
486
|
+
* real available budget was never explicitly computed.
|
|
487
|
+
*
|
|
488
|
+
* Now instruction tokens are passed to getMessagesWithinTokenLimit via
|
|
489
|
+
* the `getInstructionTokens` factory param so the pruner subtracts them
|
|
490
|
+
* from the budget directly. The token map contains only real per-message
|
|
491
|
+
* token counts.
|
|
359
492
|
*/
|
|
360
493
|
updateTokenMapWithInstructions(baseTokenMap) {
|
|
361
|
-
|
|
362
|
-
// Shift all indices by the instruction token count
|
|
363
|
-
const shiftedMap = {};
|
|
364
|
-
for (const [key, value] of Object.entries(baseTokenMap)) {
|
|
365
|
-
const index = parseInt(key, 10);
|
|
366
|
-
if (!isNaN(index)) {
|
|
367
|
-
shiftedMap[String(index)] =
|
|
368
|
-
value + (index === 0 ? this.instructionTokens : 0);
|
|
369
|
-
}
|
|
370
|
-
}
|
|
371
|
-
this.indexTokenCountMap = shiftedMap;
|
|
372
|
-
}
|
|
373
|
-
else {
|
|
374
|
-
this.indexTokenCountMap = { ...baseTokenMap };
|
|
375
|
-
}
|
|
494
|
+
this.indexTokenCountMap = { ...baseTokenMap };
|
|
376
495
|
}
|
|
377
496
|
/**
|
|
378
497
|
* Calculate tool tokens and add to instruction tokens
|
|
@@ -380,11 +499,7 @@ class AgentContext {
|
|
|
380
499
|
*/
|
|
381
500
|
async calculateInstructionTokens(tokenCounter) {
|
|
382
501
|
let toolTokens = 0;
|
|
383
|
-
// Track names to avoid double-counting when a tool appears in both
|
|
384
|
-
// this.tools (bound StructuredTool instances) and this.toolDefinitions
|
|
385
|
-
// (MCP / event-driven schemas).
|
|
386
502
|
const countedToolNames = new Set();
|
|
387
|
-
// Count tokens for bound tools (StructuredTool instances with .schema)
|
|
388
503
|
if (this.tools && this.tools.length > 0) {
|
|
389
504
|
for (const tool of this.tools) {
|
|
390
505
|
const genericTool = tool;
|
|
@@ -399,23 +514,29 @@ class AgentContext {
|
|
|
399
514
|
}
|
|
400
515
|
}
|
|
401
516
|
}
|
|
402
|
-
// Count tokens for tool definitions (MCP / event-driven tools).
|
|
403
|
-
// These are sent to the provider API as tool schemas alongside bound tools.
|
|
404
|
-
// Both can be populated simultaneously (graph tools + MCP tools).
|
|
405
517
|
if (this.toolDefinitions && this.toolDefinitions.length > 0) {
|
|
406
518
|
for (const def of this.toolDefinitions) {
|
|
407
519
|
if (countedToolNames.has(def.name)) {
|
|
408
|
-
continue;
|
|
520
|
+
continue;
|
|
409
521
|
}
|
|
410
522
|
const schema = {
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
523
|
+
type: 'function',
|
|
524
|
+
function: {
|
|
525
|
+
name: def.name,
|
|
526
|
+
description: def.description ?? '',
|
|
527
|
+
parameters: def.parameters ?? {},
|
|
528
|
+
},
|
|
414
529
|
};
|
|
415
530
|
toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
|
|
416
531
|
}
|
|
417
532
|
}
|
|
418
|
-
this.
|
|
533
|
+
const isAnthropic = this.provider !== Providers.BEDROCK &&
|
|
534
|
+
(this.provider === Providers.ANTHROPIC ||
|
|
535
|
+
/anthropic|claude/i.test(String(this.clientOptions?.model ?? '')));
|
|
536
|
+
const toolTokenMultiplier = isAnthropic
|
|
537
|
+
? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
|
|
538
|
+
: DEFAULT_TOOL_TOKEN_MULTIPLIER;
|
|
539
|
+
this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
|
|
419
540
|
}
|
|
420
541
|
/**
|
|
421
542
|
* Gets the tool registry for deferred tools (for tool search).
|
|
@@ -455,6 +576,151 @@ class AgentContext {
|
|
|
455
576
|
this.systemRunnableStale = true;
|
|
456
577
|
}
|
|
457
578
|
}
|
|
579
|
+
setSummary(text, tokenCount) {
|
|
580
|
+
this.summaryText = text;
|
|
581
|
+
this.summaryTokenCount = tokenCount;
|
|
582
|
+
this._summaryLocation = 'user_message';
|
|
583
|
+
this._durableSummaryText = text;
|
|
584
|
+
this._durableSummaryTokenCount = tokenCount;
|
|
585
|
+
this._summaryVersion += 1;
|
|
586
|
+
this.systemRunnableStale = true;
|
|
587
|
+
this.pruneMessages = undefined;
|
|
588
|
+
}
|
|
589
|
+
/** Sets a cross-run summary that is injected into the system prompt. */
|
|
590
|
+
setInitialSummary(text, tokenCount) {
|
|
591
|
+
this.summaryText = text;
|
|
592
|
+
this.summaryTokenCount = tokenCount;
|
|
593
|
+
this._summaryLocation = 'system_prompt';
|
|
594
|
+
this._durableSummaryText = text;
|
|
595
|
+
this._durableSummaryTokenCount = tokenCount;
|
|
596
|
+
this._summaryVersion += 1;
|
|
597
|
+
this.systemRunnableStale = true;
|
|
598
|
+
}
|
|
599
|
+
/**
|
|
600
|
+
* Replaces the indexTokenCountMap with a fresh map keyed to the surviving
|
|
601
|
+
* context messages after summarization. Called by the summarize node after
|
|
602
|
+
* it emits RemoveMessage operations that shift message indices.
|
|
603
|
+
*/
|
|
604
|
+
rebuildTokenMapAfterSummarization(newTokenMap) {
|
|
605
|
+
this.indexTokenCountMap = newTokenMap;
|
|
606
|
+
this.baseIndexTokenCountMap = { ...newTokenMap };
|
|
607
|
+
this._lastSummarizationMsgCount = Object.keys(newTokenMap).length;
|
|
608
|
+
this.currentUsage = undefined;
|
|
609
|
+
this.lastCallUsage = undefined;
|
|
610
|
+
this.totalTokensFresh = false;
|
|
611
|
+
}
|
|
612
|
+
hasSummary() {
|
|
613
|
+
return this.summaryText != null && this.summaryText !== '';
|
|
614
|
+
}
|
|
615
|
+
/** True when a mid-run compaction summary is ready to be injected as a HumanMessage. */
|
|
616
|
+
hasPendingCompactionSummary() {
|
|
617
|
+
return this._summaryLocation === 'user_message' && this.hasSummary();
|
|
618
|
+
}
|
|
619
|
+
getSummaryText() {
|
|
620
|
+
return this.summaryText;
|
|
621
|
+
}
|
|
622
|
+
get summaryVersion() {
|
|
623
|
+
return this._summaryVersion;
|
|
624
|
+
}
|
|
625
|
+
/**
|
|
626
|
+
* Returns true when the message count hasn't changed since the last
|
|
627
|
+
* summarization — re-summarizing would produce an identical result.
|
|
628
|
+
* Oversized individual messages are handled by fit-to-budget truncation
|
|
629
|
+
* in the pruner, which keeps them in context without triggering overflow.
|
|
630
|
+
*/
|
|
631
|
+
shouldSkipSummarization(currentMsgCount) {
|
|
632
|
+
return (this._lastSummarizationMsgCount > 0 &&
|
|
633
|
+
currentMsgCount <= this._lastSummarizationMsgCount);
|
|
634
|
+
}
|
|
635
|
+
/**
|
|
636
|
+
* Records the message count at which summarization was triggered,
|
|
637
|
+
* so subsequent calls with the same count are suppressed.
|
|
638
|
+
*/
|
|
639
|
+
markSummarizationTriggered(msgCount) {
|
|
640
|
+
this._lastSummarizationMsgCount = msgCount;
|
|
641
|
+
}
|
|
642
|
+
clearSummary() {
|
|
643
|
+
if (this.summaryText != null) {
|
|
644
|
+
this.summaryText = undefined;
|
|
645
|
+
this.summaryTokenCount = 0;
|
|
646
|
+
this._durableSummaryText = undefined;
|
|
647
|
+
this._durableSummaryTokenCount = 0;
|
|
648
|
+
this._summaryLocation = 'none';
|
|
649
|
+
this.systemRunnableStale = true;
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
/**
|
|
653
|
+
* Returns a structured breakdown of how the context token budget is consumed.
|
|
654
|
+
* Useful for diagnostics when context overflow or pruning issues occur.
|
|
655
|
+
*/
|
|
656
|
+
getTokenBudgetBreakdown(messages) {
|
|
657
|
+
const maxContextTokens = this.maxContextTokens ?? 0;
|
|
658
|
+
const toolCount = (this.tools?.length ?? 0) + (this.toolDefinitions?.length ?? 0);
|
|
659
|
+
const messageCount = messages?.length ?? 0;
|
|
660
|
+
let messageTokens = 0;
|
|
661
|
+
if (messages != null) {
|
|
662
|
+
for (let i = 0; i < messages.length; i++) {
|
|
663
|
+
messageTokens +=
|
|
664
|
+
this.indexTokenCountMap[i] ?? 0;
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
|
|
668
|
+
const availableForMessages = Math.max(0, maxContextTokens - reserveTokens - this.instructionTokens);
|
|
669
|
+
return {
|
|
670
|
+
maxContextTokens,
|
|
671
|
+
instructionTokens: this.instructionTokens,
|
|
672
|
+
systemMessageTokens: this.systemMessageTokens,
|
|
673
|
+
toolSchemaTokens: this.toolSchemaTokens,
|
|
674
|
+
summaryTokens: this.summaryTokenCount,
|
|
675
|
+
toolCount,
|
|
676
|
+
messageCount,
|
|
677
|
+
messageTokens,
|
|
678
|
+
availableForMessages,
|
|
679
|
+
};
|
|
680
|
+
}
|
|
681
|
+
/**
|
|
682
|
+
* Returns a human-readable string of the token budget breakdown
|
|
683
|
+
* for inclusion in error messages and diagnostics.
|
|
684
|
+
*/
|
|
685
|
+
formatTokenBudgetBreakdown(messages) {
|
|
686
|
+
const b = this.getTokenBudgetBreakdown(messages);
|
|
687
|
+
const lines = [
|
|
688
|
+
'Token budget breakdown:',
|
|
689
|
+
` maxContextTokens: ${b.maxContextTokens}`,
|
|
690
|
+
` instructionTokens: ${b.instructionTokens} (system: ${b.systemMessageTokens}, tools: ${b.toolSchemaTokens} [${b.toolCount} tools])`,
|
|
691
|
+
` summaryTokens: ${b.summaryTokens}`,
|
|
692
|
+
` messageTokens: ${b.messageTokens} (${b.messageCount} messages)`,
|
|
693
|
+
` availableForMessages: ${b.availableForMessages}`,
|
|
694
|
+
];
|
|
695
|
+
return lines.join('\n');
|
|
696
|
+
}
|
|
697
|
+
/**
|
|
698
|
+
* Updates the last-call usage with data from the most recent LLM response.
|
|
699
|
+
* Unlike `currentUsage` which accumulates, this captures only the single call.
|
|
700
|
+
*/
|
|
701
|
+
updateLastCallUsage(usage) {
|
|
702
|
+
const baseInputTokens = Number(usage.input_tokens) || 0;
|
|
703
|
+
const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
|
|
704
|
+
const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
|
|
705
|
+
const outputTokens = Number(usage.output_tokens) || 0;
|
|
706
|
+
const cacheSum = cacheCreation + cacheRead;
|
|
707
|
+
const cacheIsAdditive = cacheSum > 0 && cacheSum > baseInputTokens;
|
|
708
|
+
const totalInputTokens = cacheIsAdditive
|
|
709
|
+
? baseInputTokens + cacheSum
|
|
710
|
+
: baseInputTokens;
|
|
711
|
+
this.lastCallUsage = {
|
|
712
|
+
inputTokens: totalInputTokens,
|
|
713
|
+
outputTokens,
|
|
714
|
+
totalTokens: totalInputTokens + outputTokens,
|
|
715
|
+
cacheRead: cacheRead || undefined,
|
|
716
|
+
cacheCreation: cacheCreation || undefined,
|
|
717
|
+
};
|
|
718
|
+
this.totalTokensFresh = true;
|
|
719
|
+
}
|
|
720
|
+
/** Marks token data as stale before a new LLM call. */
|
|
721
|
+
markTokensStale() {
|
|
722
|
+
this.totalTokensFresh = false;
|
|
723
|
+
}
|
|
458
724
|
/**
|
|
459
725
|
* Marks tools as discovered via tool search.
|
|
460
726
|
* Discovered tools will be included in the next model binding.
|
|
@@ -484,11 +750,9 @@ class AgentContext {
|
|
|
484
750
|
* @returns Array of tools to bind to model
|
|
485
751
|
*/
|
|
486
752
|
getToolsForBinding() {
|
|
487
|
-
/** Event-driven mode: create schema-only tools from definitions */
|
|
488
753
|
if (this.toolDefinitions && this.toolDefinitions.length > 0) {
|
|
489
754
|
return this.getEventDrivenToolsForBinding();
|
|
490
755
|
}
|
|
491
|
-
/** Traditional mode: filter actual tool instances */
|
|
492
756
|
const filtered = !this.tools || !this.toolRegistry
|
|
493
757
|
? this.tools
|
|
494
758
|
: this.filterToolsForBinding(this.tools);
|