@librechat/agents 3.1.56 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +2 -2
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -1,11 +1,29 @@
1
- import { SystemMessage } from '@langchain/core/messages';
1
+ import { SystemMessage, HumanMessage } from '@langchain/core/messages';
2
2
  import { RunnableLambda } from '@langchain/core/runnables';
3
3
  import { createSchemaOnlyTools } from '../tools/schema.mjs';
4
+ import { addCacheControl } from '../messages/cache.mjs';
4
5
  import { ContentTypes, Providers } from '../common/enum.mjs';
6
+ import '../messages/core.mjs';
7
+ import 'nanoid';
8
+ import { DEFAULT_RESERVE_RATIO } from '../messages/prune.mjs';
9
+ import '@langchain/core/callbacks/dispatch';
10
+ import 'uuid';
5
11
  import { toJsonSchema } from '../utils/schema.mjs';
6
12
 
7
13
  /* eslint-disable no-console */
8
- // src/agents/AgentContext.ts
14
+ /**
15
+ * Anthropic direct API tool schema overhead multiplier.
16
+ * Empirically calibrated against real MCP tool sets (29 tools).
17
+ * Accounts for Anthropic's internal XML-like tool encoding plus
18
+ * a ~300-token hidden tool-system preamble.
19
+ */
20
+ const ANTHROPIC_TOOL_TOKEN_MULTIPLIER = 2.6;
21
+ /**
22
+ * Default tool schema overhead multiplier for all non-Anthropic providers.
23
+ * Covers OpenAI function-calling format, Bedrock, and other providers.
24
+ * Empirically calibrated at ~1.4× the raw JSON token count.
25
+ */
26
+ const DEFAULT_TOOL_TOKEN_MULTIPLIER = 1.4;
9
27
  /**
10
28
  * Encapsulates agent-specific state that can vary between agents in a multi-agent system
11
29
  */
@@ -14,7 +32,7 @@ class AgentContext {
14
32
  * Create an AgentContext from configuration with token accounting initialization
15
33
  */
16
34
  static fromConfig(agentConfig, tokenCounter, indexTokenCountMap) {
17
- const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, } = agentConfig;
35
+ const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, initialSummary, contextPruningConfig, maxToolResultChars, } = agentConfig;
18
36
  const agentContext = new AgentContext({
19
37
  agentId,
20
38
  name: name ?? agentId,
@@ -34,11 +52,15 @@ class AgentContext {
34
52
  tokenCounter,
35
53
  useLegacyContent,
36
54
  discoveredTools,
55
+ summarizationEnabled,
56
+ summarizationConfig,
57
+ contextPruningConfig,
58
+ maxToolResultChars,
37
59
  });
60
+ if (initialSummary?.text != null && initialSummary.text !== '') {
61
+ agentContext.setInitialSummary(initialSummary.text, initialSummary.tokenCount);
62
+ }
38
63
  if (tokenCounter) {
39
- // Initialize system runnable BEFORE async tool token calculation
40
- // This ensures system message tokens are in instructionTokens before
41
- // updateTokenMapWithInstructions is called
42
64
  agentContext.initializeSystemRunnable();
43
65
  const tokenMap = indexTokenCountMap || {};
44
66
  agentContext.baseIndexTokenCountMap = { ...tokenMap };
@@ -46,7 +68,6 @@ class AgentContext {
46
68
  agentContext.tokenCalculationPromise = agentContext
47
69
  .calculateInstructionTokens(tokenCounter)
48
70
  .then(() => {
49
- // Update token map with instruction tokens (includes system + tool tokens)
50
71
  agentContext.updateTokenMapWithInstructions(tokenMap);
51
72
  })
52
73
  .catch((err) => {
@@ -75,12 +96,39 @@ class AgentContext {
75
96
  maxContextTokens;
76
97
  /** Current usage metadata for this agent */
77
98
  currentUsage;
99
+ /**
100
+ * Usage from the most recent LLM call only (not accumulated).
101
+ * Used for accurate provider calibration in pruning.
102
+ */
103
+ lastCallUsage;
104
+ /**
105
+ * Whether totalTokens data is fresh (set true when provider usage arrives,
106
+ * false at the start of each turn before the LLM responds).
107
+ * Prevents stale token data from driving pruning/trigger decisions.
108
+ */
109
+ totalTokensFresh = false;
110
+ /** Context pruning configuration. */
111
+ contextPruningConfig;
112
+ maxToolResultChars;
78
113
  /** Prune messages function configured for this agent */
79
114
  pruneMessages;
80
115
  /** Token counter function for this agent */
81
116
  tokenCounter;
82
- /** Instructions/system message token count */
83
- instructionTokens = 0;
117
+ /** Token count for the system message (instructions text). */
118
+ systemMessageTokens = 0;
119
+ /** Token count for tool schemas only. */
120
+ toolSchemaTokens = 0;
121
+ /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
122
+ calibrationRatio = 1;
123
+ /** Provider-observed instruction overhead from the pruner's best-variance turn. */
124
+ resolvedInstructionOverhead;
125
+ /** Pre-masking tool content keyed by message index, consumed by the summarize node. */
126
+ pendingOriginalToolContent;
127
+ /** Total instruction overhead: system message + tool schemas + pending summary. */
128
+ get instructionTokens() {
129
+ const summaryOverhead = this._summaryLocation === 'user_message' ? this.summaryTokenCount : 0;
130
+ return this.systemMessageTokens + this.toolSchemaTokens + summaryOverhead;
131
+ }
84
132
  /** The amount of time that should pass before another consecutive API call */
85
133
  streamBuffer;
86
134
  /** Last stream call timestamp for rate limiting */
@@ -123,18 +171,47 @@ class AgentContext {
123
171
  cachedSystemRunnable;
124
172
  /** Whether system runnable needs rebuild (set when discovered tools change) */
125
173
  systemRunnableStale = true;
126
- /** Cached system message token count (separate from tool tokens) */
127
- systemMessageTokens = 0;
128
174
  /** Promise for token calculation initialization */
129
175
  tokenCalculationPromise;
130
176
  /** Format content blocks as strings (for legacy compatibility) */
131
177
  useLegacyContent = false;
178
+ /** Enables graph-level summarization for this agent */
179
+ summarizationEnabled;
180
+ /** Summarization runtime settings used by graph pruning hooks */
181
+ summarizationConfig;
182
+ /** Current summary text produced by the summarize node, integrated into system message */
183
+ summaryText;
184
+ /** Token count of the current summary (tracked for token accounting) */
185
+ summaryTokenCount = 0;
186
+ /**
187
+ * Where the summary should be injected:
188
+ * - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
189
+ * - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
190
+ * - `'none'`: no summary present
191
+ */
192
+ _summaryLocation = 'none';
193
+ /**
194
+ * Durable summary that survives reset() calls. Set from initialSummary
195
+ * during fromConfig() and updated by setSummary() so that the latest
196
+ * summary (whether cross-run or intra-run) is always restored after
197
+ * processStream's resetValues() cycle.
198
+ */
199
+ _durableSummaryText;
200
+ _durableSummaryTokenCount = 0;
201
+ /** Number of summarization cycles that have occurred for this agent context */
202
+ _summaryVersion = 0;
203
+ /**
204
+ * Message count at the time summarization was last triggered.
205
+ * Used to prevent re-summarizing the same unchanged message set.
206
+ * Summarization is allowed to fire again only when new messages appear.
207
+ */
208
+ _lastSummarizationMsgCount = 0;
132
209
  /**
133
210
  * Handoff context when this agent receives control via handoff.
134
211
  * Contains source and parallel execution info for system message context.
135
212
  */
136
213
  handoffContext;
137
- constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, }) {
214
+ constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, contextPruningConfig, maxToolResultChars, }) {
138
215
  this.agentId = agentId;
139
216
  this.name = name;
140
217
  this.provider = provider;
@@ -155,9 +232,13 @@ class AgentContext {
155
232
  this.toolEnd = toolEnd;
156
233
  }
157
234
  if (instructionTokens !== undefined) {
158
- this.instructionTokens = instructionTokens;
235
+ this.systemMessageTokens = instructionTokens;
159
236
  }
160
237
  this.useLegacyContent = useLegacyContent ?? false;
238
+ this.summarizationEnabled = summarizationEnabled;
239
+ this.summarizationConfig = summarizationConfig;
240
+ this.contextPruningConfig = contextPruningConfig;
241
+ this.maxToolResultChars = maxToolResultChars;
161
242
  if (discoveredTools && discoveredTools.length > 0) {
162
243
  for (const toolName of discoveredTools) {
163
244
  this.discoveredToolNames.add(toolName);
@@ -183,7 +264,6 @@ class AgentContext {
183
264
  !allowedCallers.includes('direct');
184
265
  if (!isCodeExecutionOnly)
185
266
  continue;
186
- // Include if: not deferred OR deferred but discovered
187
267
  const isDeferred = toolDef.defer_loading === true;
188
268
  const isDiscovered = this.discoveredToolNames.has(name);
189
269
  if (!isDeferred || isDiscovered) {
@@ -215,11 +295,9 @@ class AgentContext {
215
295
  * Only rebuilds when marked stale (via markToolsAsDiscovered).
216
296
  */
217
297
  get systemRunnable() {
218
- // Return cached if not stale
219
298
  if (!this.systemRunnableStale && this.cachedSystemRunnable !== undefined) {
220
299
  return this.cachedSystemRunnable;
221
300
  }
222
- // Stale or first access - rebuild
223
301
  const instructionsString = this.buildInstructionsString();
224
302
  this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
225
303
  this.systemRunnableStale = false;
@@ -242,25 +320,29 @@ class AgentContext {
242
320
  */
243
321
  buildInstructionsString() {
244
322
  const parts = [];
245
- /** Build agent identity and handoff context preamble */
246
323
  const identityPreamble = this.buildIdentityPreamble();
247
324
  if (identityPreamble) {
248
325
  parts.push(identityPreamble);
249
326
  }
250
- /** Add main instructions */
251
327
  if (this.instructions != null && this.instructions !== '') {
252
328
  parts.push(this.instructions);
253
329
  }
254
- /** Add additional instructions */
255
330
  if (this.additionalInstructions != null &&
256
331
  this.additionalInstructions !== '') {
257
332
  parts.push(this.additionalInstructions);
258
333
  }
259
- /** Add programmatic tools documentation */
260
334
  const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
261
335
  if (programmaticToolsDoc) {
262
336
  parts.push(programmaticToolsDoc);
263
337
  }
338
+ // Cross-run summary: include in system prompt so the model has context
339
+ // from the prior run. Mid-run summaries are injected as a HumanMessage
340
+ // on the post-compaction clean slate instead (see buildSystemRunnable).
341
+ if (this._summaryLocation === 'system_prompt' &&
342
+ this.summaryText != null &&
343
+ this.summaryText !== '') {
344
+ parts.push('## Conversation Summary\n\n' + this.summaryText);
345
+ }
264
346
  return parts.join('\n\n');
265
347
  }
266
348
  /**
@@ -287,17 +369,19 @@ class AgentContext {
287
369
  * Only called when content has actually changed.
288
370
  */
289
371
  buildSystemRunnable(instructionsString) {
290
- if (!instructionsString) {
291
- // Remove previous tokens if we had a system message before
292
- this.instructionTokens -= this.systemMessageTokens;
372
+ const hasMidRunSummary = this._summaryLocation === 'user_message' &&
373
+ this.summaryText != null &&
374
+ this.summaryText !== '';
375
+ if (!instructionsString && !hasMidRunSummary) {
293
376
  this.systemMessageTokens = 0;
294
377
  return undefined;
295
378
  }
296
379
  let finalInstructions = instructionsString;
297
- // Handle Anthropic prompt caching
380
+ let usePromptCache = false;
298
381
  if (this.provider === Providers.ANTHROPIC) {
299
382
  const anthropicOptions = this.clientOptions;
300
383
  if (anthropicOptions?.promptCache === true) {
384
+ usePromptCache = true;
301
385
  finalInstructions = {
302
386
  content: [
303
387
  {
@@ -309,23 +393,56 @@ class AgentContext {
309
393
  };
310
394
  }
311
395
  }
312
- const systemMessage = new SystemMessage(finalInstructions);
313
- // Update token counts (subtract old, add new)
396
+ const systemMessage = instructionsString
397
+ ? new SystemMessage(finalInstructions)
398
+ : undefined;
314
399
  if (this.tokenCounter) {
315
- this.instructionTokens -= this.systemMessageTokens;
316
- this.systemMessageTokens = this.tokenCounter(systemMessage);
317
- this.instructionTokens += this.systemMessageTokens;
400
+ this.systemMessageTokens = systemMessage
401
+ ? this.tokenCounter(systemMessage)
402
+ : 0;
318
403
  }
319
404
  return RunnableLambda.from((messages) => {
320
- return [systemMessage, ...messages];
405
+ const prefix = systemMessage ? [systemMessage] : [];
406
+ // Build the non-system portion (summary + conversation), then apply
407
+ // cache markers separately so addCacheControl doesn't strip the
408
+ // SystemMessage's own cache_control breakpoint set above.
409
+ const hasSummaryBody = this._summaryLocation === 'user_message' &&
410
+ this.summaryText != null &&
411
+ this.summaryText !== '';
412
+ let body;
413
+ if (hasSummaryBody) {
414
+ const wrappedSummary = '<summary>\n' +
415
+ this.summaryText +
416
+ '\n</summary>\n\n' +
417
+ 'This is your own checkpoint: you wrote it to preserve context after compaction. Pick up where you left off based on the summary above. Do not repeat prior tasks, information or acknowledge this checkpoint message directly.';
418
+ const summaryMsg = usePromptCache
419
+ ? new HumanMessage({
420
+ content: [
421
+ {
422
+ type: 'text',
423
+ text: wrappedSummary,
424
+ cache_control: { type: 'ephemeral' },
425
+ },
426
+ ],
427
+ })
428
+ : new HumanMessage(wrappedSummary);
429
+ body = [summaryMsg, ...messages];
430
+ }
431
+ else {
432
+ body = messages;
433
+ }
434
+ if (usePromptCache && body.length >= 2) {
435
+ body = addCacheControl(body);
436
+ }
437
+ return [...prefix, ...body];
321
438
  }).withConfig({ runName: 'prompt' });
322
439
  }
323
440
  /**
324
441
  * Reset context for a new run
325
442
  */
326
443
  reset() {
327
- this.instructionTokens = 0;
328
444
  this.systemMessageTokens = 0;
445
+ this.toolSchemaTokens = 0;
329
446
  this.cachedSystemRunnable = undefined;
330
447
  this.systemRunnableStale = true;
331
448
  this.lastToken = undefined;
@@ -338,6 +455,11 @@ class AgentContext {
338
455
  this.currentTokenType = ContentTypes.TEXT;
339
456
  this.discoveredToolNames.clear();
340
457
  this.handoffContext = undefined;
458
+ this.summaryText = this._durableSummaryText;
459
+ this.summaryTokenCount = this._durableSummaryTokenCount;
460
+ this._lastSummarizationMsgCount = 0;
461
+ this.lastCallUsage = undefined;
462
+ this.totalTokensFresh = false;
341
463
  if (this.tokenCounter) {
342
464
  this.initializeSystemRunnable();
343
465
  const baseTokenMap = { ...this.baseIndexTokenCountMap };
@@ -355,24 +477,21 @@ class AgentContext {
355
477
  }
356
478
  }
357
479
  /**
358
- * Update the token count map with instruction tokens
480
+ * Update the token count map from a base map.
481
+ *
482
+ * Previously this inflated index 0 with instructionTokens to indirectly
483
+ * reserve budget for the system prompt. That approach was imprecise: with
484
+ * large tool-schema overhead (e.g. 26 MCP tools ~5 000 tokens) the first
485
+ * conversation message appeared enormous and was always pruned, while the
486
+ * real available budget was never explicitly computed.
487
+ *
488
+ * Now instruction tokens are passed to getMessagesWithinTokenLimit via
489
+ * the `getInstructionTokens` factory param so the pruner subtracts them
490
+ * from the budget directly. The token map contains only real per-message
491
+ * token counts.
359
492
  */
360
493
  updateTokenMapWithInstructions(baseTokenMap) {
361
- if (this.instructionTokens > 0) {
362
- // Shift all indices by the instruction token count
363
- const shiftedMap = {};
364
- for (const [key, value] of Object.entries(baseTokenMap)) {
365
- const index = parseInt(key, 10);
366
- if (!isNaN(index)) {
367
- shiftedMap[String(index)] =
368
- value + (index === 0 ? this.instructionTokens : 0);
369
- }
370
- }
371
- this.indexTokenCountMap = shiftedMap;
372
- }
373
- else {
374
- this.indexTokenCountMap = { ...baseTokenMap };
375
- }
494
+ this.indexTokenCountMap = { ...baseTokenMap };
376
495
  }
377
496
  /**
378
497
  * Calculate tool tokens and add to instruction tokens
@@ -380,11 +499,7 @@ class AgentContext {
380
499
  */
381
500
  async calculateInstructionTokens(tokenCounter) {
382
501
  let toolTokens = 0;
383
- // Track names to avoid double-counting when a tool appears in both
384
- // this.tools (bound StructuredTool instances) and this.toolDefinitions
385
- // (MCP / event-driven schemas).
386
502
  const countedToolNames = new Set();
387
- // Count tokens for bound tools (StructuredTool instances with .schema)
388
503
  if (this.tools && this.tools.length > 0) {
389
504
  for (const tool of this.tools) {
390
505
  const genericTool = tool;
@@ -399,23 +514,29 @@ class AgentContext {
399
514
  }
400
515
  }
401
516
  }
402
- // Count tokens for tool definitions (MCP / event-driven tools).
403
- // These are sent to the provider API as tool schemas alongside bound tools.
404
- // Both can be populated simultaneously (graph tools + MCP tools).
405
517
  if (this.toolDefinitions && this.toolDefinitions.length > 0) {
406
518
  for (const def of this.toolDefinitions) {
407
519
  if (countedToolNames.has(def.name)) {
408
- continue; // Already counted via this.tools
520
+ continue;
409
521
  }
410
522
  const schema = {
411
- name: def.name,
412
- description: def.description ?? '',
413
- parameters: def.parameters ?? {},
523
+ type: 'function',
524
+ function: {
525
+ name: def.name,
526
+ description: def.description ?? '',
527
+ parameters: def.parameters ?? {},
528
+ },
414
529
  };
415
530
  toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
416
531
  }
417
532
  }
418
- this.instructionTokens += toolTokens;
533
+ const isAnthropic = this.provider !== Providers.BEDROCK &&
534
+ (this.provider === Providers.ANTHROPIC ||
535
+ /anthropic|claude/i.test(String(this.clientOptions?.model ?? '')));
536
+ const toolTokenMultiplier = isAnthropic
537
+ ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
538
+ : DEFAULT_TOOL_TOKEN_MULTIPLIER;
539
+ this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
419
540
  }
420
541
  /**
421
542
  * Gets the tool registry for deferred tools (for tool search).
@@ -455,6 +576,151 @@ class AgentContext {
455
576
  this.systemRunnableStale = true;
456
577
  }
457
578
  }
579
+ setSummary(text, tokenCount) {
580
+ this.summaryText = text;
581
+ this.summaryTokenCount = tokenCount;
582
+ this._summaryLocation = 'user_message';
583
+ this._durableSummaryText = text;
584
+ this._durableSummaryTokenCount = tokenCount;
585
+ this._summaryVersion += 1;
586
+ this.systemRunnableStale = true;
587
+ this.pruneMessages = undefined;
588
+ }
589
+ /** Sets a cross-run summary that is injected into the system prompt. */
590
+ setInitialSummary(text, tokenCount) {
591
+ this.summaryText = text;
592
+ this.summaryTokenCount = tokenCount;
593
+ this._summaryLocation = 'system_prompt';
594
+ this._durableSummaryText = text;
595
+ this._durableSummaryTokenCount = tokenCount;
596
+ this._summaryVersion += 1;
597
+ this.systemRunnableStale = true;
598
+ }
599
+ /**
600
+ * Replaces the indexTokenCountMap with a fresh map keyed to the surviving
601
+ * context messages after summarization. Called by the summarize node after
602
+ * it emits RemoveMessage operations that shift message indices.
603
+ */
604
+ rebuildTokenMapAfterSummarization(newTokenMap) {
605
+ this.indexTokenCountMap = newTokenMap;
606
+ this.baseIndexTokenCountMap = { ...newTokenMap };
607
+ this._lastSummarizationMsgCount = Object.keys(newTokenMap).length;
608
+ this.currentUsage = undefined;
609
+ this.lastCallUsage = undefined;
610
+ this.totalTokensFresh = false;
611
+ }
612
+ hasSummary() {
613
+ return this.summaryText != null && this.summaryText !== '';
614
+ }
615
+ /** True when a mid-run compaction summary is ready to be injected as a HumanMessage. */
616
+ hasPendingCompactionSummary() {
617
+ return this._summaryLocation === 'user_message' && this.hasSummary();
618
+ }
619
+ getSummaryText() {
620
+ return this.summaryText;
621
+ }
622
+ get summaryVersion() {
623
+ return this._summaryVersion;
624
+ }
625
+ /**
626
+ * Returns true when the message count hasn't changed since the last
627
+ * summarization — re-summarizing would produce an identical result.
628
+ * Oversized individual messages are handled by fit-to-budget truncation
629
+ * in the pruner, which keeps them in context without triggering overflow.
630
+ */
631
+ shouldSkipSummarization(currentMsgCount) {
632
+ return (this._lastSummarizationMsgCount > 0 &&
633
+ currentMsgCount <= this._lastSummarizationMsgCount);
634
+ }
635
+ /**
636
+ * Records the message count at which summarization was triggered,
637
+ * so subsequent calls with the same count are suppressed.
638
+ */
639
+ markSummarizationTriggered(msgCount) {
640
+ this._lastSummarizationMsgCount = msgCount;
641
+ }
642
+ clearSummary() {
643
+ if (this.summaryText != null) {
644
+ this.summaryText = undefined;
645
+ this.summaryTokenCount = 0;
646
+ this._durableSummaryText = undefined;
647
+ this._durableSummaryTokenCount = 0;
648
+ this._summaryLocation = 'none';
649
+ this.systemRunnableStale = true;
650
+ }
651
+ }
652
+ /**
653
+ * Returns a structured breakdown of how the context token budget is consumed.
654
+ * Useful for diagnostics when context overflow or pruning issues occur.
655
+ */
656
+ getTokenBudgetBreakdown(messages) {
657
+ const maxContextTokens = this.maxContextTokens ?? 0;
658
+ const toolCount = (this.tools?.length ?? 0) + (this.toolDefinitions?.length ?? 0);
659
+ const messageCount = messages?.length ?? 0;
660
+ let messageTokens = 0;
661
+ if (messages != null) {
662
+ for (let i = 0; i < messages.length; i++) {
663
+ messageTokens +=
664
+ this.indexTokenCountMap[i] ?? 0;
665
+ }
666
+ }
667
+ const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
668
+ const availableForMessages = Math.max(0, maxContextTokens - reserveTokens - this.instructionTokens);
669
+ return {
670
+ maxContextTokens,
671
+ instructionTokens: this.instructionTokens,
672
+ systemMessageTokens: this.systemMessageTokens,
673
+ toolSchemaTokens: this.toolSchemaTokens,
674
+ summaryTokens: this.summaryTokenCount,
675
+ toolCount,
676
+ messageCount,
677
+ messageTokens,
678
+ availableForMessages,
679
+ };
680
+ }
681
+ /**
682
+ * Returns a human-readable string of the token budget breakdown
683
+ * for inclusion in error messages and diagnostics.
684
+ */
685
+ formatTokenBudgetBreakdown(messages) {
686
+ const b = this.getTokenBudgetBreakdown(messages);
687
+ const lines = [
688
+ 'Token budget breakdown:',
689
+ ` maxContextTokens: ${b.maxContextTokens}`,
690
+ ` instructionTokens: ${b.instructionTokens} (system: ${b.systemMessageTokens}, tools: ${b.toolSchemaTokens} [${b.toolCount} tools])`,
691
+ ` summaryTokens: ${b.summaryTokens}`,
692
+ ` messageTokens: ${b.messageTokens} (${b.messageCount} messages)`,
693
+ ` availableForMessages: ${b.availableForMessages}`,
694
+ ];
695
+ return lines.join('\n');
696
+ }
697
+ /**
698
+ * Updates the last-call usage with data from the most recent LLM response.
699
+ * Unlike `currentUsage` which accumulates, this captures only the single call.
700
+ */
701
+ updateLastCallUsage(usage) {
702
+ const baseInputTokens = Number(usage.input_tokens) || 0;
703
+ const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
704
+ const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
705
+ const outputTokens = Number(usage.output_tokens) || 0;
706
+ const cacheSum = cacheCreation + cacheRead;
707
+ const cacheIsAdditive = cacheSum > 0 && cacheSum > baseInputTokens;
708
+ const totalInputTokens = cacheIsAdditive
709
+ ? baseInputTokens + cacheSum
710
+ : baseInputTokens;
711
+ this.lastCallUsage = {
712
+ inputTokens: totalInputTokens,
713
+ outputTokens,
714
+ totalTokens: totalInputTokens + outputTokens,
715
+ cacheRead: cacheRead || undefined,
716
+ cacheCreation: cacheCreation || undefined,
717
+ };
718
+ this.totalTokensFresh = true;
719
+ }
720
+ /** Marks token data as stale before a new LLM call. */
721
+ markTokensStale() {
722
+ this.totalTokensFresh = false;
723
+ }
458
724
  /**
459
725
  * Marks tools as discovered via tool search.
460
726
  * Discovered tools will be included in the next model binding.
@@ -484,11 +750,9 @@ class AgentContext {
484
750
  * @returns Array of tools to bind to model
485
751
  */
486
752
  getToolsForBinding() {
487
- /** Event-driven mode: create schema-only tools from definitions */
488
753
  if (this.toolDefinitions && this.toolDefinitions.length > 0) {
489
754
  return this.getEventDrivenToolsForBinding();
490
755
  }
491
- /** Traditional mode: filter actual tool instances */
492
756
  const filtered = !this.tools || !this.toolRegistry
493
757
  ? this.tools
494
758
  : this.filterToolsForBinding(this.tools);