@librechat/agents 3.1.57 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +1 -1
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -1,6 +1,5 @@
1
1
  /* eslint-disable no-console */
2
- // src/agents/AgentContext.ts
3
- import { SystemMessage } from '@langchain/core/messages';
2
+ import { HumanMessage, SystemMessage } from '@langchain/core/messages';
4
3
  import { RunnableLambda } from '@langchain/core/runnables';
5
4
  import type {
6
5
  UsageMetadata,
@@ -11,9 +10,26 @@ import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
11
10
  import type * as t from '@/types';
12
11
  import type { createPruneMessages } from '@/messages';
13
12
  import { createSchemaOnlyTools } from '@/tools/schema';
13
+ import { addCacheControl } from '@/messages/cache';
14
14
  import { ContentTypes, Providers } from '@/common';
15
+ import { DEFAULT_RESERVE_RATIO } from '@/messages';
15
16
  import { toJsonSchema } from '@/utils/schema';
16
17
 
18
+ /**
19
+ * Anthropic direct API tool schema overhead multiplier.
20
+ * Empirically calibrated against real MCP tool sets (29 tools).
21
+ * Accounts for Anthropic's internal XML-like tool encoding plus
22
+ * a ~300-token hidden tool-system preamble.
23
+ */
24
+ const ANTHROPIC_TOOL_TOKEN_MULTIPLIER = 2.6;
25
+
26
+ /**
27
+ * Default tool schema overhead multiplier for all non-Anthropic providers.
28
+ * Covers OpenAI function-calling format, Bedrock, and other providers.
29
+ * Empirically calibrated at ~1.4× the raw JSON token count.
30
+ */
31
+ const DEFAULT_TOOL_TOKEN_MULTIPLIER = 1.4;
32
+
17
33
  /**
18
34
  * Encapsulates agent-specific state that can vary between agents in a multi-agent system
19
35
  */
@@ -43,6 +59,11 @@ export class AgentContext {
43
59
  reasoningKey,
44
60
  useLegacyContent,
45
61
  discoveredTools,
62
+ summarizationEnabled,
63
+ summarizationConfig,
64
+ initialSummary,
65
+ contextPruningConfig,
66
+ maxToolResultChars,
46
67
  } = agentConfig;
47
68
 
48
69
  const agentContext = new AgentContext({
@@ -64,12 +85,20 @@ export class AgentContext {
64
85
  tokenCounter,
65
86
  useLegacyContent,
66
87
  discoveredTools,
88
+ summarizationEnabled,
89
+ summarizationConfig,
90
+ contextPruningConfig,
91
+ maxToolResultChars,
67
92
  });
68
93
 
94
+ if (initialSummary?.text != null && initialSummary.text !== '') {
95
+ agentContext.setInitialSummary(
96
+ initialSummary.text,
97
+ initialSummary.tokenCount
98
+ );
99
+ }
100
+
69
101
  if (tokenCounter) {
70
- // Initialize system runnable BEFORE async tool token calculation
71
- // This ensures system message tokens are in instructionTokens before
72
- // updateTokenMapWithInstructions is called
73
102
  agentContext.initializeSystemRunnable();
74
103
 
75
104
  const tokenMap = indexTokenCountMap || {};
@@ -78,7 +107,6 @@ export class AgentContext {
78
107
  agentContext.tokenCalculationPromise = agentContext
79
108
  .calculateInstructionTokens(tokenCounter)
80
109
  .then(() => {
81
- // Update token map with instruction tokens (includes system + tool tokens)
82
110
  agentContext.updateTokenMapWithInstructions(tokenMap);
83
111
  })
84
112
  .catch((err) => {
@@ -108,12 +136,47 @@ export class AgentContext {
108
136
  maxContextTokens?: number;
109
137
  /** Current usage metadata for this agent */
110
138
  currentUsage?: Partial<UsageMetadata>;
139
+ /**
140
+ * Usage from the most recent LLM call only (not accumulated).
141
+ * Used for accurate provider calibration in pruning.
142
+ */
143
+ lastCallUsage?: {
144
+ inputTokens: number;
145
+ outputTokens: number;
146
+ totalTokens: number;
147
+ cacheRead?: number;
148
+ cacheCreation?: number;
149
+ };
150
+ /**
151
+ * Whether totalTokens data is fresh (set true when provider usage arrives,
152
+ * false at the start of each turn before the LLM responds).
153
+ * Prevents stale token data from driving pruning/trigger decisions.
154
+ */
155
+ totalTokensFresh: boolean = false;
156
+ /** Context pruning configuration. */
157
+ contextPruningConfig?: t.ContextPruningConfig;
158
+ maxToolResultChars?: number;
111
159
  /** Prune messages function configured for this agent */
112
160
  pruneMessages?: ReturnType<typeof createPruneMessages>;
113
161
  /** Token counter function for this agent */
114
162
  tokenCounter?: t.TokenCounter;
115
- /** Instructions/system message token count */
116
- instructionTokens: number = 0;
163
+ /** Token count for the system message (instructions text). */
164
+ systemMessageTokens: number = 0;
165
+ /** Token count for tool schemas only. */
166
+ toolSchemaTokens: number = 0;
167
+ /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
168
+ calibrationRatio: number = 1;
169
+ /** Provider-observed instruction overhead from the pruner's best-variance turn. */
170
+ resolvedInstructionOverhead?: number;
171
+ /** Pre-masking tool content keyed by message index, consumed by the summarize node. */
172
+ pendingOriginalToolContent?: Map<number, string>;
173
+
174
+ /** Total instruction overhead: system message + tool schemas + pending summary. */
175
+ get instructionTokens(): number {
176
+ const summaryOverhead =
177
+ this._summaryLocation === 'user_message' ? this.summaryTokenCount : 0;
178
+ return this.systemMessageTokens + this.toolSchemaTokens + summaryOverhead;
179
+ }
117
180
  /** The amount of time that should pass before another consecutive API call */
118
181
  streamBuffer?: number;
119
182
  /** Last stream call timestamp for rate limiting */
@@ -161,12 +224,41 @@ export class AgentContext {
161
224
  >;
162
225
  /** Whether system runnable needs rebuild (set when discovered tools change) */
163
226
  private systemRunnableStale: boolean = true;
164
- /** Cached system message token count (separate from tool tokens) */
165
- private systemMessageTokens: number = 0;
166
227
  /** Promise for token calculation initialization */
167
228
  tokenCalculationPromise?: Promise<void>;
168
229
  /** Format content blocks as strings (for legacy compatibility) */
169
230
  useLegacyContent: boolean = false;
231
+ /** Enables graph-level summarization for this agent */
232
+ summarizationEnabled?: boolean;
233
+ /** Summarization runtime settings used by graph pruning hooks */
234
+ summarizationConfig?: t.SummarizationConfig;
235
+ /** Current summary text produced by the summarize node, integrated into system message */
236
+ private summaryText?: string;
237
+ /** Token count of the current summary (tracked for token accounting) */
238
+ private summaryTokenCount: number = 0;
239
+ /**
240
+ * Where the summary should be injected:
241
+ * - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
242
+ * - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
243
+ * - `'none'`: no summary present
244
+ */
245
+ private _summaryLocation: 'system_prompt' | 'user_message' | 'none' = 'none';
246
+ /**
247
+ * Durable summary that survives reset() calls. Set from initialSummary
248
+ * during fromConfig() and updated by setSummary() so that the latest
249
+ * summary (whether cross-run or intra-run) is always restored after
250
+ * processStream's resetValues() cycle.
251
+ */
252
+ private _durableSummaryText?: string;
253
+ private _durableSummaryTokenCount: number = 0;
254
+ /** Number of summarization cycles that have occurred for this agent context */
255
+ private _summaryVersion: number = 0;
256
+ /**
257
+ * Message count at the time summarization was last triggered.
258
+ * Used to prevent re-summarizing the same unchanged message set.
259
+ * Summarization is allowed to fire again only when new messages appear.
260
+ */
261
+ private _lastSummarizationMsgCount: number = 0;
170
262
  /**
171
263
  * Handoff context when this agent receives control via handoff.
172
264
  * Contains source and parallel execution info for system message context.
@@ -197,6 +289,10 @@ export class AgentContext {
197
289
  instructionTokens,
198
290
  useLegacyContent,
199
291
  discoveredTools,
292
+ summarizationEnabled,
293
+ summarizationConfig,
294
+ contextPruningConfig,
295
+ maxToolResultChars,
200
296
  }: {
201
297
  agentId: string;
202
298
  name?: string;
@@ -216,6 +312,10 @@ export class AgentContext {
216
312
  instructionTokens?: number;
217
313
  useLegacyContent?: boolean;
218
314
  discoveredTools?: string[];
315
+ summarizationEnabled?: boolean;
316
+ summarizationConfig?: t.SummarizationConfig;
317
+ contextPruningConfig?: t.ContextPruningConfig;
318
+ maxToolResultChars?: number;
219
319
  }) {
220
320
  this.agentId = agentId;
221
321
  this.name = name;
@@ -237,10 +337,14 @@ export class AgentContext {
237
337
  this.toolEnd = toolEnd;
238
338
  }
239
339
  if (instructionTokens !== undefined) {
240
- this.instructionTokens = instructionTokens;
340
+ this.systemMessageTokens = instructionTokens;
241
341
  }
242
342
 
243
343
  this.useLegacyContent = useLegacyContent ?? false;
344
+ this.summarizationEnabled = summarizationEnabled;
345
+ this.summarizationConfig = summarizationConfig;
346
+ this.contextPruningConfig = contextPruningConfig;
347
+ this.maxToolResultChars = maxToolResultChars;
244
348
 
245
349
  if (discoveredTools && discoveredTools.length > 0) {
246
350
  for (const toolName of discoveredTools) {
@@ -270,7 +374,6 @@ export class AgentContext {
270
374
 
271
375
  if (!isCodeExecutionOnly) continue;
272
376
 
273
- // Include if: not deferred OR deferred but discovered
274
377
  const isDeferred = toolDef.defer_loading === true;
275
378
  const isDiscovered = this.discoveredToolNames.has(name);
276
379
  if (!isDeferred || isDiscovered) {
@@ -313,12 +416,10 @@ export class AgentContext {
313
416
  RunnableConfig<Record<string, unknown>>
314
417
  >
315
418
  | undefined {
316
- // Return cached if not stale
317
419
  if (!this.systemRunnableStale && this.cachedSystemRunnable !== undefined) {
318
420
  return this.cachedSystemRunnable;
319
421
  }
320
422
 
321
- // Stale or first access - rebuild
322
423
  const instructionsString = this.buildInstructionsString();
323
424
  this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
324
425
  this.systemRunnableStale = false;
@@ -344,18 +445,15 @@ export class AgentContext {
344
445
  private buildInstructionsString(): string {
345
446
  const parts: string[] = [];
346
447
 
347
- /** Build agent identity and handoff context preamble */
348
448
  const identityPreamble = this.buildIdentityPreamble();
349
449
  if (identityPreamble) {
350
450
  parts.push(identityPreamble);
351
451
  }
352
452
 
353
- /** Add main instructions */
354
453
  if (this.instructions != null && this.instructions !== '') {
355
454
  parts.push(this.instructions);
356
455
  }
357
456
 
358
- /** Add additional instructions */
359
457
  if (
360
458
  this.additionalInstructions != null &&
361
459
  this.additionalInstructions !== ''
@@ -363,12 +461,22 @@ export class AgentContext {
363
461
  parts.push(this.additionalInstructions);
364
462
  }
365
463
 
366
- /** Add programmatic tools documentation */
367
464
  const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
368
465
  if (programmaticToolsDoc) {
369
466
  parts.push(programmaticToolsDoc);
370
467
  }
371
468
 
469
+ // Cross-run summary: include in system prompt so the model has context
470
+ // from the prior run. Mid-run summaries are injected as a HumanMessage
471
+ // on the post-compaction clean slate instead (see buildSystemRunnable).
472
+ if (
473
+ this._summaryLocation === 'system_prompt' &&
474
+ this.summaryText != null &&
475
+ this.summaryText !== ''
476
+ ) {
477
+ parts.push('## Conversation Summary\n\n' + this.summaryText);
478
+ }
479
+
372
480
  return parts.join('\n\n');
373
481
  }
374
482
 
@@ -413,21 +521,25 @@ export class AgentContext {
413
521
  RunnableConfig<Record<string, unknown>>
414
522
  >
415
523
  | undefined {
416
- if (!instructionsString) {
417
- // Remove previous tokens if we had a system message before
418
- this.instructionTokens -= this.systemMessageTokens;
524
+ const hasMidRunSummary =
525
+ this._summaryLocation === 'user_message' &&
526
+ this.summaryText != null &&
527
+ this.summaryText !== '';
528
+
529
+ if (!instructionsString && !hasMidRunSummary) {
419
530
  this.systemMessageTokens = 0;
420
531
  return undefined;
421
532
  }
422
533
 
423
534
  let finalInstructions: string | BaseMessageFields = instructionsString;
424
535
 
425
- // Handle Anthropic prompt caching
536
+ let usePromptCache = false;
426
537
  if (this.provider === Providers.ANTHROPIC) {
427
538
  const anthropicOptions = this.clientOptions as
428
539
  | t.AnthropicClientOptions
429
540
  | undefined;
430
541
  if (anthropicOptions?.promptCache === true) {
542
+ usePromptCache = true;
431
543
  finalInstructions = {
432
544
  content: [
433
545
  {
@@ -440,17 +552,55 @@ export class AgentContext {
440
552
  }
441
553
  }
442
554
 
443
- const systemMessage = new SystemMessage(finalInstructions);
555
+ const systemMessage = instructionsString
556
+ ? new SystemMessage(finalInstructions)
557
+ : undefined;
444
558
 
445
- // Update token counts (subtract old, add new)
446
559
  if (this.tokenCounter) {
447
- this.instructionTokens -= this.systemMessageTokens;
448
- this.systemMessageTokens = this.tokenCounter(systemMessage);
449
- this.instructionTokens += this.systemMessageTokens;
560
+ this.systemMessageTokens = systemMessage
561
+ ? this.tokenCounter(systemMessage)
562
+ : 0;
450
563
  }
451
564
 
452
565
  return RunnableLambda.from((messages: BaseMessage[]) => {
453
- return [systemMessage, ...messages];
566
+ const prefix: BaseMessage[] = systemMessage ? [systemMessage] : [];
567
+
568
+ // Build the non-system portion (summary + conversation), then apply
569
+ // cache markers separately so addCacheControl doesn't strip the
570
+ // SystemMessage's own cache_control breakpoint set above.
571
+ const hasSummaryBody =
572
+ this._summaryLocation === 'user_message' &&
573
+ this.summaryText != null &&
574
+ this.summaryText !== '';
575
+
576
+ let body: BaseMessage[];
577
+ if (hasSummaryBody) {
578
+ const wrappedSummary =
579
+ '<summary>\n' +
580
+ (this.summaryText as string) +
581
+ '\n</summary>\n\n' +
582
+ 'This is your own checkpoint: you wrote it to preserve context after compaction. Pick up where you left off based on the summary above. Do not repeat prior tasks, information or acknowledge this checkpoint message directly.';
583
+
584
+ const summaryMsg = usePromptCache
585
+ ? new HumanMessage({
586
+ content: [
587
+ {
588
+ type: 'text',
589
+ text: wrappedSummary,
590
+ cache_control: { type: 'ephemeral' },
591
+ },
592
+ ],
593
+ })
594
+ : new HumanMessage(wrappedSummary);
595
+ body = [summaryMsg, ...messages];
596
+ } else {
597
+ body = messages;
598
+ }
599
+
600
+ if (usePromptCache && body.length >= 2) {
601
+ body = addCacheControl(body);
602
+ }
603
+ return [...prefix, ...body];
454
604
  }).withConfig({ runName: 'prompt' });
455
605
  }
456
606
 
@@ -458,8 +608,8 @@ export class AgentContext {
458
608
  * Reset context for a new run
459
609
  */
460
610
  reset(): void {
461
- this.instructionTokens = 0;
462
611
  this.systemMessageTokens = 0;
612
+ this.toolSchemaTokens = 0;
463
613
  this.cachedSystemRunnable = undefined;
464
614
  this.systemRunnableStale = true;
465
615
  this.lastToken = undefined;
@@ -473,6 +623,12 @@ export class AgentContext {
473
623
  this.discoveredToolNames.clear();
474
624
  this.handoffContext = undefined;
475
625
 
626
+ this.summaryText = this._durableSummaryText;
627
+ this.summaryTokenCount = this._durableSummaryTokenCount;
628
+ this._lastSummarizationMsgCount = 0;
629
+ this.lastCallUsage = undefined;
630
+ this.totalTokensFresh = false;
631
+
476
632
  if (this.tokenCounter) {
477
633
  this.initializeSystemRunnable();
478
634
  const baseTokenMap = { ...this.baseIndexTokenCountMap };
@@ -492,23 +648,21 @@ export class AgentContext {
492
648
  }
493
649
 
494
650
  /**
495
- * Update the token count map with instruction tokens
651
+ * Update the token count map from a base map.
652
+ *
653
+ * Previously this inflated index 0 with instructionTokens to indirectly
654
+ * reserve budget for the system prompt. That approach was imprecise: with
655
+ * large tool-schema overhead (e.g. 26 MCP tools ~5 000 tokens) the first
656
+ * conversation message appeared enormous and was always pruned, while the
657
+ * real available budget was never explicitly computed.
658
+ *
659
+ * Now instruction tokens are passed to getMessagesWithinTokenLimit via
660
+ * the `getInstructionTokens` factory param so the pruner subtracts them
661
+ * from the budget directly. The token map contains only real per-message
662
+ * token counts.
496
663
  */
497
664
  updateTokenMapWithInstructions(baseTokenMap: Record<string, number>): void {
498
- if (this.instructionTokens > 0) {
499
- // Shift all indices by the instruction token count
500
- const shiftedMap: Record<string, number> = {};
501
- for (const [key, value] of Object.entries(baseTokenMap)) {
502
- const index = parseInt(key, 10);
503
- if (!isNaN(index)) {
504
- shiftedMap[String(index)] =
505
- value + (index === 0 ? this.instructionTokens : 0);
506
- }
507
- }
508
- this.indexTokenCountMap = shiftedMap;
509
- } else {
510
- this.indexTokenCountMap = { ...baseTokenMap };
511
- }
665
+ this.indexTokenCountMap = { ...baseTokenMap };
512
666
  }
513
667
 
514
668
  /**
@@ -519,12 +673,8 @@ export class AgentContext {
519
673
  tokenCounter: t.TokenCounter
520
674
  ): Promise<void> {
521
675
  let toolTokens = 0;
522
- // Track names to avoid double-counting when a tool appears in both
523
- // this.tools (bound StructuredTool instances) and this.toolDefinitions
524
- // (MCP / event-driven schemas).
525
676
  const countedToolNames = new Set<string>();
526
677
 
527
- // Count tokens for bound tools (StructuredTool instances with .schema)
528
678
  if (this.tools && this.tools.length > 0) {
529
679
  for (const tool of this.tools) {
530
680
  const genericTool = tool as Record<string, unknown>;
@@ -548,24 +698,35 @@ export class AgentContext {
548
698
  }
549
699
  }
550
700
 
551
- // Count tokens for tool definitions (MCP / event-driven tools).
552
- // These are sent to the provider API as tool schemas alongside bound tools.
553
- // Both can be populated simultaneously (graph tools + MCP tools).
554
701
  if (this.toolDefinitions && this.toolDefinitions.length > 0) {
555
702
  for (const def of this.toolDefinitions) {
556
703
  if (countedToolNames.has(def.name)) {
557
- continue; // Already counted via this.tools
704
+ continue;
558
705
  }
559
706
  const schema = {
560
- name: def.name,
561
- description: def.description ?? '',
562
- parameters: def.parameters ?? {},
707
+ type: 'function',
708
+ function: {
709
+ name: def.name,
710
+ description: def.description ?? '',
711
+ parameters: def.parameters ?? {},
712
+ },
563
713
  };
564
714
  toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
565
715
  }
566
716
  }
567
717
 
568
- this.instructionTokens += toolTokens;
718
+ const isAnthropic =
719
+ this.provider !== Providers.BEDROCK &&
720
+ (this.provider === Providers.ANTHROPIC ||
721
+ /anthropic|claude/i.test(
722
+ String(
723
+ (this.clientOptions as { model?: string } | undefined)?.model ?? ''
724
+ )
725
+ ));
726
+ const toolTokenMultiplier = isAnthropic
727
+ ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
728
+ : DEFAULT_TOOL_TOKEN_MULTIPLIER;
729
+ this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
569
730
  }
570
731
 
571
732
  /**
@@ -612,6 +773,177 @@ export class AgentContext {
612
773
  }
613
774
  }
614
775
 
776
/**
 * Records a mid-run compaction summary. The summary is marked for injection
 * as a user message (see hasPendingCompactionSummary).
 */
setSummary(text: string, tokenCount: number): void {
  this._summaryLocation = 'user_message';
  this.summaryText = text;
  this._durableSummaryText = text;
  this.summaryTokenCount = tokenCount;
  this._durableSummaryTokenCount = tokenCount;
  this._summaryVersion += 1;
  // The system runnable reflects summary state, so force a rebuild.
  this.systemRunnableStale = true;
  // Any previously computed prune plan no longer applies.
  this.pruneMessages = undefined;
}
786
+
787
/**
 * Sets a cross-run summary that is injected into the system prompt
 * (rather than as a user message).
 */
setInitialSummary(text: string, tokenCount: number): void {
  this._summaryLocation = 'system_prompt';
  this.summaryText = text;
  this._durableSummaryText = text;
  this.summaryTokenCount = tokenCount;
  this._durableSummaryTokenCount = tokenCount;
  this._summaryVersion += 1;
  // System prompt content changed, so the system runnable must be rebuilt.
  this.systemRunnableStale = true;
}
797
+
798
/**
 * Swaps in a fresh indexTokenCountMap keyed to the messages that survive
 * summarization. The summarize node calls this after emitting RemoveMessage
 * operations, since those shift the indices of the remaining messages.
 */
rebuildTokenMapAfterSummarization(newTokenMap: Record<string, number>): void {
  this.indexTokenCountMap = newTokenMap;
  this.baseIndexTokenCountMap = Object.assign({}, newTokenMap);
  this._lastSummarizationMsgCount = Object.keys(newTokenMap).length;
  // Cached usage figures describe the pre-summarization context; drop them.
  this.currentUsage = undefined;
  this.lastCallUsage = undefined;
  this.totalTokensFresh = false;
}
811
+
812
/** True when a non-empty summary is currently stored. */
hasSummary(): boolean {
  const text = this.summaryText;
  return text != null && text.length > 0;
}
815
+
816
/** True when a mid-run compaction summary is ready to be injected as a HumanMessage. */
hasPendingCompactionSummary(): boolean {
  if (this._summaryLocation !== 'user_message') {
    return false;
  }
  return this.hasSummary();
}
820
+
821
/** Returns the stored summary text, or undefined when none has been set. */
getSummaryText(): string | undefined {
  return this.summaryText;
}
824
+
825
/** Monotonic counter incremented each time a summary is set or replaced. */
get summaryVersion(): number {
  return this._summaryVersion;
}
828
+
829
/**
 * Returns true when the message count hasn't changed since the last
 * summarization — re-summarizing would produce an identical result.
 * Oversized individual messages are handled by fit-to-budget truncation
 * in the pruner, which keeps them in context without triggering overflow.
 */
shouldSkipSummarization(currentMsgCount: number): boolean {
  const lastCount = this._lastSummarizationMsgCount;
  if (lastCount <= 0) {
    // No summarization has been recorded yet — never skip.
    return false;
  }
  return currentMsgCount <= lastCount;
}
841
+
842
/**
 * Records the message count at which summarization was triggered, so that
 * subsequent shouldSkipSummarization calls with the same count are suppressed.
 */
markSummarizationTriggered(msgCount: number): void {
  this._lastSummarizationMsgCount = msgCount;
}
849
+
850
+ clearSummary(): void {
851
+ if (this.summaryText != null) {
852
+ this.summaryText = undefined;
853
+ this.summaryTokenCount = 0;
854
+ this._durableSummaryText = undefined;
855
+ this._durableSummaryTokenCount = 0;
856
+ this._summaryLocation = 'none';
857
+ this.systemRunnableStale = true;
858
+ }
859
+ }
860
+
861
/**
 * Returns a structured breakdown of how the context token budget is consumed.
 * Useful for diagnostics when context overflow or pruning issues occur.
 */
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
  const maxContextTokens = this.maxContextTokens ?? 0;
  const boundToolCount = this.tools?.length ?? 0;
  const definitionCount = this.toolDefinitions?.length ?? 0;
  const messageCount = messages?.length ?? 0;

  // Sum the recorded per-index token counts for the supplied messages;
  // indices without a map entry contribute zero.
  let messageTokens = 0;
  if (messages != null) {
    for (let idx = 0; idx < messageCount; idx++) {
      messageTokens += this.indexTokenCountMap[idx] ?? 0;
    }
  }

  const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
  const remaining =
    maxContextTokens - reserveTokens - this.instructionTokens;

  return {
    maxContextTokens,
    instructionTokens: this.instructionTokens,
    systemMessageTokens: this.systemMessageTokens,
    toolSchemaTokens: this.toolSchemaTokens,
    summaryTokens: this.summaryTokenCount,
    toolCount: boundToolCount + definitionCount,
    messageCount,
    messageTokens,
    availableForMessages: Math.max(0, remaining),
  };
}
897
+
898
/**
 * Returns a human-readable string of the token budget breakdown
 * for inclusion in error messages and diagnostics.
 */
formatTokenBudgetBreakdown(messages?: BaseMessage[]): string {
  const {
    maxContextTokens,
    instructionTokens,
    systemMessageTokens,
    toolSchemaTokens,
    summaryTokens,
    toolCount,
    messageCount,
    messageTokens,
    availableForMessages,
  } = this.getTokenBudgetBreakdown(messages);
  return [
    'Token budget breakdown:',
    `  maxContextTokens: ${maxContextTokens}`,
    `  instructionTokens: ${instructionTokens} (system: ${systemMessageTokens}, tools: ${toolSchemaTokens} [${toolCount} tools])`,
    `  summaryTokens: ${summaryTokens}`,
    `  messageTokens: ${messageTokens} (${messageCount} messages)`,
    `  availableForMessages: ${availableForMessages}`,
  ].join('\n');
}
914
+
915
+ /**
916
+ * Updates the last-call usage with data from the most recent LLM response.
917
+ * Unlike `currentUsage` which accumulates, this captures only the single call.
918
+ */
919
+ updateLastCallUsage(usage: Partial<UsageMetadata>): void {
920
+ const baseInputTokens = Number(usage.input_tokens) || 0;
921
+ const cacheCreation =
922
+ Number(usage.input_token_details?.cache_creation) || 0;
923
+ const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
924
+
925
+ const outputTokens = Number(usage.output_tokens) || 0;
926
+ const cacheSum = cacheCreation + cacheRead;
927
+ const cacheIsAdditive = cacheSum > 0 && cacheSum > baseInputTokens;
928
+ const totalInputTokens = cacheIsAdditive
929
+ ? baseInputTokens + cacheSum
930
+ : baseInputTokens;
931
+
932
+ this.lastCallUsage = {
933
+ inputTokens: totalInputTokens,
934
+ outputTokens,
935
+ totalTokens: totalInputTokens + outputTokens,
936
+ cacheRead: cacheRead || undefined,
937
+ cacheCreation: cacheCreation || undefined,
938
+ };
939
+ this.totalTokensFresh = true;
940
+ }
941
+
942
/** Marks the cached token totals as stale; call before issuing a new LLM call. */
markTokensStale(): void {
  this.totalTokensFresh = false;
}
946
+
615
947
  /**
616
948
  * Marks tools as discovered via tool search.
617
949
  * Discovered tools will be included in the next model binding.
@@ -642,12 +974,10 @@ export class AgentContext {
642
974
  * @returns Array of tools to bind to model
643
975
  */
644
976
  getToolsForBinding(): t.GraphTools | undefined {
645
- /** Event-driven mode: create schema-only tools from definitions */
646
977
  if (this.toolDefinitions && this.toolDefinitions.length > 0) {
647
978
  return this.getEventDrivenToolsForBinding();
648
979
  }
649
980
 
650
- /** Traditional mode: filter actual tool instances */
651
981
  const filtered =
652
982
  !this.tools || !this.toolRegistry
653
983
  ? this.tools