@librechat/agents 3.1.56 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214):
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +2 -2
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -3,11 +3,29 @@
3
3
  var messages = require('@langchain/core/messages');
4
4
  var runnables = require('@langchain/core/runnables');
5
5
  var schema$1 = require('../tools/schema.cjs');
6
+ var cache = require('../messages/cache.cjs');
6
7
  var _enum = require('../common/enum.cjs');
8
+ require('../messages/core.cjs');
9
+ require('nanoid');
10
+ var prune = require('../messages/prune.cjs');
11
+ require('@langchain/core/callbacks/dispatch');
12
+ require('uuid');
7
13
  var schema = require('../utils/schema.cjs');
8
14
 
9
15
  /* eslint-disable no-console */
10
- // src/agents/AgentContext.ts
16
+ /**
17
+ * Anthropic direct API tool schema overhead multiplier.
18
+ * Empirically calibrated against real MCP tool sets (29 tools).
19
+ * Accounts for Anthropic's internal XML-like tool encoding plus
20
+ * a ~300-token hidden tool-system preamble.
21
+ */
22
+ const ANTHROPIC_TOOL_TOKEN_MULTIPLIER = 2.6;
23
+ /**
24
+ * Default tool schema overhead multiplier for all non-Anthropic providers.
25
+ * Covers OpenAI function-calling format, Bedrock, and other providers.
26
+ * Empirically calibrated at ~1.4× the raw JSON token count.
27
+ */
28
+ const DEFAULT_TOOL_TOKEN_MULTIPLIER = 1.4;
11
29
  /**
12
30
  * Encapsulates agent-specific state that can vary between agents in a multi-agent system
13
31
  */
@@ -16,7 +34,7 @@ class AgentContext {
16
34
  * Create an AgentContext from configuration with token accounting initialization
17
35
  */
18
36
  static fromConfig(agentConfig, tokenCounter, indexTokenCountMap) {
19
- const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, } = agentConfig;
37
+ const { agentId, name, provider, clientOptions, tools, toolMap, toolEnd, toolRegistry, toolDefinitions, instructions, additional_instructions, streamBuffer, maxContextTokens, reasoningKey, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, initialSummary, contextPruningConfig, maxToolResultChars, } = agentConfig;
20
38
  const agentContext = new AgentContext({
21
39
  agentId,
22
40
  name: name ?? agentId,
@@ -36,11 +54,15 @@ class AgentContext {
36
54
  tokenCounter,
37
55
  useLegacyContent,
38
56
  discoveredTools,
57
+ summarizationEnabled,
58
+ summarizationConfig,
59
+ contextPruningConfig,
60
+ maxToolResultChars,
39
61
  });
62
+ if (initialSummary?.text != null && initialSummary.text !== '') {
63
+ agentContext.setInitialSummary(initialSummary.text, initialSummary.tokenCount);
64
+ }
40
65
  if (tokenCounter) {
41
- // Initialize system runnable BEFORE async tool token calculation
42
- // This ensures system message tokens are in instructionTokens before
43
- // updateTokenMapWithInstructions is called
44
66
  agentContext.initializeSystemRunnable();
45
67
  const tokenMap = indexTokenCountMap || {};
46
68
  agentContext.baseIndexTokenCountMap = { ...tokenMap };
@@ -48,7 +70,6 @@ class AgentContext {
48
70
  agentContext.tokenCalculationPromise = agentContext
49
71
  .calculateInstructionTokens(tokenCounter)
50
72
  .then(() => {
51
- // Update token map with instruction tokens (includes system + tool tokens)
52
73
  agentContext.updateTokenMapWithInstructions(tokenMap);
53
74
  })
54
75
  .catch((err) => {
@@ -77,12 +98,39 @@ class AgentContext {
77
98
  maxContextTokens;
78
99
  /** Current usage metadata for this agent */
79
100
  currentUsage;
101
+ /**
102
+ * Usage from the most recent LLM call only (not accumulated).
103
+ * Used for accurate provider calibration in pruning.
104
+ */
105
+ lastCallUsage;
106
+ /**
107
+ * Whether totalTokens data is fresh (set true when provider usage arrives,
108
+ * false at the start of each turn before the LLM responds).
109
+ * Prevents stale token data from driving pruning/trigger decisions.
110
+ */
111
+ totalTokensFresh = false;
112
+ /** Context pruning configuration. */
113
+ contextPruningConfig;
114
+ maxToolResultChars;
80
115
  /** Prune messages function configured for this agent */
81
116
  pruneMessages;
82
117
  /** Token counter function for this agent */
83
118
  tokenCounter;
84
- /** Instructions/system message token count */
85
- instructionTokens = 0;
119
+ /** Token count for the system message (instructions text). */
120
+ systemMessageTokens = 0;
121
+ /** Token count for tool schemas only. */
122
+ toolSchemaTokens = 0;
123
+ /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
124
+ calibrationRatio = 1;
125
+ /** Provider-observed instruction overhead from the pruner's best-variance turn. */
126
+ resolvedInstructionOverhead;
127
+ /** Pre-masking tool content keyed by message index, consumed by the summarize node. */
128
+ pendingOriginalToolContent;
129
+ /** Total instruction overhead: system message + tool schemas + pending summary. */
130
+ get instructionTokens() {
131
+ const summaryOverhead = this._summaryLocation === 'user_message' ? this.summaryTokenCount : 0;
132
+ return this.systemMessageTokens + this.toolSchemaTokens + summaryOverhead;
133
+ }
86
134
  /** The amount of time that should pass before another consecutive API call */
87
135
  streamBuffer;
88
136
  /** Last stream call timestamp for rate limiting */
@@ -125,18 +173,47 @@ class AgentContext {
125
173
  cachedSystemRunnable;
126
174
  /** Whether system runnable needs rebuild (set when discovered tools change) */
127
175
  systemRunnableStale = true;
128
- /** Cached system message token count (separate from tool tokens) */
129
- systemMessageTokens = 0;
130
176
  /** Promise for token calculation initialization */
131
177
  tokenCalculationPromise;
132
178
  /** Format content blocks as strings (for legacy compatibility) */
133
179
  useLegacyContent = false;
180
+ /** Enables graph-level summarization for this agent */
181
+ summarizationEnabled;
182
+ /** Summarization runtime settings used by graph pruning hooks */
183
+ summarizationConfig;
184
+ /** Current summary text produced by the summarize node, integrated into system message */
185
+ summaryText;
186
+ /** Token count of the current summary (tracked for token accounting) */
187
+ summaryTokenCount = 0;
188
+ /**
189
+ * Where the summary should be injected:
190
+ * - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
191
+ * - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
192
+ * - `'none'`: no summary present
193
+ */
194
+ _summaryLocation = 'none';
195
+ /**
196
+ * Durable summary that survives reset() calls. Set from initialSummary
197
+ * during fromConfig() and updated by setSummary() so that the latest
198
+ * summary (whether cross-run or intra-run) is always restored after
199
+ * processStream's resetValues() cycle.
200
+ */
201
+ _durableSummaryText;
202
+ _durableSummaryTokenCount = 0;
203
+ /** Number of summarization cycles that have occurred for this agent context */
204
+ _summaryVersion = 0;
205
+ /**
206
+ * Message count at the time summarization was last triggered.
207
+ * Used to prevent re-summarizing the same unchanged message set.
208
+ * Summarization is allowed to fire again only when new messages appear.
209
+ */
210
+ _lastSummarizationMsgCount = 0;
134
211
  /**
135
212
  * Handoff context when this agent receives control via handoff.
136
213
  * Contains source and parallel execution info for system message context.
137
214
  */
138
215
  handoffContext;
139
- constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, }) {
216
+ constructor({ agentId, name, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, reasoningKey, toolEnd, instructionTokens, useLegacyContent, discoveredTools, summarizationEnabled, summarizationConfig, contextPruningConfig, maxToolResultChars, }) {
140
217
  this.agentId = agentId;
141
218
  this.name = name;
142
219
  this.provider = provider;
@@ -157,9 +234,13 @@ class AgentContext {
157
234
  this.toolEnd = toolEnd;
158
235
  }
159
236
  if (instructionTokens !== undefined) {
160
- this.instructionTokens = instructionTokens;
237
+ this.systemMessageTokens = instructionTokens;
161
238
  }
162
239
  this.useLegacyContent = useLegacyContent ?? false;
240
+ this.summarizationEnabled = summarizationEnabled;
241
+ this.summarizationConfig = summarizationConfig;
242
+ this.contextPruningConfig = contextPruningConfig;
243
+ this.maxToolResultChars = maxToolResultChars;
163
244
  if (discoveredTools && discoveredTools.length > 0) {
164
245
  for (const toolName of discoveredTools) {
165
246
  this.discoveredToolNames.add(toolName);
@@ -185,7 +266,6 @@ class AgentContext {
185
266
  !allowedCallers.includes('direct');
186
267
  if (!isCodeExecutionOnly)
187
268
  continue;
188
- // Include if: not deferred OR deferred but discovered
189
269
  const isDeferred = toolDef.defer_loading === true;
190
270
  const isDiscovered = this.discoveredToolNames.has(name);
191
271
  if (!isDeferred || isDiscovered) {
@@ -217,11 +297,9 @@ class AgentContext {
217
297
  * Only rebuilds when marked stale (via markToolsAsDiscovered).
218
298
  */
219
299
  get systemRunnable() {
220
- // Return cached if not stale
221
300
  if (!this.systemRunnableStale && this.cachedSystemRunnable !== undefined) {
222
301
  return this.cachedSystemRunnable;
223
302
  }
224
- // Stale or first access - rebuild
225
303
  const instructionsString = this.buildInstructionsString();
226
304
  this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
227
305
  this.systemRunnableStale = false;
@@ -244,25 +322,29 @@ class AgentContext {
244
322
  */
245
323
  buildInstructionsString() {
246
324
  const parts = [];
247
- /** Build agent identity and handoff context preamble */
248
325
  const identityPreamble = this.buildIdentityPreamble();
249
326
  if (identityPreamble) {
250
327
  parts.push(identityPreamble);
251
328
  }
252
- /** Add main instructions */
253
329
  if (this.instructions != null && this.instructions !== '') {
254
330
  parts.push(this.instructions);
255
331
  }
256
- /** Add additional instructions */
257
332
  if (this.additionalInstructions != null &&
258
333
  this.additionalInstructions !== '') {
259
334
  parts.push(this.additionalInstructions);
260
335
  }
261
- /** Add programmatic tools documentation */
262
336
  const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
263
337
  if (programmaticToolsDoc) {
264
338
  parts.push(programmaticToolsDoc);
265
339
  }
340
+ // Cross-run summary: include in system prompt so the model has context
341
+ // from the prior run. Mid-run summaries are injected as a HumanMessage
342
+ // on the post-compaction clean slate instead (see buildSystemRunnable).
343
+ if (this._summaryLocation === 'system_prompt' &&
344
+ this.summaryText != null &&
345
+ this.summaryText !== '') {
346
+ parts.push('## Conversation Summary\n\n' + this.summaryText);
347
+ }
266
348
  return parts.join('\n\n');
267
349
  }
268
350
  /**
@@ -289,17 +371,19 @@ class AgentContext {
289
371
  * Only called when content has actually changed.
290
372
  */
291
373
  buildSystemRunnable(instructionsString) {
292
- if (!instructionsString) {
293
- // Remove previous tokens if we had a system message before
294
- this.instructionTokens -= this.systemMessageTokens;
374
+ const hasMidRunSummary = this._summaryLocation === 'user_message' &&
375
+ this.summaryText != null &&
376
+ this.summaryText !== '';
377
+ if (!instructionsString && !hasMidRunSummary) {
295
378
  this.systemMessageTokens = 0;
296
379
  return undefined;
297
380
  }
298
381
  let finalInstructions = instructionsString;
299
- // Handle Anthropic prompt caching
382
+ let usePromptCache = false;
300
383
  if (this.provider === _enum.Providers.ANTHROPIC) {
301
384
  const anthropicOptions = this.clientOptions;
302
385
  if (anthropicOptions?.promptCache === true) {
386
+ usePromptCache = true;
303
387
  finalInstructions = {
304
388
  content: [
305
389
  {
@@ -311,23 +395,56 @@ class AgentContext {
311
395
  };
312
396
  }
313
397
  }
314
- const systemMessage = new messages.SystemMessage(finalInstructions);
315
- // Update token counts (subtract old, add new)
398
+ const systemMessage = instructionsString
399
+ ? new messages.SystemMessage(finalInstructions)
400
+ : undefined;
316
401
  if (this.tokenCounter) {
317
- this.instructionTokens -= this.systemMessageTokens;
318
- this.systemMessageTokens = this.tokenCounter(systemMessage);
319
- this.instructionTokens += this.systemMessageTokens;
320
- }
321
- return runnables.RunnableLambda.from((messages) => {
322
- return [systemMessage, ...messages];
402
+ this.systemMessageTokens = systemMessage
403
+ ? this.tokenCounter(systemMessage)
404
+ : 0;
405
+ }
406
+ return runnables.RunnableLambda.from((messages$1) => {
407
+ const prefix = systemMessage ? [systemMessage] : [];
408
+ // Build the non-system portion (summary + conversation), then apply
409
+ // cache markers separately so addCacheControl doesn't strip the
410
+ // SystemMessage's own cache_control breakpoint set above.
411
+ const hasSummaryBody = this._summaryLocation === 'user_message' &&
412
+ this.summaryText != null &&
413
+ this.summaryText !== '';
414
+ let body;
415
+ if (hasSummaryBody) {
416
+ const wrappedSummary = '<summary>\n' +
417
+ this.summaryText +
418
+ '\n</summary>\n\n' +
419
+ 'This is your own checkpoint: you wrote it to preserve context after compaction. Pick up where you left off based on the summary above. Do not repeat prior tasks, information or acknowledge this checkpoint message directly.';
420
+ const summaryMsg = usePromptCache
421
+ ? new messages.HumanMessage({
422
+ content: [
423
+ {
424
+ type: 'text',
425
+ text: wrappedSummary,
426
+ cache_control: { type: 'ephemeral' },
427
+ },
428
+ ],
429
+ })
430
+ : new messages.HumanMessage(wrappedSummary);
431
+ body = [summaryMsg, ...messages$1];
432
+ }
433
+ else {
434
+ body = messages$1;
435
+ }
436
+ if (usePromptCache && body.length >= 2) {
437
+ body = cache.addCacheControl(body);
438
+ }
439
+ return [...prefix, ...body];
323
440
  }).withConfig({ runName: 'prompt' });
324
441
  }
325
442
  /**
326
443
  * Reset context for a new run
327
444
  */
328
445
  reset() {
329
- this.instructionTokens = 0;
330
446
  this.systemMessageTokens = 0;
447
+ this.toolSchemaTokens = 0;
331
448
  this.cachedSystemRunnable = undefined;
332
449
  this.systemRunnableStale = true;
333
450
  this.lastToken = undefined;
@@ -340,6 +457,11 @@ class AgentContext {
340
457
  this.currentTokenType = _enum.ContentTypes.TEXT;
341
458
  this.discoveredToolNames.clear();
342
459
  this.handoffContext = undefined;
460
+ this.summaryText = this._durableSummaryText;
461
+ this.summaryTokenCount = this._durableSummaryTokenCount;
462
+ this._lastSummarizationMsgCount = 0;
463
+ this.lastCallUsage = undefined;
464
+ this.totalTokensFresh = false;
343
465
  if (this.tokenCounter) {
344
466
  this.initializeSystemRunnable();
345
467
  const baseTokenMap = { ...this.baseIndexTokenCountMap };
@@ -357,24 +479,21 @@ class AgentContext {
357
479
  }
358
480
  }
359
481
  /**
360
- * Update the token count map with instruction tokens
482
+ * Update the token count map from a base map.
483
+ *
484
+ * Previously this inflated index 0 with instructionTokens to indirectly
485
+ * reserve budget for the system prompt. That approach was imprecise: with
486
+ * large tool-schema overhead (e.g. 26 MCP tools ~5 000 tokens) the first
487
+ * conversation message appeared enormous and was always pruned, while the
488
+ * real available budget was never explicitly computed.
489
+ *
490
+ * Now instruction tokens are passed to getMessagesWithinTokenLimit via
491
+ * the `getInstructionTokens` factory param so the pruner subtracts them
492
+ * from the budget directly. The token map contains only real per-message
493
+ * token counts.
361
494
  */
362
495
  updateTokenMapWithInstructions(baseTokenMap) {
363
- if (this.instructionTokens > 0) {
364
- // Shift all indices by the instruction token count
365
- const shiftedMap = {};
366
- for (const [key, value] of Object.entries(baseTokenMap)) {
367
- const index = parseInt(key, 10);
368
- if (!isNaN(index)) {
369
- shiftedMap[String(index)] =
370
- value + (index === 0 ? this.instructionTokens : 0);
371
- }
372
- }
373
- this.indexTokenCountMap = shiftedMap;
374
- }
375
- else {
376
- this.indexTokenCountMap = { ...baseTokenMap };
377
- }
496
+ this.indexTokenCountMap = { ...baseTokenMap };
378
497
  }
379
498
  /**
380
499
  * Calculate tool tokens and add to instruction tokens
@@ -382,11 +501,7 @@ class AgentContext {
382
501
  */
383
502
  async calculateInstructionTokens(tokenCounter) {
384
503
  let toolTokens = 0;
385
- // Track names to avoid double-counting when a tool appears in both
386
- // this.tools (bound StructuredTool instances) and this.toolDefinitions
387
- // (MCP / event-driven schemas).
388
504
  const countedToolNames = new Set();
389
- // Count tokens for bound tools (StructuredTool instances with .schema)
390
505
  if (this.tools && this.tools.length > 0) {
391
506
  for (const tool of this.tools) {
392
507
  const genericTool = tool;
@@ -401,23 +516,29 @@ class AgentContext {
401
516
  }
402
517
  }
403
518
  }
404
- // Count tokens for tool definitions (MCP / event-driven tools).
405
- // These are sent to the provider API as tool schemas alongside bound tools.
406
- // Both can be populated simultaneously (graph tools + MCP tools).
407
519
  if (this.toolDefinitions && this.toolDefinitions.length > 0) {
408
520
  for (const def of this.toolDefinitions) {
409
521
  if (countedToolNames.has(def.name)) {
410
- continue; // Already counted via this.tools
522
+ continue;
411
523
  }
412
524
  const schema = {
413
- name: def.name,
414
- description: def.description ?? '',
415
- parameters: def.parameters ?? {},
525
+ type: 'function',
526
+ function: {
527
+ name: def.name,
528
+ description: def.description ?? '',
529
+ parameters: def.parameters ?? {},
530
+ },
416
531
  };
417
532
  toolTokens += tokenCounter(new messages.SystemMessage(JSON.stringify(schema)));
418
533
  }
419
534
  }
420
- this.instructionTokens += toolTokens;
535
+ const isAnthropic = this.provider !== _enum.Providers.BEDROCK &&
536
+ (this.provider === _enum.Providers.ANTHROPIC ||
537
+ /anthropic|claude/i.test(String(this.clientOptions?.model ?? '')));
538
+ const toolTokenMultiplier = isAnthropic
539
+ ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
540
+ : DEFAULT_TOOL_TOKEN_MULTIPLIER;
541
+ this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
421
542
  }
422
543
  /**
423
544
  * Gets the tool registry for deferred tools (for tool search).
@@ -457,6 +578,151 @@ class AgentContext {
457
578
  this.systemRunnableStale = true;
458
579
  }
459
580
  }
581
+ setSummary(text, tokenCount) {
582
+ this.summaryText = text;
583
+ this.summaryTokenCount = tokenCount;
584
+ this._summaryLocation = 'user_message';
585
+ this._durableSummaryText = text;
586
+ this._durableSummaryTokenCount = tokenCount;
587
+ this._summaryVersion += 1;
588
+ this.systemRunnableStale = true;
589
+ this.pruneMessages = undefined;
590
+ }
591
+ /** Sets a cross-run summary that is injected into the system prompt. */
592
+ setInitialSummary(text, tokenCount) {
593
+ this.summaryText = text;
594
+ this.summaryTokenCount = tokenCount;
595
+ this._summaryLocation = 'system_prompt';
596
+ this._durableSummaryText = text;
597
+ this._durableSummaryTokenCount = tokenCount;
598
+ this._summaryVersion += 1;
599
+ this.systemRunnableStale = true;
600
+ }
601
+ /**
602
+ * Replaces the indexTokenCountMap with a fresh map keyed to the surviving
603
+ * context messages after summarization. Called by the summarize node after
604
+ * it emits RemoveMessage operations that shift message indices.
605
+ */
606
+ rebuildTokenMapAfterSummarization(newTokenMap) {
607
+ this.indexTokenCountMap = newTokenMap;
608
+ this.baseIndexTokenCountMap = { ...newTokenMap };
609
+ this._lastSummarizationMsgCount = Object.keys(newTokenMap).length;
610
+ this.currentUsage = undefined;
611
+ this.lastCallUsage = undefined;
612
+ this.totalTokensFresh = false;
613
+ }
614
+ hasSummary() {
615
+ return this.summaryText != null && this.summaryText !== '';
616
+ }
617
+ /** True when a mid-run compaction summary is ready to be injected as a HumanMessage. */
618
+ hasPendingCompactionSummary() {
619
+ return this._summaryLocation === 'user_message' && this.hasSummary();
620
+ }
621
+ getSummaryText() {
622
+ return this.summaryText;
623
+ }
624
+ get summaryVersion() {
625
+ return this._summaryVersion;
626
+ }
627
+ /**
628
+ * Returns true when the message count hasn't changed since the last
629
+ * summarization — re-summarizing would produce an identical result.
630
+ * Oversized individual messages are handled by fit-to-budget truncation
631
+ * in the pruner, which keeps them in context without triggering overflow.
632
+ */
633
+ shouldSkipSummarization(currentMsgCount) {
634
+ return (this._lastSummarizationMsgCount > 0 &&
635
+ currentMsgCount <= this._lastSummarizationMsgCount);
636
+ }
637
+ /**
638
+ * Records the message count at which summarization was triggered,
639
+ * so subsequent calls with the same count are suppressed.
640
+ */
641
+ markSummarizationTriggered(msgCount) {
642
+ this._lastSummarizationMsgCount = msgCount;
643
+ }
644
+ clearSummary() {
645
+ if (this.summaryText != null) {
646
+ this.summaryText = undefined;
647
+ this.summaryTokenCount = 0;
648
+ this._durableSummaryText = undefined;
649
+ this._durableSummaryTokenCount = 0;
650
+ this._summaryLocation = 'none';
651
+ this.systemRunnableStale = true;
652
+ }
653
+ }
654
+ /**
655
+ * Returns a structured breakdown of how the context token budget is consumed.
656
+ * Useful for diagnostics when context overflow or pruning issues occur.
657
+ */
658
+ getTokenBudgetBreakdown(messages) {
659
+ const maxContextTokens = this.maxContextTokens ?? 0;
660
+ const toolCount = (this.tools?.length ?? 0) + (this.toolDefinitions?.length ?? 0);
661
+ const messageCount = messages?.length ?? 0;
662
+ let messageTokens = 0;
663
+ if (messages != null) {
664
+ for (let i = 0; i < messages.length; i++) {
665
+ messageTokens +=
666
+ this.indexTokenCountMap[i] ?? 0;
667
+ }
668
+ }
669
+ const reserveTokens = Math.round(maxContextTokens * prune.DEFAULT_RESERVE_RATIO);
670
+ const availableForMessages = Math.max(0, maxContextTokens - reserveTokens - this.instructionTokens);
671
+ return {
672
+ maxContextTokens,
673
+ instructionTokens: this.instructionTokens,
674
+ systemMessageTokens: this.systemMessageTokens,
675
+ toolSchemaTokens: this.toolSchemaTokens,
676
+ summaryTokens: this.summaryTokenCount,
677
+ toolCount,
678
+ messageCount,
679
+ messageTokens,
680
+ availableForMessages,
681
+ };
682
+ }
683
+ /**
684
+ * Returns a human-readable string of the token budget breakdown
685
+ * for inclusion in error messages and diagnostics.
686
+ */
687
+ formatTokenBudgetBreakdown(messages) {
688
+ const b = this.getTokenBudgetBreakdown(messages);
689
+ const lines = [
690
+ 'Token budget breakdown:',
691
+ ` maxContextTokens: ${b.maxContextTokens}`,
692
+ ` instructionTokens: ${b.instructionTokens} (system: ${b.systemMessageTokens}, tools: ${b.toolSchemaTokens} [${b.toolCount} tools])`,
693
+ ` summaryTokens: ${b.summaryTokens}`,
694
+ ` messageTokens: ${b.messageTokens} (${b.messageCount} messages)`,
695
+ ` availableForMessages: ${b.availableForMessages}`,
696
+ ];
697
+ return lines.join('\n');
698
+ }
699
+ /**
700
+ * Updates the last-call usage with data from the most recent LLM response.
701
+ * Unlike `currentUsage` which accumulates, this captures only the single call.
702
+ */
703
+ updateLastCallUsage(usage) {
704
+ const baseInputTokens = Number(usage.input_tokens) || 0;
705
+ const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
706
+ const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
707
+ const outputTokens = Number(usage.output_tokens) || 0;
708
+ const cacheSum = cacheCreation + cacheRead;
709
+ const cacheIsAdditive = cacheSum > 0 && cacheSum > baseInputTokens;
710
+ const totalInputTokens = cacheIsAdditive
711
+ ? baseInputTokens + cacheSum
712
+ : baseInputTokens;
713
+ this.lastCallUsage = {
714
+ inputTokens: totalInputTokens,
715
+ outputTokens,
716
+ totalTokens: totalInputTokens + outputTokens,
717
+ cacheRead: cacheRead || undefined,
718
+ cacheCreation: cacheCreation || undefined,
719
+ };
720
+ this.totalTokensFresh = true;
721
+ }
722
+ /** Marks token data as stale before a new LLM call. */
723
+ markTokensStale() {
724
+ this.totalTokensFresh = false;
725
+ }
460
726
  /**
461
727
  * Marks tools as discovered via tool search.
462
728
  * Discovered tools will be included in the next model binding.
@@ -486,11 +752,9 @@ class AgentContext {
486
752
  * @returns Array of tools to bind to model
487
753
  */
488
754
  getToolsForBinding() {
489
- /** Event-driven mode: create schema-only tools from definitions */
490
755
  if (this.toolDefinitions && this.toolDefinitions.length > 0) {
491
756
  return this.getEventDrivenToolsForBinding();
492
757
  }
493
- /** Traditional mode: filter actual tool instances */
494
758
  const filtered = !this.tools || !this.toolRegistry
495
759
  ? this.tools
496
760
  : this.filterToolsForBinding(this.tools);