@librechat/agents 3.1.57 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +1 -1
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -1,31 +1,13 @@
1
1
  /* eslint-disable no-console */
2
- // src/graphs/Graph.ts
3
2
  import { nanoid } from 'nanoid';
4
- import { concat } from '@langchain/core/utils/stream';
5
3
  import { ToolNode } from '@langchain/langgraph/prebuilt';
6
- import { ChatVertexAI } from '@langchain/google-vertexai';
7
- import {
8
- START,
9
- END,
10
- StateGraph,
11
- Annotation,
12
- messagesStateReducer,
13
- } from '@langchain/langgraph';
14
- import {
15
- Runnable,
16
- RunnableConfig,
17
- RunnableLambda,
18
- } from '@langchain/core/runnables';
19
- import {
20
- ToolMessage,
21
- SystemMessage,
22
- AIMessageChunk,
23
- } from '@langchain/core/messages';
4
+ import { Runnable, RunnableConfig } from '@langchain/core/runnables';
5
+ import { ToolMessage, AIMessageChunk } from '@langchain/core/messages';
6
+ import { START, END, StateGraph, Annotation } from '@langchain/langgraph';
24
7
  import type {
25
- BaseMessageFields,
26
- MessageContent,
27
8
  UsageMetadata,
28
9
  BaseMessage,
10
+ MessageContent,
29
11
  } from '@langchain/core/messages';
30
12
  import type { ToolCall } from '@langchain/core/messages/tool';
31
13
  import type * as t from '@/types';
@@ -33,9 +15,9 @@ import {
33
15
  formatAnthropicArtifactContent,
34
16
  ensureThinkingBlockInMessages,
35
17
  convertMessagesToContent,
36
- addBedrockCacheControl,
18
+ sanitizeOrphanToolBlocks,
37
19
  extractToolDiscoveries,
38
- modifyDeltaProperties,
20
+ addBedrockCacheControl,
39
21
  formatArtifactPayload,
40
22
  formatContentStrings,
41
23
  createPruneMessages,
@@ -51,23 +33,31 @@ import {
51
33
  } from '@/common';
52
34
  import {
53
35
  resetIfNotEmpty,
36
+ isAnthropicLike,
54
37
  isOpenAILike,
55
38
  isGoogleLike,
56
39
  joinKeys,
57
40
  sleep,
58
41
  } from '@/utils';
59
- import { getChatModelClass, manualToolStreamProviders } from '@/llm/providers';
60
42
  import { ToolNode as CustomToolNode, toolsCondition } from '@/tools/ToolNode';
61
- import { ChatOpenAI, AzureChatOpenAI } from '@/llm/openai';
62
- import { safeDispatchCustomEvent } from '@/utils/events';
43
+ import { safeDispatchCustomEvent, emitAgentLog } from '@/utils/events';
44
+ import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
45
+ import { shouldTriggerSummarization } from '@/summarization';
46
+ import { createSummarizeNode } from '@/summarization/node';
47
+ import { messagesStateReducer } from '@/messages/reducer';
63
48
  import { createSchemaOnlyTools } from '@/tools/schema';
64
49
  import { AgentContext } from '@/agents/AgentContext';
65
50
  import { createFakeStreamingLLM } from '@/llm/fake';
66
51
  import { handleToolCalls } from '@/tools/handlers';
67
- import { ChatModelStreamHandler } from '@/stream';
52
+ import { isThinkingEnabled } from '@/llm/request';
53
+ import { initializeModel } from '@/llm/init';
68
54
  import { HandlerRegistry } from '@/events';
55
+ import { ChatOpenAI } from '@/llm/openai';
56
+
57
+ const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
69
58
 
70
- const { AGENT, TOOLS } = GraphNodeKeys;
59
+ /** Minimum relative variance before calibrated toolSchemaTokens overrides current value. */
60
+ const CALIBRATION_VARIANCE_THRESHOLD = 0.15;
71
61
 
72
62
  export abstract class Graph<
73
63
  T extends t.BaseGraphState = t.BaseGraphState,
@@ -81,15 +71,6 @@ export abstract class Graph<
81
71
  currentTools?: t.GraphTools;
82
72
  currentToolMap?: t.ToolMap;
83
73
  }): CustomToolNode<T> | ToolNode<T>;
84
- abstract initializeModel({
85
- currentModel,
86
- tools,
87
- clientOptions,
88
- }: {
89
- currentModel?: t.ChatModel;
90
- tools?: t.GraphTools;
91
- clientOptions?: t.ClientOptions;
92
- }): Runnable;
93
74
  abstract getRunMessages(): BaseMessage[] | undefined;
94
75
  abstract getContentParts(): t.MessageContentComplex[] | undefined;
95
76
  abstract generateStepId(stepKey: string): [string, number];
@@ -120,7 +101,10 @@ export abstract class Graph<
120
101
  abstract createCallModel(
121
102
  agentId?: string,
122
103
  currentModel?: t.ChatModel
123
- ): (state: T, config?: RunnableConfig) => Promise<Partial<T>>;
104
+ ): (
105
+ state: t.AgentSubgraphState,
106
+ config?: RunnableConfig
107
+ ) => Promise<Partial<t.AgentSubgraphState>>;
124
108
  messageStepHasToolCalls: Map<string, boolean> = new Map();
125
109
  messageIdsByStepKey: Map<string, string> = new Map();
126
110
  prelimMessageIdsByStepKey: Map<string, string> = new Map();
@@ -129,6 +113,12 @@ export abstract class Graph<
129
113
  stepKeyIds: Map<string, string[]> = new Map<string, string[]>();
130
114
  contentIndexMap: Map<string, number> = new Map();
131
115
  toolCallStepIds: Map<string, string> = new Map();
116
+ /**
117
+ * Step IDs that have been dispatched via handler registry directly
118
+ * (in dispatchRunStep). Used by the custom event callback to skip
119
+ * duplicate dispatch through the LangGraph callback chain.
120
+ */
121
+ handlerDispatchedStepIds: Set<string> = new Set();
132
122
  signal?: AbortSignal;
133
123
  /** Set of invoked tool call IDs from non-message run steps completed mid-run, if any */
134
124
  invokedToolIds?: Set<string>;
@@ -166,7 +156,16 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
166
156
  /** Optional compile options passed into workflow.compile() */
167
157
  compileOptions?: t.CompileOptions | undefined;
168
158
  messages: BaseMessage[] = [];
159
+ /** Cached run messages preserved before clearHeavyState() so getRunMessages() works after cleanup. */
160
+ private cachedRunMessages?: BaseMessage[];
169
161
  runId: string | undefined;
162
+ /**
163
+ * Boundary between historical messages (loaded from conversation state)
164
+ * and messages produced during the current run. Set once in the state
165
+ * reducer when messages first arrive. Used by `getRunMessages()` and
166
+ * multi-agent message filtering — NOT for pruner token counting (the
167
+ * pruner maintains its own `lastTurnStartIndex` in its closure).
168
+ */
170
169
  startIndex: number = 0;
171
170
  signal?: AbortSignal;
172
171
  /** Map of agent contexts by agent ID */
@@ -175,12 +174,12 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
175
174
  defaultAgentId: string;
176
175
 
177
176
  constructor({
178
- // parent-level graph inputs
179
177
  runId,
180
178
  signal,
181
179
  agents,
182
180
  tokenCounter,
183
181
  indexTokenCountMap,
182
+ calibrationRatio,
184
183
  }: t.StandardGraphInput) {
185
184
  super();
186
185
  this.runId = runId;
@@ -196,6 +195,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
196
195
  tokenCounter,
197
196
  indexTokenCountMap
198
197
  );
198
+ if (calibrationRatio != null && calibrationRatio > 0) {
199
+ agentContext.calibrationRatio = calibrationRatio;
200
+ }
199
201
 
200
202
  this.agentContexts.set(agentConfig.agentId, agentContext);
201
203
  }
@@ -207,6 +209,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
207
209
 
208
210
  resetValues(keepContent?: boolean): void {
209
211
  this.messages = [];
212
+ this.cachedRunMessages = undefined;
210
213
  this.config = resetIfNotEmpty(this.config, undefined);
211
214
  if (keepContent !== true) {
212
215
  this.contentData = resetIfNotEmpty(this.contentData, []);
@@ -220,6 +223,10 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
220
223
  * a stale reference on 2nd+ processStream calls.
221
224
  */
222
225
  this.toolCallStepIds.clear();
226
+ this.handlerDispatchedStepIds = resetIfNotEmpty(
227
+ this.handlerDispatchedStepIds,
228
+ new Set()
229
+ );
223
230
  this.messageIdsByStepKey = resetIfNotEmpty(
224
231
  this.messageIdsByStepKey,
225
232
  new Map()
@@ -239,6 +246,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
239
246
  }
240
247
 
241
248
  override clearHeavyState(): void {
249
+ this.cachedRunMessages = this.messages.slice(this.startIndex);
242
250
  super.clearHeavyState();
243
251
  this.messages = [];
244
252
  this.overrideModel = undefined;
@@ -274,6 +282,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
274
282
  agentId = currentNode.substring(AGENT.length);
275
283
  } else if (currentNode.startsWith(TOOLS)) {
276
284
  agentId = currentNode.substring(TOOLS.length);
285
+ } else if (currentNode.startsWith(SUMMARIZE)) {
286
+ agentId = currentNode.substring(SUMMARIZE.length);
277
287
  }
278
288
 
279
289
  const agentContext = this.agentContexts.get(agentId ?? '');
@@ -362,6 +372,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
362
372
  /* Misc.*/
363
373
 
364
374
  getRunMessages(): BaseMessage[] | undefined {
375
+ if (this.messages.length === 0 && this.cachedRunMessages != null) {
376
+ return this.cachedRunMessages;
377
+ }
365
378
  return this.messages.slice(this.startIndex);
366
379
  }
367
380
 
@@ -369,6 +382,23 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
369
382
  return convertMessagesToContent(this.messages.slice(this.startIndex));
370
383
  }
371
384
 
385
+ getCalibrationRatio(): number {
386
+ const context = this.agentContexts.get(this.defaultAgentId);
387
+ return context?.calibrationRatio ?? 1;
388
+ }
389
+
390
+ getResolvedInstructionOverhead(): number | undefined {
391
+ const context = this.agentContexts.get(this.defaultAgentId);
392
+ return context?.resolvedInstructionOverhead;
393
+ }
394
+
395
+ getToolCount(): number {
396
+ const context = this.agentContexts.get(this.defaultAgentId);
397
+ return (
398
+ (context?.tools?.length ?? 0) + (context?.toolDefinitions?.length ?? 0)
399
+ );
400
+ }
401
+
372
402
  /**
373
403
  * Get all run steps, optionally filtered by agent ID
374
404
  */
@@ -431,51 +461,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
431
461
 
432
462
  /* Graph */
433
463
 
434
- createSystemRunnable({
435
- provider,
436
- clientOptions,
437
- instructions,
438
- additional_instructions,
439
- }: {
440
- provider?: Providers;
441
- clientOptions?: t.ClientOptions;
442
- instructions?: string;
443
- additional_instructions?: string;
444
- }): t.SystemRunnable | undefined {
445
- let finalInstructions: string | BaseMessageFields | undefined =
446
- instructions;
447
- if (additional_instructions != null && additional_instructions !== '') {
448
- finalInstructions =
449
- finalInstructions != null && finalInstructions
450
- ? `${finalInstructions}\n\n${additional_instructions}`
451
- : additional_instructions;
452
- }
453
-
454
- if (
455
- finalInstructions != null &&
456
- finalInstructions &&
457
- provider === Providers.ANTHROPIC &&
458
- (clientOptions as t.AnthropicClientOptions).promptCache === true
459
- ) {
460
- finalInstructions = {
461
- content: [
462
- {
463
- type: 'text',
464
- text: instructions,
465
- cache_control: { type: 'ephemeral' },
466
- },
467
- ],
468
- };
469
- }
470
-
471
- if (finalInstructions != null && finalInstructions !== '') {
472
- const systemMessage = new SystemMessage(finalInstructions);
473
- return RunnableLambda.from((messages: BaseMessage[]) => {
474
- return [systemMessage, ...messages];
475
- }).withConfig({ runName: 'prompt' });
476
- }
477
- }
478
-
479
464
  initializeTools({
480
465
  currentTools,
481
466
  currentToolMap,
@@ -522,6 +507,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
522
507
  toolCallStepIds: this.toolCallStepIds,
523
508
  toolRegistry: agentContext?.toolRegistry,
524
509
  directToolNames: directToolNames.size > 0 ? directToolNames : undefined,
510
+ maxContextTokens: agentContext?.maxContextTokens,
511
+ maxToolResultChars: agentContext?.maxToolResultChars,
525
512
  errorHandler: (data, metadata) =>
526
513
  StandardGraph.handleToolCallErrorStatic(this, data, metadata),
527
514
  });
@@ -551,58 +538,11 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
551
538
  StandardGraph.handleToolCallErrorStatic(this, data, metadata),
552
539
  toolRegistry: agentContext?.toolRegistry,
553
540
  sessions: this.sessions,
541
+ maxContextTokens: agentContext?.maxContextTokens,
542
+ maxToolResultChars: agentContext?.maxToolResultChars,
554
543
  });
555
544
  }
556
545
 
557
- initializeModel({
558
- provider,
559
- tools,
560
- clientOptions,
561
- }: {
562
- provider: Providers;
563
- tools?: t.GraphTools;
564
- clientOptions?: t.ClientOptions;
565
- }): Runnable {
566
- const ChatModelClass = getChatModelClass(provider);
567
- const model = new ChatModelClass(clientOptions ?? {});
568
-
569
- if (
570
- isOpenAILike(provider) &&
571
- (model instanceof ChatOpenAI || model instanceof AzureChatOpenAI)
572
- ) {
573
- model.temperature = (clientOptions as t.OpenAIClientOptions)
574
- .temperature as number;
575
- model.topP = (clientOptions as t.OpenAIClientOptions).topP as number;
576
- model.frequencyPenalty = (clientOptions as t.OpenAIClientOptions)
577
- .frequencyPenalty as number;
578
- model.presencePenalty = (clientOptions as t.OpenAIClientOptions)
579
- .presencePenalty as number;
580
- model.n = (clientOptions as t.OpenAIClientOptions).n as number;
581
- } else if (
582
- provider === Providers.VERTEXAI &&
583
- model instanceof ChatVertexAI
584
- ) {
585
- model.temperature = (clientOptions as t.VertexAIClientOptions)
586
- .temperature as number;
587
- model.topP = (clientOptions as t.VertexAIClientOptions).topP as number;
588
- model.topK = (clientOptions as t.VertexAIClientOptions).topK as number;
589
- model.topLogprobs = (clientOptions as t.VertexAIClientOptions)
590
- .topLogprobs as number;
591
- model.frequencyPenalty = (clientOptions as t.VertexAIClientOptions)
592
- .frequencyPenalty as number;
593
- model.presencePenalty = (clientOptions as t.VertexAIClientOptions)
594
- .presencePenalty as number;
595
- model.maxOutputTokens = (clientOptions as t.VertexAIClientOptions)
596
- .maxOutputTokens as number;
597
- }
598
-
599
- if (!tools || tools.length === 0) {
600
- return model as unknown as Runnable;
601
- }
602
-
603
- return (model as t.ModelWithTools).bindTools(tools);
604
- }
605
-
606
546
  overrideTestModel(
607
547
  responses: string[],
608
548
  sleep?: number,
@@ -615,17 +555,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
615
555
  });
616
556
  }
617
557
 
618
- getNewModel({
619
- provider,
620
- clientOptions,
621
- }: {
622
- provider: Providers;
623
- clientOptions?: t.ClientOptions;
624
- }): t.ChatModelInstance {
625
- const ChatModelClass = getChatModelClass(provider);
626
- return new ChatModelClass(clientOptions ?? {});
627
- }
628
-
629
558
  getUsageMetadata(
630
559
  finalMessage?: BaseMessage
631
560
  ): Partial<UsageMetadata> | undefined {
@@ -638,84 +567,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
638
567
  }
639
568
  }
640
569
 
641
- /** Execute model invocation with streaming support */
642
- private async attemptInvoke(
643
- {
644
- currentModel,
645
- finalMessages,
646
- provider,
647
- tools: _tools,
648
- }: {
649
- currentModel?: t.ChatModel;
650
- finalMessages: BaseMessage[];
651
- provider: Providers;
652
- tools?: t.GraphTools;
653
- },
654
- config?: RunnableConfig
655
- ): Promise<Partial<t.BaseGraphState>> {
656
- const model = this.overrideModel ?? currentModel;
657
- if (!model) {
658
- throw new Error('No model found');
659
- }
660
-
661
- if (model.stream) {
662
- /**
663
- * Process all model output through a local ChatModelStreamHandler in the
664
- * graph execution context. Each chunk is awaited before the next one is
665
- * consumed, so by the time the stream is exhausted every run step
666
- * (MESSAGE_CREATION, TOOL_CALLS) has been created and toolCallStepIds is
667
- * fully populated — the graph will not transition to ToolNode until this
668
- * is done.
669
- *
670
- * This replaces the previous pattern where ChatModelStreamHandler lived
671
- * in the for-await stream consumer (handler registry). That consumer
672
- * runs concurrently with graph execution, so the graph could advance to
673
- * ToolNode before the consumer had processed all events. By handling
674
- * chunks here, inside the agent node, the race is eliminated.
675
- *
676
- * The for-await consumer no longer needs a ChatModelStreamHandler; its
677
- * on_chat_model_stream events are simply ignored (no handler registered).
678
- * The dispatched custom events (ON_RUN_STEP, ON_MESSAGE_DELTA, etc.)
679
- * still reach the content aggregator and SSE handlers through the custom
680
- * event callback in Run.createCustomEventCallback.
681
- */
682
- const metadata = config?.metadata as Record<string, unknown> | undefined;
683
- const streamHandler = new ChatModelStreamHandler();
684
- const stream = await model.stream(finalMessages, config);
685
- let finalChunk: AIMessageChunk | undefined;
686
- for await (const chunk of stream) {
687
- await streamHandler.handle(
688
- GraphEvents.CHAT_MODEL_STREAM,
689
- { chunk },
690
- metadata,
691
- this
692
- );
693
- finalChunk = finalChunk ? concat(finalChunk, chunk) : chunk;
694
- }
695
-
696
- if (manualToolStreamProviders.has(provider)) {
697
- finalChunk = modifyDeltaProperties(provider, finalChunk);
698
- }
699
-
700
- if ((finalChunk?.tool_calls?.length ?? 0) > 0) {
701
- finalChunk!.tool_calls = finalChunk!.tool_calls?.filter(
702
- (tool_call: ToolCall) => !!tool_call.name
703
- );
704
- }
705
-
706
- return { messages: [finalChunk as AIMessageChunk] };
707
- } else {
708
- /** Fallback for models without stream support. */
709
- const finalMessage = await model.invoke(finalMessages, config);
710
- if ((finalMessage.tool_calls?.length ?? 0) > 0) {
711
- finalMessage.tool_calls = finalMessage.tool_calls?.filter(
712
- (tool_call: ToolCall) => !!tool_call.name
713
- );
714
- }
715
- return { messages: [finalMessage] };
716
- }
717
- }
718
-
719
570
  cleanupSignalListener(currentModel?: t.ChatModel): void {
720
571
  if (!this.signal) {
721
572
  return;
@@ -734,12 +585,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
734
585
 
735
586
  createCallModel(agentId = 'default') {
736
587
  return async (
737
- state: t.BaseGraphState,
588
+ state: t.AgentSubgraphState,
738
589
  config?: RunnableConfig
739
- ): Promise<Partial<t.BaseGraphState>> => {
740
- /**
741
- * Get agent context - it must exist by this point
742
- */
590
+ ): Promise<Partial<t.AgentSubgraphState>> => {
743
591
  const agentContext = this.agentContexts.get(agentId);
744
592
  if (!agentContext) {
745
593
  throw new Error(`Agent context not found for agentId: ${agentId}`);
@@ -751,7 +599,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
751
599
 
752
600
  const { messages } = state;
753
601
 
754
- // Extract tool discoveries from current turn only (similar to formatArtifactPayload pattern)
755
602
  const discoveredNames = extractToolDiscoveries(messages);
756
603
  if (discoveredNames.length > 0) {
757
604
  agentContext.markToolsAsDiscovered(discoveredNames);
@@ -760,7 +607,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
760
607
  const toolsForBinding = agentContext.getToolsForBinding();
761
608
  let model =
762
609
  this.overrideModel ??
763
- this.initializeModel({
610
+ initializeModel({
764
611
  tools: toolsForBinding,
765
612
  provider: agentContext.provider,
766
613
  clientOptions: agentContext.clientOptions,
@@ -782,39 +629,147 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
782
629
  if (
783
630
  !agentContext.pruneMessages &&
784
631
  agentContext.tokenCounter &&
785
- agentContext.maxContextTokens != null &&
786
- agentContext.indexTokenCountMap[0] != null
632
+ agentContext.maxContextTokens != null
787
633
  ) {
788
- const isAnthropicWithThinking =
789
- (agentContext.provider === Providers.ANTHROPIC &&
790
- (agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
791
- null) ||
792
- (agentContext.provider === Providers.BEDROCK &&
793
- (agentContext.clientOptions as t.BedrockAnthropicInput)
794
- .additionalModelRequestFields?.['thinking'] != null) ||
795
- (agentContext.provider === Providers.OPENAI &&
796
- (
797
- (agentContext.clientOptions as t.OpenAIClientOptions).modelKwargs
798
- ?.thinking as t.AnthropicClientOptions['thinking']
799
- )?.type === 'enabled');
800
-
801
634
  agentContext.pruneMessages = createPruneMessages({
802
- startIndex: this.startIndex,
635
+ startIndex:
636
+ agentContext.indexTokenCountMap[0] != null ? this.startIndex : 0,
803
637
  provider: agentContext.provider,
804
638
  tokenCounter: agentContext.tokenCounter,
805
639
  maxTokens: agentContext.maxContextTokens,
806
- thinkingEnabled: isAnthropicWithThinking,
640
+ thinkingEnabled: isThinkingEnabled(
641
+ agentContext.provider,
642
+ agentContext.clientOptions
643
+ ),
807
644
  indexTokenCountMap: agentContext.indexTokenCountMap,
645
+ contextPruningConfig: agentContext.contextPruningConfig,
646
+ summarizationEnabled: agentContext.summarizationEnabled,
647
+ reserveRatio: agentContext.summarizationConfig?.reserveRatio,
648
+ calibrationRatio: agentContext.calibrationRatio,
649
+ getInstructionTokens: () => agentContext.instructionTokens,
650
+ log: (level, message, data) => {
651
+ emitAgentLog(config, level, 'prune', message, data, {
652
+ runId: this.runId,
653
+ agentId,
654
+ });
655
+ },
808
656
  });
809
657
  }
810
658
  if (agentContext.pruneMessages) {
811
- const { context, indexTokenCountMap } = agentContext.pruneMessages({
659
+ const {
660
+ context,
661
+ indexTokenCountMap,
662
+ messagesToRefine,
663
+ prePruneContextTokens,
664
+ remainingContextTokens,
665
+ originalToolContent,
666
+ calibrationRatio,
667
+ resolvedInstructionOverhead,
668
+ } = agentContext.pruneMessages({
812
669
  messages,
813
670
  usageMetadata: agentContext.currentUsage,
814
- // startOnMessageType: 'human',
671
+ lastCallUsage: agentContext.lastCallUsage,
672
+ totalTokensFresh: agentContext.totalTokensFresh,
815
673
  });
816
674
  agentContext.indexTokenCountMap = indexTokenCountMap;
675
+ if (calibrationRatio != null && calibrationRatio > 0) {
676
+ agentContext.calibrationRatio = calibrationRatio;
677
+ }
678
+ if (resolvedInstructionOverhead != null) {
679
+ agentContext.resolvedInstructionOverhead =
680
+ resolvedInstructionOverhead;
681
+ const nonToolOverhead =
682
+ agentContext.instructionTokens - agentContext.toolSchemaTokens;
683
+ const calibratedToolTokens = Math.max(
684
+ 0,
685
+ resolvedInstructionOverhead - nonToolOverhead
686
+ );
687
+ const currentToolTokens = agentContext.toolSchemaTokens;
688
+ const variance =
689
+ currentToolTokens > 0
690
+ ? Math.abs(calibratedToolTokens - currentToolTokens) /
691
+ currentToolTokens
692
+ : 1;
693
+ if (variance > CALIBRATION_VARIANCE_THRESHOLD) {
694
+ agentContext.toolSchemaTokens = calibratedToolTokens;
695
+ }
696
+ }
817
697
  messagesToUse = context;
698
+
699
+ const hasPrunedMessages =
700
+ agentContext.summarizationEnabled === true &&
701
+ Array.isArray(messagesToRefine) &&
702
+ messagesToRefine.length > 0;
703
+
704
+ if (hasPrunedMessages) {
705
+ const shouldSkip = agentContext.shouldSkipSummarization(
706
+ messages.length
707
+ );
708
+ const triggerResult =
709
+ !shouldSkip &&
710
+ shouldTriggerSummarization({
711
+ trigger: agentContext.summarizationConfig?.trigger,
712
+ maxContextTokens: agentContext.maxContextTokens,
713
+ prePruneContextTokens:
714
+ prePruneContextTokens != null
715
+ ? prePruneContextTokens + agentContext.instructionTokens
716
+ : undefined,
717
+ remainingContextTokens,
718
+ messagesToRefineCount: messagesToRefine.length,
719
+ });
720
+
721
+ if (triggerResult) {
722
+ if (originalToolContent != null && originalToolContent.size > 0) {
723
+ agentContext.pendingOriginalToolContent = originalToolContent;
724
+ }
725
+
726
+ emitAgentLog(
727
+ config,
728
+ 'info',
729
+ 'graph',
730
+ 'Summarization triggered',
731
+ undefined,
732
+ { runId: this.runId, agentId }
733
+ );
734
+ emitAgentLog(
735
+ config,
736
+ 'debug',
737
+ 'graph',
738
+ 'Summarization trigger details',
739
+ {
740
+ totalMessages: messages.length,
741
+ remainingContextTokens: remainingContextTokens ?? 0,
742
+ summaryVersion: agentContext.summaryVersion + 1,
743
+ toolSchemaTokens: agentContext.toolSchemaTokens,
744
+ instructionTokens: agentContext.instructionTokens,
745
+ systemMessageTokens: agentContext.systemMessageTokens,
746
+ },
747
+ { runId: this.runId, agentId }
748
+ );
749
+ agentContext.markSummarizationTriggered(messages.length);
750
+ return {
751
+ summarizationRequest: {
752
+ remainingContextTokens: remainingContextTokens ?? 0,
753
+ agentId: agentId || agentContext.agentId,
754
+ },
755
+ };
756
+ }
757
+
758
+ if (shouldSkip) {
759
+ emitAgentLog(
760
+ config,
761
+ 'debug',
762
+ 'graph',
763
+ 'Summarization skipped — no new messages or per-run cap reached',
764
+ {
765
+ messageCount: messages.length,
766
+ messagesToRefineCount: messagesToRefine.length,
767
+ contextLength: context.length,
768
+ },
769
+ { runId: this.runId, agentId }
770
+ );
771
+ }
772
+ }
818
773
  }
819
774
 
820
775
  let finalMessages = messagesToUse;
@@ -831,36 +786,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
831
786
  ? finalMessages[finalMessages.length - 1]
832
787
  : null;
833
788
 
789
+ const anthropicLike = isAnthropicLike(
790
+ agentContext.provider,
791
+ agentContext.clientOptions as { model?: string }
792
+ );
793
+
834
794
  if (
835
795
  agentContext.provider === Providers.BEDROCK &&
836
796
  lastMessageX instanceof AIMessageChunk &&
837
797
  lastMessageY instanceof ToolMessage &&
838
798
  typeof lastMessageX.content === 'string'
839
799
  ) {
840
- finalMessages[finalMessages.length - 2].content = '';
800
+ const trimmed = lastMessageX.content.trim();
801
+ finalMessages[finalMessages.length - 2].content =
802
+ trimmed.length > 0 ? [{ type: 'text' as const, text: trimmed }] : '';
841
803
  }
842
804
 
843
- const isLatestToolMessage = lastMessageY instanceof ToolMessage;
844
-
845
- if (
846
- isLatestToolMessage &&
847
- agentContext.provider === Providers.ANTHROPIC
848
- ) {
849
- formatAnthropicArtifactContent(finalMessages);
850
- } else if (
851
- isLatestToolMessage &&
852
- ((isOpenAILike(agentContext.provider) &&
853
- agentContext.provider !== Providers.DEEPSEEK) ||
854
- isGoogleLike(agentContext.provider))
855
- ) {
856
- formatArtifactPayload(finalMessages);
805
+ if (lastMessageY instanceof ToolMessage) {
806
+ if (anthropicLike) {
807
+ formatAnthropicArtifactContent(finalMessages);
808
+ } else if (
809
+ (isOpenAILike(agentContext.provider) &&
810
+ agentContext.provider !== Providers.DEEPSEEK) ||
811
+ isGoogleLike(agentContext.provider)
812
+ ) {
813
+ formatArtifactPayload(finalMessages);
814
+ }
857
815
  }
858
816
 
859
817
  if (agentContext.provider === Providers.ANTHROPIC) {
860
818
  const anthropicOptions = agentContext.clientOptions as
861
819
  | t.AnthropicClientOptions
862
820
  | undefined;
863
- if (anthropicOptions?.promptCache === true) {
821
+ if (
822
+ anthropicOptions?.promptCache === true &&
823
+ !agentContext.systemRunnable
824
+ ) {
864
825
  finalMessages = addCacheControl<BaseMessage>(finalMessages);
865
826
  }
866
827
  } else if (agentContext.provider === Providers.BEDROCK) {
@@ -872,26 +833,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
872
833
  }
873
834
  }
874
835
 
875
- /**
876
- * Handle edge case: when switching from a non-thinking agent to a thinking-enabled agent,
877
- * convert AI messages with tool calls to HumanMessages to avoid thinking block requirements.
878
- * This is required by Anthropic/Bedrock when thinking is enabled.
879
- */
880
- const isAnthropicWithThinking =
881
- (agentContext.provider === Providers.ANTHROPIC &&
882
- (agentContext.clientOptions as t.AnthropicClientOptions).thinking !=
883
- null) ||
884
- (agentContext.provider === Providers.BEDROCK &&
885
- (agentContext.clientOptions as t.BedrockAnthropicInput)
886
- .additionalModelRequestFields?.['thinking'] != null);
887
-
888
- if (isAnthropicWithThinking) {
836
+ if (
837
+ isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
838
+ ) {
889
839
  finalMessages = ensureThinkingBlockInMessages(
890
840
  finalMessages,
891
- agentContext.provider
841
+ agentContext.provider,
842
+ config
892
843
  );
893
844
  }
894
845
 
846
+ // Intentionally broad: runs when the pruner wasn't used OR any post-pruning
847
+ // transform (addCacheControl, ensureThinkingBlock, etc.) reassigned finalMessages.
848
+ // sanitizeOrphanToolBlocks fast-paths to a Set diff check when no orphans exist,
849
+ // so the cost is negligible and this acts as a safety net for Anthropic/Bedrock.
850
+ const needsOrphanSanitize =
851
+ anthropicLike &&
852
+ (!agentContext.pruneMessages || finalMessages !== messagesToUse);
853
+ if (needsOrphanSanitize) {
854
+ const beforeSanitize = finalMessages.length;
855
+ finalMessages = sanitizeOrphanToolBlocks(finalMessages);
856
+ if (finalMessages.length !== beforeSanitize) {
857
+ emitAgentLog(
858
+ config,
859
+ 'warn',
860
+ 'sanitize',
861
+ 'Orphan tool blocks removed',
862
+ {
863
+ before: beforeSanitize,
864
+ after: finalMessages.length,
865
+ dropped: beforeSanitize - finalMessages.length,
866
+ },
867
+ { runId: this.runId, agentId }
868
+ );
869
+ }
870
+ }
871
+
895
872
  if (
896
873
  agentContext.lastStreamCall != null &&
897
874
  agentContext.streamBuffer != null
@@ -906,64 +883,99 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
906
883
  }
907
884
 
908
885
  agentContext.lastStreamCall = Date.now();
886
+ agentContext.markTokensStale();
909
887
 
910
888
  let result: Partial<t.BaseGraphState> | undefined;
911
889
  const fallbacks =
912
890
  (agentContext.clientOptions as t.LLMConfig | undefined)?.fallbacks ??
913
891
  [];
914
892
 
915
- if (finalMessages.length === 0) {
893
+ if (
894
+ finalMessages.length === 0 &&
895
+ !agentContext.hasPendingCompactionSummary()
896
+ ) {
897
+ const budgetBreakdown = agentContext.getTokenBudgetBreakdown(messages);
898
+ const breakdown = agentContext.formatTokenBudgetBreakdown(messages);
899
+ const instructionsExceedBudget =
900
+ budgetBreakdown.instructionTokens > budgetBreakdown.maxContextTokens;
901
+
902
+ let guidance: string;
903
+ if (instructionsExceedBudget) {
904
+ const toolPct =
905
+ budgetBreakdown.toolSchemaTokens > 0
906
+ ? Math.round(
907
+ (budgetBreakdown.toolSchemaTokens /
908
+ budgetBreakdown.instructionTokens) *
909
+ 100
910
+ )
911
+ : 0;
912
+ guidance =
913
+ toolPct > 50
914
+ ? `Tool definitions consume ${budgetBreakdown.toolSchemaTokens} tokens (${toolPct}% of instructions) across ${budgetBreakdown.toolCount} tools, exceeding maxContextTokens (${budgetBreakdown.maxContextTokens}). Reduce the number of tools or increase maxContextTokens.`
915
+ : `Instructions (${budgetBreakdown.instructionTokens} tokens) exceed maxContextTokens (${budgetBreakdown.maxContextTokens}). Increase maxContextTokens or shorten the system prompt.`;
916
+ if (agentContext.summarizationEnabled === true) {
917
+ guidance +=
918
+ ' Summarization was skipped because the summary would further increase the instruction overhead.';
919
+ }
920
+ } else {
921
+ guidance =
922
+ 'Please increase the context window size or make your message shorter.';
923
+ }
924
+
925
+ emitAgentLog(
926
+ config,
927
+ 'error',
928
+ 'graph',
929
+ 'Empty messages after pruning',
930
+ {
931
+ messageCount: messages.length,
932
+ instructionsExceedBudget,
933
+ breakdown,
934
+ },
935
+ { runId: this.runId, agentId }
936
+ );
916
937
  throw new Error(
917
938
  JSON.stringify({
918
939
  type: 'empty_messages',
919
- info: 'Message pruning removed all messages as none fit in the context window. Please increase the context window size or make your message shorter.',
940
+ info: `Message pruning removed all messages as none fit in the context window. ${guidance}\n${breakdown}`,
920
941
  })
921
942
  );
922
943
  }
923
944
 
945
+ const invokeStart = Date.now();
946
+ const invokeMeta = { runId: this.runId, agentId };
947
+ emitAgentLog(
948
+ config,
949
+ 'debug',
950
+ 'graph',
951
+ 'Invoking LLM',
952
+ {
953
+ messageCount: finalMessages.length,
954
+ provider: agentContext.provider,
955
+ },
956
+ invokeMeta,
957
+ { force: true }
958
+ );
959
+
924
960
  try {
925
- result = await this.attemptInvoke(
961
+ result = await attemptInvoke(
926
962
  {
927
- currentModel: model,
928
- finalMessages,
963
+ model: (this.overrideModel ?? model) as t.ChatModel,
964
+ messages: finalMessages,
929
965
  provider: agentContext.provider,
930
- tools: agentContext.tools,
966
+ context: this,
931
967
  },
932
968
  config
933
969
  );
934
970
  } catch (primaryError) {
935
- let lastError: unknown = primaryError;
936
- for (const fb of fallbacks) {
937
- try {
938
- let model = this.getNewModel({
939
- provider: fb.provider,
940
- clientOptions: fb.clientOptions,
941
- });
942
- const bindableTools = agentContext.tools;
943
- model = (
944
- !bindableTools || bindableTools.length === 0
945
- ? model
946
- : model.bindTools(bindableTools)
947
- ) as t.ChatModelInstance;
948
- result = await this.attemptInvoke(
949
- {
950
- currentModel: model,
951
- finalMessages,
952
- provider: fb.provider,
953
- tools: agentContext.tools,
954
- },
955
- config
956
- );
957
- lastError = undefined;
958
- break;
959
- } catch (e) {
960
- lastError = e;
961
- continue;
962
- }
963
- }
964
- if (lastError !== undefined) {
965
- throw lastError;
966
- }
971
+ result = await tryFallbackProviders({
972
+ fallbacks,
973
+ tools: agentContext.tools,
974
+ messages: finalMessages,
975
+ config,
976
+ primaryError,
977
+ context: this,
978
+ });
967
979
  }
968
980
 
969
981
  if (!result) {
@@ -1091,13 +1103,46 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1091
1103
  }
1092
1104
  }
1093
1105
 
1106
+ const invokeElapsed = ((Date.now() - invokeStart) / 1000).toFixed(2);
1094
1107
  agentContext.currentUsage = this.getUsageMetadata(result.messages?.[0]);
1108
+ if (agentContext.currentUsage) {
1109
+ agentContext.updateLastCallUsage(agentContext.currentUsage);
1110
+ emitAgentLog(
1111
+ config,
1112
+ 'debug',
1113
+ 'graph',
1114
+ `LLM call complete (${invokeElapsed}s)`,
1115
+ {
1116
+ ...agentContext.currentUsage,
1117
+ elapsedSeconds: Number(invokeElapsed),
1118
+ instructionTokens: agentContext.instructionTokens,
1119
+ toolSchemaTokens: agentContext.toolSchemaTokens,
1120
+ messageCount: finalMessages.length,
1121
+ },
1122
+ invokeMeta,
1123
+ { force: true }
1124
+ );
1125
+ } else {
1126
+ emitAgentLog(
1127
+ config,
1128
+ 'debug',
1129
+ 'graph',
1130
+ `LLM call complete (${invokeElapsed}s)`,
1131
+ {
1132
+ elapsedSeconds: Number(invokeElapsed),
1133
+ messageCount: finalMessages.length,
1134
+ },
1135
+ invokeMeta,
1136
+ { force: true }
1137
+ );
1138
+ }
1095
1139
  this.cleanupSignalListener();
1096
1140
  return result;
1097
1141
  };
1098
1142
  }
1099
1143
 
1100
1144
  createAgentNode(agentId: string): t.CompiledAgentWorfklow {
1145
+ const getConfig = (): RunnableConfig | undefined => this.config;
1101
1146
  const agentContext = this.agentContexts.get(agentId);
1102
1147
  if (!agentContext) {
1103
1148
  throw new Error(`Agent context not found for agentId: ${agentId}`);
@@ -1105,13 +1150,21 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1105
1150
 
1106
1151
  const agentNode = `${AGENT}${agentId}` as const;
1107
1152
  const toolNode = `${TOOLS}${agentId}` as const;
1153
+ const summarizeNode = `${SUMMARIZE}${agentId}` as const;
1108
1154
 
1109
1155
  const routeMessage = (
1110
- state: t.BaseGraphState,
1156
+ state: t.AgentSubgraphState,
1111
1157
  config?: RunnableConfig
1112
1158
  ): string => {
1113
1159
  this.config = config;
1114
- return toolsCondition(state, toolNode, this.invokedToolIds);
1160
+ if (state.summarizationRequest != null) {
1161
+ return summarizeNode;
1162
+ }
1163
+ return toolsCondition(
1164
+ state as t.BaseGraphState,
1165
+ toolNode,
1166
+ this.invokedToolIds
1167
+ );
1115
1168
  };
1116
1169
 
1117
1170
  const StateAnnotation = Annotation.Root({
@@ -1119,6 +1172,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1119
1172
  reducer: messagesStateReducer,
1120
1173
  default: () => [],
1121
1174
  }),
1175
+ summarizationRequest: Annotation<t.SummarizationNodeInput | undefined>({
1176
+ reducer: (
1177
+ _: t.SummarizationNodeInput | undefined,
1178
+ b: t.SummarizationNodeInput | undefined
1179
+ ) => b,
1180
+ default: () => undefined,
1181
+ }),
1122
1182
  });
1123
1183
 
1124
1184
  const workflow = new StateGraph(StateAnnotation)
@@ -1131,16 +1191,82 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1131
1191
  agentContext,
1132
1192
  })
1133
1193
  )
1194
+ .addNode(
1195
+ summarizeNode,
1196
+ createSummarizeNode({
1197
+ agentContext,
1198
+ graph: {
1199
+ contentData: this.contentData,
1200
+ contentIndexMap: this.contentIndexMap,
1201
+ get config() {
1202
+ return getConfig();
1203
+ },
1204
+ runId: this.runId,
1205
+ isMultiAgent: this.isMultiAgentGraph(),
1206
+ dispatchRunStep: async (runStep, nodeConfig) => {
1207
+ this.contentData.push(runStep);
1208
+ this.contentIndexMap.set(runStep.id, runStep.index);
1209
+
1210
+ const resolvedConfig = nodeConfig ?? this.config;
1211
+ const handler = this.handlerRegistry?.getHandler(
1212
+ GraphEvents.ON_RUN_STEP
1213
+ );
1214
+ if (handler) {
1215
+ await handler.handle(
1216
+ GraphEvents.ON_RUN_STEP,
1217
+ runStep,
1218
+ resolvedConfig?.configurable,
1219
+ this
1220
+ );
1221
+ this.handlerDispatchedStepIds.add(runStep.id);
1222
+ }
1223
+
1224
+ if (resolvedConfig) {
1225
+ await safeDispatchCustomEvent(
1226
+ GraphEvents.ON_RUN_STEP,
1227
+ runStep,
1228
+ resolvedConfig
1229
+ );
1230
+ }
1231
+ },
1232
+ dispatchRunStepCompleted: async (
1233
+ stepId: string,
1234
+ result: t.StepCompleted,
1235
+ nodeConfig?: RunnableConfig
1236
+ ) => {
1237
+ const resolvedConfig = nodeConfig ?? this.config;
1238
+ const runStep = this.contentData.find((s) => s.id === stepId);
1239
+ const handler = this.handlerRegistry?.getHandler(
1240
+ GraphEvents.ON_RUN_STEP_COMPLETED
1241
+ );
1242
+ if (handler) {
1243
+ await handler.handle(
1244
+ GraphEvents.ON_RUN_STEP_COMPLETED,
1245
+ {
1246
+ result: {
1247
+ ...result,
1248
+ id: stepId,
1249
+ index: runStep?.index ?? 0,
1250
+ },
1251
+ },
1252
+ resolvedConfig?.configurable,
1253
+ this
1254
+ );
1255
+ }
1256
+ },
1257
+ },
1258
+ generateStepId: (stepKey: string) => this.generateStepId(stepKey),
1259
+ })
1260
+ )
1134
1261
  .addEdge(START, agentNode)
1135
1262
  .addConditionalEdges(agentNode, routeMessage)
1263
+ .addEdge(summarizeNode, agentNode)
1136
1264
  .addEdge(toolNode, agentContext.toolEnd ? END : agentNode);
1137
1265
 
1138
- // Cast to unknown to avoid tight coupling to external types; options are opt-in
1139
- return workflow.compile(this.compileOptions as unknown as never);
1266
+ return workflow.compile();
1140
1267
  }
1141
1268
 
1142
1269
  createWorkflow(): t.CompiledStateWorkflow {
1143
- /** Use the default (first) agent for now */
1144
1270
  const agentNode = this.createAgentNode(this.defaultAgentId);
1145
1271
  const StateAnnotation = Annotation.Root({
1146
1272
  messages: Annotation<BaseMessage[]>({
@@ -1158,7 +1284,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1158
1284
  const workflow = new StateGraph(StateAnnotation)
1159
1285
  .addNode(this.defaultAgentId, agentNode, { ends: [END] })
1160
1286
  .addEdge(START, this.defaultAgentId)
1161
- .compile();
1287
+ // LangGraph compile() types are overly strict for opt-in options
1288
+ .compile(this.compileOptions as unknown as never);
1162
1289
 
1163
1290
  return workflow;
1164
1291
  }
@@ -1222,18 +1349,11 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1222
1349
  runStep.runId = runId;
1223
1350
  }
1224
1351
 
1225
- /**
1226
- * Extract agentId and parallelGroupId from metadata
1227
- * Only set agentId for MultiAgentGraph (so frontend knows when to show agent labels)
1228
- */
1229
1352
  if (metadata) {
1230
1353
  try {
1231
1354
  const agentContext = this.getAgentContext(metadata);
1232
1355
  if (this.isMultiAgentGraph() && agentContext.agentId) {
1233
- // Only include agentId for MultiAgentGraph - enables frontend to show agent labels
1234
1356
  runStep.agentId = agentContext.agentId;
1235
- // Set group ID if this agent is part of a parallel group
1236
- // Group IDs are incrementing numbers (1, 2, 3...) reflecting execution order
1237
1357
  const groupId = this.getParallelGroupIdForAgent(agentContext.agentId);
1238
1358
  if (groupId != null) {
1239
1359
  runStep.groupId = groupId;
@@ -1246,6 +1366,23 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1246
1366
 
1247
1367
  this.contentData.push(runStep);
1248
1368
  this.contentIndexMap.set(stepId, runStep.index);
1369
+
1370
+ // Primary dispatch: handler registry (reliable, always works).
1371
+ // This mirrors how handleToolCallCompleted dispatches ON_RUN_STEP_COMPLETED
1372
+ // via the handler registry, ensuring the event always reaches the handler
1373
+ // even when LangGraph's callback system drops the custom event.
1374
+ const handler = this.handlerRegistry?.getHandler(GraphEvents.ON_RUN_STEP);
1375
+ if (handler) {
1376
+ await handler.handle(GraphEvents.ON_RUN_STEP, runStep, metadata, this);
1377
+ this.handlerDispatchedStepIds.add(stepId);
1378
+ }
1379
+
1380
+ // Secondary dispatch: custom event for LangGraph callback chain
1381
+ // (tracing, Langfuse, external consumers). May be silently dropped
1382
+ // in some scenarios (stale run ID, subgraph callback propagation issues),
1383
+ // but the primary dispatch above guarantees the event reaches the handler.
1384
+ // The customEventCallback in run.ts skips events already dispatched above
1385
+ // to prevent double handling.
1249
1386
  await safeDispatchCustomEvent(
1250
1387
  GraphEvents.ON_RUN_STEP,
1251
1388
  runStep,