@librechat/agents 3.2.33 → 3.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/dist/cjs/agents/AgentContext.cjs +47 -10
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +121 -3
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/index.cjs +21 -2
  8. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
  10. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  11. package/dist/cjs/llm/google/utils/common.cjs +6 -0
  12. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  13. package/dist/cjs/llm/invoke.cjs +49 -8
  14. package/dist/cjs/llm/invoke.cjs.map +1 -1
  15. package/dist/cjs/llm/openai/index.cjs +48 -1
  16. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  17. package/dist/cjs/llm/vertexai/index.cjs +19 -0
  18. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  19. package/dist/cjs/main.cjs +2 -0
  20. package/dist/cjs/messages/content.cjs +12 -14
  21. package/dist/cjs/messages/content.cjs.map +1 -1
  22. package/dist/cjs/messages/prune.cjs +31 -13
  23. package/dist/cjs/messages/prune.cjs.map +1 -1
  24. package/dist/cjs/run.cjs +7 -2
  25. package/dist/cjs/run.cjs.map +1 -1
  26. package/dist/cjs/stream.cjs +20 -2
  27. package/dist/cjs/stream.cjs.map +1 -1
  28. package/dist/cjs/summarization/node.cjs +12 -1
  29. package/dist/cjs/summarization/node.cjs.map +1 -1
  30. package/dist/cjs/tools/ToolNode.cjs +41 -4
  31. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  32. package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
  33. package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
  34. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
  35. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  36. package/dist/cjs/utils/tokens.cjs +30 -0
  37. package/dist/cjs/utils/tokens.cjs.map +1 -1
  38. package/dist/esm/agents/AgentContext.mjs +47 -10
  39. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  40. package/dist/esm/common/enum.mjs +13 -0
  41. package/dist/esm/common/enum.mjs.map +1 -1
  42. package/dist/esm/graphs/Graph.mjs +122 -4
  43. package/dist/esm/graphs/Graph.mjs.map +1 -1
  44. package/dist/esm/llm/bedrock/index.mjs +22 -3
  45. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  46. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
  47. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  48. package/dist/esm/llm/google/utils/common.mjs +6 -0
  49. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  50. package/dist/esm/llm/invoke.mjs +49 -8
  51. package/dist/esm/llm/invoke.mjs.map +1 -1
  52. package/dist/esm/llm/openai/index.mjs +48 -1
  53. package/dist/esm/llm/openai/index.mjs.map +1 -1
  54. package/dist/esm/llm/vertexai/index.mjs +19 -0
  55. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  56. package/dist/esm/main.mjs +3 -3
  57. package/dist/esm/messages/content.mjs +12 -15
  58. package/dist/esm/messages/content.mjs.map +1 -1
  59. package/dist/esm/messages/prune.mjs +31 -13
  60. package/dist/esm/messages/prune.mjs.map +1 -1
  61. package/dist/esm/run.mjs +7 -2
  62. package/dist/esm/run.mjs.map +1 -1
  63. package/dist/esm/stream.mjs +21 -3
  64. package/dist/esm/stream.mjs.map +1 -1
  65. package/dist/esm/summarization/node.mjs +12 -1
  66. package/dist/esm/summarization/node.mjs.map +1 -1
  67. package/dist/esm/tools/ToolNode.mjs +41 -4
  68. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  69. package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
  70. package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
  71. package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
  72. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  73. package/dist/esm/utils/tokens.mjs +30 -1
  74. package/dist/esm/utils/tokens.mjs.map +1 -1
  75. package/dist/types/agents/AgentContext.d.ts +7 -3
  76. package/dist/types/common/enum.d.ts +13 -0
  77. package/dist/types/graphs/Graph.d.ts +8 -1
  78. package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
  79. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
  80. package/dist/types/llm/invoke.d.ts +1 -1
  81. package/dist/types/llm/vertexai/index.d.ts +10 -0
  82. package/dist/types/messages/content.d.ts +5 -0
  83. package/dist/types/messages/prune.d.ts +4 -0
  84. package/dist/types/run.d.ts +1 -0
  85. package/dist/types/tools/ToolNode.d.ts +8 -0
  86. package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
  87. package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
  88. package/dist/types/types/graph.d.ts +89 -3
  89. package/dist/types/types/run.d.ts +13 -0
  90. package/dist/types/types/tools.d.ts +10 -0
  91. package/dist/types/utils/tokens.d.ts +7 -0
  92. package/package.json +1 -1
  93. package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
  94. package/src/agents/AgentContext.ts +69 -6
  95. package/src/agents/__tests__/AgentContext.test.ts +6 -2
  96. package/src/common/enum.ts +13 -0
  97. package/src/graphs/Graph.ts +196 -0
  98. package/src/llm/bedrock/index.ts +40 -0
  99. package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
  100. package/src/llm/bedrock/utils/index.ts +1 -0
  101. package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
  102. package/src/llm/bedrock/utils/message_outputs.ts +43 -0
  103. package/src/llm/google/utils/common.test.ts +64 -0
  104. package/src/llm/google/utils/common.ts +18 -0
  105. package/src/llm/invoke.test.ts +79 -1
  106. package/src/llm/invoke.ts +58 -4
  107. package/src/llm/openai/index.ts +95 -1
  108. package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
  109. package/src/llm/vertexai/index.ts +31 -0
  110. package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
  111. package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
  112. package/src/messages/content.ts +24 -32
  113. package/src/messages/prune.ts +39 -2
  114. package/src/run.ts +5 -0
  115. package/src/scripts/subagent-usage-sink.ts +176 -0
  116. package/src/specs/context-accuracy.live.test.ts +409 -0
  117. package/src/specs/context-usage-event.test.ts +117 -0
  118. package/src/specs/context-usage.live.test.ts +297 -0
  119. package/src/specs/prune.test.ts +51 -1
  120. package/src/specs/subagent.test.ts +124 -1
  121. package/src/stream.ts +40 -6
  122. package/src/summarization/__tests__/node.test.ts +60 -1
  123. package/src/summarization/node.ts +20 -1
  124. package/src/tools/ToolNode.ts +85 -3
  125. package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
  126. package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
  127. package/src/tools/streamedToolCallSeals.ts +37 -9
  128. package/src/tools/subagent/SubagentExecutor.ts +221 -3
  129. package/src/types/graph.ts +94 -1
  130. package/src/types/run.ts +13 -0
  131. package/src/types/tools.ts +10 -0
  132. package/src/utils/__tests__/apportion.test.ts +32 -0
  133. package/src/utils/tokens.ts +33 -0
@@ -21,6 +21,7 @@ import {
21
21
  addCacheControlToStablePrefixMessages,
22
22
  } from '@/messages/cache';
23
23
  import { createSchemaOnlyTools } from '@/tools/schema';
24
+ import { apportionTokenCounts } from '@/utils/tokens';
24
25
  import { DEFAULT_RESERVE_RATIO } from '@/messages';
25
26
  import { toJsonSchema } from '@/utils/schema';
26
27
 
@@ -191,6 +192,11 @@ export class AgentContext {
191
192
  dynamicInstructionTokens: number = 0;
192
193
  /** Token count for tool schemas only. */
193
194
  toolSchemaTokens: number = 0;
195
+ /** Per-tool schema token counts (post-multiplier), keyed by tool name.
196
+ * `undefined` when not calculated (e.g. cached aggregate schema tokens). */
197
+ toolTokenCounts?: Record<string, number>;
198
+ /** Names of counted tools that are deferred (`defer_loading`) and discovered. */
199
+ deferredToolNames: string[] = [];
194
200
  /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
195
201
  calibrationRatio: number = 1;
196
202
  /** Provider-observed instruction overhead from the pruner's best-variance turn. */
@@ -894,6 +900,8 @@ export class AgentContext {
894
900
  this.systemMessageTokens = 0;
895
901
  this.dynamicInstructionTokens = 0;
896
902
  this.toolSchemaTokens = 0;
903
+ this.toolTokenCounts = undefined;
904
+ this.deferredToolNames = [];
897
905
  this.cachedSystemRunnable = undefined;
898
906
  this.systemRunnableStale = true;
899
907
  this.lastToken = undefined;
@@ -1006,6 +1014,10 @@ export class AgentContext {
1006
1014
  ): Promise<void> {
1007
1015
  let toolTokens = 0;
1008
1016
  const countedToolNames = new Set<string>();
1017
+ /** Prototype-free: external tool names like `toString` must not hit
1018
+ * inherited properties during accumulation */
1019
+ const rawToolTokenCounts: Record<string, number> = Object.create(null);
1020
+ const deferredCountedNames = new Set<string>();
1009
1021
 
1010
1022
  /**
1011
1023
  * Iterate both `tools` (user-provided instance tools) and `graphTools`
@@ -1040,11 +1052,14 @@ export class AgentContext {
1040
1052
  toolName,
1041
1053
  (genericTool.description as string | undefined) ?? ''
1042
1054
  );
1043
- toolTokens += tokenCounter(
1055
+ const schemaTokens = tokenCounter(
1044
1056
  new SystemMessage(JSON.stringify(jsonSchema))
1045
1057
  );
1058
+ toolTokens += schemaTokens;
1046
1059
  if (toolName) {
1047
1060
  countedToolNames.add(toolName);
1061
+ rawToolTokenCounts[toolName] =
1062
+ (rawToolTokenCounts[toolName] ?? 0) + schemaTokens;
1048
1063
  }
1049
1064
  }
1050
1065
  }
@@ -1062,7 +1077,16 @@ export class AgentContext {
1062
1077
  parameters: def.parameters ?? {},
1063
1078
  },
1064
1079
  };
1065
- toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
1080
+ const schemaTokens = tokenCounter(
1081
+ new SystemMessage(JSON.stringify(schema))
1082
+ );
1083
+ toolTokens += schemaTokens;
1084
+ countedToolNames.add(def.name);
1085
+ rawToolTokenCounts[def.name] =
1086
+ (rawToolTokenCounts[def.name] ?? 0) + schemaTokens;
1087
+ if (def.defer_loading === true) {
1088
+ deferredCountedNames.add(def.name);
1089
+ }
1066
1090
  }
1067
1091
 
1068
1092
  const isAnthropic =
@@ -1077,6 +1101,25 @@ export class AgentContext {
1077
1101
  ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
1078
1102
  : DEFAULT_TOOL_TOKEN_MULTIPLIER;
1079
1103
  this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
1104
+
1105
+ /** Largest-remainder apportionment keeps the per-tool counts summing
1106
+ * exactly to the aggregate despite per-entry rounding */
1107
+ const toolTokenCounts = apportionTokenCounts(
1108
+ rawToolTokenCounts,
1109
+ toolTokenMultiplier,
1110
+ this.toolSchemaTokens
1111
+ );
1112
+ const deferredToolNames: string[] = [];
1113
+ for (const name of Object.keys(rawToolTokenCounts)) {
1114
+ if (
1115
+ deferredCountedNames.has(name) ||
1116
+ this.toolRegistry?.get(name)?.defer_loading === true
1117
+ ) {
1118
+ deferredToolNames.push(name);
1119
+ }
1120
+ }
1121
+ this.toolTokenCounts = toolTokenCounts;
1122
+ this.deferredToolNames = deferredToolNames;
1080
1123
  }
1081
1124
 
1082
1125
  /**
@@ -1212,9 +1255,8 @@ export class AgentContext {
1212
1255
  * Returns a structured breakdown of how the context token budget is consumed.
1213
1256
  * Useful for diagnostics when context overflow or pruning issues occur.
1214
1257
  *
1215
- * Note: `toolCount` reflects discoveries immediately, but `toolSchemaTokens`
1216
- * is a snapshot taken during `calculateInstructionTokens` and is not
1217
- * recomputed when `markToolsAsDiscovered` is called mid-run.
1258
+ * Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
1259
+ * so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
1218
1260
  */
1219
1261
  getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
1220
1262
  const maxContextTokens = this.maxContextTokens ?? 0;
@@ -1238,7 +1280,14 @@ export class AgentContext {
1238
1280
  }
1239
1281
  }
1240
1282
 
1241
- const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
1283
+ /** Mirror the pruner's reserve math so availableForMessages agrees
1284
+ * with the contextBudget computed during pruning */
1285
+ const reserveRatio =
1286
+ this.summarizationConfig?.reserveRatio ?? DEFAULT_RESERVE_RATIO;
1287
+ const reserveTokens =
1288
+ reserveRatio > 0 && reserveRatio < 1
1289
+ ? Math.round(maxContextTokens * reserveRatio)
1290
+ : 0;
1242
1291
  const availableForMessages = Math.max(
1243
1292
  0,
1244
1293
  maxContextTokens - reserveTokens - this.instructionTokens
@@ -1255,6 +1304,12 @@ export class AgentContext {
1255
1304
  messageCount,
1256
1305
  messageTokens,
1257
1306
  availableForMessages,
1307
+ toolTokenCounts:
1308
+ this.toolTokenCounts != null ? { ...this.toolTokenCounts } : undefined,
1309
+ deferredToolNames:
1310
+ this.deferredToolNames.length > 0
1311
+ ? [...this.deferredToolNames]
1312
+ : undefined,
1258
1313
  };
1259
1314
  }
1260
1315
 
@@ -1324,6 +1379,14 @@ export class AgentContext {
1324
1379
  }
1325
1380
  if (hasNewDiscoveries) {
1326
1381
  this.systemRunnableStale = true;
1382
+ /** Refresh schema token accounting so the next call's budget and
1383
+ * per-tool breakdown include the newly discovered tools; awaited
1384
+ * via tokenCalculationPromise before the next model call */
1385
+ if (this.tokenCounter) {
1386
+ this.tokenCalculationPromise = this.calculateInstructionTokens(
1387
+ this.tokenCounter
1388
+ );
1389
+ }
1327
1390
  }
1328
1391
  return hasNewDiscoveries;
1329
1392
  }
@@ -1414,7 +1414,7 @@ describe('AgentContext', () => {
1414
1414
  expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
1415
1415
  });
1416
1416
 
1417
- it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
1417
+ it('refreshes toolSchemaTokens and per-tool counts after markToolsAsDiscovered', async () => {
1418
1418
  const toolDefinitions: t.LCTool[] = [
1419
1419
  {
1420
1420
  name: 'deferred',
@@ -1431,9 +1431,13 @@ describe('AgentContext', () => {
1431
1431
 
1432
1432
  await ctx.tokenCalculationPromise;
1433
1433
  expect(ctx.toolSchemaTokens).toBe(0);
1434
+ expect(ctx.toolTokenCounts).toEqual({});
1434
1435
 
1435
1436
  ctx.markToolsAsDiscovered(['deferred']);
1436
- expect(ctx.toolSchemaTokens).toBe(0);
1437
+ await ctx.tokenCalculationPromise;
1438
+ expect(ctx.toolSchemaTokens).toBeGreaterThan(0);
1439
+ expect(ctx.toolTokenCounts?.deferred).toBeGreaterThan(0);
1440
+ expect(ctx.deferredToolNames).toContain('deferred');
1437
1441
  });
1438
1442
  });
1439
1443
 
@@ -31,6 +31,8 @@ export enum GraphEvents {
31
31
  ON_SUBAGENT_UPDATE = 'on_subagent_update',
32
32
  /** [Custom] Diagnostic logging event for context management observability */
33
33
  ON_AGENT_LOG = 'on_agent_log',
34
+ /** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
35
+ ON_CONTEXT_USAGE = 'on_context_usage',
34
36
 
35
37
  /* Official Events */
36
38
 
@@ -185,6 +187,17 @@ export enum Constants {
185
187
  /** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
186
188
  ANTHROPIC_SERVER_TOOL_PREFIX = 'srvtoolu_',
187
189
  SKILL_TOOL = 'skill',
190
+ /**
191
+ * Callback-metadata keys stamped by `attemptInvoke` /
192
+ * `tryFallbackProviders` carrying the provider (SDK `Providers` enum
193
+ * value) and configured model that actually served a model invocation.
194
+ * Unlike `ls_provider` — which derived providers inherit from their base
195
+ * class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
196
+ * SDK's own routing, including fallback-provider calls. Consumed by the
197
+ * subagent usage-capture handler to tag billing events.
198
+ */
199
+ INVOKED_PROVIDER = '__invoked_provider',
200
+ INVOKED_MODEL = '__invoked_model',
188
201
  READ_FILE = 'read_file',
189
202
  BASH_TOOL = 'bash_tool',
190
203
  BASH_PROGRAMMATIC_TOOL_CALLING = 'run_tools_with_bash',
@@ -23,6 +23,7 @@ import {
23
23
  formatArtifactPayload,
24
24
  enforceOriginalContentCap,
25
25
  formatContentStrings,
26
+ isLegacyConvertible,
26
27
  createPruneMessages,
27
28
  addCacheControl,
28
29
  getMessageId,
@@ -45,6 +46,7 @@ import {
45
46
  isAnthropicLike,
46
47
  isOpenAILike,
47
48
  isGoogleLike,
49
+ apportionTokenCounts,
48
50
  joinKeys,
49
51
  sleep,
50
52
  } from '@/utils';
@@ -89,6 +91,55 @@ const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
89
91
  /** Minimum relative variance before calibrated toolSchemaTokens overrides current value. */
90
92
  const CALIBRATION_VARIANCE_THRESHOLD = 0.15;
91
93
 
94
+ /**
95
+ * Start index of the span post-prune formatters can mutate in place: the
96
+ * trailing tool batch plus its owning AI message (artifact formatting touches
97
+ * every tool result after the last AI tool call; Bedrock rewrites the AI
98
+ * message before a trailing tool result). Capped so the usage-snapshot
99
+ * recount stays constant-cost.
100
+ */
101
+ function trailingMutationStart(messages: BaseMessage[]): number {
102
+ const MAX_SPAN = 16;
103
+ let index = messages.length - 1;
104
+ while (
105
+ index >= 0 &&
106
+ messages[index]?.getType() === 'tool' &&
107
+ messages.length - index < MAX_SPAN
108
+ ) {
109
+ index--;
110
+ }
111
+ return Math.max(0, Math.min(index, messages.length - 2));
112
+ }
113
+
114
+ /**
115
+ * Re-derives the breakdown fields coupled to the calibrated budget math so
116
+ * the snapshot stays internally consistent: the aggregate
117
+ * `instructionTokens`/`availableForMessages` reflect the pruner's effective
118
+ * (calibrated) overhead — component fields remain local estimates — and
119
+ * `messageTokens` mirrors `contextBudget - instructions - remaining`.
120
+ */
121
+ function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void {
122
+ const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
123
+ if (effectiveInstructionTokens == null) {
124
+ return;
125
+ }
126
+ breakdown.instructionTokens = effectiveInstructionTokens;
127
+ if (contextBudget == null) {
128
+ return;
129
+ }
130
+ breakdown.availableForMessages = Math.max(
131
+ 0,
132
+ contextBudget - effectiveInstructionTokens
133
+ );
134
+ if (usage.remainingContextTokens == null) {
135
+ return;
136
+ }
137
+ breakdown.messageTokens = Math.max(
138
+ 0,
139
+ contextBudget - effectiveInstructionTokens - usage.remainingContextTokens
140
+ );
141
+ }
142
+
92
143
  type ReasoningKey = 'reasoning_content' | 'reasoning';
93
144
  type ReasoningSummary = { summary?: Array<{ text?: string }> };
94
145
  type ReasoningDetail = { type?: string; text?: string };
@@ -825,6 +876,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
825
876
  agentContexts: Map<string, AgentContext> = new Map();
826
877
  /** Default agent ID to use */
827
878
  defaultAgentId: string;
879
+ /**
880
+ * Host sink for model usage emitted inside subagent child runs. Threaded
881
+ * into each `SubagentExecutor` this graph creates (and from there into
882
+ * child graphs, so nested subagents report too). See
883
+ * {@link t.StandardGraphInput.subagentUsageSink}.
884
+ */
885
+ subagentUsageSink?: t.SubagentUsageSink;
828
886
 
829
887
  constructor({
830
888
  runId,
@@ -834,11 +892,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
834
892
  tokenCounter,
835
893
  indexTokenCountMap,
836
894
  calibrationRatio,
895
+ subagentUsageSink,
837
896
  }: t.StandardGraphInput) {
838
897
  super();
839
898
  this.runId = runId;
840
899
  this.signal = signal;
841
900
  this.langfuse = langfuse;
901
+ this.subagentUsageSink = subagentUsageSink;
842
902
 
843
903
  if (agents.length === 0) {
844
904
  throw new Error('At least one agent configuration is required');
@@ -1423,6 +1483,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1423
1483
  this.config = config;
1424
1484
 
1425
1485
  let messagesToUse = messages;
1486
+ let contextUsage: t.ContextUsageEvent | null = null;
1426
1487
  if (
1427
1488
  !agentContext.pruneMessages &&
1428
1489
  agentContext.tokenCounter &&
@@ -1462,6 +1523,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1462
1523
  originalToolContent,
1463
1524
  calibrationRatio,
1464
1525
  resolvedInstructionOverhead,
1526
+ contextBudget,
1527
+ effectiveInstructionTokens,
1465
1528
  } = agentContext.pruneMessages({
1466
1529
  messages,
1467
1530
  usageMetadata: agentContext.currentUsage,
@@ -1489,10 +1552,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1489
1552
  : 1;
1490
1553
  if (variance > CALIBRATION_VARIANCE_THRESHOLD) {
1491
1554
  agentContext.toolSchemaTokens = calibratedToolTokens;
1555
+ /** Largest-remainder apportionment keeps the per-tool breakdown
1556
+ * summing exactly to the calibrated aggregate */
1557
+ if (agentContext.toolTokenCounts != null && currentToolTokens > 0) {
1558
+ agentContext.toolTokenCounts = apportionTokenCounts(
1559
+ agentContext.toolTokenCounts,
1560
+ calibratedToolTokens / currentToolTokens,
1561
+ calibratedToolTokens
1562
+ );
1563
+ }
1492
1564
  }
1493
1565
  }
1494
1566
  messagesToUse = context;
1495
1567
 
1568
+ /** Dispatched right before the model invoke — a summarization
1569
+ * detour returns from this node without an LLM call, and the
1570
+ * post-summary retry produces its own snapshot.
1571
+ *
1572
+ * The breakdown describes the post-prune prompt: counts from the
1573
+ * kept context, message tokens derived from the same calibrated
1574
+ * budget math as `remainingContextTokens` (the index map is keyed
1575
+ * by pre-prune state indices, so summing it over `context` would
1576
+ * missum); `prePruneContextTokens` carries the pre-prune metric. */
1577
+ const usageBreakdown = agentContext.getTokenBudgetBreakdown(messages);
1578
+ usageBreakdown.messageCount = context.length;
1579
+ contextUsage = {
1580
+ runId: this.runId,
1581
+ agentId,
1582
+ breakdown: usageBreakdown,
1583
+ contextBudget,
1584
+ effectiveInstructionTokens,
1585
+ prePruneContextTokens,
1586
+ remainingContextTokens,
1587
+ calibrationRatio: agentContext.calibrationRatio,
1588
+ };
1589
+ syncBudgetDerivedFields(contextUsage);
1590
+
1496
1591
  const hasPrunedMessages =
1497
1592
  agentContext.summarizationEnabled === true &&
1498
1593
  Array.isArray(messagesToRefine) &&
@@ -1598,6 +1693,33 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1598
1693
  }
1599
1694
 
1600
1695
  let finalMessages = messagesToUse;
1696
+ /** Tail snapshot for the dispatch-time usage delta: in-place
1697
+ * formatters (artifact appends, Bedrock content rewrites, legacy
1698
+ * string conversion) mutate without changing length or identity —
1699
+ * capture before they run. Legacy string conversion can also touch
1700
+ * messages before the tail, so those convertible indices are
1701
+ * tracked separately (none exist in the common case). */
1702
+ const tailStart = trailingMutationStart(messagesToUse);
1703
+ let preFormatTailTokens: number | null = null;
1704
+ let legacyIndices: number[] | null = null;
1705
+ let preFormatLegacyTokens = 0;
1706
+ if (contextUsage != null && agentContext.tokenCounter != null) {
1707
+ preFormatTailTokens = 0;
1708
+ for (const message of messagesToUse.slice(tailStart)) {
1709
+ preFormatTailTokens += agentContext.tokenCounter(message);
1710
+ }
1711
+ if (agentContext.useLegacyContent) {
1712
+ legacyIndices = [];
1713
+ for (let i = 0; i < tailStart; i++) {
1714
+ if (isLegacyConvertible(messagesToUse[i])) {
1715
+ legacyIndices.push(i);
1716
+ preFormatLegacyTokens += agentContext.tokenCounter(
1717
+ messagesToUse[i]
1718
+ );
1719
+ }
1720
+ }
1721
+ }
1722
+ }
1601
1723
  if (agentContext.useLegacyContent) {
1602
1724
  finalMessages = formatContentStrings(finalMessages);
1603
1725
  }
@@ -1788,6 +1910,79 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1788
1910
  );
1789
1911
  }
1790
1912
 
1913
+ /** Past the empty-prompt guard — a model call is now guaranteed */
1914
+ if (contextUsage != null) {
1915
+ const usageRatio =
1916
+ contextUsage.calibrationRatio != null &&
1917
+ contextUsage.calibrationRatio > 0
1918
+ ? contextUsage.calibrationRatio
1919
+ : 1;
1920
+ if (
1921
+ agentContext.tokenCounter != null &&
1922
+ finalMessages.length !== messagesToUse.length
1923
+ ) {
1924
+ /** Post-prune formatting restructured the payload (e.g. thinking
1925
+ * placeholder collapse, orphan drops) — recount so the gauge
1926
+ * reflects what is actually sent */
1927
+ let rawTokens = 0;
1928
+ for (const message of finalMessages) {
1929
+ rawTokens += agentContext.tokenCounter(message);
1930
+ }
1931
+ contextUsage.breakdown.messageCount = finalMessages.length;
1932
+ if (
1933
+ contextUsage.contextBudget != null &&
1934
+ contextUsage.effectiveInstructionTokens != null
1935
+ ) {
1936
+ contextUsage.remainingContextTokens = Math.max(
1937
+ 0,
1938
+ contextUsage.contextBudget -
1939
+ contextUsage.effectiveInstructionTokens -
1940
+ Math.round(rawTokens * usageRatio)
1941
+ );
1942
+ }
1943
+ } else if (
1944
+ preFormatTailTokens != null &&
1945
+ agentContext.tokenCounter != null &&
1946
+ contextUsage.remainingContextTokens != null
1947
+ ) {
1948
+ /** Same-length formatting can still mutate in place — the trailing
1949
+ * tool batch (artifacts, Bedrock rewrites) and any legacy-converted
1950
+ * messages before it — adjust remaining by the calibrated delta */
1951
+ let postFormatTailTokens = 0;
1952
+ for (const message of finalMessages.slice(tailStart)) {
1953
+ postFormatTailTokens += agentContext.tokenCounter(message);
1954
+ }
1955
+ let formatDelta = postFormatTailTokens - preFormatTailTokens;
1956
+ if (legacyIndices != null && legacyIndices.length > 0) {
1957
+ let postFormatLegacyTokens = 0;
1958
+ for (const index of legacyIndices) {
1959
+ postFormatLegacyTokens += agentContext.tokenCounter(
1960
+ finalMessages[index]
1961
+ );
1962
+ }
1963
+ formatDelta += postFormatLegacyTokens - preFormatLegacyTokens;
1964
+ }
1965
+ if (formatDelta !== 0) {
1966
+ contextUsage.remainingContextTokens = Math.max(
1967
+ 0,
1968
+ Math.min(
1969
+ contextUsage.contextBudget ?? Number.MAX_SAFE_INTEGER,
1970
+ contextUsage.remainingContextTokens -
1971
+ Math.round(formatDelta * usageRatio)
1972
+ )
1973
+ );
1974
+ }
1975
+ }
1976
+ syncBudgetDerivedFields(contextUsage);
1977
+ /** Awaited so async host handlers receive the pre-invoke snapshot
1978
+ * before any model deltas are emitted */
1979
+ await safeDispatchCustomEvent(
1980
+ GraphEvents.ON_CONTEXT_USAGE,
1981
+ contextUsage,
1982
+ config
1983
+ );
1984
+ }
1985
+
1791
1986
  const invokeStart = Date.now();
1792
1987
  const invokeMeta = { runId: this.runId, agentId };
1793
1988
  emitAgentLog(
@@ -2063,6 +2258,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
2063
2258
  parentAgentId: agentContext.agentId,
2064
2259
  langfuse: this.langfuse,
2065
2260
  tokenCounter: agentContext.tokenCounter,
2261
+ usageSink: this.subagentUsageSink,
2066
2262
  maxDepth: effectiveSubagentDepth,
2067
2263
  createChildGraph: (input): StandardGraph => {
2068
2264
  const childGraph = new StandardGraph(input);
@@ -34,6 +34,7 @@ import type { BaseMessage, ResponseMetadata } from '@langchain/core/messages';
34
34
  import type { ChatBedrockConverseInput } from '@langchain/aws';
35
35
  import {
36
36
  convertToConverseMessages,
37
+ createConverseToolUseStopChunk,
37
38
  handleConverseStreamContentBlockStart,
38
39
  handleConverseStreamContentBlockDelta,
39
40
  handleConverseStreamMetadata,
@@ -224,6 +225,15 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
224
225
  }
225
226
 
226
227
  const seenBlockIndices = new Set<number>();
228
+ const toolUseBlockIndices = new Set<number>();
229
+ /**
230
+ * Guardrails can reject an already-streamed toolUse block at
231
+ * `messageStop` (`guardrail_intervened`), after `contentBlockStop` has
232
+ * passed. Only emit eager-execution seals when no guardrails are
233
+ * configured, so a later intervention can't race an eagerly started tool.
234
+ */
235
+ const sealToolUseOnStop =
236
+ options.guardrailConfig == null && this.guardrailConfig == null;
227
237
 
228
238
  for await (const event of response.stream) {
229
239
  if (event.contentBlockStart != null) {
@@ -234,8 +244,23 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
234
244
  const idx = event.contentBlockStart.contentBlockIndex;
235
245
  if (idx != null) {
236
246
  seenBlockIndices.add(idx);
247
+ if (event.contentBlockStart.start?.toolUse != null) {
248
+ toolUseBlockIndices.add(idx);
249
+ }
237
250
  }
238
251
  yield this.enrichChunk(startChunk, seenBlockIndices);
252
+
253
+ // Registered stream handlers receive chunks through callback
254
+ // events, not the yielded generator — dispatch the start chunk so
255
+ // they see the tool call's id/name (eager chunk state needs both).
256
+ await runManager?.handleLLMNewToken(
257
+ startChunk.text,
258
+ undefined,
259
+ undefined,
260
+ undefined,
261
+ undefined,
262
+ { chunk: startChunk }
263
+ );
239
264
  }
240
265
  } else if (event.contentBlockDelta != null) {
241
266
  const deltaChunk = handleConverseStreamContentBlockDelta(
@@ -263,6 +288,21 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
263
288
  const stopIdx = event.contentBlockStop.contentBlockIndex;
264
289
  if (stopIdx != null) {
265
290
  seenBlockIndices.add(stopIdx);
291
+ if (sealToolUseOnStop && toolUseBlockIndices.has(stopIdx)) {
292
+ // Converse guarantees the block's input is complete at stop, so
293
+ // emit an explicit seal chunk for eager tool execution — through
294
+ // the callback path too, for registered stream handlers.
295
+ const sealChunk = createConverseToolUseStopChunk(stopIdx);
296
+ yield sealChunk;
297
+ await runManager?.handleLLMNewToken(
298
+ sealChunk.text,
299
+ undefined,
300
+ undefined,
301
+ undefined,
302
+ undefined,
303
+ { chunk: sealChunk }
304
+ );
305
+ }
266
306
  }
267
307
  } else {
268
308
  yield new ChatGenerationChunk({