@librechat/agents 3.2.34 → 3.2.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/dist/cjs/agents/AgentContext.cjs +119 -9
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/agents/projection.cjs +25 -0
  4. package/dist/cjs/agents/projection.cjs.map +1 -0
  5. package/dist/cjs/common/enum.cjs +13 -0
  6. package/dist/cjs/common/enum.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +106 -3
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +26 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +20 -0
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/invoke.cjs +49 -8
  14. package/dist/cjs/llm/invoke.cjs.map +1 -1
  15. package/dist/cjs/main.cjs +7 -0
  16. package/dist/cjs/messages/budget.cjs +23 -0
  17. package/dist/cjs/messages/budget.cjs.map +1 -0
  18. package/dist/cjs/messages/cache.cjs +1 -0
  19. package/dist/cjs/messages/cache.cjs.map +1 -1
  20. package/dist/cjs/messages/content.cjs +12 -14
  21. package/dist/cjs/messages/content.cjs.map +1 -1
  22. package/dist/cjs/messages/index.cjs +1 -0
  23. package/dist/cjs/messages/prune.cjs +31 -13
  24. package/dist/cjs/messages/prune.cjs.map +1 -1
  25. package/dist/cjs/run.cjs +7 -2
  26. package/dist/cjs/run.cjs.map +1 -1
  27. package/dist/cjs/summarization/node.cjs +12 -1
  28. package/dist/cjs/summarization/node.cjs.map +1 -1
  29. package/dist/cjs/tools/search/format.cjs +91 -2
  30. package/dist/cjs/tools/search/format.cjs.map +1 -1
  31. package/dist/cjs/tools/search/tool.cjs +4 -3
  32. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  33. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
  34. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  35. package/dist/cjs/utils/tokens.cjs +30 -0
  36. package/dist/cjs/utils/tokens.cjs.map +1 -1
  37. package/dist/esm/agents/AgentContext.mjs +121 -11
  38. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  39. package/dist/esm/agents/projection.mjs +25 -0
  40. package/dist/esm/agents/projection.mjs.map +1 -0
  41. package/dist/esm/common/enum.mjs +13 -0
  42. package/dist/esm/common/enum.mjs.map +1 -1
  43. package/dist/esm/graphs/Graph.mjs +107 -4
  44. package/dist/esm/graphs/Graph.mjs.map +1 -1
  45. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +26 -4
  46. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  47. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +20 -0
  48. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  49. package/dist/esm/llm/invoke.mjs +49 -8
  50. package/dist/esm/llm/invoke.mjs.map +1 -1
  51. package/dist/esm/main.mjs +6 -4
  52. package/dist/esm/messages/budget.mjs +23 -0
  53. package/dist/esm/messages/budget.mjs.map +1 -0
  54. package/dist/esm/messages/cache.mjs +1 -1
  55. package/dist/esm/messages/cache.mjs.map +1 -1
  56. package/dist/esm/messages/content.mjs +12 -15
  57. package/dist/esm/messages/content.mjs.map +1 -1
  58. package/dist/esm/messages/index.mjs +1 -0
  59. package/dist/esm/messages/prune.mjs +31 -13
  60. package/dist/esm/messages/prune.mjs.map +1 -1
  61. package/dist/esm/run.mjs +7 -2
  62. package/dist/esm/run.mjs.map +1 -1
  63. package/dist/esm/summarization/node.mjs +12 -1
  64. package/dist/esm/summarization/node.mjs.map +1 -1
  65. package/dist/esm/tools/search/format.mjs +91 -2
  66. package/dist/esm/tools/search/format.mjs.map +1 -1
  67. package/dist/esm/tools/search/tool.mjs +4 -3
  68. package/dist/esm/tools/search/tool.mjs.map +1 -1
  69. package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
  70. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  71. package/dist/esm/utils/tokens.mjs +30 -1
  72. package/dist/esm/utils/tokens.mjs.map +1 -1
  73. package/dist/types/agents/AgentContext.d.ts +37 -4
  74. package/dist/types/agents/projection.d.ts +26 -0
  75. package/dist/types/common/enum.d.ts +13 -0
  76. package/dist/types/graphs/Graph.d.ts +8 -1
  77. package/dist/types/index.d.ts +1 -0
  78. package/dist/types/llm/invoke.d.ts +1 -1
  79. package/dist/types/messages/budget.d.ts +11 -0
  80. package/dist/types/messages/cache.d.ts +7 -0
  81. package/dist/types/messages/content.d.ts +5 -0
  82. package/dist/types/messages/index.d.ts +1 -0
  83. package/dist/types/messages/prune.d.ts +4 -0
  84. package/dist/types/run.d.ts +1 -0
  85. package/dist/types/tools/search/format.d.ts +4 -1
  86. package/dist/types/tools/search/types.d.ts +7 -0
  87. package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
  88. package/dist/types/types/graph.d.ts +89 -3
  89. package/dist/types/types/run.d.ts +13 -0
  90. package/dist/types/utils/tokens.d.ts +7 -0
  91. package/package.json +1 -1
  92. package/src/agents/AgentContext.ts +172 -8
  93. package/src/agents/__tests__/AgentContext.test.ts +235 -2
  94. package/src/agents/__tests__/projection.test.ts +73 -0
  95. package/src/agents/projection.ts +46 -0
  96. package/src/common/enum.ts +13 -0
  97. package/src/graphs/Graph.ts +168 -0
  98. package/src/index.ts +3 -0
  99. package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
  100. package/src/llm/anthropic/utils/message_inputs.ts +78 -16
  101. package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
  102. package/src/llm/bedrock/utils/message_inputs.ts +35 -0
  103. package/src/llm/invoke.test.ts +79 -1
  104. package/src/llm/invoke.ts +58 -4
  105. package/src/messages/budget.ts +32 -0
  106. package/src/messages/cache.ts +1 -1
  107. package/src/messages/content.ts +24 -32
  108. package/src/messages/index.ts +1 -0
  109. package/src/messages/prune.ts +39 -2
  110. package/src/run.ts +5 -0
  111. package/src/scripts/subagent-usage-sink.ts +176 -0
  112. package/src/specs/context-accuracy.live.test.ts +409 -0
  113. package/src/specs/context-usage-event.test.ts +117 -0
  114. package/src/specs/context-usage.live.test.ts +297 -0
  115. package/src/specs/prune.test.ts +51 -1
  116. package/src/specs/subagent.test.ts +124 -1
  117. package/src/summarization/__tests__/node.test.ts +60 -1
  118. package/src/summarization/node.ts +20 -1
  119. package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
  120. package/src/tools/search/format.test.ts +242 -0
  121. package/src/tools/search/format.ts +122 -5
  122. package/src/tools/search/tool.ts +5 -1
  123. package/src/tools/search/types.ts +7 -0
  124. package/src/tools/subagent/SubagentExecutor.ts +221 -3
  125. package/src/types/graph.ts +94 -1
  126. package/src/types/run.ts +13 -0
  127. package/src/utils/__tests__/apportion.test.ts +32 -0
  128. package/src/utils/tokens.ts +33 -0
@@ -7,7 +7,6 @@ import type {
7
7
  BaseMessageFields,
8
8
  } from '@langchain/core/messages';
9
9
  import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
10
- import type { createPruneMessages } from '@/messages';
11
10
  import type * as t from '@/types';
12
11
  import {
13
12
  ANTHROPIC_TOOL_TOKEN_MULTIPLIER,
@@ -19,9 +18,16 @@ import {
19
18
  import {
20
19
  addCacheControl,
21
20
  addCacheControlToStablePrefixMessages,
21
+ cloneMessage,
22
22
  } from '@/messages/cache';
23
23
  import { createSchemaOnlyTools } from '@/tools/schema';
24
- import { DEFAULT_RESERVE_RATIO } from '@/messages';
24
+ import { apportionTokenCounts } from '@/utils/tokens';
25
+ import {
26
+ DEFAULT_RESERVE_RATIO,
27
+ createPruneMessages,
28
+ syncBudgetDerivedFields,
29
+ } from '@/messages';
30
+ import { isThinkingEnabled } from '@/llm/request';
25
31
  import { toJsonSchema } from '@/utils/schema';
26
32
 
27
33
  type AgentSystemTextBlock = {
@@ -191,6 +197,11 @@ export class AgentContext {
191
197
  dynamicInstructionTokens: number = 0;
192
198
  /** Token count for tool schemas only. */
193
199
  toolSchemaTokens: number = 0;
200
+ /** Per-tool schema token counts (post-multiplier), keyed by tool name.
201
+ * `undefined` when not calculated (e.g. cached aggregate schema tokens). */
202
+ toolTokenCounts?: Record<string, number>;
203
+ /** Names of counted tools that are deferred (`defer_loading`) and discovered. */
204
+ deferredToolNames: string[] = [];
194
205
  /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
195
206
  calibrationRatio: number = 1;
196
207
  /** Provider-observed instruction overhead from the pruner's best-variance turn. */
@@ -894,6 +905,8 @@ export class AgentContext {
894
905
  this.systemMessageTokens = 0;
895
906
  this.dynamicInstructionTokens = 0;
896
907
  this.toolSchemaTokens = 0;
908
+ this.toolTokenCounts = undefined;
909
+ this.deferredToolNames = [];
897
910
  this.cachedSystemRunnable = undefined;
898
911
  this.systemRunnableStale = true;
899
912
  this.lastToken = undefined;
@@ -1006,6 +1019,10 @@ export class AgentContext {
1006
1019
  ): Promise<void> {
1007
1020
  let toolTokens = 0;
1008
1021
  const countedToolNames = new Set<string>();
1022
+ /** Prototype-free: external tool names like `toString` must not hit
1023
+ * inherited properties during accumulation */
1024
+ const rawToolTokenCounts: Record<string, number> = Object.create(null);
1025
+ const deferredCountedNames = new Set<string>();
1009
1026
 
1010
1027
  /**
1011
1028
  * Iterate both `tools` (user-provided instance tools) and `graphTools`
@@ -1040,11 +1057,14 @@ export class AgentContext {
1040
1057
  toolName,
1041
1058
  (genericTool.description as string | undefined) ?? ''
1042
1059
  );
1043
- toolTokens += tokenCounter(
1060
+ const schemaTokens = tokenCounter(
1044
1061
  new SystemMessage(JSON.stringify(jsonSchema))
1045
1062
  );
1063
+ toolTokens += schemaTokens;
1046
1064
  if (toolName) {
1047
1065
  countedToolNames.add(toolName);
1066
+ rawToolTokenCounts[toolName] =
1067
+ (rawToolTokenCounts[toolName] ?? 0) + schemaTokens;
1048
1068
  }
1049
1069
  }
1050
1070
  }
@@ -1062,7 +1082,16 @@ export class AgentContext {
1062
1082
  parameters: def.parameters ?? {},
1063
1083
  },
1064
1084
  };
1065
- toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
1085
+ const schemaTokens = tokenCounter(
1086
+ new SystemMessage(JSON.stringify(schema))
1087
+ );
1088
+ toolTokens += schemaTokens;
1089
+ countedToolNames.add(def.name);
1090
+ rawToolTokenCounts[def.name] =
1091
+ (rawToolTokenCounts[def.name] ?? 0) + schemaTokens;
1092
+ if (def.defer_loading === true) {
1093
+ deferredCountedNames.add(def.name);
1094
+ }
1066
1095
  }
1067
1096
 
1068
1097
  const isAnthropic =
@@ -1077,6 +1106,25 @@ export class AgentContext {
1077
1106
  ? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
1078
1107
  : DEFAULT_TOOL_TOKEN_MULTIPLIER;
1079
1108
  this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
1109
+
1110
+ /** Largest-remainder apportionment keeps the per-tool counts summing
1111
+ * exactly to the aggregate despite per-entry rounding */
1112
+ const toolTokenCounts = apportionTokenCounts(
1113
+ rawToolTokenCounts,
1114
+ toolTokenMultiplier,
1115
+ this.toolSchemaTokens
1116
+ );
1117
+ const deferredToolNames: string[] = [];
1118
+ for (const name of Object.keys(rawToolTokenCounts)) {
1119
+ if (
1120
+ deferredCountedNames.has(name) ||
1121
+ this.toolRegistry?.get(name)?.defer_loading === true
1122
+ ) {
1123
+ deferredToolNames.push(name);
1124
+ }
1125
+ }
1126
+ this.toolTokenCounts = toolTokenCounts;
1127
+ this.deferredToolNames = deferredToolNames;
1080
1128
  }
1081
1129
 
1082
1130
  /**
@@ -1212,9 +1260,8 @@ export class AgentContext {
1212
1260
  * Returns a structured breakdown of how the context token budget is consumed.
1213
1261
  * Useful for diagnostics when context overflow or pruning issues occur.
1214
1262
  *
1215
- * Note: `toolCount` reflects discoveries immediately, but `toolSchemaTokens`
1216
- * is a snapshot taken during `calculateInstructionTokens` and is not
1217
- * recomputed when `markToolsAsDiscovered` is called mid-run.
1263
+ * Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
1264
+ * so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
1218
1265
  */
1219
1266
  getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
1220
1267
  const maxContextTokens = this.maxContextTokens ?? 0;
@@ -1238,7 +1285,14 @@ export class AgentContext {
1238
1285
  }
1239
1286
  }
1240
1287
 
1241
- const reserveTokens = Math.round(maxContextTokens * DEFAULT_RESERVE_RATIO);
1288
+ /** Mirror the pruner's reserve math so availableForMessages agrees
1289
+ * with the contextBudget computed during pruning */
1290
+ const reserveRatio =
1291
+ this.summarizationConfig?.reserveRatio ?? DEFAULT_RESERVE_RATIO;
1292
+ const reserveTokens =
1293
+ reserveRatio > 0 && reserveRatio < 1
1294
+ ? Math.round(maxContextTokens * reserveRatio)
1295
+ : 0;
1242
1296
  const availableForMessages = Math.max(
1243
1297
  0,
1244
1298
  maxContextTokens - reserveTokens - this.instructionTokens
@@ -1255,6 +1309,12 @@ export class AgentContext {
1255
1309
  messageCount,
1256
1310
  messageTokens,
1257
1311
  availableForMessages,
1312
+ toolTokenCounts:
1313
+ this.toolTokenCounts != null ? { ...this.toolTokenCounts } : undefined,
1314
+ deferredToolNames:
1315
+ this.deferredToolNames.length > 0
1316
+ ? [...this.deferredToolNames]
1317
+ : undefined,
1258
1318
  };
1259
1319
  }
1260
1320
 
@@ -1275,6 +1335,102 @@ export class AgentContext {
1275
1335
  return lines.join('\n');
1276
1336
  }
1277
1337
 
1338
+ /**
1339
+ * Projects the context-usage snapshot for an arbitrary message set WITHOUT
1340
+ * invoking the model — the pre-send / page-load / window-switch counterpart to
1341
+ * the live `ON_CONTEXT_USAGE` snapshot. Runs the same pruner + budget math the
1342
+ * graph uses (`createPruneMessages` → `getTokenBudgetBreakdown` →
1343
+ * `syncBudgetDerivedFields`) so projected numbers match a real call. Returns
1344
+ * null when the context lacks the tokenizer or window needed to prune. Omits
1345
+ * the live post-format reconciliation (provider-specific, invoke-time) — a
1346
+ * small, acceptable delta for a pre-send estimate.
1347
+ *
1348
+ * Safe to call off the hot path: the supplied `messages` are never mutated
1349
+ * (each is passed as a clone — the pruner both replaces tool-result slots and
1350
+ * unshifts reasoning blocks into AI content arrays in place), and this
1351
+ * context's own state is untouched apart from refreshing stale instruction
1352
+ * counts (idempotent, exactly what a real call does). Token counts are
1353
+ * recounted for the supplied messages (the context's `indexTokenCountMap` is
1354
+ * keyed to the live run's branch and would missum an arbitrary branch) unless
1355
+ * the caller passes a map it guarantees matches. Calibration is NOT re-derived
1356
+ * from this context's live usage (a fresh pruner would compare the prior
1357
+ * call's provider input against the whole projected branch); the learned
1358
+ * `calibrationRatio` is applied as a static seed, and callers may override it
1359
+ * with a persisted ratio via `opts.calibrationRatio`.
1360
+ */
1361
+ projectContextUsage(
1362
+ messages: BaseMessage[],
1363
+ opts?: {
1364
+ runId?: string;
1365
+ agentId?: string;
1366
+ calibrationRatio?: number;
1367
+ indexTokenCountMap?: Record<string, number | undefined>;
1368
+ }
1369
+ ): t.ContextUsageEvent | null {
1370
+ const tokenCounter = this.tokenCounter;
1371
+ if (tokenCounter == null || this.maxContextTokens == null) {
1372
+ return null;
1373
+ }
1374
+ /** Refresh stale system overhead (handoff/summary changes) so instruction
1375
+ * tokens match the prompt a real call would send. */
1376
+ this.initializeSystemRunnable();
1377
+ /** Clone array-content messages: the pruner unshifts reasoning blocks into
1378
+ * AI content arrays in place, which would otherwise corrupt the caller's
1379
+ * history. (Slot replacements land on the mapped array, not the caller's.) */
1380
+ const projected = messages.map((message) =>
1381
+ Array.isArray(message.content)
1382
+ ? cloneMessage(message, [...message.content])
1383
+ : message
1384
+ );
1385
+ let indexTokenCountMap = opts?.indexTokenCountMap;
1386
+ if (indexTokenCountMap == null) {
1387
+ indexTokenCountMap = {};
1388
+ for (let i = 0; i < messages.length; i++) {
1389
+ indexTokenCountMap[String(i)] = tokenCounter(messages[i]);
1390
+ }
1391
+ }
1392
+ const prune = createPruneMessages({
1393
+ startIndex: 0,
1394
+ provider: this.provider,
1395
+ tokenCounter,
1396
+ maxTokens: this.maxContextTokens,
1397
+ thinkingEnabled: isThinkingEnabled(this.provider, this.clientOptions),
1398
+ indexTokenCountMap,
1399
+ contextPruningConfig: this.contextPruningConfig,
1400
+ summarizationEnabled: this.summarizationEnabled,
1401
+ reserveRatio: this.summarizationConfig?.reserveRatio,
1402
+ calibrationRatio: opts?.calibrationRatio ?? this.calibrationRatio,
1403
+ getInstructionTokens: () => this.instructionTokens,
1404
+ });
1405
+ const {
1406
+ context,
1407
+ prePruneContextTokens,
1408
+ remainingContextTokens,
1409
+ contextBudget,
1410
+ effectiveInstructionTokens,
1411
+ calibrationRatio,
1412
+ } = prune({
1413
+ messages: projected,
1414
+ usageMetadata: undefined,
1415
+ lastCallUsage: undefined,
1416
+ totalTokensFresh: false,
1417
+ });
1418
+ const breakdown = this.getTokenBudgetBreakdown(messages);
1419
+ breakdown.messageCount = context.length;
1420
+ const usage: t.ContextUsageEvent = {
1421
+ runId: opts?.runId,
1422
+ agentId: opts?.agentId,
1423
+ breakdown,
1424
+ contextBudget,
1425
+ effectiveInstructionTokens,
1426
+ prePruneContextTokens,
1427
+ remainingContextTokens,
1428
+ calibrationRatio,
1429
+ };
1430
+ syncBudgetDerivedFields(usage);
1431
+ return usage;
1432
+ }
1433
+
1278
1434
  /**
1279
1435
  * Updates the last-call usage with data from the most recent LLM response.
1280
1436
  * Unlike `currentUsage` which accumulates, this captures only the single call.
@@ -1324,6 +1480,14 @@ export class AgentContext {
1324
1480
  }
1325
1481
  if (hasNewDiscoveries) {
1326
1482
  this.systemRunnableStale = true;
1483
+ /** Refresh schema token accounting so the next call's budget and
1484
+ * per-tool breakdown include the newly discovered tools; awaited
1485
+ * via tokenCalculationPromise before the next model call */
1486
+ if (this.tokenCounter) {
1487
+ this.tokenCalculationPromise = this.calculateInstructionTokens(
1488
+ this.tokenCounter
1489
+ );
1490
+ }
1327
1491
  }
1328
1492
  return hasNewDiscoveries;
1329
1493
  }
@@ -1414,7 +1414,7 @@ describe('AgentContext', () => {
1414
1414
  expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
1415
1415
  });
1416
1416
 
1417
- it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
1417
+ it('refreshes toolSchemaTokens and per-tool counts after markToolsAsDiscovered', async () => {
1418
1418
  const toolDefinitions: t.LCTool[] = [
1419
1419
  {
1420
1420
  name: 'deferred',
@@ -1431,9 +1431,13 @@ describe('AgentContext', () => {
1431
1431
 
1432
1432
  await ctx.tokenCalculationPromise;
1433
1433
  expect(ctx.toolSchemaTokens).toBe(0);
1434
+ expect(ctx.toolTokenCounts).toEqual({});
1434
1435
 
1435
1436
  ctx.markToolsAsDiscovered(['deferred']);
1436
- expect(ctx.toolSchemaTokens).toBe(0);
1437
+ await ctx.tokenCalculationPromise;
1438
+ expect(ctx.toolSchemaTokens).toBeGreaterThan(0);
1439
+ expect(ctx.toolTokenCounts?.deferred).toBeGreaterThan(0);
1440
+ expect(ctx.deferredToolNames).toContain('deferred');
1437
1441
  });
1438
1442
  });
1439
1443
 
@@ -2143,4 +2147,233 @@ describe('AgentContext', () => {
2143
2147
  expect(ctx.lastCallUsage!.inputTokens).toBe(8005);
2144
2148
  });
2145
2149
  });
2150
+
2151
+ describe('projectContextUsage', () => {
2152
+ const countByChars = (msg: { content: unknown }): number => {
2153
+ const content =
2154
+ typeof msg.content === 'string'
2155
+ ? msg.content
2156
+ : JSON.stringify(msg.content);
2157
+ return content.length;
2158
+ };
2159
+
2160
+ const buildBranch = (
2161
+ maxContextTokens: number,
2162
+ perMessageTokens: number,
2163
+ count: number,
2164
+ ): { ctx: AgentContext; messages: AIMessage[] } => {
2165
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2166
+ ctx.maxContextTokens = maxContextTokens;
2167
+ const messages: AIMessage[] = [];
2168
+ for (let i = 0; i < count; i++) {
2169
+ // countByChars counts content length, and projectContextUsage recounts
2170
+ // the supplied messages — so size content to the intended per-msg tokens.
2171
+ const content = 'x'.repeat(perMessageTokens);
2172
+ messages.push(
2173
+ i % 2 === 0
2174
+ ? (new HumanMessage(content) as unknown as AIMessage)
2175
+ : new AIMessage(content),
2176
+ );
2177
+ }
2178
+ return { ctx, messages };
2179
+ };
2180
+
2181
+ it('returns null without a tokenizer or a window', () => {
2182
+ const noCounter = createBasicContext({});
2183
+ noCounter.maxContextTokens = 1000;
2184
+ expect(noCounter.projectContextUsage([new HumanMessage('hi')])).toBeNull();
2185
+
2186
+ const noWindow = createBasicContext({ tokenCounter: countByChars });
2187
+ noWindow.maxContextTokens = undefined;
2188
+ expect(noWindow.projectContextUsage([new HumanMessage('hi')])).toBeNull();
2189
+ });
2190
+
2191
+ it('keeps the whole branch and reports headroom when it fits', () => {
2192
+ const { ctx, messages } = buildBranch(100_000, 1_000, 4);
2193
+ const usage = ctx.projectContextUsage(messages);
2194
+
2195
+ expect(usage).not.toBeNull();
2196
+ expect(usage!.breakdown.messageCount).toBe(4);
2197
+ expect(usage!.breakdown.maxContextTokens).toBe(100_000);
2198
+ expect(usage!.remainingContextTokens).toBeGreaterThan(0);
2199
+ expect(usage!.breakdown.messageTokens).toBeGreaterThan(0);
2200
+
2201
+ const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
2202
+ const used = max - (usage!.remainingContextTokens ?? 0);
2203
+ expect(used).toBeLessThanOrEqual(max);
2204
+ });
2205
+
2206
+ it('prunes older messages when the branch exceeds the window', () => {
2207
+ const { ctx, messages } = buildBranch(3_000, 1_000, 6);
2208
+ const usage = ctx.projectContextUsage(messages);
2209
+
2210
+ expect(usage).not.toBeNull();
2211
+ expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
2212
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2213
+ expect(usage!.remainingContextTokens).toBeGreaterThanOrEqual(0);
2214
+
2215
+ const max = usage!.contextBudget ?? usage!.breakdown.maxContextTokens;
2216
+ expect(max - (usage!.remainingContextTokens ?? 0)).toBeLessThanOrEqual(max);
2217
+ });
2218
+
2219
+ it('does not mutate the context (local pruner, no field writes)', () => {
2220
+ const { ctx, messages } = buildBranch(3_000, 1_000, 6);
2221
+ const mapBefore = { ...ctx.indexTokenCountMap };
2222
+
2223
+ expect(ctx.pruneMessages).toBeUndefined();
2224
+ ctx.projectContextUsage(messages);
2225
+
2226
+ expect(ctx.pruneMessages).toBeUndefined();
2227
+ expect(ctx.indexTokenCountMap).toEqual(mapBefore);
2228
+ });
2229
+
2230
+ it('does not mutate the caller messages under context pressure', () => {
2231
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2232
+ ctx.maxContextTokens = 400;
2233
+ const consumed = new ToolMessage({
2234
+ content: 'x'.repeat(20_000),
2235
+ tool_call_id: 't1',
2236
+ name: 'tool',
2237
+ });
2238
+ const messages: AIMessage[] = [
2239
+ new HumanMessage('question') as unknown as AIMessage,
2240
+ new AIMessage({
2241
+ content: '',
2242
+ tool_calls: [{ id: 't1', name: 'tool', args: {} }],
2243
+ }),
2244
+ consumed as unknown as AIMessage,
2245
+ new AIMessage('final answer'),
2246
+ ];
2247
+ const originalRef = messages[2];
2248
+ const originalContent = (messages[2] as unknown as ToolMessage).content;
2249
+
2250
+ ctx.projectContextUsage(messages);
2251
+
2252
+ expect(messages[2]).toBe(originalRef);
2253
+ expect((messages[2] as unknown as ToolMessage).content).toBe(
2254
+ originalContent,
2255
+ );
2256
+ });
2257
+
2258
+ it('recounts the supplied branch, ignoring a stale context token map', () => {
2259
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2260
+ ctx.maxContextTokens = 3_000;
2261
+ // Empty/stale map — if it were reused, every message would count as 0 and
2262
+ // nothing would prune. The fresh recount must drive pruning instead.
2263
+ ctx.indexTokenCountMap = {};
2264
+ const messages: AIMessage[] = [];
2265
+ for (let i = 0; i < 6; i++) {
2266
+ messages.push(new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage);
2267
+ }
2268
+
2269
+ const usage = ctx.projectContextUsage(messages);
2270
+
2271
+ expect(usage).not.toBeNull();
2272
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2273
+ });
2274
+
2275
+ it('uses a caller-supplied token map when provided', () => {
2276
+ const { ctx, messages } = buildBranch(3_000, 1, 6);
2277
+ // Each message is ~1 char, so a recount would fit all 6. The supplied map
2278
+ // claims 1000 each, forcing a prune — proving the map is honored.
2279
+ const indexTokenCountMap: Record<string, number> = {};
2280
+ for (let i = 0; i < messages.length; i++) {
2281
+ indexTokenCountMap[String(i)] = 1_000;
2282
+ }
2283
+
2284
+ const usage = ctx.projectContextUsage(messages, { indexTokenCountMap });
2285
+
2286
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
2287
+ });
2288
+
2289
+ it('ignores this context live usage so projections are not recalibrated', () => {
2290
+ const build = (): { ctx: AgentContext; messages: AIMessage[] } => {
2291
+ const ctx = createBasicContext({ tokenCounter: countByChars });
2292
+ ctx.maxContextTokens = 5_000;
2293
+ const messages: AIMessage[] = [0, 1, 2].map(
2294
+ () => new HumanMessage('x'.repeat(1_000)) as unknown as AIMessage,
2295
+ );
2296
+ return { ctx, messages };
2297
+ };
2298
+
2299
+ const clean = build();
2300
+ const cleanUsage = clean.ctx.projectContextUsage(clean.messages);
2301
+
2302
+ const dirty = build();
2303
+ dirty.ctx.currentUsage = {
2304
+ input_tokens: 4_000,
2305
+ output_tokens: 50,
2306
+ total_tokens: 4_050,
2307
+ };
2308
+ dirty.ctx.updateLastCallUsage({ input_tokens: 4_000, output_tokens: 50 });
2309
+ const dirtyUsage = dirty.ctx.projectContextUsage(dirty.messages);
2310
+
2311
+ expect(dirtyUsage!.remainingContextTokens).toBe(
2312
+ cleanUsage!.remainingContextTokens,
2313
+ );
2314
+ expect(dirtyUsage!.calibrationRatio).toBe(cleanUsage!.calibrationRatio);
2315
+ });
2316
+
2317
+ it('does not mutate AI message content arrays during projection', () => {
2318
+ const ctx = createBasicContext({
2319
+ agentConfig: {
2320
+ provider: Providers.ANTHROPIC,
2321
+ clientOptions: {
2322
+ model: 'claude-x',
2323
+ thinking: { type: 'enabled', budget_tokens: 1024 },
2324
+ } as never,
2325
+ },
2326
+ tokenCounter: countByChars,
2327
+ });
2328
+ ctx.maxContextTokens = 2_000;
2329
+ const aiContent = [
2330
+ { type: 'thinking', thinking: 'step by step', signature: 'sig' },
2331
+ { type: 'text', text: 'the answer' },
2332
+ ];
2333
+ const ai = new AIMessage({ content: aiContent as never });
2334
+ const messages: AIMessage[] = [
2335
+ new HumanMessage('question') as unknown as AIMessage,
2336
+ ai,
2337
+ new HumanMessage('another') as unknown as AIMessage,
2338
+ ];
2339
+ const contentRef = ai.content;
2340
+ const lenBefore = (ai.content as unknown[]).length;
2341
+
2342
+ ctx.projectContextUsage(messages);
2343
+
2344
+ expect(messages[1].content).toBe(contentRef);
2345
+ expect((messages[1].content as unknown[]).length).toBe(lenBefore);
2346
+ });
2347
+
2348
+ it('honors an explicit calibrationRatio seed', () => {
2349
+ const base = buildBranch(100_000, 1_000, 4);
2350
+ const baseUsage = base.ctx.projectContextUsage(base.messages);
2351
+
2352
+ const scaled = buildBranch(100_000, 1_000, 4);
2353
+ const scaledUsage = scaled.ctx.projectContextUsage(scaled.messages, {
2354
+ calibrationRatio: 3,
2355
+ });
2356
+
2357
+ expect(scaledUsage!.calibrationRatio).toBe(3);
2358
+ expect(scaledUsage!.remainingContextTokens).not.toBe(
2359
+ baseUsage!.remainingContextTokens,
2360
+ );
2361
+ });
2362
+
2363
+ it('refreshes a stale system runnable before projecting', () => {
2364
+ const ctx = createBasicContext({
2365
+ agentConfig: { instructions: 'system prompt' },
2366
+ tokenCounter: countByChars,
2367
+ });
2368
+ ctx.maxContextTokens = 5_000;
2369
+ ctx.initializeSystemRunnable();
2370
+ const systemBefore = ctx.systemMessageTokens;
2371
+
2372
+ // Adds a handoff preamble + marks stale, but defers the token recount.
2373
+ ctx.setHandoffContext('PriorAgent', ['SiblingA', 'SiblingB']);
2374
+ ctx.projectContextUsage([new HumanMessage('hi') as unknown as AIMessage]);
2375
+
2376
+ expect(ctx.systemMessageTokens).toBeGreaterThan(systemBefore);
2377
+ });
2378
+ });
2146
2379
  });
@@ -0,0 +1,73 @@
1
+ import { AIMessage, HumanMessage } from '@langchain/core/messages';
2
+ import type * as t from '@/types';
3
+ import { Providers } from '@/common';
4
+ import { projectAgentContextUsage } from '../projection';
5
+
6
+ const countByChars = (msg: { content: unknown }): number => {
7
+ const content =
8
+ typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
9
+ return content.length;
10
+ };
11
+
12
+ const agent = (maxContextTokens: number): t.AgentInputs => ({
13
+ agentId: 'test-agent',
14
+ provider: Providers.OPENAI,
15
+ instructions: 'system prompt',
16
+ maxContextTokens,
17
+ });
18
+
19
+ const branch = (perMessageChars: number, count: number): AIMessage[] => {
20
+ const messages: AIMessage[] = [];
21
+ for (let i = 0; i < count; i++) {
22
+ const content = 'x'.repeat(perMessageChars);
23
+ messages.push(
24
+ i % 2 === 0
25
+ ? (new HumanMessage(content) as unknown as AIMessage)
26
+ : new AIMessage(content),
27
+ );
28
+ }
29
+ return messages;
30
+ };
31
+
32
+ describe('projectAgentContextUsage', () => {
33
+ it('returns a budget snapshot for a branch that fits', async () => {
34
+ const usage = await projectAgentContextUsage({
35
+ agent: agent(100_000),
36
+ messages: branch(1_000, 4),
37
+ tokenCounter: countByChars,
38
+ });
39
+
40
+ expect(usage).not.toBeNull();
41
+ expect(usage!.breakdown.maxContextTokens).toBe(100_000);
42
+ expect(usage!.breakdown.messageCount).toBe(4);
43
+ expect(usage!.remainingContextTokens).toBeGreaterThan(0);
44
+ expect(usage!.agentId).toBe('test-agent');
45
+ });
46
+
47
+ it('prunes when the branch exceeds the window', async () => {
48
+ const usage = await projectAgentContextUsage({
49
+ agent: agent(3_000),
50
+ messages: branch(1_000, 6),
51
+ tokenCounter: countByChars,
52
+ });
53
+
54
+ expect(usage).not.toBeNull();
55
+ expect(usage!.breakdown.messageCount).toBeGreaterThan(0);
56
+ expect(usage!.breakdown.messageCount).toBeLessThan(6);
57
+ });
58
+
59
+ it('returns null without a context window', async () => {
60
+ const noWindow: t.AgentInputs = {
61
+ agentId: 'test-agent',
62
+ provider: Providers.OPENAI,
63
+ instructions: 'sys',
64
+ };
65
+ const usage = await projectAgentContextUsage({
66
+ agent: noWindow,
67
+ messages: branch(100, 2),
68
+ tokenCounter: countByChars,
69
+ });
70
+
71
+ expect(usage).toBeNull();
72
+ });
73
+ });
@@ -0,0 +1,46 @@
1
+ import type { BaseMessage } from '@langchain/core/messages';
2
+ import type * as t from '@/types';
3
+ import { AgentContext } from './AgentContext';
4
+
5
+ export interface ProjectAgentContextUsageParams {
6
+ /** Same `AgentInputs` a run is built from (instructions, tools, model, window). */
7
+ agent: t.AgentInputs;
8
+ /** Branch messages to project, in send order (no leading system message). */
9
+ messages: BaseMessage[];
10
+ tokenCounter: t.TokenCounter;
11
+ /** Per-message counts aligned to `messages` (e.g. from `formatAgentMessages`).
12
+ * When omitted, counts are recounted via `tokenCounter`. */
13
+ indexTokenCountMap?: Record<string, number>;
14
+ /** Provider-calibrated ratio from a prior snapshot, applied as a static seed. */
15
+ calibrationRatio?: number;
16
+ runId?: string;
17
+ agentId?: string;
18
+ }
19
+
20
+ /**
21
+ * Projects a pre-send context-usage snapshot for a branch under an agent config
22
+ * WITHOUT invoking the model — the host-side (page-load / branch-switch /
23
+ * window-switch) counterpart to the live `ON_CONTEXT_USAGE` event. Builds a
24
+ * throwaway `AgentContext` from the same `AgentInputs` a run uses, awaits its
25
+ * instruction/tool token accounting, then runs the shared pruner + budget math
26
+ * via `AgentContext.projectContextUsage` (which never mutates the supplied
27
+ * messages). Returns null when the config has no tokenizer or context window.
28
+ */
29
+ export async function projectAgentContextUsage({
30
+ agent,
31
+ messages,
32
+ tokenCounter,
33
+ indexTokenCountMap,
34
+ calibrationRatio,
35
+ runId,
36
+ agentId,
37
+ }: ProjectAgentContextUsageParams): Promise<t.ContextUsageEvent | null> {
38
+ const context = AgentContext.fromConfig(agent, tokenCounter, indexTokenCountMap);
39
+ await context.tokenCalculationPromise;
40
+ return context.projectContextUsage(messages, {
41
+ runId,
42
+ agentId: agentId ?? agent.agentId,
43
+ calibrationRatio,
44
+ indexTokenCountMap,
45
+ });
46
+ }
@@ -31,6 +31,8 @@ export enum GraphEvents {
31
31
  ON_SUBAGENT_UPDATE = 'on_subagent_update',
32
32
  /** [Custom] Diagnostic logging event for context management observability */
33
33
  ON_AGENT_LOG = 'on_agent_log',
34
+ /** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
35
+ ON_CONTEXT_USAGE = 'on_context_usage',
34
36
 
35
37
  /* Official Events */
36
38
 
@@ -185,6 +187,17 @@ export enum Constants {
185
187
  /** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
186
188
  ANTHROPIC_SERVER_TOOL_PREFIX = 'srvtoolu_',
187
189
  SKILL_TOOL = 'skill',
190
+ /**
191
+ * Callback-metadata keys stamped by `attemptInvoke` /
192
+ * `tryFallbackProviders` carrying the provider (SDK `Providers` enum
193
+ * value) and configured model that actually served a model invocation.
194
+ * Unlike `ls_provider` — which derived providers inherit from their base
195
+ * class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
196
+ * SDK's own routing, including fallback-provider calls. Consumed by the
197
+ * subagent usage-capture handler to tag billing events.
198
+ */
199
+ INVOKED_PROVIDER = '__invoked_provider',
200
+ INVOKED_MODEL = '__invoked_model',
188
201
  READ_FILE = 'read_file',
189
202
  BASH_TOOL = 'bash_tool',
190
203
  BASH_PROGRAMMATIC_TOOL_CALLING = 'run_tools_with_bash',