@librechat/agents 3.2.34 → 3.2.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +119 -9
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/agents/projection.cjs +25 -0
  4. package/dist/cjs/agents/projection.cjs.map +1 -0
  5. package/dist/cjs/common/enum.cjs +13 -0
  6. package/dist/cjs/common/enum.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +106 -3
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +26 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +20 -0
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/invoke.cjs +49 -8
  14. package/dist/cjs/llm/invoke.cjs.map +1 -1
  15. package/dist/cjs/main.cjs +7 -0
  16. package/dist/cjs/messages/budget.cjs +23 -0
  17. package/dist/cjs/messages/budget.cjs.map +1 -0
  18. package/dist/cjs/messages/cache.cjs +1 -0
  19. package/dist/cjs/messages/cache.cjs.map +1 -1
  20. package/dist/cjs/messages/content.cjs +12 -14
  21. package/dist/cjs/messages/content.cjs.map +1 -1
  22. package/dist/cjs/messages/index.cjs +1 -0
  23. package/dist/cjs/messages/prune.cjs +31 -13
  24. package/dist/cjs/messages/prune.cjs.map +1 -1
  25. package/dist/cjs/run.cjs +7 -2
  26. package/dist/cjs/run.cjs.map +1 -1
  27. package/dist/cjs/summarization/node.cjs +12 -1
  28. package/dist/cjs/summarization/node.cjs.map +1 -1
  29. package/dist/cjs/tools/search/format.cjs +91 -2
  30. package/dist/cjs/tools/search/format.cjs.map +1 -1
  31. package/dist/cjs/tools/search/tool.cjs +4 -3
  32. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  33. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
  34. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  35. package/dist/cjs/utils/tokens.cjs +30 -0
  36. package/dist/cjs/utils/tokens.cjs.map +1 -1
  37. package/dist/esm/agents/AgentContext.mjs +121 -11
  38. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  39. package/dist/esm/agents/projection.mjs +25 -0
  40. package/dist/esm/agents/projection.mjs.map +1 -0
  41. package/dist/esm/common/enum.mjs +13 -0
  42. package/dist/esm/common/enum.mjs.map +1 -1
  43. package/dist/esm/graphs/Graph.mjs +107 -4
  44. package/dist/esm/graphs/Graph.mjs.map +1 -1
  45. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +26 -4
  46. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  47. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +20 -0
  48. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  49. package/dist/esm/llm/invoke.mjs +49 -8
  50. package/dist/esm/llm/invoke.mjs.map +1 -1
  51. package/dist/esm/main.mjs +6 -4
  52. package/dist/esm/messages/budget.mjs +23 -0
  53. package/dist/esm/messages/budget.mjs.map +1 -0
  54. package/dist/esm/messages/cache.mjs +1 -1
  55. package/dist/esm/messages/cache.mjs.map +1 -1
  56. package/dist/esm/messages/content.mjs +12 -15
  57. package/dist/esm/messages/content.mjs.map +1 -1
  58. package/dist/esm/messages/index.mjs +1 -0
  59. package/dist/esm/messages/prune.mjs +31 -13
  60. package/dist/esm/messages/prune.mjs.map +1 -1
  61. package/dist/esm/run.mjs +7 -2
  62. package/dist/esm/run.mjs.map +1 -1
  63. package/dist/esm/summarization/node.mjs +12 -1
  64. package/dist/esm/summarization/node.mjs.map +1 -1
  65. package/dist/esm/tools/search/format.mjs +91 -2
  66. package/dist/esm/tools/search/format.mjs.map +1 -1
  67. package/dist/esm/tools/search/tool.mjs +4 -3
  68. package/dist/esm/tools/search/tool.mjs.map +1 -1
  69. package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
  70. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  71. package/dist/esm/utils/tokens.mjs +30 -1
  72. package/dist/esm/utils/tokens.mjs.map +1 -1
  73. package/dist/types/agents/AgentContext.d.ts +37 -4
  74. package/dist/types/agents/projection.d.ts +26 -0
  75. package/dist/types/common/enum.d.ts +13 -0
  76. package/dist/types/graphs/Graph.d.ts +8 -1
  77. package/dist/types/index.d.ts +1 -0
  78. package/dist/types/llm/invoke.d.ts +1 -1
  79. package/dist/types/messages/budget.d.ts +11 -0
  80. package/dist/types/messages/cache.d.ts +7 -0
  81. package/dist/types/messages/content.d.ts +5 -0
  82. package/dist/types/messages/index.d.ts +1 -0
  83. package/dist/types/messages/prune.d.ts +4 -0
  84. package/dist/types/run.d.ts +1 -0
  85. package/dist/types/tools/search/format.d.ts +4 -1
  86. package/dist/types/tools/search/types.d.ts +7 -0
  87. package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
  88. package/dist/types/types/graph.d.ts +89 -3
  89. package/dist/types/types/run.d.ts +13 -0
  90. package/dist/types/utils/tokens.d.ts +7 -0
  91. package/package.json +1 -1
  92. package/src/agents/AgentContext.ts +172 -8
  93. package/src/agents/__tests__/AgentContext.test.ts +235 -2
  94. package/src/agents/__tests__/projection.test.ts +73 -0
  95. package/src/agents/projection.ts +46 -0
  96. package/src/common/enum.ts +13 -0
  97. package/src/graphs/Graph.ts +168 -0
  98. package/src/index.ts +3 -0
  99. package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
  100. package/src/llm/anthropic/utils/message_inputs.ts +78 -16
  101. package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
  102. package/src/llm/bedrock/utils/message_inputs.ts +35 -0
  103. package/src/llm/invoke.test.ts +79 -1
  104. package/src/llm/invoke.ts +58 -4
  105. package/src/messages/budget.ts +32 -0
  106. package/src/messages/cache.ts +1 -1
  107. package/src/messages/content.ts +24 -32
  108. package/src/messages/index.ts +1 -0
  109. package/src/messages/prune.ts +39 -2
  110. package/src/run.ts +5 -0
  111. package/src/scripts/subagent-usage-sink.ts +176 -0
  112. package/src/specs/context-accuracy.live.test.ts +409 -0
  113. package/src/specs/context-usage-event.test.ts +117 -0
  114. package/src/specs/context-usage.live.test.ts +297 -0
  115. package/src/specs/prune.test.ts +51 -1
  116. package/src/specs/subagent.test.ts +124 -1
  117. package/src/summarization/__tests__/node.test.ts +60 -1
  118. package/src/summarization/node.ts +20 -1
  119. package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
  120. package/src/tools/search/format.test.ts +242 -0
  121. package/src/tools/search/format.ts +122 -5
  122. package/src/tools/search/tool.ts +5 -1
  123. package/src/tools/search/types.ts +7 -0
  124. package/src/tools/subagent/SubagentExecutor.ts +221 -3
  125. package/src/types/graph.ts +94 -1
  126. package/src/types/run.ts +13 -0
  127. package/src/utils/__tests__/apportion.test.ts +32 -0
  128. package/src/utils/tokens.ts +33 -0
@@ -1,6 +1,113 @@
1
1
  import type * as t from './types';
2
2
  import { getDomainName, fileExtRegex } from './utils';
3
3
 
4
/**
 * Fallback per-search budget, in characters, for the highlight content shown
 * to the model. Hosts that know the context window (e.g. LibreChat) pass a
 * window-relative value instead; this fixed number keeps standalone consumers
 * bounded rather than dumping every source's full reranked content into the
 * prompt.
 */
const DEFAULT_MAX_LLM_OUTPUT_CHARS = 50000;

/**
 * Smallest leftover budget, in characters, that is still worth filling with a
 * truncated boundary highlight. With less room than this the highlight is
 * dropped whole instead of emitting a useless sliver.
 */
const MIN_PARTIAL_HIGHLIGHT_CHARS = 200;

/**
 * Picks the per-search highlight budget.
 *
 * Precedence: a positive `maxOutputChars` argument, then a positive finite
 * `SEARCH_MAX_LLM_OUTPUT_CHARS` environment variable, then the built-in
 * default of 50,000 characters.
 *
 * @param maxOutputChars - Budget supplied through tool configuration, if any.
 * @returns The budget, in characters, to apply to highlight content.
 */
export function resolveMaxLLMOutputChars(maxOutputChars?: number): number {
  // `?? 0` folds null/undefined into the "not positive" case; NaN also fails `> 0`.
  const fromConfig = maxOutputChars ?? 0;
  if (fromConfig > 0) {
    return fromConfig;
  }
  // An unset variable becomes NaN and an empty string becomes 0; both are rejected below.
  const fromEnv = Number(process.env.SEARCH_MAX_LLM_OUTPUT_CHARS);
  return Number.isFinite(fromEnv) && fromEnv > 0
    ? fromEnv
    : DEFAULT_MAX_LLM_OUTPUT_CHARS;
}
26
+
27
/**
 * Inline citation markers embedded in highlight text, e.g. `(link#2 "Title")`.
 * Mirrors the matcher in `highlights.ts` so truncation can tell which
 * citations survive in a sliced prefix.
 */
const REFERENCE_MARKER_REGEX = /\((link|image|video)#(\d+)(?:\s+"[^"]*")?\)/g;

/**
 * Collects the `type#originalIndex` key of every complete citation marker
 * found in `text`, so references can be filtered to those still visible.
 *
 * The number inside a marker is decremented by one when building the key, to
 * line up with the `originalIndex` values the keys are compared against.
 *
 * @param text - Highlight text (or a prefix of it) to scan.
 * @returns Keys of all markers that appear in full within `text`.
 */
function visibleReferenceKeys(text: string): Set<string> {
  const keys = new Set<string>();
  // Cheap pre-check: every marker contains '#', so skip the regex when none is present.
  if (!text.includes('#')) {
    return keys;
  }
  // `matchAll` iterates over its own copy of the regex, so the shared global
  // pattern's `lastIndex` is never touched.
  for (const match of text.matchAll(REFERENCE_MARKER_REGEX)) {
    keys.add(`${match[1]}#${parseInt(match[2], 10) - 1}`);
  }
  return keys;
}

/**
 * Truncates a highlight to at most `maxLen` characters of (already-trimmed)
 * text and appends a truncation notice.
 *
 * Only references whose markers survive in the kept prefix are retained:
 * markers in the cut tail would otherwise emit Core References for citations
 * the model can no longer see, while a blanket drop would lose still-visible
 * ones.
 *
 * @param highlight - Source highlight; only `score` and `references` are read.
 * @param text - The highlight's trimmed text to cut from.
 * @param maxLen - Maximum number of UTF-16 code units of `text` to keep.
 * @returns A new highlight; the input is not modified.
 */
function truncateHighlight(highlight: t.Highlight, text: string, maxLen: number): t.Highlight {
  let prefix = text.slice(0, maxLen);
  // `slice` cuts on UTF-16 code units. If the cut lands between the two halves
  // of a surrogate pair, drop the dangling high surrogate so the output never
  // contains a lone (invalid) surrogate.
  if (prefix.length < text.length) {
    const lastUnit = prefix.charCodeAt(prefix.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      prefix = prefix.slice(0, -1);
    }
  }
  const truncated: t.Highlight = { score: highlight.score, text: `${prefix}\n…[truncated]` };
  if (highlight.references != null && highlight.references.length > 0) {
    const keys = visibleReferenceKeys(prefix);
    const visible = highlight.references.filter((ref) => keys.has(`${ref.type}#${ref.originalIndex}`));
    // Leave `references` unset, rather than empty, when nothing survives.
    if (visible.length > 0) {
      truncated.references = visible;
    }
  }
  return truncated;
}
63
+
64
/**
 * Caps the total highlight text across a search result at `maxChars`.
 *
 * Highlights are the dominant, otherwise unbounded part of search output.
 * Sources are visited in relevance order: organic results first, then top
 * stories, with each source's highlights in their existing (reranked) order.
 *
 * - A highlight that fits whole is kept as-is and charged by its trimmed length.
 * - The first highlight that does not fit is truncated into the remaining
 *   room, but only if at least `MIN_PARTIAL_HIGHLIGHT_CHARS` remain; otherwise
 *   it is dropped.
 * - Once the budget is hit it is treated as fully spent, so every later
 *   non-blank highlight is dropped.
 * - Blank highlights are removed without being charged or counted.
 *
 * Snippets, titles, URLs and per-source `content` are not touched here.
 * Mutates `results` in place by reassigning each visited source's
 * `highlights` array.
 *
 * @param results - Search result data whose highlights are trimmed.
 * @param maxChars - Total character budget for highlight text.
 * @returns Number of highlights dropped or truncated (0 when everything fit).
 */
function trimHighlightsToBudget(results: t.SearchResultData, maxChars: number): number {
  const orderedSections: (t.ValidSource[] | undefined)[] = [results.organic, results.topStories];
  let consumed = 0;
  let affected = 0;

  for (const section of orderedSections) {
    for (const source of section ?? []) {
      const candidates = source.highlights;
      if (candidates == null || candidates.length === 0) {
        continue;
      }

      const survivors: t.Highlight[] = [];
      for (const candidate of candidates) {
        const body = candidate.text.trim();
        if (body.length === 0) {
          continue;
        }

        const fitsWhole = consumed + body.length <= maxChars;
        if (fitsWhole) {
          survivors.push(candidate);
          consumed += body.length;
        } else {
          const room = maxChars - consumed;
          if (room >= MIN_PARTIAL_HIGHLIGHT_CHARS) {
            survivors.push(truncateHighlight(candidate, body, room));
          }
          // Mark the budget as exhausted so no later highlight can slip in.
          consumed = maxChars;
          affected += 1;
        }
      }
      source.highlights = survivors;
    }
  }

  return affected;
}
110
+
4
111
  function addHighlightSection(): string[] {
5
112
  return ['\n## Highlights', ''];
6
113
  }
@@ -112,8 +219,15 @@ function formatSource(
112
219
 
113
220
  export function formatResultsForLLM(
114
221
  turn: number,
115
- results: t.SearchResultData
222
+ results: t.SearchResultData,
223
+ maxOutputChars?: number
116
224
  ): { output: string; references: t.ResultReference[] } {
225
+ /** Bound highlight content to the per-search budget before formatting */
226
+ const trimmedHighlights = trimHighlightsToBudget(
227
+ results,
228
+ resolveMaxLLMOutputChars(maxOutputChars)
229
+ );
230
+
117
231
  /** Array to collect all output lines */
118
232
  const outputLines: string[] = [];
119
233
 
@@ -243,8 +357,11 @@ export function formatResultsForLLM(
243
357
  outputLines.push(paaLines.join(''));
244
358
  }
245
359
 
246
- return {
247
- output: outputLines.join('\n').trim(),
248
- references,
249
- };
360
+ let output = outputLines.join('\n').trim();
361
+ if (trimmedHighlights > 0) {
362
+ output += `\n\n_[${trimmedHighlights} additional highlight${
363
+ trimmedHighlights === 1 ? '' : 's'
364
+ } omitted to fit the context budget; the cited sources contain the full content.]_`;
365
+ }
366
+ return { output, references };
250
367
  }
@@ -289,10 +289,12 @@ function createOnSearchResults({
289
289
  function createTool({
290
290
  schema,
291
291
  search,
292
+ maxOutputChars,
292
293
  onSearchResults: _onSearchResults,
293
294
  }: {
294
295
  schema: Record<string, unknown>;
295
296
  search: ReturnType<typeof createSearchProcessor>;
297
+ maxOutputChars?: number;
296
298
  onSearchResults: t.SearchToolConfig['onSearchResults'];
297
299
  }): DynamicStructuredTool {
298
300
  return tool(
@@ -313,7 +315,7 @@ function createTool({
313
315
  }),
314
316
  });
315
317
  const turn = runnableConfig.toolCall?.turn ?? 0;
316
- const { output, references } = formatResultsForLLM(turn, searchResult);
318
+ const { output, references } = formatResultsForLLM(turn, searchResult, maxOutputChars);
317
319
  const data: t.SearchResultData = { turn, ...searchResult, references };
318
320
  return [output, { [Constants.WEB_SEARCH]: data }];
319
321
  },
@@ -359,6 +361,7 @@ export const createSearchTool = (
359
361
  rerankerType = 'cohere',
360
362
  topResults = 5,
361
363
  maxContentLength,
364
+ maxOutputChars,
362
365
  strategies = ['no_extraction'],
363
366
  filterContent = true,
364
367
  safeSearch = 1,
@@ -483,6 +486,7 @@ export const createSearchTool = (
483
486
  return createTool({
484
487
  search,
485
488
  schema: toolSchema,
489
+ maxOutputChars,
486
490
  onSearchResults: _onSearchResults,
487
491
  });
488
492
  };
@@ -218,6 +218,13 @@ export interface SearchToolConfig
218
218
  ProcessSourcesConfig,
219
219
  FirecrawlConfig {
220
220
  tavilyScraperOptions?: TavilyScraperConfig;
221
+ /** Max chars of highlight content this tool feeds the MODEL per search (the
222
+ * dominant, otherwise-unbounded part of the output). Distinct from
223
+ * `maxContentLength`, which caps scraped/reranked content per source — full
224
+ * content always remains in the `WEB_SEARCH` artifact. Defaults to 50,000;
225
+ * also configurable via the `SEARCH_MAX_LLM_OUTPUT_CHARS` env var. Hosts that
226
+ * know the context window (e.g. LibreChat) pass a window-relative value. */
227
+ maxOutputChars?: number;
221
228
  logger?: Logger;
222
229
  safeSearch?: SafeSearchLevel;
223
230
  jinaApiKey?: string;
@@ -1,8 +1,9 @@
1
1
  import { nanoid } from 'nanoid';
2
2
  import { HumanMessage } from '@langchain/core/messages';
3
3
  import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
4
+ import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
5
+ import type { ChatGeneration, LLMResult } from '@langchain/core/outputs';
4
6
  import type { Callbacks } from '@langchain/core/callbacks/manager';
5
- import type { BaseMessage } from '@langchain/core/messages';
6
7
  import type {
7
8
  AgentInputs,
8
9
  MessageDeltaEvent,
@@ -16,6 +17,7 @@ import type {
16
17
  SubagentConfig,
17
18
  SubagentUpdateEvent,
18
19
  SubagentUpdatePhase,
20
+ SubagentUsageSink,
19
21
  ToolExecuteBatchRequest,
20
22
  ToolCallDelta,
21
23
  TokenCounter,
@@ -24,7 +26,7 @@ import type { AggregatedHookResult, HookRegistry } from '@/hooks';
24
26
  import type { AgentContext } from '@/agents/AgentContext';
25
27
  import type { StandardGraph } from '@/graphs/Graph';
26
28
  import type { HandlerRegistry } from '@/events';
27
- import { GraphEvents, Callback, StepTypes } from '@/common';
29
+ import { Constants, GraphEvents, Callback, StepTypes } from '@/common';
28
30
  import { executeHooks } from '@/hooks';
29
31
 
30
32
  const DEFAULT_MAX_TURNS = 25;
@@ -236,6 +238,15 @@ export type SubagentExecutorOptions = {
236
238
  * post-`createWorkflow`, so `createAgentNode` must capture lazily).
237
239
  */
238
240
  parentHandlerRegistry?: HandlerRegistry | (() => HandlerRegistry | undefined);
241
+ /**
242
+ * Receives a usage event for every model call the child run makes. The
243
+ * child workflow executes via `invoke()` with a detached callbacks array,
244
+ * so its `on_chat_model_end` events never reach the parent's handler
245
+ * registry — without this sink, child token usage is invisible to the
246
+ * host (unbilled model calls). Forwarded into the child graph's input so
247
+ * nested subagents report through the same sink.
248
+ */
249
+ usageSink?: SubagentUsageSink;
239
250
  };
240
251
 
241
252
  export class SubagentExecutor {
@@ -248,6 +259,7 @@ export class SubagentExecutor {
248
259
  private readonly tokenCounter?: TokenCounter;
249
260
  private readonly maxDepth: number;
250
261
  private readonly createChildGraph: ChildGraphFactory;
262
+ private readonly usageSink?: SubagentUsageSink;
251
263
  private readonly resolveParentHandlerRegistry?: () =>
252
264
  | HandlerRegistry
253
265
  | undefined;
@@ -262,6 +274,7 @@ export class SubagentExecutor {
262
274
  this.tokenCounter = options.tokenCounter;
263
275
  this.maxDepth = options.maxDepth ?? 1;
264
276
  this.createChildGraph = options.createChildGraph;
277
+ this.usageSink = options.usageSink;
265
278
  const rawRegistry = options.parentHandlerRegistry;
266
279
  if (typeof rawRegistry === 'function') {
267
280
  this.resolveParentHandlerRegistry = rawRegistry;
@@ -351,12 +364,35 @@ export class SubagentExecutor {
351
364
  const childRunId = `${this.parentRunId}_sub_${nanoid(8)}`;
352
365
  const maxTurns = config.maxTurns ?? DEFAULT_MAX_TURNS;
353
366
 
367
+ const hostUsageSink = this.usageSink;
354
368
  const childGraph = this.createChildGraph({
355
369
  runId: childRunId,
356
370
  signal: this.parentSignal,
357
371
  agents: [childInputs],
358
372
  langfuse: this.langfuse,
359
373
  tokenCounter: this.tokenCounter,
374
+ /**
375
+ * Forwarded so the child graph's own `SubagentExecutor` (created in
376
+ * its `createAgentNode` when `allowNested` keeps subagentConfigs)
377
+ * reports nested-child usage through the same host sink. Each nesting
378
+ * level attaches its own capture callback — `workflow.invoke` replaces
379
+ * the inherited callback chain, so a single top-level handler would
380
+ * never see grandchild model calls.
381
+ *
382
+ * The wrapper rewrites `runId` to THIS executor's parent run: nested
383
+ * executors emit with their own `parentRunId` (a `*_sub_*` child id),
384
+ * and each wrapper layer rewrites upward, so by the time an event
385
+ * reaches the host sink its `runId` is the ROOT run — hosts keying
386
+ * billing by run id never see intermediate child run ids there
387
+ * (`subagentRunId` still identifies the emitting child).
388
+ */
389
+ subagentUsageSink:
390
+ hostUsageSink == null
391
+ ? undefined
392
+ : /** Returns the host sink's result so async sinks stay awaited
393
+ * through every wrapper layer. */
394
+ (event): void | Promise<void> =>
395
+ hostUsageSink({ ...event, runId: this.parentRunId }),
360
396
  });
361
397
 
362
398
  let forwarding: ForwarderCallback | undefined;
@@ -402,7 +438,31 @@ export class SubagentExecutor {
402
438
  * `runName` gives the child a distinct LangSmith trace root (avoids
403
439
  * nested trace pollution).
404
440
  */
405
- const callbacks: Callbacks = forwarder ? [forwarder] : [];
441
+ const callbackHandlers: BaseCallbackHandler[] = [];
442
+ if (forwarder) {
443
+ callbackHandlers.push(forwarder);
444
+ }
445
+ /**
446
+ * Usage capture rides the same detached callbacks array. Because
447
+ * `callbacks` REPLACES the inherited chain (see above), the host's
448
+ * `CHAT_MODEL_END` handler never observes the child's model calls —
449
+ * this handler is the child-side equivalent of `ModelEndHandler`,
450
+ * reporting per-call usage to the host's sink for billing.
451
+ */
452
+ if (this.usageSink) {
453
+ callbackHandlers.push(
454
+ createUsageCaptureHandler({
455
+ sink: this.usageSink,
456
+ subagentType,
457
+ subagentRunId: childRunId,
458
+ subagentAgentId: childAgentId,
459
+ parentRunId: this.parentRunId,
460
+ provider: config.agentInputs.provider,
461
+ fallbackModel: extractConfiguredModel(config.agentInputs),
462
+ })
463
+ );
464
+ }
465
+ const callbacks: Callbacks = callbackHandlers;
406
466
  /**
407
467
  * Inherit the parent's host `configurable` — host-set fields
408
468
  * (`requestBody`, `user`, `userMCPAuthMap`, etc.) AND the run-
@@ -719,6 +779,164 @@ export class SubagentExecutor {
719
779
  }
720
780
  }
721
781
 
782
/**
 * Creates the child-run counterpart of a host `CHAT_MODEL_END` handler.
 *
 * The returned callback remembers per-call model/provider identity when a
 * chat model starts, pairs it with the usage metadata reported when the call
 * ends, and forwards the result to the host's sink as a
 * {@link SubagentUsageEvent}.
 *
 * It is attached to the child `workflow.invoke` callbacks array, so it sees
 * every model call inside the child graph: the agent loop and any auxiliary
 * calls (e.g. child-side summarization). Deeper subagent levels are NOT seen
 * here: each nesting level replaces the callback chain and attaches its own
 * capture handler through the `subagentUsageSink` forwarded on the child
 * graph's input.
 *
 * @param args.sink - Host sink that receives one event per provider request.
 * @param args.subagentType - Copied to the event's `subagentType`.
 * @param args.subagentRunId - Copied to the event's `subagentRunId`.
 * @param args.subagentAgentId - Copied to the event's `subagentAgentId`.
 * @param args.parentRunId - Reported as the event's `runId`.
 * @param args.provider - Child config's provider; used when a call carries no
 *   `INVOKED_PROVIDER` metadata.
 * @param args.fallbackModel - Child config's model; used when a call carries
 *   neither `ls_model_name` nor `INVOKED_MODEL` metadata.
 * @returns A callback handler with `awaitHandlers` enabled.
 */
function createUsageCaptureHandler(args: {
  sink: SubagentUsageSink;
  subagentType: string;
  subagentRunId: string;
  subagentAgentId: string;
  parentRunId: string;
  /**
   * Child config's provider enum — the default tag when a call carries no
   * `INVOKED_PROVIDER` metadata (hosts key pricing/cache semantics off it).
   */
  provider?: string;
  /**
   * Child config's model, used when a call carries neither `ls_model_name`
   * nor `INVOKED_MODEL` metadata.
   */
  fallbackModel?: string;
}): BaseCallbackHandler {
  const {
    sink: reportUsage,
    provider: configuredProvider,
    fallbackModel: configuredModel,
    parentRunId: ownerRunId,
    subagentType,
    subagentRunId,
    subagentAgentId,
  } = args;

  /**
   * Attribution for in-flight calls, keyed by LangChain callback runId.
   * `model` prefers `ls_model_name` and falls back to `INVOKED_MODEL`;
   * `provider` is `INVOKED_PROVIDER`, i.e. the provider that actually served
   * the call, which matters for fallback-served calls where the static
   * config provider would mis-tag pricing/cache semantics.
   */
  const pendingCalls = new Map<string, { model?: string; provider?: string }>();

  /**
   * Returns the usage metadata of the first generation in the group that
   * reports any. One group is one provider request, and with multiple
   * completions (`n > 1`) each choice repeats the request-level usage, so
   * only the first is taken to avoid multiplying billed tokens.
   */
  const firstReportedUsage = (
    group: LLMResult['generations'][number]
  ): UsageMetadata | undefined => {
    for (const generation of group) {
      const message = (generation as ChatGeneration | undefined)?.message;
      const usage = (message as { usage_metadata?: UsageMetadata } | undefined)
        ?.usage_metadata;
      if (usage != null) {
        return usage;
      }
    }
    return undefined;
  };

  const handler = BaseCallbackHandler.fromMethods({
    handleChatModelStart: (
      _llm: unknown,
      _messages: unknown,
      runId: string,
      _parentRunId?: string,
      _extraParams?: Record<string, unknown>,
      _tags?: string[],
      metadata?: Record<string, unknown>
    ): void => {
      const model =
        asNonEmptyString(metadata?.ls_model_name) ??
        asNonEmptyString(metadata?.[Constants.INVOKED_MODEL]);
      const provider = asNonEmptyString(metadata?.[Constants.INVOKED_PROVIDER]);
      // Nothing worth remembering: the end handler will use the configured values.
      if (model == null && provider == null) {
        return;
      }
      pendingCalls.set(runId, { model, provider });
    },
    handleLLMEnd: async (output: LLMResult, runId: string): Promise<void> => {
      const attribution = pendingCalls.get(runId);
      pendingCalls.delete(runId);
      const model = attribution?.model ?? configuredModel;
      const provider = attribution?.provider ?? configuredProvider;

      // At most one event per generation group (the outer array is
      // per-prompt for batched calls).
      for (const group of output.generations) {
        const usage = firstReportedUsage(group);
        if (usage == null) {
          continue;
        }
        /**
         * Awaited so async host sinks (billing/persistence) finish before
         * the model call resolves; a dropped promise would let the parent
         * run complete before usage is recorded and would surface sink
         * rejections as unhandled rejections.
         */
        try {
          await reportUsage({
            usage,
            model,
            provider,
            subagentType,
            subagentRunId,
            subagentAgentId,
            runId: ownerRunId,
          });
        } catch {
          /* observational — a throwing/rejecting host sink must not break the child run */
        }
      }
    },
    handleLLMError: (_err: unknown, runId: string): void => {
      // A failed call reports no usage; just forget its attribution.
      pendingCalls.delete(runId);
    },
  });

  /**
   * Dispatch usage in step with each model call so every entry is sunk
   * before `workflow.invoke` resolves — hosts read their accumulator right
   * after the parent run completes.
   */
  handler.awaitHandlers = true;
  return handler;
}
914
+
915
/**
 * Narrows an arbitrary value to a non-empty string.
 *
 * @param value - Any value, typically read from loosely-typed metadata.
 * @returns `value` when it is a string other than `''`; otherwise `undefined`.
 */
function asNonEmptyString(value: unknown): string | undefined {
  return typeof value === 'string' && value !== '' ? value : undefined;
}

/**
 * Best-effort read of the configured model from a subagent's client options.
 * Providers disagree on the key (`model` vs `modelName`), so `model` is tried
 * first and `modelName` second. The value is only a fallback for calls that
 * carry no `ls_model_name`.
 *
 * @param agentInputs - Subagent inputs whose `clientOptions` are inspected.
 * @returns The first non-empty string found, or `undefined` when neither key
 *   holds one (including when `clientOptions` is absent).
 */
function extractConfiguredModel(agentInputs: AgentInputs): string | undefined {
  const clientOptions = agentInputs.clientOptions as
    | { model?: unknown; modelName?: unknown }
    | undefined;
  // Reuse the shared helper rather than repeating the typeof / empty-string checks.
  return (
    asNonEmptyString(clientOptions?.model) ??
    asNonEmptyString(clientOptions?.modelName)
  );
}
939
+
722
940
  function sanitizeChildConfigurable(
723
941
  parentConfigurable: Record<string, unknown> | undefined
724
942
  ): Record<string, unknown> {
@@ -3,6 +3,7 @@ import type {
3
3
  BaseMessage,
4
4
  AIMessageChunk,
5
5
  SystemMessage,
6
+ UsageMetadata,
6
7
  } from '@langchain/core/messages';
7
8
  import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
8
9
  import type { START, StateGraph, StateGraphArgs } from '@langchain/langgraph';
@@ -29,10 +30,10 @@ import type {
29
30
  MessageDeltaEvent,
30
31
  ReasoningDeltaEvent,
31
32
  } from '@/types/stream';
33
+ import type { TokenCounter, TokenBudgetBreakdown } from '@/types/run';
32
34
  import type { Providers, Callback, GraphNodeKeys } from '@/common';
33
35
  import type { StandardGraph, MultiAgentGraph } from '@/graphs';
34
36
  import type { ClientOptions } from '@/types/llm';
35
- import type { TokenCounter } from '@/types/run';
36
37
 
37
38
  /** Interface for bound model with stream and invoke methods */
38
39
  export interface ChatModel {
@@ -89,6 +90,30 @@ export interface AgentLogEvent {
89
90
  agentId?: string;
90
91
  }
91
92
 
93
+ /**
94
+ * Per-model-call context window usage snapshot, dispatched after pruning and
95
+ * before the model invocation. Dispatched once per `callModel` invocation:
96
+ * fallback retries reuse the snapshot since the prompt is identical — budget
97
+ * numbers reflect the primary provider's tokenizer, and the calibration
98
+ * ratio self-corrects from whichever provider reports usage.
99
+ */
100
+ export interface ContextUsageEvent {
101
+ runId?: string;
102
+ agentId?: string;
103
+ /** Structural token budget snapshot from AgentContext.getTokenBudgetBreakdown */
104
+ breakdown: TokenBudgetBreakdown;
105
+ /** Usable budget this call: maxContextTokens minus output reserve */
106
+ contextBudget?: number;
107
+ /** Calibrated instruction overhead actually applied this call */
108
+ effectiveInstructionTokens?: number;
109
+ /** Calibrated message tokens before pruning (excluding instructions) */
110
+ prePruneContextTokens?: number;
111
+ /** Tokens still free after instructions + pruned messages */
112
+ remainingContextTokens?: number;
113
+ /** EMA ratio of provider-reported vs locally estimated token counts */
114
+ calibrationRatio?: number;
115
+ }
116
+
92
117
  export interface EventHandler {
93
118
  handle(
94
119
  event: string,
@@ -104,6 +129,7 @@ export interface EventHandler {
104
129
  | SummarizeCompleteEvent
105
130
  | SubagentUpdateEvent
106
131
  | AgentLogEvent
132
+ | ContextUsageEvent
107
133
  | ToolExecuteBatchRequest
108
134
  | { result: ToolEndEvent },
109
135
  metadata?: Record<string, unknown>,
@@ -299,6 +325,17 @@ export type StandardGraphInput = {
299
325
  tokenCounter?: TokenCounter;
300
326
  indexTokenCountMap?: Record<string, number>;
301
327
  calibrationRatio?: number;
328
+ /**
329
+ * Receives a {@link SubagentUsageEvent} for every model call made inside
330
+ * a subagent child run spawned from this graph (including nested
331
+ * subagents and child-side summarization calls). Child graphs run via
332
+ * `invoke()` outside the host's `streamEvents` loop, so their
333
+ * `on_chat_model_end` events never reach the run's handler registry —
334
+ * this sink is the only way hosts can observe child token usage for
335
+ * billing/accounting. Parent-graph model calls are NOT reported here;
336
+ * they already flow through the registry's `CHAT_MODEL_END` handler.
337
+ */
338
+ subagentUsageSink?: SubagentUsageSink;
302
339
  };
303
340
 
304
341
  export type GraphEdge = {
@@ -409,6 +446,62 @@ export interface SubagentUpdateEvent {
409
446
  timestamp: string;
410
447
  }
411
448
 
449
+ /**
450
+ * Token usage for a single model call made inside a subagent child run.
451
+ * Emitted through {@link SubagentUsageSink} as each call completes, so
452
+ * hosts can bill child-run model usage that never reaches the parent
453
+ * run's `CHAT_MODEL_END` handler (child graphs execute via `invoke()`
454
+ * outside the host's `streamEvents` loop).
455
+ */
456
+ export interface SubagentUsageEvent {
457
+ /** Usage metadata reported by the child's model call. */
458
+ usage: UsageMetadata;
459
+ /**
460
+ * Model that produced this usage, resolved in priority order: the per-call
461
+ * `ls_model_name` from the model's callback metadata when available (covers
462
+ * child-side summarization or any call that differs from the configured
463
+ * model), then the fallback invocation's configured model (`INVOKED_MODEL`
464
+ * metadata), and finally the subagent config's `clientOptions` model.
465
+ */
466
+ model?: string;
467
+ /**
468
+ * Provider that actually served this call — the SDK `Providers` enum
469
+ * value stamped per-invocation by `attemptInvoke` (`INVOKED_PROVIDER`
470
+ * metadata), so fallback-served calls are attributed to the fallback
471
+ * provider, not the configured primary. Falls back to the subagent
472
+ * config's provider. This is never LangSmith's `ls_provider` string:
473
+ * derived providers inherit that string from their base class, whereas
474
+ * hosts key pricing/cache semantics off the enum value.
475
+ */
476
+ provider?: string;
477
+ /** Subagent `type` identifier from the SubagentConfig. */
478
+ subagentType: string;
479
+ /** Child run ID (unique per subagent execution). */
480
+ subagentRunId: string;
481
+ /** Child agent ID assigned to this subagent execution. */
482
+ subagentAgentId: string;
483
+ /**
484
+ * ROOT run ID of the host run that owns billing. For nested subagents
485
+ * each forwarding layer rewrites this upward, so events from any depth
486
+ * surface with the outermost run's ID — never an intermediate
487
+ * `*_sub_*` child id (use {@link subagentRunId} to identify the
488
+ * emitting child).
489
+ */
490
+ runId: string;
491
+ }
492
+
493
+ /**
494
+ * Host-provided callback receiving {@link SubagentUsageEvent}s. Invoked as
495
+ * each child model call completes. May return a promise — the executor
496
+ * awaits each dispatch (so all usage is recorded before the child's result
497
+ * resolves to the parent) and swallows both synchronous throws and
498
+ * rejections; implementations should still be cheap, as they sit on the
499
+ * child's model-call path.
500
+ */
501
+ export type SubagentUsageSink = (
502
+ event: SubagentUsageEvent
503
+ ) => void | Promise<void>;
504
+
412
505
  export type LangfuseToolOutputTracingConfig = {
413
506
  /**
414
507
  * Whether tool outputs should be exported to Langfuse. Defaults to
package/src/types/run.ts CHANGED
@@ -125,6 +125,15 @@ export type RunConfig = {
125
125
  */
126
126
  langfuse?: g.LangfuseConfig;
127
127
  customHandlers?: Record<string, g.EventHandler>;
128
+ /**
129
+ * Receives token usage for every model call made inside subagent child
130
+ * runs (including nested subagents). Child graphs execute via `invoke()`
131
+ * outside this run's `streamEvents` loop, so their model-end events never
132
+ * reach `customHandlers` — without this sink, child usage is invisible to
133
+ * the host. Parent-graph calls are not reported here; they flow through
134
+ * the registered `CHAT_MODEL_END` handler as usual.
135
+ */
136
+ subagentUsageSink?: g.SubagentUsageSink;
128
137
  /**
129
138
  * Pre-constructed hook registry for this run. Hooks fire at lifecycle
130
139
  * points in `processStream` (RunStart, UserPromptSubmit, Stop,
@@ -242,6 +251,10 @@ export type TokenBudgetBreakdown = {
242
251
  messageTokens: number;
243
252
  /** Tokens available for messages after instructions. */
244
253
  availableForMessages: number;
254
+ /** Per-tool schema token counts (post-multiplier), keyed by tool name. */
255
+ toolTokenCounts?: Record<string, number>;
256
+ /** Names of counted tools that are deferred (`defer_loading`) and discovered. */
257
+ deferredToolNames?: string[];
245
258
  };
246
259
 
247
260
  export type EventStreamOptions = {
@@ -0,0 +1,32 @@
1
+ import { apportionTokenCounts } from '@/utils/tokens';
2
+
3
+ describe('apportionTokenCounts', () => {
4
+ it('sums exactly to a ceil-of-sum aggregate despite per-entry fractions', () => {
5
+ const raw = { add: 33, search: 33, fetch: 33 };
6
+ const multiplier = 1.4;
7
+ const target = Math.ceil((33 + 33 + 33) * multiplier);
8
+ const result = apportionTokenCounts(raw, multiplier, target);
9
+ const sum = Object.values(result).reduce((acc, count) => acc + count, 0);
10
+ expect(sum).toBe(target);
11
+ expect(Object.keys(result).sort()).toEqual(['add', 'fetch', 'search']);
12
+ });
13
+
14
+ it('gives larger remainders priority when distributing leftovers', () => {
15
+ const result = apportionTokenCounts({ a: 10, b: 19 }, 1.05, 31);
16
+ expect(result.a + result.b).toBe(31);
17
+ expect(result.b).toBe(20);
18
+ expect(result.a).toBe(11);
19
+ });
20
+
21
+ it('handles calibration-style rescaling to an arbitrary target', () => {
22
+ const counts = { a: 100, b: 200, c: 300 };
23
+ const target = 451;
24
+ const result = apportionTokenCounts(counts, target / 600, target);
25
+ const sum = Object.values(result).reduce((acc, count) => acc + count, 0);
26
+ expect(sum).toBe(target);
27
+ });
28
+
29
+ it('returns an empty map for no entries', () => {
30
+ expect(apportionTokenCounts({}, 1.4, 10)).toEqual({});
31
+ });
32
+ });