@librechat/agents 3.2.34 → 3.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/dist/cjs/agents/AgentContext.cjs +47 -10
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +121 -3
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/llm/invoke.cjs +49 -8
  8. package/dist/cjs/llm/invoke.cjs.map +1 -1
  9. package/dist/cjs/main.cjs +2 -0
  10. package/dist/cjs/messages/content.cjs +12 -14
  11. package/dist/cjs/messages/content.cjs.map +1 -1
  12. package/dist/cjs/messages/prune.cjs +31 -13
  13. package/dist/cjs/messages/prune.cjs.map +1 -1
  14. package/dist/cjs/run.cjs +7 -2
  15. package/dist/cjs/run.cjs.map +1 -1
  16. package/dist/cjs/summarization/node.cjs +12 -1
  17. package/dist/cjs/summarization/node.cjs.map +1 -1
  18. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
  19. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  20. package/dist/cjs/utils/tokens.cjs +30 -0
  21. package/dist/cjs/utils/tokens.cjs.map +1 -1
  22. package/dist/esm/agents/AgentContext.mjs +47 -10
  23. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  24. package/dist/esm/common/enum.mjs +13 -0
  25. package/dist/esm/common/enum.mjs.map +1 -1
  26. package/dist/esm/graphs/Graph.mjs +122 -4
  27. package/dist/esm/graphs/Graph.mjs.map +1 -1
  28. package/dist/esm/llm/invoke.mjs +49 -8
  29. package/dist/esm/llm/invoke.mjs.map +1 -1
  30. package/dist/esm/main.mjs +3 -3
  31. package/dist/esm/messages/content.mjs +12 -15
  32. package/dist/esm/messages/content.mjs.map +1 -1
  33. package/dist/esm/messages/prune.mjs +31 -13
  34. package/dist/esm/messages/prune.mjs.map +1 -1
  35. package/dist/esm/run.mjs +7 -2
  36. package/dist/esm/run.mjs.map +1 -1
  37. package/dist/esm/summarization/node.mjs +12 -1
  38. package/dist/esm/summarization/node.mjs.map +1 -1
  39. package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
  40. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  41. package/dist/esm/utils/tokens.mjs +30 -1
  42. package/dist/esm/utils/tokens.mjs.map +1 -1
  43. package/dist/types/agents/AgentContext.d.ts +7 -3
  44. package/dist/types/common/enum.d.ts +13 -0
  45. package/dist/types/graphs/Graph.d.ts +8 -1
  46. package/dist/types/llm/invoke.d.ts +1 -1
  47. package/dist/types/messages/content.d.ts +5 -0
  48. package/dist/types/messages/prune.d.ts +4 -0
  49. package/dist/types/run.d.ts +1 -0
  50. package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
  51. package/dist/types/types/graph.d.ts +89 -3
  52. package/dist/types/types/run.d.ts +13 -0
  53. package/dist/types/utils/tokens.d.ts +7 -0
  54. package/package.json +1 -1
  55. package/src/agents/AgentContext.ts +69 -6
  56. package/src/agents/__tests__/AgentContext.test.ts +6 -2
  57. package/src/common/enum.ts +13 -0
  58. package/src/graphs/Graph.ts +196 -0
  59. package/src/llm/invoke.test.ts +79 -1
  60. package/src/llm/invoke.ts +58 -4
  61. package/src/messages/content.ts +24 -32
  62. package/src/messages/prune.ts +39 -2
  63. package/src/run.ts +5 -0
  64. package/src/scripts/subagent-usage-sink.ts +176 -0
  65. package/src/specs/context-accuracy.live.test.ts +409 -0
  66. package/src/specs/context-usage-event.test.ts +117 -0
  67. package/src/specs/context-usage.live.test.ts +297 -0
  68. package/src/specs/prune.test.ts +51 -1
  69. package/src/specs/subagent.test.ts +124 -1
  70. package/src/summarization/__tests__/node.test.ts +60 -1
  71. package/src/summarization/node.ts +20 -1
  72. package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
  73. package/src/tools/subagent/SubagentExecutor.ts +221 -3
  74. package/src/types/graph.ts +94 -1
  75. package/src/types/run.ts +13 -0
  76. package/src/utils/__tests__/apportion.test.ts +32 -0
  77. package/src/utils/tokens.ts +33 -0
package/src/specs/context-usage.live.test.ts +297 -0
@@ -0,0 +1,297 @@
1
+ // src/specs/context-usage.live.test.ts
2
+ /**
3
+ * Live ON_CONTEXT_USAGE / usage accounting verification with real Anthropic
4
+ * calls — single agent, multi-agent handoff, and subagent isolation.
5
+ *
6
+ * Run with:
7
+ * RUN_CONTEXT_USAGE_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- context-usage.live.test.ts --runInBand
8
+ */
9
+ import { config as dotenvConfig } from 'dotenv';
10
+ dotenvConfig();
11
+
12
+ import { HumanMessage } from '@langchain/core/messages';
13
+ import { describe, expect, it, jest } from '@jest/globals';
14
+ import type { ToolCall } from '@langchain/core/messages/tool';
15
+ import type { RunnableConfig } from '@langchain/core/runnables';
16
+ import type * as t from '@/types';
17
+ import { createTokenCounter, TokenEncoderManager } from '@/utils/tokens';
18
+ import { Constants, GraphEvents, Providers } from '@/common';
19
+ import { ModelEndHandler } from '@/events';
20
+ import { Run } from '@/run';
21
+
22
+ const shouldRunLive =
23
+ process.env.RUN_CONTEXT_USAGE_LIVE_TESTS === '1' &&
24
+ process.env.ANTHROPIC_API_KEY != null &&
25
+ process.env.ANTHROPIC_API_KEY !== '';
26
+
27
+ const describeIfLive = shouldRunLive ? describe : describe.skip;
28
+ const modelName =
29
+ process.env.ANTHROPIC_CONTEXT_LIVE_MODEL ?? 'claude-haiku-4-5';
30
+
31
+ const MAX_CONTEXT_TOKENS = 8000;
32
+
33
+ function createAnthropicAgent(
34
+ agentId: string,
35
+ instructions: string,
36
+ extras: Partial<t.AgentInputs> = {}
37
+ ): t.AgentInputs {
38
+ return {
39
+ agentId,
40
+ provider: Providers.ANTHROPIC,
41
+ clientOptions: {
42
+ modelName,
43
+ apiKey: process.env.ANTHROPIC_API_KEY,
44
+ temperature: 0,
45
+ maxTokens: 128,
46
+ streaming: true,
47
+ streamUsage: true,
48
+ },
49
+ instructions,
50
+ maxContextTokens: MAX_CONTEXT_TOKENS,
51
+ ...extras,
52
+ };
53
+ }
54
+
55
+ function createStreamConfig(threadId: string): Partial<RunnableConfig> & {
56
+ version: 'v1' | 'v2';
57
+ streamMode: string;
58
+ } {
59
+ return {
60
+ configurable: { thread_id: threadId },
61
+ streamMode: 'values',
62
+ version: 'v2',
63
+ };
64
+ }
65
+
66
+ interface CapturedEvents {
67
+ contextEvents: t.ContextUsageEvent[];
68
+ subagentUpdates: unknown[];
69
+ collectedUsage: Array<Record<string, number | undefined>>;
70
+ handlers: Record<string, t.EventHandler>;
71
+ }
72
+
73
+ function createCapture(): CapturedEvents {
74
+ const contextEvents: t.ContextUsageEvent[] = [];
75
+ const subagentUpdates: unknown[] = [];
76
+ const collectedUsage: Array<Record<string, number | undefined>> = [];
77
+ const handlers: Record<string, t.EventHandler> = {
78
+ [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage as never),
79
+ [GraphEvents.ON_CONTEXT_USAGE]: {
80
+ handle: (_event, data): void => {
81
+ contextEvents.push(data as unknown as t.ContextUsageEvent);
82
+ },
83
+ },
84
+ [GraphEvents.ON_SUBAGENT_UPDATE]: {
85
+ handle: (_event, data): void => {
86
+ subagentUpdates.push(data);
87
+ },
88
+ },
89
+ };
90
+ return { contextEvents, subagentUpdates, collectedUsage, handlers };
91
+ }
92
+
93
+ describeIfLive('Context usage live integration', () => {
94
+ jest.setTimeout(180_000);
95
+
96
+ let tokenCounter: t.TokenCounter;
97
+
98
+ beforeAll(async () => {
99
+ tokenCounter = await createTokenCounter();
100
+ });
101
+
102
+ afterAll(() => {
103
+ TokenEncoderManager.reset();
104
+ });
105
+
106
+ it('emits a snapshot whose estimate tracks real provider input tokens', async () => {
107
+ const capture = createCapture();
108
+ const run = await Run.create<t.IState>({
109
+ runId: `ctx-live-single-${Date.now()}`,
110
+ graphConfig: {
111
+ type: 'standard',
112
+ agents: [
113
+ createAnthropicAgent(
114
+ 'solo',
115
+ 'You are concise. Reply with one short sentence.'
116
+ ),
117
+ ],
118
+ },
119
+ returnContent: true,
120
+ skipCleanup: true,
121
+ customHandlers: capture.handlers,
122
+ tokenCounter,
123
+ indexTokenCountMap: {},
124
+ });
125
+
126
+ await run.processStream(
127
+ { messages: [new HumanMessage('Say hello in five words or fewer.')] },
128
+ createStreamConfig(`ctx-live-single-${Date.now()}`)
129
+ );
130
+
131
+ expect(capture.contextEvents).toHaveLength(1);
132
+ const event = capture.contextEvents[0];
133
+ expect(event.agentId).toBe('solo');
134
+ expect(event.breakdown.maxContextTokens).toBe(MAX_CONTEXT_TOKENS);
135
+ expect(event.contextBudget).toBeLessThanOrEqual(MAX_CONTEXT_TOKENS);
136
+
137
+ expect(capture.collectedUsage).toHaveLength(1);
138
+ const usage = capture.collectedUsage[0];
139
+ expect(usage.input_tokens ?? 0).toBeGreaterThan(0);
140
+ expect(usage.output_tokens ?? 0).toBeGreaterThan(0);
141
+
142
+ /** The gauge shows `contextBudget - remaining` as occupancy; with a real
143
+ * tokenizer it should land in the same ballpark as the provider count */
144
+ const estimatedUsed =
145
+ (event.contextBudget ?? 0) - (event.remainingContextTokens ?? 0);
146
+ const providerInput = usage.input_tokens ?? 0;
147
+ expect(estimatedUsed).toBeGreaterThan(0);
148
+ expect(estimatedUsed / providerInput).toBeGreaterThan(0.3);
149
+ expect(estimatedUsed / providerInput).toBeLessThan(3);
150
+ });
151
+
152
+ it('emits per-agent snapshots and usage across a real handoff', async () => {
153
+ const capture = createCapture();
154
+ const nonce = `ctx-live-handoff-${Date.now()}`;
155
+ const expectedReply = `${nonce}-confirmed`;
156
+ const handoffToolName = `${Constants.LC_TRANSFER_TO_}specialist`;
157
+
158
+ const run = await Run.create<t.IState>({
159
+ runId: `${nonce}-run`,
160
+ graphConfig: {
161
+ type: 'multi-agent',
162
+ agents: [
163
+ createAnthropicAgent(
164
+ 'router',
165
+ `You are a routing agent. For every user request, your only valid action is to call the handoff tool named ${handoffToolName}. Do not answer directly.
166
+
167
+ When you call the handoff tool, include instructions telling the specialist to reply exactly with this marker and no extra words: ${expectedReply}`
168
+ ),
169
+ createAnthropicAgent(
170
+ 'specialist',
171
+ 'You are the specialist. When you receive handoff instructions with a marker, reply exactly with that marker and no extra words.'
172
+ ),
173
+ ],
174
+ edges: [
175
+ {
176
+ from: 'router',
177
+ to: 'specialist',
178
+ edgeType: 'handoff',
179
+ description: 'Transfer to the specialist for the final response',
180
+ prompt:
181
+ 'Instructions for the specialist. Include any exact marker that must be returned.',
182
+ promptKey: 'instructions',
183
+ },
184
+ ],
185
+ },
186
+ returnContent: true,
187
+ skipCleanup: true,
188
+ customHandlers: capture.handlers,
189
+ tokenCounter,
190
+ indexTokenCountMap: {},
191
+ });
192
+
193
+ await run.processStream(
194
+ {
195
+ messages: [
196
+ new HumanMessage(
197
+ `Please delegate this to the specialist. The final answer must be exactly: ${expectedReply}`
198
+ ),
199
+ ],
200
+ },
201
+ createStreamConfig(`${nonce}-thread`)
202
+ );
203
+
204
+ const agentIds = new Set(
205
+ capture.contextEvents.map((event) => event.agentId)
206
+ );
207
+ expect(agentIds.has('router')).toBe(true);
208
+ expect(agentIds.has('specialist')).toBe(true);
209
+
210
+ for (const event of capture.contextEvents) {
211
+ expect(event.breakdown.maxContextTokens).toBe(MAX_CONTEXT_TOKENS);
212
+ expect(event.contextBudget).toBeLessThanOrEqual(MAX_CONTEXT_TOKENS);
213
+ expect(event.remainingContextTokens).toBeGreaterThan(0);
214
+ }
215
+
216
+ /** One snapshot per real model call — no ghost snapshots */
217
+ expect(capture.collectedUsage.length).toBe(capture.contextEvents.length);
218
+ expect(capture.collectedUsage.length).toBeGreaterThanOrEqual(2);
219
+ });
220
+
221
+ it('keeps subagent runs isolated from parent context/usage events', async () => {
222
+ const capture = createCapture();
223
+ const parent = createAnthropicAgent(
224
+ 'parent',
225
+ 'You are a supervisor. Delegate research tasks using the subagent tool.',
226
+ {
227
+ subagentConfigs: [
228
+ {
229
+ type: 'researcher',
230
+ name: 'Research Agent',
231
+ description: 'Researches and summarizes information',
232
+ agentInputs: createAnthropicAgent(
233
+ 'researcher',
234
+ 'You are a research agent. Answer in one short sentence.'
235
+ ),
236
+ },
237
+ ],
238
+ }
239
+ );
240
+
241
+ const run = await Run.create<t.IState>({
242
+ runId: `ctx-live-subagent-${Date.now()}`,
243
+ graphConfig: { type: 'standard', agents: [parent] },
244
+ returnContent: true,
245
+ skipCleanup: true,
246
+ customHandlers: capture.handlers,
247
+ tokenCounter,
248
+ indexTokenCountMap: {},
249
+ });
250
+
251
+ /** Parent is a fake forced to call the subagent tool — the child run
252
+ * executes on the real provider, exercising real isolation */
253
+ const subagentToolCall: ToolCall = {
254
+ id: 'call_subagent_live',
255
+ name: Constants.SUBAGENT,
256
+ args: {
257
+ description: 'What is the capital of France? One short sentence.',
258
+ subagent_type: 'researcher',
259
+ },
260
+ type: 'tool_call',
261
+ };
262
+ run.Graph?.overrideTestModel(
263
+ ['Delegating to the researcher.', 'The researcher confirmed the answer.'],
264
+ 10,
265
+ [subagentToolCall]
266
+ );
267
+
268
+ await run.processStream(
269
+ { messages: [new HumanMessage('What is the capital of France?')] },
270
+ createStreamConfig(`ctx-live-subagent-${Date.now()}`)
271
+ );
272
+
273
+ /** Child progress arrives only as wrapped subagent updates */
274
+ expect(capture.subagentUpdates.length).toBeGreaterThan(0);
275
+
276
+ /** No raw child snapshots leak into the parent handler registry */
277
+ const childContextEvents = capture.contextEvents.filter(
278
+ (event) => event.agentId !== 'parent'
279
+ );
280
+ expect(childContextEvents).toHaveLength(0);
281
+ for (const event of capture.contextEvents) {
282
+ expect(event.agentId).toBe('parent');
283
+ }
284
+
285
+ /** Documented isolation: child model-call usage does not reach the
286
+ * parent's collected usage (fake parent emits no usage_metadata) */
287
+ expect(capture.collectedUsage).toHaveLength(0);
288
+
289
+ const toolMessage = (run.getRunMessages() ?? []).find(
290
+ (message) =>
291
+ message.getType() === 'tool' &&
292
+ (message as { name?: string }).name === Constants.SUBAGENT
293
+ );
294
+ expect(toolMessage).toBeDefined();
295
+ expect(String(toolMessage?.content ?? '').toLowerCase()).toContain('paris');
296
+ });
297
+ });
package/src/specs/prune.test.ts +51 -1
@@ -472,6 +472,53 @@ describe('Prune Messages Tests', () => {
472
472
  expect(typeof result.remainingContextTokens).toBe('number');
473
473
  });
474
474
 
475
+ it('should return remaining tokens in calibrated units when pruning with calibration', () => {
476
+ const tokenCounter = createTestTokenCounter();
477
+ const messages = [
478
+ new SystemMessage('System instruction'),
479
+ new HumanMessage('Message 1'),
480
+ new AIMessage('Response 1'),
481
+ new HumanMessage('Message 2'),
482
+ new AIMessage('Response 2'),
483
+ ];
484
+
485
+ const indexTokenCountMap = {
486
+ 0: tokenCounter(messages[0]),
487
+ 1: tokenCounter(messages[1]),
488
+ 2: tokenCounter(messages[2]),
489
+ 3: tokenCounter(messages[3]),
490
+ 4: tokenCounter(messages[4]),
491
+ };
492
+
493
+ const calibrationRatio = 2;
494
+ const maxTokens = 80;
495
+ const pruneMessages = createPruneMessages({
496
+ maxTokens,
497
+ startIndex: 0,
498
+ tokenCounter,
499
+ indexTokenCountMap,
500
+ reserveRatio: 0,
501
+ calibrationRatio,
502
+ });
503
+
504
+ const result = pruneMessages({ messages });
505
+
506
+ expect(result.messagesToRefine?.length).toBeGreaterThan(0);
507
+
508
+ /** Pruning selects within rawSpaceBudget = maxTokens / ratio (raw units,
509
+ * minus the 3-token assistant label); the returned remaining must be
510
+ * scaled back so `budget - remaining` reflects provider-space usage */
511
+ const keptRaw = result.context.reduce(
512
+ (sum, msg) => sum + tokenCounter(msg),
513
+ 0
514
+ );
515
+ const rawSpaceBudget = Math.round(maxTokens / calibrationRatio);
516
+ const expectedRemaining =
517
+ (rawSpaceBudget - keptRaw - 3) * calibrationRatio;
518
+ expect(result.remainingContextTokens).toBe(expectedRemaining);
519
+ expect(result.contextBudget).toBe(maxTokens);
520
+ });
521
+
475
522
  it('should respect startType parameter', () => {
476
523
  const tokenCounter = createTestTokenCounter();
477
524
  const messages = [
@@ -1397,7 +1444,10 @@ describe('Prune Messages Tests', () => {
1397
1444
  expect(result.context).toEqual([]);
1398
1445
  expect(result.messagesToRefine).toEqual([]);
1399
1446
  expect(result.prePruneContextTokens).toBe(0);
1400
- expect(result.remainingContextTokens).toBe(8000);
1447
+ /** Reserve-adjusted budget (8000 − 5%) minus instruction overhead */
1448
+ expect(result.contextBudget).toBe(7600);
1449
+ expect(result.effectiveInstructionTokens).toBe(4000);
1450
+ expect(result.remainingContextTokens).toBe(3600);
1401
1451
  });
1402
1452
  });
1403
1453
 
package/src/specs/subagent.test.ts +124 -1
@@ -1,6 +1,12 @@
1
+ import { ChatGenerationChunk } from '@langchain/core/outputs';
1
2
  import { FakeListChatModel } from '@langchain/core/utils/testing';
2
- import { AIMessage, HumanMessage } from '@langchain/core/messages';
3
+ import {
4
+ AIMessage,
5
+ AIMessageChunk,
6
+ HumanMessage,
7
+ } from '@langchain/core/messages';
3
8
  import type { RunnableConfig } from '@langchain/core/runnables';
9
+ import type { UsageMetadata } from '@langchain/core/messages';
4
10
  import type { ToolCall } from '@langchain/core/messages/tool';
5
11
  import type * as t from '@/types';
6
12
  import {
@@ -388,4 +394,121 @@ describe('Subagent Integration', () => {
388
394
  contextWithout!.toolSchemaTokens
389
395
  );
390
396
  });
397
+
398
+ it('reports child model usage through subagentUsageSink', async () => {
399
+ const CHILD_USAGE = {
400
+ input_tokens: 11,
401
+ output_tokens: 7,
402
+ total_tokens: 18,
403
+ };
404
+ /**
405
+ * The default mock (FakeListChatModel) reports no usage. Re-mock with a
406
+ * subclass that reports `usage_metadata` the way live providers do:
407
+ * stamped on the generation in the invoke path, and carried on a final
408
+ * zero-content chunk in the stream path (the graph's `attemptInvoke`
409
+ * prefers `model.stream()`, and chunk concatenation folds the usage
410
+ * into the aggregated message that `handleLLMEnd` receives).
411
+ */
412
+ getChatModelClassSpy.mockImplementation(((provider: Providers) => {
413
+ if (provider === Providers.OPENAI) {
414
+ return class extends FakeListChatModel {
415
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
416
+ constructor(_options: any) {
417
+ super({ responses: [CHILD_RESPONSE] });
418
+ }
419
+ async _generate(
420
+ ...args: Parameters<FakeListChatModel['_generate']>
421
+ ): ReturnType<FakeListChatModel['_generate']> {
422
+ const result = await super._generate(...args);
423
+ for (const generation of result.generations) {
424
+ (generation.message as AIMessage).usage_metadata = {
425
+ ...CHILD_USAGE,
426
+ };
427
+ }
428
+ return result;
429
+ }
430
+ async *_streamResponseChunks(
431
+ ...args: Parameters<FakeListChatModel['_streamResponseChunks']>
432
+ ): ReturnType<FakeListChatModel['_streamResponseChunks']> {
433
+ yield* super._streamResponseChunks(...args);
434
+ yield new ChatGenerationChunk({
435
+ text: '',
436
+ message: new AIMessageChunk({
437
+ content: '',
438
+ usage_metadata: { ...CHILD_USAGE },
439
+ }),
440
+ });
441
+ }
442
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
443
+ } as any;
444
+ }
445
+ return originalGetChatModelClass(provider);
446
+ }) as typeof providers.getChatModelClass);
447
+
448
+ const collectedUsage: UsageMetadata[] = [];
449
+ const sunkEvents: t.SubagentUsageEvent[] = [];
450
+ const customHandlers: Record<string, t.EventHandler> = {
451
+ [GraphEvents.TOOL_END]: new ToolEndHandler(),
452
+ [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
453
+ };
454
+
455
+ const runId = `subagent-usage-${Date.now()}`;
456
+ const run = await Run.create<t.IState>({
457
+ runId,
458
+ graphConfig: {
459
+ type: 'standard',
460
+ agents: [createParentAgent()],
461
+ },
462
+ returnContent: true,
463
+ skipCleanup: true,
464
+ customHandlers,
465
+ subagentUsageSink: (event) => {
466
+ sunkEvents.push(event);
467
+ },
468
+ });
469
+
470
+ const subagentToolCall: ToolCall = {
471
+ id: 'call_subagent_usage',
472
+ name: Constants.SUBAGENT,
473
+ args: {
474
+ description: 'What is the capital of France?',
475
+ subagent_type: 'researcher',
476
+ },
477
+ type: 'tool_call',
478
+ };
479
+
480
+ run.Graph?.overrideTestModel(
481
+ [
482
+ 'Let me delegate this research task.',
483
+ `Based on the research: ${CHILD_RESPONSE}`,
484
+ ],
485
+ 10,
486
+ [subagentToolCall]
487
+ );
488
+
489
+ await run.processStream(
490
+ { messages: [new HumanMessage('What is the capital of France?')] },
491
+ callerConfig
492
+ );
493
+
494
+ /** Child made exactly one model call; all events are child-tagged. */
495
+ expect(sunkEvents).toHaveLength(1);
496
+ const event = sunkEvents[0];
497
+ /** Chunk concat adds empty `*_token_details` — match on the counts. */
498
+ expect(event.usage).toMatchObject(CHILD_USAGE);
499
+ expect(event.subagentType).toBe('researcher');
500
+ expect(event.subagentAgentId).toBe('researcher');
501
+ expect(event.provider).toBe(Providers.OPENAI);
502
+ /** FakeListChatModel emits no ls_model_name → config fallback. */
503
+ expect(event.model).toBe('gpt-4o-mini');
504
+ expect(event.runId).toBe(runId);
505
+ expect(event.subagentRunId).toContain(`${runId}_sub_`);
506
+ /**
507
+ * The parent's own calls must NOT be routed through the sink — they
508
+ * flow through the registered CHAT_MODEL_END handler. (The fake
509
+ * override model reports no usage, so collectedUsage stays empty;
510
+ * the load-bearing assertion is that the sink saw no parent calls.)
511
+ */
512
+ expect(sunkEvents.every((e) => e.subagentType === 'researcher')).toBe(true);
513
+ });
391
514
  });
package/src/summarization/__tests__/node.test.ts +60 -1
@@ -6,8 +6,8 @@ import {
6
6
  DEFAULT_SUMMARIZATION_PROMPT,
7
7
  DEFAULT_UPDATE_SUMMARIZATION_PROMPT,
8
8
  } from '@/summarization/node';
9
+ import { Constants, GraphEvents, Providers } from '@/common';
9
10
  import { AgentContext } from '@/agents/AgentContext';
10
- import { GraphEvents, Providers } from '@/common';
11
11
  import * as providers from '@/llm/providers';
12
12
  import * as eventUtils from '@/utils/events';
13
13
 
@@ -216,6 +216,65 @@ describe('createSummarizeNode', () => {
216
216
  ).toBeUndefined();
217
217
  });
218
218
 
219
+ it('stamps INVOKED_MODEL/INVOKED_PROVIDER metadata for a dedicated summarizer model', async () => {
220
+ captureEvents();
221
+
222
+ const capturedConfigs: unknown[] = [];
223
+ jest.spyOn(providers, 'getChatModelClass').mockReturnValue(
224
+ class {
225
+ constructor() {
226
+ return {
227
+ invoke: jest
228
+ .fn()
229
+ .mockImplementation(
230
+ async (_messages: unknown, config?: unknown) => {
231
+ capturedConfigs.push(config);
232
+ return { content: 'Summary text' };
233
+ }
234
+ ),
235
+ };
236
+ }
237
+ } as never
238
+ );
239
+
240
+ const agentContext = createAgentContext({
241
+ summarizationConfig: {
242
+ retainRecent: { turns: 0 },
243
+ model: 'gpt-4.1-mini',
244
+ },
245
+ });
246
+ const graph = mockGraph();
247
+ const node = createSummarizeNode({
248
+ agentContext,
249
+ graph,
250
+ generateStepId,
251
+ });
252
+
253
+ await node(
254
+ {
255
+ messages: [new HumanMessage('Hello'), new HumanMessage('World')],
256
+ summarizationRequest: {
257
+ remainingContextTokens: 1000,
258
+ agentId: 'agent_0',
259
+ },
260
+ },
261
+ {} as RunnableConfig
262
+ );
263
+
264
+ /**
265
+ * Usage consumers (the subagent usage-capture handler) attribute the
266
+ * call from these keys — without them, a summarizer model that differs
267
+ * from the agent's primary would be billed against the primary config.
268
+ */
269
+ const config = capturedConfigs[0] as {
270
+ metadata?: Record<string, unknown>;
271
+ };
272
+ expect(config.metadata?.[Constants.INVOKED_MODEL]).toBe('gpt-4.1-mini');
273
+ expect(config.metadata?.[Constants.INVOKED_PROVIDER]).toBe(
274
+ Providers.OPENAI
275
+ );
276
+ });
277
+
219
278
  it('collects streamed text when model supports stream()', async () => {
220
279
  captureEvents();
221
280
 
package/src/summarization/node.ts +20 -1
@@ -10,7 +10,13 @@ import type { AgentContext } from '@/agents/AgentContext';
10
10
  import type { HookRegistry } from '@/hooks';
11
11
  import type { OnChunk } from '@/llm/invoke';
12
12
  import type * as t from '@/types';
13
- import { ContentTypes, GraphEvents, StepTypes, Providers } from '@/common';
13
+ import {
14
+ Constants,
15
+ ContentTypes,
16
+ GraphEvents,
17
+ StepTypes,
18
+ Providers,
19
+ } from '@/common';
14
20
  import { safeDispatchCustomEvent, emitAgentLog } from '@/utils/events';
15
21
  import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
16
22
  import { createRemoveAllMessage } from '@/messages/reducer';
@@ -938,6 +944,19 @@ export function createSummarizeNode({
938
944
  agent_id: request.agentId,
939
945
  summarization_provider: clientConfig.provider,
940
946
  summarization_model: clientConfig.modelName,
947
+ /**
948
+ * Per-call model attribution for usage consumers (the subagent
949
+ * usage-capture handler): the summarizer's model can differ from
950
+ * the agent's primary, and providers that emit no `ls_model_name`
951
+ * would otherwise be billed against the primary config's model.
952
+ * Omitted for self-summarize (no explicit model — the primary
953
+ * config fallback is then correct). `tryFallbackProviders`
954
+ * overrides this per fallback attempt; `INVOKED_PROVIDER` is
955
+ * stamped by `attemptInvoke` itself.
956
+ */
957
+ ...(clientConfig.modelName != null && clientConfig.modelName !== ''
958
+ ? { [Constants.INVOKED_MODEL]: clientConfig.modelName }
959
+ : {}),
941
960
  },
942
961
  }
943
962
  : undefined;