@librechat/agents 3.2.33 → 3.2.35

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (133)
  1. package/dist/cjs/agents/AgentContext.cjs +47 -10
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +121 -3
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/index.cjs +21 -2
  8. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
  10. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  11. package/dist/cjs/llm/google/utils/common.cjs +6 -0
  12. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  13. package/dist/cjs/llm/invoke.cjs +49 -8
  14. package/dist/cjs/llm/invoke.cjs.map +1 -1
  15. package/dist/cjs/llm/openai/index.cjs +48 -1
  16. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  17. package/dist/cjs/llm/vertexai/index.cjs +19 -0
  18. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  19. package/dist/cjs/main.cjs +2 -0
  20. package/dist/cjs/messages/content.cjs +12 -14
  21. package/dist/cjs/messages/content.cjs.map +1 -1
  22. package/dist/cjs/messages/prune.cjs +31 -13
  23. package/dist/cjs/messages/prune.cjs.map +1 -1
  24. package/dist/cjs/run.cjs +7 -2
  25. package/dist/cjs/run.cjs.map +1 -1
  26. package/dist/cjs/stream.cjs +20 -2
  27. package/dist/cjs/stream.cjs.map +1 -1
  28. package/dist/cjs/summarization/node.cjs +12 -1
  29. package/dist/cjs/summarization/node.cjs.map +1 -1
  30. package/dist/cjs/tools/ToolNode.cjs +41 -4
  31. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  32. package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
  33. package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
  34. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
  35. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  36. package/dist/cjs/utils/tokens.cjs +30 -0
  37. package/dist/cjs/utils/tokens.cjs.map +1 -1
  38. package/dist/esm/agents/AgentContext.mjs +47 -10
  39. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  40. package/dist/esm/common/enum.mjs +13 -0
  41. package/dist/esm/common/enum.mjs.map +1 -1
  42. package/dist/esm/graphs/Graph.mjs +122 -4
  43. package/dist/esm/graphs/Graph.mjs.map +1 -1
  44. package/dist/esm/llm/bedrock/index.mjs +22 -3
  45. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  46. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
  47. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  48. package/dist/esm/llm/google/utils/common.mjs +6 -0
  49. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  50. package/dist/esm/llm/invoke.mjs +49 -8
  51. package/dist/esm/llm/invoke.mjs.map +1 -1
  52. package/dist/esm/llm/openai/index.mjs +48 -1
  53. package/dist/esm/llm/openai/index.mjs.map +1 -1
  54. package/dist/esm/llm/vertexai/index.mjs +19 -0
  55. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  56. package/dist/esm/main.mjs +3 -3
  57. package/dist/esm/messages/content.mjs +12 -15
  58. package/dist/esm/messages/content.mjs.map +1 -1
  59. package/dist/esm/messages/prune.mjs +31 -13
  60. package/dist/esm/messages/prune.mjs.map +1 -1
  61. package/dist/esm/run.mjs +7 -2
  62. package/dist/esm/run.mjs.map +1 -1
  63. package/dist/esm/stream.mjs +21 -3
  64. package/dist/esm/stream.mjs.map +1 -1
  65. package/dist/esm/summarization/node.mjs +12 -1
  66. package/dist/esm/summarization/node.mjs.map +1 -1
  67. package/dist/esm/tools/ToolNode.mjs +41 -4
  68. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  69. package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
  70. package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
  71. package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
  72. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  73. package/dist/esm/utils/tokens.mjs +30 -1
  74. package/dist/esm/utils/tokens.mjs.map +1 -1
  75. package/dist/types/agents/AgentContext.d.ts +7 -3
  76. package/dist/types/common/enum.d.ts +13 -0
  77. package/dist/types/graphs/Graph.d.ts +8 -1
  78. package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
  79. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
  80. package/dist/types/llm/invoke.d.ts +1 -1
  81. package/dist/types/llm/vertexai/index.d.ts +10 -0
  82. package/dist/types/messages/content.d.ts +5 -0
  83. package/dist/types/messages/prune.d.ts +4 -0
  84. package/dist/types/run.d.ts +1 -0
  85. package/dist/types/tools/ToolNode.d.ts +8 -0
  86. package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
  87. package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
  88. package/dist/types/types/graph.d.ts +89 -3
  89. package/dist/types/types/run.d.ts +13 -0
  90. package/dist/types/types/tools.d.ts +10 -0
  91. package/dist/types/utils/tokens.d.ts +7 -0
  92. package/package.json +1 -1
  93. package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
  94. package/src/agents/AgentContext.ts +69 -6
  95. package/src/agents/__tests__/AgentContext.test.ts +6 -2
  96. package/src/common/enum.ts +13 -0
  97. package/src/graphs/Graph.ts +196 -0
  98. package/src/llm/bedrock/index.ts +40 -0
  99. package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
  100. package/src/llm/bedrock/utils/index.ts +1 -0
  101. package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
  102. package/src/llm/bedrock/utils/message_outputs.ts +43 -0
  103. package/src/llm/google/utils/common.test.ts +64 -0
  104. package/src/llm/google/utils/common.ts +18 -0
  105. package/src/llm/invoke.test.ts +79 -1
  106. package/src/llm/invoke.ts +58 -4
  107. package/src/llm/openai/index.ts +95 -1
  108. package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
  109. package/src/llm/vertexai/index.ts +31 -0
  110. package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
  111. package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
  112. package/src/messages/content.ts +24 -32
  113. package/src/messages/prune.ts +39 -2
  114. package/src/run.ts +5 -0
  115. package/src/scripts/subagent-usage-sink.ts +176 -0
  116. package/src/specs/context-accuracy.live.test.ts +409 -0
  117. package/src/specs/context-usage-event.test.ts +117 -0
  118. package/src/specs/context-usage.live.test.ts +297 -0
  119. package/src/specs/prune.test.ts +51 -1
  120. package/src/specs/subagent.test.ts +124 -1
  121. package/src/stream.ts +40 -6
  122. package/src/summarization/__tests__/node.test.ts +60 -1
  123. package/src/summarization/node.ts +20 -1
  124. package/src/tools/ToolNode.ts +85 -3
  125. package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
  126. package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
  127. package/src/tools/streamedToolCallSeals.ts +37 -9
  128. package/src/tools/subagent/SubagentExecutor.ts +221 -3
  129. package/src/types/graph.ts +94 -1
  130. package/src/types/run.ts +13 -0
  131. package/src/types/tools.ts +10 -0
  132. package/src/utils/__tests__/apportion.test.ts +32 -0
  133. package/src/utils/tokens.ts +33 -0
@@ -2476,6 +2476,49 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
2476
2476
  }
2477
2477
  }
2478
2478
 
2479
+ /**
2480
+ * Per-call completion fast-path: when the host reports a result
2481
+ * through `onResult` before the batch resolves, emit that call's
2482
+ * completed run step immediately instead of waiting for the slowest
2483
+ * call in the batch. Safe only when nothing can change the result
2484
+ * after execution — post-tool hooks may rewrite output and HITL may
2485
+ * deny a call, so those configurations keep batch-time emission.
2486
+ * Ids are claimed synchronously before the async dispatch and
2487
+ * released if the dispatch fails, letting the batch path re-emit.
2488
+ */
2489
+ const canEmitEarlyCompletions =
2490
+ this.hookRegistry == null && this.humanInTheLoop?.enabled !== true;
2491
+ const earlyCompletionDispatchedIds = new Set<string>();
2492
+ const earlyCompletionDispatches: Array<Promise<void>> = [];
2493
+ const dispatchRequestById = new Map(
2494
+ dispatchRequests.map((request) => [request.id, request])
2495
+ );
2496
+ const onResult = (result: t.ToolExecuteResult): void => {
2497
+ const request =
2498
+ result.toolCallId != null
2499
+ ? dispatchRequestById.get(result.toolCallId)
2500
+ : undefined;
2501
+ if (
2502
+ request == null ||
2503
+ earlyCompletionDispatchedIds.has(result.toolCallId)
2504
+ ) {
2505
+ return;
2506
+ }
2507
+ earlyCompletionDispatchedIds.add(result.toolCallId);
2508
+ earlyCompletionDispatches.push(
2509
+ this.dispatchEarlyToolCompletion(result, request, config).then(
2510
+ (dispatched) => {
2511
+ if (!dispatched) {
2512
+ earlyCompletionDispatchedIds.delete(result.toolCallId);
2513
+ }
2514
+ },
2515
+ () => {
2516
+ earlyCompletionDispatchedIds.delete(result.toolCallId);
2517
+ }
2518
+ )
2519
+ );
2520
+ };
2521
+
2479
2522
  const dispatchPromise =
2480
2523
  dispatchRequests.length === 0
2481
2524
  ? Promise.resolve([] as t.ToolExecuteResult[])
@@ -2506,6 +2549,7 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
2506
2549
  maybeResolve();
2507
2550
  },
2508
2551
  reject,
2552
+ ...(canEmitEarlyCompletions && { onResult }),
2509
2553
  };
2510
2554
 
2511
2555
  void safeDispatchCustomEvent(
@@ -2540,6 +2584,9 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
2540
2584
  eagerResultsPromise,
2541
2585
  dispatchPromise,
2542
2586
  ]);
2587
+ // Settle in-flight early completion dispatches before the batch loop
2588
+ // below decides which completions still need emitting.
2589
+ await Promise.allSettled(earlyCompletionDispatches);
2543
2590
  const eagerCompletionDispatchedIds = new Set(
2544
2591
  eagerResults
2545
2592
  .filter((result) => result.completionDispatched)
@@ -2728,7 +2775,10 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
2728
2775
  });
2729
2776
  }
2730
2777
 
2731
- if (!eagerCompletionDispatchedIds.has(result.toolCallId)) {
2778
+ if (
2779
+ !eagerCompletionDispatchedIds.has(result.toolCallId) &&
2780
+ !earlyCompletionDispatchedIds.has(result.toolCallId)
2781
+ ) {
2732
2782
  await this.dispatchStepCompleted(
2733
2783
  result.toolCallId,
2734
2784
  toolName,
@@ -2946,7 +2996,7 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
2946
2996
  output: string,
2947
2997
  config: RunnableConfig,
2948
2998
  turn?: number
2949
- ): Promise<void> {
2999
+ ): Promise<boolean> {
2950
3000
  const stepId = this.toolCallStepIds?.get(toolCallId) ?? '';
2951
3001
  if (!stepId) {
2952
3002
  // eslint-disable-next-line no-console
@@ -2957,7 +3007,7 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
2957
3007
  );
2958
3008
  }
2959
3009
 
2960
- await safeDispatchCustomEvent(
3010
+ const dispatched = await safeDispatchCustomEvent(
2961
3011
  GraphEvents.ON_RUN_STEP_COMPLETED,
2962
3012
  {
2963
3013
  result: {
@@ -2975,6 +3025,38 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
2975
3025
  },
2976
3026
  config
2977
3027
  );
3028
+ return dispatched !== false;
3029
+ }
3030
+
3031
+ /**
3032
+ * Emits the completed run step for a single host-reported result before
3033
+ * the batch resolves. Mirrors the batch loop's output formatting exactly;
3034
+ * callers gate on the no-hooks/no-HITL configuration, so the raw result
3035
+ * content here is also the final content. Returns whether the event was
3036
+ * actually dispatched so the caller can fall back to batch-time emission.
3037
+ */
3038
+ private async dispatchEarlyToolCompletion(
3039
+ result: t.ToolExecuteResult,
3040
+ request: t.ToolCallRequest,
3041
+ config: RunnableConfig
3042
+ ): Promise<boolean> {
3043
+ const output =
3044
+ result.status === 'error'
3045
+ ? `Error: ${result.errorMessage ?? 'Unknown error'}\n Please fix your mistakes.`
3046
+ : truncateToolResultContent(
3047
+ typeof result.content === 'string'
3048
+ ? result.content
3049
+ : JSON.stringify(result.content),
3050
+ this.maxToolResultChars
3051
+ );
3052
+ return this.dispatchStepCompleted(
3053
+ result.toolCallId,
3054
+ request.name,
3055
+ request.args,
3056
+ output,
3057
+ config,
3058
+ request.turn
3059
+ );
2978
3060
  }
2979
3061
 
2980
3062
  /**
@@ -4,7 +4,9 @@ import type { BaseMessage } from '@langchain/core/messages';
4
4
  import type {
5
5
  AgentInputs,
6
6
  ResolvedSubagentConfig,
7
+ StandardGraphInput,
7
8
  SubagentUpdateEvent,
9
+ SubagentUsageEvent,
8
10
  ToolExecuteBatchRequest,
9
11
  ToolExecuteResult,
10
12
  } from '@/types';
@@ -17,7 +19,7 @@ import {
17
19
  summarizeEvent,
18
20
  } from '../subagent';
19
21
  import { sanitizeForwardedSubagentUpdateData } from '../subagent/SubagentExecutor';
20
- import { Providers, GraphEvents, StepTypes } from '@/common';
22
+ import { Constants, Providers, GraphEvents, StepTypes } from '@/common';
21
23
  import { AgentContext } from '@/agents/AgentContext';
22
24
  import { HookRegistry } from '@/hooks/HookRegistry';
23
25
  import { HandlerRegistry } from '@/events';
@@ -456,6 +458,446 @@ describe('SubagentExecutor', () => {
456
458
  expect(observedLangfuse).toBe(langfuse);
457
459
  });
458
460
 
461
+ describe('usage sink', () => {
462
+ type CapturedCallbackHandler = {
463
+ handleChatModelStart?: (
464
+ llm: unknown,
465
+ messages: unknown,
466
+ runId: string,
467
+ parentRunId?: string,
468
+ extraParams?: Record<string, unknown>,
469
+ tags?: string[],
470
+ metadata?: Record<string, unknown>
471
+ ) => unknown;
472
+ handleLLMEnd?: (output: unknown, runId: string) => unknown;
473
+ handleLLMError?: (err: unknown, runId: string) => unknown;
474
+ };
475
+ type CapturedInvokeOptions = { callbacks?: CapturedCallbackHandler[] };
476
+
477
+ /**
478
+ * Stub factory that records the `StandardGraphInput` the executor
479
+ * builds and the options passed to `workflow.invoke`, so tests can
480
+ * drive the attached usage-capture callback directly (the stubbed
481
+ * invoke never makes real model calls, so callbacks would otherwise
482
+ * never fire).
483
+ */
484
+ function makeCapturingGraphFactory(driveDuringInvoke?: {
485
+ drive: (handler: CapturedCallbackHandler) => void | Promise<void>;
486
+ }): {
487
+ factory: (input: StandardGraphInput) => StandardGraph;
488
+ getInput: () => StandardGraphInput | undefined;
489
+ getInvokeOptions: () => CapturedInvokeOptions | undefined;
490
+ } {
491
+ let capturedInput: StandardGraphInput | undefined;
492
+ let capturedOptions: CapturedInvokeOptions | undefined;
493
+ const factory = (input: StandardGraphInput): StandardGraph => {
494
+ capturedInput = input;
495
+ return {
496
+ createWorkflow: (): { invoke: jest.Mock } => ({
497
+ invoke: jest
498
+ .fn()
499
+ .mockImplementation(
500
+ async (_input: unknown, options: CapturedInvokeOptions) => {
501
+ capturedOptions = options;
502
+ const usageHandler = options.callbacks?.find(
503
+ (cb) => cb.handleLLMEnd != null
504
+ );
505
+ if (driveDuringInvoke && usageHandler) {
506
+ await driveDuringInvoke.drive(usageHandler);
507
+ }
508
+ return { messages: [new AIMessage('child done')] };
509
+ }
510
+ ),
511
+ }),
512
+ clearHeavyState: jest.fn(),
513
+ } as unknown as StandardGraph;
514
+ };
515
+ return {
516
+ factory,
517
+ getInput: () => capturedInput,
518
+ getInvokeOptions: () => capturedOptions,
519
+ };
520
+ }
521
+
522
+ const makeChoice = (
523
+ usage: Record<string, number> | undefined
524
+ ): unknown => ({
525
+ text: 'ok',
526
+ message: new AIMessage({
527
+ content: 'ok',
528
+ ...(usage
529
+ ? {
530
+ usage_metadata: usage as unknown as AIMessage['usage_metadata'],
531
+ }
532
+ : {}),
533
+ }),
534
+ });
535
+
536
+ const makeLLMEndOutput = (
537
+ usage: Record<string, number> | undefined
538
+ ): unknown => ({
539
+ generations: [[makeChoice(usage)]],
540
+ });
541
+
542
+ it('forwards a wrapped sink into the child graph input that rewrites runId to the root run', async () => {
543
+ const events: SubagentUsageEvent[] = [];
544
+ const { factory, getInput } = makeCapturingGraphFactory();
545
+ const executor = createExecutor({
546
+ usageSink: (event) => {
547
+ events.push(event);
548
+ },
549
+ createChildGraph: factory,
550
+ });
551
+
552
+ await executor.execute({
553
+ description: 'Research this topic',
554
+ subagentType: 'researcher',
555
+ });
556
+
557
+ const forwarded = getInput()?.subagentUsageSink;
558
+ expect(typeof forwarded).toBe('function');
559
+ /**
560
+ * Simulate a NESTED child's emission: its executor stamps `runId`
561
+ * with its own parent (an intermediate `*_sub_*` id). The wrapper
562
+ * must rewrite it to THIS executor's parent run so the host always
563
+ * sees root-run attribution, while the emitting child's identity
564
+ * (`subagentRunId`) is preserved.
565
+ */
566
+ forwarded?.({
567
+ usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
568
+ model: 'gpt-4o-mini',
569
+ provider: Providers.OPENAI,
570
+ subagentType: 'nested-grandchild',
571
+ subagentRunId: 'test-run_sub_a_sub_b',
572
+ subagentAgentId: 'grandchild',
573
+ runId: 'test-run_sub_a',
574
+ });
575
+
576
+ expect(events).toHaveLength(1);
577
+ expect(events[0].runId).toBe('test-run');
578
+ expect(events[0].subagentRunId).toBe('test-run_sub_a_sub_b');
579
+ expect(events[0].subagentType).toBe('nested-grandchild');
580
+ });
581
+
582
+ it('does not attach a capture callback when no sink is provided', async () => {
583
+ const { factory, getInvokeOptions } = makeCapturingGraphFactory();
584
+ const executor = createExecutor({ createChildGraph: factory });
585
+
586
+ await executor.execute({
587
+ description: 'Research this topic',
588
+ subagentType: 'researcher',
589
+ });
590
+
591
+ expect(getInvokeOptions()?.callbacks).toEqual([]);
592
+ });
593
+
594
+ it('emits tagged usage events with per-call ls_model_name', async () => {
595
+ const events: SubagentUsageEvent[] = [];
596
+ const { factory } = makeCapturingGraphFactory({
597
+ drive: async (handler) => {
598
+ await handler.handleChatModelStart?.(
599
+ {},
600
+ [[]],
601
+ 'call-1',
602
+ undefined,
603
+ undefined,
604
+ undefined,
605
+ { ls_model_name: 'gpt-4o-mini-2024-07-18' }
606
+ );
607
+ await handler.handleLLMEnd?.(
608
+ makeLLMEndOutput({
609
+ input_tokens: 11,
610
+ output_tokens: 7,
611
+ total_tokens: 18,
612
+ }),
613
+ 'call-1'
614
+ );
615
+ },
616
+ });
617
+ const executor = createExecutor({
618
+ usageSink: (event) => {
619
+ events.push(event);
620
+ },
621
+ createChildGraph: factory,
622
+ });
623
+
624
+ await executor.execute({
625
+ description: 'Research this topic',
626
+ subagentType: 'researcher',
627
+ });
628
+
629
+ expect(events).toHaveLength(1);
630
+ const event = events[0];
631
+ expect(event.usage).toEqual({
632
+ input_tokens: 11,
633
+ output_tokens: 7,
634
+ total_tokens: 18,
635
+ });
636
+ expect(event.model).toBe('gpt-4o-mini-2024-07-18');
637
+ expect(event.provider).toBe(Providers.OPENAI);
638
+ expect(event.subagentType).toBe('researcher');
639
+ expect(event.subagentAgentId).toBe('child-agent');
640
+ expect(event.subagentRunId).toContain('test-run_sub_');
641
+ expect(event.runId).toBe('test-run');
642
+ });
643
+
644
+ it('falls back to the configured model when a call has no ls_model_name', async () => {
645
+ const events: SubagentUsageEvent[] = [];
646
+ const { factory } = makeCapturingGraphFactory({
647
+ drive: async (handler) => {
648
+ await handler.handleLLMEnd?.(
649
+ makeLLMEndOutput({
650
+ input_tokens: 3,
651
+ output_tokens: 2,
652
+ total_tokens: 5,
653
+ }),
654
+ 'call-1'
655
+ );
656
+ },
657
+ });
658
+ const executor = createExecutor({
659
+ usageSink: (event) => {
660
+ events.push(event);
661
+ },
662
+ createChildGraph: factory,
663
+ });
664
+
665
+ await executor.execute({
666
+ description: 'Research this topic',
667
+ subagentType: 'researcher',
668
+ });
669
+
670
+ expect(events).toHaveLength(1);
671
+ /** `makeChildInputs` configures `clientOptions.modelName`. */
672
+ expect(events[0].model).toBe('gpt-4o-mini');
673
+ });
674
+
675
+ it('emits one event per generation group when a call has multiple completions (n > 1)', async () => {
676
+ const usage = { input_tokens: 10, output_tokens: 4, total_tokens: 14 };
677
+ const events: SubagentUsageEvent[] = [];
678
+ const { factory } = makeCapturingGraphFactory({
679
+ drive: async (handler) => {
680
+ /**
681
+ * One provider request with two choices — both carry the same
682
+ * request-level usage. Emitting per choice would double-bill.
683
+ */
684
+ await handler.handleLLMEnd?.(
685
+ { generations: [[makeChoice(usage), makeChoice(usage)]] },
686
+ 'call-1'
687
+ );
688
+ /** Batched prompts: two groups = two requests = two events. */
689
+ await handler.handleLLMEnd?.(
690
+ { generations: [[makeChoice(usage)], [makeChoice(usage)]] },
691
+ 'call-2'
692
+ );
693
+ },
694
+ });
695
+ const executor = createExecutor({
696
+ usageSink: (event) => {
697
+ events.push(event);
698
+ },
699
+ createChildGraph: factory,
700
+ });
701
+
702
+ await executor.execute({
703
+ description: 'Research this topic',
704
+ subagentType: 'researcher',
705
+ });
706
+
707
+ expect(events).toHaveLength(3);
708
+ });
709
+
710
+ it('prefers INVOKED_PROVIDER/INVOKED_MODEL metadata for fallback-served calls', async () => {
711
+ const events: SubagentUsageEvent[] = [];
712
+ const { factory } = makeCapturingGraphFactory({
713
+ drive: async (handler) => {
714
+ /**
715
+ * Mirror a fallback-served call: `attemptInvoke` stamps the
716
+ * serving provider, `tryFallbackProviders` stamps the fallback's
717
+ * configured model, and the provider reports no `ls_model_name`.
718
+ */
719
+ await handler.handleChatModelStart?.(
720
+ {},
721
+ [[]],
722
+ 'call-1',
723
+ undefined,
724
+ undefined,
725
+ undefined,
726
+ {
727
+ [Constants.INVOKED_PROVIDER]: Providers.ANTHROPIC,
728
+ [Constants.INVOKED_MODEL]: 'claude-fallback-1',
729
+ }
730
+ );
731
+ await handler.handleLLMEnd?.(
732
+ makeLLMEndOutput({
733
+ input_tokens: 5,
734
+ output_tokens: 3,
735
+ total_tokens: 8,
736
+ }),
737
+ 'call-1'
738
+ );
739
+ },
740
+ });
741
+ const executor = createExecutor({
742
+ usageSink: (event) => {
743
+ events.push(event);
744
+ },
745
+ createChildGraph: factory,
746
+ });
747
+
748
+ await executor.execute({
749
+ description: 'Research this topic',
750
+ subagentType: 'researcher',
751
+ });
752
+
753
+ expect(events).toHaveLength(1);
754
+ /** Not the configured primary (openAI / gpt-4o-mini). */
755
+ expect(events[0].provider).toBe(Providers.ANTHROPIC);
756
+ expect(events[0].model).toBe('claude-fallback-1');
757
+ });
758
+
759
+ it('prefers provider-reported ls_model_name over INVOKED_MODEL', async () => {
760
+ const events: SubagentUsageEvent[] = [];
761
+ const { factory } = makeCapturingGraphFactory({
762
+ drive: async (handler) => {
763
+ await handler.handleChatModelStart?.(
764
+ {},
765
+ [[]],
766
+ 'call-1',
767
+ undefined,
768
+ undefined,
769
+ undefined,
770
+ {
771
+ ls_model_name: 'claude-fallback-1-20260101',
772
+ [Constants.INVOKED_PROVIDER]: Providers.ANTHROPIC,
773
+ [Constants.INVOKED_MODEL]: 'claude-fallback-1',
774
+ }
775
+ );
776
+ await handler.handleLLMEnd?.(
777
+ makeLLMEndOutput({
778
+ input_tokens: 5,
779
+ output_tokens: 3,
780
+ total_tokens: 8,
781
+ }),
782
+ 'call-1'
783
+ );
784
+ },
785
+ });
786
+ const executor = createExecutor({
787
+ usageSink: (event) => {
788
+ events.push(event);
789
+ },
790
+ createChildGraph: factory,
791
+ });
792
+
793
+ await executor.execute({
794
+ description: 'Research this topic',
795
+ subagentType: 'researcher',
796
+ });
797
+
798
+ expect(events[0].model).toBe('claude-fallback-1-20260101');
799
+ });
800
+
801
+ it('skips model calls that report no usage_metadata', async () => {
802
+ const events: SubagentUsageEvent[] = [];
803
+ const { factory } = makeCapturingGraphFactory({
804
+ drive: async (handler) => {
805
+ await handler.handleLLMEnd?.(makeLLMEndOutput(undefined), 'call-1');
806
+ },
807
+ });
808
+ const executor = createExecutor({
809
+ usageSink: (event) => {
810
+ events.push(event);
811
+ },
812
+ createChildGraph: factory,
813
+ });
814
+
815
+ await executor.execute({
816
+ description: 'Research this topic',
817
+ subagentType: 'researcher',
818
+ });
819
+
820
+ expect(events).toEqual([]);
821
+ });
822
+
823
+ it('swallows sink errors without breaking the child run', async () => {
824
+ const { factory } = makeCapturingGraphFactory({
825
+ drive: async (handler) => {
826
+ await handler.handleLLMEnd?.(
827
+ makeLLMEndOutput({
828
+ input_tokens: 1,
829
+ output_tokens: 1,
830
+ total_tokens: 2,
831
+ }),
832
+ 'call-1'
833
+ );
834
+ },
835
+ });
836
+ const executor = createExecutor({
837
+ usageSink: () => {
838
+ throw new Error('host sink exploded');
839
+ },
840
+ createChildGraph: factory,
841
+ });
842
+
843
+ const result = await executor.execute({
844
+ description: 'Research this topic',
845
+ subagentType: 'researcher',
846
+ });
847
+
848
+ expect(result.content).toBe('child done');
849
+ });
850
+
851
+ it('awaits async sinks and swallows their rejections', async () => {
852
+ const settled: string[] = [];
853
+ const { factory } = makeCapturingGraphFactory({
854
+ drive: async (handler) => {
855
+ await handler.handleLLMEnd?.(
856
+ makeLLMEndOutput({
857
+ input_tokens: 1,
858
+ output_tokens: 1,
859
+ total_tokens: 2,
860
+ }),
861
+ 'call-1'
862
+ );
863
+ await handler.handleLLMEnd?.(
864
+ makeLLMEndOutput({
865
+ input_tokens: 2,
866
+ output_tokens: 2,
867
+ total_tokens: 4,
868
+ }),
869
+ 'call-2'
870
+ );
871
+ /**
872
+ * Both sink dispatches must have settled by the time
873
+ * `handleLLMEnd` resolves — a dropped promise would leave
874
+ * `recorded` missing here and surface the second call's
875
+ * rejection as unhandled.
876
+ */
877
+ settled.push('drive-done');
878
+ },
879
+ });
880
+ const executor = createExecutor({
881
+ usageSink: async (event) => {
882
+ await new Promise((resolve) => setTimeout(resolve, 5));
883
+ if (event.usage.input_tokens === 2) {
884
+ throw new Error('async host sink rejected');
885
+ }
886
+ settled.push('recorded');
887
+ },
888
+ createChildGraph: factory,
889
+ });
890
+
891
+ const result = await executor.execute({
892
+ description: 'Research this topic',
893
+ subagentType: 'researcher',
894
+ });
895
+
896
+ expect(result.content).toBe('child done');
897
+ expect(settled).toEqual(['recorded', 'drive-done']);
898
+ });
899
+ });
900
+
459
901
  it('returns error message when child graph throws', async () => {
460
902
  const executor = createExecutor({
461
903
  createChildGraph: makeThrowingGraphFactory(