@librechat/agents 3.1.56 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +2 -2
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -2,18 +2,26 @@
2
2
  import { config } from 'dotenv';
3
3
  config();
4
4
  import {
5
- HumanMessage,
6
5
  AIMessage,
7
- SystemMessage,
8
6
  BaseMessage,
9
7
  ToolMessage,
8
+ HumanMessage,
9
+ isBaseMessage,
10
+ SystemMessage,
11
+ AIMessageChunk,
10
12
  } from '@langchain/core/messages';
11
13
  import type { RunnableConfig } from '@langchain/core/runnables';
12
14
  import type { UsageMetadata } from '@langchain/core/messages';
13
15
  import type * as t from '@/types';
14
- import { createPruneMessages } from '@/messages/prune';
16
+ import {
17
+ getMessagesWithinTokenLimit as realGetMessagesWithinTokenLimit,
18
+ preFlightTruncateToolCallInputs,
19
+ repairOrphanedToolMessages,
20
+ sanitizeOrphanToolBlocks,
21
+ createPruneMessages,
22
+ } from '@/messages/prune';
15
23
  import { getLLMConfig } from '@/utils/llmConfig';
16
- import { Providers } from '@/common';
24
+ import { Providers, ContentTypes } from '@/common';
17
25
  import { Run } from '@/run';
18
26
 
19
27
  // Create a simple token counter for testing
@@ -42,6 +50,14 @@ const createTestTokenCounter = (): t.TokenCounter => {
42
50
  if ('text' in item && typeof item.text === 'string') {
43
51
  totalLength += item.text.length;
44
52
  }
53
+ // Count tool_use input fields (serialized args contribute to token count)
54
+ if ('input' in item && item.input != null) {
55
+ const input = item.input;
56
+ totalLength +=
57
+ typeof input === 'string'
58
+ ? input.length
59
+ : JSON.stringify(input).length;
60
+ }
45
61
  }
46
62
  }
47
63
 
@@ -410,6 +426,8 @@ describe('Prune Messages Tests', () => {
410
426
 
411
427
  expect(result.context.length).toBe(3);
412
428
  expect(result.context).toEqual(messages);
429
+ expect(result.messagesToRefine).toEqual([]);
430
+ expect(result.remainingContextTokens).toBeGreaterThan(0);
413
431
  });
414
432
 
415
433
  it('should prune messages when over token limit', () => {
@@ -436,6 +454,7 @@ describe('Prune Messages Tests', () => {
436
454
  startIndex: 0,
437
455
  tokenCounter,
438
456
  indexTokenCountMap,
457
+ reserveRatio: 0,
439
458
  });
440
459
 
441
460
  const result = pruneMessages({ messages });
@@ -445,6 +464,9 @@ describe('Prune Messages Tests', () => {
445
464
  expect(result.context[0]).toBe(messages[0]); // System message
446
465
  expect(result.context[1]).toBe(messages[3]); // Message 2
447
466
  expect(result.context[2]).toBe(messages[4]); // Response 2
467
+ expect(Array.isArray(result.messagesToRefine)).toBe(true);
468
+ expect(result.messagesToRefine?.length).toBe(2);
469
+ expect(typeof result.remainingContextTokens).toBe('number');
448
470
  });
449
471
 
450
472
  it('should respect startType parameter', () => {
@@ -520,26 +542,229 @@ describe('Prune Messages Tests', () => {
520
542
  usageMetadata,
521
543
  });
522
544
 
523
- // The function should have updated the indexTokenCountMap based on the usage metadata
524
- expect(result.indexTokenCountMap).not.toEqual(indexTokenCountMap);
525
-
526
- // The total of all values in indexTokenCountMap should equal the total_tokens from usageMetadata
527
- const totalTokens = Object.values(result.indexTokenCountMap).reduce(
545
+ // Map stays in raw tiktoken space — calibrationRatio captures the multiplier.
546
+ // rawSum * calibrationRatio should approximate input_tokens (50).
547
+ const rawSum = Object.values(result.indexTokenCountMap).reduce(
528
548
  (a = 0, b = 0) => a + b,
529
549
  0
550
+ ) as number;
551
+ const calibratedEstimate = Math.round(
552
+ rawSum * (result.calibrationRatio ?? 1)
530
553
  );
531
- expect(totalTokens).toBe(75);
554
+ expect(Math.abs(calibratedEstimate - 50)).toBeLessThanOrEqual(3);
532
555
  });
533
556
  });
534
557
 
535
558
  describe('Tool Message Handling', () => {
559
+ it('should drop orphan tool messages that no longer have matching AI tool calls', () => {
560
+ const tokenCounter = createTestTokenCounter();
561
+ const context = [
562
+ new SystemMessage('System instruction'),
563
+ new ToolMessage({
564
+ content: 'Orphan result',
565
+ tool_call_id: 'tool-orphan',
566
+ }),
567
+ new AIMessage({
568
+ content: [
569
+ { type: 'text', text: 'I will call a tool now' },
570
+ {
571
+ type: 'tool_use',
572
+ id: 'tool-valid',
573
+ name: 'read_file',
574
+ input: '{"path":"README.md"}',
575
+ },
576
+ ],
577
+ }),
578
+ new ToolMessage({
579
+ content: 'Valid result',
580
+ tool_call_id: 'tool-valid',
581
+ }),
582
+ ];
583
+
584
+ const indexTokenCountMap = {
585
+ 0: tokenCounter(context[0]),
586
+ 1: tokenCounter(context[1]),
587
+ 2: tokenCounter(context[2]),
588
+ 3: tokenCounter(context[3]),
589
+ };
590
+
591
+ const repaired = repairOrphanedToolMessages({
592
+ context,
593
+ allMessages: context,
594
+ tokenCounter,
595
+ indexTokenCountMap,
596
+ });
597
+
598
+ expect(repaired.context).toHaveLength(3);
599
+ expect(repaired.context[0]).toBe(context[0]);
600
+ expect(repaired.context[1]).toBe(context[2]);
601
+ expect(repaired.context[2]).toBe(context[3]);
602
+ expect(repaired.droppedOrphanCount).toBe(1);
603
+ expect(repaired.reclaimedTokens).toBe(indexTokenCountMap[1]);
604
+ });
605
+
606
+ it('should strip orphan tool_use blocks from AI messages when ToolMessages are not in context', () => {
607
+ const tokenCounter = createTestTokenCounter();
608
+ const context = [
609
+ new HumanMessage('Show me something cool'),
610
+ new AIMessage({
611
+ content: [
612
+ { type: 'text', text: 'Let me create an animation.' },
613
+ {
614
+ type: 'tool_use',
615
+ id: 'tool-navigate',
616
+ name: 'navigate_page',
617
+ input: '{"url":"about:blank"}',
618
+ },
619
+ {
620
+ type: 'tool_use',
621
+ id: 'tool-script',
622
+ name: 'evaluate_script',
623
+ input: '{"function":"' + 'x'.repeat(3000) + '"}',
624
+ },
625
+ ],
626
+ tool_calls: [
627
+ {
628
+ id: 'tool-navigate',
629
+ name: 'navigate_page',
630
+ args: { url: 'about:blank' },
631
+ },
632
+ {
633
+ id: 'tool-script',
634
+ name: 'evaluate_script',
635
+ args: { fn: 'x'.repeat(3000) },
636
+ },
637
+ ],
638
+ }),
639
+ // ToolMessages for both tool calls are NOT in context (pruned)
640
+ ];
641
+
642
+ const indexTokenCountMap = {
643
+ 0: tokenCounter(context[0]),
644
+ 1: tokenCounter(context[1]),
645
+ };
646
+
647
+ const repaired = repairOrphanedToolMessages({
648
+ context,
649
+ allMessages: context,
650
+ tokenCounter,
651
+ indexTokenCountMap,
652
+ });
653
+
654
+ // AI message should survive but with tool_use blocks stripped
655
+ expect(repaired.context).toHaveLength(2);
656
+ const repairedAI = repaired.context[1] as AIMessage;
657
+ expect(repairedAI.getType()).toBe('ai');
658
+
659
+ // Should only have the text block, no tool_use blocks
660
+ const content = repairedAI.content as Array<{ type: string }>;
661
+ expect(content).toHaveLength(1);
662
+ expect(content[0].type).toBe('text');
663
+
664
+ // tool_calls should be empty
665
+ expect(repairedAI.tool_calls ?? []).toHaveLength(0);
666
+
667
+ // Token savings from stripping the large tool_use blocks
668
+ expect(repaired.reclaimedTokens).toBeGreaterThan(0);
669
+ });
670
+
671
+ it('should drop AI message entirely when it has only tool_use blocks with no text', () => {
672
+ const tokenCounter = createTestTokenCounter();
673
+ const context = [
674
+ new HumanMessage('Do something'),
675
+ new AIMessage({
676
+ content: [
677
+ {
678
+ type: 'tool_use',
679
+ id: 'tool-only',
680
+ name: 'some_tool',
681
+ input: '{"query":"test"}',
682
+ },
683
+ ],
684
+ tool_calls: [
685
+ { id: 'tool-only', name: 'some_tool', args: { query: 'test' } },
686
+ ],
687
+ }),
688
+ ];
689
+
690
+ const indexTokenCountMap = {
691
+ 0: tokenCounter(context[0]),
692
+ 1: tokenCounter(context[1]),
693
+ };
694
+
695
+ const repaired = repairOrphanedToolMessages({
696
+ context,
697
+ allMessages: context,
698
+ tokenCounter,
699
+ indexTokenCountMap,
700
+ });
701
+
702
+ // AI message should be dropped since it had only tool_use blocks
703
+ expect(repaired.context).toHaveLength(1);
704
+ expect(repaired.context[0].getType()).toBe('human');
705
+ expect(repaired.droppedOrphanCount).toBe(1);
706
+ });
707
+
708
+ it('should keep tool_use blocks when their ToolMessages ARE in context', () => {
709
+ const tokenCounter = createTestTokenCounter();
710
+ const context = [
711
+ new HumanMessage('Do something'),
712
+ new AIMessage({
713
+ content: [
714
+ { type: 'text', text: 'Calling tool' },
715
+ {
716
+ type: 'tool_use',
717
+ id: 'tool-present',
718
+ name: 'read_file',
719
+ input: '{"path":"test.txt"}',
720
+ },
721
+ ],
722
+ tool_calls: [
723
+ {
724
+ id: 'tool-present',
725
+ name: 'read_file',
726
+ args: { path: 'test.txt' },
727
+ },
728
+ ],
729
+ }),
730
+ new ToolMessage({
731
+ content: 'File contents here',
732
+ tool_call_id: 'tool-present',
733
+ }),
734
+ ];
735
+
736
+ const indexTokenCountMap = {
737
+ 0: tokenCounter(context[0]),
738
+ 1: tokenCounter(context[1]),
739
+ 2: tokenCounter(context[2]),
740
+ };
741
+
742
+ const repaired = repairOrphanedToolMessages({
743
+ context,
744
+ allMessages: context,
745
+ tokenCounter,
746
+ indexTokenCountMap,
747
+ });
748
+
749
+ // Nothing should change — all tool_use blocks have matching ToolMessages
750
+ expect(repaired.context).toHaveLength(3);
751
+ expect(repaired.reclaimedTokens).toBe(0);
752
+ expect(repaired.droppedOrphanCount).toBe(0);
753
+ });
754
+
536
755
  it('should ensure context does not start with a tool message by finding an AI message', () => {
537
756
  const tokenCounter = createTestTokenCounter();
538
757
  const messages = [
539
758
  new SystemMessage('System instruction'),
540
- new AIMessage('AI message 1'),
759
+ new AIMessage({
760
+ content: [{ type: 'text', text: 'AI msg 1' }],
761
+ tool_calls: [{ id: 'tool1', name: 'read_file', args: { p: '1' } }],
762
+ }),
541
763
  new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
542
- new AIMessage('AI message 2'),
764
+ new AIMessage({
765
+ content: [{ type: 'text', text: 'AI msg 2' }],
766
+ tool_calls: [{ id: 'tool2', name: 'read_file', args: { p: '2' } }],
767
+ }),
543
768
  new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' }),
544
769
  ];
545
770
 
@@ -562,10 +787,10 @@ describe('Prune Messages Tests', () => {
562
787
  const result = pruneMessages({ messages });
563
788
 
564
789
  // The context should include the system message, AI message 2, and Tool result 2
565
- // It should NOT start with Tool result 2 alone
790
+ // AI message 1 + Tool result 1 are pruned. Tool result 1 is orphaned (AI 1 pruned).
566
791
  expect(result.context.length).toBe(3);
567
792
  expect(result.context[0]).toBe(messages[0]); // System message
568
- expect(result.context[1]).toBe(messages[3]); // AI message 2
793
+ expect(result.context[1].getType()).toBe('ai'); // AI message 2
569
794
  expect(result.context[2]).toBe(messages[4]); // Tool result 2
570
795
  });
571
796
 
@@ -577,6 +802,7 @@ describe('Prune Messages Tests', () => {
577
802
  new AIMessage('AI message 1'),
578
803
  new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
579
804
  new HumanMessage('Human message 2'),
805
+ // Tool result 2 has no parent AI tool_call — this is an orphan
580
806
  new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' }),
581
807
  ];
582
808
 
@@ -595,16 +821,16 @@ describe('Prune Messages Tests', () => {
595
821
  startIndex: 0,
596
822
  tokenCounter,
597
823
  indexTokenCountMap: { ...indexTokenCountMap },
824
+ reserveRatio: 0,
598
825
  });
599
826
 
600
827
  const result = pruneMessages({ messages });
601
828
 
602
- // The context should include the system message, Human message 2, and Tool result 2
603
- // It should NOT start with Tool result 2 alone
604
- expect(result.context.length).toBe(3);
829
+ // Tool result 2 is an orphan (no AI message with tool_call_id 'tool2' in context)
830
+ // so it gets dropped. Context is system + human message 2.
831
+ expect(result.context.length).toBe(2);
605
832
  expect(result.context[0]).toBe(messages[0]); // System message
606
833
  expect(result.context[1]).toBe(messages[4]); // Human message 2
607
- expect(result.context[2]).toBe(messages[5]); // Tool result 2
608
834
  });
609
835
 
610
836
  it('should handle the case where a tool message is followed by an AI message', () => {
@@ -676,11 +902,20 @@ describe('Prune Messages Tests', () => {
676
902
  const messages = [
677
903
  new SystemMessage('System instruction'),
678
904
  new HumanMessage('Human message 1'),
679
- new AIMessage('AI message 1 with tool use'),
905
+ new AIMessage({
906
+ content: [{ type: 'text', text: 'AI message 1' }],
907
+ tool_calls: [{ id: 'tool1', name: 'read_file', args: { path: 'a' } }],
908
+ }),
680
909
  new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
681
- new AIMessage('AI message 2 with tool use'),
910
+ new AIMessage({
911
+ content: [{ type: 'text', text: 'AI message 2' }],
912
+ tool_calls: [{ id: 'tool2', name: 'read_file', args: { path: 'b' } }],
913
+ }),
682
914
  new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' }),
683
- new AIMessage('AI message 3 with tool use'),
915
+ new AIMessage({
916
+ content: [{ type: 'text', text: 'AI message 3' }],
917
+ tool_calls: [{ id: 'tool3', name: 'read_file', args: { path: 'c' } }],
918
+ }),
684
919
  new ToolMessage({ content: 'Tool result 3', tool_call_id: 'tool3' }),
685
920
  ];
686
921
 
@@ -704,15 +939,557 @@ describe('Prune Messages Tests', () => {
704
939
 
705
940
  const result = pruneMessages({ messages });
706
941
 
942
+ // AI messages 2 & 3 with their ToolMessages fit; AI1+Tool1 pruned; Tool1 is orphan (AI1 pruned)
707
943
  expect(result.context.length).toBe(5);
708
944
  expect(result.context[0]).toBe(messages[0]); // System message
709
- expect(result.context[1]).toBe(messages[4]); // AI message 2 with tool use
945
+ expect(result.context[1].getType()).toBe('ai'); // AI message 2
710
946
  expect(result.context[2]).toBe(messages[5]); // Tool result 2
711
- expect(result.context[3]).toBe(messages[6]); // AI message 3 with tool use
947
+ expect(result.context[3].getType()).toBe('ai'); // AI message 3
712
948
  expect(result.context[4]).toBe(messages[7]); // Tool result 3
713
949
  });
714
950
  });
715
951
 
952
+ describe('preFlightTruncateToolCallInputs', () => {
953
+ it('should truncate oversized tool_use input fields in AI messages', () => {
954
+ const tokenCounter = createTestTokenCounter();
955
+ const largeInput = '{"function":"' + 'x'.repeat(5000) + '"}';
956
+ const messages: BaseMessage[] = [
957
+ new HumanMessage('Run this script'),
958
+ new AIMessage({
959
+ content: [
960
+ { type: 'text', text: 'I will execute the script.' },
961
+ {
962
+ type: 'tool_use',
963
+ id: 'tool-exec',
964
+ name: 'evaluate_script',
965
+ input: largeInput,
966
+ },
967
+ ],
968
+ tool_calls: [
969
+ {
970
+ id: 'tool-exec',
971
+ name: 'evaluate_script',
972
+ args: { function: 'x'.repeat(5000) },
973
+ },
974
+ ],
975
+ }),
976
+ new ToolMessage({ content: 'Result: OK', tool_call_id: 'tool-exec' }),
977
+ ];
978
+
979
+ const indexTokenCountMap: Record<string, number | undefined> = {
980
+ 0: tokenCounter(messages[0]),
981
+ 1: tokenCounter(messages[1]),
982
+ 2: tokenCounter(messages[2]),
983
+ };
984
+
985
+ const originalTokens = indexTokenCountMap[1] as number;
986
+ expect(originalTokens).toBeGreaterThan(5000); // Large input counted
987
+
988
+ // maxContextTokens: 1000 → maxInputChars = floor(1000 * 0.15) * 4 = 600
989
+ const truncated = preFlightTruncateToolCallInputs({
990
+ messages,
991
+ maxContextTokens: 1000,
992
+ indexTokenCountMap,
993
+ tokenCounter,
994
+ });
995
+
996
+ expect(truncated).toBe(1);
997
+ const newTokens = indexTokenCountMap[1] as number;
998
+ expect(newTokens).toBeLessThan(originalTokens);
999
+
1000
+ // Verify the content block was truncated
1001
+ const aiMsg = messages[1] as AIMessage;
1002
+ const toolUseBlock = (
1003
+ aiMsg.content as Array<Record<string, unknown>>
1004
+ ).find((b) => b.type === 'tool_use');
1005
+ expect(toolUseBlock).toBeDefined();
1006
+ const truncatedInput = toolUseBlock!.input as {
1007
+ _truncated: string;
1008
+ _originalChars: number;
1009
+ };
1010
+ expect(truncatedInput._truncated).toContain('truncated');
1011
+ expect(truncatedInput._originalChars).toBeGreaterThan(600);
1012
+
1013
+ // Verify tool_calls args were also truncated
1014
+ expect(aiMsg.tool_calls).toBeDefined();
1015
+ const tc = aiMsg.tool_calls![0];
1016
+ expect(tc.args).toHaveProperty('_truncated');
1017
+ });
1018
+
1019
+ it('should not truncate inputs that fit within the budget', () => {
1020
+ const tokenCounter = createTestTokenCounter();
1021
+ const messages: BaseMessage[] = [
1022
+ new HumanMessage('Read a file'),
1023
+ new AIMessage({
1024
+ content: [
1025
+ { type: 'text', text: 'Reading file.' },
1026
+ {
1027
+ type: 'tool_use',
1028
+ id: 'tool-read',
1029
+ name: 'read_file',
1030
+ input: '{"path":"test.txt"}',
1031
+ },
1032
+ ],
1033
+ tool_calls: [
1034
+ { id: 'tool-read', name: 'read_file', args: { path: 'test.txt' } },
1035
+ ],
1036
+ }),
1037
+ ];
1038
+
1039
+ const indexTokenCountMap: Record<string, number | undefined> = {
1040
+ 0: tokenCounter(messages[0]),
1041
+ 1: tokenCounter(messages[1]),
1042
+ };
1043
+
1044
+ const originalTokens = indexTokenCountMap[1];
1045
+
1046
+ const truncated = preFlightTruncateToolCallInputs({
1047
+ messages,
1048
+ maxContextTokens: 1000,
1049
+ indexTokenCountMap,
1050
+ tokenCounter,
1051
+ });
1052
+
1053
+ expect(truncated).toBe(0);
1054
+ expect(indexTokenCountMap[1]).toBe(originalTokens);
1055
+ });
1056
+
1057
+ it('should skip non-AI messages', () => {
1058
+ const tokenCounter = createTestTokenCounter();
1059
+ const messages: BaseMessage[] = [
1060
+ new HumanMessage('Hello'),
1061
+ new ToolMessage({ content: 'x'.repeat(5000), tool_call_id: 'tool-1' }),
1062
+ ];
1063
+
1064
+ const indexTokenCountMap: Record<string, number | undefined> = {
1065
+ 0: tokenCounter(messages[0]),
1066
+ 1: tokenCounter(messages[1]),
1067
+ };
1068
+
1069
+ const truncated = preFlightTruncateToolCallInputs({
1070
+ messages,
1071
+ maxContextTokens: 1000,
1072
+ indexTokenCountMap,
1073
+ tokenCounter,
1074
+ });
1075
+
1076
+ // Should not touch ToolMessages (that's preFlightTruncateToolResults' job)
1077
+ expect(truncated).toBe(0);
1078
+ });
1079
+ });
1080
+
1081
+ describe('Instruction token budget reservation (getInstructionTokens)', () => {
1082
+ it('should reserve budget for instruction tokens when no system message is present', () => {
1083
+ const tokenCounter = createTestTokenCounter();
1084
+ // Agent flow: messages do NOT include a system message.
1085
+ // The system message is prepended later by buildSystemRunnable.
1086
+ const messages = [
1087
+ new HumanMessage('Hello there'), // 11 chars
1088
+ new AIMessage('Hi'), // 2 chars
1089
+ new HumanMessage('How are you?'), // 12 chars
1090
+ new AIMessage('Good'), // 4 chars
1091
+ ];
1092
+
1093
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1094
+ for (let i = 0; i < messages.length; i++) {
1095
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1096
+ }
1097
+
1098
+ // Total message tokens: 11 + 2 + 12 + 4 = 29
1099
+ // Instruction tokens: 20 (simulating system prompt overhead)
1100
+ // Effective budget for messages: 50 - 20 = 30 → fits all 29 tokens
1101
+ const pruneMessages = createPruneMessages({
1102
+ maxTokens: 50,
1103
+ startIndex: 0,
1104
+ tokenCounter,
1105
+ indexTokenCountMap,
1106
+ reserveRatio: 0,
1107
+ getInstructionTokens: () => 20,
1108
+ });
1109
+
1110
+ const result = pruneMessages({ messages });
1111
+
1112
+ // All messages should fit: 29 message tokens + 20 instruction = 49 ≤ 50
1113
+ expect(result.context.length).toBe(4);
1114
+ expect(result.context).toEqual(messages);
1115
+ expect(result.messagesToRefine).toEqual([]);
1116
+ });
1117
+
1118
+ it('should prune when messages + instruction tokens exceed budget', () => {
1119
+ const tokenCounter = createTestTokenCounter();
1120
+ const messages = [
1121
+ new HumanMessage('Hello there'), // 11 chars
1122
+ new AIMessage('Hi'), // 2 chars
1123
+ new HumanMessage('How are you?'), // 12 chars
1124
+ new AIMessage('Good'), // 4 chars
1125
+ ];
1126
+
1127
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1128
+ for (let i = 0; i < messages.length; i++) {
1129
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1130
+ }
1131
+
1132
+ // Total message tokens: 29
1133
+ // Instruction tokens: 25 (simulating large tool schema overhead)
1134
+ // Effective budget: 40 - 25 = 15 → must prune older messages
1135
+ const pruneMessages = createPruneMessages({
1136
+ maxTokens: 40,
1137
+ startIndex: 0,
1138
+ tokenCounter,
1139
+ indexTokenCountMap,
1140
+ getInstructionTokens: () => 25,
1141
+ });
1142
+
1143
+ const result = pruneMessages({ messages });
1144
+
1145
+ // Should prune older messages to fit within 15 available tokens.
1146
+ // Working backwards from the newest message: "Good" (4) fits, but
1147
+ // "Good" (4) + "How are you?" (12) = 16 > 15, so only "Good" can be kept.
1148
+ // No startType is configured here, so the context may start on that AI message.
1149
+ // The 3 tokens of overhead only tighten this further:
1150
+ // available = 15 - 3 = 12: "Good" (4) fits, "How are you?" (12) → 4+12=16 > 12
1151
+ // Either way older messages are pruned, which is what the assertions below check.
1152
+ expect(result.context.length).toBeLessThan(4);
1153
+ expect(Array.isArray(result.messagesToRefine)).toBe(true);
1154
+ expect(result.messagesToRefine!.length).toBeGreaterThan(0);
1155
+ });
1156
+
1157
+ it('should correctly account for instruction tokens in early-return path', () => {
1158
+ const tokenCounter = createTestTokenCounter();
1159
+ const messages = [
1160
+ new HumanMessage('Hi'), // 2 chars
1161
+ new AIMessage('Hello'), // 5 chars
1162
+ ];
1163
+
1164
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1165
+ for (let i = 0; i < messages.length; i++) {
1166
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1167
+ }
1168
+
1169
+ // Message tokens: 7
1170
+ // Instruction tokens: 100 (simulating 26 MCP tools ~5000 chars)
1171
+ // Budget: 50 → 7 + 100 = 107 > 50, so early-return should NOT fire
1172
+ const pruneMessages = createPruneMessages({
1173
+ maxTokens: 50,
1174
+ startIndex: 0,
1175
+ tokenCounter,
1176
+ indexTokenCountMap,
1177
+ getInstructionTokens: () => 100,
1178
+ });
1179
+
1180
+ const result = pruneMessages({ messages });
1181
+
1182
+ // Even though messages alone (7) fit in 50, the instruction overhead (100)
1183
+ // means pruning must occur. With only 50 - 100 = -50 effective budget,
1184
+ // nothing fits → all messages pruned.
1185
+ expect(result.messagesToRefine!.length).toBeGreaterThan(0);
1186
+ });
1187
+
1188
+ it('should not double-subtract when messages include a system message', () => {
1189
+ const tokenCounter = createTestTokenCounter();
1190
+ const messages = [
1191
+ new SystemMessage('System'), // 6 chars
1192
+ new HumanMessage('Hello there'), // 11 chars
1193
+ new AIMessage('Hi'), // 2 chars
1194
+ ];
1195
+
1196
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1197
+ for (let i = 0; i < messages.length; i++) {
1198
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1199
+ }
1200
+
1201
+ // When messages[0] IS a system message, getMessagesWithinTokenLimit uses
1202
+ // indexTokenCountMap[0] (6) to subtract from budget, ignoring instructionTokens.
1203
+ // getInstructionTokens is only used when no system message is at index 0.
1204
+ const pruneMessages = createPruneMessages({
1205
+ maxTokens: 30,
1206
+ startIndex: 0,
1207
+ tokenCounter,
1208
+ indexTokenCountMap,
1209
+ getInstructionTokens: () => 999, // Should be ignored for system message path
1210
+ });
1211
+
1212
+ const result = pruneMessages({ messages });
1213
+
1214
+ // Budget: 30 - 6 (system) = 24 available.
1215
+ // "Hi" (2) + "Hello there" (11) + 3 overhead = 16, fits in 24.
1216
+ // All messages should be kept.
1217
+ expect(result.context.length).toBe(3);
1218
+ expect(result.context[0]).toBe(messages[0]); // System message preserved
1219
+ });
1220
+
1221
+ it('index 0 should NOT be inflated when getInstructionTokens is provided', () => {
1222
+ const tokenCounter = createTestTokenCounter();
1223
+ const messages = [
1224
+ new HumanMessage('Hello there'), // 11 chars
1225
+ new AIMessage('Hi'), // 2 chars
1226
+ ];
1227
+
1228
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1229
+ for (let i = 0; i < messages.length; i++) {
1230
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1231
+ }
1232
+
1233
+ const pruneMessages = createPruneMessages({
1234
+ maxTokens: 50,
1235
+ startIndex: 0,
1236
+ tokenCounter,
1237
+ indexTokenCountMap,
1238
+ getInstructionTokens: () => 10,
1239
+ });
1240
+
1241
+ // Before and after pruning, index 0 should remain 11 (the real token count)
1242
+ expect(indexTokenCountMap[0]).toBe(11);
1243
+ pruneMessages({ messages });
1244
+ // The caller's indexTokenCountMap must not be mutated: index 0 still holds the real count
1245
+ expect(indexTokenCountMap[0]).toBe(11);
1246
+ });
1247
+
1248
+ it('pre-flight truncation uses effective budget after instruction overhead', () => {
1249
+ const tokenCounter = createTestTokenCounter();
1250
+ // Simulate the real scenario: AI message has a massive tool_call input
1251
+ // (like the chrome-devtools evaluate_script with a 7000-char JS payload)
1252
+ const hugeInput = 'x'.repeat(7000);
1253
+ const messages = [
1254
+ new HumanMessage('show me something'), // 17 chars
1255
+ new AIMessage({
1256
+ content: [
1257
+ { type: 'text', text: 'Creating animation' },
1258
+ {
1259
+ type: 'tool_use',
1260
+ id: 'tool_1',
1261
+ name: 'evaluate_script',
1262
+ input: { function: hugeInput },
1263
+ },
1264
+ ],
1265
+ tool_calls: [
1266
+ {
1267
+ id: 'tool_1',
1268
+ name: 'evaluate_script',
1269
+ args: { function: hugeInput },
1270
+ type: 'tool_call' as const,
1271
+ },
1272
+ ],
1273
+ }),
1274
+ new ToolMessage({ content: 'Script executed', tool_call_id: 'tool_1' }),
1275
+ ];
1276
+
1277
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1278
+ for (let i = 0; i < messages.length; i++) {
1279
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1280
+ }
1281
+
1282
+ // Pre-flight truncation uses maxTokens for the truncation threshold:
1283
+ // Math.floor(8000*0.15)*4 = 4800 chars. The AI message's tool_use
1284
+ // input (~7015 chars) shrinks to ~4800 chars, giving an AI token
1285
+ // count of ~4850.
1286
+ //
1287
+ // The effective pruning budget subtracts instruction overhead:
1288
+ // effectiveMax = 8000 - 2000 = 6000, which is enough for all three
1289
+ // messages (~4850 + 17 + 15 ≈ 4882).
1290
+ const instructionTokens = 2000;
1291
+ const pruneMessages = createPruneMessages({
1292
+ maxTokens: 8000,
1293
+ startIndex: 0,
1294
+ tokenCounter,
1295
+ indexTokenCountMap,
1296
+ getInstructionTokens: () => instructionTokens,
1297
+ reserveRatio: 0,
1298
+ });
1299
+
1300
+ const result = pruneMessages({ messages });
1301
+
1302
+ // The AI message should survive pruning (not be in messagesToRefine)
1302
+ // because, once pre-flight truncation has shrunk it, it fits the effective budget
1304
+ const aiMessagesInContext = result.context.filter(
1305
+ (m) => m.getType() === 'ai'
1306
+ );
1307
+ expect(aiMessagesInContext.length).toBe(1);
1308
+ expect(result.context.length).toBe(3); // All 3 messages fit after truncation
1309
+ });
1310
+
1311
+ it('emergency truncation recovers when initial prune produces empty context', () => {
1312
+ const tokenCounter = createTestTokenCounter();
1313
+ // Simulate post-summarization state: only 4 messages remain, but one
1314
+ // has a huge tool_call input that exceeds available budget alone.
1315
+ // With char-based counter, the AI message with 4000-char input is ~4000 tokens.
1316
+ // Available budget: 5000 - 4500 = 500. Nothing fits on first pass.
1317
+ const hugeInput = 'x'.repeat(4000);
1318
+ const messages = [
1319
+ new AIMessage({
1320
+ content: [
1321
+ { type: 'text', text: 'Running script' },
1322
+ {
1323
+ type: 'tool_use',
1324
+ id: 'tool_1',
1325
+ name: 'evaluate_script',
1326
+ input: { function: hugeInput },
1327
+ },
1328
+ ],
1329
+ tool_calls: [
1330
+ {
1331
+ id: 'tool_1',
1332
+ name: 'evaluate_script',
1333
+ args: { function: hugeInput },
1334
+ type: 'tool_call' as const,
1335
+ },
1336
+ ],
1337
+ }),
1338
+ new ToolMessage({
1339
+ content: 'Script ran successfully',
1340
+ tool_call_id: 'tool_1',
1341
+ }),
1342
+ new HumanMessage('that looks great'),
1343
+ new AIMessage('Thanks! Want more?'),
1344
+ ];
1345
+
1346
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1347
+ for (let i = 0; i < messages.length; i++) {
1348
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1349
+ }
1350
+
1351
+ // Available budget is extremely tight: 500 tokens for messages.
1352
+ // The AI message alone is ~4000+ tokens. Initial prune: nothing fits.
1353
+ // Emergency truncation should reduce tool inputs to 150 chars,
1354
+ // making the AI message fit.
1355
+ const pruneMessages = createPruneMessages({
1356
+ maxTokens: 5000,
1357
+ startIndex: 0,
1358
+ tokenCounter,
1359
+ indexTokenCountMap,
1360
+ getInstructionTokens: () => 4500,
1361
+ });
1362
+
1363
+ const result = pruneMessages({ messages });
1364
+
1365
+ // Emergency truncation should have recovered — context is NOT empty
1366
+ expect(result.context.length).toBeGreaterThan(0);
1367
+ // At minimum, the newest messages should be present
1368
+ const types = result.context.map((m) => m.getType());
1369
+ expect(types).toContain('human');
1370
+ });
1371
+ });
1372
+
1373
+ describe('Empty messages guard', () => {
1374
+ it('returns empty context without crashing when messages array is empty', () => {
1375
+ const tokenCounter = createTestTokenCounter();
1376
+ const pruneMessages = createPruneMessages({
1377
+ maxTokens: 8000,
1378
+ startIndex: 0,
1379
+ tokenCounter,
1380
+ indexTokenCountMap: {},
1381
+ getInstructionTokens: () => 4000,
1382
+ });
1383
+
1384
+ // Simulate post-summarization state where REMOVE_ALL left an empty messages array
1385
+ const result = pruneMessages({
1386
+ messages: [],
1387
+ usageMetadata: {
1388
+ input_tokens: 100,
1389
+ output_tokens: 50,
1390
+ total_tokens: 150,
1391
+ } as UsageMetadata,
1392
+ });
1393
+
1394
+ expect(result.context).toEqual([]);
1395
+ expect(result.messagesToRefine).toEqual([]);
1396
+ expect(result.prePruneContextTokens).toBe(0);
1397
+ expect(result.remainingContextTokens).toBe(8000);
1398
+ });
1399
+ });
1400
+
1401
+ describe('Dropped orphan ToolMessages appear in messagesToRefine', () => {
1402
+ it('appends orphan ToolMessage (whose parent AI was pruned) to messagesToRefine for summarization', () => {
1403
+ const tokenCounter = createTestTokenCounter();
1404
+
1405
+ // Build messages where the large AI(evaluate) won't fit in a tight budget,
1406
+ // but its smaller ToolMessage(evaluate) does. After backward iteration,
1407
+ // the ToolMessage lands in context while its parent AI is in prunedMemory.
1408
+ // repairOrphanedToolMessages then drops the orphan ToolMessage from context.
1409
+ // The fix: that dropped ToolMessage must appear in messagesToRefine so
1410
+ // summarization sees the tool result (otherwise summary says "in progress").
1411
+ const messages: BaseMessage[] = [
1412
+ new HumanMessage('Build me a solar system simulation'),
1413
+ new AIMessage({
1414
+ content: [
1415
+ { type: 'text', text: 'I will write the code now.' },
1416
+ {
1417
+ type: 'tool_use',
1418
+ id: 'tc_eval',
1419
+ name: 'evaluate_script',
1420
+ // Large input that consumes most of the budget
1421
+ input: { code: 'x'.repeat(3000) },
1422
+ },
1423
+ ],
1424
+ tool_calls: [
1425
+ {
1426
+ id: 'tc_eval',
1427
+ name: 'evaluate_script',
1428
+ args: { code: 'x'.repeat(3000) },
1429
+ type: 'tool_call' as const,
1430
+ },
1431
+ ],
1432
+ }),
1433
+ new ToolMessage({
1434
+ // Small result — fits in budget individually
1435
+ content: 'Solar system simulation launched successfully!',
1436
+ tool_call_id: 'tc_eval',
1437
+ name: 'evaluate_script',
1438
+ }),
1439
+ ];
1440
+
1441
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1442
+ for (let i = 0; i < messages.length; i++) {
1443
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1444
+ }
1445
+
1446
+ // Budget is tight enough that the large AI message won't fit
1447
+ // even after emergency truncation, but HumanMessage and ToolMessage
1448
+ // individually can. Budget must be low enough that proportional
1449
+ // emergency truncation (budget / messages * 4 chars) still leaves
1450
+ // the AI message too large to fit.
1451
+ const pruneMessages = createPruneMessages({
1452
+ maxTokens: 100,
1453
+ startIndex: 0,
1454
+ tokenCounter,
1455
+ indexTokenCountMap,
1456
+ getInstructionTokens: () => 0,
1457
+ });
1458
+
1459
+ const result = pruneMessages({ messages });
1460
+
1461
+ // The orphan ToolMessage(evaluate) should NOT be in context
1462
+ // (its parent AI was pruned away)
1463
+ const contextToolMsgs = result.context.filter(
1464
+ (m) => m.getType() === 'tool'
1465
+ );
1466
+ const orphanInContext = contextToolMsgs.some(
1467
+ (m) => (m as ToolMessage).tool_call_id === 'tc_eval'
1468
+ );
1469
+ expect(orphanInContext).toBe(false);
1470
+
1471
+ // The key assertion: the dropped ToolMessage MUST appear in messagesToRefine
1472
+ // so that summarization can see "Solar system simulation launched successfully!"
1473
+ expect(result.messagesToRefine).toBeDefined();
1474
+ const refineToolMsgs = result.messagesToRefine!.filter(
1475
+ (m) => m.getType() === 'tool'
1476
+ );
1477
+ const toolInRefine = refineToolMsgs.some(
1478
+ (m) => (m as ToolMessage).tool_call_id === 'tc_eval'
1479
+ );
1480
+ expect(toolInRefine).toBe(true);
1481
+
1482
+ // The parent AI message should also be in messagesToRefine (from prunedMemory)
1483
+ const refineAiMsgs = result.messagesToRefine!.filter(
1484
+ (m) => m.getType() === 'ai'
1485
+ );
1486
+ const aiInRefine = refineAiMsgs.some((m) =>
1487
+ ((m as AIMessage).tool_calls ?? []).some((tc) => tc.id === 'tc_eval')
1488
+ );
1489
+ expect(aiInRefine).toBe(true);
1490
+ });
1491
+ });
1492
+
716
1493
  describe('Integration with Run', () => {
717
1494
  it('should initialize Run with custom token counter and process messages', async () => {
718
1495
  const provider = Providers.OPENAI;
@@ -757,3 +1534,399 @@ describe('Prune Messages Tests', () => {
757
1534
  });
758
1535
  });
759
1536
  });
1537
+
1538
+ describe('sanitizeOrphanToolBlocks', () => {
1539
+ it('strips orphan tool_use blocks from AI messages with no matching ToolMessage', () => {
1540
+ const messages: BaseMessage[] = [
1541
+ new HumanMessage('Hello'),
1542
+ new AIMessage({
1543
+ content: [
1544
+ { type: 'text', text: 'Let me check.' },
1545
+ { type: 'tool_use', id: 'tool_1', name: 'calc', input: { x: 1 } },
1546
+ ],
1547
+ tool_calls: [
1548
+ { id: 'tool_1', name: 'calc', args: { x: 1 }, type: 'tool_call' },
1549
+ ],
1550
+ }),
1551
+ // No ToolMessage for tool_1 — orphan
1552
+ ];
1553
+
1554
+ const result = sanitizeOrphanToolBlocks(messages);
1555
+ // The stripped AI message was the last message → dropped (incomplete tool call)
1556
+ expect(result).toHaveLength(1);
1557
+ expect(result[0].getType()).toBe('human');
1558
+ });
1559
+
1560
+ it('drops orphan ToolMessages whose AI message is missing', () => {
1561
+ const messages: BaseMessage[] = [
1562
+ new HumanMessage('Hello'),
1563
+ new ToolMessage({
1564
+ content: 'result',
1565
+ tool_call_id: 'tool_orphan',
1566
+ name: 'calc',
1567
+ }),
1568
+ new AIMessage('Some response'),
1569
+ ];
1570
+
1571
+ const result = sanitizeOrphanToolBlocks(messages);
1572
+ expect(result).toHaveLength(2); // HumanMessage + AIMessage, orphan ToolMessage dropped
1573
+ expect(result[0].getType()).toBe('human');
1574
+ expect(result[1].getType()).toBe('ai');
1575
+ });
1576
+
1577
+ it('preserves correctly paired tool_use and ToolMessages', () => {
1578
+ const messages: BaseMessage[] = [
1579
+ new HumanMessage('Compute 1+1'),
1580
+ new AIMessage({
1581
+ content: [
1582
+ { type: 'text', text: 'Let me calculate.' },
1583
+ { type: 'tool_use', id: 'tool_a', name: 'calc', input: { x: 1 } },
1584
+ ],
1585
+ tool_calls: [
1586
+ { id: 'tool_a', name: 'calc', args: { x: 1 }, type: 'tool_call' },
1587
+ ],
1588
+ }),
1589
+ new ToolMessage({
1590
+ content: '2',
1591
+ tool_call_id: 'tool_a',
1592
+ name: 'calc',
1593
+ }),
1594
+ new AIMessage('The answer is 2.'),
1595
+ ];
1596
+
1597
+ const result = sanitizeOrphanToolBlocks(messages);
1598
+ expect(result).toHaveLength(4); // All messages preserved
1599
+ expect(result.map((m) => m.getType())).toEqual([
1600
+ 'human',
1601
+ 'ai',
1602
+ 'tool',
1603
+ 'ai',
1604
+ ]);
1605
+ });
1606
+
1607
+ it('drops AI message entirely when it only contained orphan tool_use blocks', () => {
1608
+ const messages: BaseMessage[] = [
1609
+ new HumanMessage('Do something'),
1610
+ new AIMessage({
1611
+ content: [{ type: 'tool_use', id: 'tool_x', name: 'run', input: {} }],
1612
+ tool_calls: [
1613
+ { id: 'tool_x', name: 'run', args: {}, type: 'tool_call' },
1614
+ ],
1615
+ }),
1616
+ // No ToolMessage for tool_x
1617
+ ];
1618
+
1619
+ const result = sanitizeOrphanToolBlocks(messages);
1620
+ // The AI message had only tool_use blocks, stripping them leaves nothing → dropped
1621
+ expect(result).toHaveLength(1);
1622
+ expect(result[0].getType()).toBe('human');
1623
+ });
1624
+
1625
+ it('keeps stripped AI message in the middle but drops stripped trailing AI', () => {
1626
+ const messages: BaseMessage[] = [
1627
+ new HumanMessage('First question'),
1628
+ new AIMessage({
1629
+ content: [
1630
+ { type: 'text', text: 'Let me use two tools.' },
1631
+ { type: 'tool_use', id: 'tool_a', name: 'calc', input: { x: 1 } },
1632
+ {
1633
+ type: 'tool_use',
1634
+ id: 'tool_orphan',
1635
+ name: 'search',
1636
+ input: { q: 'test' },
1637
+ },
1638
+ ],
1639
+ tool_calls: [
1640
+ { id: 'tool_a', name: 'calc', args: { x: 1 }, type: 'tool_call' },
1641
+ {
1642
+ id: 'tool_orphan',
1643
+ name: 'search',
1644
+ args: { q: 'test' },
1645
+ type: 'tool_call',
1646
+ },
1647
+ ],
1648
+ }),
1649
+ new ToolMessage({
1650
+ content: '42',
1651
+ tool_call_id: 'tool_a',
1652
+ name: 'calc',
1653
+ }),
1654
+ // No ToolMessage for tool_orphan, but conversation continues:
1655
+ new AIMessage({
1656
+ content: [{ type: 'text', text: 'Got the calc result.' }],
1657
+ tool_calls: [
1658
+ { id: 'tool_b', name: 'run', args: {}, type: 'tool_call' },
1659
+ ],
1660
+ }),
1661
+ // tool_b is also orphan → stripped, and this AI is last → dropped
1662
+ ];
1663
+
1664
+ const result = sanitizeOrphanToolBlocks(messages);
1665
+ // message[1]: AI has tool_orphan stripped but tool_a kept → stays (middle, not trailing)
1666
+ // message[3]: AI has tool_b stripped, is trailing → dropped
1667
+ expect(result).toHaveLength(3); // HumanMessage, stripped AI (kept tool_a), ToolMessage
1668
+ const ai = result[1] as AIMessage;
1669
+ expect(ai.tool_calls).toHaveLength(1);
1670
+ expect(ai.tool_calls![0].id).toBe('tool_a');
1671
+ expect(result[2].getType()).toBe('tool');
1672
+ });
1673
+
1674
+ it('keeps unmodified trailing AI message (no orphan tool_use)', () => {
1675
+ const messages: BaseMessage[] = [
1676
+ new HumanMessage('Hello'),
1677
+ new ToolMessage({
1678
+ content: 'result',
1679
+ tool_call_id: 'tool_orphan',
1680
+ name: 'calc',
1681
+ }),
1682
+ new AIMessage('Final response without tool calls.'),
1683
+ ];
1684
+
1685
+ const result = sanitizeOrphanToolBlocks(messages);
1686
+ // orphan ToolMessage dropped, trailing AI kept (was not stripped)
1687
+ expect(result).toHaveLength(2);
1688
+ expect(result[0].getType()).toBe('human');
1689
+ expect(result[1].getType()).toBe('ai');
1690
+ });
1691
+
1692
+ it('preserves BaseMessage prototype on stripped AIMessage instances', () => {
1693
+ const messages: BaseMessage[] = [
1694
+ new HumanMessage('Hello'),
1695
+ new AIMessage({
1696
+ content: [
1697
+ { type: 'text', text: 'Let me search and calculate.' },
1698
+ {
1699
+ type: 'tool_use',
1700
+ id: 'tool_a',
1701
+ name: 'search',
1702
+ input: { q: 'test' },
1703
+ },
1704
+ { type: 'tool_use', id: 'tool_b', name: 'calc', input: { x: 1 } },
1705
+ ],
1706
+ tool_calls: [
1707
+ {
1708
+ id: 'tool_a',
1709
+ name: 'search',
1710
+ args: { q: 'test' },
1711
+ type: 'tool_call' as const,
1712
+ },
1713
+ {
1714
+ id: 'tool_b',
1715
+ name: 'calc',
1716
+ args: { x: 1 },
1717
+ type: 'tool_call' as const,
1718
+ },
1719
+ ],
1720
+ }),
1721
+ new ToolMessage({ content: 'result', tool_call_id: 'tool_b' }),
1722
+ // No ToolMessage for tool_a — orphan
1723
+ ];
1724
+
1725
+ const result = sanitizeOrphanToolBlocks(messages);
1726
+ // AI message should survive (tool_a stripped, tool_b kept)
1727
+ expect(result).toHaveLength(3);
1728
+
1729
+ // Every output message must pass isBaseMessage and have getType()
1730
+ for (const msg of result) {
1731
+ expect(isBaseMessage(msg)).toBe(true);
1732
+ expect(typeof msg.getType()).toBe('string');
1733
+ }
1734
+ expect(result[1].getType()).toBe('ai');
1735
+ expect(result[1]).toBeInstanceOf(AIMessage);
1736
+ });
1737
+
1738
+ it('preserves AIMessageChunk prototype on stripped messages', () => {
1739
+ // Simulate what happens in real graph execution: model returns AIMessageChunk,
1740
+ // state passes through LangGraph, sanitizeOrphanToolBlocks strips orphan server tools.
1741
+ const chunk = new AIMessageChunk({
1742
+ content: [
1743
+ { type: 'text', text: 'Searching...' },
1744
+ { type: 'tool_use', id: 'srvtoolu_1', name: 'web_search', input: '' },
1745
+ { type: 'tool_use', id: 'toolu_2', name: 'calculator', input: '2+2' },
1746
+ ],
1747
+ tool_call_chunks: [
1748
+ { id: 'srvtoolu_1', index: 0, name: 'web_search', args: '' },
1749
+ { id: 'toolu_2', index: 2, name: 'calculator', args: '2+2' },
1750
+ ],
1751
+ });
1752
+
1753
+ const messages: BaseMessage[] = [
1754
+ new HumanMessage('Search and calculate'),
1755
+ chunk,
1756
+ new ToolMessage({ content: '4', tool_call_id: 'toolu_2' }),
1757
+ // No ToolMessage for srvtoolu_1 — server tool, orphan
1758
+ ];
1759
+
1760
+ const result = sanitizeOrphanToolBlocks(messages);
1761
+ expect(result).toHaveLength(3);
1762
+
1763
+ // The AIMessageChunk must retain its prototype so LangChain's
1764
+ // coerceMessageLikeToMessage recognizes it as a BaseMessage.
1765
+ const aiMsg = result[1];
1766
+ expect(isBaseMessage(aiMsg)).toBe(true);
1767
+ expect(typeof aiMsg.getType()).toBe('string');
1768
+ expect(aiMsg.getType()).toBe('ai');
1769
+ });
1770
+
1771
+ it('preserves prototype on plain-object messages with duck-typed patching', () => {
1772
+ // Simulate deserialized messages that still have a prototype (e.g. from
1773
+ // LangGraph subgraph state transfer) but aren't class instances.
1774
+ const proto = { _getType: (): string => 'ai', getType: (): string => 'ai' };
1775
+ const plainAi = Object.create(proto);
1776
+ Object.assign(plainAi, {
1777
+ role: 'assistant',
1778
+ content: [
1779
+ { type: 'text', text: 'checking' },
1780
+ { type: 'tool_use', id: 'orphan_1', name: 'tool', input: {} },
1781
+ ],
1782
+ tool_calls: [
1783
+ { id: 'orphan_1', name: 'tool', args: {}, type: 'tool_call' },
1784
+ ],
1785
+ });
1786
+
1787
+ const messages = [plainAi] as BaseMessage[];
1788
+ sanitizeOrphanToolBlocks(messages);
1789
+
1790
+ // Stripped AI was trailing → dropped. But if we add a human after:
1791
+ const messages2 = [
1792
+ new HumanMessage('hi'),
1793
+ plainAi,
1794
+ new HumanMessage('follow up'),
1795
+ ] as BaseMessage[];
1796
+ const result2 = sanitizeOrphanToolBlocks(messages2);
1797
+
1798
+ // The patched message in the middle must still have _getType from proto
1799
+ const middleMsg = result2[1];
1800
+ expect(typeof middleMsg._getType).toBe('function');
1801
+ expect(middleMsg._getType()).toBe('ai');
1802
+ });
1803
+
1804
+ it('handles plain objects (non-BaseMessage instances) via duck typing', () => {
1805
+ // Simulate messages that have lost their class instances (LangGraph state serialization)
1806
+ const plainMessages = [
1807
+ { role: 'user', content: 'Hello', _type: 'human' },
1808
+ {
1809
+ role: 'assistant',
1810
+ _type: 'ai',
1811
+ content: [
1812
+ { type: 'text', text: 'Let me check.' },
1813
+ { type: 'tool_use', id: 'tool_1', name: 'calc', input: { x: 1 } },
1814
+ ],
1815
+ tool_calls: [
1816
+ { id: 'tool_1', name: 'calc', args: { x: 1 }, type: 'tool_call' },
1817
+ ],
1818
+ },
1819
+ // No ToolMessage for tool_1 — orphan
1820
+ ] as unknown as BaseMessage[];
1821
+
1822
+ // Should not throw "getType is not a function"
1823
+ const result = sanitizeOrphanToolBlocks(plainMessages);
1824
+ // The stripped AI message was the last message → dropped (incomplete tool call)
1825
+ expect(result).toHaveLength(1);
1826
+ });
1827
+ });
1828
+
1829
+ describe('prunedMemory ordering with thinking enabled', () => {
1830
+ it('messagesToRefine preserves chronological order when thinking search pops multiple messages', () => {
1831
+ const tokenCounter = createTestTokenCounter();
1832
+ const messages: BaseMessage[] = [
1833
+ new HumanMessage('Hello'),
1834
+ new AIMessage({
1835
+ content: [
1836
+ {
1837
+ type: ContentTypes.REASONING_CONTENT,
1838
+ reasoningText: {
1839
+ text: 'Thinking about navigation...',
1840
+ signature: 'sig1',
1841
+ },
1842
+ },
1843
+ { type: 'text', text: 'Navigating now.' },
1844
+ ],
1845
+ tool_calls: [
1846
+ {
1847
+ id: 'tc_nav',
1848
+ name: 'navigate',
1849
+ args: { url: 'about:blank' },
1850
+ type: 'tool_call',
1851
+ },
1852
+ ],
1853
+ }),
1854
+ new ToolMessage({
1855
+ content: 'Navigated to about:blank.',
1856
+ tool_call_id: 'tc_nav',
1857
+ name: 'navigate',
1858
+ }),
1859
+ new AIMessage({
1860
+ content: [
1861
+ {
1862
+ type: ContentTypes.REASONING_CONTENT,
1863
+ reasoningText: {
1864
+ text: 'Now I will write code...',
1865
+ signature: 'sig2',
1866
+ },
1867
+ },
1868
+ { type: 'text', text: 'Running script.' },
1869
+ ],
1870
+ tool_calls: [
1871
+ {
1872
+ id: 'tc_eval',
1873
+ name: 'evaluate',
1874
+ args: { code: 'x'.repeat(5000) },
1875
+ type: 'tool_call',
1876
+ },
1877
+ ],
1878
+ }),
1879
+ new ToolMessage({
1880
+ content: 'y'.repeat(5000), // large tool result
1881
+ tool_call_id: 'tc_eval',
1882
+ name: 'evaluate',
1883
+ }),
1884
+ ];
1885
+
1886
+ const indexTokenCountMap: Record<string, number | undefined> = {};
1887
+ for (let i = 0; i < messages.length; i++) {
1888
+ indexTokenCountMap[i] = tokenCounter(messages[i]);
1889
+ }
1890
+
1891
+ // Use a very tight budget so the backward iteration must prune messages
1892
+ // The thinking search will cause the loop to `continue` past the large ToolMessage
1893
+ const result = realGetMessagesWithinTokenLimit({
1894
+ messages,
1895
+ maxContextTokens: 200, // very tight
1896
+ indexTokenCountMap,
1897
+ thinkingEnabled: true,
1898
+ tokenCounter,
1899
+ reasoningType: ContentTypes.REASONING_CONTENT,
1900
+ });
1901
+
1902
+ // The key assertion: messagesToRefine must be in chronological order.
1903
+ // AI(evaluate) at index 3 must come BEFORE ToolMessage(evaluate) at index 4.
1904
+ for (let i = 0; i < result.messagesToRefine.length - 1; i++) {
1905
+ const current = result.messagesToRefine[i];
1906
+ const next = result.messagesToRefine[i + 1];
1907
+ // A ToolMessage should never come before its AI message
1908
+ if (next.getType() === 'ai' && current.getType() === 'tool') {
1909
+ const toolId = (current as ToolMessage).tool_call_id;
1910
+ const aiToolIds = ((next as AIMessage).tool_calls ?? []).map(
1911
+ (tc) => tc.id
1912
+ );
1913
+ expect(aiToolIds).not.toContain(toolId);
1914
+ }
1915
+ }
1916
+
1917
+ // Verify the specific ordering: if both AI(evaluate) and Tool(evaluate) are in
1918
+ // messagesToRefine, AI must come first.
1919
+ const evalAiIdx = result.messagesToRefine.findIndex(
1920
+ (m) =>
1921
+ m.getType() === 'ai' &&
1922
+ ((m as AIMessage).tool_calls ?? []).some((tc) => tc.id === 'tc_eval')
1923
+ );
1924
+ const evalToolIdx = result.messagesToRefine.findIndex(
1925
+ (m) =>
1926
+ m.getType() === 'tool' && (m as ToolMessage).tool_call_id === 'tc_eval'
1927
+ );
1928
+ if (evalAiIdx >= 0 && evalToolIdx >= 0) {
1929
+ expect(evalAiIdx).toBeLessThan(evalToolIdx);
1930
+ }
1931
+ });
1932
+ });