@librechat/agents 3.1.57 → 3.1.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +3 -3
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3827 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
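The bulk of this release is a new context-pruning and summarization pipeline (messages/prune, messages/contextPruning, contextPruningSettings, summarization/*, utils/truncation, plus new llm/init, llm/invoke, and llm/request helpers). The new summarize-prune.test.ts diff below exercises the pruning entry point; as a reading aid, here is a minimal TypeScript usage sketch with every shape inferred from those tests. The '@/' imports are the package-internal aliases the tests use; this diff does not confirm whether these symbols are re-exported from the package root.

import type { BaseMessage } from '@langchain/core/messages';
import { HumanMessage, AIMessage, SystemMessage } from '@langchain/core/messages';
// Package-internal aliases, as used by the tests below (public export path unverified).
import { createPruneMessages } from '@/messages/prune';
import { Providers } from '@/common';

// Rough ~4-chars-per-token estimate, the same heuristic the tests use.
const tokenCounter = (msg: { content: unknown }): number => {
  const content =
    typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
  return Math.ceil(content.length / 4);
};

const messages: BaseMessage[] = [
  new SystemMessage('You are a helpful assistant.'),
  new HumanMessage('First question'),
  new AIMessage('First answer'),
];

// Per-message token estimates, keyed by message index.
const indexTokenCountMap: Record<string, number | undefined> = {};
messages.forEach((m, i) => (indexTokenCountMap[i] = tokenCounter(m)));

const pruneMessages = createPruneMessages({
  provider: Providers.OPENAI,
  maxTokens: 1000,             // context budget for the next request
  startIndex: messages.length, // entries at/after this index are treated as new output
  tokenCounter,
  indexTokenCountMap,
});

// Returns the messages that still fit (`context`, system message preserved),
// the overflow earmarked for summarization (`messagesToRefine`), and the
// `remainingContextTokens` left in the budget.
const { context, messagesToRefine, remainingContextTokens } = pruneMessages({
  messages,
});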
package/src/specs/summarize-prune.test.ts
@@ -0,0 +1,376 @@
+import {
+  HumanMessage,
+  AIMessage,
+  SystemMessage,
+} from '@langchain/core/messages';
+import type { UsageMetadata } from '@langchain/core/messages';
+import { createPruneMessages } from '@/messages/prune';
+import { Providers, ContentTypes } from '@/common';
+
+function tokenCounter(msg: { content: unknown }): number {
+  const content =
+    typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
+  return Math.ceil(content.length / 4);
+}
+
+describe('Prune + Summarize Integration', () => {
+  it('should return messagesToRefine when messages exceed token limit', () => {
+    const messages = [
+      new SystemMessage('You are a helpful assistant.'),
+      new HumanMessage('First question'),
+      new AIMessage('First answer'),
+      new HumanMessage('Second question'),
+      new AIMessage('Second answer'),
+      new HumanMessage('Third question'),
+      new AIMessage(
+        'Third answer that is quite long to push things over the limit'
+      ),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const totalTokens = Object.values(indexTokenCountMap).reduce(
+      (a = 0, b = 0) => a! + b!,
+      0
+    ) as number;
+    const maxTokens = Math.floor(totalTokens * 0.6);
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.messagesToRefine).toBeDefined();
+    expect(result.messagesToRefine!.length).toBeGreaterThan(0);
+    expect(result.remainingContextTokens).toBeDefined();
+    expect(typeof result.remainingContextTokens).toBe('number');
+    expect(result.context.length).toBeLessThan(messages.length);
+  });
+
+  it('should return empty messagesToRefine when all messages fit', () => {
+    const messages = [new HumanMessage('Hi'), new AIMessage('Hello')];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 10000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.messagesToRefine).toBeDefined();
+    expect(result.messagesToRefine).toHaveLength(0);
+    expect(result.remainingContextTokens).toBeGreaterThan(0);
+    expect(result.context).toEqual(messages);
+  });
+
+  it('should preserve system message in context even when pruning', () => {
+    const sysMsg = new SystemMessage(
+      'Instructions for the assistant to follow carefully'
+    );
+    const messages = [
+      sysMsg,
+      new HumanMessage(
+        'This is the first message in our conversation and it is fairly long'
+      ),
+      new AIMessage(
+        'This is the first response and it is also fairly long with details'
+      ),
+      new HumanMessage(
+        'This is the second message with more context and questions'
+      ),
+      new AIMessage(
+        'This is the second response which is even more detailed and verbose'
+      ),
+      new HumanMessage('Third message in the conversation chain'),
+      new AIMessage('Third response with additional lengthy explanations'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const totalTokens = Object.values(indexTokenCountMap).reduce(
+      (a = 0, b = 0) => a! + b!,
+      0
+    ) as number;
+    const maxTokens = Math.floor(totalTokens * 0.35);
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.context[0]).toBe(sysMsg);
+    expect(result.messagesToRefine!.length).toBeGreaterThan(0);
+  });
+
+  it('should not include summary content type in pruned messages passed to providers', () => {
+    const summaryBlock = {
+      type: ContentTypes.SUMMARY,
+      text: 'Summary of prior conversation',
+    };
+    expect(summaryBlock.type).toBe('summary');
+    expect(Object.values(ContentTypes)).toContain('summary');
+  });
+});
+
+describe('pruneMessages ratio-based token grounding', () => {
+  it('should adjust indexTokenCountMap entries proportionally when usageMetadata is provided', () => {
+    const messages = [
+      new SystemMessage('Be concise.'),
+      new HumanMessage('What is 2+2?'),
+      new AIMessage('The answer is 4.'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 10,
+      1: 20,
+      2: 30,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 5000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 50,
+      output_tokens: 40,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    // Map stays in raw tiktoken space — calibrationRatio captures the multiplier.
+    const originalTotal = 10 + 20 + 30;
+    const expectedRatio = 50 / originalTotal;
+
+    expect(result.indexTokenCountMap[0]).toBe(10);
+    expect(result.indexTokenCountMap[1]).toBe(20);
+    expect(result.indexTokenCountMap[2]).toBe(30);
+    expect(result.calibrationRatio).toBeCloseTo(expectedRatio, 2);
+  });
+
+  it('should NOT adjust when ratio falls outside safe bounds (< 1/3)', () => {
+    const messages = [
+      new HumanMessage('What is 2+2?'),
+      new AIMessage('The answer is 4.'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 100,
+      1: 200,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 50000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 5,
+      output_tokens: 5,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    expect(result.indexTokenCountMap[0]).toBe(100);
+    expect(result.indexTokenCountMap[1]).toBe(200);
+  });
+
+  it('should NOT adjust when ratio falls outside safe bounds (> 2.5)', () => {
+    const messages = [new HumanMessage('Hi'), new AIMessage('Hello')];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 5,
+      1: 5,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 50000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 100,
+      output_tokens: 100,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    expect(result.indexTokenCountMap[0]).toBe(5);
+    expect(result.indexTokenCountMap[1]).toBe(5);
+  });
+
+  it('should include cache_read and cache_creation in ratio total', () => {
+    const messages = [
+      new SystemMessage('Instructions'),
+      new HumanMessage('Hello'),
+      new AIMessage('Hi there!'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 10,
+      1: 20,
+      2: 30,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.ANTHROPIC,
+      maxTokens: 5000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    // Anthropic: cache_read (15) + cache_creation (10) = 25 > input_tokens (30)?
+    // No, 25 < 30, so NOT additive. totalInput = 30.
+    // providerMessageTokens = 30 - 0 (no instruction overhead) = 30.
+    // ratio = 30 / 60 = 0.5 — safe (>= 1/3, <= 2.5).
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 30,
+      output_tokens: 20,
+      input_token_details: {
+        cache_read: 15,
+        cache_creation: 10,
+      },
+    };
+
+    const originalTotal = 10 + 20 + 30;
+    const expectedRatio = 30 / originalTotal;
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    // Map stays raw — calibrationRatio captures the multiplier
+    expect(result.indexTokenCountMap[0]).toBe(10);
+    expect(result.indexTokenCountMap[1]).toBe(20);
+    expect(result.indexTokenCountMap[2]).toBe(30);
+    expect(result.calibrationRatio).toBeCloseTo(expectedRatio, 2);
+  });
+
+  it('should assign output_tokens to the first new message at startIndex', () => {
+    const messages = [
+      new HumanMessage('What is 2+2?'),
+      new AIMessage('The answer is 4.'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 15,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 5000,
+      startIndex: 1,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 20,
+      output_tokens: 25,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    expect(result.indexTokenCountMap[1]).toBeDefined();
+    expect(result.indexTokenCountMap[1] as number).toBeGreaterThan(0);
+
+    // index[1] is the AI response at startIndex — assigned output_tokens (25).
+    // Calibration: providerMessageTokens = input_tokens (20) - overhead (0) = 20.
+    // messageTokenSum = index[0] (15) + index[1] is newOutput so excluded = 15.
+    // ratio = 20 / 15 = 1.33 — safe.
+    const preRatioIndex0 = 15;
+    const ratio = 20 / preRatioIndex0;
+    const isRatioSafe = ratio >= 1 / 3 && ratio <= 2.5;
+
+    // Map stays raw regardless of ratio safety
+    expect(result.indexTokenCountMap[0]).toBe(preRatioIndex0);
+    if (isRatioSafe) {
+      expect(result.calibrationRatio).toBeCloseTo(ratio, 1);
+    }
+  });
+
+  it('should ground tokens correctly across multiple pruneMessages calls', () => {
+    const turn1Messages = [
+      new SystemMessage('Be concise.'),
+      new HumanMessage('What is 2+2?'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 10,
+      1: 20,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 5000,
+      startIndex: turn1Messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const turn1Result = pruneMessages({
+      messages: turn1Messages,
+    });
+
+    expect(turn1Result.indexTokenCountMap[0]).toBe(10);
+    expect(turn1Result.indexTokenCountMap[1]).toBe(20);
+
+    const turn2Messages = [
+      ...turn1Messages,
+      new AIMessage('4'),
+      new HumanMessage('And 3+3?'),
+    ];
+
+    const turn2Usage: Partial<UsageMetadata> = {
+      input_tokens: 25,
+      output_tokens: 10,
+    };
+
+    const turn2Result = pruneMessages({
+      messages: turn2Messages,
+      usageMetadata: turn2Usage,
+    });
+
+    expect(turn2Result.indexTokenCountMap[2]).toBeDefined();
+    expect(turn2Result.indexTokenCountMap[2] as number).toBeGreaterThan(0);
+    expect(turn2Result.indexTokenCountMap[3]).toBeDefined();
+    expect(turn2Result.indexTokenCountMap[3] as number).toBeGreaterThan(0);
+
+    for (let i = 0; i < turn2Messages.length; i++) {
+      expect(turn2Result.indexTokenCountMap[i]).toBeDefined();
+      expect(turn2Result.indexTokenCountMap[i] as number).toBeGreaterThan(0);
+    }
+  });
+});
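Taken together, the ratio-grounding tests above pin down the calibration rule: local token estimates stay raw in indexTokenCountMap, and a separate calibrationRatio reconciles them against provider-reported usage, applied only inside a safe band. A compact restatement of that arithmetic, mirroring the test comments rather than the implementation in src/messages/prune.ts:

// Local per-message estimates (tiktoken-style) vs. provider-reported usage.
const indexTokenCounts = [10, 20, 30];
const input_tokens = 50; // provider-reported prompt tokens for the last call

// Sum of local estimates, excluding any entry that was assigned from
// output_tokens (i.e. the first new message at startIndex).
const messageTokenSum = indexTokenCounts.reduce((a, b) => a + b, 0); // 60

// calibrationRatio = provider truth / local estimate; the map itself stays raw.
const calibrationRatio = input_tokens / messageTokenSum; // 50 / 60 ≈ 0.83

// Applied only inside the safe band the tests assert: [1/3, 2.5].
const isRatioSafe = calibrationRatio >= 1 / 3 && calibrationRatio <= 2.5; // true

// Anthropic cache accounting (per the cache_read/cache_creation test): cache
// tokens count as additive only when their sum exceeds input_tokens;
// otherwise input_tokens is assumed to already include them.
const cacheInput = 30;    // input_tokens in the cache test
const cacheSum = 15 + 10; // cache_read + cache_creation = 25
const totalInput = cacheSum > cacheInput ? cacheInput + cacheSum : cacheInput; // 30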
package/src/specs/thinking-handoff.test.ts
@@ -61,7 +61,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   agentId: 'specialist',
   provider: Providers.ANTHROPIC,
   clientOptions: {
-    modelName: 'claude-3-7-sonnet-20250219',
+    modelName: 'claude-sonnet-4-5-20250929',
     apiKey: 'test-key',
     thinking: {
       type: 'enabled',
@@ -150,7 +150,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   agentId: 'agent_b',
   provider: Providers.ANTHROPIC,
   clientOptions: {
-    modelName: 'claude-3-7-sonnet-20250219',
+    modelName: 'claude-sonnet-4-5-20250929',
     apiKey: 'test-key',
     thinking: {
       type: 'enabled',
@@ -195,7 +195,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   provider: Providers.BEDROCK,
   clientOptions: {
     region: 'us-east-1',
-    model: 'anthropic.claude-3-5-haiku-20241022-v1:0',
+    model: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
     // No thinking config
   },
   instructions: 'You are a coordinator',
@@ -206,7 +206,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   provider: Providers.BEDROCK,
   clientOptions: {
     region: 'us-east-1',
-    model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
+    model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
     additionalModelRequestFields: {
       thinking: {
         type: 'enabled',
@@ -280,7 +280,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   provider: Providers.BEDROCK,
   clientOptions: {
     region: 'us-east-1',
-    model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
+    model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
     additionalModelRequestFields: {
       thinking: {
         type: 'enabled',
@@ -343,7 +343,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   provider: Providers.BEDROCK,
   clientOptions: {
     region: 'us-east-1',
-    model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
+    model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
     additionalModelRequestFields: {
       thinking: {
         type: 'enabled',
@@ -434,7 +434,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   agentId: 'reviewer',
   provider: Providers.ANTHROPIC,
   clientOptions: {
-    modelName: 'claude-3-7-sonnet-20250219',
+    modelName: 'claude-sonnet-4-5-20250929',
     apiKey: 'test-key',
     thinking: {
       type: 'enabled',
@@ -508,7 +508,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   agentId: 'agent_a',
   provider: Providers.ANTHROPIC,
   clientOptions: {
-    modelName: 'claude-3-7-sonnet-20250219',
+    modelName: 'claude-sonnet-4-5-20250929',
     apiKey: 'test-key',
     thinking: {
       type: 'enabled',
@@ -522,7 +522,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   agentId: 'agent_b',
   provider: Providers.ANTHROPIC,
   clientOptions: {
-    modelName: 'claude-3-7-sonnet-20250219',
+    modelName: 'claude-sonnet-4-5-20250929',
     apiKey: 'test-key',
     thinking: {
       type: 'enabled',
@@ -587,7 +587,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
   agentId: 'agent_b',
   provider: Providers.ANTHROPIC,
   clientOptions: {
-    modelName: 'claude-3-7-sonnet-20250219',
+    modelName: 'claude-sonnet-4-5-20250929',
     apiKey: 'test-key',
     thinking: {
       type: 'enabled',

package/src/specs/thinking-prune.test.ts
@@ -741,6 +741,7 @@ describe('Prune Messages with Thinking Mode Tests', () => {
   tokenCounter,
   indexTokenCountMap: { ...indexTokenCountMap },
   thinkingEnabled: true,
+  reserveRatio: 0,
 });

 // Prune messages
@@ -767,7 +768,7 @@ describe('Prune Messages with Thinking Mode Tests', () => {
     );
   });

-  it('should throw descriptive error when aggressive pruning removes all AI messages', () => {
+  it('should gracefully degrade when aggressive pruning removes all AI messages', () => {
    const tokenCounter = createTestTokenCounter();

    const assistantMessageWithThinking = new AIMessage({
@@ -817,8 +818,10 @@ describe('Prune Messages with Thinking Mode Tests', () => {
      thinkingEnabled: true,
    });

-    expect(() => pruneMessages({ messages })).toThrow(
-      /Context window exceeded/
-    );
+    // Should not throw — gracefully degrades by skipping thinking block reattachment
+    expect(() => pruneMessages({ messages })).not.toThrow();
+    const result = pruneMessages({ messages });
+    // Context should contain at most the system message (no AI messages survived)
+    expect(result.context.length).toBeLessThanOrEqual(2);
   });
 });
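One behavioral note falls out of the last hunk: with thinking enabled, pruneMessages no longer throws /Context window exceeded/ when pruning removes every AI message; it returns a degraded result instead. Callers that previously wrapped the call in try/catch can switch to inspecting the result. A caller-side sketch, under the same assumptions as the earlier examples about the '@/' import paths and result shape:

import { HumanMessage, SystemMessage } from '@langchain/core/messages';
import { createPruneMessages } from '@/messages/prune';
import { Providers } from '@/common';

const tokenCounter = (msg: { content: unknown }): number =>
  Math.ceil(JSON.stringify(msg.content).length / 4);

const messages = [
  new SystemMessage('Instructions'),
  new HumanMessage('A very long user turn that blows the budget'),
];

const pruneMessages = createPruneMessages({
  provider: Providers.ANTHROPIC,
  maxTokens: 10, // deliberately tiny budget to force total pruning
  startIndex: messages.length,
  tokenCounter,
  indexTokenCountMap: { 0: 5, 1: 500 },
  thinkingEnabled: true,
});

// As of 3.1.61 this call degrades instead of throwing.
const result = pruneMessages({ messages });
if (result.context.length <= 1) {
  // At most the system message survived; there is no AI message left to
  // reattach a thinking block to. Treat the turn as over budget, e.g. by
  // summarizing result.messagesToRefine before retrying.
}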