@librechat/agents 3.1.56 → 3.1.60

This diff shows the published contents of package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the released versions.
Files changed (214)
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +2 -2
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
package/src/specs/token-accounting-pipeline.test.ts
@@ -0,0 +1,882 @@
+ import {
+   AIMessage,
+   ToolMessage,
+   BaseMessage,
+   HumanMessage,
+   SystemMessage,
+ } from '@langchain/core/messages';
+ import type { UsageMetadata } from '@langchain/core/messages';
+ import type * as t from '@/types';
+ import { createPruneMessages, calculateTotalTokens } from '@/messages/prune';
+ import { Providers } from '@/common';
+
+ // ---------------------------------------------------------------------------
+ // Deterministic char-based token counter — 1 char = 1 token, plus 3 per-message
+ // overhead (matches the real getTokenCountForMessage tokensPerMessage constant).
+ // ---------------------------------------------------------------------------
+ const charCounter: t.TokenCounter = (msg: BaseMessage): number => {
+   const content = msg.content;
+   if (typeof content === 'string') {
+     return content.length + 3;
+   }
+   if (Array.isArray(content)) {
+     let len = 3;
+     for (const item of content as Array<
+       string | { type: string; text?: string }
+     >) {
+       if (typeof item === 'string') {
+         len += item.length;
+       } else if (
+         typeof item === 'object' &&
+         'text' in item &&
+         item.text != null &&
+         item.text
+       ) {
+         len += item.text.length;
+       }
+     }
+     return len;
+   }
+   return 3;
+ };
+
+ function toolMsg(
+   content: string,
+   name = 'tool',
+   toolCallId = `tc_${Math.random().toString(36).slice(2, 8)}`
+ ): ToolMessage {
+   return new ToolMessage({ content, tool_call_id: toolCallId, name });
+ }
+
+ function aiToolCall(toolCallId: string, name = 'tool'): AIMessage {
+   return new AIMessage({
+     content: [{ type: 'tool_use', id: toolCallId, name, input: {} }],
+     tool_calls: [{ id: toolCallId, name, args: {}, type: 'tool_call' }],
+   });
+ }
+
+ // ---------------------------------------------------------------------------
+ // Tests
+ // ---------------------------------------------------------------------------
+
+ describe('calculateTotalTokens — cache detection heuristic', () => {
+   it('treats cache as additive when cacheSum > baseInputTokens (Anthropic pattern)', () => {
+     const result = calculateTotalTokens({
+       input_tokens: 100,
+       output_tokens: 20,
+       input_token_details: { cache_creation: 50, cache_read: 200 },
+     });
+     expect(result).toEqual({
+       input_tokens: 350,
+       output_tokens: 20,
+       total_tokens: 370,
+     });
+   });
+
+   it('does NOT add cache when cacheSum <= baseInputTokens (OpenAI pattern)', () => {
+     const result = calculateTotalTokens({
+       input_tokens: 300,
+       output_tokens: 20,
+       input_token_details: { cache_read: 100 },
+     });
+     expect(result).toEqual({
+       input_tokens: 300,
+       output_tokens: 20,
+       total_tokens: 320,
+     });
+   });
+
+   it('handles zero cache gracefully', () => {
+     const result = calculateTotalTokens({
+       input_tokens: 500,
+       output_tokens: 50,
+     });
+     expect(result).toEqual({
+       input_tokens: 500,
+       output_tokens: 50,
+       total_tokens: 550,
+     });
+   });
+
+   it('handles all-zero usage', () => {
+     const result = calculateTotalTokens({});
+     expect(result).toEqual({
+       input_tokens: 0,
+       output_tokens: 0,
+       total_tokens: 0,
+     });
+   });
+ });
+
+ describe('Token accounting pipeline — multi-turn calibration', () => {
+   it('calibration scales message tokens to match provider input_tokens', () => {
+     const messages = [
+       new SystemMessage('You are a helpful assistant.'),
+       new HumanMessage('Hello'),
+       new AIMessage('Hi!'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+     }
+     const originalSum = Object.values(indexTokenCountMap).reduce(
+       (a, b) => (a ?? 0) + (b ?? 0),
+       0
+     ) as number;
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: 5000,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const providerInput = Math.round(originalSum * 1.4);
+     const result = pruneMessages({
+       messages,
+       usageMetadata: { input_tokens: providerInput, output_tokens: 30 },
+     });
+
+     // Map stays in raw tiktoken space — calibrationRatio captures the multiplier.
+     // rawSum * calibrationRatio should approximate providerInput.
+     let rawSum = 0;
+     for (let i = 0; i < messages.length; i++) {
+       rawSum += result.indexTokenCountMap[i] ?? 0;
+     }
+     const calibratedEstimate = Math.round(
+       rawSum * (result.calibrationRatio ?? 1)
+     );
+
+     expect(Math.abs(calibratedEstimate - providerInput)).toBeLessThanOrEqual(
+       messages.length
+     );
+   });
+
+   it('first response at startIndex gets output_tokens and is excluded from calibration ratio', () => {
+     const messages = [
+       new HumanMessage('What is the meaning of life?'),
+       new AIMessage('42.'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: charCounter(messages[0]),
+     };
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: 5000,
+       startIndex: 1,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({
+       messages,
+       usageMetadata: { input_tokens: 40, output_tokens: 10 },
+     });
+
+     // Index 1 should be assigned output_tokens
+     expect(result.indexTokenCountMap[1]).toBe(10);
+
+     // Map stays raw — index 0 keeps its original count.
+     // calibrationRatio captures providerInput / rawMessageSum.
+     const index0Original = charCounter(messages[0]);
+     expect(result.indexTokenCountMap[0]).toBe(index0Original);
+     const expectedRatio = 40 / index0Original;
+     expect(result.calibrationRatio).toBeCloseTo(expectedRatio, 1);
+   });
+
+   it('unsafe ratio (< 1/3) prevents calibration — map stays unchanged', () => {
+     const messages = [
+       new HumanMessage('Long message content here'),
+       new AIMessage('Also a long response here'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: 200,
+       1: 300,
+     };
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: 50000,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({
+       messages,
+       usageMetadata: { input_tokens: 10, output_tokens: 5 },
+     });
+
+     // ratio = 10/500 = 0.02, way below 1/3
+     expect(result.indexTokenCountMap[0]).toBe(200);
+     expect(result.indexTokenCountMap[1]).toBe(300);
+   });
+
+   it('unsafe ratio (> 2.5) prevents calibration', () => {
+     const messages = [new HumanMessage('Hi'), new AIMessage('Hello')];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: 5,
+       1: 8,
+     };
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: 50000,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({
+       messages,
+       usageMetadata: { input_tokens: 500, output_tokens: 100 },
+     });
+
+     // ratio = 500/13 = 38.5, way above 2.5
+     expect(result.indexTokenCountMap[0]).toBe(5);
+     expect(result.indexTokenCountMap[1]).toBe(8);
+   });
+
+   it('multi-turn closure state persists calibrated values across calls', () => {
+     // Simulate realistic flow: human message counted before creating pruner,
+     // AI response is the first "new" message at startIndex.
+     const messages: BaseMessage[] = [
+       new HumanMessage('Turn 1 question'),
+       new AIMessage('Turn 1 answer'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: charCounter(messages[0]),
+     };
+
+     // startIndex=1: the human message (0) was pre-existing, AI response (1) is new
+     const pruneMessages = createPruneMessages({
+       maxTokens: 10000,
+       startIndex: 1,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     // Turn 1: model responds — index 1 (at startIndex) gets output_tokens
+     const turn1 = pruneMessages({
+       messages,
+       usageMetadata: { input_tokens: 25, output_tokens: 20 },
+     });
+
+     expect(turn1.indexTokenCountMap[1]).toBe(20);
+     // Map stays raw — calibrationRatio captures the multiplier
+     const index0Original = charCounter(messages[0]);
+     expect(turn1.indexTokenCountMap[0]).toBe(index0Original);
+     const turn1Ratio = 25 / index0Original;
+     if (turn1Ratio >= 0.5 && turn1Ratio <= 5) {
+       expect(turn1.calibrationRatio).toBeCloseTo(turn1Ratio, 1);
+     }
+
+     // Turn 2: user sends message, model responds. Both new indices (2, 3) are unset.
+     // In real flow, the user message (2) is counted before processStream,
+     // but here the pruner hasn't seen it. Index 2 at lastTurnStartIndex gets output_tokens.
+     messages.push(new HumanMessage('Turn 2 question'));
+     messages.push(new AIMessage('Turn 2 answer'));
+
+     const turn2 = pruneMessages({
+       messages,
+       usageMetadata: { input_tokens: 60, output_tokens: 15 },
+     });
+
+     // All 4 indices should be populated
+     for (let i = 0; i < 4; i++) {
+       expect(turn2.indexTokenCountMap[i]).toBeDefined();
+       expect(turn2.indexTokenCountMap[i] as number).toBeGreaterThan(0);
+     }
+   });
+ });
+
+ describe('Token accounting pipeline — budget computation and context pressure', () => {
+   it('getInstructionTokens reduces effective budget', () => {
+     // 5 messages, each ~20 chars → ~23 tokens with 3-token overhead
+     const messages = [
+       new HumanMessage('a'.repeat(20)),
+       new AIMessage('b'.repeat(20)),
+       new HumanMessage('c'.repeat(20)),
+       new AIMessage('d'.repeat(20)),
+       new HumanMessage('e'.repeat(20)),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     let totalEstimate = 0;
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+       totalEstimate += indexTokenCountMap[i] ?? 0;
+     }
+
+     // Set maxTokens so messages fit WITHOUT instruction overhead
+     // but do NOT fit WITH 50 tokens of instruction overhead
+     const tightBudget = totalEstimate + 10;
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: tightBudget,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+       getInstructionTokens: () => 50,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // With 50 tokens of instruction overhead on a tight budget,
+     // pruning should have kicked in — context should be shorter
+     expect(result.context.length).toBeLessThan(messages.length);
+   });
+
+   it('reserve ratio reduces pruning budget by the configured fraction', () => {
+     const messages = [
+       new HumanMessage('x'.repeat(80)),
+       new AIMessage('y'.repeat(80)),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     let totalEstimate = 0;
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+       totalEstimate += indexTokenCountMap[i] ?? 0;
+     }
+
+     // With 0% reserve, messages fit. With 20% reserve, they won't.
+     const maxTokens = totalEstimate + 5;
+
+     const withReserve = createPruneMessages({
+       maxTokens,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap: { ...indexTokenCountMap },
+       reserveRatio: 0.2,
+     });
+
+     const withoutReserve = createPruneMessages({
+       maxTokens,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap: { ...indexTokenCountMap },
+       reserveRatio: 0,
+     });
+
+     const resultWithReserve = withReserve({ messages });
+     const resultWithoutReserve = withoutReserve({ messages });
+
+     expect(resultWithoutReserve.context.length).toBe(2);
+     expect(resultWithReserve.context.length).toBeLessThan(2);
+   });
+
+   it('context pressure is computed after calibration and recount', () => {
+     const messages = [
+       new HumanMessage('a'.repeat(100)),
+       new AIMessage('b'.repeat(100)),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: charCounter(messages[0]),
+       1: charCounter(messages[1]),
+     };
+     const ourEstimate =
+       (indexTokenCountMap[0] ?? 0) + (indexTokenCountMap[1] ?? 0);
+
+     // Provider says input is 2× our estimate — calibration should inflate
+     const providerInput = ourEstimate * 2;
+     const maxTokens = Math.round(providerInput * 1.2);
+
+     const pruneMessages = createPruneMessages({
+       maxTokens,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({
+       messages,
+       usageMetadata: { input_tokens: providerInput, output_tokens: 50 },
+     });
+
+     // After calibration, tokens ~= providerInput (2× original)
+     // contextPressure = calibratedSum / pruningBudget ≈ providerInput / maxTokens ≈ 0.83
+     expect(result.contextPressure).toBeDefined();
+     expect(result.contextPressure as number).toBeGreaterThan(0.7);
+     expect(result.contextPressure as number).toBeLessThan(1.0);
+   });
+ });
+
+ describe('Token accounting pipeline — observation masking at 80%+ pressure', () => {
+   it('masks consumed tool results when pressure >= 0.8', () => {
+     const tcId = 'tc_search';
+     const bigResult = 'R'.repeat(2000);
+     const messages: BaseMessage[] = [
+       new HumanMessage('Search for info'),
+       aiToolCall(tcId, 'search'),
+       toolMsg(bigResult, 'search', tcId),
+       new AIMessage('Based on the results, here is the answer.'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     let sum = 0;
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+       sum += indexTokenCountMap[i] ?? 0;
+     }
+
+     // Set maxTokens so pressure is ~85%
+     const maxTokens = Math.round(sum / 0.85);
+
+     const pruneMessages = createPruneMessages({
+       maxTokens,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // Budget-aware masking: if the result fits within the available
+     // message budget, it may be kept intact or only lightly trimmed.
+     // Verify masking ran (context pressure triggered it) and the result
+     // is within the raw message budget.
+     const maskedTokens = result.indexTokenCountMap[2] ?? 0;
+     const rawBudget = Math.round(maxTokens / (result.calibrationRatio ?? 1));
+     expect(maskedTokens).toBeLessThanOrEqual(rawBudget);
+   });
+
+   it('does NOT mask when pressure < 0.8', () => {
+     const tcId = 'tc_search';
+     const bigResult = 'R'.repeat(2000);
+     const messages: BaseMessage[] = [
+       new HumanMessage('Search for info'),
+       aiToolCall(tcId, 'search'),
+       toolMsg(bigResult, 'search', tcId),
+       new AIMessage('Based on the results, here is the answer.'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+     }
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: 50000,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // No masking at low pressure — original token count preserved
+     expect(result.indexTokenCountMap[2]).toBe(charCounter(messages[2]));
+   });
+ });
+
+ describe('Token accounting pipeline — pruning drops oldest messages', () => {
+   it('preserves system message and most recent messages when budget exceeded', () => {
+     const sys = new SystemMessage('System prompt');
+     const messages: BaseMessage[] = [sys];
+     for (let i = 0; i < 10; i++) {
+       messages.push(new HumanMessage(`User message ${i}`));
+       messages.push(new AIMessage(`Assistant reply ${i}`));
+     }
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     let sum = 0;
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+       sum += indexTokenCountMap[i] ?? 0;
+     }
+
+     // Budget only allows ~half the messages
+     const pruneMessages = createPruneMessages({
+       maxTokens: Math.round(sum * 0.5),
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // System message should always be preserved
+     expect(result.context[0].content).toBe('System prompt');
+     // Should have fewer messages than original
+     expect(result.context.length).toBeLessThan(messages.length);
+     expect(result.context.length).toBeGreaterThan(1);
+
+     // Last message in result should be from near the end of the original
+     const lastResult = result.context[result.context.length - 1];
+     const lastOriginal = messages[messages.length - 1];
+     expect(lastResult.content).toBe(lastOriginal.content);
+   });
+
+   it('produces messagesToRefine when summarization is enabled', () => {
+     const messages: BaseMessage[] = [];
+     for (let i = 0; i < 10; i++) {
+       messages.push(new HumanMessage(`User ${i}: ${'x'.repeat(50)}`));
+       messages.push(new AIMessage(`Bot ${i}: ${'y'.repeat(50)}`));
+     }
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     let sum = 0;
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+       sum += indexTokenCountMap[i] ?? 0;
+     }
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: Math.round(sum * 0.4),
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       summarizationEnabled: true,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // With summarization enabled, pruned messages go to messagesToRefine
+     expect(result.messagesToRefine).toBeDefined();
+     expect(result.messagesToRefine!.length).toBeGreaterThan(0);
+     // messagesToRefine + context should account for all messages
+     expect(
+       result.context.length + result.messagesToRefine!.length
+     ).toBeGreaterThanOrEqual(messages.length);
+   });
+ });
+
+ describe('Token accounting pipeline — end-to-end multi-turn with calibration', () => {
+   it('simulates a 4-turn conversation with growing context and calibration each turn', () => {
+     const logs: Array<{ turn: number; message: string; data: unknown }> = [];
+     const log = (
+       _level: string,
+       message: string,
+       data?: Record<string, unknown>
+     ): void => {
+       logs.push({ turn: logs.length, message, data });
+     };
+
+     const systemMsg = new SystemMessage('You are helpful.');
+     const firstHuman = new HumanMessage('Hello, how are you?');
+     const conversationHistory: BaseMessage[] = [systemMsg, firstHuman];
+
+     // Pre-count system and first human message (as the real system does)
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: charCounter(systemMsg),
+       1: charCounter(firstHuman),
+     };
+
+     // startIndex=2: system(0) + human(1) are pre-existing, AI response(2) is new
+     const pruneMessages = createPruneMessages({
+       maxTokens: 600,
+       startIndex: 2,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0.05,
+       log,
+     });
+
+     // The closure returns a reference to its internal map. We use it to
+     // pre-count human messages on subsequent turns (matching real behavior).
+     let liveMap: Record<string, number | undefined> = indexTokenCountMap;
+
+     const simulateTurn = (
+       humanText: string,
+       aiText: string,
+       usage: Partial<UsageMetadata>,
+       skipHumanPush = false
+     ): ReturnType<ReturnType<typeof createPruneMessages>> => {
+       if (!skipHumanPush) {
+         const humanMsg = new HumanMessage(humanText);
+         conversationHistory.push(humanMsg);
+         const humanIdx = conversationHistory.length - 1;
+         liveMap[humanIdx] = charCounter(humanMsg);
+       }
+
+       conversationHistory.push(new AIMessage(aiText));
+
+       const result = pruneMessages({
+         messages: conversationHistory,
+         usageMetadata: usage,
+       });
+       liveMap = result.indexTokenCountMap;
+       return result;
+     };
+
+     // --- Turn 1: human already pushed, only AI response is new ---
+     const turn1 = simulateTurn(
+       '',
+       'I am fine, thank you for asking!',
+       { input_tokens: 30, output_tokens: 15 },
+       true
+     );
+     expect(turn1.context.length).toBe(3);
+     // AI response (index 2) should get output_tokens
+     expect(turn1.indexTokenCountMap[2]).toBe(15);
+
+     // --- Turn 2 ---
+     const turn2 = simulateTurn(
+       'Can you explain quantum computing in detail?',
+       'Quantum computing uses qubits that can exist in superposition. ' +
+         'This allows quantum computers to process many possibilities simultaneously.',
+       { input_tokens: 80, output_tokens: 50 }
+     );
+     expect(turn2.context.length).toBe(5);
+     // AI response (index 4) gets tokenCounter count (not output_tokens) since
+     // the human message at lastTurnStartIndex was pre-counted.
+     expect(turn2.indexTokenCountMap[4]).toBeDefined();
+     expect(turn2.indexTokenCountMap[4] as number).toBeGreaterThan(0);
+
+     // --- Turn 3 ---
+     const turn3 = simulateTurn(
+       'What about quantum entanglement?',
+       'Quantum entanglement is a phenomenon where particles become correlated ' +
+         'such that the quantum state of one instantly influences the other, ' +
+         'regardless of distance. Einstein called it spooky action at a distance.',
+       { input_tokens: 200, output_tokens: 80 }
+     );
+     expect(turn3.indexTokenCountMap[6]).toBeDefined();
+     expect(turn3.indexTokenCountMap[6] as number).toBeGreaterThan(0);
+
+     // --- Turn 4: push past budget to trigger pruning ---
+     const turn4 = simulateTurn(
+       'Tell me about ' + 'quantum '.repeat(30) + 'physics.',
+       'A'.repeat(200),
+       { input_tokens: 500, output_tokens: 120 }
+     );
+
+     expect(turn4.context.length).toBeLessThan(conversationHistory.length);
+     expect(turn4.context.length).toBeGreaterThan(1);
+
+     // All returned indices should have token counts
+     for (let i = 0; i < conversationHistory.length; i++) {
+       expect(turn4.indexTokenCountMap[i]).toBeDefined();
+     }
+
+     // Verify variance logs were emitted
+     const varianceLogs = logs.filter(
+       (l) => l.message === 'Calibration observed'
+     );
+     expect(varianceLogs.length).toBeGreaterThanOrEqual(4);
+   });
+
+   it('calibration + observation masking + pruning produce consistent accounting', () => {
+     const tcId = 'tc_big';
+     const bigToolResult = 'D'.repeat(3000);
+
+     const messages: BaseMessage[] = [
+       new SystemMessage('Assistant'),
+       new HumanMessage('Search for data'),
+       aiToolCall(tcId, 'search'),
+       toolMsg(bigToolResult, 'search', tcId),
+       new AIMessage('Here is what I found from the search results.'),
+       new HumanMessage('Thanks, now summarize it'),
+       new AIMessage('The data shows important patterns in the results.'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     let sum = 0;
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+       sum += indexTokenCountMap[i] ?? 0;
+     }
+
+     // Pressure ~90% to trigger both masking and context pressure fading
+     const maxTokens = Math.round(sum / 0.9);
+
+     const pruneMessages = createPruneMessages({
+       maxTokens,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // Tool result at index 3 should have been masked (consumed by AI at index 4)
+     const toolTokensAfter = result.indexTokenCountMap[3] ?? 0;
+     expect(toolTokensAfter).toBeLessThan(3000);
+
+     // The final token sum should be within the pruning budget
+     let finalSum = 0;
+     for (let i = 0; i < result.context.length; i++) {
+       const origIdx = messages.indexOf(result.context[i]);
+       if (origIdx >= 0) {
+         finalSum += result.indexTokenCountMap[origIdx] ?? 0;
+       }
+     }
+     expect(finalSum).toBeLessThanOrEqual(maxTokens);
+
+     // Context pressure should have been computed
+     expect(result.contextPressure).toBeDefined();
+     expect(result.contextPressure as number).toBeGreaterThan(0);
+   });
+ });
+
+ describe('Token accounting pipeline — Anthropic vs OpenAI cache semantics', () => {
+   it('Anthropic additive cache inflates calibration input correctly', () => {
+     const messages = [new HumanMessage('Hello'), new AIMessage('Hi')];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: 50,
+       1: 50,
+     };
+
+     const pruneMessages = createPruneMessages({
+       provider: Providers.ANTHROPIC,
+       maxTokens: 10000,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     // Anthropic: input_tokens=30, cache_read=60, cache_creation=20
+     // cacheSum=80 > baseInput=30 → additive → totalInput=30+80=110
+     const result = pruneMessages({
+       messages,
+       usageMetadata: {
+         input_tokens: 30,
+         output_tokens: 15,
+         input_token_details: { cache_read: 60, cache_creation: 20 },
+       },
+     });
+
+     // Map stays raw — calibrationRatio = 110 / 100 = 1.1
+     expect(result.indexTokenCountMap[0]).toBe(50);
+     expect(result.indexTokenCountMap[1]).toBe(50);
+     expect(result.calibrationRatio).toBeCloseTo(1.1, 1);
+   });
+
+   it('OpenAI inclusive cache does NOT inflate calibration input', () => {
+     const messages = [new HumanMessage('Hello'), new AIMessage('Hi')];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       0: 50,
+       1: 50,
+     };
+
+     const pruneMessages = createPruneMessages({
+       provider: Providers.OPENAI,
+       maxTokens: 10000,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+     });
+
+     // OpenAI: input_tokens=100, cache_read=40 — cacheSum=40 <= baseInput=100
+     const result = pruneMessages({
+       messages,
+       usageMetadata: {
+         input_tokens: 100,
+         output_tokens: 20,
+         input_token_details: { cache_read: 40 },
+       },
+     });
+
+     // ratio = 100 / 100 = 1.0 — no change
+     expect(result.indexTokenCountMap[0]).toBe(50);
+     expect(result.indexTokenCountMap[1]).toBe(50);
+   });
+ });
+
+ describe('Token accounting pipeline — instruction-budget short-circuit', () => {
+   it('yields all messages for summarization when instructions consume entire budget', () => {
+     const messages = [
+       new HumanMessage('First question'),
+       new AIMessage('First answer'),
+       new HumanMessage('Second question'),
+       new AIMessage('Second answer'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+     }
+
+     // Budget 100, instruction overhead 100 → effectiveMaxTokens = 0
+     const pruneMessages = createPruneMessages({
+       maxTokens: 100,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       summarizationEnabled: true,
+       reserveRatio: 0,
+       getInstructionTokens: () => 100,
+     });
+
+     const result = pruneMessages({ messages });
+
+     expect(result.context).toHaveLength(0);
+     expect(result.messagesToRefine).toHaveLength(4);
+     expect(result.remainingContextTokens).toBe(0);
+   });
+
+   it('does NOT short-circuit when summarization is disabled', () => {
+     const messages = [
+       new HumanMessage('First question'),
+       new AIMessage('First answer'),
+     ];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+     }
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: 100,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       reserveRatio: 0,
+       getInstructionTokens: () => 100,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // Without summarization, the pruner goes through normal/emergency path
+     // instead of the short-circuit — messagesToRefine may be empty
+     expect(
+       result.context.length + (result.messagesToRefine?.length ?? 0)
+     ).toBeGreaterThanOrEqual(0);
+   });
+
+   it('does NOT short-circuit when effectiveMaxTokens > 0', () => {
+     const messages = [new HumanMessage('Short'), new AIMessage('Reply')];
+
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     for (let i = 0; i < messages.length; i++) {
+       indexTokenCountMap[i] = charCounter(messages[i]);
+     }
+
+     const pruneMessages = createPruneMessages({
+       maxTokens: 200,
+       startIndex: messages.length,
+       tokenCounter: charCounter,
+       indexTokenCountMap,
+       summarizationEnabled: true,
+       reserveRatio: 0,
+       getInstructionTokens: () => 50,
+     });
+
+     const result = pruneMessages({ messages });
+
+     // effectiveMaxTokens = 150, messages fit → normal early return
+     expect(result.context.length).toBe(2);
+     expect(result.messagesToRefine).toHaveLength(0);
+   });
+ });