@librechat/agents 3.1.57 → 3.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +326 -62
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/events.cjs +7 -27
  6. package/dist/cjs/events.cjs.map +1 -1
  7. package/dist/cjs/graphs/Graph.cjs +303 -222
  8. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
  10. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/init.cjs +60 -0
  14. package/dist/cjs/llm/init.cjs.map +1 -0
  15. package/dist/cjs/llm/invoke.cjs +90 -0
  16. package/dist/cjs/llm/invoke.cjs.map +1 -0
  17. package/dist/cjs/llm/openai/index.cjs +2 -0
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/request.cjs +41 -0
  20. package/dist/cjs/llm/request.cjs.map +1 -0
  21. package/dist/cjs/main.cjs +40 -0
  22. package/dist/cjs/main.cjs.map +1 -1
  23. package/dist/cjs/messages/cache.cjs +76 -89
  24. package/dist/cjs/messages/cache.cjs.map +1 -1
  25. package/dist/cjs/messages/contextPruning.cjs +156 -0
  26. package/dist/cjs/messages/contextPruning.cjs.map +1 -0
  27. package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
  28. package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
  29. package/dist/cjs/messages/core.cjs +23 -37
  30. package/dist/cjs/messages/core.cjs.map +1 -1
  31. package/dist/cjs/messages/format.cjs +156 -11
  32. package/dist/cjs/messages/format.cjs.map +1 -1
  33. package/dist/cjs/messages/prune.cjs +1161 -49
  34. package/dist/cjs/messages/prune.cjs.map +1 -1
  35. package/dist/cjs/messages/reducer.cjs +87 -0
  36. package/dist/cjs/messages/reducer.cjs.map +1 -0
  37. package/dist/cjs/run.cjs +81 -42
  38. package/dist/cjs/run.cjs.map +1 -1
  39. package/dist/cjs/stream.cjs +54 -7
  40. package/dist/cjs/stream.cjs.map +1 -1
  41. package/dist/cjs/summarization/index.cjs +75 -0
  42. package/dist/cjs/summarization/index.cjs.map +1 -0
  43. package/dist/cjs/summarization/node.cjs +663 -0
  44. package/dist/cjs/summarization/node.cjs.map +1 -0
  45. package/dist/cjs/tools/ToolNode.cjs +16 -8
  46. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  47. package/dist/cjs/tools/handlers.cjs +2 -0
  48. package/dist/cjs/tools/handlers.cjs.map +1 -1
  49. package/dist/cjs/utils/errors.cjs +115 -0
  50. package/dist/cjs/utils/errors.cjs.map +1 -0
  51. package/dist/cjs/utils/events.cjs +17 -0
  52. package/dist/cjs/utils/events.cjs.map +1 -1
  53. package/dist/cjs/utils/handlers.cjs +16 -0
  54. package/dist/cjs/utils/handlers.cjs.map +1 -1
  55. package/dist/cjs/utils/llm.cjs +10 -0
  56. package/dist/cjs/utils/llm.cjs.map +1 -1
  57. package/dist/cjs/utils/tokens.cjs +247 -14
  58. package/dist/cjs/utils/tokens.cjs.map +1 -1
  59. package/dist/cjs/utils/truncation.cjs +107 -0
  60. package/dist/cjs/utils/truncation.cjs.map +1 -0
  61. package/dist/esm/agents/AgentContext.mjs +325 -61
  62. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  63. package/dist/esm/common/enum.mjs +13 -0
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs +8 -28
  66. package/dist/esm/events.mjs.map +1 -1
  67. package/dist/esm/graphs/Graph.mjs +307 -226
  68. package/dist/esm/graphs/Graph.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
  72. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  73. package/dist/esm/llm/init.mjs +58 -0
  74. package/dist/esm/llm/init.mjs.map +1 -0
  75. package/dist/esm/llm/invoke.mjs +87 -0
  76. package/dist/esm/llm/invoke.mjs.map +1 -0
  77. package/dist/esm/llm/openai/index.mjs +2 -0
  78. package/dist/esm/llm/openai/index.mjs.map +1 -1
  79. package/dist/esm/llm/request.mjs +38 -0
  80. package/dist/esm/llm/request.mjs.map +1 -0
  81. package/dist/esm/main.mjs +13 -3
  82. package/dist/esm/main.mjs.map +1 -1
  83. package/dist/esm/messages/cache.mjs +76 -89
  84. package/dist/esm/messages/cache.mjs.map +1 -1
  85. package/dist/esm/messages/contextPruning.mjs +154 -0
  86. package/dist/esm/messages/contextPruning.mjs.map +1 -0
  87. package/dist/esm/messages/contextPruningSettings.mjs +50 -0
  88. package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
  89. package/dist/esm/messages/core.mjs +23 -37
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +156 -11
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/prune.mjs +1158 -52
  94. package/dist/esm/messages/prune.mjs.map +1 -1
  95. package/dist/esm/messages/reducer.mjs +83 -0
  96. package/dist/esm/messages/reducer.mjs.map +1 -0
  97. package/dist/esm/run.mjs +82 -43
  98. package/dist/esm/run.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +54 -7
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/summarization/index.mjs +73 -0
  102. package/dist/esm/summarization/index.mjs.map +1 -0
  103. package/dist/esm/summarization/node.mjs +659 -0
  104. package/dist/esm/summarization/node.mjs.map +1 -0
  105. package/dist/esm/tools/ToolNode.mjs +16 -8
  106. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +2 -0
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/utils/errors.mjs +111 -0
  110. package/dist/esm/utils/errors.mjs.map +1 -0
  111. package/dist/esm/utils/events.mjs +17 -1
  112. package/dist/esm/utils/events.mjs.map +1 -1
  113. package/dist/esm/utils/handlers.mjs +16 -0
  114. package/dist/esm/utils/handlers.mjs.map +1 -1
  115. package/dist/esm/utils/llm.mjs +10 -1
  116. package/dist/esm/utils/llm.mjs.map +1 -1
  117. package/dist/esm/utils/tokens.mjs +245 -15
  118. package/dist/esm/utils/tokens.mjs.map +1 -1
  119. package/dist/esm/utils/truncation.mjs +102 -0
  120. package/dist/esm/utils/truncation.mjs.map +1 -0
  121. package/dist/types/agents/AgentContext.d.ts +124 -6
  122. package/dist/types/common/enum.d.ts +14 -1
  123. package/dist/types/graphs/Graph.d.ts +22 -27
  124. package/dist/types/index.d.ts +5 -0
  125. package/dist/types/llm/init.d.ts +18 -0
  126. package/dist/types/llm/invoke.d.ts +48 -0
  127. package/dist/types/llm/request.d.ts +14 -0
  128. package/dist/types/messages/contextPruning.d.ts +42 -0
  129. package/dist/types/messages/contextPruningSettings.d.ts +44 -0
  130. package/dist/types/messages/core.d.ts +1 -1
  131. package/dist/types/messages/format.d.ts +17 -1
  132. package/dist/types/messages/index.d.ts +3 -0
  133. package/dist/types/messages/prune.d.ts +162 -1
  134. package/dist/types/messages/reducer.d.ts +18 -0
  135. package/dist/types/run.d.ts +12 -1
  136. package/dist/types/summarization/index.d.ts +20 -0
  137. package/dist/types/summarization/node.d.ts +29 -0
  138. package/dist/types/tools/ToolNode.d.ts +3 -1
  139. package/dist/types/types/graph.d.ts +44 -6
  140. package/dist/types/types/index.d.ts +1 -0
  141. package/dist/types/types/run.d.ts +30 -0
  142. package/dist/types/types/stream.d.ts +31 -4
  143. package/dist/types/types/summarize.d.ts +47 -0
  144. package/dist/types/types/tools.d.ts +7 -0
  145. package/dist/types/utils/errors.d.ts +28 -0
  146. package/dist/types/utils/events.d.ts +13 -0
  147. package/dist/types/utils/index.d.ts +2 -0
  148. package/dist/types/utils/llm.d.ts +4 -0
  149. package/dist/types/utils/tokens.d.ts +14 -1
  150. package/dist/types/utils/truncation.d.ts +49 -0
  151. package/package.json +1 -1
  152. package/src/agents/AgentContext.ts +388 -58
  153. package/src/agents/__tests__/AgentContext.test.ts +265 -5
  154. package/src/common/enum.ts +13 -0
  155. package/src/events.ts +9 -39
  156. package/src/graphs/Graph.ts +468 -331
  157. package/src/index.ts +7 -0
  158. package/src/llm/anthropic/llm.spec.ts +3 -3
  159. package/src/llm/anthropic/utils/message_inputs.ts +6 -4
  160. package/src/llm/bedrock/llm.spec.ts +1 -1
  161. package/src/llm/bedrock/utils/message_inputs.ts +6 -2
  162. package/src/llm/init.ts +63 -0
  163. package/src/llm/invoke.ts +144 -0
  164. package/src/llm/request.ts +55 -0
  165. package/src/messages/__tests__/observationMasking.test.ts +221 -0
  166. package/src/messages/cache.ts +77 -102
  167. package/src/messages/contextPruning.ts +191 -0
  168. package/src/messages/contextPruningSettings.ts +90 -0
  169. package/src/messages/core.ts +32 -53
  170. package/src/messages/ensureThinkingBlock.test.ts +39 -39
  171. package/src/messages/format.ts +227 -15
  172. package/src/messages/formatAgentMessages.test.ts +511 -1
  173. package/src/messages/index.ts +3 -0
  174. package/src/messages/prune.ts +1548 -62
  175. package/src/messages/reducer.ts +22 -0
  176. package/src/run.ts +104 -51
  177. package/src/scripts/bedrock-merge-test.ts +1 -1
  178. package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
  179. package/src/scripts/test-thinking-handoff.ts +1 -1
  180. package/src/scripts/thinking-bedrock.ts +1 -1
  181. package/src/scripts/thinking.ts +1 -1
  182. package/src/specs/anthropic.simple.test.ts +1 -1
  183. package/src/specs/multi-agent-summarization.test.ts +396 -0
  184. package/src/specs/prune.test.ts +1196 -23
  185. package/src/specs/summarization-unit.test.ts +868 -0
  186. package/src/specs/summarization.test.ts +3810 -0
  187. package/src/specs/summarize-prune.test.ts +376 -0
  188. package/src/specs/thinking-handoff.test.ts +10 -10
  189. package/src/specs/thinking-prune.test.ts +7 -4
  190. package/src/specs/token-accounting-e2e.test.ts +1034 -0
  191. package/src/specs/token-accounting-pipeline.test.ts +882 -0
  192. package/src/specs/token-distribution-edge-case.test.ts +25 -26
  193. package/src/splitStream.test.ts +42 -33
  194. package/src/stream.ts +64 -11
  195. package/src/summarization/__tests__/aggregator.test.ts +153 -0
  196. package/src/summarization/__tests__/node.test.ts +708 -0
  197. package/src/summarization/__tests__/trigger.test.ts +50 -0
  198. package/src/summarization/index.ts +102 -0
  199. package/src/summarization/node.ts +982 -0
  200. package/src/tools/ToolNode.ts +25 -3
  201. package/src/types/graph.ts +62 -7
  202. package/src/types/index.ts +1 -0
  203. package/src/types/run.ts +32 -0
  204. package/src/types/stream.ts +45 -5
  205. package/src/types/summarize.ts +58 -0
  206. package/src/types/tools.ts +7 -0
  207. package/src/utils/errors.ts +117 -0
  208. package/src/utils/events.ts +31 -0
  209. package/src/utils/handlers.ts +18 -0
  210. package/src/utils/index.ts +2 -0
  211. package/src/utils/llm.ts +12 -0
  212. package/src/utils/tokens.ts +336 -18
  213. package/src/utils/truncation.ts +124 -0
  214. package/src/scripts/image.ts +0 -180
@@ -0,0 +1,1034 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
2
+ import { config } from 'dotenv';
3
+ config();
4
+ import {
5
+ AIMessage,
6
+ BaseMessage,
7
+ HumanMessage,
8
+ AIMessageChunk,
9
+ } from '@langchain/core/messages';
10
+ import { ChatGenerationChunk } from '@langchain/core/outputs';
11
+ import { FakeListChatModel } from '@langchain/core/utils/testing';
12
+ import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
13
+ import type { UsageMetadata } from '@langchain/core/messages';
14
+ import type { ToolCall } from '@langchain/core/messages/tool';
15
+ import type * as t from '@/types';
16
+ import { createTokenCounter, TokenEncoderManager } from '@/utils/tokens';
17
+ import { createContentAggregator } from '@/stream';
18
+ import { GraphEvents, Providers } from '@/common';
19
+ import { getLLMConfig } from '@/utils/llmConfig';
20
+ import { Calculator } from '@/tools/Calculator';
21
+ import * as providers from '@/llm/providers';
22
+ import { Run } from '@/run';
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // FakeListChatModel subclass that emits usage_metadata on the final chunk.
26
+ // Accepts a single UsageMetadata or an array (one per call, cycling).
27
+ // This lets us exercise the calibration path end-to-end through the Graph.
28
+ // ---------------------------------------------------------------------------
29
/**
 * Test double built on `FakeListChatModel`.
 *
 * Each call streams the current canned response in whitespace-delimited
 * pieces and then, when usage data was configured, emits one extra empty
 * chunk whose message carries `usage_metadata`.
 *
 * `usage` may be a single object (reused for every call) or an array that is
 * consumed one entry per call and wraps around once exhausted. When `usage`
 * is omitted, no trailing usage chunk is produced.
 */
class FakeWithUsage extends FakeListChatModel {
  /** Usage payloads to hand out, one per streamed call (cycled). */
  private readonly _usages: readonly UsageMetadata[];
  /** Number of usage payloads emitted so far; drives the cycling. */
  private _usageIdx = 0;

  /**
   * @param opts.responses - Canned responses forwarded to `FakeListChatModel`.
   * @param opts.usage - Optional usage payload, or list of payloads, to report.
   */
  constructor(opts: {
    responses: string[];
    usage?: UsageMetadata | UsageMetadata[];
  }) {
    super({ responses: opts.responses });
    if (opts.usage == null) {
      this._usages = [];
    } else if (Array.isArray(opts.usage)) {
      // Copy so later mutation of the caller's array cannot change what
      // this model reports.
      this._usages = [...opts.usage];
    } else {
      this._usages = [opts.usage];
    }
  }

  /**
   * Streams the current response piece by piece, notifying `runManager` of
   * each piece, then optionally yields a final empty chunk with usage data.
   *
   * @param _messages - Ignored; the reply comes from the canned response list.
   * @param _options - Ignored.
   * @param runManager - Optional callback manager told about each new token.
   */
  async *_streamResponseChunks(
    _messages: BaseMessage[],
    _options: this['ParsedCallOptions'],
    runManager?: CallbackManagerForLLMRun
  ): AsyncGenerator<ChatGenerationChunk> {
    const response = this._currentResponse();
    this._incrementResponse();

    // The lookbehind splits *after* each whitespace character, so every piece
    // keeps its trailing whitespace and the pieces concatenate to `response`.
    for (const word of response.split(/(?<=\s)/)) {
      yield new ChatGenerationChunk({
        text: word,
        generationInfo: {},
        message: new AIMessageChunk({ content: word }),
      });
      void runManager?.handleLLMNewToken(word);
    }

    // Guard the empty case explicitly instead of relying on `idx % 0`
    // evaluating to NaN and the array lookup returning undefined.
    const total = this._usages.length;
    const usage = total > 0 ? this._usages[this._usageIdx % total] : undefined;
    if (!usage) {
      return;
    }

    // Emit a final empty chunk carrying usage_metadata for this call.
    this._usageIdx++;
    yield new ChatGenerationChunk({
      text: '',
      generationInfo: {},
      message: new AIMessageChunk({
        content: '',
        usage_metadata: usage,
      }),
    });
  }
}
84
+
85
+ // ---------------------------------------------------------------------------
86
+ // Helpers
87
+ // ---------------------------------------------------------------------------
88
+
89
/**
 * Produces a lookup from each message's position (as a string key) to the
 * token count that `tokenCounter` reports for that message.
 *
 * @param messages - Messages to measure, in order.
 * @param tokenCounter - Function returning the token count of one message.
 * @returns An object keyed by `"0"`, `"1"`, … holding the per-message counts.
 */
function buildIndexTokenCountMap(
  messages: BaseMessage[],
  tokenCounter: t.TokenCounter
): Record<string, number> {
  const countsByIndex: Record<string, number> = {};
  for (const [position, message] of messages.entries()) {
    countsByIndex[`${position}`] = tokenCounter(message);
  }
  return countsByIndex;
}
99
+
100
/**
 * Concatenates the `text` of every content block in a summary.
 *
 * Blocks without a `text` property contribute an empty string. A missing
 * summary, or a summary without `content`, yields `''`.
 *
 * @param summary - Summary block to flatten, or `undefined`.
 * @returns The joined text of all text-bearing blocks.
 */
function getSummaryText(summary: t.SummaryContentBlock | undefined): string {
  if (!summary) {
    return '';
  }
  const blocks = summary.content ?? [];
  const pieces = blocks.map((block) => {
    if ('text' in block) {
      return (block as { text: string }).text;
    }
    return '';
  });
  return pieces.join('');
}
106
+
107
/** Canned reply of the fake chat model that the suite installs for the OPENAI provider. */
const SUMMARY_TEXT =
  'User discussed math problems. Key results: 2+2=4, 3*5=15. Context preserved.';

/** Agent instructions passed as `graphConfig.instructions` for every run. */
const INSTRUCTIONS = 'You are a helpful math tutor. Be concise.';

/** Stream configuration handed to `processStream` by every test in this file. */
const streamConfig = {
  configurable: { thread_id: 'token-e2e' },
  streamMode: 'values',
  version: 'v2' as const,
};
117
+
118
+ // ---------------------------------------------------------------------------
119
+ // Tests
120
+ // ---------------------------------------------------------------------------
121
+
122
+ describe('Token accounting E2E — Run + Graph + real token counter', () => {
123
// Allow up to 60 seconds for each test in this suite.
jest.setTimeout(60_000);

// Token counter shared by every test; created once in `beforeAll`.
let tokenCounter: t.TokenCounter;
// Spy wrapping `providers.getChatModelClass`; installed fresh for each test.
let getChatModelClassSpy: jest.SpyInstance;
// Reference to the unmocked implementation, used for non-OPENAI providers.
const originalGetChatModelClass = providers.getChatModelClass;

beforeAll(async () => {
  tokenCounter = await createTokenCounter();
});

afterAll(() => {
  TokenEncoderManager.reset();
});

beforeEach(() => {
  // For OPENAI return a fake chat model that always answers SUMMARY_TEXT;
  // every other provider falls through to the original lookup.
  const resolveChatModelClass = ((provider: Providers) => {
    if (provider !== Providers.OPENAI) {
      return originalGetChatModelClass(provider);
    }
    return class extends FakeListChatModel {
      constructor(_options: any) {
        super({ responses: [SUMMARY_TEXT] });
      }
    } as any;
  }) as typeof providers.getChatModelClass;

  getChatModelClassSpy = jest
    .spyOn(providers, 'getChatModelClass')
    .mockImplementation(resolveChatModelClass);
});

afterEach(() => {
  getChatModelClassSpy.mockRestore();
});
155
+
156
/**
 * Creates a standard-graph `Run` with summarization enabled, using the
 * OPENAI provider for both the agent and the summarizer.
 *
 * Run-step events are fed into a fresh content aggregator. Summarize
 * start/complete events are forwarded to the supplied spies.
 *
 * @param opts.maxTokens - Value passed as `maxContextTokens`.
 * @param opts.conversationHistory - Messages used to derive the token map
 *   when `opts.indexTokenCountMap` is not supplied.
 * @param opts.spies - Jest mocks that receive summarize event payloads.
 * @param opts.tools - Optional tools for the graph.
 * @param opts.indexTokenCountMap - Optional precomputed per-index token counts.
 * @param opts.initialSummary - Optional summary carried into the run.
 * @returns The promise returned by `Run.create`.
 */
async function createRun(opts: {
  maxTokens: number;
  conversationHistory: BaseMessage[];
  spies: {
    onSummarizeStartSpy: jest.Mock;
    onSummarizeCompleteSpy: jest.Mock;
  };
  tools?: t.GraphTools;
  indexTokenCountMap?: Record<string, number>;
  initialSummary?: { text: string; tokenCount: number };
}): Promise<Run<t.IState>> {
  const { aggregateContent } = createContentAggregator();

  // Fall back to counting the supplied history when no map was given.
  let indexTokenCountMap = opts.indexTokenCountMap;
  if (indexTokenCountMap == null) {
    indexTokenCountMap = buildIndexTokenCountMap(
      opts.conversationHistory,
      tokenCounter
    );
  }

  const runStepHandler = {
    handle: (_event: string, data: t.StreamEventData): void => {
      aggregateContent({
        event: GraphEvents.ON_RUN_STEP,
        data: data as t.RunStep,
      });
    },
  };
  const summarizeStartHandler = {
    handle: (_event: string, data: t.StreamEventData): void => {
      opts.spies.onSummarizeStartSpy(data);
    },
  };
  const summarizeCompleteHandler = {
    handle: (_event: string, data: t.StreamEventData): void => {
      opts.spies.onSummarizeCompleteSpy(data);
    },
  };

  return Run.create<t.IState>({
    runId: `tok-e2e-${Date.now()}`,
    graphConfig: {
      type: 'standard',
      llmConfig: getLLMConfig(Providers.OPENAI),
      instructions: INSTRUCTIONS,
      maxContextTokens: opts.maxTokens,
      tools: opts.tools,
      summarizationEnabled: true,
      summarizationConfig: {
        provider: Providers.OPENAI,
      },
      initialSummary: opts.initialSummary,
    },
    returnContent: true,
    customHandlers: {
      [GraphEvents.ON_RUN_STEP]: runStepHandler,
      [GraphEvents.ON_SUMMARIZE_START]: summarizeStartHandler,
      [GraphEvents.ON_SUMMARIZE_COMPLETE]: summarizeCompleteHandler,
    },
    tokenCounter,
    indexTokenCountMap,
  });
}
211
+
212
/**
 * Plays one conversational turn against `state.run`.
 *
 * The user message is appended to `state.conversationHistory` before the
 * stream is processed. Afterwards any messages reported by
 * `getRunMessages()` are appended as well. If `processStream` rejects, the
 * user message stays in the history and the rejection propagates.
 *
 * @param state - The run to drive and the history array to mutate in place.
 * @param userMessage - Text of the human message for this turn.
 * @returns Whatever `processStream` resolved with.
 */
async function runTurn(
  state: { run: Run<t.IState>; conversationHistory: BaseMessage[] },
  userMessage: string
): Promise<t.MessageContentComplex[] | undefined> {
  const { run, conversationHistory } = state;
  conversationHistory.push(new HumanMessage(userMessage));

  const content = await run.processStream(
    { messages: conversationHistory },
    streamConfig as any
  );

  const produced = run.getRunMessages();
  if (produced != null) {
    conversationHistory.push(...produced);
  }
  return content;
}
225
+
226
+ // =========================================================================
227
+ // Test 1: Multi-turn token accounting without usage_metadata (tokenCounter only)
228
+ // =========================================================================
229
// Three turns with plain fake models (no usage_metadata): two at a generous
// budget to build history, then one at a tiny budget that is expected to
// trigger summarization.
test('multi-turn pruning + summarization with real token counter (no usage_metadata)', async () => {
  const spies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };
  const conversationHistory: BaseMessage[] = [];

  // --- Turn 1: build up conversation at generous budget ---
  let run = await createRun({
    maxTokens: 4000,
    conversationHistory,
    spies,
  });
  run.Graph?.overrideTestModel(
    [
      'The answer to 2+2 is 4. Addition is one of the four fundamental arithmetic operations. ' +
        'It combines two or more numbers into a single sum. In this case we combine 2 and 2 to get 4. ' +
        'This is also known as the additive identity when one operand is zero.',
    ],
    1
  );
  await runTurn(
    { run, conversationHistory },
    'Hello! What is 2+2? Please explain addition in detail with examples and history.'
  );
  expect(conversationHistory.length).toBeGreaterThanOrEqual(2);

  // --- Turn 2: more conversation ---
  run = await createRun({
    maxTokens: 4000,
    conversationHistory,
    spies,
  });
  run.Graph?.overrideTestModel(
    [
      'Multiplication of 3 times 5 equals 15. Multiplication can be understood as repeated addition. ' +
        'So 3 times 5 means adding 3 five times: 3+3+3+3+3 which equals 15. ' +
        'The commutative property tells us 5 times 3 also equals 15.',
    ],
    1
  );
  await runTurn(
    { run, conversationHistory },
    'Great explanation! Now what is 3 times 5? Explain multiplication as repeated addition with examples.'
  );
  expect(conversationHistory.length).toBeGreaterThanOrEqual(4);

  // --- Turn 3: tight budget forces pruning and summarization ---
  // The four messages above are estimated at ~150+ tokens, so a budget of 50
  // is intended to force pruning → summarization.
  run = await createRun({
    maxTokens: 50,
    conversationHistory,
    spies,
  });
  run.Graph?.overrideTestModel(
    ['Understood, continuing with summary context.'],
    1
  );

  // The model call itself may reject under such a tiny context; capture the
  // failure instead of aborting. Normalize non-Error throwables so that
  // `.message` is always a string in the check below.
  let error: Error | undefined;
  try {
    await runTurn({ run, conversationHistory }, 'Now summarize everything.');
  } catch (err: unknown) {
    error = err instanceof Error ? err : new Error(String(err));
  }

  // Summarization should have fired
  expect(spies.onSummarizeStartSpy).toHaveBeenCalled();
  expect(spies.onSummarizeCompleteSpy).toHaveBeenCalled();

  const completePayload = spies.onSummarizeCompleteSpy.mock
    .calls[0][0] as t.SummarizeCompleteEvent;
  expect(getSummaryText(completePayload.summary)).toBe(SUMMARY_TEXT);

  // Sanity lower bound only: the reported token count must be a plausible
  // positive number for SUMMARY_TEXT (this does not pin the exact count).
  const summaryTokenCount = completePayload.summary?.tokenCount ?? 0;
  expect(summaryTokenCount).toBeGreaterThan(5);

  // Even if the model call errored, it must not have been a runaway loop.
  expect(error?.message ?? '').not.toContain('Recursion limit');
});
316
+
317
+ // =========================================================================
318
+ // Test 2: Usage metadata feeds calibration through the real Graph pipeline
319
+ // =========================================================================
320
// Same three-turn shape as the tokenCounter-only test, but every turn uses a
// model that reports usage_metadata on its final chunk.
test('usage_metadata from model feeds into calibration on next turn', async () => {
  const spies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };
  const conversationHistory: BaseMessage[] = [];

  // Creates a run for the shared history and installs a usage-reporting model.
  const prepareRun = async (
    maxTokens: number,
    responses: string[],
    usage: UsageMetadata
  ): Promise<Run<t.IState>> => {
    const prepared = await createRun({ maxTokens, conversationHistory, spies });
    prepared.Graph!.overrideModel = new FakeWithUsage({
      responses,
      usage,
    }) as any;
    return prepared;
  };

  // --- Turn 1: normal budget, model emits usage_metadata ---
  const firstRun = await prepareRun(
    4000,
    [
      'The answer to 2+2 is 4. Addition is one of the fundamental arithmetic operations ' +
        'that combines quantities together into a sum. Two plus two yields four.',
    ],
    { input_tokens: 45, output_tokens: 25, total_tokens: 70 }
  );
  await runTurn(
    { run: firstRun, conversationHistory },
    'What is 2+2? Please provide a detailed explanation of addition.'
  );
  expect(conversationHistory.length).toBeGreaterThanOrEqual(2);

  // --- Turn 2: also with usage_metadata ---
  const secondRun = await prepareRun(
    4000,
    [
      'Multiplication of 3 times 5 equals 15. This is repeated addition: 3+3+3+3+3. ' +
        'The commutative property means 5 times 3 also equals 15.',
    ],
    { input_tokens: 90, output_tokens: 30, total_tokens: 120 }
  );
  await runTurn(
    { run: secondRun, conversationHistory },
    'What is 3 times 5? Explain multiplication as repeated addition.'
  );
  expect(conversationHistory.length).toBeGreaterThanOrEqual(4);

  // --- Turn 3: tight context with usage → triggers summarization ---
  const thirdRun = await prepareRun(50, ['Continuing after summary.'], {
    input_tokens: 40,
    output_tokens: 10,
    total_tokens: 50,
  });
  try {
    await runTurn({ run: thirdRun, conversationHistory }, 'Continue.');
  } catch {
    // A tiny context may make the turn reject; that is tolerated here.
    // Drop the last history entry again in that case.
    conversationHistory.pop();
  }

  // Summarization should fire even with usage_metadata in the mix
  const completeCalls = spies.onSummarizeCompleteSpy.mock.calls;
  expect(completeCalls.length).toBeGreaterThanOrEqual(1);
  const payload = completeCalls[0][0] as t.SummarizeCompleteEvent;
  expect(payload.summary!.tokenCount).toBeGreaterThan(0);
});
405
+
406
+ // =========================================================================
407
+ // Test 3: Summary overhead feeds into getInstructionTokens on next Run
408
+ // =========================================================================
409
// Starts a run that already carries a summary, with a budget only slightly
// above the raw message total, and checks the run either yields messages or
// rejects.
test('summary token overhead is accounted for in next run budget', async () => {
  const spies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };

  // Simulate a pre-existing summary from a previous run
  const initialSummary: { text: string; tokenCount: number } = {
    text: SUMMARY_TEXT,
    tokenCount: tokenCounter(new HumanMessage(SUMMARY_TEXT)),
  };

  // A short conversation intended to fit the budget on its own, but not once
  // summary tokens are reserved as well.
  const conversationHistory: BaseMessage[] = [
    new HumanMessage('First question about algebra'),
    new AIMessage('Algebra is the study of variables and equations.'),
    new HumanMessage('Second question about geometry'),
    new AIMessage(
      'Geometry deals with shapes, sizes, and properties of space.'
    ),
  ];

  const indexTokenCountMap = buildIndexTokenCountMap(
    conversationHistory,
    tokenCounter
  );
  let msgTotal = 0;
  for (const count of Object.values(indexTokenCountMap)) {
    msgTotal += count;
  }

  // Budget: message total plus 30 tokens of headroom — meant to cover the
  // instructions but not the additional summary overhead.
  const tightBudget = msgTotal + 30;

  const run = await createRun({
    maxTokens: tightBudget,
    conversationHistory,
    spies,
    indexTokenCountMap,
    initialSummary,
  });
  run.Graph?.overrideTestModel(['Noted.'], 1);

  // Appended after the token map was built, so it has no entry in the map.
  conversationHistory.push(new HumanMessage('Continue.'));

  // NOTE(review): this flag becomes true when the run produced any messages
  // OR when it rejected; it does not observe pruning directly, so the final
  // assertion is weak. Consider asserting on an explicit pruning signal.
  let pruningOccurred: boolean;
  try {
    await run.processStream(
      { messages: conversationHistory },
      streamConfig as any
    );
    pruningOccurred = (run.getRunMessages()?.length ?? 0) > 0;
  } catch {
    // Error is acceptable — the point is summary overhead was subtracted from budget
    pruningOccurred = true;
  }

  expect(pruningOccurred).toBe(true);
});
476
+
477
+ // =========================================================================
478
+ // Test 4: Mixed turns — some with usage_metadata, some without
479
+ // =========================================================================
480
// Alternates between a usage-reporting model and a plain fake model across
// three turns and checks that every turn lands in the history.
test('handles mixed turns: some with usage_metadata, some without', async () => {
  const spies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };
  const conversationHistory: BaseMessage[] = [];

  // Every turn uses a fresh run over the shared history at the same budget.
  const newRun = (): Promise<Run<t.IState>> =>
    createRun({ maxTokens: 4000, conversationHistory, spies });

  // --- Turn 1: WITH usage_metadata ---
  const firstRun = await newRun();
  firstRun.Graph!.overrideModel = new FakeWithUsage({
    responses: ['Answer to first question with detailed explanation here.'],
    usage: { input_tokens: 50, output_tokens: 20, total_tokens: 70 },
  }) as any;
  await runTurn(
    { run: firstRun, conversationHistory },
    'First question here.'
  );

  // --- Turn 2: WITHOUT usage_metadata (plain FakeListChatModel) ---
  const secondRun = await newRun();
  secondRun.Graph?.overrideTestModel(
    ['Second response without any usage metadata attached.'],
    1
  );
  await runTurn(
    { run: secondRun, conversationHistory },
    'Second question here.'
  );

  // --- Turn 3: WITH usage_metadata again ---
  const thirdRun = await newRun();
  thirdRun.Graph!.overrideModel = new FakeWithUsage({
    responses: ['Third answer.'],
    usage: { input_tokens: 120, output_tokens: 10, total_tokens: 130 },
  }) as any;
  await runTurn({ run: thirdRun, conversationHistory }, 'Third question.');

  // At least 6 messages are expected in the history (3 human + 3 AI).
  // Reaching this point also shows that no turn rejected.
  expect(conversationHistory.length).toBeGreaterThanOrEqual(6);
});
540
+
541
// =========================================================================
// Test 5: Full round-trip — summarize, persist, load into next Run
// =========================================================================
/**
 * Runs two turns at a 4000-token budget, then a third turn at a 50-token
 * budget and expects at least one summarize-complete event. The text and
 * token count of that summary are then handed to a brand-new Run as
 * `initialSummary`, and the new Run is expected to produce messages.
 */
test('full round-trip: summarize → persist → new Run with summary overhead', async () => {
  const spies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };
  const conversationHistory: BaseMessage[] = [];

  // Build up conversation with longer messages
  let run = await createRun({
    maxTokens: 4000,
    conversationHistory,
    spies,
  });
  run.Graph?.overrideTestModel(
    [
      'The answer to 2+2 is 4. Addition combines two quantities into a sum. ' +
        'This is one of the four fundamental operations in arithmetic alongside ' +
        'subtraction, multiplication, and division.',
    ],
    1
  );
  await runTurn(
    { run, conversationHistory },
    'What is 2+2? Explain the concept of addition in detail with examples.'
  );

  run = await createRun({
    maxTokens: 4000,
    conversationHistory,
    spies,
  });
  run.Graph?.overrideTestModel(
    [
      'The square root of 16 is 4, because 4 multiplied by 4 equals 16. ' +
        'Square root is the inverse operation of squaring a number.',
    ],
    1
  );
  await runTurn(
    { run, conversationHistory },
    'What is the square root of 16? Explain the concept of square roots.'
  );

  // Force summarization — budget of 50 is well below the ~150 token conversation
  run = await createRun({
    maxTokens: 50,
    conversationHistory,
    spies,
  });
  run.Graph?.overrideTestModel(['Got it.'], 1);

  try {
    await runTurn({ run, conversationHistory }, 'Continue.');
  } catch {
    // Best effort: a failure of this turn is tolerated; only the summarize
    // events matter below. The last history entry is dropped — presumably the
    // human message runTurn appended before failing (verify against runTurn).
    conversationHistory.pop();
  }

  const completeCalls = spies.onSummarizeCompleteSpy.mock.calls;
  expect(completeCalls.length).toBeGreaterThanOrEqual(1);

  const completePayload = completeCalls[0][0] as t.SummarizeCompleteEvent;
  // Assert presence before dereferencing so a missing summary fails with a
  // clear expectation error instead of a TypeError on the lines below.
  expect(completePayload.summary).toBeDefined();
  const summary = completePayload.summary!;
  const summaryText = getSummaryText(summary);
  expect(summaryText.length).toBeGreaterThan(0);
  expect(summary.tokenCount).toBeGreaterThan(0);

  // --- Simulate persistence: create payload as the host would ---
  const persistedSummary: { text: string; tokenCount: number } = {
    text: summaryText,
    tokenCount: summary.tokenCount!,
  };

  // Start a new conversation with summary carried over
  const newHistory: BaseMessage[] = [
    new HumanMessage('What else can you help with?'),
  ];
  const newMap = buildIndexTokenCountMap(newHistory, tokenCounter);

  // Fresh spies for the second Run; they are wired in but not asserted on.
  const spies2 = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };

  const run2 = await createRun({
    maxTokens: 2000,
    conversationHistory: newHistory,
    spies: spies2,
    indexTokenCountMap: newMap,
    initialSummary: persistedSummary,
  });
  run2.Graph?.overrideTestModel(
    [
      'I can help with many things! Based on our previous discussion about math.',
    ],
    1
  );

  const result = await run2.processStream(
    { messages: newHistory },
    streamConfig as any
  );

  expect(result).toBeDefined();
  const runMessages = run2.getRunMessages();
  expect(runMessages).toBeDefined();
  expect(runMessages!.length).toBeGreaterThan(0);

  // NOTE(review): the intent is that the summary token count is charged to
  // the instruction overhead, shrinking the budget left for messages. This
  // test does not measure that; it only checks that the second Run, seeded
  // with the persisted summary, completes and yields messages.
});
655
+
656
// =========================================================================
// Test 6: Multi-tool-call agent loop — pruner closure persists across LLM calls
// =========================================================================
/**
 * Single Run at a 4000-token budget in which the scripted model first replies
 * with a calculator tool call and then with a final text answer. Checks that
 * the Run yields at least three messages, exactly one of type 'tool', and
 * that the tool output contains the real product.
 */
test('agent loop with tool calls: pruner closure persists across LLM calls within one Run', async () => {
  const history: BaseMessage[] = [];
  const summarizeSpies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };

  const run = await createRun({
    maxTokens: 4000,
    conversationHistory: history,
    spies: summarizeSpies,
    tools: [new Calculator()],
  });

  // Script for the fake model installed by overrideTestModel: the first LLM
  // call emits text plus the tool call, the second emits text only, and the
  // Calculator tool executes in between.
  const scriptedReplies = [
    'Let me calculate 12345 * 6789 for you.',
    'The result of 12345 * 6789 is 83,810,205.',
  ];
  const calculatorCall: ToolCall = {
    name: 'calculator',
    args: { input: '12345 * 6789' },
    id: 'tc_calc_1',
    type: 'tool_call',
  };
  run.Graph?.overrideTestModel(scriptedReplies, 1, [calculatorCall]);

  history.push(new HumanMessage('What is 12345 * 6789? Use the calculator.'));
  await run.processStream({ messages: history }, streamConfig as any);

  const produced = run.getRunMessages();
  expect(produced).toBeDefined();
  // Expected shape: AI (with tool call) + ToolMessage + AI (final answer)
  expect(produced!.length).toBeGreaterThanOrEqual(3);

  // Exactly one tool message, carrying the genuinely computed product.
  const toolOutputs = produced!.filter((msg) => msg._getType() === 'tool');
  expect(toolOutputs).toHaveLength(1);
  expect(toolOutputs[0].content as string).toContain('83810205');
});
711
+
712
// =========================================================================
// Test 7: Prior tool calls in history + tight context triggers summarization
// =========================================================================
/**
 * Two calculator-backed turns are recorded at a 4000-token budget, then a
 * third turn runs at a 50-token budget. Both summarize spies must fire, and
 * the first completed summary must have a positive token count and text
 * equal to SUMMARY_TEXT.
 */
test('prior tool calls in history with tight context triggers summarization', async () => {
  const summarizeSpies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };

  // The history is filled with tool-call artifacts from earlier runs, i.e.
  // the user asked questions and the agent answered through the calculator;
  // the final turn then continues under a budget too small to hold it all.
  const history: BaseMessage[] = [];

  // Builds the one-element tool-call list the scripted model emits.
  const calculatorCall = (id: string, input: string): ToolCall[] => [
    { name: 'calculator', args: { input }, id, type: 'tool_call' },
  ];

  // Turn 1 — generous budget, one calculator call.
  const firstRun = await createRun({
    maxTokens: 4000,
    conversationHistory: history,
    spies: summarizeSpies,
    tools: [new Calculator()],
  });
  firstRun.Graph?.overrideTestModel(
    [
      'Let me calculate that for you using the calculator tool.',
      'The result of 12345 * 6789 is 83,810,205. That is a large number!',
    ],
    1,
    calculatorCall('tc_prior_1', '12345 * 6789')
  );
  await runTurn(
    { run: firstRun, conversationHistory: history },
    'Calculate 12345 * 6789 using the calculator and explain the result.'
  );

  // Turn 2 — generous budget again, second calculator call.
  const secondRun = await createRun({
    maxTokens: 4000,
    conversationHistory: history,
    spies: summarizeSpies,
    tools: [new Calculator()],
  });
  secondRun.Graph?.overrideTestModel(
    [
      'Computing the square root now.',
      'The square root of 83810205 is approximately 9155.06.',
    ],
    1,
    calculatorCall('tc_prior_2', 'sqrt(83810205)')
  );
  await runTurn(
    { run: secondRun, conversationHistory: history },
    'Now take the square root of that result using the calculator.'
  );

  // Per turn: Human, AI+toolcall, ToolMsg, AI — so two turns give eight.
  expect(history.length).toBeGreaterThanOrEqual(8);

  // Turn 3 — tight budget so the tool-heavy history has to be summarized.
  const tightRun = await createRun({
    maxTokens: 50,
    conversationHistory: history,
    spies: summarizeSpies,
  });
  tightRun.Graph?.overrideTestModel(['Understood, continuing.'], 1);

  try {
    await runTurn(
      { run: tightRun, conversationHistory: history },
      'Summarize everything.'
    );
  } catch {
    // A failing turn is tolerated; drop the last history entry and go on.
    history.pop();
  }

  // Both summarize callbacks must have been invoked.
  const { onSummarizeStartSpy, onSummarizeCompleteSpy } = summarizeSpies;
  expect(onSummarizeStartSpy).toHaveBeenCalled();
  expect(onSummarizeCompleteSpy).toHaveBeenCalled();

  const firstCompleteCall = onSummarizeCompleteSpy.mock.calls[0];
  const completed = firstCompleteCall[0] as t.SummarizeCompleteEvent;
  expect(completed.summary).toBeDefined();
  expect(completed.summary!.tokenCount).toBeGreaterThan(0);
  expect(getSummaryText(completed.summary)).toBe(SUMMARY_TEXT);
});
808
+
809
// =========================================================================
// Test 8: usage_metadata reported across two consecutive Runs
// =========================================================================
/**
 * Runs two consecutive Runs over a shared history, each backed by a
 * FakeWithUsage model that returns one text response with fixed
 * usage_metadata, and checks that each Run yields at least one message.
 *
 * NOTE(review): the test title still mentions "chained tool calls", but no
 * tool call is scripted here. Per the original author's note, FakeWithUsage
 * does not support tool calls, and overrideTestModel (which does) would
 * replace overrideModel, so the scenario was reduced to plain text responses.
 * The title is left unchanged so existing test-name filters keep matching.
 */
test('multiple chained tool calls with usage_metadata across the agent loop', async () => {
  const spies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };
  const conversationHistory: BaseMessage[] = [];

  const run = await createRun({
    maxTokens: 4000,
    conversationHistory,
    spies,
    tools: [new Calculator()],
  });

  // First Run: a single text response with fixed usage_metadata.
  run.Graph!.overrideModel = new FakeWithUsage({
    responses: ['The answer is 83,810,205.'],
    usage: [{ input_tokens: 30, output_tokens: 15, total_tokens: 45 }],
  }) as any;

  conversationHistory.push(new HumanMessage('What is 12345 * 6789?'));
  await run.processStream(
    { messages: conversationHistory },
    streamConfig as any
  );

  const runMessages = run.getRunMessages();
  expect(runMessages).toBeDefined();
  expect(runMessages!.length).toBeGreaterThan(0);

  // Carry the first Run's output into the shared history for the second Run.
  // NOTE(review): the intent is to show that calibration derived from
  // usage_metadata influences the next turn's accounting; the assertions
  // below only check that the second Run completes and yields messages.
  conversationHistory.push(...(runMessages ?? []));

  const run2 = await createRun({
    maxTokens: 4000,
    conversationHistory,
    spies,
    tools: [new Calculator()],
  });

  // Second call with higher input tokens (conversation grew)
  run2.Graph!.overrideModel = new FakeWithUsage({
    responses: ['The square root of that is approximately 9155.89.'],
    usage: [{ input_tokens: 55, output_tokens: 18, total_tokens: 73 }],
  }) as any;

  conversationHistory.push(
    new HumanMessage('Now compute the square root of that result.')
  );
  await run2.processStream(
    { messages: conversationHistory },
    streamConfig as any
  );

  const run2Messages = run2.getRunMessages();
  expect(run2Messages).toBeDefined();
  expect(run2Messages!.length).toBeGreaterThan(0);
});
888
+
889
// =========================================================================
// Test 9: Multi-turn with tool calls triggers summarization across runs
// =========================================================================
/**
 * Two calculator-backed turns at a 4000-token budget followed by a turn at a
 * 50-token budget. Expects both summarize spies to fire, the first completed
 * summary to carry a positive token count, and the first start event to
 * report a positive `messagesToRefineCount`.
 */
test('multi-turn with tool calls across runs triggers summarization correctly', async () => {
  const history: BaseMessage[] = [];
  const summarizeSpies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };

  // Builds the one-element tool-call list the scripted model emits.
  const calculatorCall = (id: string, input: string): ToolCall[] => [
    { name: 'calculator', args: { input }, id, type: 'tool_call' },
  ];

  // Turn 1 — generous budget with a tool call.
  const firstRun = await createRun({
    maxTokens: 4000,
    conversationHistory: history,
    spies: summarizeSpies,
    tools: [new Calculator()],
  });
  firstRun.Graph?.overrideTestModel(
    [
      'Let me compute 100 * 200 for you using the calculator tool.',
      'The result of 100 multiplied by 200 is 20,000. That is a basic multiplication.',
    ],
    1,
    calculatorCall('tc_multi_1', '100 * 200')
  );
  await runTurn(
    { run: firstRun, conversationHistory: history },
    'Calculate 100 * 200 with the calculator tool and explain the result.'
  );

  // Turn 2 — a second tool call so more tokens pile up in the history.
  const secondRun = await createRun({
    maxTokens: 4000,
    conversationHistory: history,
    spies: summarizeSpies,
    tools: [new Calculator()],
  });
  secondRun.Graph?.overrideTestModel(
    [
      'Now computing 300 * 400.',
      'The result of 300 multiplied by 400 is 120,000. Another straightforward calculation.',
    ],
    1,
    calculatorCall('tc_multi_2', '300 * 400')
  );
  await runTurn(
    { run: secondRun, conversationHistory: history },
    'Now compute 300 * 400 with the calculator and explain.'
  );

  // Two turns of (human, AI+toolcall, ToolMsg, AI) → at least eight entries.
  expect(history.length).toBeGreaterThanOrEqual(8);

  // Turn 3 — budget shrunk to 50 to force summarization.
  const tightRun = await createRun({
    maxTokens: 50,
    conversationHistory: history,
    spies: summarizeSpies,
  });
  tightRun.Graph?.overrideTestModel(['Understood.'], 1);

  try {
    await runTurn(
      { run: tightRun, conversationHistory: history },
      'What were all the results?'
    );
  } catch {
    // A failing turn is tolerated; drop the last history entry and go on.
    history.pop();
  }

  // Summarization must fire; the tool messages belong to the summarized history.
  const { onSummarizeStartSpy, onSummarizeCompleteSpy } = summarizeSpies;
  expect(onSummarizeStartSpy).toHaveBeenCalled();
  expect(onSummarizeCompleteSpy).toHaveBeenCalled();

  const firstCompleteCall = onSummarizeCompleteSpy.mock.calls[0];
  const completed = firstCompleteCall[0] as t.SummarizeCompleteEvent;
  expect(completed.summary).toBeDefined();
  expect(completed.summary!.tokenCount).toBeGreaterThan(0);

  // The start event should report a non-zero number of messages to refine.
  const firstStartCall = onSummarizeStartSpy.mock.calls[0];
  const started = firstStartCall[0] as t.SummarizeStartEvent;
  expect(started.messagesToRefineCount).toBeGreaterThan(0);
});
984
+
985
// =========================================================================
// Test 10: No summarization when everything fits
// =========================================================================
/**
 * One short calculator-backed exchange at a 4000-token budget. Neither
 * summarize spy may be called, while the tool call itself must still run and
 * produce a single tool message containing '4'.
 */
test('no summarization fires when messages fit comfortably within budget', async () => {
  const history: BaseMessage[] = [];
  const summarizeSpies = {
    onSummarizeStartSpy: jest.fn(),
    onSummarizeCompleteSpy: jest.fn(),
  };

  const run = await createRun({
    maxTokens: 4000,
    conversationHistory: history,
    spies: summarizeSpies,
    tools: [new Calculator()],
  });

  // Scripted model: a tool-call reply followed by the final answer.
  const scriptedReplies = ['Let me calculate.', 'The answer is 4.'];
  const calculatorCall: ToolCall = {
    name: 'calculator',
    args: { input: '2 + 2' },
    id: 'tc_easy',
    type: 'tool_call',
  };
  run.Graph?.overrideTestModel(scriptedReplies, 1, [calculatorCall]);

  history.push(new HumanMessage('What is 2+2?'));
  await run.processStream({ messages: history }, streamConfig as any);

  // A tiny conversation under a 4000-token budget must not be summarized.
  const { onSummarizeStartSpy, onSummarizeCompleteSpy } = summarizeSpies;
  expect(onSummarizeStartSpy).not.toHaveBeenCalled();
  expect(onSummarizeCompleteSpy).not.toHaveBeenCalled();

  // The tool call itself must still have gone through.
  const produced = run.getRunMessages();
  expect(produced).toBeDefined();
  expect(produced!.length).toBeGreaterThanOrEqual(3);
  const toolOutputs = produced!.filter((msg) => msg._getType() === 'tool');
  expect(toolOutputs).toHaveLength(1);
  expect(toolOutputs[0].content as string).toContain('4');
});
1034
+ });