@illuma-ai/agents 1.0.96 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +78 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +191 -165
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +22 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/contextPressure.cjs +154 -0
  15. package/dist/cjs/utils/contextPressure.cjs.map +1 -0
  16. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  17. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  18. package/dist/cjs/utils/run.cjs.map +1 -1
  19. package/dist/cjs/utils/tokens.cjs.map +1 -1
  20. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  21. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  22. package/dist/esm/agents/AgentContext.mjs +6 -2
  23. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  24. package/dist/esm/common/constants.mjs +71 -1
  25. package/dist/esm/common/constants.mjs.map +1 -1
  26. package/dist/esm/graphs/Graph.mjs +192 -166
  27. package/dist/esm/graphs/Graph.mjs.map +1 -1
  28. package/dist/esm/main.mjs +5 -1
  29. package/dist/esm/main.mjs.map +1 -1
  30. package/dist/esm/messages/dedup.mjs +93 -0
  31. package/dist/esm/messages/dedup.mjs.map +1 -0
  32. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  33. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  34. package/dist/esm/types/graph.mjs.map +1 -1
  35. package/dist/esm/utils/contextPressure.mjs +148 -0
  36. package/dist/esm/utils/contextPressure.mjs.map +1 -0
  37. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  38. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  39. package/dist/esm/utils/run.mjs.map +1 -1
  40. package/dist/esm/utils/tokens.mjs.map +1 -1
  41. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  42. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  43. package/dist/types/agents/AgentContext.d.ts +4 -1
  44. package/dist/types/common/constants.d.ts +49 -0
  45. package/dist/types/graphs/Graph.d.ts +25 -0
  46. package/dist/types/messages/dedup.d.ts +25 -0
  47. package/dist/types/messages/index.d.ts +1 -0
  48. package/dist/types/types/graph.d.ts +63 -0
  49. package/dist/types/utils/contextPressure.d.ts +72 -0
  50. package/dist/types/utils/index.d.ts +3 -0
  51. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  52. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  53. package/package.json +1 -1
  54. package/src/agents/AgentContext.ts +7 -0
  55. package/src/common/constants.ts +82 -0
  56. package/src/graphs/Graph.ts +254 -208
  57. package/src/graphs/contextManagement.e2e.test.ts +28 -20
  58. package/src/graphs/gapFeatures.test.ts +520 -0
  59. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  60. package/src/messages/__tests__/dedup.test.ts +166 -0
  61. package/src/messages/dedup.ts +104 -0
  62. package/src/messages/index.ts +1 -0
  63. package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
  64. package/src/specs/agent-handoffs.test.ts +36 -36
  65. package/src/specs/thinking-handoff.test.ts +10 -10
  66. package/src/tools/CodeExecutor.ts +22 -3
  67. package/src/types/graph.ts +73 -0
  68. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  69. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  70. package/src/utils/contextPressure.test.ts +262 -0
  71. package/src/utils/contextPressure.ts +188 -0
  72. package/src/utils/index.ts +3 -0
  73. package/src/utils/pruneCalibration.ts +92 -0
  74. package/src/utils/run.ts +108 -108
  75. package/src/utils/tokens.ts +118 -118
  76. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -0,0 +1,520 @@
1
+ /**
2
+ * gapFeatures.test.ts
3
+ *
4
+ * Integration tests for the four LibreChat gap features:
5
+ * 1. Tool Discovery Caching
6
+ * 2. SummarizationConfig (trigger types, initialSummary)
7
+ * 3. EMA Pruning Calibration
8
+ * 4. Message Deduplication
9
+ *
10
+ * These tests verify the features work together in the Graph pipeline
11
+ * without breaking existing functionality.
12
+ */
13
+
14
+ import {
15
+ HumanMessage,
16
+ AIMessage,
17
+ AIMessageChunk,
18
+ SystemMessage,
19
+ ToolMessage,
20
+ BaseMessage,
21
+ } from '@langchain/core/messages';
22
+ import type { TokenCounter } from '@/types/run';
23
+ import type { SummarizationConfig } from '@/types/graph';
24
+ import { createPruneMessages } from '@/messages/prune';
25
+ import { deduplicateSystemMessages } from '@/messages/dedup';
26
+ import { ToolDiscoveryCache } from '@/utils/toolDiscoveryCache';
27
+ import {
28
+ createPruneCalibration,
29
+ updatePruneCalibration,
30
+ applyCalibration,
31
+ } from '@/utils/pruneCalibration';
32
+ import { Constants } from '@/common';
33
+
34
+ const simpleTokenCounter: TokenCounter = (msg: BaseMessage): number => {
35
+ const content =
36
+ typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
37
+ return Math.ceil(content.length / 4);
38
+ };
39
+
40
+ function buildConversation(
41
+ messageCount: number,
42
+ charsPerMsg = 400
43
+ ): BaseMessage[] {
44
+ const messages: BaseMessage[] = [
45
+ new SystemMessage('You are a helpful assistant.'),
46
+ ];
47
+ for (let i = 0; i < messageCount; i++) {
48
+ const text = `Message ${i}: ${'x'.repeat(charsPerMsg - 15)}`;
49
+ messages.push(i % 2 === 0 ? new HumanMessage(text) : new AIMessage(text));
50
+ }
51
+ return messages;
52
+ }
53
+
54
+ // ============================================================================
55
+ // 1. Tool Discovery Caching Integration
56
+ // ============================================================================
57
+
58
// Verifies that ToolDiscoveryCache only reports tools it has not seen before,
// both within a single message list (repeated scans) and across turns (seeding).
describe('Tool Discovery Caching — Integration', () => {
  it('caches tool discoveries across multiple pruning iterations', () => {
    const cache = new ToolDiscoveryCache();
    // A minimal tool-search exchange: AI issues a TOOL_SEARCH call, the
    // ToolMessage carries discovered tools in artifact.tool_references.
    const messages: BaseMessage[] = [
      new SystemMessage('System prompt'),
      new HumanMessage('Find a tool'),
      new AIMessageChunk({
        content: 'Searching',
        tool_calls: [
          { id: 'tc_1', name: Constants.TOOL_SEARCH, args: { query: 'web' } },
        ],
      }),
      new ToolMessage({
        content: 'Found tools',
        tool_call_id: 'tc_1',
        name: Constants.TOOL_SEARCH,
        artifact: { tool_references: [{ tool_name: 'web_search' }] },
      }),
    ];

    // Iteration 1: discovers web_search
    const disc1 = cache.getNewDiscoveries(messages);
    expect(disc1).toEqual(['web_search']);

    // Iteration 2: same messages, no new discoveries (cached)
    const disc2 = cache.getNewDiscoveries(messages);
    expect(disc2).toEqual([]);

    // Iteration 3: new tool search added
    messages.push(
      new AIMessageChunk({
        content: 'More tools',
        tool_calls: [
          { id: 'tc_2', name: Constants.TOOL_SEARCH, args: { query: 'code' } },
        ],
      }),
      new ToolMessage({
        content: 'Found more',
        tool_call_id: 'tc_2',
        name: Constants.TOOL_SEARCH,
        artifact: {
          tool_references: [
            { tool_name: 'code_exec' },
            { tool_name: 'web_search' },
          ],
        },
      })
    );

    // Only code_exec is new (web_search already cached)
    const disc3 = cache.getNewDiscoveries(messages);
    expect(disc3).toEqual(['code_exec']);
    expect(cache.size).toBe(2);
  });

  it('seed + incremental discovery simulates cross-turn caching', () => {
    const cache = new ToolDiscoveryCache();

    // Seed from prior turn's discoveries
    cache.seed(['tool_a', 'tool_b']);

    const messages: BaseMessage[] = [
      new HumanMessage('Use tool_a and find tool_c'),
      new AIMessageChunk({
        content: 'Searching',
        tool_calls: [{ id: 'tc_1', name: Constants.TOOL_SEARCH, args: {} }],
      }),
      new ToolMessage({
        content: 'Found',
        tool_call_id: 'tc_1',
        name: Constants.TOOL_SEARCH,
        artifact: {
          tool_references: [{ tool_name: 'tool_a' }, { tool_name: 'tool_c' }],
        },
      }),
    ];

    const newDisc = cache.getNewDiscoveries(messages);
    // tool_a is seeded, only tool_c is new
    expect(newDisc).toEqual(['tool_c']);
    // The cache accumulates seeded + newly discovered tools.
    expect(cache.getAllDiscoveredTools()).toEqual(
      expect.arrayContaining(['tool_a', 'tool_b', 'tool_c'])
    );
  });
});
143
+
144
+ // ============================================================================
145
+ // 2. SummarizationConfig Integration
146
+ // ============================================================================
147
+
148
// Exercises the summarization-trigger decision table via a local replica of
// the Graph's logic (the real implementation lives in Graph and is private).
describe('SummarizationConfig — Trigger Logic', () => {
  /**
   * Simulates the Graph's shouldTriggerSummarization logic.
   *
   * @param prunedMessageCount - how many messages pruning removed this pass;
   *   0 always means "do not summarize"
   * @param maxContextTokens - model context window size in tokens
   * @param indexTokenCountMap - per-message token counts keyed by index
   * @param instructionTokens - token count of the system instructions
   * @param config - optional trigger configuration; absent config (or absent
   *   triggerType) preserves the legacy always-trigger behavior
   * @returns true when summarization should run
   */
  function shouldTriggerSummarization(
    prunedMessageCount: number,
    maxContextTokens: number,
    indexTokenCountMap: Record<string, number | undefined>,
    instructionTokens: number,
    config?: SummarizationConfig
  ): boolean {
    if (prunedMessageCount === 0) return false;
    // Backward compatible: no config means summarize whenever pruning occurred.
    if (!config || !config.triggerType) return true;

    const threshold = config.triggerThreshold;

    switch (config.triggerType) {
      case 'contextPercentage': {
        if (maxContextTokens <= 0) return true;
        const effectiveThreshold = threshold ?? 80;
        let totalTokens = instructionTokens;
        for (const key in indexTokenCountMap) {
          totalTokens += indexTokenCountMap[key] ?? 0;
        }
        const utilization = (totalTokens / maxContextTokens) * 100;
        return utilization >= effectiveThreshold;
      }
      case 'messageCount': {
        const effectiveThreshold = threshold ?? 5;
        return prunedMessageCount >= effectiveThreshold;
      }
      case 'tokenThreshold': {
        // No threshold configured → fall back to always triggering.
        if (threshold == null) return true;
        let totalTokens = instructionTokens;
        for (const key in indexTokenCountMap) {
          totalTokens += indexTokenCountMap[key] ?? 0;
        }
        return totalTokens >= threshold;
      }
      default:
        return true;
    }
  }

  it('no config = always triggers (backward compatible)', () => {
    expect(shouldTriggerSummarization(3, 10000, {}, 100)).toBe(true);
  });

  it('contextPercentage: triggers at 80% utilization', () => {
    const tokenMap = { '0': 4000, '1': 3500, '2': 500 };
    // Total = 100 + 8000 = 8100, 8100/10000 = 81% > 80%
    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'contextPercentage',
        triggerThreshold: 80,
      })
    ).toBe(true);
  });

  it('contextPercentage: does NOT trigger below threshold', () => {
    const tokenMap = { '0': 2000, '1': 1000 };
    // Total = 100 + 3000 = 3100, 3100/10000 = 31% < 80%
    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'contextPercentage',
        triggerThreshold: 80,
      })
    ).toBe(false);
  });

  it('messageCount: triggers when enough messages pruned', () => {
    expect(
      shouldTriggerSummarization(5, 10000, {}, 100, {
        triggerType: 'messageCount',
        triggerThreshold: 5,
      })
    ).toBe(true);

    expect(
      shouldTriggerSummarization(3, 10000, {}, 100, {
        triggerType: 'messageCount',
        triggerThreshold: 5,
      })
    ).toBe(false);
  });

  it('tokenThreshold: triggers when total tokens exceed threshold', () => {
    const tokenMap = { '0': 5000, '1': 3000 };
    // Total = 100 + 8000 = 8100 ≥ 8000 → trigger.
    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'tokenThreshold',
        triggerThreshold: 8000,
      })
    ).toBe(true);

    // 8100 < 9000 → no trigger.
    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'tokenThreshold',
        triggerThreshold: 9000,
      })
    ).toBe(false);
  });

  it('never triggers with 0 pruned messages', () => {
    expect(shouldTriggerSummarization(0, 10000, {}, 100)).toBe(false);
    expect(
      shouldTriggerSummarization(0, 10000, {}, 100, {
        triggerType: 'messageCount',
        triggerThreshold: 1,
      })
    ).toBe(false);
  });

  it('initialSummary provides cross-run seeding', () => {
    const config: SummarizationConfig = {
      initialSummary: 'This agent helps with data analysis tasks.',
    };

    // Simulate the Graph logic: when no cached/persisted summary exists,
    // initialSummary is used as fallback
    let summary: string | undefined;
    const cachedRunSummary: string | null = null;
    const persistedSummary: string | null = null;

    // Precedence: in-run cache, then persisted summary, then configured seed.
    if (cachedRunSummary != null) {
      summary = cachedRunSummary;
    } else if (persistedSummary != null && persistedSummary !== '') {
      summary = persistedSummary;
    } else if (config.initialSummary != null && config.initialSummary !== '') {
      summary = config.initialSummary;
    }

    expect(summary).toBe('This agent helps with data analysis tasks.');
  });
});
283
+
284
+ // ============================================================================
285
+ // 3. EMA Pruning Calibration Integration
286
+ // ============================================================================
287
+
288
+ describe('EMA Pruning Calibration — Integration', () => {
289
+ it('adjusts pruning budget across iterations', () => {
290
+ let calibration = createPruneCalibration();
291
+ const rawBudget = 10000;
292
+
293
+ // Iteration 1: no calibration data → raw budget
294
+ expect(applyCalibration(rawBudget, calibration)).toBe(10000);
295
+
296
+ // Simulate: our counter estimates 8000 tokens but API says 10000
297
+ // (we're under-counting → need to prune more aggressively)
298
+ calibration = updatePruneCalibration(calibration, 10000, 8000);
299
+ const adjusted = applyCalibration(rawBudget, calibration);
300
+ expect(adjusted).toBeLessThan(10000); // More aggressive pruning
301
+ });
302
+
303
+ it('full pruning cycle with calibration', () => {
304
+ let calibration = createPruneCalibration();
305
+ const messages = buildConversation(40);
306
+
307
+ // Iteration 1: uncalibrated
308
+ const maxTokens1 = applyCalibration(200, calibration);
309
+ const prune1 = createPruneMessages({
310
+ startIndex: 0,
311
+ provider: 'anthropic' as any,
312
+ tokenCounter: simpleTokenCounter,
313
+ maxTokens: maxTokens1,
314
+ indexTokenCountMap: {},
315
+ });
316
+ const result1 = prune1({ messages });
317
+ expect(result1.messagesToRefine.length).toBeGreaterThan(0);
318
+
319
+ // Simulate API returning higher token count than our estimate
320
+ calibration = updatePruneCalibration(calibration, 250, 200);
321
+
322
+ // Iteration 2: calibrated (should use adjusted budget)
323
+ const maxTokens2 = applyCalibration(200, calibration);
324
+ expect(maxTokens2).not.toBe(200); // Budget adjusted
325
+
326
+ // Multiple iterations should converge
327
+ for (let i = 0; i < 5; i++) {
328
+ calibration = updatePruneCalibration(calibration, 250, 200);
329
+ }
330
+ const finalBudget = applyCalibration(200, calibration);
331
+ // Should stabilize around 200 * (200/250) ≈ 160
332
+ expect(finalBudget).toBeLessThan(200);
333
+ expect(finalBudget).toBeGreaterThan(100);
334
+ });
335
+ });
336
+
337
+ // ============================================================================
338
+ // 4. Message Deduplication Integration
339
+ // ============================================================================
340
+
341
+ describe('Message Deduplication — Integration', () => {
342
+ it('deduplicates post-prune notes from multiple iterations', () => {
343
+ const postPruneNote = 'Note: Earlier messages have been compressed.';
344
+ const messages: BaseMessage[] = [
345
+ new SystemMessage('Main system prompt'),
346
+ new SystemMessage('[Conversation Summary]\nPrior context'),
347
+ new SystemMessage(postPruneNote), // Iteration 1 post-prune note
348
+ new HumanMessage('Q1'),
349
+ new AIMessage('A1'),
350
+ new SystemMessage(postPruneNote), // Iteration 2 duplicate
351
+ new HumanMessage('Q2'),
352
+ new AIMessage('A2'),
353
+ new SystemMessage(postPruneNote), // Iteration 3 duplicate
354
+ ];
355
+
356
+ const { messages: deduped, removedCount } =
357
+ deduplicateSystemMessages(messages);
358
+ expect(removedCount).toBe(2);
359
+ expect(deduped).toHaveLength(7);
360
+
361
+ // Non-system messages all preserved
362
+ const humanMsgs = deduped.filter((m) => m.getType() === 'human');
363
+ expect(humanMsgs).toHaveLength(2);
364
+ });
365
+
366
+ it('preserves unique system messages including summary', () => {
367
+ const messages: BaseMessage[] = [
368
+ new SystemMessage('Main prompt'),
369
+ new SystemMessage('[Conversation Summary]\nVersion 1'),
370
+ new HumanMessage('Q'),
371
+ new SystemMessage('[Conversation Summary]\nVersion 2 - updated'),
372
+ ];
373
+
374
+ const { messages: deduped, removedCount } =
375
+ deduplicateSystemMessages(messages);
376
+ expect(removedCount).toBe(0);
377
+ expect(deduped).toHaveLength(4);
378
+ });
379
+
380
+ it('works with pruning + dedup pipeline', () => {
381
+ // Simulate: prune messages, inject summary, then dedup
382
+ const allMessages = buildConversation(20);
383
+
384
+ // Step 1: Prune
385
+ const prune = createPruneMessages({
386
+ startIndex: 0,
387
+ provider: 'anthropic' as any,
388
+ tokenCounter: simpleTokenCounter,
389
+ maxTokens: 300,
390
+ indexTokenCountMap: {},
391
+ });
392
+ const { context, messagesToRefine } = prune({ messages: allMessages });
393
+ expect(messagesToRefine.length).toBeGreaterThan(0);
394
+
395
+ // Step 2: Inject summary
396
+ const summaryMsg = new SystemMessage(
397
+ '[Conversation Summary]\nUser discussed tasks'
398
+ );
399
+ const systemIdx = context[0]?.getType() === 'system' ? 1 : 0;
400
+ let withSummary = [
401
+ ...context.slice(0, systemIdx),
402
+ summaryMsg,
403
+ ...context.slice(systemIdx),
404
+ ];
405
+
406
+ // Simulate adding post-prune note
407
+ withSummary = [...withSummary, new SystemMessage('Context was compressed')];
408
+
409
+ // Step 3: Dedup (should not remove anything since all unique)
410
+ const { messages: final, removedCount } =
411
+ deduplicateSystemMessages(withSummary);
412
+ expect(removedCount).toBe(0);
413
+ expect(final.length).toBe(withSummary.length);
414
+ });
415
+ });
416
+
417
+ // ============================================================================
418
+ // Combined Integration
419
+ // ============================================================================
420
+
421
+ describe('All Features Combined — Full Pipeline', () => {
422
+ it('simulates 3-turn conversation with all features active', async () => {
423
+ const toolCache = new ToolDiscoveryCache();
424
+ let calibration = createPruneCalibration();
425
+ let persistedSummary: string | null = null;
426
+ const sumConfig: SummarizationConfig = {
427
+ triggerType: 'contextPercentage',
428
+ triggerThreshold: 50,
429
+ reserveRatio: 0.3,
430
+ };
431
+
432
+ const callback = jest
433
+ .fn()
434
+ .mockImplementation(async (msgs: BaseMessage[]) => {
435
+ const summary = `Summary of ${msgs.length} messages`;
436
+ persistedSummary = summary;
437
+ return summary;
438
+ });
439
+
440
+ for (let turn = 0; turn < 3; turn++) {
441
+ // Build conversation that exceeds budget
442
+ const messages = buildConversation(15);
443
+
444
+ // Tool discovery (turn 1 has tool search results)
445
+ if (turn === 0) {
446
+ messages.push(
447
+ new AIMessageChunk({
448
+ content: 'Searching',
449
+ tool_calls: [
450
+ { id: `tc_${turn}`, name: Constants.TOOL_SEARCH, args: {} },
451
+ ],
452
+ }),
453
+ new ToolMessage({
454
+ content: 'Found',
455
+ tool_call_id: `tc_${turn}`,
456
+ name: Constants.TOOL_SEARCH,
457
+ artifact: { tool_references: [{ tool_name: 'web_search' }] },
458
+ })
459
+ );
460
+ }
461
+
462
+ const discoveries = toolCache.getNewDiscoveries(messages);
463
+ if (turn === 0) {
464
+ expect(discoveries).toEqual(['web_search']);
465
+ } else {
466
+ expect(discoveries).toEqual([]);
467
+ }
468
+
469
+ // Prune with calibration
470
+ const maxTokens = applyCalibration(300, calibration);
471
+ const prune = createPruneMessages({
472
+ startIndex: 0,
473
+ provider: 'anthropic' as any,
474
+ tokenCounter: simpleTokenCounter,
475
+ maxTokens,
476
+ indexTokenCountMap: {},
477
+ });
478
+ const { context, messagesToRefine } = prune({ messages });
479
+
480
+ let assembled = [...context];
481
+
482
+ // Inject summary if available
483
+ if (persistedSummary && messagesToRefine.length > 0) {
484
+ const summaryMsg = new SystemMessage(
485
+ `[Conversation Summary]\n${persistedSummary}`
486
+ );
487
+ const sysIdx = assembled[0]?.getType() === 'system' ? 1 : 0;
488
+ assembled = [
489
+ ...assembled.slice(0, sysIdx),
490
+ summaryMsg,
491
+ ...assembled.slice(sysIdx),
492
+ ];
493
+ }
494
+
495
+ // Fire background summary
496
+ if (messagesToRefine.length > 0) {
497
+ callback(messagesToRefine).catch(() => {});
498
+ await new Promise((r) => setTimeout(r, 10));
499
+ }
500
+
501
+ // Dedup
502
+ const { messages: deduped } = deduplicateSystemMessages(assembled);
503
+ expect(deduped.length).toBeLessThanOrEqual(assembled.length);
504
+
505
+ // Update calibration (simulated API response)
506
+ calibration = updatePruneCalibration(
507
+ calibration,
508
+ maxTokens + 50,
509
+ maxTokens
510
+ );
511
+ }
512
+
513
+ // Verify state after 3 turns
514
+ expect(toolCache.size).toBe(1);
515
+ expect(toolCache.has('web_search')).toBe(true);
516
+ expect(calibration.iterations).toBe(3);
517
+ expect(persistedSummary).toContain('Summary of');
518
+ expect(callback).toHaveBeenCalled();
519
+ });
520
+ });