@illuma-ai/agents 1.0.96 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/constants.cjs +78 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +191 -165
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +22 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/dedup.cjs +95 -0
- package/dist/cjs/messages/dedup.cjs.map +1 -0
- package/dist/cjs/tools/CodeExecutor.cjs +22 -3
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/types/graph.cjs.map +1 -1
- package/dist/cjs/utils/contextPressure.cjs +154 -0
- package/dist/cjs/utils/contextPressure.cjs.map +1 -0
- package/dist/cjs/utils/pruneCalibration.cjs +78 -0
- package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
- package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/constants.mjs +71 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +192 -166
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +5 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/dedup.mjs +93 -0
- package/dist/esm/messages/dedup.mjs.map +1 -0
- package/dist/esm/tools/CodeExecutor.mjs +22 -3
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/types/graph.mjs.map +1 -1
- package/dist/esm/utils/contextPressure.mjs +148 -0
- package/dist/esm/utils/contextPressure.mjs.map +1 -0
- package/dist/esm/utils/pruneCalibration.mjs +74 -0
- package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
- package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/common/constants.d.ts +49 -0
- package/dist/types/graphs/Graph.d.ts +25 -0
- package/dist/types/messages/dedup.d.ts +25 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/types/graph.d.ts +63 -0
- package/dist/types/utils/contextPressure.d.ts +72 -0
- package/dist/types/utils/index.d.ts +3 -0
- package/dist/types/utils/pruneCalibration.d.ts +43 -0
- package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +7 -0
- package/src/common/constants.ts +82 -0
- package/src/graphs/Graph.ts +254 -208
- package/src/graphs/contextManagement.e2e.test.ts +28 -20
- package/src/graphs/gapFeatures.test.ts +520 -0
- package/src/graphs/nonBlockingSummarization.test.ts +307 -0
- package/src/messages/__tests__/dedup.test.ts +166 -0
- package/src/messages/dedup.ts +104 -0
- package/src/messages/index.ts +1 -0
- package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
- package/src/specs/agent-handoffs.test.ts +36 -36
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/tools/CodeExecutor.ts +22 -3
- package/src/types/graph.ts +73 -0
- package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
- package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
- package/src/utils/contextPressure.test.ts +262 -0
- package/src/utils/contextPressure.ts +188 -0
- package/src/utils/index.ts +3 -0
- package/src/utils/pruneCalibration.ts +92 -0
- package/src/utils/run.ts +108 -108
- package/src/utils/tokens.ts +118 -118
- package/src/utils/toolDiscoveryCache.ts +150 -0
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* gapFeatures.test.ts
|
|
3
|
+
*
|
|
4
|
+
* Integration tests for the four LibreChat gap features:
|
|
5
|
+
* 1. Tool Discovery Caching
|
|
6
|
+
* 2. SummarizationConfig (trigger types, initialSummary)
|
|
7
|
+
* 3. EMA Pruning Calibration
|
|
8
|
+
* 4. Message Deduplication
|
|
9
|
+
*
|
|
10
|
+
* These tests verify the features work together in the Graph pipeline
|
|
11
|
+
* without breaking existing functionality.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import {
|
|
15
|
+
HumanMessage,
|
|
16
|
+
AIMessage,
|
|
17
|
+
AIMessageChunk,
|
|
18
|
+
SystemMessage,
|
|
19
|
+
ToolMessage,
|
|
20
|
+
BaseMessage,
|
|
21
|
+
} from '@langchain/core/messages';
|
|
22
|
+
import type { TokenCounter } from '@/types/run';
|
|
23
|
+
import type { SummarizationConfig } from '@/types/graph';
|
|
24
|
+
import { createPruneMessages } from '@/messages/prune';
|
|
25
|
+
import { deduplicateSystemMessages } from '@/messages/dedup';
|
|
26
|
+
import { ToolDiscoveryCache } from '@/utils/toolDiscoveryCache';
|
|
27
|
+
import {
|
|
28
|
+
createPruneCalibration,
|
|
29
|
+
updatePruneCalibration,
|
|
30
|
+
applyCalibration,
|
|
31
|
+
} from '@/utils/pruneCalibration';
|
|
32
|
+
import { Constants } from '@/common';
|
|
33
|
+
|
|
34
|
+
/**
 * Approximate token counter used by the tests in this file.
 *
 * Estimates one token per four characters of message content. String content
 * is measured directly; any other content is measured by the length of its
 * JSON serialization.
 *
 * @param msg - Message whose `content` is measured.
 * @returns The estimated token count, rounded up to a whole number.
 */
const simpleTokenCounter: TokenCounter = (msg: BaseMessage): number => {
  // JSON.stringify returns undefined for undefined input; fall back to an
  // empty string so a content-less message counts as 0 tokens, not a TypeError.
  const text =
    typeof msg.content === 'string'
      ? msg.content
      : (JSON.stringify(msg.content) ?? '');
  return Math.ceil(text.length / 4);
};
|
|
39
|
+
|
|
40
|
+
/**
 * Builds a synthetic conversation for pruning tests: one system prompt
 * followed by `messageCount` messages that alternate human (even index) and
 * AI (odd index).
 *
 * @param messageCount - Number of human/AI messages appended after the system prompt.
 * @param charsPerMsg - Approximate character length of each message. Fifteen
 *   characters are reserved for the `Message N: ` prefix; the remainder is
 *   filled with `x`. Values below 15 yield a message with no filler.
 * @returns The system message followed by the generated messages.
 */
function buildConversation(
  messageCount: number,
  charsPerMsg = 400
): BaseMessage[] {
  const messages: BaseMessage[] = [
    new SystemMessage('You are a helpful assistant.'),
  ];
  // Clamp at zero: String.prototype.repeat throws RangeError on a negative
  // count. The filler does not depend on the index, so build it once.
  const filler = 'x'.repeat(Math.max(0, charsPerMsg - 15));
  for (let i = 0; i < messageCount; i++) {
    const text = `Message ${i}: ${filler}`;
    messages.push(i % 2 === 0 ? new HumanMessage(text) : new AIMessage(text));
  }
  return messages;
}
|
|
53
|
+
|
|
54
|
+
// ============================================================================
|
|
55
|
+
// 1. Tool Discovery Caching Integration
|
|
56
|
+
// ============================================================================
|
|
57
|
+
|
|
58
|
+
describe('Tool Discovery Caching — Integration', () => {
  /**
   * Builds one tool-search round trip: the AI chunk that issues the
   * TOOL_SEARCH call and the tool message that answers it, whose artifact
   * lists the referenced tool names.
   */
  const toolSearchRoundTrip = (opts: {
    id: string;
    args: Record<string, unknown>;
    aiText: string;
    resultText: string;
    found: string[];
  }): BaseMessage[] => [
    new AIMessageChunk({
      content: opts.aiText,
      tool_calls: [
        { id: opts.id, name: Constants.TOOL_SEARCH, args: opts.args },
      ],
    }),
    new ToolMessage({
      content: opts.resultText,
      tool_call_id: opts.id,
      name: Constants.TOOL_SEARCH,
      artifact: {
        tool_references: opts.found.map((tool_name) => ({ tool_name })),
      },
    }),
  ];

  it('caches tool discoveries across multiple pruning iterations', () => {
    const cache = new ToolDiscoveryCache();
    const history: BaseMessage[] = [
      new SystemMessage('System prompt'),
      new HumanMessage('Find a tool'),
      ...toolSearchRoundTrip({
        id: 'tc_1',
        args: { query: 'web' },
        aiText: 'Searching',
        resultText: 'Found tools',
        found: ['web_search'],
      }),
    ];

    // First pass over the history reports web_search.
    expect(cache.getNewDiscoveries(history)).toEqual(['web_search']);

    // Second pass over the unchanged history reports nothing new.
    expect(cache.getNewDiscoveries(history)).toEqual([]);

    // Append a second search whose result repeats web_search and adds code_exec.
    history.push(
      ...toolSearchRoundTrip({
        id: 'tc_2',
        args: { query: 'code' },
        aiText: 'More tools',
        resultText: 'Found more',
        found: ['code_exec', 'web_search'],
      })
    );

    // Only the previously unseen tool is reported; the cache now holds both.
    expect(cache.getNewDiscoveries(history)).toEqual(['code_exec']);
    expect(cache.size).toBe(2);
  });

  it('seed + incremental discovery simulates cross-turn caching', () => {
    const cache = new ToolDiscoveryCache();

    // Pre-populate the cache as if an earlier turn had found these tools.
    cache.seed(['tool_a', 'tool_b']);

    const history: BaseMessage[] = [
      new HumanMessage('Use tool_a and find tool_c'),
      ...toolSearchRoundTrip({
        id: 'tc_1',
        args: {},
        aiText: 'Searching',
        resultText: 'Found',
        found: ['tool_a', 'tool_c'],
      }),
    ];

    // tool_a was seeded, so only tool_c counts as a new discovery.
    expect(cache.getNewDiscoveries(history)).toEqual(['tool_c']);
    expect(cache.getAllDiscoveredTools()).toEqual(
      expect.arrayContaining(['tool_a', 'tool_b', 'tool_c'])
    );
  });
});
|
|
143
|
+
|
|
144
|
+
// ============================================================================
|
|
145
|
+
// 2. SummarizationConfig Integration
|
|
146
|
+
// ============================================================================
|
|
147
|
+
|
|
148
|
+
describe('SummarizationConfig — Trigger Logic', () => {
  /**
   * Sums the instruction tokens and every per-message token count in the map.
   * Entries whose value is `undefined` contribute 0.
   */
  function totalContextTokens(
    indexTokenCountMap: Record<string, number | undefined>,
    instructionTokens: number
  ): number {
    return Object.values(indexTokenCountMap).reduce<number>(
      (sum, count) => sum + (count ?? 0),
      instructionTokens
    );
  }

  /**
   * Simulates the Graph's shouldTriggerSummarization logic.
   *
   * @param prunedMessageCount - Number of messages removed by pruning; 0 never triggers.
   * @param maxContextTokens - Context window size used for `contextPercentage`.
   * @param indexTokenCountMap - Per-message token counts keyed by index.
   * @param instructionTokens - Tokens consumed by instructions, added to the total.
   * @param config - Optional trigger configuration; when absent or without a
   *   `triggerType`, any pruning triggers summarization.
   * @returns Whether summarization should run.
   */
  function shouldTriggerSummarization(
    prunedMessageCount: number,
    maxContextTokens: number,
    indexTokenCountMap: Record<string, number | undefined>,
    instructionTokens: number,
    config?: SummarizationConfig
  ): boolean {
    if (prunedMessageCount === 0) return false;
    if (!config || !config.triggerType) return true;

    const threshold = config.triggerThreshold;

    switch (config.triggerType) {
      case 'contextPercentage': {
        // A non-positive window makes the ratio meaningless; always trigger.
        if (maxContextTokens <= 0) return true;
        const utilization =
          (totalContextTokens(indexTokenCountMap, instructionTokens) /
            maxContextTokens) *
          100;
        return utilization >= (threshold ?? 80);
      }
      case 'messageCount':
        return prunedMessageCount >= (threshold ?? 5);
      case 'tokenThreshold': {
        // No default exists for an absolute token threshold; always trigger.
        if (threshold == null) return true;
        return (
          totalContextTokens(indexTokenCountMap, instructionTokens) >= threshold
        );
      }
      default:
        return true;
    }
  }

  it('no config = always triggers (backward compatible)', () => {
    expect(shouldTriggerSummarization(3, 10000, {}, 100)).toBe(true);
  });

  it('contextPercentage: triggers at 80% utilization', () => {
    const tokenMap = { '0': 4000, '1': 3500, '2': 500 };
    // Total = 100 + 8000 = 8100, 8100/10000 = 81% > 80%
    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'contextPercentage',
        triggerThreshold: 80,
      })
    ).toBe(true);
  });

  it('contextPercentage: does NOT trigger below threshold', () => {
    const tokenMap = { '0': 2000, '1': 1000 };
    // Total = 100 + 3000 = 3100, 3100/10000 = 31% < 80%
    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'contextPercentage',
        triggerThreshold: 80,
      })
    ).toBe(false);
  });

  it('messageCount: triggers when enough messages pruned', () => {
    expect(
      shouldTriggerSummarization(5, 10000, {}, 100, {
        triggerType: 'messageCount',
        triggerThreshold: 5,
      })
    ).toBe(true);

    expect(
      shouldTriggerSummarization(3, 10000, {}, 100, {
        triggerType: 'messageCount',
        triggerThreshold: 5,
      })
    ).toBe(false);
  });

  it('tokenThreshold: triggers when total tokens exceed threshold', () => {
    const tokenMap = { '0': 5000, '1': 3000 };
    // Total = 100 + 8000 = 8100: meets 8000, falls short of 9000.
    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'tokenThreshold',
        triggerThreshold: 8000,
      })
    ).toBe(true);

    expect(
      shouldTriggerSummarization(2, 10000, tokenMap, 100, {
        triggerType: 'tokenThreshold',
        triggerThreshold: 9000,
      })
    ).toBe(false);
  });

  it('never triggers with 0 pruned messages', () => {
    expect(shouldTriggerSummarization(0, 10000, {}, 100)).toBe(false);
    expect(
      shouldTriggerSummarization(0, 10000, {}, 100, {
        triggerType: 'messageCount',
        triggerThreshold: 1,
      })
    ).toBe(false);
  });

  it('initialSummary provides cross-run seeding', () => {
    const config: SummarizationConfig = {
      initialSummary: 'This agent helps with data analysis tasks.',
    };

    // Simulate the Graph logic: when no cached/persisted summary exists,
    // initialSummary is used as fallback
    let summary: string | undefined;
    const cachedRunSummary: string | null = null;
    const persistedSummary: string | null = null;

    if (cachedRunSummary != null) {
      summary = cachedRunSummary;
    } else if (persistedSummary != null && persistedSummary !== '') {
      summary = persistedSummary;
    } else if (config.initialSummary != null && config.initialSummary !== '') {
      summary = config.initialSummary;
    }

    expect(summary).toBe('This agent helps with data analysis tasks.');
  });
});
|
|
283
|
+
|
|
284
|
+
// ============================================================================
|
|
285
|
+
// 3. EMA Pruning Calibration Integration
|
|
286
|
+
// ============================================================================
|
|
287
|
+
|
|
288
|
+
describe('EMA Pruning Calibration — Integration', () => {
  it('adjusts pruning budget across iterations', () => {
    const rawBudget = 10000;
    const fresh = createPruneCalibration();

    // With no observations recorded, the budget passes through unchanged.
    expect(applyCalibration(rawBudget, fresh)).toBe(10000);

    // Feed one observation: the API reported 10000 tokens where the local
    // counter estimated 8000 (an under-count), so the budget should shrink.
    const afterUndercount = updatePruneCalibration(fresh, 10000, 8000);
    expect(applyCalibration(rawBudget, afterUndercount)).toBeLessThan(10000);
  });

  it('full pruning cycle with calibration', () => {
    const baseBudget = 200;
    const conversation = buildConversation(40);
    let state = createPruneCalibration();

    // Pass 1: prune with the uncalibrated budget.
    const pruneUncalibrated = createPruneMessages({
      startIndex: 0,
      provider: 'anthropic' as any,
      tokenCounter: simpleTokenCounter,
      maxTokens: applyCalibration(baseBudget, state),
      indexTokenCountMap: {},
    });
    const firstPass = pruneUncalibrated({ messages: conversation });
    expect(firstPass.messagesToRefine.length).toBeGreaterThan(0);

    // Record an API count (250) higher than the local estimate (200).
    state = updatePruneCalibration(state, 250, 200);

    // Pass 2: the calibrated budget must differ from the raw one.
    expect(applyCalibration(baseBudget, state)).not.toBe(200);

    // Repeat the same observation five more times.
    let remaining = 5;
    while (remaining-- > 0) {
      state = updatePruneCalibration(state, 250, 200);
    }

    // Expected to settle near 200 * (200/250) ≈ 160.
    const settledBudget = applyCalibration(baseBudget, state);
    expect(settledBudget).toBeLessThan(200);
    expect(settledBudget).toBeGreaterThan(100);
  });
});
|
|
336
|
+
|
|
337
|
+
// ============================================================================
|
|
338
|
+
// 4. Message Deduplication Integration
|
|
339
|
+
// ============================================================================
|
|
340
|
+
|
|
341
|
+
describe('Message Deduplication — Integration', () => {
  it('deduplicates post-prune notes from multiple iterations', () => {
    const postPruneNote = 'Note: Earlier messages have been compressed.';
    // Each call yields a fresh system message carrying the same note text.
    const note = (): SystemMessage => new SystemMessage(postPruneNote);

    const history: BaseMessage[] = [
      new SystemMessage('Main system prompt'),
      new SystemMessage('[Conversation Summary]\nPrior context'),
      note(), // first occurrence
      new HumanMessage('Q1'),
      new AIMessage('A1'),
      note(), // repeat
      new HumanMessage('Q2'),
      new AIMessage('A2'),
      note(), // repeat
    ];

    const result = deduplicateSystemMessages(history);
    expect(result.removedCount).toBe(2);
    expect(result.messages).toHaveLength(7);

    // Both human messages must still be present after deduplication.
    const humanCount = result.messages.filter(
      (msg) => msg.getType() === 'human'
    );
    expect(humanCount).toHaveLength(2);
  });

  it('preserves unique system messages including summary', () => {
    const history: BaseMessage[] = [
      new SystemMessage('Main prompt'),
      new SystemMessage('[Conversation Summary]\nVersion 1'),
      new HumanMessage('Q'),
      new SystemMessage('[Conversation Summary]\nVersion 2 - updated'),
    ];

    // The two summaries differ in text, so neither is treated as a duplicate.
    const result = deduplicateSystemMessages(history);
    expect(result.removedCount).toBe(0);
    expect(result.messages).toHaveLength(4);
  });

  it('works with pruning + dedup pipeline', () => {
    // Pipeline under test: prune, insert a summary, append a note, then dedup.
    const conversation = buildConversation(20);

    // Stage 1: prune against a small budget so some messages are dropped.
    const pruneFn = createPruneMessages({
      startIndex: 0,
      provider: 'anthropic' as any,
      tokenCounter: simpleTokenCounter,
      maxTokens: 300,
      indexTokenCountMap: {},
    });
    const { context, messagesToRefine } = pruneFn({ messages: conversation });
    expect(messagesToRefine.length).toBeGreaterThan(0);

    // Stage 2: place the summary right after a leading system message, or at
    // the front when the context does not start with one.
    const insertAt = context[0]?.getType() === 'system' ? 1 : 0;
    const assembled: BaseMessage[] = [...context];
    assembled.splice(
      insertAt,
      0,
      new SystemMessage('[Conversation Summary]\nUser discussed tasks')
    );

    // Append the post-prune note at the end.
    assembled.push(new SystemMessage('Context was compressed'));

    // Stage 3: every system message is unique, so dedup must be a no-op.
    const result = deduplicateSystemMessages(assembled);
    expect(result.removedCount).toBe(0);
    expect(result.messages.length).toBe(assembled.length);
  });
});
|
|
416
|
+
|
|
417
|
+
// ============================================================================
|
|
418
|
+
// Combined Integration
|
|
419
|
+
// ============================================================================
|
|
420
|
+
|
|
421
|
+
describe('All Features Combined — Full Pipeline', () => {
  it('simulates 3-turn conversation with all features active', async () => {
    const toolCache = new ToolDiscoveryCache();
    let calibration = createPruneCalibration();
    let persistedSummary: string | null = null;

    // Stand-in summarizer: records the summary it produces so later turns can
    // inject it into the assembled context.
    const callback = jest
      .fn()
      .mockImplementation(async (msgs: BaseMessage[]) => {
        const summary = `Summary of ${msgs.length} messages`;
        persistedSummary = summary;
        return summary;
      });

    for (let turn = 0; turn < 3; turn++) {
      // Build conversation that exceeds budget
      const messages = buildConversation(15);

      // Tool discovery: only the first turn (turn === 0) carries a tool
      // search result.
      if (turn === 0) {
        messages.push(
          new AIMessageChunk({
            content: 'Searching',
            tool_calls: [
              { id: `tc_${turn}`, name: Constants.TOOL_SEARCH, args: {} },
            ],
          }),
          new ToolMessage({
            content: 'Found',
            tool_call_id: `tc_${turn}`,
            name: Constants.TOOL_SEARCH,
            artifact: { tool_references: [{ tool_name: 'web_search' }] },
          })
        );
      }

      const discoveries = toolCache.getNewDiscoveries(messages);
      if (turn === 0) {
        expect(discoveries).toEqual(['web_search']);
      } else {
        expect(discoveries).toEqual([]);
      }

      // Prune with calibration
      const maxTokens = applyCalibration(300, calibration);
      const prune = createPruneMessages({
        startIndex: 0,
        provider: 'anthropic' as any,
        tokenCounter: simpleTokenCounter,
        maxTokens,
        indexTokenCountMap: {},
      });
      const { context, messagesToRefine } = prune({ messages });

      let assembled = [...context];

      // Inject the summary from an earlier turn, if one exists, right after a
      // leading system message (or at the front when there is none).
      if (persistedSummary && messagesToRefine.length > 0) {
        const summaryMsg = new SystemMessage(
          `[Conversation Summary]\n${persistedSummary}`
        );
        const sysIdx = assembled[0]?.getType() === 'system' ? 1 : 0;
        assembled = [
          ...assembled.slice(0, sysIdx),
          summaryMsg,
          ...assembled.slice(sysIdx),
        ];
      }

      // Run the summarizer after the context has been assembled, so this
      // turn's summary only becomes visible on the next turn. Awaiting the
      // call (rather than sleeping on a timer) keeps the test deterministic
      // and lets a summarizer failure fail the test instead of being swallowed.
      if (messagesToRefine.length > 0) {
        await callback(messagesToRefine);
      }

      // Dedup
      const { messages: deduped } = deduplicateSystemMessages(assembled);
      expect(deduped.length).toBeLessThanOrEqual(assembled.length);

      // Update calibration (simulated API response reporting 50 more tokens
      // than the local estimate).
      calibration = updatePruneCalibration(
        calibration,
        maxTokens + 50,
        maxTokens
      );
    }

    // Verify state after 3 turns
    expect(toolCache.size).toBe(1);
    expect(toolCache.has('web_search')).toBe(true);
    expect(calibration.iterations).toBe(3);
    expect(persistedSummary).toContain('Summary of');
    expect(callback).toHaveBeenCalled();
  });
});
|