@illuma-ai/agents 1.0.98 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/constants.cjs +53 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +167 -31
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +14 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/dedup.cjs +95 -0
- package/dist/cjs/messages/dedup.cjs.map +1 -0
- package/dist/cjs/tools/CodeExecutor.cjs +22 -3
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/types/graph.cjs.map +1 -1
- package/dist/cjs/utils/pruneCalibration.cjs +78 -0
- package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
- package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/constants.mjs +48 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +168 -32
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/dedup.mjs +93 -0
- package/dist/esm/messages/dedup.mjs.map +1 -0
- package/dist/esm/tools/CodeExecutor.mjs +22 -3
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/types/graph.mjs.map +1 -1
- package/dist/esm/utils/pruneCalibration.mjs +74 -0
- package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
- package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/common/constants.d.ts +35 -0
- package/dist/types/graphs/Graph.d.ts +25 -0
- package/dist/types/messages/dedup.d.ts +25 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/types/graph.d.ts +63 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/pruneCalibration.d.ts +43 -0
- package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +7 -0
- package/src/common/constants.ts +56 -0
- package/src/graphs/Graph.ts +220 -50
- package/src/graphs/gapFeatures.test.ts +520 -0
- package/src/graphs/nonBlockingSummarization.test.ts +307 -0
- package/src/messages/__tests__/dedup.test.ts +166 -0
- package/src/messages/dedup.ts +104 -0
- package/src/messages/index.ts +1 -0
- package/src/tools/CodeExecutor.ts +22 -3
- package/src/types/graph.ts +73 -0
- package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
- package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
- package/src/utils/contextPressure.test.ts +24 -9
- package/src/utils/index.ts +2 -0
- package/src/utils/pruneCalibration.ts +92 -0
- package/src/utils/run.ts +108 -108
- package/src/utils/tokens.ts +118 -118
- package/src/utils/toolDiscoveryCache.ts +150 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* nonBlockingSummarization.test.ts
|
|
3
|
+
*
|
|
4
|
+
* Tests that the Graph's summarization pipeline is fully non-blocking.
|
|
5
|
+
* The core invariant: summarizeCallback is NEVER awaited in the hot path.
|
|
6
|
+
* Instead, the graph uses cached/persisted summaries and fires background updates.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
HumanMessage,
|
|
11
|
+
AIMessage,
|
|
12
|
+
SystemMessage,
|
|
13
|
+
BaseMessage,
|
|
14
|
+
} from '@langchain/core/messages';
|
|
15
|
+
import type { TokenCounter } from '@/types/run';
|
|
16
|
+
import { createPruneMessages } from '@/messages/prune';
|
|
17
|
+
|
|
18
|
+
/**
 * Cheap stand-in for a real tokenizer: estimates one token per four characters,
 * rounding up. Non-string content is measured by the length of its JSON form.
 */
const simpleTokenCounter: TokenCounter = (msg: BaseMessage): number => {
  const raw = msg.content;
  let charCount: number;
  if (typeof raw === 'string') {
    charCount = raw.length;
  } else {
    charCount = JSON.stringify(raw).length;
  }
  return Math.ceil(charCount / 4);
};
|
|
26
|
+
|
|
27
|
+
/**
 * Creates a conversation big enough to overflow a small token budget.
 *
 * The result always starts with one system message, followed by `messageCount`
 * messages that alternate human / AI (human first). Every generated message is
 * "Message <i>: " plus 380 filler characters, i.e. roughly 100 tokens under
 * the 4-characters-per-token estimate used in this file.
 */
function buildLargeConversation(messageCount: number): BaseMessage[] {
  const filler = 'x'.repeat(380);
  const conversation: BaseMessage[] = [];
  conversation.push(new SystemMessage('You are a helpful assistant.'));

  for (let turn = 0; turn < messageCount; turn += 1) {
    const text = `Message ${turn}: ${filler}`;
    const isHumanTurn = turn % 2 === 0;
    conversation.push(
      isHumanTurn ? new HumanMessage(text) : new AIMessage(text)
    );
  }

  return conversation;
}
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// Non-blocking summarization behavior
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
describe('Non-blocking summarization in Graph pruning', () => {
|
|
51
|
+
/**
 * Test double for the summarization decision that Graph.ts makes after pruning
 * (the original note pointed at Graph.ts lines 1439-1492; those numbers may
 * have drifted). Kept here so the branching can be tested without a full Graph.
 *
 * When there is something to summarize, the summary is chosen in this order:
 *   1. the cached run summary, if it is not null/undefined;
 *   2. otherwise the persisted summary, if it is a non-empty string;
 *   3. otherwise no summary at all (undefined).
 * In all three cases the callback is started exactly once and is not awaited;
 * a rejection from it is swallowed.
 *
 * When `messagesToRefine` is empty (or no callback is supplied) nothing is
 * called and the summary is undefined.
 *
 * `wasBlocking` is always false: this helper is synchronous and never awaits.
 */
function simulateGraphSummarization(opts: {
  messagesToRefine: BaseMessage[];
  cachedRunSummary: string | null;
  persistedSummary: string | null;
  summarizeCallback: (msgs: BaseMessage[]) => Promise<string | undefined>;
}): {
  summary: string | undefined;
  callbackCalled: boolean;
  wasBlocking: boolean;
} {
  const {
    messagesToRefine,
    cachedRunSummary,
    persistedSummary,
    summarizeCallback,
  } = opts;

  // Guard clause: nothing was pruned, so there is nothing to summarize.
  if (messagesToRefine.length === 0 || !summarizeCallback) {
    return { summary: undefined, callbackCalled: false, wasBlocking: false };
  }

  let summary: string | undefined;
  if (cachedRunSummary != null) {
    summary = cachedRunSummary;
  } else if (persistedSummary != null && persistedSummary !== '') {
    summary = persistedSummary;
  }

  // Fire-and-forget refresh; failures must not reach the caller.
  summarizeCallback(messagesToRefine).catch(() => {});

  return { summary, callbackCalled: true, wasBlocking: false };
}
|
|
101
|
+
|
|
102
|
+
// Case 1: a cached run summary wins and the callback is only used as a background refresh.
it('Case 1: uses cached run summary without blocking', () => {
  const callback = jest.fn().mockResolvedValue('updated summary');
  const result = simulateGraphSummarization({
    messagesToRefine: [new HumanMessage('old msg')],
    cachedRunSummary: 'existing cached summary',
    persistedSummary: null,
    summarizeCallback: callback,
  });

  expect(result.summary).toBe('existing cached summary');
  expect(result.wasBlocking).toBe(false);
  expect(callback).toHaveBeenCalledTimes(1);
});

// Case 2: with no cached summary, the persisted one is used as the fallback.
it('Case 2: uses persisted summary as fallback without blocking', () => {
  const callback = jest.fn().mockResolvedValue('updated summary');
  const result = simulateGraphSummarization({
    messagesToRefine: [new HumanMessage('old msg')],
    cachedRunSummary: null,
    persistedSummary: 'persisted from last turn',
    summarizeCallback: callback,
  });

  expect(result.summary).toBe('persisted from last turn');
  expect(result.wasBlocking).toBe(false);
  expect(callback).toHaveBeenCalledTimes(1);
});

// Case 3: nothing to inject yet, but generation is still kicked off in the background.
it('Case 3: no summary available — skips injection, fires background', () => {
  const callback = jest.fn().mockResolvedValue('new summary');
  const result = simulateGraphSummarization({
    messagesToRefine: [new HumanMessage('old msg')],
    cachedRunSummary: null,
    persistedSummary: null,
    summarizeCallback: callback,
  });

  expect(result.summary).toBeUndefined();
  expect(result.wasBlocking).toBe(false);
  expect(callback).toHaveBeenCalledTimes(1);
});

// Without pruned messages there is nothing to summarize, so the callback must stay idle.
it('does not call callback when no messages were refined', () => {
  const callback = jest.fn().mockResolvedValue('summary');
  const result = simulateGraphSummarization({
    messagesToRefine: [],
    cachedRunSummary: null,
    persistedSummary: null,
    summarizeCallback: callback,
  });

  expect(result.summary).toBeUndefined();
  expect(callback).not.toHaveBeenCalled();
});

it('handles callback failure gracefully', async () => {
  const callback = jest.fn().mockRejectedValue(new Error('LLM timeout'));
  const result = simulateGraphSummarization({
    messagesToRefine: [new HumanMessage('msg')],
    cachedRunSummary: 'cached',
    persistedSummary: null,
    summarizeCallback: callback,
  });

  // Should still use cached summary
  expect(result.summary).toBe('cached');
  expect(callback).toHaveBeenCalledTimes(1);
  // Wait on the background promise itself rather than sleeping for a fixed
  // time: this is deterministic and proves the callback really did reject.
  await expect(callback.mock.results[0]?.value).rejects.toThrow('LLM timeout');
});

it('background callback updates cache for subsequent iterations', async () => {
  let cachedSummary: string | null = null;
  const callback = jest.fn().mockImplementation(async () => {
    // Simulate LLM call delay
    await new Promise((r) => setTimeout(r, 50));
    const updated = 'background-updated summary';
    cachedSummary = updated;
    return updated;
  });

  // First call: no cache
  const firstRun = simulateGraphSummarization({
    messagesToRefine: [new HumanMessage('msg')],
    cachedRunSummary: null,
    persistedSummary: 'stale persisted',
    summarizeCallback: callback,
  });

  // Initially still using persisted, and the background update has not landed yet
  expect(firstRun.summary).toBe('stale persisted');
  expect(cachedSummary).toBeNull();

  // Wait for the background update by awaiting the promise the callback
  // returned, instead of guessing how long it takes.
  await callback.mock.results[0]?.value;
  expect(cachedSummary).toBe('background-updated summary');

  // Second call: now has cached summary
  const result2 = simulateGraphSummarization({
    messagesToRefine: [new HumanMessage('msg2')],
    cachedRunSummary: cachedSummary,
    persistedSummary: null,
    summarizeCallback: callback,
  });
  expect(result2.summary).toBe('background-updated summary');
});
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
// ============================================================================
|
|
210
|
+
// Pruning integration with summary injection
|
|
211
|
+
// ============================================================================
|
|
212
|
+
|
|
213
|
+
describe('Pruning + summary injection flow', () => {
  it('pruneMessages produces messagesToRefine when context exceeds budget', () => {
    // 40 messages at roughly 100 tokens each (~4000 tokens in total) against a
    // 200-token budget: most of the conversation has to be pruned away.
    const conversation = buildLargeConversation(40);
    const pruneMessages = createPruneMessages({
      startIndex: 0,
      provider: 'anthropic' as any,
      tokenCounter: simpleTokenCounter,
      maxTokens: 200,
      indexTokenCountMap: {},
    });

    const pruned = pruneMessages({ messages: conversation });

    expect(pruned.messagesToRefine.length).toBeGreaterThan(0);
    expect(pruned.context.length).toBeLessThan(conversation.length);
  });

  it('summary is injected after system message when available', () => {
    const history: BaseMessage[] = [
      new SystemMessage('System prompt'),
      new HumanMessage('Recent question'),
    ];
    const summaryText = 'User discussed project deadlines and budget';
    const summaryMessage = new SystemMessage(
      `[Conversation Summary]\n${summaryText}`
    );

    // The summary goes right behind a leading system message, or to the very
    // front when the history does not start with one.
    const insertAt = history[0]?.getType() === 'system' ? 1 : 0;
    const withSummary = [...history];
    withSummary.splice(insertAt, 0, summaryMessage);

    expect(withSummary.length).toBe(3);
    expect(withSummary[0].getType()).toBe('system'); // Original system
    expect(withSummary[1].content as string).toContain('[Conversation Summary]');
    expect(withSummary[2].getType()).toBe('human'); // Recent question preserved
  });
});
|
|
258
|
+
|
|
259
|
+
// ============================================================================
|
|
260
|
+
// Multi-turn simulation
|
|
261
|
+
// ============================================================================
|
|
262
|
+
|
|
263
|
+
describe('Multi-turn conversation with rolling summaries', () => {
  it('simulates 5 turns with persisted summary handoff', async () => {
    let persistedSummary: string | null = null;
    const summaryUpdates: string[] = [];

    // Stand-in for the LLM summarizer: records every summary it produces and
    // "persists" the newest one so the next turn can pick it up.
    const callback = jest
      .fn()
      .mockImplementation(async (msgs: BaseMessage[]) => {
        const msgCount = msgs.length;
        const summary = `Summary of ${msgCount} messages (turn ${summaryUpdates.length + 1})`;
        summaryUpdates.push(summary);
        persistedSummary = summary;
        return summary;
      });

    // Simulate 5 conversation turns
    for (let turn = 0; turn < 5; turn++) {
      const messages = buildLargeConversation(10);
      const prune = createPruneMessages({
        startIndex: 0,
        provider: 'anthropic' as any,
        tokenCounter: simpleTokenCounter,
        maxTokens: 300,
        indexTokenCountMap: {},
      });
      const { messagesToRefine } = prune({ messages });

      if (messagesToRefine.length > 0) {
        // Simulate Graph behavior: use persisted, fire background
        const cachedSummary = persistedSummary;
        // Handoff check: the summary this turn reads must be exactly the one
        // the most recent background update produced (or null before any ran).
        if (summaryUpdates.length > 0) {
          expect(cachedSummary).toBe(summaryUpdates[summaryUpdates.length - 1]);
        } else {
          expect(cachedSummary).toBeNull();
        }
        // Fire background update
        callback(messagesToRefine).catch(() => {});
        await new Promise((r) => setTimeout(r, 10));
      }
    }

    // All turns should have fired background updates
    expect(summaryUpdates.length).toBeGreaterThanOrEqual(4);
    expect(persistedSummary).toContain('Summary of');
  });
});
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
// src/messages/__tests__/dedup.test.ts
|
|
2
|
+
import {
|
|
3
|
+
SystemMessage,
|
|
4
|
+
HumanMessage,
|
|
5
|
+
AIMessage,
|
|
6
|
+
ToolMessage,
|
|
7
|
+
} from '@langchain/core/messages';
|
|
8
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
9
|
+
import { deduplicateSystemMessages } from '../dedup';
|
|
10
|
+
|
|
11
|
+
describe('deduplicateSystemMessages', () => {
  it('returns empty array unchanged', () => {
    const result = deduplicateSystemMessages([]);
    expect(result.messages).toEqual([]);
    expect(result.removedCount).toBe(0);
  });

  it('returns single message unchanged', () => {
    const msgs = [new SystemMessage('Hello')];
    const result = deduplicateSystemMessages(msgs);
    expect(result.messages).toHaveLength(1);
    expect(result.removedCount).toBe(0);
  });

  it('preserves conversation with no duplicates', () => {
    const msgs: BaseMessage[] = [
      new SystemMessage('You are helpful'),
      new HumanMessage('Hello'),
      new AIMessage('Hi there'),
      new SystemMessage('Summary of prior context'),
      new HumanMessage('What about X?'),
    ];

    const result = deduplicateSystemMessages(msgs);
    expect(result.messages).toHaveLength(5);
    expect(result.removedCount).toBe(0);
  });

  it('removes duplicate system messages', () => {
    const msgs: BaseMessage[] = [
      new SystemMessage('You are helpful'),
      new SystemMessage('[Conversation Summary]\nUser discussed X'),
      new HumanMessage('Hello'),
      new AIMessage('Hi'),
      new SystemMessage('[Conversation Summary]\nUser discussed X'), // duplicate
      new HumanMessage('What?'),
    ];

    const result = deduplicateSystemMessages(msgs);
    expect(result.messages).toHaveLength(5);
    expect(result.removedCount).toBe(1);
    // Verify the duplicate at index 4 was removed
    expect(result.messages.map((m) => m.getType())).toEqual([
      'system',
      'system',
      'human',
      'ai',
      'human',
    ]);
  });

  it('keeps the first occurrence of duplicate system messages', () => {
    const duplicateContent = 'Context was compressed. Summary follows.';
    const msgs: BaseMessage[] = [
      new SystemMessage('Main prompt'),
      new SystemMessage(duplicateContent), // first occurrence
      new HumanMessage('Q1'),
      new AIMessage('A1'),
      new SystemMessage(duplicateContent), // duplicate
      new HumanMessage('Q2'),
      new AIMessage('A2'),
      new SystemMessage(duplicateContent), // another duplicate
    ];

    const result = deduplicateSystemMessages(msgs);
    expect(result.messages).toHaveLength(6);
    expect(result.removedCount).toBe(2);
    // The survivor must be the FIRST occurrence, still in its original slot;
    // counting alone would also pass if a later copy had been kept instead.
    expect(result.messages[1]).toBe(msgs[1]);
    expect(result.messages.map((m) => m.getType())).toEqual([
      'system',
      'system',
      'human',
      'ai',
      'human',
      'ai',
    ]);
  });

  it('never removes non-system messages', () => {
    const msgs: BaseMessage[] = [
      new SystemMessage('Prompt'),
      new HumanMessage('Same content'),
      new AIMessage('Same content'),
      new HumanMessage('Same content'), // duplicate content but not system
      new AIMessage('Same content'), // duplicate content but not system
    ];

    const result = deduplicateSystemMessages(msgs);
    expect(result.messages).toHaveLength(5);
    expect(result.removedCount).toBe(0);
  });

  it('always preserves the first system message (main prompt)', () => {
    const msgs: BaseMessage[] = [
      new SystemMessage('Main system prompt'),
      new HumanMessage('Hello'),
      new SystemMessage('Main system prompt'), // duplicate of first
    ];

    const result = deduplicateSystemMessages(msgs);
    expect(result.messages).toHaveLength(2);
    expect(result.removedCount).toBe(1);
    // First system message is preserved
    expect(result.messages[0].content as string).toBe('Main system prompt');
  });

  it('handles tool messages correctly (never deduped)', () => {
    const msgs: BaseMessage[] = [
      new SystemMessage('Prompt'),
      new HumanMessage('Q'),
      new AIMessage({
        content: 'Using tool',
        tool_calls: [{ id: 'tc1', name: 'test', args: {} }],
      }),
      new ToolMessage({ content: 'result', tool_call_id: 'tc1' }),
      new SystemMessage('Post-prune note'),
      new AIMessage({
        content: 'Using tool',
        tool_calls: [{ id: 'tc2', name: 'test', args: {} }],
      }),
      new ToolMessage({ content: 'result', tool_call_id: 'tc2' }),
      new SystemMessage('Post-prune note'), // duplicate system msg
    ];

    const result = deduplicateSystemMessages(msgs);
    expect(result.removedCount).toBe(1);
    // Tool messages are all preserved
    const toolMsgs = result.messages.filter((m) => m.getType() === 'tool');
    expect(toolMsgs).toHaveLength(2);
  });

  it('does not mutate input array', () => {
    const msgs: BaseMessage[] = [
      new SystemMessage('Prompt'),
      new SystemMessage('Dup'),
      new SystemMessage('Dup'),
    ];
    const snapshot = [...msgs];

    const result = deduplicateSystemMessages(msgs);

    // A duplicate really was dropped, so this exercises the removal path...
    expect(result.removedCount).toBe(1);
    // ...and the removal happened on a separate array, not on the input.
    expect(result.messages).not.toBe(msgs);
    expect(msgs).toHaveLength(snapshot.length);
    // Same elements in the same order: length alone would miss a reorder or swap.
    msgs.forEach((msg, index) => {
      expect(msg).toBe(snapshot[index]);
    });
  });

  it('handles mixed duplicate patterns', () => {
    const msgs: BaseMessage[] = [
      new SystemMessage('A'),
      new SystemMessage('B'),
      new SystemMessage('A'), // dup of first
      new HumanMessage('Q'),
      new SystemMessage('B'), // dup
      new SystemMessage('C'),
      new SystemMessage('C'), // dup
    ];

    const result = deduplicateSystemMessages(msgs);
    expect(result.removedCount).toBe(3);
    expect(result.messages).toHaveLength(4);
    expect(
      result.messages.map((m) => {
        if (typeof m.content === 'string') return m.content;
        return '';
      })
    ).toEqual(['A', 'B', 'Q', 'C']);
  });
});
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
// src/messages/dedup.ts
|
|
2
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
3
|
+
import { MessageTypes } from '@/common';
|
|
4
|
+
import { DEDUP_MAX_CONTENT_LENGTH } from '@/common/constants';
|
|
5
|
+
|
|
6
|
+
/**
 * Removes repeated system messages from a message list.
 *
 * Problem: In long tool-use chains, the same system messages (e.g., post-prune notes,
 * conversation summaries) can accumulate when the context is rebuilt on each iteration.
 * These duplicates waste tokens without adding information.
 *
 * Strategy: a system message is dropped when an earlier system message in the same
 * list produced the same comparison key. Duplicates do NOT have to be adjacent.
 * The FIRST occurrence is always kept, which also means a system prompt at index 0
 * can never be removed. Non-system messages (human, ai, tool) are never touched.
 *
 * Important constraints:
 * - Only system messages are candidates for deduplication
 * - The comparison key comes from getContentString: string content is compared
 *   verbatim, other content by its JSON serialization
 * - A message whose key is unavailable (getContentString returns null, e.g. content
 *   longer than DEDUP_MAX_CONTENT_LENGTH) is always kept and never recorded, so it
 *   cannot cause a later message to be removed
 * - Relative order of the surviving messages is unchanged
 *
 * @param messages - The message array to deduplicate (not mutated)
 * @returns `messages`: a freshly allocated array (even when nothing was removed, so
 *   callers may mutate it without affecting the input) with duplicate system messages
 *   removed; `removedCount`: how many messages were dropped
 */
export function deduplicateSystemMessages(messages: BaseMessage[]): {
  messages: BaseMessage[];
  removedCount: number;
} {
  const seenSystemContents = new Set<string>();
  const result: BaseMessage[] = [];
  let removedCount = 0;

  for (const msg of messages) {
    // Non-system messages are always kept
    if (msg.getType() !== MessageTypes.SYSTEM) {
      result.push(msg);
      continue;
    }

    const content = getContentString(msg);

    // No usable comparison key (too long / not serializable): keep as-is
    if (content == null) {
      result.push(msg);
      continue;
    }

    // Exact repeat of an earlier system message: drop it
    if (seenSystemContents.has(content)) {
      removedCount++;
      continue;
    }

    // First time this system content is seen (this is also the path taken by
    // the main prompt at index 0): keep it and remember it
    seenSystemContents.add(content);
    result.push(msg);
  }

  return { messages: result, removedCount };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Produces the string used to compare one message's content with another's.
 *
 * - String content is returned as-is.
 * - Any other content (e.g., Anthropic cache_control blocks) is JSON-serialized.
 *
 * Returns null when the resulting string is longer than DEDUP_MAX_CONTENT_LENGTH
 * or when the content cannot be serialized.
 */
function getContentString(msg: BaseMessage): string | null {
  const raw = msg.content;
  // Shared size gate for both the plain and the serialized form.
  const withinLimit = (text: string): string | null =>
    text.length > DEDUP_MAX_CONTENT_LENGTH ? null : text;

  if (typeof raw === 'string') {
    return withinLimit(raw);
  }

  try {
    // The size check stays inside the try on purpose: a failed or empty
    // serialization result ends up in the catch and yields null.
    return withinLimit(JSON.stringify(raw));
  } catch {
    return null;
  }
}
|
package/src/messages/index.ts
CHANGED
|
@@ -94,10 +94,27 @@ export const CodeExecutionToolDescription = `
|
|
|
94
94
|
Runs code and returns stdout/stderr output from a stateless execution environment, similar to running scripts in a command-line interface. Each execution is isolated and independent.
|
|
95
95
|
|
|
96
96
|
Usage:
|
|
97
|
-
- No network access available.
|
|
97
|
+
- No network access available. Do NOT use pip install, npm install, or any package manager.
|
|
98
98
|
- Generated files are automatically delivered; **DO NOT** provide download links.
|
|
99
99
|
- NEVER use this tool to execute malicious code.
|
|
100
100
|
- When a code_id is returned in output, you can edit that code using code_id + old_str + new_str instead of rewriting the entire code block.
|
|
101
|
+
|
|
102
|
+
Pre-installed Python packages (use directly, no installation needed):
|
|
103
|
+
- Data Science: numpy, pandas
|
|
104
|
+
- Visualization: matplotlib, seaborn, plotly
|
|
105
|
+
- Documents: python-docx, python-pptx, reportlab, fpdf2, PyMuPDF, pdfplumber
|
|
106
|
+
- Spreadsheets: openpyxl, xlsxwriter
|
|
107
|
+
- Image: pillow
|
|
108
|
+
- Data: orjson, lxml, beautifulsoup4, faker
|
|
109
|
+
|
|
110
|
+
Pre-installed JavaScript packages:
|
|
111
|
+
- pptxgenjs, react, react-dom, react-icons, sharp
|
|
112
|
+
|
|
113
|
+
Pre-installed Go packages:
|
|
114
|
+
- excelize (Excel), gofpdf (PDF)
|
|
115
|
+
|
|
116
|
+
Pre-installed R packages:
|
|
117
|
+
- ggplot2, dplyr, tidyr, readxl, writexl, jsonlite, Cairo
|
|
101
118
|
`.trim();
|
|
102
119
|
|
|
103
120
|
export const CodeExecutionToolName = Constants.EXECUTE_CODE;
|
|
@@ -135,11 +152,13 @@ Runs code in a stateless execution environment. Each execution is isolated.
|
|
|
135
152
|
✅ ONLY USE FOR:
|
|
136
153
|
- File generation: PowerPoint (.pptx), Word (.docx), PDF (.pdf), Excel (.xlsx)
|
|
137
154
|
- Processing uploaded files (CSV, Excel analysis)
|
|
138
|
-
- Heavy computation requiring Python
|
|
155
|
+
- Heavy computation requiring Python (numpy, pandas for data analytics)
|
|
139
156
|
|
|
140
157
|
Rules:
|
|
141
|
-
- No network access
|
|
158
|
+
- No network access — do NOT use pip install, npm install, or any package manager
|
|
159
|
+
- All packages are pre-installed: numpy, pandas, matplotlib, seaborn, plotly, python-docx, python-pptx, reportlab, openpyxl, xlsxwriter, pillow, faker, orjson, lxml, beautifulsoup4
|
|
142
160
|
- Generated files auto-delivered (no download links needed)
|
|
161
|
+
- **Error recovery**: When execution fails, use \`code_id\` + \`old_str\` + \`new_str\` to fix only the broken part — do NOT rewrite the entire code block. This is faster and saves tokens.
|
|
143
162
|
`.trim();
|
|
144
163
|
|
|
145
164
|
return tool(
|