@illuma-ai/agents 1.0.98 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +53 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +167 -31
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +14 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  15. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  16. package/dist/cjs/utils/run.cjs.map +1 -1
  17. package/dist/cjs/utils/tokens.cjs.map +1 -1
  18. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  19. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  20. package/dist/esm/agents/AgentContext.mjs +6 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/common/constants.mjs +48 -1
  23. package/dist/esm/common/constants.mjs.map +1 -1
  24. package/dist/esm/graphs/Graph.mjs +168 -32
  25. package/dist/esm/graphs/Graph.mjs.map +1 -1
  26. package/dist/esm/main.mjs +4 -1
  27. package/dist/esm/main.mjs.map +1 -1
  28. package/dist/esm/messages/dedup.mjs +93 -0
  29. package/dist/esm/messages/dedup.mjs.map +1 -0
  30. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  31. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  32. package/dist/esm/types/graph.mjs.map +1 -1
  33. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  34. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  35. package/dist/esm/utils/run.mjs.map +1 -1
  36. package/dist/esm/utils/tokens.mjs.map +1 -1
  37. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  38. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  39. package/dist/types/agents/AgentContext.d.ts +4 -1
  40. package/dist/types/common/constants.d.ts +35 -0
  41. package/dist/types/graphs/Graph.d.ts +25 -0
  42. package/dist/types/messages/dedup.d.ts +25 -0
  43. package/dist/types/messages/index.d.ts +1 -0
  44. package/dist/types/types/graph.d.ts +63 -0
  45. package/dist/types/utils/index.d.ts +2 -0
  46. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  47. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  48. package/package.json +1 -1
  49. package/src/agents/AgentContext.ts +7 -0
  50. package/src/common/constants.ts +56 -0
  51. package/src/graphs/Graph.ts +220 -50
  52. package/src/graphs/gapFeatures.test.ts +520 -0
  53. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  54. package/src/messages/__tests__/dedup.test.ts +166 -0
  55. package/src/messages/dedup.ts +104 -0
  56. package/src/messages/index.ts +1 -0
  57. package/src/tools/CodeExecutor.ts +22 -3
  58. package/src/types/graph.ts +73 -0
  59. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  60. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  61. package/src/utils/contextPressure.test.ts +24 -9
  62. package/src/utils/index.ts +2 -0
  63. package/src/utils/pruneCalibration.ts +92 -0
  64. package/src/utils/run.ts +108 -108
  65. package/src/utils/tokens.ts +118 -118
  66. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -0,0 +1,307 @@
1
+ /**
2
+ * nonBlockingSummarization.test.ts
3
+ *
4
+ * Tests that the Graph's summarization pipeline is fully non-blocking.
5
+ * The core invariant: summarizeCallback is NEVER awaited in the hot path.
6
+ * Instead, the graph uses cached/persisted summaries and fires background updates.
7
+ */
8
+
9
+ import {
10
+ HumanMessage,
11
+ AIMessage,
12
+ SystemMessage,
13
+ BaseMessage,
14
+ } from '@langchain/core/messages';
15
+ import type { TokenCounter } from '@/types/run';
16
+ import { createPruneMessages } from '@/messages/prune';
17
+
18
+ /**
19
+ * Simple token counter: ~1 token per 4 characters
20
+ */
21
+ const simpleTokenCounter: TokenCounter = (msg: BaseMessage): number => {
22
+ const content =
23
+ typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
24
+ return Math.ceil(content.length / 4);
25
+ };
26
+
27
+ /**
28
+ * Build a conversation that exceeds the token budget to trigger pruning.
29
+ * Each message is ~100 tokens (400 chars).
30
+ */
31
+ function buildLargeConversation(messageCount: number): BaseMessage[] {
32
+ const messages: BaseMessage[] = [
33
+ new SystemMessage('You are a helpful assistant.'),
34
+ ];
35
+ for (let i = 0; i < messageCount; i++) {
36
+ const longText = `Message ${i}: ${'x'.repeat(380)}`;
37
+ if (i % 2 === 0) {
38
+ messages.push(new HumanMessage(longText));
39
+ } else {
40
+ messages.push(new AIMessage(longText));
41
+ }
42
+ }
43
+ return messages;
44
+ }
45
+
46
+ // ============================================================================
47
+ // Non-blocking summarization behavior
48
+ // ============================================================================
49
+
50
+ describe('Non-blocking summarization in Graph pruning', () => {
51
+ /**
52
+ * Simulates the Graph.ts summarization decision logic (lines 1439-1492).
53
+ * This is extracted to test the branching behavior without needing a full Graph.
54
+ */
55
+ function simulateGraphSummarization(opts: {
56
+ messagesToRefine: BaseMessage[];
57
+ cachedRunSummary: string | null;
58
+ persistedSummary: string | null;
59
+ summarizeCallback: (msgs: BaseMessage[]) => Promise<string | undefined>;
60
+ }): {
61
+ summary: string | undefined;
62
+ callbackCalled: boolean;
63
+ wasBlocking: boolean;
64
+ } {
65
+ const {
66
+ messagesToRefine,
67
+ cachedRunSummary,
68
+ persistedSummary,
69
+ summarizeCallback,
70
+ } = opts;
71
+
72
+ let summary: string | undefined;
73
+ let callbackCalled = false;
74
+ let wasBlocking = false;
75
+
76
+ if (messagesToRefine.length > 0 && summarizeCallback) {
77
+ if (cachedRunSummary != null) {
78
+ // Case 1: Reuse cached summary
79
+ summary = cachedRunSummary;
80
+ callbackCalled = true;
81
+ wasBlocking = false;
82
+ // Fire background update (non-blocking)
83
+ summarizeCallback(messagesToRefine).catch(() => {});
84
+ } else if (persistedSummary != null && persistedSummary !== '') {
85
+ // Case 2: Use persisted summary as fallback
86
+ summary = persistedSummary;
87
+ callbackCalled = true;
88
+ wasBlocking = false;
89
+ summarizeCallback(messagesToRefine).catch(() => {});
90
+ } else {
91
+ // Case 3: No summary exists — skip injection, fire background generation
92
+ summary = undefined;
93
+ callbackCalled = true;
94
+ wasBlocking = false;
95
+ summarizeCallback(messagesToRefine).catch(() => {});
96
+ }
97
+ }
98
+
99
+ return { summary, callbackCalled, wasBlocking };
100
+ }
101
+
102
+ it('Case 1: uses cached run summary without blocking', () => {
103
+ const callback = jest.fn().mockResolvedValue('updated summary');
104
+ const result = simulateGraphSummarization({
105
+ messagesToRefine: [new HumanMessage('old msg')],
106
+ cachedRunSummary: 'existing cached summary',
107
+ persistedSummary: null,
108
+ summarizeCallback: callback,
109
+ });
110
+
111
+ expect(result.summary).toBe('existing cached summary');
112
+ expect(result.wasBlocking).toBe(false);
113
+ expect(callback).toHaveBeenCalledTimes(1);
114
+ });
115
+
116
+ it('Case 2: uses persisted summary as fallback without blocking', () => {
117
+ const callback = jest.fn().mockResolvedValue('updated summary');
118
+ const result = simulateGraphSummarization({
119
+ messagesToRefine: [new HumanMessage('old msg')],
120
+ cachedRunSummary: null,
121
+ persistedSummary: 'persisted from last turn',
122
+ summarizeCallback: callback,
123
+ });
124
+
125
+ expect(result.summary).toBe('persisted from last turn');
126
+ expect(result.wasBlocking).toBe(false);
127
+ expect(callback).toHaveBeenCalledTimes(1);
128
+ });
129
+
130
+ it('Case 3: no summary available — skips injection, fires background', () => {
131
+ const callback = jest.fn().mockResolvedValue('new summary');
132
+ const result = simulateGraphSummarization({
133
+ messagesToRefine: [new HumanMessage('old msg')],
134
+ cachedRunSummary: null,
135
+ persistedSummary: null,
136
+ summarizeCallback: callback,
137
+ });
138
+
139
+ expect(result.summary).toBeUndefined();
140
+ expect(result.wasBlocking).toBe(false);
141
+ expect(callback).toHaveBeenCalledTimes(1);
142
+ });
143
+
144
+ it('does not call callback when no messages were refined', () => {
145
+ const callback = jest.fn().mockResolvedValue('summary');
146
+ const result = simulateGraphSummarization({
147
+ messagesToRefine: [],
148
+ cachedRunSummary: null,
149
+ persistedSummary: null,
150
+ summarizeCallback: callback,
151
+ });
152
+
153
+ expect(result.summary).toBeUndefined();
154
+ expect(callback).not.toHaveBeenCalled();
155
+ });
156
+
157
+ it('handles callback failure gracefully', async () => {
158
+ const callback = jest.fn().mockRejectedValue(new Error('LLM timeout'));
159
+ const result = simulateGraphSummarization({
160
+ messagesToRefine: [new HumanMessage('msg')],
161
+ cachedRunSummary: 'cached',
162
+ persistedSummary: null,
163
+ summarizeCallback: callback,
164
+ });
165
+
166
+ // Should still use cached summary
167
+ expect(result.summary).toBe('cached');
168
+ // Wait for background promise to settle
169
+ await new Promise((r) => setTimeout(r, 10));
170
+ expect(callback).toHaveBeenCalledTimes(1);
171
+ });
172
+
173
+ it('background callback updates cache for subsequent iterations', async () => {
174
+ let cachedSummary: string | null = null;
175
+ const callback = jest.fn().mockImplementation(async () => {
176
+ // Simulate LLM call delay
177
+ await new Promise((r) => setTimeout(r, 50));
178
+ const updated = 'background-updated summary';
179
+ cachedSummary = updated;
180
+ return updated;
181
+ });
182
+
183
+ // First call: no cache
184
+ simulateGraphSummarization({
185
+ messagesToRefine: [new HumanMessage('msg')],
186
+ cachedRunSummary: null,
187
+ persistedSummary: 'stale persisted',
188
+ summarizeCallback: callback,
189
+ });
190
+
191
+ // Initially still using persisted
192
+ expect(cachedSummary).toBeNull();
193
+
194
+ // Wait for background to complete
195
+ await new Promise((r) => setTimeout(r, 100));
196
+ expect(cachedSummary).toBe('background-updated summary');
197
+
198
+ // Second call: now has cached summary
199
+ const result2 = simulateGraphSummarization({
200
+ messagesToRefine: [new HumanMessage('msg2')],
201
+ cachedRunSummary: cachedSummary,
202
+ persistedSummary: null,
203
+ summarizeCallback: callback,
204
+ });
205
+ expect(result2.summary).toBe('background-updated summary');
206
+ });
207
+ });
208
+
209
+ // ============================================================================
210
+ // Pruning integration with summary injection
211
+ // ============================================================================
212
+
213
+ describe('Pruning + summary injection flow', () => {
214
+ it('pruneMessages produces messagesToRefine when context exceeds budget', () => {
215
+ // Each message ~100 tokens (400 chars). 40 messages = ~4000 tokens.
216
+ // maxTokens = 200 forces heavy pruning.
217
+ const messages = buildLargeConversation(40);
218
+ const maxTokens = 200;
219
+
220
+ const prune = createPruneMessages({
221
+ startIndex: 0,
222
+ provider: 'anthropic' as any,
223
+ tokenCounter: simpleTokenCounter,
224
+ maxTokens,
225
+ indexTokenCountMap: {},
226
+ });
227
+
228
+ const { context, messagesToRefine } = prune({ messages });
229
+
230
+ expect(messagesToRefine.length).toBeGreaterThan(0);
231
+ expect(context.length).toBeLessThan(messages.length);
232
+ });
233
+
234
+ it('summary is injected after system message when available', () => {
235
+ const messages: BaseMessage[] = [
236
+ new SystemMessage('System prompt'),
237
+ new HumanMessage('Recent question'),
238
+ ];
239
+ const summaryText = 'User discussed project deadlines and budget';
240
+ const summaryMsg = new SystemMessage(
241
+ `[Conversation Summary]\n${summaryText}`
242
+ );
243
+
244
+ // Insert after system message
245
+ const systemIdx = messages[0]?.getType() === 'system' ? 1 : 0;
246
+ const result = [
247
+ ...messages.slice(0, systemIdx),
248
+ summaryMsg,
249
+ ...messages.slice(systemIdx),
250
+ ];
251
+
252
+ expect(result.length).toBe(3);
253
+ expect(result[0].getType()).toBe('system'); // Original system
254
+ expect(result[1].content as string).toContain('[Conversation Summary]');
255
+ expect(result[2].getType()).toBe('human'); // Recent question preserved
256
+ });
257
+ });
258
+
259
+ // ============================================================================
260
+ // Multi-turn simulation
261
+ // ============================================================================
262
+
263
+ describe('Multi-turn conversation with rolling summaries', () => {
264
+ it('simulates 5 turns with persisted summary handoff', async () => {
265
+ let persistedSummary: string | null = null;
266
+ const summaryUpdates: string[] = [];
267
+
268
+ const callback = jest
269
+ .fn()
270
+ .mockImplementation(async (msgs: BaseMessage[]) => {
271
+ const msgCount = msgs.length;
272
+ const summary = `Summary of ${msgCount} messages (turn ${summaryUpdates.length + 1})`;
273
+ summaryUpdates.push(summary);
274
+ persistedSummary = summary;
275
+ return summary;
276
+ });
277
+
278
+ // Simulate 5 conversation turns
279
+ for (let turn = 0; turn < 5; turn++) {
280
+ const messages = buildLargeConversation(10);
281
+ const prune = createPruneMessages({
282
+ startIndex: 0,
283
+ provider: 'anthropic' as any,
284
+ tokenCounter: simpleTokenCounter,
285
+ maxTokens: 300,
286
+ indexTokenCountMap: {},
287
+ });
288
+ const { messagesToRefine } = prune({ messages });
289
+
290
+ if (messagesToRefine.length > 0) {
291
+ // Simulate Graph behavior: use persisted, fire background
292
+ const cachedSummary = persistedSummary;
293
+ if (cachedSummary) {
294
+ // Non-blocking: use existing summary
295
+ expect(cachedSummary).toBeDefined();
296
+ }
297
+ // Fire background update
298
+ callback(messagesToRefine).catch(() => {});
299
+ await new Promise((r) => setTimeout(r, 10));
300
+ }
301
+ }
302
+
303
+ // All turns should have fired background updates
304
+ expect(summaryUpdates.length).toBeGreaterThanOrEqual(4);
305
+ expect(persistedSummary).toContain('Summary of');
306
+ });
307
+ });
@@ -0,0 +1,166 @@
1
+ // src/messages/__tests__/dedup.test.ts
2
+ import {
3
+ SystemMessage,
4
+ HumanMessage,
5
+ AIMessage,
6
+ ToolMessage,
7
+ } from '@langchain/core/messages';
8
+ import type { BaseMessage } from '@langchain/core/messages';
9
+ import { deduplicateSystemMessages } from '../dedup';
10
+
11
+ describe('deduplicateSystemMessages', () => {
12
+ it('returns empty array unchanged', () => {
13
+ const result = deduplicateSystemMessages([]);
14
+ expect(result.messages).toEqual([]);
15
+ expect(result.removedCount).toBe(0);
16
+ });
17
+
18
+ it('returns single message unchanged', () => {
19
+ const msgs = [new SystemMessage('Hello')];
20
+ const result = deduplicateSystemMessages(msgs);
21
+ expect(result.messages).toHaveLength(1);
22
+ expect(result.removedCount).toBe(0);
23
+ });
24
+
25
+ it('preserves conversation with no duplicates', () => {
26
+ const msgs: BaseMessage[] = [
27
+ new SystemMessage('You are helpful'),
28
+ new HumanMessage('Hello'),
29
+ new AIMessage('Hi there'),
30
+ new SystemMessage('Summary of prior context'),
31
+ new HumanMessage('What about X?'),
32
+ ];
33
+
34
+ const result = deduplicateSystemMessages(msgs);
35
+ expect(result.messages).toHaveLength(5);
36
+ expect(result.removedCount).toBe(0);
37
+ });
38
+
39
+ it('removes duplicate system messages', () => {
40
+ const msgs: BaseMessage[] = [
41
+ new SystemMessage('You are helpful'),
42
+ new SystemMessage('[Conversation Summary]\nUser discussed X'),
43
+ new HumanMessage('Hello'),
44
+ new AIMessage('Hi'),
45
+ new SystemMessage('[Conversation Summary]\nUser discussed X'), // duplicate
46
+ new HumanMessage('What?'),
47
+ ];
48
+
49
+ const result = deduplicateSystemMessages(msgs);
50
+ expect(result.messages).toHaveLength(5);
51
+ expect(result.removedCount).toBe(1);
52
+ // Verify the duplicate at index 4 was removed
53
+ expect(result.messages.map((m) => m.getType())).toEqual([
54
+ 'system',
55
+ 'system',
56
+ 'human',
57
+ 'ai',
58
+ 'human',
59
+ ]);
60
+ });
61
+
62
+ it('keeps the first occurrence of duplicate system messages', () => {
63
+ const duplicateContent = 'Context was compressed. Summary follows.';
64
+ const msgs: BaseMessage[] = [
65
+ new SystemMessage('Main prompt'),
66
+ new SystemMessage(duplicateContent), // first occurrence
67
+ new HumanMessage('Q1'),
68
+ new AIMessage('A1'),
69
+ new SystemMessage(duplicateContent), // duplicate
70
+ new HumanMessage('Q2'),
71
+ new AIMessage('A2'),
72
+ new SystemMessage(duplicateContent), // another duplicate
73
+ ];
74
+
75
+ const result = deduplicateSystemMessages(msgs);
76
+ expect(result.messages).toHaveLength(6);
77
+ expect(result.removedCount).toBe(2);
78
+ });
79
+
80
+ it('never removes non-system messages', () => {
81
+ const msgs: BaseMessage[] = [
82
+ new SystemMessage('Prompt'),
83
+ new HumanMessage('Same content'),
84
+ new AIMessage('Same content'),
85
+ new HumanMessage('Same content'), // duplicate content but not system
86
+ new AIMessage('Same content'), // duplicate content but not system
87
+ ];
88
+
89
+ const result = deduplicateSystemMessages(msgs);
90
+ expect(result.messages).toHaveLength(5);
91
+ expect(result.removedCount).toBe(0);
92
+ });
93
+
94
+ it('always preserves the first system message (main prompt)', () => {
95
+ const msgs: BaseMessage[] = [
96
+ new SystemMessage('Main system prompt'),
97
+ new HumanMessage('Hello'),
98
+ new SystemMessage('Main system prompt'), // duplicate of first
99
+ ];
100
+
101
+ const result = deduplicateSystemMessages(msgs);
102
+ expect(result.messages).toHaveLength(2);
103
+ expect(result.removedCount).toBe(1);
104
+ // First system message is preserved
105
+ expect(result.messages[0].content as string).toBe('Main system prompt');
106
+ });
107
+
108
+ it('handles tool messages correctly (never deduped)', () => {
109
+ const msgs: BaseMessage[] = [
110
+ new SystemMessage('Prompt'),
111
+ new HumanMessage('Q'),
112
+ new AIMessage({
113
+ content: 'Using tool',
114
+ tool_calls: [{ id: 'tc1', name: 'test', args: {} }],
115
+ }),
116
+ new ToolMessage({ content: 'result', tool_call_id: 'tc1' }),
117
+ new SystemMessage('Post-prune note'),
118
+ new AIMessage({
119
+ content: 'Using tool',
120
+ tool_calls: [{ id: 'tc2', name: 'test', args: {} }],
121
+ }),
122
+ new ToolMessage({ content: 'result', tool_call_id: 'tc2' }),
123
+ new SystemMessage('Post-prune note'), // duplicate system msg
124
+ ];
125
+
126
+ const result = deduplicateSystemMessages(msgs);
127
+ expect(result.removedCount).toBe(1);
128
+ // Tool messages are all preserved
129
+ const toolMsgs = result.messages.filter((m) => m.getType() === 'tool');
130
+ expect(toolMsgs).toHaveLength(2);
131
+ });
132
+
133
+ it('does not mutate input array', () => {
134
+ const msgs: BaseMessage[] = [
135
+ new SystemMessage('Prompt'),
136
+ new SystemMessage('Dup'),
137
+ new SystemMessage('Dup'),
138
+ ];
139
+ const originalLength = msgs.length;
140
+
141
+ deduplicateSystemMessages(msgs);
142
+ expect(msgs.length).toBe(originalLength);
143
+ });
144
+
145
+ it('handles mixed duplicate patterns', () => {
146
+ const msgs: BaseMessage[] = [
147
+ new SystemMessage('A'),
148
+ new SystemMessage('B'),
149
+ new SystemMessage('A'), // dup of first
150
+ new HumanMessage('Q'),
151
+ new SystemMessage('B'), // dup
152
+ new SystemMessage('C'),
153
+ new SystemMessage('C'), // dup
154
+ ];
155
+
156
+ const result = deduplicateSystemMessages(msgs);
157
+ expect(result.removedCount).toBe(3);
158
+ expect(result.messages).toHaveLength(4);
159
+ expect(
160
+ result.messages.map((m) => {
161
+ if (typeof m.content === 'string') return m.content;
162
+ return '';
163
+ })
164
+ ).toEqual(['A', 'B', 'Q', 'C']);
165
+ });
166
+ });
@@ -0,0 +1,104 @@
1
+ // src/messages/dedup.ts
2
+ import type { BaseMessage } from '@langchain/core/messages';
3
+ import { MessageTypes } from '@/common';
4
+ import { DEDUP_MAX_CONTENT_LENGTH } from '@/common/constants';
5
+
6
+ /**
7
+ * Deduplicates consecutive identical system messages in the context window.
8
+ *
9
+ * Problem: In long tool-use chains, the same system messages (e.g., post-prune notes,
10
+ * conversation summaries) can accumulate when the context is rebuilt on each iteration.
11
+ * These duplicates waste tokens without adding information.
12
+ *
13
+ * Strategy: Only deduplicate system messages that appear consecutively or are exact
14
+ * duplicates of an earlier system message. The FIRST occurrence is always kept.
15
+ * Non-system messages (human, ai, tool) are never touched.
16
+ *
17
+ * Important constraints:
18
+ * - The first system message (index 0) is ALWAYS preserved (it's the main system prompt)
19
+ * - Only system messages are candidates for deduplication
20
+ * - Messages with content longer than DEDUP_MAX_CONTENT_LENGTH are skipped (too expensive to compare)
21
+ * - Content comparison is by string equality (fast and deterministic)
22
+ *
23
+ * @param messages - The message array to deduplicate (not mutated)
24
+ * @returns A new array with duplicate system messages removed, and the count of removed messages
25
+ */
26
+ export function deduplicateSystemMessages(messages: BaseMessage[]): {
27
+ messages: BaseMessage[];
28
+ removedCount: number;
29
+ } {
30
+ if (messages.length <= 1) {
31
+ return { messages, removedCount: 0 };
32
+ }
33
+
34
+ const seenSystemContents = new Set<string>();
35
+ const result: BaseMessage[] = [];
36
+ let removedCount = 0;
37
+
38
+ for (let i = 0; i < messages.length; i++) {
39
+ const msg = messages[i];
40
+ const type = msg.getType();
41
+
42
+ // Non-system messages are always kept
43
+ if (type !== MessageTypes.SYSTEM) {
44
+ result.push(msg);
45
+ continue;
46
+ }
47
+
48
+ // First system message (main prompt) is always kept
49
+ if (i === 0) {
50
+ result.push(msg);
51
+ // Track its content for dedup of later duplicates
52
+ const content = getContentString(msg);
53
+ if (content != null) {
54
+ seenSystemContents.add(content);
55
+ }
56
+ continue;
57
+ }
58
+
59
+ // Get string content for comparison
60
+ const content = getContentString(msg);
61
+
62
+ // Skip dedup for very long or non-string content
63
+ if (content == null) {
64
+ result.push(msg);
65
+ continue;
66
+ }
67
+
68
+ // Check if this exact system message was already seen
69
+ if (seenSystemContents.has(content)) {
70
+ removedCount++;
71
+ continue; // Skip this duplicate
72
+ }
73
+
74
+ // New unique system message — keep it and track
75
+ seenSystemContents.add(content);
76
+ result.push(msg);
77
+ }
78
+
79
+ return { messages: result, removedCount };
80
+ }
81
+
82
+ /**
83
+ * Extracts a comparable string from a message's content.
84
+ * Returns null if the content is too large or non-string (skip dedup for those).
85
+ */
86
+ function getContentString(msg: BaseMessage): string | null {
87
+ if (typeof msg.content === 'string') {
88
+ if (msg.content.length > DEDUP_MAX_CONTENT_LENGTH) {
89
+ return null;
90
+ }
91
+ return msg.content;
92
+ }
93
+ // Array content (e.g., Anthropic cache_control blocks) — serialize for comparison
94
+ // but only if reasonably sized
95
+ try {
96
+ const serialized = JSON.stringify(msg.content);
97
+ if (serialized.length > DEDUP_MAX_CONTENT_LENGTH) {
98
+ return null;
99
+ }
100
+ return serialized;
101
+ } catch {
102
+ return null;
103
+ }
104
+ }
@@ -6,3 +6,4 @@ export * from './cache';
6
6
  export * from './content';
7
7
  export * from './tools';
8
8
  export * from './summarize';
9
+ export * from './dedup';
@@ -94,10 +94,27 @@ export const CodeExecutionToolDescription = `
94
94
  Runs code and returns stdout/stderr output from a stateless execution environment, similar to running scripts in a command-line interface. Each execution is isolated and independent.
95
95
 
96
96
  Usage:
97
- - No network access available.
97
+ - No network access available. Do NOT use pip install, npm install, or any package manager.
98
98
  - Generated files are automatically delivered; **DO NOT** provide download links.
99
99
  - NEVER use this tool to execute malicious code.
100
100
  - When a code_id is returned in output, you can edit that code using code_id + old_str + new_str instead of rewriting the entire code block.
101
+
102
+ Pre-installed Python packages (use directly, no installation needed):
103
+ - Data Science: numpy, pandas
104
+ - Visualization: matplotlib, seaborn, plotly
105
+ - Documents: python-docx, python-pptx, reportlab, fpdf2, PyMuPDF, pdfplumber
106
+ - Spreadsheets: openpyxl, xlsxwriter
107
+ - Image: pillow
108
+ - Data: orjson, lxml, beautifulsoup4, faker
109
+
110
+ Pre-installed JavaScript packages:
111
+ - pptxgenjs, react, react-dom, react-icons, sharp
112
+
113
+ Pre-installed Go packages:
114
+ - excelize (Excel), gofpdf (PDF)
115
+
116
+ Pre-installed R packages:
117
+ - ggplot2, dplyr, tidyr, readxl, writexl, jsonlite, Cairo
101
118
  `.trim();
102
119
 
103
120
  export const CodeExecutionToolName = Constants.EXECUTE_CODE;
@@ -135,11 +152,13 @@ Runs code in a stateless execution environment. Each execution is isolated.
135
152
  ✅ ONLY USE FOR:
136
153
  - File generation: PowerPoint (.pptx), Word (.docx), PDF (.pdf), Excel (.xlsx)
137
154
  - Processing uploaded files (CSV, Excel analysis)
138
- - Heavy computation requiring Python
155
+ - Heavy computation requiring Python (numpy, pandas for data analytics)
139
156
 
140
157
  Rules:
141
- - No network access available
158
+ - No network access — do NOT use pip install, npm install, or any package manager
159
+ - All packages are pre-installed: numpy, pandas, matplotlib, seaborn, plotly, python-docx, python-pptx, reportlab, openpyxl, xlsxwriter, pillow, faker, orjson, lxml, beautifulsoup4
142
160
  - Generated files auto-delivered (no download links needed)
161
+ - **Error recovery**: When execution fails, use \`code_id\` + \`old_str\` + \`new_str\` to fix only the broken part — do NOT rewrite the entire code block. This is faster and saves tokens.
143
162
  `.trim();
144
163
 
145
164
  return tool(