@illuma-ai/agents 1.0.96 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +78 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +191 -165
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +22 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/contextPressure.cjs +154 -0
  15. package/dist/cjs/utils/contextPressure.cjs.map +1 -0
  16. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  17. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  18. package/dist/cjs/utils/run.cjs.map +1 -1
  19. package/dist/cjs/utils/tokens.cjs.map +1 -1
  20. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  21. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  22. package/dist/esm/agents/AgentContext.mjs +6 -2
  23. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  24. package/dist/esm/common/constants.mjs +71 -1
  25. package/dist/esm/common/constants.mjs.map +1 -1
  26. package/dist/esm/graphs/Graph.mjs +192 -166
  27. package/dist/esm/graphs/Graph.mjs.map +1 -1
  28. package/dist/esm/main.mjs +5 -1
  29. package/dist/esm/main.mjs.map +1 -1
  30. package/dist/esm/messages/dedup.mjs +93 -0
  31. package/dist/esm/messages/dedup.mjs.map +1 -0
  32. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  33. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  34. package/dist/esm/types/graph.mjs.map +1 -1
  35. package/dist/esm/utils/contextPressure.mjs +148 -0
  36. package/dist/esm/utils/contextPressure.mjs.map +1 -0
  37. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  38. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  39. package/dist/esm/utils/run.mjs.map +1 -1
  40. package/dist/esm/utils/tokens.mjs.map +1 -1
  41. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  42. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  43. package/dist/types/agents/AgentContext.d.ts +4 -1
  44. package/dist/types/common/constants.d.ts +49 -0
  45. package/dist/types/graphs/Graph.d.ts +25 -0
  46. package/dist/types/messages/dedup.d.ts +25 -0
  47. package/dist/types/messages/index.d.ts +1 -0
  48. package/dist/types/types/graph.d.ts +63 -0
  49. package/dist/types/utils/contextPressure.d.ts +72 -0
  50. package/dist/types/utils/index.d.ts +3 -0
  51. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  52. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  53. package/package.json +1 -1
  54. package/src/agents/AgentContext.ts +7 -0
  55. package/src/common/constants.ts +82 -0
  56. package/src/graphs/Graph.ts +254 -208
  57. package/src/graphs/contextManagement.e2e.test.ts +28 -20
  58. package/src/graphs/gapFeatures.test.ts +520 -0
  59. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  60. package/src/messages/__tests__/dedup.test.ts +166 -0
  61. package/src/messages/dedup.ts +104 -0
  62. package/src/messages/index.ts +1 -0
  63. package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
  64. package/src/specs/agent-handoffs.test.ts +36 -36
  65. package/src/specs/thinking-handoff.test.ts +10 -10
  66. package/src/tools/CodeExecutor.ts +22 -3
  67. package/src/types/graph.ts +73 -0
  68. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  69. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  70. package/src/utils/contextPressure.test.ts +262 -0
  71. package/src/utils/contextPressure.ts +188 -0
  72. package/src/utils/index.ts +3 -0
  73. package/src/utils/pruneCalibration.ts +92 -0
  74. package/src/utils/run.ts +108 -108
  75. package/src/utils/tokens.ts +118 -118
  76. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -0,0 +1,307 @@
1
+ /**
2
+ * nonBlockingSummarization.test.ts
3
+ *
4
+ * Tests that the Graph's summarization pipeline is fully non-blocking.
5
+ * The core invariant: summarizeCallback is NEVER awaited in the hot path.
6
+ * Instead, the graph uses cached/persisted summaries and fires background updates.
7
+ */
8
+
9
+ import {
10
+ HumanMessage,
11
+ AIMessage,
12
+ SystemMessage,
13
+ BaseMessage,
14
+ } from '@langchain/core/messages';
15
+ import type { TokenCounter } from '@/types/run';
16
+ import { createPruneMessages } from '@/messages/prune';
17
+
18
+ /**
19
+ * Simple token counter: ~1 token per 4 characters
20
+ */
21
+ const simpleTokenCounter: TokenCounter = (msg: BaseMessage): number => {
22
+ const content =
23
+ typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
24
+ return Math.ceil(content.length / 4);
25
+ };
26
+
27
+ /**
28
+ * Build a conversation that exceeds the token budget to trigger pruning.
29
+ * Each message is ~100 tokens (400 chars).
30
+ */
31
+ function buildLargeConversation(messageCount: number): BaseMessage[] {
32
+ const messages: BaseMessage[] = [
33
+ new SystemMessage('You are a helpful assistant.'),
34
+ ];
35
+ for (let i = 0; i < messageCount; i++) {
36
+ const longText = `Message ${i}: ${'x'.repeat(380)}`;
37
+ if (i % 2 === 0) {
38
+ messages.push(new HumanMessage(longText));
39
+ } else {
40
+ messages.push(new AIMessage(longText));
41
+ }
42
+ }
43
+ return messages;
44
+ }
45
+
46
+ // ============================================================================
47
+ // Non-blocking summarization behavior
48
+ // ============================================================================
49
+
50
+ describe('Non-blocking summarization in Graph pruning', () => {
51
+ /**
52
+ * Simulates the Graph.ts summarization decision logic (lines 1439-1492).
53
+ * This is extracted to test the branching behavior without needing a full Graph.
54
+ */
55
+ function simulateGraphSummarization(opts: {
56
+ messagesToRefine: BaseMessage[];
57
+ cachedRunSummary: string | null;
58
+ persistedSummary: string | null;
59
+ summarizeCallback: (msgs: BaseMessage[]) => Promise<string | undefined>;
60
+ }): {
61
+ summary: string | undefined;
62
+ callbackCalled: boolean;
63
+ wasBlocking: boolean;
64
+ } {
65
+ const {
66
+ messagesToRefine,
67
+ cachedRunSummary,
68
+ persistedSummary,
69
+ summarizeCallback,
70
+ } = opts;
71
+
72
+ let summary: string | undefined;
73
+ let callbackCalled = false;
74
+ let wasBlocking = false;
75
+
76
+ if (messagesToRefine.length > 0 && summarizeCallback) {
77
+ if (cachedRunSummary != null) {
78
+ // Case 1: Reuse cached summary
79
+ summary = cachedRunSummary;
80
+ callbackCalled = true;
81
+ wasBlocking = false;
82
+ // Fire background update (non-blocking)
83
+ summarizeCallback(messagesToRefine).catch(() => {});
84
+ } else if (persistedSummary != null && persistedSummary !== '') {
85
+ // Case 2: Use persisted summary as fallback
86
+ summary = persistedSummary;
87
+ callbackCalled = true;
88
+ wasBlocking = false;
89
+ summarizeCallback(messagesToRefine).catch(() => {});
90
+ } else {
91
+ // Case 3: No summary exists — skip injection, fire background generation
92
+ summary = undefined;
93
+ callbackCalled = true;
94
+ wasBlocking = false;
95
+ summarizeCallback(messagesToRefine).catch(() => {});
96
+ }
97
+ }
98
+
99
+ return { summary, callbackCalled, wasBlocking };
100
+ }
101
+
102
+ it('Case 1: uses cached run summary without blocking', () => {
103
+ const callback = jest.fn().mockResolvedValue('updated summary');
104
+ const result = simulateGraphSummarization({
105
+ messagesToRefine: [new HumanMessage('old msg')],
106
+ cachedRunSummary: 'existing cached summary',
107
+ persistedSummary: null,
108
+ summarizeCallback: callback,
109
+ });
110
+
111
+ expect(result.summary).toBe('existing cached summary');
112
+ expect(result.wasBlocking).toBe(false);
113
+ expect(callback).toHaveBeenCalledTimes(1);
114
+ });
115
+
116
+ it('Case 2: uses persisted summary as fallback without blocking', () => {
117
+ const callback = jest.fn().mockResolvedValue('updated summary');
118
+ const result = simulateGraphSummarization({
119
+ messagesToRefine: [new HumanMessage('old msg')],
120
+ cachedRunSummary: null,
121
+ persistedSummary: 'persisted from last turn',
122
+ summarizeCallback: callback,
123
+ });
124
+
125
+ expect(result.summary).toBe('persisted from last turn');
126
+ expect(result.wasBlocking).toBe(false);
127
+ expect(callback).toHaveBeenCalledTimes(1);
128
+ });
129
+
130
+ it('Case 3: no summary available — skips injection, fires background', () => {
131
+ const callback = jest.fn().mockResolvedValue('new summary');
132
+ const result = simulateGraphSummarization({
133
+ messagesToRefine: [new HumanMessage('old msg')],
134
+ cachedRunSummary: null,
135
+ persistedSummary: null,
136
+ summarizeCallback: callback,
137
+ });
138
+
139
+ expect(result.summary).toBeUndefined();
140
+ expect(result.wasBlocking).toBe(false);
141
+ expect(callback).toHaveBeenCalledTimes(1);
142
+ });
143
+
144
+ it('does not call callback when no messages were refined', () => {
145
+ const callback = jest.fn().mockResolvedValue('summary');
146
+ const result = simulateGraphSummarization({
147
+ messagesToRefine: [],
148
+ cachedRunSummary: null,
149
+ persistedSummary: null,
150
+ summarizeCallback: callback,
151
+ });
152
+
153
+ expect(result.summary).toBeUndefined();
154
+ expect(callback).not.toHaveBeenCalled();
155
+ });
156
+
157
+ it('handles callback failure gracefully', async () => {
158
+ const callback = jest.fn().mockRejectedValue(new Error('LLM timeout'));
159
+ const result = simulateGraphSummarization({
160
+ messagesToRefine: [new HumanMessage('msg')],
161
+ cachedRunSummary: 'cached',
162
+ persistedSummary: null,
163
+ summarizeCallback: callback,
164
+ });
165
+
166
+ // Should still use cached summary
167
+ expect(result.summary).toBe('cached');
168
+ // Wait for background promise to settle
169
+ await new Promise((r) => setTimeout(r, 10));
170
+ expect(callback).toHaveBeenCalledTimes(1);
171
+ });
172
+
173
+ it('background callback updates cache for subsequent iterations', async () => {
174
+ let cachedSummary: string | null = null;
175
+ const callback = jest.fn().mockImplementation(async () => {
176
+ // Simulate LLM call delay
177
+ await new Promise((r) => setTimeout(r, 50));
178
+ const updated = 'background-updated summary';
179
+ cachedSummary = updated;
180
+ return updated;
181
+ });
182
+
183
+ // First call: no cache
184
+ simulateGraphSummarization({
185
+ messagesToRefine: [new HumanMessage('msg')],
186
+ cachedRunSummary: null,
187
+ persistedSummary: 'stale persisted',
188
+ summarizeCallback: callback,
189
+ });
190
+
191
+ // Initially still using persisted
192
+ expect(cachedSummary).toBeNull();
193
+
194
+ // Wait for background to complete
195
+ await new Promise((r) => setTimeout(r, 100));
196
+ expect(cachedSummary).toBe('background-updated summary');
197
+
198
+ // Second call: now has cached summary
199
+ const result2 = simulateGraphSummarization({
200
+ messagesToRefine: [new HumanMessage('msg2')],
201
+ cachedRunSummary: cachedSummary,
202
+ persistedSummary: null,
203
+ summarizeCallback: callback,
204
+ });
205
+ expect(result2.summary).toBe('background-updated summary');
206
+ });
207
+ });
208
+
209
+ // ============================================================================
210
+ // Pruning integration with summary injection
211
+ // ============================================================================
212
+
213
+ describe('Pruning + summary injection flow', () => {
214
+ it('pruneMessages produces messagesToRefine when context exceeds budget', () => {
215
+ // Each message ~100 tokens (400 chars). 40 messages = ~4000 tokens.
216
+ // maxTokens = 200 forces heavy pruning.
217
+ const messages = buildLargeConversation(40);
218
+ const maxTokens = 200;
219
+
220
+ const prune = createPruneMessages({
221
+ startIndex: 0,
222
+ provider: 'anthropic' as any,
223
+ tokenCounter: simpleTokenCounter,
224
+ maxTokens,
225
+ indexTokenCountMap: {},
226
+ });
227
+
228
+ const { context, messagesToRefine } = prune({ messages });
229
+
230
+ expect(messagesToRefine.length).toBeGreaterThan(0);
231
+ expect(context.length).toBeLessThan(messages.length);
232
+ });
233
+
234
+ it('summary is injected after system message when available', () => {
235
+ const messages: BaseMessage[] = [
236
+ new SystemMessage('System prompt'),
237
+ new HumanMessage('Recent question'),
238
+ ];
239
+ const summaryText = 'User discussed project deadlines and budget';
240
+ const summaryMsg = new SystemMessage(
241
+ `[Conversation Summary]\n${summaryText}`
242
+ );
243
+
244
+ // Insert after system message
245
+ const systemIdx = messages[0]?.getType() === 'system' ? 1 : 0;
246
+ const result = [
247
+ ...messages.slice(0, systemIdx),
248
+ summaryMsg,
249
+ ...messages.slice(systemIdx),
250
+ ];
251
+
252
+ expect(result.length).toBe(3);
253
+ expect(result[0].getType()).toBe('system'); // Original system
254
+ expect(result[1].content as string).toContain('[Conversation Summary]');
255
+ expect(result[2].getType()).toBe('human'); // Recent question preserved
256
+ });
257
+ });
258
+
259
+ // ============================================================================
260
+ // Multi-turn simulation
261
+ // ============================================================================
262
+
263
+ describe('Multi-turn conversation with rolling summaries', () => {
264
+ it('simulates 5 turns with persisted summary handoff', async () => {
265
+ let persistedSummary: string | null = null;
266
+ const summaryUpdates: string[] = [];
267
+
268
+ const callback = jest
269
+ .fn()
270
+ .mockImplementation(async (msgs: BaseMessage[]) => {
271
+ const msgCount = msgs.length;
272
+ const summary = `Summary of ${msgCount} messages (turn ${summaryUpdates.length + 1})`;
273
+ summaryUpdates.push(summary);
274
+ persistedSummary = summary;
275
+ return summary;
276
+ });
277
+
278
+ // Simulate 5 conversation turns
279
+ for (let turn = 0; turn < 5; turn++) {
280
+ const messages = buildLargeConversation(10);
281
+ const prune = createPruneMessages({
282
+ startIndex: 0,
283
+ provider: 'anthropic' as any,
284
+ tokenCounter: simpleTokenCounter,
285
+ maxTokens: 300,
286
+ indexTokenCountMap: {},
287
+ });
288
+ const { messagesToRefine } = prune({ messages });
289
+
290
+ if (messagesToRefine.length > 0) {
291
+ // Simulate Graph behavior: use persisted, fire background
292
+ const cachedSummary = persistedSummary;
293
+ if (cachedSummary) {
294
+ // Non-blocking: use existing summary
295
+ expect(cachedSummary).toBeDefined();
296
+ }
297
+ // Fire background update
298
+ callback(messagesToRefine).catch(() => {});
299
+ await new Promise((r) => setTimeout(r, 10));
300
+ }
301
+ }
302
+
303
+ // All turns should have fired background updates
304
+ expect(summaryUpdates.length).toBeGreaterThanOrEqual(4);
305
+ expect(persistedSummary).toContain('Summary of');
306
+ });
307
+ });
@@ -0,0 +1,166 @@
1
+ // src/messages/__tests__/dedup.test.ts
2
+ import {
3
+ SystemMessage,
4
+ HumanMessage,
5
+ AIMessage,
6
+ ToolMessage,
7
+ } from '@langchain/core/messages';
8
+ import type { BaseMessage } from '@langchain/core/messages';
9
+ import { deduplicateSystemMessages } from '../dedup';
10
+
11
+ describe('deduplicateSystemMessages', () => {
12
+ it('returns empty array unchanged', () => {
13
+ const result = deduplicateSystemMessages([]);
14
+ expect(result.messages).toEqual([]);
15
+ expect(result.removedCount).toBe(0);
16
+ });
17
+
18
+ it('returns single message unchanged', () => {
19
+ const msgs = [new SystemMessage('Hello')];
20
+ const result = deduplicateSystemMessages(msgs);
21
+ expect(result.messages).toHaveLength(1);
22
+ expect(result.removedCount).toBe(0);
23
+ });
24
+
25
+ it('preserves conversation with no duplicates', () => {
26
+ const msgs: BaseMessage[] = [
27
+ new SystemMessage('You are helpful'),
28
+ new HumanMessage('Hello'),
29
+ new AIMessage('Hi there'),
30
+ new SystemMessage('Summary of prior context'),
31
+ new HumanMessage('What about X?'),
32
+ ];
33
+
34
+ const result = deduplicateSystemMessages(msgs);
35
+ expect(result.messages).toHaveLength(5);
36
+ expect(result.removedCount).toBe(0);
37
+ });
38
+
39
+ it('removes duplicate system messages', () => {
40
+ const msgs: BaseMessage[] = [
41
+ new SystemMessage('You are helpful'),
42
+ new SystemMessage('[Conversation Summary]\nUser discussed X'),
43
+ new HumanMessage('Hello'),
44
+ new AIMessage('Hi'),
45
+ new SystemMessage('[Conversation Summary]\nUser discussed X'), // duplicate
46
+ new HumanMessage('What?'),
47
+ ];
48
+
49
+ const result = deduplicateSystemMessages(msgs);
50
+ expect(result.messages).toHaveLength(5);
51
+ expect(result.removedCount).toBe(1);
52
+ // Verify the duplicate at index 4 was removed
53
+ expect(result.messages.map((m) => m.getType())).toEqual([
54
+ 'system',
55
+ 'system',
56
+ 'human',
57
+ 'ai',
58
+ 'human',
59
+ ]);
60
+ });
61
+
62
+ it('keeps the first occurrence of duplicate system messages', () => {
63
+ const duplicateContent = 'Context was compressed. Summary follows.';
64
+ const msgs: BaseMessage[] = [
65
+ new SystemMessage('Main prompt'),
66
+ new SystemMessage(duplicateContent), // first occurrence
67
+ new HumanMessage('Q1'),
68
+ new AIMessage('A1'),
69
+ new SystemMessage(duplicateContent), // duplicate
70
+ new HumanMessage('Q2'),
71
+ new AIMessage('A2'),
72
+ new SystemMessage(duplicateContent), // another duplicate
73
+ ];
74
+
75
+ const result = deduplicateSystemMessages(msgs);
76
+ expect(result.messages).toHaveLength(6);
77
+ expect(result.removedCount).toBe(2);
78
+ });
79
+
80
+ it('never removes non-system messages', () => {
81
+ const msgs: BaseMessage[] = [
82
+ new SystemMessage('Prompt'),
83
+ new HumanMessage('Same content'),
84
+ new AIMessage('Same content'),
85
+ new HumanMessage('Same content'), // duplicate content but not system
86
+ new AIMessage('Same content'), // duplicate content but not system
87
+ ];
88
+
89
+ const result = deduplicateSystemMessages(msgs);
90
+ expect(result.messages).toHaveLength(5);
91
+ expect(result.removedCount).toBe(0);
92
+ });
93
+
94
+ it('always preserves the first system message (main prompt)', () => {
95
+ const msgs: BaseMessage[] = [
96
+ new SystemMessage('Main system prompt'),
97
+ new HumanMessage('Hello'),
98
+ new SystemMessage('Main system prompt'), // duplicate of first
99
+ ];
100
+
101
+ const result = deduplicateSystemMessages(msgs);
102
+ expect(result.messages).toHaveLength(2);
103
+ expect(result.removedCount).toBe(1);
104
+ // First system message is preserved
105
+ expect(result.messages[0].content as string).toBe('Main system prompt');
106
+ });
107
+
108
+ it('handles tool messages correctly (never deduped)', () => {
109
+ const msgs: BaseMessage[] = [
110
+ new SystemMessage('Prompt'),
111
+ new HumanMessage('Q'),
112
+ new AIMessage({
113
+ content: 'Using tool',
114
+ tool_calls: [{ id: 'tc1', name: 'test', args: {} }],
115
+ }),
116
+ new ToolMessage({ content: 'result', tool_call_id: 'tc1' }),
117
+ new SystemMessage('Post-prune note'),
118
+ new AIMessage({
119
+ content: 'Using tool',
120
+ tool_calls: [{ id: 'tc2', name: 'test', args: {} }],
121
+ }),
122
+ new ToolMessage({ content: 'result', tool_call_id: 'tc2' }),
123
+ new SystemMessage('Post-prune note'), // duplicate system msg
124
+ ];
125
+
126
+ const result = deduplicateSystemMessages(msgs);
127
+ expect(result.removedCount).toBe(1);
128
+ // Tool messages are all preserved
129
+ const toolMsgs = result.messages.filter((m) => m.getType() === 'tool');
130
+ expect(toolMsgs).toHaveLength(2);
131
+ });
132
+
133
+ it('does not mutate input array', () => {
134
+ const msgs: BaseMessage[] = [
135
+ new SystemMessage('Prompt'),
136
+ new SystemMessage('Dup'),
137
+ new SystemMessage('Dup'),
138
+ ];
139
+ const originalLength = msgs.length;
140
+
141
+ deduplicateSystemMessages(msgs);
142
+ expect(msgs.length).toBe(originalLength);
143
+ });
144
+
145
+ it('handles mixed duplicate patterns', () => {
146
+ const msgs: BaseMessage[] = [
147
+ new SystemMessage('A'),
148
+ new SystemMessage('B'),
149
+ new SystemMessage('A'), // dup of first
150
+ new HumanMessage('Q'),
151
+ new SystemMessage('B'), // dup
152
+ new SystemMessage('C'),
153
+ new SystemMessage('C'), // dup
154
+ ];
155
+
156
+ const result = deduplicateSystemMessages(msgs);
157
+ expect(result.removedCount).toBe(3);
158
+ expect(result.messages).toHaveLength(4);
159
+ expect(
160
+ result.messages.map((m) => {
161
+ if (typeof m.content === 'string') return m.content;
162
+ return '';
163
+ })
164
+ ).toEqual(['A', 'B', 'Q', 'C']);
165
+ });
166
+ });
@@ -0,0 +1,104 @@
1
+ // src/messages/dedup.ts
2
+ import type { BaseMessage } from '@langchain/core/messages';
3
+ import { MessageTypes } from '@/common';
4
+ import { DEDUP_MAX_CONTENT_LENGTH } from '@/common/constants';
5
+
6
+ /**
7
+ * Removes exact-duplicate system messages (adjacent or not) from the context window.
8
+ *
9
+ * Problem: In long tool-use chains, the same system messages (e.g., post-prune notes,
10
+ * conversation summaries) can accumulate when the context is rebuilt on each iteration.
11
+ * These duplicates waste tokens without adding information.
12
+ *
13
+ * Strategy: Drop every system message whose content is an exact duplicate of an
14
+ * earlier system message, whether or not the two are adjacent. The FIRST occurrence is always kept.
15
+ * Non-system messages (human, ai, tool) are never touched.
16
+ *
17
+ * Important constraints:
18
+ * - The first system message (index 0) is ALWAYS preserved (it's the main system prompt)
19
+ * - Only system messages are candidates for deduplication
20
+ * - Messages with content longer than DEDUP_MAX_CONTENT_LENGTH are never deduplicated (always kept; too expensive to compare)
21
+ * - Content comparison is by string equality (fast and deterministic); non-string content is compared via its JSON serialization
22
+ *
23
+ * @param messages - The message array to deduplicate (not mutated)
24
+ * @returns The messages with duplicate system messages removed (the input array itself when it has at most one element, otherwise a new array), and the count of removed messages
25
+ */
26
+ export function deduplicateSystemMessages(messages: BaseMessage[]): {
27
+ messages: BaseMessage[];
28
+ removedCount: number;
29
+ } {
30
+ if (messages.length <= 1) {
31
+ return { messages, removedCount: 0 };
32
+ }
33
+
34
+ const seenSystemContents = new Set<string>();
35
+ const result: BaseMessage[] = [];
36
+ let removedCount = 0;
37
+
38
+ for (let i = 0; i < messages.length; i++) {
39
+ const msg = messages[i];
40
+ const type = msg.getType();
41
+
42
+ // Non-system messages are always kept
43
+ if (type !== MessageTypes.SYSTEM) {
44
+ result.push(msg);
45
+ continue;
46
+ }
47
+
48
+ // First system message (main prompt) is always kept
49
+ if (i === 0) {
50
+ result.push(msg);
51
+ // Track its content for dedup of later duplicates
52
+ const content = getContentString(msg);
53
+ if (content != null) {
54
+ seenSystemContents.add(content);
55
+ }
56
+ continue;
57
+ }
58
+
59
+ // Get string content for comparison
60
+ const content = getContentString(msg);
61
+
62
+ // Skip dedup when the content is too long or cannot be serialized
63
+ if (content == null) {
64
+ result.push(msg);
65
+ continue;
66
+ }
67
+
68
+ // Check if this exact system message was already seen
69
+ if (seenSystemContents.has(content)) {
70
+ removedCount++;
71
+ continue; // Skip this duplicate
72
+ }
73
+
74
+ // New unique system message — keep it and track
75
+ seenSystemContents.add(content);
76
+ result.push(msg);
77
+ }
78
+
79
+ return { messages: result, removedCount };
80
+ }
81
+
82
+ /**
83
+ * Extracts a comparable string from a message's content.
84
+ * Returns null if the content (or its JSON serialization) is too large or cannot be serialized (skip dedup for those).
85
+ */
86
+ function getContentString(msg: BaseMessage): string | null {
87
+ if (typeof msg.content === 'string') {
88
+ if (msg.content.length > DEDUP_MAX_CONTENT_LENGTH) {
89
+ return null;
90
+ }
91
+ return msg.content;
92
+ }
93
+ // Array content (e.g., Anthropic cache_control blocks) — serialize for comparison
94
+ // but only if reasonably sized
95
+ try {
96
+ const serialized = JSON.stringify(msg.content);
97
+ if (serialized.length > DEDUP_MAX_CONTENT_LENGTH) {
98
+ return null;
99
+ }
100
+ return serialized;
101
+ } catch {
102
+ return null;
103
+ }
104
+ }
@@ -6,3 +6,4 @@ export * from './cache';
6
6
  export * from './content';
7
7
  export * from './tools';
8
8
  export * from './summarize';
9
+ export * from './dedup';
@@ -20,7 +20,7 @@ config({
20
20
  import { HumanMessage, ToolMessage } from '@langchain/core/messages';
21
21
  import type { RunnableConfig } from '@langchain/core/runnables';
22
22
  import type * as t from '@/types';
23
- import { Providers, Constants, GraphEvents } from '@/common';
23
+ import { Providers, Constants, GraphEvents, EdgeType } from '@/common';
24
24
  import { StandardGraph } from '@/graphs/Graph';
25
25
  import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
26
26
  import { ToolEndHandler, ModelEndHandler } from '@/events';
@@ -118,13 +118,13 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
118
118
  {
119
119
  from: 'supervisor_abc123',
120
120
  to: 'agent_W47hBnn2RoVZEOy5595GC',
121
- edgeType: 'handoff',
121
+ edgeType: EdgeType.HANDOFF,
122
122
  // No description - should auto-generate from agent name + description
123
123
  },
124
124
  {
125
125
  from: 'supervisor_abc123',
126
126
  to: 'agent_X92kLmn4TpQR8vw3221HD',
127
- edgeType: 'handoff',
127
+ edgeType: EdgeType.HANDOFF,
128
128
  // No description
129
129
  },
130
130
  ];
@@ -203,8 +203,8 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
203
203
  ];
204
204
 
205
205
  const edges: t.GraphEdge[] = [
206
- { from: 'router', to: 'sales_agent', edgeType: 'handoff' },
207
- { from: 'router', to: 'support_agent', edgeType: 'handoff' },
206
+ { from: 'router', to: 'sales_agent', edgeType: EdgeType.HANDOFF },
207
+ { from: 'router', to: 'support_agent', edgeType: EdgeType.HANDOFF },
208
208
  ];
209
209
 
210
210
  const { contentParts: _contentParts, aggregateContent } =
@@ -321,8 +321,8 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
321
321
  ];
322
322
 
323
323
  const edges: t.GraphEdge[] = [
324
- { from: 'router', to: 'sales_agent', edgeType: 'handoff' },
325
- { from: 'router', to: 'support_agent', edgeType: 'handoff' },
324
+ { from: 'router', to: 'sales_agent', edgeType: EdgeType.HANDOFF },
325
+ { from: 'router', to: 'support_agent', edgeType: EdgeType.HANDOFF },
326
326
  ];
327
327
 
328
328
  const { contentParts: _contentParts, aggregateContent } =