@illuma-ai/agents 1.1.20 → 1.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246)
  1. package/dist/cjs/graphs/Graph.cjs +12 -1
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/graphs/MultiAgentGraph.cjs +85 -1
  4. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  5. package/dist/cjs/llm/bedrock/index.cjs +14 -0
  6. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  7. package/dist/cjs/run.cjs +20 -9
  8. package/dist/cjs/run.cjs.map +1 -1
  9. package/dist/esm/graphs/Graph.mjs +12 -1
  10. package/dist/esm/graphs/Graph.mjs.map +1 -1
  11. package/dist/esm/graphs/MultiAgentGraph.mjs +85 -1
  12. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  13. package/dist/esm/llm/bedrock/index.mjs +14 -0
  14. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  15. package/dist/esm/run.mjs +20 -9
  16. package/dist/esm/run.mjs.map +1 -1
  17. package/dist/types/graphs/MultiAgentGraph.d.ts +17 -0
  18. package/package.json +1 -1
  19. package/src/graphs/Graph.ts +12 -1
  20. package/src/graphs/MultiAgentGraph.ts +105 -1
  21. package/src/graphs/__tests__/multi-agent-delegate.test.ts +191 -0
  22. package/src/llm/bedrock/index.ts +17 -0
  23. package/src/run.ts +20 -11
  24. package/src/scripts/test-bedrock-handoff-autonomous.ts +231 -0
  25. package/src/agents/AgentContext.js +0 -782
  26. package/src/agents/AgentContext.test.js +0 -421
  27. package/src/agents/__tests__/AgentContext.test.js +0 -678
  28. package/src/agents/__tests__/resolveStructuredOutputMode.test.js +0 -117
  29. package/src/common/enum.js +0 -192
  30. package/src/common/index.js +0 -3
  31. package/src/events.js +0 -166
  32. package/src/graphs/Graph.js +0 -1857
  33. package/src/graphs/MultiAgentGraph.js +0 -1092
  34. package/src/graphs/__tests__/structured-output.integration.test.js +0 -624
  35. package/src/graphs/__tests__/structured-output.test.js +0 -144
  36. package/src/graphs/contextManagement.e2e.test.js +0 -718
  37. package/src/graphs/contextManagement.test.js +0 -485
  38. package/src/graphs/handoffValidation.test.js +0 -276
  39. package/src/graphs/index.js +0 -3
  40. package/src/index.js +0 -28
  41. package/src/instrumentation.js +0 -21
  42. package/src/llm/anthropic/index.js +0 -319
  43. package/src/llm/anthropic/types.js +0 -46
  44. package/src/llm/anthropic/utils/message_inputs.js +0 -627
  45. package/src/llm/anthropic/utils/message_outputs.js +0 -290
  46. package/src/llm/anthropic/utils/output_parsers.js +0 -89
  47. package/src/llm/anthropic/utils/tools.js +0 -25
  48. package/src/llm/bedrock/__tests__/bedrock-caching.test.js +0 -392
  49. package/src/llm/bedrock/index.js +0 -303
  50. package/src/llm/bedrock/types.js +0 -2
  51. package/src/llm/bedrock/utils/index.js +0 -6
  52. package/src/llm/bedrock/utils/message_inputs.js +0 -463
  53. package/src/llm/bedrock/utils/message_outputs.js +0 -269
  54. package/src/llm/fake.js +0 -92
  55. package/src/llm/google/index.js +0 -215
  56. package/src/llm/google/types.js +0 -12
  57. package/src/llm/google/utils/common.js +0 -670
  58. package/src/llm/google/utils/tools.js +0 -111
  59. package/src/llm/google/utils/zod_to_genai_parameters.js +0 -47
  60. package/src/llm/openai/index.js +0 -1033
  61. package/src/llm/openai/types.js +0 -2
  62. package/src/llm/openai/utils/index.js +0 -756
  63. package/src/llm/openai/utils/isReasoningModel.test.js +0 -79
  64. package/src/llm/openrouter/index.js +0 -261
  65. package/src/llm/openrouter/reasoning.test.js +0 -181
  66. package/src/llm/providers.js +0 -36
  67. package/src/llm/text.js +0 -65
  68. package/src/llm/vertexai/index.js +0 -402
  69. package/src/messages/__tests__/tools.test.js +0 -392
  70. package/src/messages/cache.js +0 -404
  71. package/src/messages/cache.test.js +0 -1167
  72. package/src/messages/content.js +0 -48
  73. package/src/messages/content.test.js +0 -314
  74. package/src/messages/core.js +0 -359
  75. package/src/messages/ensureThinkingBlock.test.js +0 -997
  76. package/src/messages/format.js +0 -973
  77. package/src/messages/formatAgentMessages.test.js +0 -2278
  78. package/src/messages/formatAgentMessages.tools.test.js +0 -362
  79. package/src/messages/formatMessage.test.js +0 -608
  80. package/src/messages/ids.js +0 -18
  81. package/src/messages/index.js +0 -9
  82. package/src/messages/labelContentByAgent.test.js +0 -725
  83. package/src/messages/prune.js +0 -438
  84. package/src/messages/reducer.js +0 -60
  85. package/src/messages/shiftIndexTokenCountMap.test.js +0 -63
  86. package/src/messages/summarize.js +0 -146
  87. package/src/messages/summarize.test.js +0 -332
  88. package/src/messages/tools.js +0 -90
  89. package/src/mockStream.js +0 -81
  90. package/src/prompts/collab.js +0 -7
  91. package/src/prompts/index.js +0 -3
  92. package/src/prompts/taskmanager.js +0 -58
  93. package/src/run.js +0 -427
  94. package/src/schemas/index.js +0 -3
  95. package/src/schemas/schema-preparation.test.js +0 -370
  96. package/src/schemas/validate.js +0 -314
  97. package/src/schemas/validate.test.js +0 -264
  98. package/src/scripts/abort.js +0 -127
  99. package/src/scripts/ant_web_search.js +0 -130
  100. package/src/scripts/ant_web_search_edge_case.js +0 -133
  101. package/src/scripts/ant_web_search_error_edge_case.js +0 -119
  102. package/src/scripts/args.js +0 -41
  103. package/src/scripts/bedrock-cache-debug.js +0 -186
  104. package/src/scripts/bedrock-content-aggregation-test.js +0 -195
  105. package/src/scripts/bedrock-merge-test.js +0 -80
  106. package/src/scripts/bedrock-parallel-tools-test.js +0 -150
  107. package/src/scripts/caching.js +0 -106
  108. package/src/scripts/cli.js +0 -152
  109. package/src/scripts/cli2.js +0 -119
  110. package/src/scripts/cli3.js +0 -163
  111. package/src/scripts/cli4.js +0 -165
  112. package/src/scripts/cli5.js +0 -165
  113. package/src/scripts/code_exec.js +0 -171
  114. package/src/scripts/code_exec_files.js +0 -180
  115. package/src/scripts/code_exec_multi_session.js +0 -185
  116. package/src/scripts/code_exec_ptc.js +0 -265
  117. package/src/scripts/code_exec_session.js +0 -217
  118. package/src/scripts/code_exec_simple.js +0 -120
  119. package/src/scripts/content.js +0 -111
  120. package/src/scripts/empty_input.js +0 -125
  121. package/src/scripts/handoff-test.js +0 -96
  122. package/src/scripts/image.js +0 -138
  123. package/src/scripts/memory.js +0 -83
  124. package/src/scripts/multi-agent-chain.js +0 -271
  125. package/src/scripts/multi-agent-conditional.js +0 -185
  126. package/src/scripts/multi-agent-document-review-chain.js +0 -171
  127. package/src/scripts/multi-agent-hybrid-flow.js +0 -264
  128. package/src/scripts/multi-agent-parallel-start.js +0 -214
  129. package/src/scripts/multi-agent-parallel.js +0 -346
  130. package/src/scripts/multi-agent-sequence.js +0 -184
  131. package/src/scripts/multi-agent-supervisor.js +0 -324
  132. package/src/scripts/multi-agent-test.js +0 -147
  133. package/src/scripts/parallel-asymmetric-tools-test.js +0 -202
  134. package/src/scripts/parallel-full-metadata-test.js +0 -176
  135. package/src/scripts/parallel-tools-test.js +0 -256
  136. package/src/scripts/programmatic_exec.js +0 -277
  137. package/src/scripts/programmatic_exec_agent.js +0 -168
  138. package/src/scripts/search.js +0 -118
  139. package/src/scripts/sequential-full-metadata-test.js +0 -143
  140. package/src/scripts/simple.js +0 -174
  141. package/src/scripts/single-agent-metadata-test.js +0 -152
  142. package/src/scripts/stream.js +0 -113
  143. package/src/scripts/test-custom-prompt-key.js +0 -132
  144. package/src/scripts/test-handoff-input.js +0 -143
  145. package/src/scripts/test-handoff-preamble.js +0 -227
  146. package/src/scripts/test-handoff-steering.js +0 -353
  147. package/src/scripts/test-multi-agent-list-handoff.js +0 -318
  148. package/src/scripts/test-parallel-agent-labeling.js +0 -253
  149. package/src/scripts/test-parallel-handoffs.js +0 -229
  150. package/src/scripts/test-thinking-handoff-bedrock.js +0 -132
  151. package/src/scripts/test-thinking-handoff.js +0 -132
  152. package/src/scripts/test-thinking-to-thinking-handoff-bedrock.js +0 -140
  153. package/src/scripts/test-tool-before-handoff-role-order.js +0 -223
  154. package/src/scripts/test-tools-before-handoff.js +0 -187
  155. package/src/scripts/test_code_api.js +0 -263
  156. package/src/scripts/thinking-bedrock.js +0 -128
  157. package/src/scripts/thinking-vertexai.js +0 -130
  158. package/src/scripts/thinking.js +0 -134
  159. package/src/scripts/tool_search.js +0 -114
  160. package/src/scripts/tools.js +0 -125
  161. package/src/specs/agent-handoffs-bedrock.integration.test.js +0 -280
  162. package/src/specs/agent-handoffs.test.js +0 -924
  163. package/src/specs/anthropic.simple.test.js +0 -287
  164. package/src/specs/azure.simple.test.js +0 -381
  165. package/src/specs/cache.simple.test.js +0 -282
  166. package/src/specs/custom-event-await.test.js +0 -148
  167. package/src/specs/deepseek.simple.test.js +0 -189
  168. package/src/specs/emergency-prune.test.js +0 -308
  169. package/src/specs/moonshot.simple.test.js +0 -237
  170. package/src/specs/observability.integration.test.js +0 -1337
  171. package/src/specs/openai.simple.test.js +0 -233
  172. package/src/specs/openrouter.simple.test.js +0 -202
  173. package/src/specs/prune.test.js +0 -733
  174. package/src/specs/reasoning.test.js +0 -144
  175. package/src/specs/spec.utils.js +0 -4
  176. package/src/specs/thinking-handoff.test.js +0 -486
  177. package/src/specs/thinking-prune.test.js +0 -600
  178. package/src/specs/token-distribution-edge-case.test.js +0 -246
  179. package/src/specs/token-memoization.test.js +0 -32
  180. package/src/specs/tokens.test.js +0 -49
  181. package/src/specs/tool-error.test.js +0 -139
  182. package/src/splitStream.js +0 -204
  183. package/src/splitStream.test.js +0 -504
  184. package/src/stream.js +0 -650
  185. package/src/stream.test.js +0 -225
  186. package/src/test/mockTools.js +0 -340
  187. package/src/tools/BrowserTools.js +0 -245
  188. package/src/tools/Calculator.js +0 -38
  189. package/src/tools/Calculator.test.js +0 -225
  190. package/src/tools/CodeExecutor.js +0 -233
  191. package/src/tools/ProgrammaticToolCalling.js +0 -602
  192. package/src/tools/StreamingToolCallBuffer.js +0 -179
  193. package/src/tools/ToolNode.js +0 -930
  194. package/src/tools/ToolSearch.js +0 -904
  195. package/src/tools/__tests__/BrowserTools.test.js +0 -306
  196. package/src/tools/__tests__/ProgrammaticToolCalling.integration.test.js +0 -276
  197. package/src/tools/__tests__/ProgrammaticToolCalling.test.js +0 -807
  198. package/src/tools/__tests__/StreamingToolCallBuffer.test.js +0 -175
  199. package/src/tools/__tests__/ToolApproval.test.js +0 -675
  200. package/src/tools/__tests__/ToolNode.recovery.test.js +0 -200
  201. package/src/tools/__tests__/ToolNode.session.test.js +0 -319
  202. package/src/tools/__tests__/ToolSearch.integration.test.js +0 -125
  203. package/src/tools/__tests__/ToolSearch.test.js +0 -812
  204. package/src/tools/__tests__/handlers.test.js +0 -799
  205. package/src/tools/__tests__/truncation-recovery.integration.test.js +0 -362
  206. package/src/tools/handlers.js +0 -306
  207. package/src/tools/schema.js +0 -25
  208. package/src/tools/search/anthropic.js +0 -34
  209. package/src/tools/search/content.js +0 -116
  210. package/src/tools/search/content.test.js +0 -133
  211. package/src/tools/search/firecrawl.js +0 -173
  212. package/src/tools/search/format.js +0 -198
  213. package/src/tools/search/highlights.js +0 -241
  214. package/src/tools/search/index.js +0 -3
  215. package/src/tools/search/jina-reranker.test.js +0 -106
  216. package/src/tools/search/rerankers.js +0 -165
  217. package/src/tools/search/schema.js +0 -102
  218. package/src/tools/search/search.js +0 -561
  219. package/src/tools/search/serper-scraper.js +0 -126
  220. package/src/tools/search/test.js +0 -129
  221. package/src/tools/search/tool.js +0 -453
  222. package/src/tools/search/types.js +0 -2
  223. package/src/tools/search/utils.js +0 -59
  224. package/src/types/graph.js +0 -24
  225. package/src/types/graph.test.js +0 -192
  226. package/src/types/index.js +0 -7
  227. package/src/types/llm.js +0 -2
  228. package/src/types/messages.js +0 -2
  229. package/src/types/run.js +0 -2
  230. package/src/types/stream.js +0 -2
  231. package/src/types/tools.js +0 -2
  232. package/src/utils/contextAnalytics.js +0 -79
  233. package/src/utils/contextAnalytics.test.js +0 -166
  234. package/src/utils/events.js +0 -26
  235. package/src/utils/graph.js +0 -11
  236. package/src/utils/handlers.js +0 -65
  237. package/src/utils/index.js +0 -10
  238. package/src/utils/llm.js +0 -21
  239. package/src/utils/llmConfig.js +0 -205
  240. package/src/utils/logging.js +0 -37
  241. package/src/utils/misc.js +0 -51
  242. package/src/utils/run.js +0 -69
  243. package/src/utils/schema.js +0 -21
  244. package/src/utils/title.js +0 -119
  245. package/src/utils/tokens.js +0 -92
  246. package/src/utils/toonFormat.js +0 -379
@@ -1,718 +0,0 @@
1
- /**
2
- * End-to-End tests for context management across subagents, handoffs, and chaining.
3
- *
4
- * These tests exercise the full lifecycle of context management scenarios that
5
- * span multiple agents and turns, validating that context is preserved, compacted,
6
- * and transferred correctly across all agent execution patterns.
7
- *
8
- * Run with:
9
- * npx jest --no-coverage --forceExit src/graphs/contextManagement.e2e.test.ts
10
- */
11
- import { HumanMessage, AIMessage, SystemMessage, ToolMessage, } from '@langchain/core/messages';
12
- import { summarize, createEmergencySummary, formatMessagesForSummary, buildFullSummaryPrompt, } from '@/messages/summarize';
13
- import { getContextUtilization } from '@/messages/prune';
14
- // ---------------------------------------------------------------------------
15
- // Helpers
16
- // ---------------------------------------------------------------------------
17
- /** Simple token counter: 1 token ≈ 4 chars */
18
- const simpleTokenCounter = (msg) => {
19
- const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
20
- return Math.ceil(content.length / 4);
21
- };
22
- /** Create a realistic conversation with N turns */
23
- function buildConversation(turns, charPerMessage = 200) {
24
- const messages = [];
25
- for (let i = 0; i < turns; i++) {
26
- messages.push(new HumanMessage({ content: `User message ${i}: ${'x'.repeat(charPerMessage)}` }));
27
- messages.push(new AIMessage({ content: `AI response ${i}: ${'y'.repeat(charPerMessage)}` }));
28
- }
29
- return messages;
30
- }
31
- /** Simulates a summarize callback that returns a structured summary */
32
- const mockSummarizeCallback = async (prompt, maxTokens) => {
33
- return `[Summary] Conversation covered ${prompt.length} chars of content. Key topics discussed. Active context preserved.`;
34
- };
35
- /** Simulates a summarize callback that always fails */
36
- const failingSummarizeCallback = async () => {
37
- throw new Error('LLM unavailable');
38
- };
39
- // ============================================================================
40
- // 1. Subagent Result Truncation E2E
41
- // ============================================================================
42
- describe('Subagent result truncation E2E', () => {
43
- it('truncated subagent result fits within parent context budget', () => {
44
- // Simulate: subagent produces 50K chars, parent has 10K token budget
45
- const subagentResult = 'Finding: '.repeat(5000) + 'Conclusion reached.';
46
- const parentMaxTokens = 10000;
47
- const maxResultChars = 8192 * 4; // 8192 tokens * 4 chars/token = 32768
48
- // Truncation logic (mirrors TaskTool.truncateResult)
49
- const truncationNotice = '\n\n[... sub-agent output truncated — middle section omitted to fit parent context ...]\n\n';
50
- const available = maxResultChars - truncationNotice.length;
51
- const headSize = Math.floor(available * 0.6);
52
- const tailSize = available - headSize;
53
- const truncated = subagentResult.substring(0, headSize) +
54
- truncationNotice +
55
- subagentResult.substring(subagentResult.length - tailSize);
56
- // Result fits in parent budget (32768 chars ≈ 8192 tokens)
57
- const resultTokens = Math.ceil(truncated.length / 4);
58
- expect(resultTokens).toBeLessThanOrEqual(parentMaxTokens);
59
- expect(truncated).toContain('Finding:');
60
- expect(truncated).toContain('Conclusion reached.');
61
- expect(truncated).toContain('sub-agent output truncated');
62
- });
63
- it('multiple subagent results combined still fit parent context', () => {
64
- // Simulate 3 parallel subagent tasks each returning large results
65
- const maxResultCharsPerSubagent = 8192;
66
- const results = [
67
- 'A'.repeat(maxResultCharsPerSubagent),
68
- 'B'.repeat(maxResultCharsPerSubagent),
69
- 'C'.repeat(maxResultCharsPerSubagent),
70
- ];
71
- // Parent context: system + messages + 3 tool results
72
- const systemTokens = 500;
73
- const conversationTokens = 2000;
74
- const totalResultTokens = results.reduce((sum, r) => sum + Math.ceil(r.length / 4), 0);
75
- const totalUsed = systemTokens + conversationTokens + totalResultTokens;
76
- // With 3 subagents at 8192 chars each (≈2048 tokens each), total ≈ 6144 + 2500 = 8644 tokens
77
- // Fits within a 16K context window
78
- expect(totalUsed).toBeLessThan(16000);
79
- });
80
- it('subagent result truncation preserves actionable content', () => {
81
- // Simulate a subagent that found code, data, and conclusions
82
- const codeBlock = '```typescript\nfunction process() { return 42; }\n```\n';
83
- const dataSection = 'Data: ' + JSON.stringify({ metric: 99.5, status: 'healthy' }) + '\n';
84
- const middlePadding = 'Analysis details '.repeat(3000); // ~51K chars
85
- const conclusion = '\nFinal answer: The system is operating within normal parameters.';
86
- const fullResult = codeBlock + dataSection + middlePadding + conclusion;
87
- const maxChars = 2000;
88
- // Truncation keeps head (60%) and tail (40%)
89
- const notice = '\n\n[... sub-agent output truncated — middle section omitted to fit parent context ...]\n\n';
90
- const available = maxChars - notice.length;
91
- const headSize = Math.floor(available * 0.6);
92
- const tailSize = available - headSize;
93
- const truncated = fullResult.substring(0, headSize) + notice + fullResult.substring(fullResult.length - tailSize);
94
- // Head should contain the code and data (actionable)
95
- expect(truncated).toContain('function process()');
96
- expect(truncated).toContain('metric');
97
- // Tail should contain the conclusion
98
- expect(truncated).toContain('Final answer');
99
- });
100
- });
101
- // ============================================================================
102
- // 2. Agent Handoff with Context Compaction E2E
103
- // ============================================================================
104
- describe('Agent handoff with context compaction E2E', () => {
105
- it('compacts context before handoff when sender exceeds receiver budget', async () => {
106
- // Agent A has 200K context, Agent B has 32K context
107
- const senderMessages = buildConversation(50, 500); // 50 turns, ~500 chars each ≈ lots of tokens
108
- const receiverMaxTokens = 8000; // 32K chars
109
- // Calculate sender's context tokens
110
- const senderTokens = senderMessages.reduce((sum, m) => sum + simpleTokenCounter(m), 0);
111
- // Check if compaction needed (>70% of receiver budget)
112
- const compactionThreshold = receiverMaxTokens * 0.7;
113
- expect(senderTokens).toBeGreaterThan(compactionThreshold);
114
- // Compact: summarize + keep last 3 messages
115
- const result = await summarize(senderMessages, mockSummarizeCallback, {
116
- tokenCounter: simpleTokenCounter,
117
- summaryBudget: Math.floor(receiverMaxTokens * 0.2),
118
- });
119
- expect(result.tier).toBe('full');
120
- expect(result.summary).toBeTruthy();
121
- // Build compacted messages: system summary + last 3 messages
122
- const compactedMessages = [
123
- new SystemMessage({ content: `[Handoff Briefing]\n${result.summary}` }),
124
- ...senderMessages.slice(-3),
125
- ];
126
- // Compacted context fits receiver budget
127
- const compactedTokens = compactedMessages.reduce((sum, m) => sum + simpleTokenCounter(m), 0);
128
- expect(compactedTokens).toBeLessThan(receiverMaxTokens);
129
- });
130
- it('preserves critical context through handoff compaction', async () => {
131
- // First message has the user's original intent
132
- const messages = [
133
- new HumanMessage({
134
- content: 'Create a quarterly revenue report for Q4 2025 with charts and analysis',
135
- }),
136
- new AIMessage({ content: 'I will analyze the revenue data.' }),
137
- // ... many intermediate messages ...
138
- ...buildConversation(20, 300),
139
- // Last messages have current progress
140
- new HumanMessage({ content: 'Now add the competitor comparison section' }),
141
- new AIMessage({ content: 'Adding competitor analysis from the market data...' }),
142
- ];
143
- // Emergency summary should capture first user intent and last state
144
- const emergency = createEmergencySummary(messages);
145
- expect(emergency).toContain('quarterly revenue report');
146
- expect(emergency).toContain('competitor');
147
- // Full summary should also capture it
148
- const result = await summarize(messages, mockSummarizeCallback, {
149
- tokenCounter: simpleTokenCounter,
150
- });
151
- expect(result.summary.length).toBeGreaterThan(0);
152
- expect(result.messagesCompacted).toBe(messages.length);
153
- });
154
- it('handles handoff to agent with smaller context gracefully', async () => {
155
- // Simulate large context being handed to a mini agent
156
- const largeContext = buildConversation(100, 400); // Very large conversation
157
- const miniAgentBudget = 4000; // Very small budget (~16K chars)
158
- // First try full summarize
159
- const result = await summarize(largeContext, mockSummarizeCallback, {
160
- tokenCounter: simpleTokenCounter,
161
- summaryBudget: Math.floor(miniAgentBudget * 0.2),
162
- });
163
- // Should produce a summary
164
- expect(result.summary.length).toBeGreaterThan(0);
165
- // Build handoff context
166
- const handoffMessages = [
167
- new SystemMessage({ content: `[Handoff Briefing]\n${result.summary}` }),
168
- ...largeContext.slice(-2), // Keep last exchange
169
- ];
170
- const handoffTokens = handoffMessages.reduce((sum, m) => sum + simpleTokenCounter(m), 0);
171
- // Should fit within mini agent budget with room for its own responses
172
- expect(handoffTokens).toBeLessThan(miniAgentBudget * 0.5);
173
- });
174
- it('generates error ToolMessage for invalid handoff destination', () => {
175
- const agentContexts = new Map([
176
- ['research-agent', { name: 'Research Agent', maxContextTokens: 200000 }],
177
- ['writing-agent', { name: 'Writing Agent', maxContextTokens: 32000 }],
178
- ]);
179
- const invalidDest = 'coding-agent';
180
- expect(agentContexts.has(invalidDest)).toBe(false);
181
- // Build error message for self-correction
182
- const availableAgents = Array.from(agentContexts.keys()).join(', ');
183
- const errorMsg = new ToolMessage({
184
- content: `Agent "${invalidDest}" does not exist. Available agents: ${availableAgents}. Please choose a valid agent.`,
185
- tool_call_id: 'handoff_call_123',
186
- });
187
- expect(errorMsg.content).toContain('coding-agent');
188
- expect(errorMsg.content).toContain('research-agent');
189
- expect(errorMsg.content).toContain('writing-agent');
190
- });
191
- });
192
- // ============================================================================
193
- // 3. Agent Chaining with Rolling Summaries
194
- // ============================================================================
195
- describe('Agent chaining with rolling summaries', () => {
196
- it('maintains context across 3-agent chain via rolling summaries', async () => {
197
- // Agent A: Research phase
198
- const agentAMessages = [
199
- new HumanMessage({ content: 'Research the impact of AI on healthcare' }),
200
- new AIMessage({ content: 'Found 15 papers on AI diagnostics, drug discovery, and telemedicine.' }),
201
- new HumanMessage({ content: 'Focus on drug discovery findings' }),
202
- new AIMessage({ content: 'Drug discovery: AI reduces development time by 40%. Key compounds identified.' }),
203
- ];
204
- // Summarize Agent A context before passing to Agent B
205
- const summaryA = await summarize(agentAMessages, mockSummarizeCallback, {
206
- tokenCounter: simpleTokenCounter,
207
- });
208
- expect(summaryA.tier).toBe('full');
209
- // Agent B: Analysis phase — receives Agent A's summary
210
- const agentBMessages = [
211
- new SystemMessage({ content: `[Previous Agent Summary]\n${summaryA.summary}` }),
212
- new HumanMessage({ content: 'Analyze the drug discovery data and create visualizations' }),
213
- new AIMessage({ content: 'Created 3 charts showing AI impact on drug development timelines.' }),
214
- ];
215
- // Summarize Agent B context (including Agent A's summary) before passing to Agent C
216
- const summaryB = await summarize(agentBMessages, mockSummarizeCallback, {
217
- tokenCounter: simpleTokenCounter,
218
- });
219
- expect(summaryB.tier).toBe('full');
220
- // Agent B's summary should reference Agent A's work
221
- expect(summaryB.messagesCompacted).toBe(agentBMessages.length);
222
- // Agent C: Report generation — receives accumulated summaries
223
- const agentCMessages = [
224
- new SystemMessage({ content: `[Chain Summary]\n${summaryB.summary}` }),
225
- new HumanMessage({ content: 'Generate the final report with all findings and visualizations' }),
226
- ];
227
- // Agent C has enough context to generate the report
228
- const cTokens = agentCMessages.reduce((sum, m) => sum + simpleTokenCounter(m), 0);
229
- expect(cTokens).toBeLessThan(8000); // Well within any model's budget
230
- });
231
- it('chain summary grows but stays within budget', async () => {
232
- // Simulate a 5-agent chain where each adds to the rolling summary
233
- let rollingSummary = '';
234
- for (let i = 0; i < 5; i++) {
235
- const agentMessages = [
236
- ...(rollingSummary
237
- ? [new SystemMessage({ content: `[Chain Summary]\n${rollingSummary}` })]
238
- : []),
239
- new HumanMessage({ content: `Agent ${i} task: process step ${i}` }),
240
- new AIMessage({ content: `Agent ${i} completed step ${i}. ${'Result data '.repeat(50)}` }),
241
- ];
242
- const result = await summarize(agentMessages, mockSummarizeCallback, {
243
- tokenCounter: simpleTokenCounter,
244
- summaryBudget: 500,
245
- });
246
- rollingSummary = result.summary;
247
- }
248
- // After 5 agents, rolling summary should still be manageable
249
- const summaryTokens = Math.ceil(rollingSummary.length / 4);
250
- expect(summaryTokens).toBeLessThan(1000); // Summary stays compact
251
- expect(rollingSummary.length).toBeGreaterThan(0);
252
- });
253
- it('emergency summary preserves chain context when LLM fails', async () => {
254
- // Agent chain where LLM fails mid-chain
255
- const chainMessages = [
256
- new SystemMessage({ content: '[Chain Summary] Previous agents researched AI healthcare.' }),
257
- new HumanMessage({ content: 'Continue the analysis with cost projections' }),
258
- new AIMessage({ content: 'Cost analysis shows 30% reduction possible.' }),
259
- new ToolMessage({ content: 'spreadsheet data loaded', tool_call_id: 'tool_1' }),
260
- new HumanMessage({ content: 'Add regulatory compliance section' }),
261
- new AIMessage({ content: 'Regulatory review completed for FDA pathways.' }),
262
- ];
263
- // LLM unavailable — falls to emergency
264
- const result = await summarize(chainMessages, failingSummarizeCallback, {
265
- tokenCounter: simpleTokenCounter,
266
- });
267
- expect(result.tier).toBe('emergency');
268
- expect(result.summary).toContain('cost projections'); // First user msg
269
- expect(result.summary).toContain('FDA'); // Last AI msg
270
- expect(result.summary).toContain('Messages compacted: 6'); // Total count
271
- });
272
- });
273
- // ============================================================================
274
- // 4. Continuation Exhaustion and Retry
275
- // ============================================================================
276
- describe('Continuation exhaustion and retry', () => {
277
- it('detects max_tokens finish reason requiring continuation', () => {
278
- const finishReasons = ['length', 'max_tokens', 'end_turn', 'stop', null];
279
- const needsContinuation = finishReasons.filter((r) => r === 'length' || r === 'max_tokens');
280
- expect(needsContinuation).toEqual(['length', 'max_tokens']);
281
- });
282
- it('enforces maxContinuations limit', () => {
283
- const maxContinuations = 5;
284
- let continuationCount = 0;
285
- const results = [];
286
- // Simulate continuation loop
287
- while (continuationCount < maxContinuations) {
288
- const finishReason = 'max_tokens'; // Always hits limit
289
- if (finishReason !== 'max_tokens' && finishReason !== 'length') {
290
- break;
291
- }
292
- continuationCount++;
293
- results.push(`Continuation ${continuationCount}`);
294
- }
295
- expect(continuationCount).toBe(maxContinuations);
296
- expect(results.length).toBe(maxContinuations);
297
- });
298
- it('appends truncation notice when all continuations exhausted', () => {
299
- const maxContinuations = 5;
300
- const continuationCount = 5;
301
- const lastFinishReason = 'max_tokens';
302
- const isExhausted = continuationCount >= maxContinuations && lastFinishReason === 'max_tokens';
303
- expect(isExhausted).toBe(true);
304
- // Truncation notice appended to last content
305
- const truncationNotice = '\n\n[Note: Response was truncated due to length. Ask me to continue if you need the rest.]';
306
- const lastContent = 'Some AI response that was cut off';
307
- const withNotice = lastContent + truncationNotice;
308
- expect(withNotice).toContain('truncated due to length');
309
- expect(withNotice).toContain('continue');
310
- });
311
- it('retry-once logic: retries after 2s delay then gives up', async () => {
312
- let attempts = 0;
313
- let retried = false;
314
- const simulateContinuation = async () => {
315
- attempts++;
316
- if (attempts === 1) {
317
- throw new Error('Temporary failure');
318
- }
319
- return 'Recovered content';
320
- };
321
- // First attempt fails, retry once
322
- try {
323
- await simulateContinuation();
324
- }
325
- catch {
326
- if (!retried) {
327
- retried = true;
328
- // In real code: await new Promise(r => setTimeout(r, 2000));
329
- const result = await simulateContinuation();
330
- expect(result).toBe('Recovered content');
331
- }
332
- }
333
- expect(attempts).toBe(2);
334
- expect(retried).toBe(true);
335
- });
336
- it('saves partial response with interruption notice on final failure', () => {
337
- const partialResponse = 'Here is the beginning of the analysis...';
338
- const interruptionNotice = '\n\n[Response interrupted: An error occurred while generating this response. The content above may be incomplete.]';
339
- const finalContent = partialResponse + interruptionNotice;
340
- expect(finalContent).toContain('interrupted');
341
- expect(finalContent).toContain('beginning of the analysis');
342
- });
343
it('context compaction enables additional continuations', async () => {
    // A conversation that barely fits should free well over half the window once compacted.
    const conversation = buildConversation(30, 400); // 30 turns
    const countAll = (msgs) => msgs.reduce((total, msg) => total + simpleTokenCounter(msg), 0);
    const maxTokens = countAll(conversation) + 2000; // Just barely fits
    // Before compaction: high utilization
    const tokenMap = Object.fromEntries(
        conversation.map((msg, i) => [String(i), simpleTokenCounter(msg)]),
    );
    expect(getContextUtilization(tokenMap, 500, maxTokens)).toBeGreaterThan(80);
    // After compaction
    const { summary } = await summarize(conversation, mockSummarizeCallback, {
        tokenCounter: simpleTokenCounter,
    });
    const compacted = [
        new SystemMessage({ content: `[Conversation Summary]\n${summary}` }),
        ...conversation.slice(-3),
    ];
    // After compaction, plenty of room for more continuations
    expect(countAll(compacted)).toBeLessThan(maxTokens * 0.5);
});
363
- });
364
- // ============================================================================
365
- // 5. Pre-invocation Utilization Gate
366
- // ============================================================================
367
describe('Pre-invocation utilization gate', () => {
    // Maps a utilization percentage onto the pressure level the gate should emit.
    const pressureLevelFor = (utilization) => {
        if (utilization > 95) return 'emergency';
        if (utilization > 85) return 'critical';
        if (utilization > 70) return 'warning';
        return null;
    };
    it('emits warning at 70-85% utilization', () => {
        const limit = 10000;
        const used = 7500; // 75%
        const pct = (used / limit) * 100;
        const emitted = [];
        const level = pressureLevelFor(pct);
        if (level !== null) {
            emitted.push({ type: 'ON_CONTEXT_PRESSURE', level });
        }
        expect(emitted).toHaveLength(1);
        expect(emitted[0].level).toBe('warning');
    });
    it('triggers proactive compaction at 85-95% utilization', async () => {
        const limit = 10000;
        const pct = (9000 / limit) * 100; // 90%
        expect(pct).toBeGreaterThan(85);
        expect(pct).toBeLessThan(95);
        // Should trigger proactive compaction
        const conversation = buildConversation(20, 200);
        const { summary } = await summarize(conversation, mockSummarizeCallback, {
            tokenCounter: simpleTokenCounter,
        });
        expect(summary.length).toBeGreaterThan(0);
        // After compaction, utilization should drop significantly
        const remainingTokens = Math.ceil(summary.length / 4) + 500; // summary + last few messages
        expect((remainingTokens / limit) * 100).toBeLessThan(50);
    });
    it('triggers emergency compaction at >95% utilization', async () => {
        const limit = 10000;
        const pct = (9600 / limit) * 100; // 96%
        expect(pct).toBeGreaterThan(95);
        // Emergency: no LLM call, pure extraction
        const conversation = buildConversation(30, 300);
        const emergency = createEmergencySummary(conversation);
        expect(emergency).toBeTruthy();
        expect(emergency.length).toBeLessThan(2000); // Emergency summaries are compact
    });
    it('injects delegation hint at >70% utilization for agents with task tool', () => {
        const utilization = 75;
        const hasTaskTool = true;
        if (hasTaskTool && utilization > 70) {
            const hint = new HumanMessage({
                content: '[System] Context window is at 75% capacity. Consider delegating complex sub-tasks ' +
                    'to the task tool to maintain context availability.',
            });
            expect(hint.content).toContain('75%');
            expect(hint.content).toContain('task tool');
        }
    });
    it('does not inject delegation hint below 70%', () => {
        const utilization = 65;
        const injected = utilization > 70;
        expect(injected).toBe(false);
    });
});
434
- // ============================================================================
435
- // 6. Emergency Context Preservation (First+Last Pair)
436
- // ============================================================================
437
describe('Emergency context preservation', () => {
    it('preserves first user message and last AI message from 50-turn conversation', () => {
        // FIX: dropped the unused firstUserMsg/lastAIMsg locals (dead code).
        const messages = buildConversation(50, 200);
        const emergency = createEmergencySummary(messages);
        // Should contain first user intent
        expect(emergency).toContain('User message 0');
        // Should contain last AI state
        expect(emergency).toContain('AI response 49');
        // Should report correct count (50 turns = 100 human+AI messages)
        expect(emergency).toContain('Messages compacted: 100');
    });
    it('captures tool names in emergency summary', () => {
        const messages = [
            new HumanMessage({ content: 'Search for revenue data' }),
            new ToolMessage({ content: 'Found 5 documents', tool_call_id: 'tc_1', name: 'file_search' }),
            new ToolMessage({ content: 'Web results for Q4', tool_call_id: 'tc_2', name: 'web_search' }),
            new ToolMessage({
                content: 'Code executed successfully',
                tool_call_id: 'tc_3',
                name: 'code_execution',
            }),
            new AIMessage({ content: 'Analysis complete' }),
        ];
        const emergency = createEmergencySummary(messages);
        expect(emergency).toContain('file_search');
        expect(emergency).toContain('web_search');
        expect(emergency).toContain('code_execution');
    });
    it('emergency summary never throws even with malformed messages', () => {
        const weirdMessages = [
            new HumanMessage({ content: '' }), // Empty
            new AIMessage({ content: [{ type: 'text', text: '' }] }), // Complex empty
            new SystemMessage({ content: 'system' }),
        ];
        expect(() => createEmergencySummary(weirdMessages)).not.toThrow();
        const result = createEmergencySummary(weirdMessages);
        expect(typeof result).toBe('string');
    });
    // FIX: this callback had `async` but contained no await; made it synchronous.
    it('preserves context even when all intermediate messages are removed', () => {
        const messages = [
            new HumanMessage({ content: 'CRITICAL: Build the authentication system for the app' }),
            ...buildConversation(40, 300), // 40 turns of intermediate work
            new HumanMessage({ content: 'Now deploy to production' }),
            new AIMessage({ content: 'Deploying the authentication system to AWS ECS...' }),
        ];
        const emergency = createEmergencySummary(messages);
        // First user message preserved
        expect(emergency).toContain('authentication system');
        // Last AI response preserved
        expect(emergency).toContain('Deploying');
    });
});
491
- // ============================================================================
492
- // 7. Multi-Agent Context Handoff Scenarios
493
- // ============================================================================
494
describe('Multi-agent context handoff scenarios', () => {
    it('research agent → writing agent handoff preserves findings', async () => {
        const researchMessages = [
            new HumanMessage({ content: 'Research competitors: Acme Corp, Beta Inc, Gamma Ltd' }),
            new ToolMessage({
                content: 'Acme Corp: $50M revenue, 200 employees',
                tool_call_id: 'ws_1',
                name: 'web_search',
            }),
            new ToolMessage({
                content: 'Beta Inc: $30M revenue, 150 employees',
                tool_call_id: 'ws_2',
                name: 'web_search',
            }),
            new AIMessage({
                content: 'Found competitive data: Acme leads with $50M, Beta at $30M, Gamma data unavailable.',
            }),
        ];
        // Summarize for handoff
        const result = await summarize(researchMessages, mockSummarizeCallback, {
            tokenCounter: simpleTokenCounter,
        });
        expect(result.tier).toBe('full');
        expect(result.messagesCompacted).toBe(4);
        // Writing agent receives the summary
        const writingContext = [
            new SystemMessage({
                content: `[Handoff from Research Agent]\n${result.summary}\n\nYou are the Writing Agent. Create a report based on the research above.`,
            }),
        ];
        const writingTokens = writingContext.reduce((sum, m) => sum + simpleTokenCounter(m), 0);
        expect(writingTokens).toBeLessThan(2000);
    });
    it('handles cascade handoff: A → B → C → A (circular)', async () => {
        // FIX: removed the unused per-agent context Map — it was declared but never read.
        // A → B handoff
        const summaryAB = await summarize(buildConversation(5, 200), mockSummarizeCallback, {
            tokenCounter: simpleTokenCounter,
        });
        // B → C handoff
        const summaryBC = await summarize([new SystemMessage({ content: `[From A]\n${summaryAB.summary}` }), ...buildConversation(3, 200)], mockSummarizeCallback, { tokenCounter: simpleTokenCounter });
        // C → A handoff (back to original)
        const summaryCA = await summarize([new SystemMessage({ content: `[From B via A]\n${summaryBC.summary}` }), ...buildConversation(3, 200)], mockSummarizeCallback, { tokenCounter: simpleTokenCounter });
        // All summaries should exist and be compact
        expect(summaryAB.summary.length).toBeGreaterThan(0);
        expect(summaryBC.summary.length).toBeGreaterThan(0);
        expect(summaryCA.summary.length).toBeGreaterThan(0);
        // Final summary shouldn't have exploded in size
        const finalTokens = Math.ceil(summaryCA.summary.length / 4);
        expect(finalTokens).toBeLessThan(1000);
    });
    it('multi-agent workflow state is included in full summary prompt', () => {
        const messages = [
            new HumanMessage({ content: 'Start multi-agent research' }),
            new AIMessage({ content: 'Delegating to specialized agents.' }),
        ];
        const formatted = formatMessagesForSummary(messages);
        const prompt = buildFullSummaryPrompt(formatted, {
            isMultiAgent: true,
            agentWorkflowState: {
                currentAgentId: 'analysis-agent',
                agentChain: ['research-agent', 'analysis-agent'],
                pendingAgents: ['writing-agent'],
            },
        });
        expect(prompt).toContain('analysis-agent');
        expect(prompt).toContain('research-agent');
        expect(prompt).toContain('writing-agent');
        expect(prompt).toContain('Agent Workflow State');
    });
});
569
- // ============================================================================
570
- // 8. summaryModel Dedicated Cheap Model Usage
571
- // ============================================================================
572
describe('summaryModel dedicated cheap model usage', () => {
    it('summarize passes maxOutputTokens to callback for model configuration', async () => {
        let observedMaxTokens;
        const recordingCallback = async (_prompt, maxTokens) => {
            observedMaxTokens = maxTokens;
            return '[Summary] Tracked callback.';
        };
        await summarize(buildConversation(5, 200), recordingCallback, {
            tokenCounter: simpleTokenCounter,
            maxOutputTokens: 2048,
        });
        expect(observedMaxTokens).toBe(2048);
    });
    it('Tier 2 uses lower maxTokens (512) than Tier 1', async () => {
        // Fail the first (Tier 1) call so summarize falls back to Tier 2.
        const observedMaxTokens = [];
        const tieredCallback = async (_prompt, maxTokens) => {
            observedMaxTokens.push(maxTokens);
            if (observedMaxTokens.length === 1) {
                throw new Error('Tier 1 failed');
            }
            return '[Simple Summary]';
        };
        await summarize(buildConversation(5, 200), tieredCallback, {
            tokenCounter: simpleTokenCounter,
            maxOutputTokens: 1024,
        });
        // Tier 1 attempted with the configured 1024, Tier 2 halved it to 512.
        expect(observedMaxTokens[0]).toBe(1024);
        expect(observedMaxTokens[1]).toBe(512);
    });
    it('summaryModel config follows titleModel pattern', () => {
        // The config structure mirrors titleModel: a dedicated cheap model slot.
        const endpointConfig = {
            titleModel: 'us.amazon.nova-micro-v1:0',
            summaryModel: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
            modelDisplayLabel: 'Ranger',
        };
        // summaryModel should be a different, cheaper model than the main model
        expect(endpointConfig.summaryModel).not.toBe(endpointConfig.titleModel);
        expect(endpointConfig.summaryModel).toContain('haiku'); // Cheap model
    });
    it('falls back through provider defaults when no summaryModel configured', () => {
        // Mirrors the provider → default-summary-model fallback table.
        const defaultSummaryModelFor = (provider) => {
            switch (provider) {
                case 'bedrock':
                case 'anthropic':
                    return 'claude-3-haiku-20240307';
                case 'openAI':
                case 'azureOpenAI':
                    return 'gpt-4o-mini';
                case 'google':
                    return 'gemini-2.0-flash';
                default:
                    return 'claude-3-haiku-20240307';
            }
        };
        const providers = [
            { name: 'bedrock', expected: 'claude-3-haiku-20240307' },
            { name: 'anthropic', expected: 'claude-3-haiku-20240307' },
            { name: 'openAI', expected: 'gpt-4o-mini' },
            { name: 'google', expected: 'gemini-2.0-flash' },
            { name: 'unknown', expected: 'claude-3-haiku-20240307' },
        ];
        for (const { name, expected } of providers) {
            expect(defaultSummaryModelFor(name)).toBe(expected);
        }
    });
});
641
- // ============================================================================
642
- // 9. Config Wiring Validation
643
- // ============================================================================
644
describe('Config wiring validation', () => {
    it('contextManagement config has all required fields', () => {
        const config = {
            compactMode: 'auto',
            recoveryMode: 'summarize',
            continuationRetries: 1,
            toolResultBudgetPct: 0.3,
            subagentMaxResultTokens: 8192,
            chainRollingSummary: false,
            fileReadDedup: true,
            compactionAudit: true,
        };
        // All fields present
        expect(config.compactMode).toBeDefined();
        expect(config.recoveryMode).toBeDefined();
        expect(config.continuationRetries).toBeDefined();
        expect(config.toolResultBudgetPct).toBeDefined();
        expect(config.subagentMaxResultTokens).toBeDefined();
        expect(config.chainRollingSummary).toBeDefined();
        expect(config.fileReadDedup).toBeDefined();
        expect(config.compactionAudit).toBeDefined();
    });
    it('subagentMaxResultTokens converts to chars correctly', () => {
        // FIX: the original asserted `chars === tokens * 4` right after computing
        // `chars = tokens * 4` — a tautology. Pin explicit expected values instead.
        const expectedChars = new Map([
            [4096, 16384],
            [8192, 32768],
            [16384, 65536],
            [32768, 131072],
        ]);
        for (const [tokens, expected] of expectedChars) {
            const chars = tokens * 4;
            expect(chars).toBe(expected);
            // Chars should be reasonable (not exceeding 200K chars)
            expect(chars).toBeLessThan(200000);
        }
    });
    it('compactMode values are valid', () => {
        // FIX: original iterated a list and asserted membership in the same list
        // (always true). Check acceptance of valid modes AND rejection of invalid ones.
        const validModes = ['auto', 'summarize', 'prune'];
        const isValidCompactMode = (mode) => validModes.includes(mode);
        for (const mode of validModes) {
            expect(isValidCompactMode(mode)).toBe(true);
        }
        expect(isValidCompactMode('invalid')).toBe(false);
        expect(isValidCompactMode('')).toBe(false);
    });
    it('recoveryMode values are valid', () => {
        // FIX: same tautology as compactMode — now also asserts rejection.
        const validModes = ['summarize', 'prune', 'emergency'];
        const isValidRecoveryMode = (mode) => validModes.includes(mode);
        for (const mode of validModes) {
            expect(isValidRecoveryMode(mode)).toBe(true);
        }
        expect(isValidRecoveryMode('auto')).toBe(false);
        expect(isValidRecoveryMode('')).toBe(false);
    });
});
688
- // ============================================================================
689
- // 10. Stress Tests: Large Conversations
690
- // ============================================================================
691
describe('Stress tests with large conversations', () => {
    it('handles 200-turn conversation summarization', async () => {
        const conversation = buildConversation(200, 100); // 400 messages
        expect(conversation.length).toBe(400);
        const { summary, messagesCompacted } = await summarize(conversation, mockSummarizeCallback, {
            tokenCounter: simpleTokenCounter,
        });
        expect(summary.length).toBeGreaterThan(0);
        expect(messagesCompacted).toBe(400);
    });
    it('emergency summary from 500-turn conversation is still compact', () => {
        const conversation = buildConversation(500, 50); // 1000 messages
        const emergency = createEmergencySummary(conversation);
        // Emergency summary should be compact regardless of input size
        expect(emergency.length).toBeLessThan(3000);
        expect(emergency).toContain('Messages compacted: 1000');
    });
    it('getContextUtilization handles 1000-message token map', () => {
        const tokenMap = Object.fromEntries(
            Array.from({ length: 1000 }, (_, i) => [String(i), 100]),
        );
        // 1000 * 100 + 500 = 100500 / 200000 = 50.25%
        expect(getContextUtilization(tokenMap, 500, 200000)).toBeCloseTo(50.25, 1);
    });
});
718
- //# sourceMappingURL=contextManagement.e2e.test.js.map