@illuma-ai/agents 1.1.20 → 1.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/dist/cjs/graphs/Graph.cjs +12 -1
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/graphs/MultiAgentGraph.cjs +85 -1
  4. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  5. package/dist/cjs/llm/bedrock/index.cjs +14 -0
  6. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  7. package/dist/cjs/run.cjs +20 -9
  8. package/dist/cjs/run.cjs.map +1 -1
  9. package/dist/esm/graphs/Graph.mjs +12 -1
  10. package/dist/esm/graphs/Graph.mjs.map +1 -1
  11. package/dist/esm/graphs/MultiAgentGraph.mjs +85 -1
  12. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  13. package/dist/esm/llm/bedrock/index.mjs +14 -0
  14. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  15. package/dist/esm/run.mjs +20 -9
  16. package/dist/esm/run.mjs.map +1 -1
  17. package/dist/types/graphs/MultiAgentGraph.d.ts +17 -0
  18. package/package.json +1 -1
  19. package/src/graphs/Graph.ts +12 -1
  20. package/src/graphs/MultiAgentGraph.ts +105 -1
  21. package/src/graphs/__tests__/multi-agent-delegate.test.ts +191 -0
  22. package/src/llm/bedrock/index.ts +17 -0
  23. package/src/run.ts +20 -11
  24. package/src/scripts/test-bedrock-handoff-autonomous.ts +231 -0
  25. package/src/agents/AgentContext.js +0 -782
  26. package/src/agents/AgentContext.test.js +0 -421
  27. package/src/agents/__tests__/AgentContext.test.js +0 -678
  28. package/src/agents/__tests__/resolveStructuredOutputMode.test.js +0 -117
  29. package/src/common/enum.js +0 -192
  30. package/src/common/index.js +0 -3
  31. package/src/events.js +0 -166
  32. package/src/graphs/Graph.js +0 -1857
  33. package/src/graphs/MultiAgentGraph.js +0 -1092
  34. package/src/graphs/__tests__/structured-output.integration.test.js +0 -624
  35. package/src/graphs/__tests__/structured-output.test.js +0 -144
  36. package/src/graphs/contextManagement.e2e.test.js +0 -718
  37. package/src/graphs/contextManagement.test.js +0 -485
  38. package/src/graphs/handoffValidation.test.js +0 -276
  39. package/src/graphs/index.js +0 -3
  40. package/src/index.js +0 -28
  41. package/src/instrumentation.js +0 -21
  42. package/src/llm/anthropic/index.js +0 -319
  43. package/src/llm/anthropic/types.js +0 -46
  44. package/src/llm/anthropic/utils/message_inputs.js +0 -627
  45. package/src/llm/anthropic/utils/message_outputs.js +0 -290
  46. package/src/llm/anthropic/utils/output_parsers.js +0 -89
  47. package/src/llm/anthropic/utils/tools.js +0 -25
  48. package/src/llm/bedrock/__tests__/bedrock-caching.test.js +0 -392
  49. package/src/llm/bedrock/index.js +0 -303
  50. package/src/llm/bedrock/types.js +0 -2
  51. package/src/llm/bedrock/utils/index.js +0 -6
  52. package/src/llm/bedrock/utils/message_inputs.js +0 -463
  53. package/src/llm/bedrock/utils/message_outputs.js +0 -269
  54. package/src/llm/fake.js +0 -92
  55. package/src/llm/google/index.js +0 -215
  56. package/src/llm/google/types.js +0 -12
  57. package/src/llm/google/utils/common.js +0 -670
  58. package/src/llm/google/utils/tools.js +0 -111
  59. package/src/llm/google/utils/zod_to_genai_parameters.js +0 -47
  60. package/src/llm/openai/index.js +0 -1033
  61. package/src/llm/openai/types.js +0 -2
  62. package/src/llm/openai/utils/index.js +0 -756
  63. package/src/llm/openai/utils/isReasoningModel.test.js +0 -79
  64. package/src/llm/openrouter/index.js +0 -261
  65. package/src/llm/openrouter/reasoning.test.js +0 -181
  66. package/src/llm/providers.js +0 -36
  67. package/src/llm/text.js +0 -65
  68. package/src/llm/vertexai/index.js +0 -402
  69. package/src/messages/__tests__/tools.test.js +0 -392
  70. package/src/messages/cache.js +0 -404
  71. package/src/messages/cache.test.js +0 -1167
  72. package/src/messages/content.js +0 -48
  73. package/src/messages/content.test.js +0 -314
  74. package/src/messages/core.js +0 -359
  75. package/src/messages/ensureThinkingBlock.test.js +0 -997
  76. package/src/messages/format.js +0 -973
  77. package/src/messages/formatAgentMessages.test.js +0 -2278
  78. package/src/messages/formatAgentMessages.tools.test.js +0 -362
  79. package/src/messages/formatMessage.test.js +0 -608
  80. package/src/messages/ids.js +0 -18
  81. package/src/messages/index.js +0 -9
  82. package/src/messages/labelContentByAgent.test.js +0 -725
  83. package/src/messages/prune.js +0 -438
  84. package/src/messages/reducer.js +0 -60
  85. package/src/messages/shiftIndexTokenCountMap.test.js +0 -63
  86. package/src/messages/summarize.js +0 -146
  87. package/src/messages/summarize.test.js +0 -332
  88. package/src/messages/tools.js +0 -90
  89. package/src/mockStream.js +0 -81
  90. package/src/prompts/collab.js +0 -7
  91. package/src/prompts/index.js +0 -3
  92. package/src/prompts/taskmanager.js +0 -58
  93. package/src/run.js +0 -427
  94. package/src/schemas/index.js +0 -3
  95. package/src/schemas/schema-preparation.test.js +0 -370
  96. package/src/schemas/validate.js +0 -314
  97. package/src/schemas/validate.test.js +0 -264
  98. package/src/scripts/abort.js +0 -127
  99. package/src/scripts/ant_web_search.js +0 -130
  100. package/src/scripts/ant_web_search_edge_case.js +0 -133
  101. package/src/scripts/ant_web_search_error_edge_case.js +0 -119
  102. package/src/scripts/args.js +0 -41
  103. package/src/scripts/bedrock-cache-debug.js +0 -186
  104. package/src/scripts/bedrock-content-aggregation-test.js +0 -195
  105. package/src/scripts/bedrock-merge-test.js +0 -80
  106. package/src/scripts/bedrock-parallel-tools-test.js +0 -150
  107. package/src/scripts/caching.js +0 -106
  108. package/src/scripts/cli.js +0 -152
  109. package/src/scripts/cli2.js +0 -119
  110. package/src/scripts/cli3.js +0 -163
  111. package/src/scripts/cli4.js +0 -165
  112. package/src/scripts/cli5.js +0 -165
  113. package/src/scripts/code_exec.js +0 -171
  114. package/src/scripts/code_exec_files.js +0 -180
  115. package/src/scripts/code_exec_multi_session.js +0 -185
  116. package/src/scripts/code_exec_ptc.js +0 -265
  117. package/src/scripts/code_exec_session.js +0 -217
  118. package/src/scripts/code_exec_simple.js +0 -120
  119. package/src/scripts/content.js +0 -111
  120. package/src/scripts/empty_input.js +0 -125
  121. package/src/scripts/handoff-test.js +0 -96
  122. package/src/scripts/image.js +0 -138
  123. package/src/scripts/memory.js +0 -83
  124. package/src/scripts/multi-agent-chain.js +0 -271
  125. package/src/scripts/multi-agent-conditional.js +0 -185
  126. package/src/scripts/multi-agent-document-review-chain.js +0 -171
  127. package/src/scripts/multi-agent-hybrid-flow.js +0 -264
  128. package/src/scripts/multi-agent-parallel-start.js +0 -214
  129. package/src/scripts/multi-agent-parallel.js +0 -346
  130. package/src/scripts/multi-agent-sequence.js +0 -184
  131. package/src/scripts/multi-agent-supervisor.js +0 -324
  132. package/src/scripts/multi-agent-test.js +0 -147
  133. package/src/scripts/parallel-asymmetric-tools-test.js +0 -202
  134. package/src/scripts/parallel-full-metadata-test.js +0 -176
  135. package/src/scripts/parallel-tools-test.js +0 -256
  136. package/src/scripts/programmatic_exec.js +0 -277
  137. package/src/scripts/programmatic_exec_agent.js +0 -168
  138. package/src/scripts/search.js +0 -118
  139. package/src/scripts/sequential-full-metadata-test.js +0 -143
  140. package/src/scripts/simple.js +0 -174
  141. package/src/scripts/single-agent-metadata-test.js +0 -152
  142. package/src/scripts/stream.js +0 -113
  143. package/src/scripts/test-custom-prompt-key.js +0 -132
  144. package/src/scripts/test-handoff-input.js +0 -143
  145. package/src/scripts/test-handoff-preamble.js +0 -227
  146. package/src/scripts/test-handoff-steering.js +0 -353
  147. package/src/scripts/test-multi-agent-list-handoff.js +0 -318
  148. package/src/scripts/test-parallel-agent-labeling.js +0 -253
  149. package/src/scripts/test-parallel-handoffs.js +0 -229
  150. package/src/scripts/test-thinking-handoff-bedrock.js +0 -132
  151. package/src/scripts/test-thinking-handoff.js +0 -132
  152. package/src/scripts/test-thinking-to-thinking-handoff-bedrock.js +0 -140
  153. package/src/scripts/test-tool-before-handoff-role-order.js +0 -223
  154. package/src/scripts/test-tools-before-handoff.js +0 -187
  155. package/src/scripts/test_code_api.js +0 -263
  156. package/src/scripts/thinking-bedrock.js +0 -128
  157. package/src/scripts/thinking-vertexai.js +0 -130
  158. package/src/scripts/thinking.js +0 -134
  159. package/src/scripts/tool_search.js +0 -114
  160. package/src/scripts/tools.js +0 -125
  161. package/src/specs/agent-handoffs-bedrock.integration.test.js +0 -280
  162. package/src/specs/agent-handoffs.test.js +0 -924
  163. package/src/specs/anthropic.simple.test.js +0 -287
  164. package/src/specs/azure.simple.test.js +0 -381
  165. package/src/specs/cache.simple.test.js +0 -282
  166. package/src/specs/custom-event-await.test.js +0 -148
  167. package/src/specs/deepseek.simple.test.js +0 -189
  168. package/src/specs/emergency-prune.test.js +0 -308
  169. package/src/specs/moonshot.simple.test.js +0 -237
  170. package/src/specs/observability.integration.test.js +0 -1337
  171. package/src/specs/openai.simple.test.js +0 -233
  172. package/src/specs/openrouter.simple.test.js +0 -202
  173. package/src/specs/prune.test.js +0 -733
  174. package/src/specs/reasoning.test.js +0 -144
  175. package/src/specs/spec.utils.js +0 -4
  176. package/src/specs/thinking-handoff.test.js +0 -486
  177. package/src/specs/thinking-prune.test.js +0 -600
  178. package/src/specs/token-distribution-edge-case.test.js +0 -246
  179. package/src/specs/token-memoization.test.js +0 -32
  180. package/src/specs/tokens.test.js +0 -49
  181. package/src/specs/tool-error.test.js +0 -139
  182. package/src/splitStream.js +0 -204
  183. package/src/splitStream.test.js +0 -504
  184. package/src/stream.js +0 -650
  185. package/src/stream.test.js +0 -225
  186. package/src/test/mockTools.js +0 -340
  187. package/src/tools/BrowserTools.js +0 -245
  188. package/src/tools/Calculator.js +0 -38
  189. package/src/tools/Calculator.test.js +0 -225
  190. package/src/tools/CodeExecutor.js +0 -233
  191. package/src/tools/ProgrammaticToolCalling.js +0 -602
  192. package/src/tools/StreamingToolCallBuffer.js +0 -179
  193. package/src/tools/ToolNode.js +0 -930
  194. package/src/tools/ToolSearch.js +0 -904
  195. package/src/tools/__tests__/BrowserTools.test.js +0 -306
  196. package/src/tools/__tests__/ProgrammaticToolCalling.integration.test.js +0 -276
  197. package/src/tools/__tests__/ProgrammaticToolCalling.test.js +0 -807
  198. package/src/tools/__tests__/StreamingToolCallBuffer.test.js +0 -175
  199. package/src/tools/__tests__/ToolApproval.test.js +0 -675
  200. package/src/tools/__tests__/ToolNode.recovery.test.js +0 -200
  201. package/src/tools/__tests__/ToolNode.session.test.js +0 -319
  202. package/src/tools/__tests__/ToolSearch.integration.test.js +0 -125
  203. package/src/tools/__tests__/ToolSearch.test.js +0 -812
  204. package/src/tools/__tests__/handlers.test.js +0 -799
  205. package/src/tools/__tests__/truncation-recovery.integration.test.js +0 -362
  206. package/src/tools/handlers.js +0 -306
  207. package/src/tools/schema.js +0 -25
  208. package/src/tools/search/anthropic.js +0 -34
  209. package/src/tools/search/content.js +0 -116
  210. package/src/tools/search/content.test.js +0 -133
  211. package/src/tools/search/firecrawl.js +0 -173
  212. package/src/tools/search/format.js +0 -198
  213. package/src/tools/search/highlights.js +0 -241
  214. package/src/tools/search/index.js +0 -3
  215. package/src/tools/search/jina-reranker.test.js +0 -106
  216. package/src/tools/search/rerankers.js +0 -165
  217. package/src/tools/search/schema.js +0 -102
  218. package/src/tools/search/search.js +0 -561
  219. package/src/tools/search/serper-scraper.js +0 -126
  220. package/src/tools/search/test.js +0 -129
  221. package/src/tools/search/tool.js +0 -453
  222. package/src/tools/search/types.js +0 -2
  223. package/src/tools/search/utils.js +0 -59
  224. package/src/types/graph.js +0 -24
  225. package/src/types/graph.test.js +0 -192
  226. package/src/types/index.js +0 -7
  227. package/src/types/llm.js +0 -2
  228. package/src/types/messages.js +0 -2
  229. package/src/types/run.js +0 -2
  230. package/src/types/stream.js +0 -2
  231. package/src/types/tools.js +0 -2
  232. package/src/utils/contextAnalytics.js +0 -79
  233. package/src/utils/contextAnalytics.test.js +0 -166
  234. package/src/utils/events.js +0 -26
  235. package/src/utils/graph.js +0 -11
  236. package/src/utils/handlers.js +0 -65
  237. package/src/utils/index.js +0 -10
  238. package/src/utils/llm.js +0 -21
  239. package/src/utils/llmConfig.js +0 -205
  240. package/src/utils/logging.js +0 -37
  241. package/src/utils/misc.js +0 -51
  242. package/src/utils/run.js +0 -69
  243. package/src/utils/schema.js +0 -21
  244. package/src/utils/title.js +0 -119
  245. package/src/utils/tokens.js +0 -92
  246. package/src/utils/toonFormat.js +0 -379
@@ -1,353 +0,0 @@
1
- import { config } from 'dotenv';
2
- config();
3
- import { HumanMessage } from '@langchain/core/messages';
4
- import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
5
- import { ToolEndHandler, ModelEndHandler } from '@/events';
6
- import { GraphEvents, Providers } from '@/common';
7
- import { Run } from '@/run';
8
- /**
9
- * Test LLM steering quality after handoff with system prompt instructions.
10
- *
11
- * Validates that the receiving agent clearly understands:
12
- * 1. WHO it is (its role/identity)
13
- * 2. WHAT the task is (instructions from the handoff)
14
- * 3. WHO transferred control (source agent context)
15
- *
16
- * Uses specific, verifiable instructions so we can check the output.
17
- */
18
- async function testHandoffSteering() {
19
- console.log('='.repeat(60));
20
- console.log('Test: Handoff Steering Quality (System Prompt Instructions)');
21
- console.log('='.repeat(60));
22
- const { contentParts, aggregateContent } = createContentAggregator();
23
- let currentAgent = '';
24
- const agentResponses = {};
25
- const customHandlers = {
26
- [GraphEvents.TOOL_END]: new ToolEndHandler(),
27
- [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
28
- [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
29
- [GraphEvents.ON_RUN_STEP]: {
30
- handle: (event, data) => {
31
- const runStep = data;
32
- if (runStep.agentId) {
33
- currentAgent = runStep.agentId;
34
- console.log(`\n[Agent: ${currentAgent}] Processing...`);
35
- }
36
- aggregateContent({ event, data: runStep });
37
- },
38
- },
39
- [GraphEvents.ON_RUN_STEP_COMPLETED]: {
40
- handle: (event, data) => {
41
- aggregateContent({
42
- event,
43
- data: data,
44
- });
45
- },
46
- },
47
- [GraphEvents.ON_MESSAGE_DELTA]: {
48
- handle: (event, data) => {
49
- aggregateContent({ event, data: data });
50
- },
51
- },
52
- [GraphEvents.TOOL_START]: {
53
- handle: (_event, data, _metadata) => {
54
- const toolData = data;
55
- if (toolData?.name?.includes('transfer_to_')) {
56
- const specialist = toolData.name.replace('lc_transfer_to_', '');
57
- console.log(`\n >> Handoff to: ${specialist}`);
58
- }
59
- },
60
- },
61
- };
62
- /**
63
- * Test 1: Basic handoff with specific task instructions
64
- * The specialist should clearly follow the coordinator's instructions.
65
- */
66
- async function test1_basicInstructions() {
67
- console.log('\n' + '-'.repeat(60));
68
- console.log('TEST 1: Basic handoff with specific task instructions');
69
- console.log('-'.repeat(60));
70
- const agents = [
71
- {
72
- agentId: 'coordinator',
73
- provider: Providers.OPENAI,
74
- clientOptions: {
75
- modelName: 'gpt-4.1-mini',
76
- apiKey: process.env.OPENAI_API_KEY,
77
- },
78
- instructions: `You are a Task Coordinator. When a user makes a request:
79
- 1. Analyze what they need
80
- 2. Transfer to the specialist with SPECIFIC instructions about what to do
81
-
82
- IMPORTANT: Always use the transfer tool. Do not try to do the work yourself.`,
83
- maxContextTokens: 8000,
84
- },
85
- {
86
- agentId: 'specialist',
87
- provider: Providers.OPENAI,
88
- clientOptions: {
89
- modelName: 'gpt-4.1-mini',
90
- apiKey: process.env.OPENAI_API_KEY,
91
- },
92
- instructions: `You are a Technical Specialist. You provide detailed technical responses.
93
- When you receive a task, execute it thoroughly. Always identify yourself as the Technical Specialist in your response.`,
94
- maxContextTokens: 8000,
95
- },
96
- ];
97
- const edges = [
98
- {
99
- from: 'coordinator',
100
- to: 'specialist',
101
- edgeType: 'transfer',
102
- description: 'Transfer to specialist for detailed work',
103
- prompt: 'Provide specific instructions for the specialist about what to analyze or create',
104
- promptKey: 'instructions',
105
- },
106
- ];
107
- const run = await Run.create({
108
- runId: `steering-test1-${Date.now()}`,
109
- graphConfig: { type: 'multi-agent', agents, edges },
110
- customHandlers,
111
- returnContent: true,
112
- skipCleanup: true,
113
- });
114
- const streamConfig = {
115
- configurable: { thread_id: 'steering-test1' },
116
- streamMode: 'values',
117
- version: 'v2',
118
- };
119
- const query = 'Explain the difference between TCP and UDP. I need exactly 3 bullet points for each protocol.';
120
- console.log(`\nQuery: "${query}"\n`);
121
- const messages = [new HumanMessage(query)];
122
- await run.processStream({ messages }, streamConfig);
123
- const finalMessages = run.getRunMessages();
124
- console.log('\n--- Specialist Response ---');
125
- if (finalMessages) {
126
- for (const msg of finalMessages) {
127
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
128
- console.log(msg.content);
129
- agentResponses['test1'] = msg.content;
130
- }
131
- }
132
- }
133
- // Check steering quality
134
- const response = agentResponses['test1'] || '';
135
- const mentionsSpecialist = response.toLowerCase().includes('specialist') ||
136
- response.toLowerCase().includes('technical');
137
- const hasBulletPoints = (response.match(/[-•*]\s/g) || []).length >= 4 ||
138
- (response.match(/\d\./g) || []).length >= 4;
139
- const mentionsTCP = response.toLowerCase().includes('tcp');
140
- const mentionsUDP = response.toLowerCase().includes('udp');
141
- console.log('\n--- Steering Checks ---');
142
- console.log(` Identifies as specialist: ${mentionsSpecialist ? 'YES' : 'NO'}`);
143
- console.log(` Has bullet points: ${hasBulletPoints ? 'YES' : 'NO'}`);
144
- console.log(` Covers TCP: ${mentionsTCP ? 'YES' : 'NO'}`);
145
- console.log(` Covers UDP: ${mentionsUDP ? 'YES' : 'NO'}`);
146
- }
147
- /**
148
- * Test 2: Handoff with very specific formatting instructions
149
- * Tests whether the receiving agent follows precise instructions from the handoff.
150
- */
151
- async function test2_preciseFormatting() {
152
- console.log('\n' + '-'.repeat(60));
153
- console.log('TEST 2: Handoff with precise formatting instructions');
154
- console.log('-'.repeat(60));
155
- const agents = [
156
- {
157
- agentId: 'manager',
158
- provider: Providers.OPENAI,
159
- clientOptions: {
160
- modelName: 'gpt-4.1-mini',
161
- apiKey: process.env.OPENAI_API_KEY,
162
- },
163
- instructions: `You are a Project Manager. When a user asks about a topic:
164
- 1. Transfer to the writer with VERY SPECIFIC formatting instructions
165
- 2. Tell the writer to start their response with "REPORT:" and end with "END REPORT"
166
- 3. Tell the writer to use exactly 2 paragraphs
167
-
168
- CRITICAL: Always transfer to the writer. Do NOT write the report yourself.`,
169
- maxContextTokens: 8000,
170
- },
171
- {
172
- agentId: 'writer',
173
- provider: Providers.OPENAI,
174
- clientOptions: {
175
- modelName: 'gpt-4.1-mini',
176
- apiKey: process.env.OPENAI_API_KEY,
177
- },
178
- instructions: `You are a Report Writer. Follow any formatting instructions you receive precisely.
179
- You must follow the exact format requested.`,
180
- maxContextTokens: 8000,
181
- },
182
- ];
183
- const edges = [
184
- {
185
- from: 'manager',
186
- to: 'writer',
187
- edgeType: 'transfer',
188
- description: 'Transfer to writer for report creation',
189
- prompt: 'Provide specific formatting and content instructions for the writer',
190
- promptKey: 'instructions',
191
- },
192
- ];
193
- const run = await Run.create({
194
- runId: `steering-test2-${Date.now()}`,
195
- graphConfig: { type: 'multi-agent', agents, edges },
196
- customHandlers,
197
- returnContent: true,
198
- skipCleanup: true,
199
- });
200
- const streamConfig = {
201
- configurable: { thread_id: 'steering-test2' },
202
- streamMode: 'values',
203
- version: 'v2',
204
- };
205
- const query = 'Write a brief report about cloud computing benefits.';
206
- console.log(`\nQuery: "${query}"\n`);
207
- const messages = [new HumanMessage(query)];
208
- await run.processStream({ messages }, streamConfig);
209
- const finalMessages = run.getRunMessages();
210
- console.log('\n--- Writer Response ---');
211
- if (finalMessages) {
212
- for (const msg of finalMessages) {
213
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
214
- console.log(msg.content);
215
- agentResponses['test2'] = msg.content;
216
- }
217
- }
218
- }
219
- // Check if the writer followed the manager's formatting instructions
220
- const response = agentResponses['test2'] || '';
221
- const startsWithReport = response.trimStart().startsWith('REPORT:');
222
- const endsWithEndReport = response.trimEnd().endsWith('END REPORT');
223
- const mentionsCloud = response.toLowerCase().includes('cloud');
224
- console.log('\n--- Steering Checks ---');
225
- console.log(` Starts with "REPORT:": ${startsWithReport ? 'YES' : 'NO'}`);
226
- console.log(` Ends with "END REPORT": ${endsWithEndReport ? 'YES' : 'NO'}`);
227
- console.log(` Covers cloud computing: ${mentionsCloud ? 'YES' : 'NO'}`);
228
- }
229
- /**
230
- * Test 3: Multi-turn after handoff
231
- * Tests that identity and context persist across turns.
232
- */
233
- async function test3_multiTurn() {
234
- console.log('\n' + '-'.repeat(60));
235
- console.log('TEST 3: Multi-turn conversation after handoff');
236
- console.log('-'.repeat(60));
237
- const agents = [
238
- {
239
- agentId: 'router',
240
- provider: Providers.OPENAI,
241
- clientOptions: {
242
- modelName: 'gpt-4.1-mini',
243
- apiKey: process.env.OPENAI_API_KEY,
244
- },
245
- instructions: `You are a Router. Transfer all requests to the chef.
246
- When transferring, tell the chef to respond ONLY about Italian cuisine.
247
- CRITICAL: Always transfer. Never answer directly.`,
248
- maxContextTokens: 8000,
249
- },
250
- {
251
- agentId: 'chef',
252
- provider: Providers.OPENAI,
253
- clientOptions: {
254
- modelName: 'gpt-4.1-mini',
255
- apiKey: process.env.OPENAI_API_KEY,
256
- },
257
- instructions: `You are Chef Marco, an Italian cuisine expert.
258
- Always introduce yourself as Chef Marco. Only discuss Italian food.
259
- If asked about non-Italian food, politely redirect to Italian alternatives.`,
260
- maxContextTokens: 8000,
261
- },
262
- ];
263
- const edges = [
264
- {
265
- from: 'router',
266
- to: 'chef',
267
- edgeType: 'transfer',
268
- description: 'Transfer to chef',
269
- prompt: 'Instructions for the chef about how to respond',
270
- promptKey: 'instructions',
271
- },
272
- ];
273
- const run = await Run.create({
274
- runId: `steering-test3-${Date.now()}`,
275
- graphConfig: { type: 'multi-agent', agents, edges },
276
- customHandlers,
277
- returnContent: true,
278
- skipCleanup: true,
279
- });
280
- const streamConfig = {
281
- configurable: { thread_id: 'steering-test3' },
282
- streamMode: 'values',
283
- version: 'v2',
284
- };
285
- const conversationHistory = [];
286
- // Turn 1
287
- const query1 = 'What is a good pasta recipe?';
288
- console.log(`\nTurn 1: "${query1}"\n`);
289
- conversationHistory.push(new HumanMessage(query1));
290
- await run.processStream({ messages: conversationHistory }, streamConfig);
291
- const turn1Messages = run.getRunMessages();
292
- if (turn1Messages) {
293
- conversationHistory.push(...turn1Messages);
294
- for (const msg of turn1Messages) {
295
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
296
- console.log(msg.content.substring(0, 300) + '...');
297
- agentResponses['test3_turn1'] = msg.content;
298
- }
299
- }
300
- }
301
- // Turn 2 - follow up
302
- const query2 = 'What about sushi instead?';
303
- console.log(`\nTurn 2: "${query2}"\n`);
304
- conversationHistory.push(new HumanMessage(query2));
305
- await run.processStream({ messages: conversationHistory }, streamConfig);
306
- const turn2Messages = run.getRunMessages();
307
- if (turn2Messages) {
308
- conversationHistory.push(...turn2Messages);
309
- for (const msg of turn2Messages) {
310
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
311
- console.log(msg.content.substring(0, 300) + '...');
312
- agentResponses['test3_turn2'] = msg.content;
313
- }
314
- }
315
- }
316
- const response1 = agentResponses['test3_turn1'] || '';
317
- const response2 = agentResponses['test3_turn2'] || '';
318
- const t1Identity = response1.toLowerCase().includes('marco') ||
319
- response1.toLowerCase().includes('chef');
320
- const t1Italian = response1.toLowerCase().includes('italian') ||
321
- response1.toLowerCase().includes('pasta');
322
- const t2Redirects = response2.toLowerCase().includes('italian') ||
323
- response2.toLowerCase().includes('instead');
324
- console.log('\n--- Steering Checks ---');
325
- console.log(` Turn 1 - Chef identity: ${t1Identity ? 'YES' : 'NO'}`);
326
- console.log(` Turn 1 - Italian focus: ${t1Italian ? 'YES' : 'NO'}`);
327
- console.log(` Turn 2 - Redirects to Italian: ${t2Redirects ? 'YES' : 'NO'}`);
328
- }
329
- try {
330
- await test1_basicInstructions();
331
- await test2_preciseFormatting();
332
- await test3_multiTurn();
333
- console.log('\n\n' + '='.repeat(60));
334
- console.log('ALL TESTS COMPLETE');
335
- console.log('='.repeat(60));
336
- console.log('\nReview the steering checks above.');
337
- console.log('If the receiving agents consistently follow instructions and maintain identity,');
338
- console.log('the system prompt injection approach is working correctly.');
339
- }
340
- catch (error) {
341
- console.error('\nTest failed:', error);
342
- process.exit(1);
343
- }
344
- }
345
- process.on('unhandledRejection', (reason) => {
346
- console.error('Unhandled Rejection:', reason);
347
- process.exit(1);
348
- });
349
- testHandoffSteering().catch((err) => {
350
- console.error('Test failed:', err);
351
- process.exit(1);
352
- });
353
- //# sourceMappingURL=test-handoff-steering.js.map
@@ -1,318 +0,0 @@
1
- #!/usr/bin/env bun
2
- import { config } from 'dotenv';
3
- config();
4
- import { HumanMessage } from '@langchain/core/messages';
5
- import { labelContentByAgent, formatAgentMessages } from '@/messages/format';
6
- import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
7
- import { Providers, GraphEvents, Constants, StepTypes } from '@/common';
8
- import { ToolEndHandler, ModelEndHandler } from '@/events';
9
- import { Run } from '@/run';
10
- const conversationHistory = [];
11
/** Agent ids of the specialists reachable from the supervisor's single list-based edge. */
const SPECIALIST_IDS = [
  'data_analyst',
  'security_expert',
  'product_designer',
  'devops_engineer',
  'legal_advisor',
];

/**
 * Builds the run configuration for a supervisor plus five specialists that are
 * connected through ONE edge whose `to` field lists every destination.
 *
 * @param {object} customHandlers - Event handlers keyed by `GraphEvents` value.
 * @returns {object} Configuration object handed to `Run.create`.
 */
function buildSupervisorListEdgeRunConfig(customHandlers) {
  console.log(`\nCreating graph with supervisor and 5 specialist agents.`);
  console.log('Using a SINGLE edge with multiple destinations (list-based handoff).\n');

  // Shared configuration reused verbatim by every specialist; only agentId differs.
  const specialistConfig = {
    provider: Providers.ANTHROPIC,
    clientOptions: {
      modelName: 'claude-haiku-4-5',
      apiKey: process.env.ANTHROPIC_API_KEY,
    },
    instructions: `You are an Adaptive Specialist. Your agent ID indicates your role:

- data_analyst: Focus on statistical analysis, metrics, ML evaluation, A/B testing
- security_expert: Focus on cybersecurity, vulnerability assessment, compliance
- product_designer: Focus on UX/UI design, user research, accessibility
- devops_engineer: Focus on CI/CD, infrastructure, cloud platforms, monitoring
- legal_advisor: Focus on licensing, privacy laws, contracts, regulatory compliance

The supervisor will provide specific instructions. Follow them while maintaining your expert perspective.`,
    maxContextTokens: 8000,
  };

  const supervisor = {
    agentId: 'supervisor',
    provider: Providers.ANTHROPIC,
    clientOptions: {
      modelName: 'claude-haiku-4-5',
      apiKey: process.env.ANTHROPIC_API_KEY,
    },
    instructions: `You are a Task Supervisor with access to 5 specialist agents:
1. transfer_to_data_analyst - For statistical analysis and metrics
2. transfer_to_security_expert - For cybersecurity and vulnerability assessment
3. transfer_to_product_designer - For UX/UI design
4. transfer_to_devops_engineer - For infrastructure and deployment
5. transfer_to_legal_advisor - For compliance and licensing

Your role is to:
1. Analyze the incoming request
2. Decide which specialist is best suited
3. Use the appropriate transfer tool (e.g., transfer_to_data_analyst)
4. Provide specific instructions to guide their work

Be specific about what you need from the specialist.`,
    maxContextTokens: 8000,
  };

  const agents = [
    supervisor,
    ...SPECIALIST_IDS.map((agentId) => ({ agentId, ...specialistConfig })),
  ];

  // A SINGLE edge from the supervisor to ALL specialists, expressed as a list.
  const edges = [
    {
      from: 'supervisor',
      to: [...SPECIALIST_IDS],
      description: 'Transfer to appropriate specialist based on task requirements',
      edgeType: 'transfer',
    },
  ];

  return {
    runId: `supervisor-list-handoff-${Date.now()}`,
    graphConfig: {
      type: 'multi-agent',
      agents,
      edges,
    },
    customHandlers,
    returnContent: true,
    skipCleanup: true,
  };
}

/**
 * Prints, for every entry of the graph's content-part → agent map, which agent
 * produced the content part at that index together with a short preview.
 *
 * @param {object} graph - The run's graph; must expose `getContentPartAgentMap()`.
 * @param {Array<object>} contentParts - Aggregated content parts, indexed like the map keys.
 */
function logContentPartAgentMapping(graph, contentParts) {
  const contentPartAgentMap = graph.getContentPartAgentMap();
  console.log(`\nTotal content parts: ${contentParts.length}`);
  console.log(`\nContent Part → Agent Mapping:`);
  contentPartAgentMap.forEach((agentId, index) => {
    const contentPart = contentParts[index];
    const contentType = contentPart?.type || 'unknown';
    let preview = contentType;
    if (contentType === 'text') {
      preview = contentPart.text?.slice(0, 50) || '';
    } else if (contentType === 'tool_call') {
      preview = `Tool: ${contentPart.tool_call?.name || 'unknown'}`;
    }
    console.log(` [${index}] ${agentId} → ${contentType}: ${preview}${preview.length >= 50 ? '...' : ''}`);
  });
}

/**
 * Prints the active agent ids and, per agent, how many run steps were message
 * creations versus tool calls.
 *
 * @param {object} graph - The run's graph; must expose `getActiveAgentIds()`
 *   and `getRunStepsByAgent()`.
 */
function logAgentParticipation(graph) {
  console.log(`\n${'─'.repeat(60)}`);
  console.log('AGENT PARTICIPATION SUMMARY:');
  console.log('─'.repeat(60));
  const activeAgents = graph.getActiveAgentIds();
  console.log(`\nActive agents (${activeAgents.length}):`, activeAgents);
  graph.getRunStepsByAgent().forEach((steps, agentId) => {
    const toolCallSteps = steps.filter((s) => s.type === StepTypes.TOOL_CALLS).length;
    const messageSteps = steps.filter((s) => s.type === StepTypes.MESSAGE_CREATION).length;
    console.log(`\n ${agentId}:`);
    console.log(` - Total steps: ${steps.length}`);
    console.log(` - Message steps: ${messageSteps}`);
    console.log(` - Tool call steps: ${toolCallSteps}`);
  });
}

/**
 * Test supervisor-based multi-agent system using a single edge with multiple destinations.
 *
 * Instead of creating 5 separate edges, one edge with an array of destinations is used;
 * the script's own output describes the expectation that this yields a handoff tool per
 * specialist. For each query the script:
 *   1. runs the supervisor graph and prints which agent produced each content part;
 *   2. simulates storing/loading the content parts and their agent mapping, labels them
 *      by agent, and starts a second run with that history plus a follow-up question.
 *
 * Errors are logged and reported through a non-zero process exit code.
 *
 * @returns {Promise<void>}
 */
async function testSupervisorListHandoff() {
  console.log('Testing Supervisor with List-Based Handoff Edge...\n');

  // Set up content aggregator
  const { contentParts, aggregateContent } = createContentAggregator();
  // Forwards an event and its payload to the aggregator without modification.
  const forwardToAggregator = (event, data) => aggregateContent({ event, data });

  const customHandlers = {
    [GraphEvents.TOOL_END]: new ToolEndHandler(),
    [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
    [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
    [GraphEvents.ON_RUN_STEP]: {
      handle: (event, data) => {
        if (data?.name) {
          console.log(`\n[${data.name}] Processing...`);
        }
        forwardToAggregator(event, data);
      },
    },
    [GraphEvents.ON_RUN_STEP_COMPLETED]: {
      handle: (event, data) => forwardToAggregator(event, data),
    },
    [GraphEvents.ON_MESSAGE_DELTA]: {
      handle: (event, data) => forwardToAggregator(event, data),
    },
    [GraphEvents.TOOL_START]: {
      handle: (_event, data) => {
        const toolName = data?.name;
        // Only transfer tools are announced; their name suffix is the target specialist.
        if (toolName?.startsWith(Constants.LC_TRANSFER_TO_)) {
          const specialist = toolName.replace(Constants.LC_TRANSFER_TO_, '');
          console.log(`\n🔀 Transferring to ${specialist}...`);
        }
      },
    },
  };

  try {
    // Test with different queries
    const testQueries = [
      'What are the legal implications of using GPL-licensed code in our product?',
    ];
    // Named `streamConfig` so it does not shadow the `config` import from dotenv.
    const streamConfig = {
      configurable: {
        thread_id: 'supervisor-list-handoff-1',
      },
      streamMode: 'values',
      version: 'v2',
    };

    for (const query of testQueries) {
      console.log(`\n${'='.repeat(80)}`);
      console.log(`FIRST RUN - USER QUERY: "${query}"`);
      console.log('='.repeat(80));

      // Reset the shared history in place so earlier queries do not leak into this one.
      conversationHistory.length = 0;
      conversationHistory.push(new HumanMessage(query));

      // Create graph with supervisor having a single edge to multiple specialists
      const run = await Run.create(buildSupervisorListEdgeRunConfig(customHandlers));
      console.log('Processing first request...');
      await run.processStream({ messages: conversationHistory }, streamConfig);
      const finalMessages = run.getRunMessages();
      if (finalMessages) {
        conversationHistory.push(...finalMessages);
      }

      // Demo: Map contentParts to agentIds
      console.log(`\n${'─'.repeat(60)}`);
      console.log('CONTENT PARTS TO AGENT MAPPING:');
      console.log('─'.repeat(60));
      const graph = run.Graph;
      if (graph) {
        logContentPartAgentMapping(graph, contentParts);
        logAgentParticipation(graph);
      }

      // Show graph structure summary
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`GRAPH STRUCTURE:`);
      console.log(`- Agents: 6 total (supervisor + 5 specialists)`);
      console.log(`- Edges: 1 edge with multiple destinations`);
      console.log(`- Edge type: handoff (creates individual tools for each destination)`);
      console.log(`- Result: Supervisor has 5 handoff tools from a single edge`);
      console.log('─'.repeat(60));

      // =============================================================
      // SECOND RUN: Demonstrate agent-labeled history
      // =============================================================
      console.log(`\n${'='.repeat(80)}`);
      console.log(`SECOND RUN - Simulating DB Load with Agent-Labeled History`);
      console.log('='.repeat(80));

      // The second run needs the agent mapping; fail with a clear message instead of
      // a TypeError on `undefined` when the run exposes no graph.
      if (!graph) {
        throw new Error('run.Graph is not available; cannot read the content-part to agent mapping');
      }

      // 1. Store contentParts + agentIdMap to "DB" (in-memory here)
      const dbStoredContentParts = [...contentParts];
      const dbStoredAgentIdMap = Object.fromEntries(graph.getContentPartAgentMap());
      console.log('\n📦 Simulating DB storage:');
      console.log(` - Stored ${dbStoredContentParts.length} content parts`);
      console.log(` - Stored agent mappings for ${Object.keys(dbStoredAgentIdMap).length} parts`);

      // 2. On next run, load from "DB" and label by agent
      console.log('\n📥 Loading from DB and labeling by agent...');
      const agentNames = {
        supervisor: 'Supervisor',
        legal_advisor: 'Legal Advisor',
        data_analyst: 'Data Analyst',
        security_expert: 'Security Expert',
        product_designer: 'Product Designer',
        devops_engineer: 'DevOps Engineer',
      };
      const labeledContentParts = labelContentByAgent(
        dbStoredContentParts.filter((p) => p != null),
        dbStoredAgentIdMap,
        agentNames,
      );
      console.log(` - Labeled ${labeledContentParts.length} content parts by agent`);

      // 3. Convert labeled content parts to payload format
      const payload = [
        { role: 'user', content: query },
        { role: 'assistant', content: labeledContentParts },
      ];

      // 4. Format using formatAgentMessages (simulates what main app does)
      console.log('\n🔧 Calling formatAgentMessages...');
      const { messages: formattedMessages } = formatAgentMessages(payload);
      console.log(` - Formatted into ${formattedMessages.length} BaseMessages`);

      // Show a preview of what the supervisor will see
      console.log('\n👁️ Preview of formatted history for supervisor:');
      console.log('─'.repeat(80));
      formattedMessages.forEach((msg, i) => {
        const role = msg._getType();
        const preview = typeof msg.content === 'string'
          ? msg.content.slice(0, 200)
          : JSON.stringify(msg.content).slice(0, 200);
        console.log(`[${i}] ${role}: ${preview}${preview.length >= 200 ? '...' : ''}`);
      });
      console.log('─'.repeat(80));

      // 5. Create a new run with the formatted history + a followup question
      console.log('\n🚀 Starting second run with agent-labeled history + followup question...');
      const followupQuery = 'Can you summarize the key legal points from your previous response?';
      console.log(` Followup: "${followupQuery}"`);
      const secondRunHistory = [
        ...formattedMessages,
        new HumanMessage(followupQuery),
      ];
      const run2 = await Run.create(buildSupervisorListEdgeRunConfig(customHandlers));
      await run2.processStream({ messages: secondRunHistory }, streamConfig);
      console.log('\n✅ Second run completed successfully!');
      console.log(' The supervisor correctly understood that the legal_advisor handled');
      console.log(' the previous query, avoiding identity confusion.');
    }

    // Final summary
    console.log(`\n${'='.repeat(60)}`);
    console.log('TEST COMPLETE');
    console.log('='.repeat(60));
    console.log('\nThis test demonstrates that a single edge with multiple');
    console.log('destinations in the "to" field creates individual handoff');
    console.log('tools for each destination agent, achieving the same result');
    console.log('as creating separate edges for each specialist.');
  } catch (error) {
    console.error('Error in supervisor list handoff test:', error);
    // Report the failure through the exit status so a failed run is not mistaken
    // for a pass; exitCode (rather than exit()) lets pending output flush first.
    process.exitCode = 1;
  }
}
316
// Run the test. The returned promise is handled explicitly so that a rejection
// (for example an error thrown before the function's own try/catch is entered)
// is logged and ends the process with a non-zero exit code instead of floating.
testSupervisorListHandoff().catch((err) => {
  console.error('Test failed:', err);
  process.exit(1);
});
318
- //# sourceMappingURL=test-multi-agent-list-handoff.js.map