@illuma-ai/agents 1.1.21 → 1.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. package/dist/cjs/graphs/Graph.cjs +12 -1
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/graphs/MultiAgentGraph.cjs +105 -1
  4. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  5. package/dist/cjs/run.cjs +20 -9
  6. package/dist/cjs/run.cjs.map +1 -1
  7. package/dist/cjs/utils/llm.cjs.map +1 -1
  8. package/dist/esm/graphs/Graph.mjs +12 -1
  9. package/dist/esm/graphs/Graph.mjs.map +1 -1
  10. package/dist/esm/graphs/MultiAgentGraph.mjs +105 -1
  11. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  12. package/dist/esm/run.mjs +20 -9
  13. package/dist/esm/run.mjs.map +1 -1
  14. package/dist/esm/utils/llm.mjs.map +1 -1
  15. package/dist/types/graphs/MultiAgentGraph.d.ts +17 -0
  16. package/package.json +1 -1
  17. package/src/graphs/Graph.ts +13 -1
  18. package/src/graphs/MultiAgentGraph.ts +128 -1
  19. package/src/graphs/__tests__/multi-agent-delegate.test.ts +205 -0
  20. package/src/run.ts +20 -11
  21. package/src/scripts/test-bedrock-handoff-autonomous.ts +231 -0
  22. package/src/utils/llm.ts +1 -0
  23. package/src/agents/AgentContext.js +0 -782
  24. package/src/agents/AgentContext.test.js +0 -421
  25. package/src/agents/__tests__/AgentContext.test.js +0 -678
  26. package/src/agents/__tests__/resolveStructuredOutputMode.test.js +0 -117
  27. package/src/common/enum.js +0 -192
  28. package/src/common/index.js +0 -3
  29. package/src/events.js +0 -166
  30. package/src/graphs/Graph.js +0 -1857
  31. package/src/graphs/MultiAgentGraph.js +0 -1092
  32. package/src/graphs/__tests__/structured-output.integration.test.js +0 -624
  33. package/src/graphs/__tests__/structured-output.test.js +0 -144
  34. package/src/graphs/contextManagement.e2e.test.js +0 -718
  35. package/src/graphs/contextManagement.test.js +0 -485
  36. package/src/graphs/handoffValidation.test.js +0 -276
  37. package/src/graphs/index.js +0 -3
  38. package/src/index.js +0 -28
  39. package/src/instrumentation.js +0 -21
  40. package/src/llm/anthropic/index.js +0 -319
  41. package/src/llm/anthropic/types.js +0 -46
  42. package/src/llm/anthropic/utils/message_inputs.js +0 -627
  43. package/src/llm/anthropic/utils/message_outputs.js +0 -290
  44. package/src/llm/anthropic/utils/output_parsers.js +0 -89
  45. package/src/llm/anthropic/utils/tools.js +0 -25
  46. package/src/llm/bedrock/__tests__/bedrock-caching.test.js +0 -392
  47. package/src/llm/bedrock/index.js +0 -303
  48. package/src/llm/bedrock/types.js +0 -2
  49. package/src/llm/bedrock/utils/index.js +0 -6
  50. package/src/llm/bedrock/utils/message_inputs.js +0 -463
  51. package/src/llm/bedrock/utils/message_outputs.js +0 -269
  52. package/src/llm/fake.js +0 -92
  53. package/src/llm/google/index.js +0 -215
  54. package/src/llm/google/types.js +0 -12
  55. package/src/llm/google/utils/common.js +0 -670
  56. package/src/llm/google/utils/tools.js +0 -111
  57. package/src/llm/google/utils/zod_to_genai_parameters.js +0 -47
  58. package/src/llm/openai/index.js +0 -1033
  59. package/src/llm/openai/types.js +0 -2
  60. package/src/llm/openai/utils/index.js +0 -756
  61. package/src/llm/openai/utils/isReasoningModel.test.js +0 -79
  62. package/src/llm/openrouter/index.js +0 -261
  63. package/src/llm/openrouter/reasoning.test.js +0 -181
  64. package/src/llm/providers.js +0 -36
  65. package/src/llm/text.js +0 -65
  66. package/src/llm/vertexai/index.js +0 -402
  67. package/src/messages/__tests__/tools.test.js +0 -392
  68. package/src/messages/cache.js +0 -404
  69. package/src/messages/cache.test.js +0 -1167
  70. package/src/messages/content.js +0 -48
  71. package/src/messages/content.test.js +0 -314
  72. package/src/messages/core.js +0 -359
  73. package/src/messages/ensureThinkingBlock.test.js +0 -997
  74. package/src/messages/format.js +0 -973
  75. package/src/messages/formatAgentMessages.test.js +0 -2278
  76. package/src/messages/formatAgentMessages.tools.test.js +0 -362
  77. package/src/messages/formatMessage.test.js +0 -608
  78. package/src/messages/ids.js +0 -18
  79. package/src/messages/index.js +0 -9
  80. package/src/messages/labelContentByAgent.test.js +0 -725
  81. package/src/messages/prune.js +0 -438
  82. package/src/messages/reducer.js +0 -60
  83. package/src/messages/shiftIndexTokenCountMap.test.js +0 -63
  84. package/src/messages/summarize.js +0 -146
  85. package/src/messages/summarize.test.js +0 -332
  86. package/src/messages/tools.js +0 -90
  87. package/src/mockStream.js +0 -81
  88. package/src/prompts/collab.js +0 -7
  89. package/src/prompts/index.js +0 -3
  90. package/src/prompts/taskmanager.js +0 -58
  91. package/src/run.js +0 -427
  92. package/src/schemas/index.js +0 -3
  93. package/src/schemas/schema-preparation.test.js +0 -370
  94. package/src/schemas/validate.js +0 -314
  95. package/src/schemas/validate.test.js +0 -264
  96. package/src/scripts/abort.js +0 -127
  97. package/src/scripts/ant_web_search.js +0 -130
  98. package/src/scripts/ant_web_search_edge_case.js +0 -133
  99. package/src/scripts/ant_web_search_error_edge_case.js +0 -119
  100. package/src/scripts/args.js +0 -41
  101. package/src/scripts/bedrock-cache-debug.js +0 -186
  102. package/src/scripts/bedrock-content-aggregation-test.js +0 -195
  103. package/src/scripts/bedrock-merge-test.js +0 -80
  104. package/src/scripts/bedrock-parallel-tools-test.js +0 -150
  105. package/src/scripts/caching.js +0 -106
  106. package/src/scripts/cli.js +0 -152
  107. package/src/scripts/cli2.js +0 -119
  108. package/src/scripts/cli3.js +0 -163
  109. package/src/scripts/cli4.js +0 -165
  110. package/src/scripts/cli5.js +0 -165
  111. package/src/scripts/code_exec.js +0 -171
  112. package/src/scripts/code_exec_files.js +0 -180
  113. package/src/scripts/code_exec_multi_session.js +0 -185
  114. package/src/scripts/code_exec_ptc.js +0 -265
  115. package/src/scripts/code_exec_session.js +0 -217
  116. package/src/scripts/code_exec_simple.js +0 -120
  117. package/src/scripts/content.js +0 -111
  118. package/src/scripts/empty_input.js +0 -125
  119. package/src/scripts/handoff-test.js +0 -96
  120. package/src/scripts/image.js +0 -138
  121. package/src/scripts/memory.js +0 -83
  122. package/src/scripts/multi-agent-chain.js +0 -271
  123. package/src/scripts/multi-agent-conditional.js +0 -185
  124. package/src/scripts/multi-agent-document-review-chain.js +0 -171
  125. package/src/scripts/multi-agent-hybrid-flow.js +0 -264
  126. package/src/scripts/multi-agent-parallel-start.js +0 -214
  127. package/src/scripts/multi-agent-parallel.js +0 -346
  128. package/src/scripts/multi-agent-sequence.js +0 -184
  129. package/src/scripts/multi-agent-supervisor.js +0 -324
  130. package/src/scripts/multi-agent-test.js +0 -147
  131. package/src/scripts/parallel-asymmetric-tools-test.js +0 -202
  132. package/src/scripts/parallel-full-metadata-test.js +0 -176
  133. package/src/scripts/parallel-tools-test.js +0 -256
  134. package/src/scripts/programmatic_exec.js +0 -277
  135. package/src/scripts/programmatic_exec_agent.js +0 -168
  136. package/src/scripts/search.js +0 -118
  137. package/src/scripts/sequential-full-metadata-test.js +0 -143
  138. package/src/scripts/simple.js +0 -174
  139. package/src/scripts/single-agent-metadata-test.js +0 -152
  140. package/src/scripts/stream.js +0 -113
  141. package/src/scripts/test-custom-prompt-key.js +0 -132
  142. package/src/scripts/test-handoff-input.js +0 -143
  143. package/src/scripts/test-handoff-preamble.js +0 -227
  144. package/src/scripts/test-handoff-steering.js +0 -353
  145. package/src/scripts/test-multi-agent-list-handoff.js +0 -318
  146. package/src/scripts/test-parallel-agent-labeling.js +0 -253
  147. package/src/scripts/test-parallel-handoffs.js +0 -229
  148. package/src/scripts/test-thinking-handoff-bedrock.js +0 -132
  149. package/src/scripts/test-thinking-handoff.js +0 -132
  150. package/src/scripts/test-thinking-to-thinking-handoff-bedrock.js +0 -140
  151. package/src/scripts/test-tool-before-handoff-role-order.js +0 -223
  152. package/src/scripts/test-tools-before-handoff.js +0 -187
  153. package/src/scripts/test_code_api.js +0 -263
  154. package/src/scripts/thinking-bedrock.js +0 -128
  155. package/src/scripts/thinking-vertexai.js +0 -130
  156. package/src/scripts/thinking.js +0 -134
  157. package/src/scripts/tool_search.js +0 -114
  158. package/src/scripts/tools.js +0 -125
  159. package/src/specs/agent-handoffs-bedrock.integration.test.js +0 -280
  160. package/src/specs/agent-handoffs.test.js +0 -924
  161. package/src/specs/anthropic.simple.test.js +0 -287
  162. package/src/specs/azure.simple.test.js +0 -381
  163. package/src/specs/cache.simple.test.js +0 -282
  164. package/src/specs/custom-event-await.test.js +0 -148
  165. package/src/specs/deepseek.simple.test.js +0 -189
  166. package/src/specs/emergency-prune.test.js +0 -308
  167. package/src/specs/moonshot.simple.test.js +0 -237
  168. package/src/specs/observability.integration.test.js +0 -1337
  169. package/src/specs/openai.simple.test.js +0 -233
  170. package/src/specs/openrouter.simple.test.js +0 -202
  171. package/src/specs/prune.test.js +0 -733
  172. package/src/specs/reasoning.test.js +0 -144
  173. package/src/specs/spec.utils.js +0 -4
  174. package/src/specs/thinking-handoff.test.js +0 -486
  175. package/src/specs/thinking-prune.test.js +0 -600
  176. package/src/specs/token-distribution-edge-case.test.js +0 -246
  177. package/src/specs/token-memoization.test.js +0 -32
  178. package/src/specs/tokens.test.js +0 -49
  179. package/src/specs/tool-error.test.js +0 -139
  180. package/src/splitStream.js +0 -204
  181. package/src/splitStream.test.js +0 -504
  182. package/src/stream.js +0 -650
  183. package/src/stream.test.js +0 -225
  184. package/src/test/mockTools.js +0 -340
  185. package/src/tools/BrowserTools.js +0 -245
  186. package/src/tools/Calculator.js +0 -38
  187. package/src/tools/Calculator.test.js +0 -225
  188. package/src/tools/CodeExecutor.js +0 -233
  189. package/src/tools/ProgrammaticToolCalling.js +0 -602
  190. package/src/tools/StreamingToolCallBuffer.js +0 -179
  191. package/src/tools/ToolNode.js +0 -930
  192. package/src/tools/ToolSearch.js +0 -904
  193. package/src/tools/__tests__/BrowserTools.test.js +0 -306
  194. package/src/tools/__tests__/ProgrammaticToolCalling.integration.test.js +0 -276
  195. package/src/tools/__tests__/ProgrammaticToolCalling.test.js +0 -807
  196. package/src/tools/__tests__/StreamingToolCallBuffer.test.js +0 -175
  197. package/src/tools/__tests__/ToolApproval.test.js +0 -675
  198. package/src/tools/__tests__/ToolNode.recovery.test.js +0 -200
  199. package/src/tools/__tests__/ToolNode.session.test.js +0 -319
  200. package/src/tools/__tests__/ToolSearch.integration.test.js +0 -125
  201. package/src/tools/__tests__/ToolSearch.test.js +0 -812
  202. package/src/tools/__tests__/handlers.test.js +0 -799
  203. package/src/tools/__tests__/truncation-recovery.integration.test.js +0 -362
  204. package/src/tools/handlers.js +0 -306
  205. package/src/tools/schema.js +0 -25
  206. package/src/tools/search/anthropic.js +0 -34
  207. package/src/tools/search/content.js +0 -116
  208. package/src/tools/search/content.test.js +0 -133
  209. package/src/tools/search/firecrawl.js +0 -173
  210. package/src/tools/search/format.js +0 -198
  211. package/src/tools/search/highlights.js +0 -241
  212. package/src/tools/search/index.js +0 -3
  213. package/src/tools/search/jina-reranker.test.js +0 -106
  214. package/src/tools/search/rerankers.js +0 -165
  215. package/src/tools/search/schema.js +0 -102
  216. package/src/tools/search/search.js +0 -561
  217. package/src/tools/search/serper-scraper.js +0 -126
  218. package/src/tools/search/test.js +0 -129
  219. package/src/tools/search/tool.js +0 -453
  220. package/src/tools/search/types.js +0 -2
  221. package/src/tools/search/utils.js +0 -59
  222. package/src/types/graph.js +0 -24
  223. package/src/types/graph.test.js +0 -192
  224. package/src/types/index.js +0 -7
  225. package/src/types/llm.js +0 -2
  226. package/src/types/messages.js +0 -2
  227. package/src/types/run.js +0 -2
  228. package/src/types/stream.js +0 -2
  229. package/src/types/tools.js +0 -2
  230. package/src/utils/contextAnalytics.js +0 -79
  231. package/src/utils/contextAnalytics.test.js +0 -166
  232. package/src/utils/events.js +0 -26
  233. package/src/utils/graph.js +0 -11
  234. package/src/utils/handlers.js +0 -65
  235. package/src/utils/index.js +0 -10
  236. package/src/utils/llm.js +0 -21
  237. package/src/utils/llmConfig.js +0 -205
  238. package/src/utils/logging.js +0 -37
  239. package/src/utils/misc.js +0 -51
  240. package/src/utils/run.js +0 -69
  241. package/src/utils/schema.js +0 -21
  242. package/src/utils/title.js +0 -119
  243. package/src/utils/tokens.js +0 -92
  244. package/src/utils/toonFormat.js +0 -379
@@ -1,353 +0,0 @@
1
- import { config } from 'dotenv';
2
- config();
3
- import { HumanMessage } from '@langchain/core/messages';
4
- import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
5
- import { ToolEndHandler, ModelEndHandler } from '@/events';
6
- import { GraphEvents, Providers } from '@/common';
7
- import { Run } from '@/run';
8
- /**
9
- * Test LLM steering quality after handoff with system prompt instructions.
10
- *
11
- * Validates that the receiving agent clearly understands:
12
- * 1. WHO it is (its role/identity)
13
- * 2. WHAT the task is (instructions from the handoff)
14
- * 3. WHO transferred control (source agent context)
15
- *
16
- * Uses specific, verifiable instructions so we can check the output.
17
- */
18
- async function testHandoffSteering() {
19
- console.log('='.repeat(60));
20
- console.log('Test: Handoff Steering Quality (System Prompt Instructions)');
21
- console.log('='.repeat(60));
22
- const { contentParts, aggregateContent } = createContentAggregator();
23
- let currentAgent = '';
24
- const agentResponses = {};
25
- const customHandlers = {
26
- [GraphEvents.TOOL_END]: new ToolEndHandler(),
27
- [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
28
- [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
29
- [GraphEvents.ON_RUN_STEP]: {
30
- handle: (event, data) => {
31
- const runStep = data;
32
- if (runStep.agentId) {
33
- currentAgent = runStep.agentId;
34
- console.log(`\n[Agent: ${currentAgent}] Processing...`);
35
- }
36
- aggregateContent({ event, data: runStep });
37
- },
38
- },
39
- [GraphEvents.ON_RUN_STEP_COMPLETED]: {
40
- handle: (event, data) => {
41
- aggregateContent({
42
- event,
43
- data: data,
44
- });
45
- },
46
- },
47
- [GraphEvents.ON_MESSAGE_DELTA]: {
48
- handle: (event, data) => {
49
- aggregateContent({ event, data: data });
50
- },
51
- },
52
- [GraphEvents.TOOL_START]: {
53
- handle: (_event, data, _metadata) => {
54
- const toolData = data;
55
- if (toolData?.name?.includes('transfer_to_')) {
56
- const specialist = toolData.name.replace('lc_transfer_to_', '');
57
- console.log(`\n >> Handoff to: ${specialist}`);
58
- }
59
- },
60
- },
61
- };
62
- /**
63
- * Test 1: Basic handoff with specific task instructions
64
- * The specialist should clearly follow the coordinator's instructions.
65
- */
66
- async function test1_basicInstructions() {
67
- console.log('\n' + '-'.repeat(60));
68
- console.log('TEST 1: Basic handoff with specific task instructions');
69
- console.log('-'.repeat(60));
70
- const agents = [
71
- {
72
- agentId: 'coordinator',
73
- provider: Providers.OPENAI,
74
- clientOptions: {
75
- modelName: 'gpt-4.1-mini',
76
- apiKey: process.env.OPENAI_API_KEY,
77
- },
78
- instructions: `You are a Task Coordinator. When a user makes a request:
79
- 1. Analyze what they need
80
- 2. Transfer to the specialist with SPECIFIC instructions about what to do
81
-
82
- IMPORTANT: Always use the transfer tool. Do not try to do the work yourself.`,
83
- maxContextTokens: 8000,
84
- },
85
- {
86
- agentId: 'specialist',
87
- provider: Providers.OPENAI,
88
- clientOptions: {
89
- modelName: 'gpt-4.1-mini',
90
- apiKey: process.env.OPENAI_API_KEY,
91
- },
92
- instructions: `You are a Technical Specialist. You provide detailed technical responses.
93
- When you receive a task, execute it thoroughly. Always identify yourself as the Technical Specialist in your response.`,
94
- maxContextTokens: 8000,
95
- },
96
- ];
97
- const edges = [
98
- {
99
- from: 'coordinator',
100
- to: 'specialist',
101
- edgeType: 'transfer',
102
- description: 'Transfer to specialist for detailed work',
103
- prompt: 'Provide specific instructions for the specialist about what to analyze or create',
104
- promptKey: 'instructions',
105
- },
106
- ];
107
- const run = await Run.create({
108
- runId: `steering-test1-${Date.now()}`,
109
- graphConfig: { type: 'multi-agent', agents, edges },
110
- customHandlers,
111
- returnContent: true,
112
- skipCleanup: true,
113
- });
114
- const streamConfig = {
115
- configurable: { thread_id: 'steering-test1' },
116
- streamMode: 'values',
117
- version: 'v2',
118
- };
119
- const query = 'Explain the difference between TCP and UDP. I need exactly 3 bullet points for each protocol.';
120
- console.log(`\nQuery: "${query}"\n`);
121
- const messages = [new HumanMessage(query)];
122
- await run.processStream({ messages }, streamConfig);
123
- const finalMessages = run.getRunMessages();
124
- console.log('\n--- Specialist Response ---');
125
- if (finalMessages) {
126
- for (const msg of finalMessages) {
127
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
128
- console.log(msg.content);
129
- agentResponses['test1'] = msg.content;
130
- }
131
- }
132
- }
133
- // Check steering quality
134
- const response = agentResponses['test1'] || '';
135
- const mentionsSpecialist = response.toLowerCase().includes('specialist') ||
136
- response.toLowerCase().includes('technical');
137
- const hasBulletPoints = (response.match(/[-•*]\s/g) || []).length >= 4 ||
138
- (response.match(/\d\./g) || []).length >= 4;
139
- const mentionsTCP = response.toLowerCase().includes('tcp');
140
- const mentionsUDP = response.toLowerCase().includes('udp');
141
- console.log('\n--- Steering Checks ---');
142
- console.log(` Identifies as specialist: ${mentionsSpecialist ? 'YES' : 'NO'}`);
143
- console.log(` Has bullet points: ${hasBulletPoints ? 'YES' : 'NO'}`);
144
- console.log(` Covers TCP: ${mentionsTCP ? 'YES' : 'NO'}`);
145
- console.log(` Covers UDP: ${mentionsUDP ? 'YES' : 'NO'}`);
146
- }
147
- /**
148
- * Test 2: Handoff with very specific formatting instructions
149
- * Tests whether the receiving agent follows precise instructions from the handoff.
150
- */
151
- async function test2_preciseFormatting() {
152
- console.log('\n' + '-'.repeat(60));
153
- console.log('TEST 2: Handoff with precise formatting instructions');
154
- console.log('-'.repeat(60));
155
- const agents = [
156
- {
157
- agentId: 'manager',
158
- provider: Providers.OPENAI,
159
- clientOptions: {
160
- modelName: 'gpt-4.1-mini',
161
- apiKey: process.env.OPENAI_API_KEY,
162
- },
163
- instructions: `You are a Project Manager. When a user asks about a topic:
164
- 1. Transfer to the writer with VERY SPECIFIC formatting instructions
165
- 2. Tell the writer to start their response with "REPORT:" and end with "END REPORT"
166
- 3. Tell the writer to use exactly 2 paragraphs
167
-
168
- CRITICAL: Always transfer to the writer. Do NOT write the report yourself.`,
169
- maxContextTokens: 8000,
170
- },
171
- {
172
- agentId: 'writer',
173
- provider: Providers.OPENAI,
174
- clientOptions: {
175
- modelName: 'gpt-4.1-mini',
176
- apiKey: process.env.OPENAI_API_KEY,
177
- },
178
- instructions: `You are a Report Writer. Follow any formatting instructions you receive precisely.
179
- You must follow the exact format requested.`,
180
- maxContextTokens: 8000,
181
- },
182
- ];
183
- const edges = [
184
- {
185
- from: 'manager',
186
- to: 'writer',
187
- edgeType: 'transfer',
188
- description: 'Transfer to writer for report creation',
189
- prompt: 'Provide specific formatting and content instructions for the writer',
190
- promptKey: 'instructions',
191
- },
192
- ];
193
- const run = await Run.create({
194
- runId: `steering-test2-${Date.now()}`,
195
- graphConfig: { type: 'multi-agent', agents, edges },
196
- customHandlers,
197
- returnContent: true,
198
- skipCleanup: true,
199
- });
200
- const streamConfig = {
201
- configurable: { thread_id: 'steering-test2' },
202
- streamMode: 'values',
203
- version: 'v2',
204
- };
205
- const query = 'Write a brief report about cloud computing benefits.';
206
- console.log(`\nQuery: "${query}"\n`);
207
- const messages = [new HumanMessage(query)];
208
- await run.processStream({ messages }, streamConfig);
209
- const finalMessages = run.getRunMessages();
210
- console.log('\n--- Writer Response ---');
211
- if (finalMessages) {
212
- for (const msg of finalMessages) {
213
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
214
- console.log(msg.content);
215
- agentResponses['test2'] = msg.content;
216
- }
217
- }
218
- }
219
- // Check if the writer followed the manager's formatting instructions
220
- const response = agentResponses['test2'] || '';
221
- const startsWithReport = response.trimStart().startsWith('REPORT:');
222
- const endsWithEndReport = response.trimEnd().endsWith('END REPORT');
223
- const mentionsCloud = response.toLowerCase().includes('cloud');
224
- console.log('\n--- Steering Checks ---');
225
- console.log(` Starts with "REPORT:": ${startsWithReport ? 'YES' : 'NO'}`);
226
- console.log(` Ends with "END REPORT": ${endsWithEndReport ? 'YES' : 'NO'}`);
227
- console.log(` Covers cloud computing: ${mentionsCloud ? 'YES' : 'NO'}`);
228
- }
229
- /**
230
- * Test 3: Multi-turn after handoff
231
- * Tests that identity and context persist across turns.
232
- */
233
- async function test3_multiTurn() {
234
- console.log('\n' + '-'.repeat(60));
235
- console.log('TEST 3: Multi-turn conversation after handoff');
236
- console.log('-'.repeat(60));
237
- const agents = [
238
- {
239
- agentId: 'router',
240
- provider: Providers.OPENAI,
241
- clientOptions: {
242
- modelName: 'gpt-4.1-mini',
243
- apiKey: process.env.OPENAI_API_KEY,
244
- },
245
- instructions: `You are a Router. Transfer all requests to the chef.
246
- When transferring, tell the chef to respond ONLY about Italian cuisine.
247
- CRITICAL: Always transfer. Never answer directly.`,
248
- maxContextTokens: 8000,
249
- },
250
- {
251
- agentId: 'chef',
252
- provider: Providers.OPENAI,
253
- clientOptions: {
254
- modelName: 'gpt-4.1-mini',
255
- apiKey: process.env.OPENAI_API_KEY,
256
- },
257
- instructions: `You are Chef Marco, an Italian cuisine expert.
258
- Always introduce yourself as Chef Marco. Only discuss Italian food.
259
- If asked about non-Italian food, politely redirect to Italian alternatives.`,
260
- maxContextTokens: 8000,
261
- },
262
- ];
263
- const edges = [
264
- {
265
- from: 'router',
266
- to: 'chef',
267
- edgeType: 'transfer',
268
- description: 'Transfer to chef',
269
- prompt: 'Instructions for the chef about how to respond',
270
- promptKey: 'instructions',
271
- },
272
- ];
273
- const run = await Run.create({
274
- runId: `steering-test3-${Date.now()}`,
275
- graphConfig: { type: 'multi-agent', agents, edges },
276
- customHandlers,
277
- returnContent: true,
278
- skipCleanup: true,
279
- });
280
- const streamConfig = {
281
- configurable: { thread_id: 'steering-test3' },
282
- streamMode: 'values',
283
- version: 'v2',
284
- };
285
- const conversationHistory = [];
286
- // Turn 1
287
- const query1 = 'What is a good pasta recipe?';
288
- console.log(`\nTurn 1: "${query1}"\n`);
289
- conversationHistory.push(new HumanMessage(query1));
290
- await run.processStream({ messages: conversationHistory }, streamConfig);
291
- const turn1Messages = run.getRunMessages();
292
- if (turn1Messages) {
293
- conversationHistory.push(...turn1Messages);
294
- for (const msg of turn1Messages) {
295
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
296
- console.log(msg.content.substring(0, 300) + '...');
297
- agentResponses['test3_turn1'] = msg.content;
298
- }
299
- }
300
- }
301
- // Turn 2 - follow up
302
- const query2 = 'What about sushi instead?';
303
- console.log(`\nTurn 2: "${query2}"\n`);
304
- conversationHistory.push(new HumanMessage(query2));
305
- await run.processStream({ messages: conversationHistory }, streamConfig);
306
- const turn2Messages = run.getRunMessages();
307
- if (turn2Messages) {
308
- conversationHistory.push(...turn2Messages);
309
- for (const msg of turn2Messages) {
310
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
311
- console.log(msg.content.substring(0, 300) + '...');
312
- agentResponses['test3_turn2'] = msg.content;
313
- }
314
- }
315
- }
316
- const response1 = agentResponses['test3_turn1'] || '';
317
- const response2 = agentResponses['test3_turn2'] || '';
318
- const t1Identity = response1.toLowerCase().includes('marco') ||
319
- response1.toLowerCase().includes('chef');
320
- const t1Italian = response1.toLowerCase().includes('italian') ||
321
- response1.toLowerCase().includes('pasta');
322
- const t2Redirects = response2.toLowerCase().includes('italian') ||
323
- response2.toLowerCase().includes('instead');
324
- console.log('\n--- Steering Checks ---');
325
- console.log(` Turn 1 - Chef identity: ${t1Identity ? 'YES' : 'NO'}`);
326
- console.log(` Turn 1 - Italian focus: ${t1Italian ? 'YES' : 'NO'}`);
327
- console.log(` Turn 2 - Redirects to Italian: ${t2Redirects ? 'YES' : 'NO'}`);
328
- }
329
- try {
330
- await test1_basicInstructions();
331
- await test2_preciseFormatting();
332
- await test3_multiTurn();
333
- console.log('\n\n' + '='.repeat(60));
334
- console.log('ALL TESTS COMPLETE');
335
- console.log('='.repeat(60));
336
- console.log('\nReview the steering checks above.');
337
- console.log('If the receiving agents consistently follow instructions and maintain identity,');
338
- console.log('the system prompt injection approach is working correctly.');
339
- }
340
- catch (error) {
341
- console.error('\nTest failed:', error);
342
- process.exit(1);
343
- }
344
- }
345
- process.on('unhandledRejection', (reason) => {
346
- console.error('Unhandled Rejection:', reason);
347
- process.exit(1);
348
- });
349
- testHandoffSteering().catch((err) => {
350
- console.error('Test failed:', err);
351
- process.exit(1);
352
- });
353
- //# sourceMappingURL=test-handoff-steering.js.map
@@ -1,318 +0,0 @@
1
- #!/usr/bin/env bun
2
- import { config } from 'dotenv';
3
- config();
4
- import { HumanMessage } from '@langchain/core/messages';
5
- import { labelContentByAgent, formatAgentMessages } from '@/messages/format';
6
- import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
7
- import { Providers, GraphEvents, Constants, StepTypes } from '@/common';
8
- import { ToolEndHandler, ModelEndHandler } from '@/events';
9
- import { Run } from '@/run';
10
- const conversationHistory = [];
11
- /**
12
- * Test supervisor-based multi-agent system using a single edge with multiple destinations
13
- *
14
- * Instead of creating 5 separate edges, we use one edge with an array of destinations
15
- * This should create handoff tools for all 5 specialists from a single edge definition
16
- */
17
- async function testSupervisorListHandoff() {
18
- console.log('Testing Supervisor with List-Based Handoff Edge...\n');
19
- // Set up content aggregator
20
- const { contentParts, aggregateContent } = createContentAggregator();
21
- // Track which specialist role was selected
22
- let selectedRole = '';
23
- // Create custom handlers
24
- const customHandlers = {
25
- [GraphEvents.TOOL_END]: new ToolEndHandler(),
26
- [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
27
- [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
28
- [GraphEvents.ON_RUN_STEP]: {
29
- handle: (event, data) => {
30
- const runStepData = data;
31
- if (runStepData?.name) {
32
- console.log(`\n[${runStepData.name}] Processing...`);
33
- }
34
- aggregateContent({ event, data: data });
35
- },
36
- },
37
- [GraphEvents.ON_RUN_STEP_COMPLETED]: {
38
- handle: (event, data) => {
39
- aggregateContent({
40
- event,
41
- data: data,
42
- });
43
- },
44
- },
45
- [GraphEvents.ON_MESSAGE_DELTA]: {
46
- handle: (event, data) => {
47
- aggregateContent({ event, data: data });
48
- },
49
- },
50
- [GraphEvents.TOOL_START]: {
51
- handle: (_event, data, metadata) => {
52
- const toolData = data;
53
- if (toolData?.name?.startsWith(Constants.LC_TRANSFER_TO_)) {
54
- const specialist = toolData.name.replace(Constants.LC_TRANSFER_TO_, '');
55
- console.log(`\n🔀 Transferring to ${specialist}...`);
56
- selectedRole = specialist;
57
- }
58
- },
59
- },
60
- };
61
- // Function to create the graph with a single edge to multiple specialists
62
- function createSupervisorGraphWithListEdge() {
63
- console.log(`\nCreating graph with supervisor and 5 specialist agents.`);
64
- console.log('Using a SINGLE edge with multiple destinations (list-based handoff).\n');
65
- // Define the adaptive specialist configuration that will be reused
66
- const specialistConfig = {
67
- provider: Providers.ANTHROPIC,
68
- clientOptions: {
69
- modelName: 'claude-haiku-4-5',
70
- apiKey: process.env.ANTHROPIC_API_KEY,
71
- },
72
- instructions: `You are an Adaptive Specialist. Your agent ID indicates your role:
73
-
74
- - data_analyst: Focus on statistical analysis, metrics, ML evaluation, A/B testing
75
- - security_expert: Focus on cybersecurity, vulnerability assessment, compliance
76
- - product_designer: Focus on UX/UI design, user research, accessibility
77
- - devops_engineer: Focus on CI/CD, infrastructure, cloud platforms, monitoring
78
- - legal_advisor: Focus on licensing, privacy laws, contracts, regulatory compliance
79
-
80
- The supervisor will provide specific instructions. Follow them while maintaining your expert perspective.`,
81
- maxContextTokens: 8000,
82
- };
83
- // Create the graph with supervisor and all 5 specialists
84
- const agents = [
85
- {
86
- agentId: 'supervisor',
87
- provider: Providers.ANTHROPIC,
88
- clientOptions: {
89
- modelName: 'claude-haiku-4-5',
90
- apiKey: process.env.ANTHROPIC_API_KEY,
91
- },
92
- instructions: `You are a Task Supervisor with access to 5 specialist agents:
93
- 1. transfer_to_data_analyst - For statistical analysis and metrics
94
- 2. transfer_to_security_expert - For cybersecurity and vulnerability assessment
95
- 3. transfer_to_product_designer - For UX/UI design
96
- 4. transfer_to_devops_engineer - For infrastructure and deployment
97
- 5. transfer_to_legal_advisor - For compliance and licensing
98
-
99
- Your role is to:
100
- 1. Analyze the incoming request
101
- 2. Decide which specialist is best suited
102
- 3. Use the appropriate transfer tool (e.g., transfer_to_data_analyst)
103
- 4. Provide specific instructions to guide their work
104
-
105
- Be specific about what you need from the specialist.`,
106
- maxContextTokens: 8000,
107
- },
108
- // Include all 5 specialists with the same adaptive configuration
109
- {
110
- agentId: 'data_analyst',
111
- ...specialistConfig,
112
- },
113
- {
114
- agentId: 'security_expert',
115
- ...specialistConfig,
116
- },
117
- {
118
- agentId: 'product_designer',
119
- ...specialistConfig,
120
- },
121
- {
122
- agentId: 'devops_engineer',
123
- ...specialistConfig,
124
- },
125
- {
126
- agentId: 'legal_advisor',
127
- ...specialistConfig,
128
- },
129
- ];
130
- // Create a SINGLE edge from supervisor to ALL 5 specialists using a list
131
- const edges = [
132
- {
133
- from: 'supervisor',
134
- to: [
135
- 'data_analyst',
136
- 'security_expert',
137
- 'product_designer',
138
- 'devops_engineer',
139
- 'legal_advisor',
140
- ],
141
- description: 'Transfer to appropriate specialist based on task requirements',
142
- edgeType: 'transfer',
143
- },
144
- ];
145
- return {
146
- runId: `supervisor-list-handoff-${Date.now()}`,
147
- graphConfig: {
148
- type: 'multi-agent',
149
- agents,
150
- edges,
151
- },
152
- customHandlers,
153
- returnContent: true,
154
- skipCleanup: true,
155
- };
156
- }
157
- try {
158
- // Test with different queries
159
- const testQueries = [
160
- 'What are the legal implications of using GPL-licensed code in our product?',
161
- ];
162
- const config = {
163
- configurable: {
164
- thread_id: 'supervisor-list-handoff-1',
165
- },
166
- streamMode: 'values',
167
- version: 'v2',
168
- };
169
- for (const query of testQueries) {
170
- console.log(`\n${'='.repeat(80)}`);
171
- console.log(`FIRST RUN - USER QUERY: "${query}"`);
172
- console.log('='.repeat(80));
173
- // Reset conversation
174
- conversationHistory.length = 0;
175
- conversationHistory.push(new HumanMessage(query));
176
- // Create graph with supervisor having a single edge to multiple specialists
177
- const runConfig = createSupervisorGraphWithListEdge();
178
- const run = await Run.create(runConfig);
179
- console.log('Processing first request...');
180
- // Process with streaming
181
- const inputs = {
182
- messages: conversationHistory,
183
- };
184
- const finalContentParts = await run.processStream(inputs, config);
185
- const finalMessages = run.getRunMessages();
186
- if (finalMessages) {
187
- conversationHistory.push(...finalMessages);
188
- }
189
- // Demo: Map contentParts to agentIds
190
- console.log(`\n${'─'.repeat(60)}`);
191
- console.log('CONTENT PARTS TO AGENT MAPPING:');
192
- console.log('─'.repeat(60));
193
- if (run.Graph) {
194
- // Get the mapping of contentPart index to agentId
195
- const contentPartAgentMap = run.Graph.getContentPartAgentMap();
196
- console.log(`\nTotal content parts: ${contentParts.length}`);
197
- console.log(`\nContent Part → Agent Mapping:`);
198
- contentPartAgentMap.forEach((agentId, index) => {
199
- const contentPart = contentParts[index];
200
- const contentType = contentPart?.type || 'unknown';
201
- const preview = contentType === 'text'
202
- ? contentPart.text?.slice(0, 50) || ''
203
- : contentType === 'tool_call'
204
- ? `Tool: ${contentPart.tool_call?.name || 'unknown'}`
205
- : contentType;
206
- console.log(` [${index}] ${agentId} → ${contentType}: ${preview}${preview.length >= 50 ? '...' : ''}`);
207
- });
208
- // Show agent participation summary
209
- console.log(`\n${'─'.repeat(60)}`);
210
- console.log('AGENT PARTICIPATION SUMMARY:');
211
- console.log('─'.repeat(60));
212
- const activeAgents = run.Graph.getActiveAgentIds();
213
- console.log(`\nActive agents (${activeAgents.length}):`, activeAgents);
214
- const stepsByAgent = run.Graph.getRunStepsByAgent();
215
- stepsByAgent.forEach((steps, agentId) => {
216
- const toolCallSteps = steps.filter((s) => s.type === StepTypes.TOOL_CALLS).length;
217
- const messageSteps = steps.filter((s) => s.type === StepTypes.MESSAGE_CREATION).length;
218
- console.log(`\n ${agentId}:`);
219
- console.log(` - Total steps: ${steps.length}`);
220
- console.log(` - Message steps: ${messageSteps}`);
221
- console.log(` - Tool call steps: ${toolCallSteps}`);
222
- });
223
- }
224
- // Show graph structure summary
225
- console.log(`\n${'─'.repeat(60)}`);
226
- console.log(`GRAPH STRUCTURE:`);
227
- console.log(`- Agents: 6 total (supervisor + 5 specialists)`);
228
- console.log(`- Edges: 1 edge with multiple destinations`);
229
- console.log(`- Edge type: handoff (creates individual tools for each destination)`);
230
- console.log(`- Result: Supervisor has 5 handoff tools from a single edge`);
231
- console.log('─'.repeat(60));
232
- // =============================================================
233
- // SECOND RUN: Demonstrate agent-labeled history
234
- // =============================================================
235
- console.log(`\n${'='.repeat(80)}`);
236
- console.log(`SECOND RUN - Simulating DB Load with Agent-Labeled History`);
237
- console.log('='.repeat(80));
238
- // Simulate what happens in the main app:
239
- // 1. Store contentParts + agentIdMap to "DB" (in-memory here)
240
- const dbStoredContentParts = [...contentParts];
241
- const dbStoredAgentIdMap = Object.fromEntries(run.Graph.getContentPartAgentMap());
242
- console.log('\n📦 Simulating DB storage:');
243
- console.log(` - Stored ${dbStoredContentParts.length} content parts`);
244
- console.log(` - Stored agent mappings for ${Object.keys(dbStoredAgentIdMap).length} parts`);
245
- // 2. On next run, load from "DB" and label by agent
246
- console.log('\n📥 Loading from DB and labeling by agent...');
247
- const agentNames = {
248
- supervisor: 'Supervisor',
249
- legal_advisor: 'Legal Advisor',
250
- data_analyst: 'Data Analyst',
251
- security_expert: 'Security Expert',
252
- product_designer: 'Product Designer',
253
- devops_engineer: 'DevOps Engineer',
254
- };
255
- const labeledContentParts = labelContentByAgent(dbStoredContentParts.filter((p) => p != null), dbStoredAgentIdMap, agentNames);
256
- console.log(` - Labeled ${labeledContentParts.length} content parts by agent`);
257
- // 3. Convert labeled content parts to payload format
258
- const payload = [
259
- {
260
- role: 'user',
261
- content: query,
262
- },
263
- {
264
- role: 'assistant',
265
- content: labeledContentParts,
266
- },
267
- ];
268
- // 4. Format using formatAgentMessages (simulates what main app does)
269
- console.log('\n🔧 Calling formatAgentMessages...');
270
- const { messages: formattedMessages } = formatAgentMessages(payload);
271
- console.log(` - Formatted into ${formattedMessages.length} BaseMessages`);
272
- // Show a preview of what the supervisor will see
273
- console.log('\n👁️ Preview of formatted history for supervisor:');
274
- console.log('─'.repeat(80));
275
- for (let i = 0; i < formattedMessages.length; i++) {
276
- const msg = formattedMessages[i];
277
- const role = msg._getType();
278
- const preview = typeof msg.content === 'string'
279
- ? msg.content.slice(0, 200)
280
- : JSON.stringify(msg.content).slice(0, 200);
281
- console.log(`[${i}] ${role}: ${preview}${preview.length >= 200 ? '...' : ''}`);
282
- }
283
- console.log('─'.repeat(80));
284
- // 5. Create a new run with the formatted history + a followup question
285
- console.log('\n🚀 Starting second run with agent-labeled history + followup question...');
286
- const followupQuery = 'Can you summarize the key legal points from your previous response?';
287
- console.log(` Followup: "${followupQuery}"`);
288
- // Reset for second run
289
- const secondRunHistory = [
290
- ...formattedMessages,
291
- new HumanMessage(followupQuery),
292
- ];
293
- const runConfig2 = createSupervisorGraphWithListEdge();
294
- const run2 = await Run.create(runConfig2);
295
- const inputs2 = {
296
- messages: secondRunHistory,
297
- };
298
- await run2.processStream(inputs2, config);
299
- console.log('\n✅ Second run completed successfully!');
300
- console.log(' The supervisor correctly understood that the legal_advisor handled');
301
- console.log(' the previous query, avoiding identity confusion.');
302
- }
303
- // Final summary
304
- console.log(`\n${'='.repeat(60)}`);
305
- console.log('TEST COMPLETE');
306
- console.log('='.repeat(60));
307
- console.log('\nThis test demonstrates that a single edge with multiple');
308
- console.log('destinations in the "to" field creates individual handoff');
309
- console.log('tools for each destination agent, achieving the same result');
310
- console.log('as creating separate edges for each specialist.');
311
- }
312
- catch (error) {
313
- console.error('Error in supervisor list handoff test:', error);
314
- }
315
- }
316
- // Run the test
317
- testSupervisorListHandoff();
318
- //# sourceMappingURL=test-multi-agent-list-handoff.js.map