@illuma-ai/agents 1.1.21 → 1.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/dist/cjs/graphs/Graph.cjs +12 -1
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/graphs/MultiAgentGraph.cjs +85 -1
  4. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  5. package/dist/cjs/run.cjs +20 -9
  6. package/dist/cjs/run.cjs.map +1 -1
  7. package/dist/esm/graphs/Graph.mjs +12 -1
  8. package/dist/esm/graphs/Graph.mjs.map +1 -1
  9. package/dist/esm/graphs/MultiAgentGraph.mjs +85 -1
  10. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  11. package/dist/esm/run.mjs +20 -9
  12. package/dist/esm/run.mjs.map +1 -1
  13. package/dist/types/graphs/MultiAgentGraph.d.ts +17 -0
  14. package/package.json +1 -1
  15. package/src/graphs/Graph.ts +12 -1
  16. package/src/graphs/MultiAgentGraph.ts +105 -1
  17. package/src/graphs/__tests__/multi-agent-delegate.test.ts +191 -0
  18. package/src/run.ts +20 -11
  19. package/src/scripts/test-bedrock-handoff-autonomous.ts +231 -0
  20. package/src/agents/AgentContext.js +0 -782
  21. package/src/agents/AgentContext.test.js +0 -421
  22. package/src/agents/__tests__/AgentContext.test.js +0 -678
  23. package/src/agents/__tests__/resolveStructuredOutputMode.test.js +0 -117
  24. package/src/common/enum.js +0 -192
  25. package/src/common/index.js +0 -3
  26. package/src/events.js +0 -166
  27. package/src/graphs/Graph.js +0 -1857
  28. package/src/graphs/MultiAgentGraph.js +0 -1092
  29. package/src/graphs/__tests__/structured-output.integration.test.js +0 -624
  30. package/src/graphs/__tests__/structured-output.test.js +0 -144
  31. package/src/graphs/contextManagement.e2e.test.js +0 -718
  32. package/src/graphs/contextManagement.test.js +0 -485
  33. package/src/graphs/handoffValidation.test.js +0 -276
  34. package/src/graphs/index.js +0 -3
  35. package/src/index.js +0 -28
  36. package/src/instrumentation.js +0 -21
  37. package/src/llm/anthropic/index.js +0 -319
  38. package/src/llm/anthropic/types.js +0 -46
  39. package/src/llm/anthropic/utils/message_inputs.js +0 -627
  40. package/src/llm/anthropic/utils/message_outputs.js +0 -290
  41. package/src/llm/anthropic/utils/output_parsers.js +0 -89
  42. package/src/llm/anthropic/utils/tools.js +0 -25
  43. package/src/llm/bedrock/__tests__/bedrock-caching.test.js +0 -392
  44. package/src/llm/bedrock/index.js +0 -303
  45. package/src/llm/bedrock/types.js +0 -2
  46. package/src/llm/bedrock/utils/index.js +0 -6
  47. package/src/llm/bedrock/utils/message_inputs.js +0 -463
  48. package/src/llm/bedrock/utils/message_outputs.js +0 -269
  49. package/src/llm/fake.js +0 -92
  50. package/src/llm/google/index.js +0 -215
  51. package/src/llm/google/types.js +0 -12
  52. package/src/llm/google/utils/common.js +0 -670
  53. package/src/llm/google/utils/tools.js +0 -111
  54. package/src/llm/google/utils/zod_to_genai_parameters.js +0 -47
  55. package/src/llm/openai/index.js +0 -1033
  56. package/src/llm/openai/types.js +0 -2
  57. package/src/llm/openai/utils/index.js +0 -756
  58. package/src/llm/openai/utils/isReasoningModel.test.js +0 -79
  59. package/src/llm/openrouter/index.js +0 -261
  60. package/src/llm/openrouter/reasoning.test.js +0 -181
  61. package/src/llm/providers.js +0 -36
  62. package/src/llm/text.js +0 -65
  63. package/src/llm/vertexai/index.js +0 -402
  64. package/src/messages/__tests__/tools.test.js +0 -392
  65. package/src/messages/cache.js +0 -404
  66. package/src/messages/cache.test.js +0 -1167
  67. package/src/messages/content.js +0 -48
  68. package/src/messages/content.test.js +0 -314
  69. package/src/messages/core.js +0 -359
  70. package/src/messages/ensureThinkingBlock.test.js +0 -997
  71. package/src/messages/format.js +0 -973
  72. package/src/messages/formatAgentMessages.test.js +0 -2278
  73. package/src/messages/formatAgentMessages.tools.test.js +0 -362
  74. package/src/messages/formatMessage.test.js +0 -608
  75. package/src/messages/ids.js +0 -18
  76. package/src/messages/index.js +0 -9
  77. package/src/messages/labelContentByAgent.test.js +0 -725
  78. package/src/messages/prune.js +0 -438
  79. package/src/messages/reducer.js +0 -60
  80. package/src/messages/shiftIndexTokenCountMap.test.js +0 -63
  81. package/src/messages/summarize.js +0 -146
  82. package/src/messages/summarize.test.js +0 -332
  83. package/src/messages/tools.js +0 -90
  84. package/src/mockStream.js +0 -81
  85. package/src/prompts/collab.js +0 -7
  86. package/src/prompts/index.js +0 -3
  87. package/src/prompts/taskmanager.js +0 -58
  88. package/src/run.js +0 -427
  89. package/src/schemas/index.js +0 -3
  90. package/src/schemas/schema-preparation.test.js +0 -370
  91. package/src/schemas/validate.js +0 -314
  92. package/src/schemas/validate.test.js +0 -264
  93. package/src/scripts/abort.js +0 -127
  94. package/src/scripts/ant_web_search.js +0 -130
  95. package/src/scripts/ant_web_search_edge_case.js +0 -133
  96. package/src/scripts/ant_web_search_error_edge_case.js +0 -119
  97. package/src/scripts/args.js +0 -41
  98. package/src/scripts/bedrock-cache-debug.js +0 -186
  99. package/src/scripts/bedrock-content-aggregation-test.js +0 -195
  100. package/src/scripts/bedrock-merge-test.js +0 -80
  101. package/src/scripts/bedrock-parallel-tools-test.js +0 -150
  102. package/src/scripts/caching.js +0 -106
  103. package/src/scripts/cli.js +0 -152
  104. package/src/scripts/cli2.js +0 -119
  105. package/src/scripts/cli3.js +0 -163
  106. package/src/scripts/cli4.js +0 -165
  107. package/src/scripts/cli5.js +0 -165
  108. package/src/scripts/code_exec.js +0 -171
  109. package/src/scripts/code_exec_files.js +0 -180
  110. package/src/scripts/code_exec_multi_session.js +0 -185
  111. package/src/scripts/code_exec_ptc.js +0 -265
  112. package/src/scripts/code_exec_session.js +0 -217
  113. package/src/scripts/code_exec_simple.js +0 -120
  114. package/src/scripts/content.js +0 -111
  115. package/src/scripts/empty_input.js +0 -125
  116. package/src/scripts/handoff-test.js +0 -96
  117. package/src/scripts/image.js +0 -138
  118. package/src/scripts/memory.js +0 -83
  119. package/src/scripts/multi-agent-chain.js +0 -271
  120. package/src/scripts/multi-agent-conditional.js +0 -185
  121. package/src/scripts/multi-agent-document-review-chain.js +0 -171
  122. package/src/scripts/multi-agent-hybrid-flow.js +0 -264
  123. package/src/scripts/multi-agent-parallel-start.js +0 -214
  124. package/src/scripts/multi-agent-parallel.js +0 -346
  125. package/src/scripts/multi-agent-sequence.js +0 -184
  126. package/src/scripts/multi-agent-supervisor.js +0 -324
  127. package/src/scripts/multi-agent-test.js +0 -147
  128. package/src/scripts/parallel-asymmetric-tools-test.js +0 -202
  129. package/src/scripts/parallel-full-metadata-test.js +0 -176
  130. package/src/scripts/parallel-tools-test.js +0 -256
  131. package/src/scripts/programmatic_exec.js +0 -277
  132. package/src/scripts/programmatic_exec_agent.js +0 -168
  133. package/src/scripts/search.js +0 -118
  134. package/src/scripts/sequential-full-metadata-test.js +0 -143
  135. package/src/scripts/simple.js +0 -174
  136. package/src/scripts/single-agent-metadata-test.js +0 -152
  137. package/src/scripts/stream.js +0 -113
  138. package/src/scripts/test-custom-prompt-key.js +0 -132
  139. package/src/scripts/test-handoff-input.js +0 -143
  140. package/src/scripts/test-handoff-preamble.js +0 -227
  141. package/src/scripts/test-handoff-steering.js +0 -353
  142. package/src/scripts/test-multi-agent-list-handoff.js +0 -318
  143. package/src/scripts/test-parallel-agent-labeling.js +0 -253
  144. package/src/scripts/test-parallel-handoffs.js +0 -229
  145. package/src/scripts/test-thinking-handoff-bedrock.js +0 -132
  146. package/src/scripts/test-thinking-handoff.js +0 -132
  147. package/src/scripts/test-thinking-to-thinking-handoff-bedrock.js +0 -140
  148. package/src/scripts/test-tool-before-handoff-role-order.js +0 -223
  149. package/src/scripts/test-tools-before-handoff.js +0 -187
  150. package/src/scripts/test_code_api.js +0 -263
  151. package/src/scripts/thinking-bedrock.js +0 -128
  152. package/src/scripts/thinking-vertexai.js +0 -130
  153. package/src/scripts/thinking.js +0 -134
  154. package/src/scripts/tool_search.js +0 -114
  155. package/src/scripts/tools.js +0 -125
  156. package/src/specs/agent-handoffs-bedrock.integration.test.js +0 -280
  157. package/src/specs/agent-handoffs.test.js +0 -924
  158. package/src/specs/anthropic.simple.test.js +0 -287
  159. package/src/specs/azure.simple.test.js +0 -381
  160. package/src/specs/cache.simple.test.js +0 -282
  161. package/src/specs/custom-event-await.test.js +0 -148
  162. package/src/specs/deepseek.simple.test.js +0 -189
  163. package/src/specs/emergency-prune.test.js +0 -308
  164. package/src/specs/moonshot.simple.test.js +0 -237
  165. package/src/specs/observability.integration.test.js +0 -1337
  166. package/src/specs/openai.simple.test.js +0 -233
  167. package/src/specs/openrouter.simple.test.js +0 -202
  168. package/src/specs/prune.test.js +0 -733
  169. package/src/specs/reasoning.test.js +0 -144
  170. package/src/specs/spec.utils.js +0 -4
  171. package/src/specs/thinking-handoff.test.js +0 -486
  172. package/src/specs/thinking-prune.test.js +0 -600
  173. package/src/specs/token-distribution-edge-case.test.js +0 -246
  174. package/src/specs/token-memoization.test.js +0 -32
  175. package/src/specs/tokens.test.js +0 -49
  176. package/src/specs/tool-error.test.js +0 -139
  177. package/src/splitStream.js +0 -204
  178. package/src/splitStream.test.js +0 -504
  179. package/src/stream.js +0 -650
  180. package/src/stream.test.js +0 -225
  181. package/src/test/mockTools.js +0 -340
  182. package/src/tools/BrowserTools.js +0 -245
  183. package/src/tools/Calculator.js +0 -38
  184. package/src/tools/Calculator.test.js +0 -225
  185. package/src/tools/CodeExecutor.js +0 -233
  186. package/src/tools/ProgrammaticToolCalling.js +0 -602
  187. package/src/tools/StreamingToolCallBuffer.js +0 -179
  188. package/src/tools/ToolNode.js +0 -930
  189. package/src/tools/ToolSearch.js +0 -904
  190. package/src/tools/__tests__/BrowserTools.test.js +0 -306
  191. package/src/tools/__tests__/ProgrammaticToolCalling.integration.test.js +0 -276
  192. package/src/tools/__tests__/ProgrammaticToolCalling.test.js +0 -807
  193. package/src/tools/__tests__/StreamingToolCallBuffer.test.js +0 -175
  194. package/src/tools/__tests__/ToolApproval.test.js +0 -675
  195. package/src/tools/__tests__/ToolNode.recovery.test.js +0 -200
  196. package/src/tools/__tests__/ToolNode.session.test.js +0 -319
  197. package/src/tools/__tests__/ToolSearch.integration.test.js +0 -125
  198. package/src/tools/__tests__/ToolSearch.test.js +0 -812
  199. package/src/tools/__tests__/handlers.test.js +0 -799
  200. package/src/tools/__tests__/truncation-recovery.integration.test.js +0 -362
  201. package/src/tools/handlers.js +0 -306
  202. package/src/tools/schema.js +0 -25
  203. package/src/tools/search/anthropic.js +0 -34
  204. package/src/tools/search/content.js +0 -116
  205. package/src/tools/search/content.test.js +0 -133
  206. package/src/tools/search/firecrawl.js +0 -173
  207. package/src/tools/search/format.js +0 -198
  208. package/src/tools/search/highlights.js +0 -241
  209. package/src/tools/search/index.js +0 -3
  210. package/src/tools/search/jina-reranker.test.js +0 -106
  211. package/src/tools/search/rerankers.js +0 -165
  212. package/src/tools/search/schema.js +0 -102
  213. package/src/tools/search/search.js +0 -561
  214. package/src/tools/search/serper-scraper.js +0 -126
  215. package/src/tools/search/test.js +0 -129
  216. package/src/tools/search/tool.js +0 -453
  217. package/src/tools/search/types.js +0 -2
  218. package/src/tools/search/utils.js +0 -59
  219. package/src/types/graph.js +0 -24
  220. package/src/types/graph.test.js +0 -192
  221. package/src/types/index.js +0 -7
  222. package/src/types/llm.js +0 -2
  223. package/src/types/messages.js +0 -2
  224. package/src/types/run.js +0 -2
  225. package/src/types/stream.js +0 -2
  226. package/src/types/tools.js +0 -2
  227. package/src/utils/contextAnalytics.js +0 -79
  228. package/src/utils/contextAnalytics.test.js +0 -166
  229. package/src/utils/events.js +0 -26
  230. package/src/utils/graph.js +0 -11
  231. package/src/utils/handlers.js +0 -65
  232. package/src/utils/index.js +0 -10
  233. package/src/utils/llm.js +0 -21
  234. package/src/utils/llmConfig.js +0 -205
  235. package/src/utils/logging.js +0 -37
  236. package/src/utils/misc.js +0 -51
  237. package/src/utils/run.js +0 -69
  238. package/src/utils/schema.js +0 -21
  239. package/src/utils/title.js +0 -119
  240. package/src/utils/tokens.js +0 -92
  241. package/src/utils/toonFormat.js +0 -379
@@ -1,353 +0,0 @@
1
- import { config } from 'dotenv';
2
- config();
3
- import { HumanMessage } from '@langchain/core/messages';
4
- import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
5
- import { ToolEndHandler, ModelEndHandler } from '@/events';
6
- import { GraphEvents, Providers } from '@/common';
7
- import { Run } from '@/run';
8
- /**
9
- * Test LLM steering quality after handoff with system prompt instructions.
10
- *
11
- * Validates that the receiving agent clearly understands:
12
- * 1. WHO it is (its role/identity)
13
- * 2. WHAT the task is (instructions from the handoff)
14
- * 3. WHO transferred control (source agent context)
15
- *
16
- * Uses specific, verifiable instructions so we can check the output.
17
- */
18
- async function testHandoffSteering() {
19
- console.log('='.repeat(60));
20
- console.log('Test: Handoff Steering Quality (System Prompt Instructions)');
21
- console.log('='.repeat(60));
22
- const { contentParts, aggregateContent } = createContentAggregator();
23
- let currentAgent = '';
24
- const agentResponses = {};
25
- const customHandlers = {
26
- [GraphEvents.TOOL_END]: new ToolEndHandler(),
27
- [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
28
- [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
29
- [GraphEvents.ON_RUN_STEP]: {
30
- handle: (event, data) => {
31
- const runStep = data;
32
- if (runStep.agentId) {
33
- currentAgent = runStep.agentId;
34
- console.log(`\n[Agent: ${currentAgent}] Processing...`);
35
- }
36
- aggregateContent({ event, data: runStep });
37
- },
38
- },
39
- [GraphEvents.ON_RUN_STEP_COMPLETED]: {
40
- handle: (event, data) => {
41
- aggregateContent({
42
- event,
43
- data: data,
44
- });
45
- },
46
- },
47
- [GraphEvents.ON_MESSAGE_DELTA]: {
48
- handle: (event, data) => {
49
- aggregateContent({ event, data: data });
50
- },
51
- },
52
- [GraphEvents.TOOL_START]: {
53
- handle: (_event, data, _metadata) => {
54
- const toolData = data;
55
- if (toolData?.name?.includes('transfer_to_')) {
56
- const specialist = toolData.name.replace('lc_transfer_to_', '');
57
- console.log(`\n >> Handoff to: ${specialist}`);
58
- }
59
- },
60
- },
61
- };
62
- /**
63
- * Test 1: Basic handoff with specific task instructions
64
- * The specialist should clearly follow the coordinator's instructions.
65
- */
66
- async function test1_basicInstructions() {
67
- console.log('\n' + '-'.repeat(60));
68
- console.log('TEST 1: Basic handoff with specific task instructions');
69
- console.log('-'.repeat(60));
70
- const agents = [
71
- {
72
- agentId: 'coordinator',
73
- provider: Providers.OPENAI,
74
- clientOptions: {
75
- modelName: 'gpt-4.1-mini',
76
- apiKey: process.env.OPENAI_API_KEY,
77
- },
78
- instructions: `You are a Task Coordinator. When a user makes a request:
79
- 1. Analyze what they need
80
- 2. Transfer to the specialist with SPECIFIC instructions about what to do
81
-
82
- IMPORTANT: Always use the transfer tool. Do not try to do the work yourself.`,
83
- maxContextTokens: 8000,
84
- },
85
- {
86
- agentId: 'specialist',
87
- provider: Providers.OPENAI,
88
- clientOptions: {
89
- modelName: 'gpt-4.1-mini',
90
- apiKey: process.env.OPENAI_API_KEY,
91
- },
92
- instructions: `You are a Technical Specialist. You provide detailed technical responses.
93
- When you receive a task, execute it thoroughly. Always identify yourself as the Technical Specialist in your response.`,
94
- maxContextTokens: 8000,
95
- },
96
- ];
97
- const edges = [
98
- {
99
- from: 'coordinator',
100
- to: 'specialist',
101
- edgeType: 'transfer',
102
- description: 'Transfer to specialist for detailed work',
103
- prompt: 'Provide specific instructions for the specialist about what to analyze or create',
104
- promptKey: 'instructions',
105
- },
106
- ];
107
- const run = await Run.create({
108
- runId: `steering-test1-${Date.now()}`,
109
- graphConfig: { type: 'multi-agent', agents, edges },
110
- customHandlers,
111
- returnContent: true,
112
- skipCleanup: true,
113
- });
114
- const streamConfig = {
115
- configurable: { thread_id: 'steering-test1' },
116
- streamMode: 'values',
117
- version: 'v2',
118
- };
119
- const query = 'Explain the difference between TCP and UDP. I need exactly 3 bullet points for each protocol.';
120
- console.log(`\nQuery: "${query}"\n`);
121
- const messages = [new HumanMessage(query)];
122
- await run.processStream({ messages }, streamConfig);
123
- const finalMessages = run.getRunMessages();
124
- console.log('\n--- Specialist Response ---');
125
- if (finalMessages) {
126
- for (const msg of finalMessages) {
127
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
128
- console.log(msg.content);
129
- agentResponses['test1'] = msg.content;
130
- }
131
- }
132
- }
133
- // Check steering quality
134
- const response = agentResponses['test1'] || '';
135
- const mentionsSpecialist = response.toLowerCase().includes('specialist') ||
136
- response.toLowerCase().includes('technical');
137
- const hasBulletPoints = (response.match(/[-•*]\s/g) || []).length >= 4 ||
138
- (response.match(/\d\./g) || []).length >= 4;
139
- const mentionsTCP = response.toLowerCase().includes('tcp');
140
- const mentionsUDP = response.toLowerCase().includes('udp');
141
- console.log('\n--- Steering Checks ---');
142
- console.log(` Identifies as specialist: ${mentionsSpecialist ? 'YES' : 'NO'}`);
143
- console.log(` Has bullet points: ${hasBulletPoints ? 'YES' : 'NO'}`);
144
- console.log(` Covers TCP: ${mentionsTCP ? 'YES' : 'NO'}`);
145
- console.log(` Covers UDP: ${mentionsUDP ? 'YES' : 'NO'}`);
146
- }
147
- /**
148
- * Test 2: Handoff with very specific formatting instructions
149
- * Tests whether the receiving agent follows precise instructions from the handoff.
150
- */
151
- async function test2_preciseFormatting() {
152
- console.log('\n' + '-'.repeat(60));
153
- console.log('TEST 2: Handoff with precise formatting instructions');
154
- console.log('-'.repeat(60));
155
- const agents = [
156
- {
157
- agentId: 'manager',
158
- provider: Providers.OPENAI,
159
- clientOptions: {
160
- modelName: 'gpt-4.1-mini',
161
- apiKey: process.env.OPENAI_API_KEY,
162
- },
163
- instructions: `You are a Project Manager. When a user asks about a topic:
164
- 1. Transfer to the writer with VERY SPECIFIC formatting instructions
165
- 2. Tell the writer to start their response with "REPORT:" and end with "END REPORT"
166
- 3. Tell the writer to use exactly 2 paragraphs
167
-
168
- CRITICAL: Always transfer to the writer. Do NOT write the report yourself.`,
169
- maxContextTokens: 8000,
170
- },
171
- {
172
- agentId: 'writer',
173
- provider: Providers.OPENAI,
174
- clientOptions: {
175
- modelName: 'gpt-4.1-mini',
176
- apiKey: process.env.OPENAI_API_KEY,
177
- },
178
- instructions: `You are a Report Writer. Follow any formatting instructions you receive precisely.
179
- You must follow the exact format requested.`,
180
- maxContextTokens: 8000,
181
- },
182
- ];
183
- const edges = [
184
- {
185
- from: 'manager',
186
- to: 'writer',
187
- edgeType: 'transfer',
188
- description: 'Transfer to writer for report creation',
189
- prompt: 'Provide specific formatting and content instructions for the writer',
190
- promptKey: 'instructions',
191
- },
192
- ];
193
- const run = await Run.create({
194
- runId: `steering-test2-${Date.now()}`,
195
- graphConfig: { type: 'multi-agent', agents, edges },
196
- customHandlers,
197
- returnContent: true,
198
- skipCleanup: true,
199
- });
200
- const streamConfig = {
201
- configurable: { thread_id: 'steering-test2' },
202
- streamMode: 'values',
203
- version: 'v2',
204
- };
205
- const query = 'Write a brief report about cloud computing benefits.';
206
- console.log(`\nQuery: "${query}"\n`);
207
- const messages = [new HumanMessage(query)];
208
- await run.processStream({ messages }, streamConfig);
209
- const finalMessages = run.getRunMessages();
210
- console.log('\n--- Writer Response ---');
211
- if (finalMessages) {
212
- for (const msg of finalMessages) {
213
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
214
- console.log(msg.content);
215
- agentResponses['test2'] = msg.content;
216
- }
217
- }
218
- }
219
- // Check if the writer followed the manager's formatting instructions
220
- const response = agentResponses['test2'] || '';
221
- const startsWithReport = response.trimStart().startsWith('REPORT:');
222
- const endsWithEndReport = response.trimEnd().endsWith('END REPORT');
223
- const mentionsCloud = response.toLowerCase().includes('cloud');
224
- console.log('\n--- Steering Checks ---');
225
- console.log(` Starts with "REPORT:": ${startsWithReport ? 'YES' : 'NO'}`);
226
- console.log(` Ends with "END REPORT": ${endsWithEndReport ? 'YES' : 'NO'}`);
227
- console.log(` Covers cloud computing: ${mentionsCloud ? 'YES' : 'NO'}`);
228
- }
229
- /**
230
- * Test 3: Multi-turn after handoff
231
- * Tests that identity and context persist across turns.
232
- */
233
- async function test3_multiTurn() {
234
- console.log('\n' + '-'.repeat(60));
235
- console.log('TEST 3: Multi-turn conversation after handoff');
236
- console.log('-'.repeat(60));
237
- const agents = [
238
- {
239
- agentId: 'router',
240
- provider: Providers.OPENAI,
241
- clientOptions: {
242
- modelName: 'gpt-4.1-mini',
243
- apiKey: process.env.OPENAI_API_KEY,
244
- },
245
- instructions: `You are a Router. Transfer all requests to the chef.
246
- When transferring, tell the chef to respond ONLY about Italian cuisine.
247
- CRITICAL: Always transfer. Never answer directly.`,
248
- maxContextTokens: 8000,
249
- },
250
- {
251
- agentId: 'chef',
252
- provider: Providers.OPENAI,
253
- clientOptions: {
254
- modelName: 'gpt-4.1-mini',
255
- apiKey: process.env.OPENAI_API_KEY,
256
- },
257
- instructions: `You are Chef Marco, an Italian cuisine expert.
258
- Always introduce yourself as Chef Marco. Only discuss Italian food.
259
- If asked about non-Italian food, politely redirect to Italian alternatives.`,
260
- maxContextTokens: 8000,
261
- },
262
- ];
263
- const edges = [
264
- {
265
- from: 'router',
266
- to: 'chef',
267
- edgeType: 'transfer',
268
- description: 'Transfer to chef',
269
- prompt: 'Instructions for the chef about how to respond',
270
- promptKey: 'instructions',
271
- },
272
- ];
273
- const run = await Run.create({
274
- runId: `steering-test3-${Date.now()}`,
275
- graphConfig: { type: 'multi-agent', agents, edges },
276
- customHandlers,
277
- returnContent: true,
278
- skipCleanup: true,
279
- });
280
- const streamConfig = {
281
- configurable: { thread_id: 'steering-test3' },
282
- streamMode: 'values',
283
- version: 'v2',
284
- };
285
- const conversationHistory = [];
286
- // Turn 1
287
- const query1 = 'What is a good pasta recipe?';
288
- console.log(`\nTurn 1: "${query1}"\n`);
289
- conversationHistory.push(new HumanMessage(query1));
290
- await run.processStream({ messages: conversationHistory }, streamConfig);
291
- const turn1Messages = run.getRunMessages();
292
- if (turn1Messages) {
293
- conversationHistory.push(...turn1Messages);
294
- for (const msg of turn1Messages) {
295
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
296
- console.log(msg.content.substring(0, 300) + '...');
297
- agentResponses['test3_turn1'] = msg.content;
298
- }
299
- }
300
- }
301
- // Turn 2 - follow up
302
- const query2 = 'What about sushi instead?';
303
- console.log(`\nTurn 2: "${query2}"\n`);
304
- conversationHistory.push(new HumanMessage(query2));
305
- await run.processStream({ messages: conversationHistory }, streamConfig);
306
- const turn2Messages = run.getRunMessages();
307
- if (turn2Messages) {
308
- conversationHistory.push(...turn2Messages);
309
- for (const msg of turn2Messages) {
310
- if (msg.getType() === 'ai' && typeof msg.content === 'string') {
311
- console.log(msg.content.substring(0, 300) + '...');
312
- agentResponses['test3_turn2'] = msg.content;
313
- }
314
- }
315
- }
316
- const response1 = agentResponses['test3_turn1'] || '';
317
- const response2 = agentResponses['test3_turn2'] || '';
318
- const t1Identity = response1.toLowerCase().includes('marco') ||
319
- response1.toLowerCase().includes('chef');
320
- const t1Italian = response1.toLowerCase().includes('italian') ||
321
- response1.toLowerCase().includes('pasta');
322
- const t2Redirects = response2.toLowerCase().includes('italian') ||
323
- response2.toLowerCase().includes('instead');
324
- console.log('\n--- Steering Checks ---');
325
- console.log(` Turn 1 - Chef identity: ${t1Identity ? 'YES' : 'NO'}`);
326
- console.log(` Turn 1 - Italian focus: ${t1Italian ? 'YES' : 'NO'}`);
327
- console.log(` Turn 2 - Redirects to Italian: ${t2Redirects ? 'YES' : 'NO'}`);
328
- }
329
- try {
330
- await test1_basicInstructions();
331
- await test2_preciseFormatting();
332
- await test3_multiTurn();
333
- console.log('\n\n' + '='.repeat(60));
334
- console.log('ALL TESTS COMPLETE');
335
- console.log('='.repeat(60));
336
- console.log('\nReview the steering checks above.');
337
- console.log('If the receiving agents consistently follow instructions and maintain identity,');
338
- console.log('the system prompt injection approach is working correctly.');
339
- }
340
- catch (error) {
341
- console.error('\nTest failed:', error);
342
- process.exit(1);
343
- }
344
- }
345
- process.on('unhandledRejection', (reason) => {
346
- console.error('Unhandled Rejection:', reason);
347
- process.exit(1);
348
- });
349
- testHandoffSteering().catch((err) => {
350
- console.error('Test failed:', err);
351
- process.exit(1);
352
- });
353
- //# sourceMappingURL=test-handoff-steering.js.map
@@ -1,318 +0,0 @@
1
- #!/usr/bin/env bun
2
- import { config } from 'dotenv';
3
- config();
4
- import { HumanMessage } from '@langchain/core/messages';
5
- import { labelContentByAgent, formatAgentMessages } from '@/messages/format';
6
- import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
7
- import { Providers, GraphEvents, Constants, StepTypes } from '@/common';
8
- import { ToolEndHandler, ModelEndHandler } from '@/events';
9
- import { Run } from '@/run';
10
- const conversationHistory = [];
11
- /**
12
- * Test supervisor-based multi-agent system using a single edge with multiple destinations
13
- *
14
- * Instead of creating 5 separate edges, we use one edge with an array of destinations
15
- * This should create handoff tools for all 5 specialists from a single edge definition
16
- */
17
- async function testSupervisorListHandoff() {
18
- console.log('Testing Supervisor with List-Based Handoff Edge...\n');
19
- // Set up content aggregator
20
- const { contentParts, aggregateContent } = createContentAggregator();
21
- // Track which specialist role was selected
22
- let selectedRole = '';
23
- // Create custom handlers
24
- const customHandlers = {
25
- [GraphEvents.TOOL_END]: new ToolEndHandler(),
26
- [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
27
- [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
28
- [GraphEvents.ON_RUN_STEP]: {
29
- handle: (event, data) => {
30
- const runStepData = data;
31
- if (runStepData?.name) {
32
- console.log(`\n[${runStepData.name}] Processing...`);
33
- }
34
- aggregateContent({ event, data: data });
35
- },
36
- },
37
- [GraphEvents.ON_RUN_STEP_COMPLETED]: {
38
- handle: (event, data) => {
39
- aggregateContent({
40
- event,
41
- data: data,
42
- });
43
- },
44
- },
45
- [GraphEvents.ON_MESSAGE_DELTA]: {
46
- handle: (event, data) => {
47
- aggregateContent({ event, data: data });
48
- },
49
- },
50
- [GraphEvents.TOOL_START]: {
51
- handle: (_event, data, metadata) => {
52
- const toolData = data;
53
- if (toolData?.name?.startsWith(Constants.LC_TRANSFER_TO_)) {
54
- const specialist = toolData.name.replace(Constants.LC_TRANSFER_TO_, '');
55
- console.log(`\n🔀 Transferring to ${specialist}...`);
56
- selectedRole = specialist;
57
- }
58
- },
59
- },
60
- };
61
- // Function to create the graph with a single edge to multiple specialists
62
- function createSupervisorGraphWithListEdge() {
63
- console.log(`\nCreating graph with supervisor and 5 specialist agents.`);
64
- console.log('Using a SINGLE edge with multiple destinations (list-based handoff).\n');
65
- // Define the adaptive specialist configuration that will be reused
66
- const specialistConfig = {
67
- provider: Providers.ANTHROPIC,
68
- clientOptions: {
69
- modelName: 'claude-haiku-4-5',
70
- apiKey: process.env.ANTHROPIC_API_KEY,
71
- },
72
- instructions: `You are an Adaptive Specialist. Your agent ID indicates your role:
73
-
74
- - data_analyst: Focus on statistical analysis, metrics, ML evaluation, A/B testing
75
- - security_expert: Focus on cybersecurity, vulnerability assessment, compliance
76
- - product_designer: Focus on UX/UI design, user research, accessibility
77
- - devops_engineer: Focus on CI/CD, infrastructure, cloud platforms, monitoring
78
- - legal_advisor: Focus on licensing, privacy laws, contracts, regulatory compliance
79
-
80
- The supervisor will provide specific instructions. Follow them while maintaining your expert perspective.`,
81
- maxContextTokens: 8000,
82
- };
83
- // Create the graph with supervisor and all 5 specialists
84
- const agents = [
85
- {
86
- agentId: 'supervisor',
87
- provider: Providers.ANTHROPIC,
88
- clientOptions: {
89
- modelName: 'claude-haiku-4-5',
90
- apiKey: process.env.ANTHROPIC_API_KEY,
91
- },
92
- instructions: `You are a Task Supervisor with access to 5 specialist agents:
93
- 1. transfer_to_data_analyst - For statistical analysis and metrics
94
- 2. transfer_to_security_expert - For cybersecurity and vulnerability assessment
95
- 3. transfer_to_product_designer - For UX/UI design
96
- 4. transfer_to_devops_engineer - For infrastructure and deployment
97
- 5. transfer_to_legal_advisor - For compliance and licensing
98
-
99
- Your role is to:
100
- 1. Analyze the incoming request
101
- 2. Decide which specialist is best suited
102
- 3. Use the appropriate transfer tool (e.g., transfer_to_data_analyst)
103
- 4. Provide specific instructions to guide their work
104
-
105
- Be specific about what you need from the specialist.`,
106
- maxContextTokens: 8000,
107
- },
108
- // Include all 5 specialists with the same adaptive configuration
109
- {
110
- agentId: 'data_analyst',
111
- ...specialistConfig,
112
- },
113
- {
114
- agentId: 'security_expert',
115
- ...specialistConfig,
116
- },
117
- {
118
- agentId: 'product_designer',
119
- ...specialistConfig,
120
- },
121
- {
122
- agentId: 'devops_engineer',
123
- ...specialistConfig,
124
- },
125
- {
126
- agentId: 'legal_advisor',
127
- ...specialistConfig,
128
- },
129
- ];
130
- // Create a SINGLE edge from supervisor to ALL 5 specialists using a list
131
- const edges = [
132
- {
133
- from: 'supervisor',
134
- to: [
135
- 'data_analyst',
136
- 'security_expert',
137
- 'product_designer',
138
- 'devops_engineer',
139
- 'legal_advisor',
140
- ],
141
- description: 'Transfer to appropriate specialist based on task requirements',
142
- edgeType: 'transfer',
143
- },
144
- ];
145
- return {
146
- runId: `supervisor-list-handoff-${Date.now()}`,
147
- graphConfig: {
148
- type: 'multi-agent',
149
- agents,
150
- edges,
151
- },
152
- customHandlers,
153
- returnContent: true,
154
- skipCleanup: true,
155
- };
156
- }
157
- try {
158
- // Test with different queries
159
- const testQueries = [
160
- 'What are the legal implications of using GPL-licensed code in our product?',
161
- ];
162
- const config = {
163
- configurable: {
164
- thread_id: 'supervisor-list-handoff-1',
165
- },
166
- streamMode: 'values',
167
- version: 'v2',
168
- };
169
- for (const query of testQueries) {
170
- console.log(`\n${'='.repeat(80)}`);
171
- console.log(`FIRST RUN - USER QUERY: "${query}"`);
172
- console.log('='.repeat(80));
173
- // Reset conversation
174
- conversationHistory.length = 0;
175
- conversationHistory.push(new HumanMessage(query));
176
- // Create graph with supervisor having a single edge to multiple specialists
177
- const runConfig = createSupervisorGraphWithListEdge();
178
- const run = await Run.create(runConfig);
179
- console.log('Processing first request...');
180
- // Process with streaming
181
- const inputs = {
182
- messages: conversationHistory,
183
- };
184
- const finalContentParts = await run.processStream(inputs, config);
185
- const finalMessages = run.getRunMessages();
186
- if (finalMessages) {
187
- conversationHistory.push(...finalMessages);
188
- }
189
- // Demo: Map contentParts to agentIds
190
- console.log(`\n${'─'.repeat(60)}`);
191
- console.log('CONTENT PARTS TO AGENT MAPPING:');
192
- console.log('─'.repeat(60));
193
- if (run.Graph) {
194
- // Get the mapping of contentPart index to agentId
195
- const contentPartAgentMap = run.Graph.getContentPartAgentMap();
196
- console.log(`\nTotal content parts: ${contentParts.length}`);
197
- console.log(`\nContent Part → Agent Mapping:`);
198
- contentPartAgentMap.forEach((agentId, index) => {
199
- const contentPart = contentParts[index];
200
- const contentType = contentPart?.type || 'unknown';
201
- const preview = contentType === 'text'
202
- ? contentPart.text?.slice(0, 50) || ''
203
- : contentType === 'tool_call'
204
- ? `Tool: ${contentPart.tool_call?.name || 'unknown'}`
205
- : contentType;
206
- console.log(` [${index}] ${agentId} → ${contentType}: ${preview}${preview.length >= 50 ? '...' : ''}`);
207
- });
208
- // Show agent participation summary
209
- console.log(`\n${'─'.repeat(60)}`);
210
- console.log('AGENT PARTICIPATION SUMMARY:');
211
- console.log('─'.repeat(60));
212
- const activeAgents = run.Graph.getActiveAgentIds();
213
- console.log(`\nActive agents (${activeAgents.length}):`, activeAgents);
214
- const stepsByAgent = run.Graph.getRunStepsByAgent();
215
- stepsByAgent.forEach((steps, agentId) => {
216
- const toolCallSteps = steps.filter((s) => s.type === StepTypes.TOOL_CALLS).length;
217
- const messageSteps = steps.filter((s) => s.type === StepTypes.MESSAGE_CREATION).length;
218
- console.log(`\n ${agentId}:`);
219
- console.log(` - Total steps: ${steps.length}`);
220
- console.log(` - Message steps: ${messageSteps}`);
221
- console.log(` - Tool call steps: ${toolCallSteps}`);
222
- });
223
- }
224
- // Show graph structure summary
225
- console.log(`\n${'─'.repeat(60)}`);
226
- console.log(`GRAPH STRUCTURE:`);
227
- console.log(`- Agents: 6 total (supervisor + 5 specialists)`);
228
- console.log(`- Edges: 1 edge with multiple destinations`);
229
- console.log(`- Edge type: handoff (creates individual tools for each destination)`);
230
- console.log(`- Result: Supervisor has 5 handoff tools from a single edge`);
231
- console.log('─'.repeat(60));
232
- // =============================================================
233
- // SECOND RUN: Demonstrate agent-labeled history
234
- // =============================================================
235
- console.log(`\n${'='.repeat(80)}`);
236
- console.log(`SECOND RUN - Simulating DB Load with Agent-Labeled History`);
237
- console.log('='.repeat(80));
238
- // Simulate what happens in the main app:
239
- // 1. Store contentParts + agentIdMap to "DB" (in-memory here)
240
- const dbStoredContentParts = [...contentParts];
241
- const dbStoredAgentIdMap = Object.fromEntries(run.Graph.getContentPartAgentMap());
242
- console.log('\n📦 Simulating DB storage:');
243
- console.log(` - Stored ${dbStoredContentParts.length} content parts`);
244
- console.log(` - Stored agent mappings for ${Object.keys(dbStoredAgentIdMap).length} parts`);
245
- // 2. On next run, load from "DB" and label by agent
246
- console.log('\n📥 Loading from DB and labeling by agent...');
247
- const agentNames = {
248
- supervisor: 'Supervisor',
249
- legal_advisor: 'Legal Advisor',
250
- data_analyst: 'Data Analyst',
251
- security_expert: 'Security Expert',
252
- product_designer: 'Product Designer',
253
- devops_engineer: 'DevOps Engineer',
254
- };
255
- const labeledContentParts = labelContentByAgent(dbStoredContentParts.filter((p) => p != null), dbStoredAgentIdMap, agentNames);
256
- console.log(` - Labeled ${labeledContentParts.length} content parts by agent`);
257
- // 3. Convert labeled content parts to payload format
258
- const payload = [
259
- {
260
- role: 'user',
261
- content: query,
262
- },
263
- {
264
- role: 'assistant',
265
- content: labeledContentParts,
266
- },
267
- ];
268
- // 4. Format using formatAgentMessages (simulates what main app does)
269
- console.log('\n🔧 Calling formatAgentMessages...');
270
- const { messages: formattedMessages } = formatAgentMessages(payload);
271
- console.log(` - Formatted into ${formattedMessages.length} BaseMessages`);
272
- // Show a preview of what the supervisor will see
273
- console.log('\n👁️ Preview of formatted history for supervisor:');
274
- console.log('─'.repeat(80));
275
- for (let i = 0; i < formattedMessages.length; i++) {
276
- const msg = formattedMessages[i];
277
- const role = msg._getType();
278
- const preview = typeof msg.content === 'string'
279
- ? msg.content.slice(0, 200)
280
- : JSON.stringify(msg.content).slice(0, 200);
281
- console.log(`[${i}] ${role}: ${preview}${preview.length >= 200 ? '...' : ''}`);
282
- }
283
- console.log('─'.repeat(80));
284
- // 5. Create a new run with the formatted history + a followup question
285
- console.log('\n🚀 Starting second run with agent-labeled history + followup question...');
286
- const followupQuery = 'Can you summarize the key legal points from your previous response?';
287
- console.log(` Followup: "${followupQuery}"`);
288
- // Reset for second run
289
- const secondRunHistory = [
290
- ...formattedMessages,
291
- new HumanMessage(followupQuery),
292
- ];
293
- const runConfig2 = createSupervisorGraphWithListEdge();
294
- const run2 = await Run.create(runConfig2);
295
- const inputs2 = {
296
- messages: secondRunHistory,
297
- };
298
- await run2.processStream(inputs2, config);
299
- console.log('\n✅ Second run completed successfully!');
300
- console.log(' The supervisor correctly understood that the legal_advisor handled');
301
- console.log(' the previous query, avoiding identity confusion.');
302
- }
303
- // Final summary
304
- console.log(`\n${'='.repeat(60)}`);
305
- console.log('TEST COMPLETE');
306
- console.log('='.repeat(60));
307
- console.log('\nThis test demonstrates that a single edge with multiple');
308
- console.log('destinations in the "to" field creates individual handoff');
309
- console.log('tools for each destination agent, achieving the same result');
310
- console.log('as creating separate edges for each specialist.');
311
- }
312
- catch (error) {
313
- console.error('Error in supervisor list handoff test:', error);
314
- }
315
- }
316
- // Run the test. NOTE(review): the call's result is not awaited and has no .catch(); the body visible above logs failures via console.error inside its own try/catch — confirm nothing outside that try can reject.
317
- testSupervisorListHandoff();
318
- //# sourceMappingURL=test-multi-agent-list-handoff.js.map