illuma-agents 1.0.37 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/dist/cjs/agents/AgentContext.cjs +112 -14
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +5 -1
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +148 -8
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/graphs/MultiAgentGraph.cjs +277 -11
  8. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/index.cjs +128 -61
  10. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  11. package/dist/cjs/main.cjs +22 -7
  12. package/dist/cjs/main.cjs.map +1 -1
  13. package/dist/cjs/messages/cache.cjs +140 -46
  14. package/dist/cjs/messages/cache.cjs.map +1 -1
  15. package/dist/cjs/messages/core.cjs +1 -1
  16. package/dist/cjs/messages/core.cjs.map +1 -1
  17. package/dist/cjs/messages/tools.cjs +2 -2
  18. package/dist/cjs/messages/tools.cjs.map +1 -1
  19. package/dist/cjs/schemas/validate.cjs +173 -0
  20. package/dist/cjs/schemas/validate.cjs.map +1 -0
  21. package/dist/cjs/stream.cjs +4 -2
  22. package/dist/cjs/stream.cjs.map +1 -1
  23. package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
  24. package/dist/cjs/tools/CodeExecutor.cjs +22 -21
  25. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  26. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +14 -11
  27. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  28. package/dist/cjs/tools/ToolNode.cjs +101 -2
  29. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  30. package/dist/cjs/tools/ToolSearch.cjs +862 -0
  31. package/dist/cjs/tools/ToolSearch.cjs.map +1 -0
  32. package/dist/esm/agents/AgentContext.mjs +112 -14
  33. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  34. package/dist/esm/common/enum.mjs +5 -1
  35. package/dist/esm/common/enum.mjs.map +1 -1
  36. package/dist/esm/graphs/Graph.mjs +149 -9
  37. package/dist/esm/graphs/Graph.mjs.map +1 -1
  38. package/dist/esm/graphs/MultiAgentGraph.mjs +278 -12
  39. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  40. package/dist/esm/llm/bedrock/index.mjs +127 -60
  41. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  42. package/dist/esm/main.mjs +2 -1
  43. package/dist/esm/main.mjs.map +1 -1
  44. package/dist/esm/messages/cache.mjs +140 -46
  45. package/dist/esm/messages/cache.mjs.map +1 -1
  46. package/dist/esm/messages/core.mjs +1 -1
  47. package/dist/esm/messages/core.mjs.map +1 -1
  48. package/dist/esm/messages/tools.mjs +2 -2
  49. package/dist/esm/messages/tools.mjs.map +1 -1
  50. package/dist/esm/schemas/validate.mjs +167 -0
  51. package/dist/esm/schemas/validate.mjs.map +1 -0
  52. package/dist/esm/stream.mjs +4 -2
  53. package/dist/esm/stream.mjs.map +1 -1
  54. package/dist/esm/tools/BrowserTools.mjs.map +1 -1
  55. package/dist/esm/tools/CodeExecutor.mjs +22 -21
  56. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  57. package/dist/esm/tools/ProgrammaticToolCalling.mjs +14 -11
  58. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  59. package/dist/esm/tools/ToolNode.mjs +102 -3
  60. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  61. package/dist/esm/tools/ToolSearch.mjs +827 -0
  62. package/dist/esm/tools/ToolSearch.mjs.map +1 -0
  63. package/dist/types/agents/AgentContext.d.ts +51 -1
  64. package/dist/types/common/enum.d.ts +6 -2
  65. package/dist/types/graphs/Graph.d.ts +12 -0
  66. package/dist/types/graphs/MultiAgentGraph.d.ts +16 -0
  67. package/dist/types/index.d.ts +2 -1
  68. package/dist/types/llm/bedrock/index.d.ts +89 -11
  69. package/dist/types/llm/bedrock/types.d.ts +27 -0
  70. package/dist/types/llm/bedrock/utils/index.d.ts +5 -0
  71. package/dist/types/llm/bedrock/utils/message_inputs.d.ts +31 -0
  72. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +33 -0
  73. package/dist/types/messages/cache.d.ts +4 -1
  74. package/dist/types/schemas/index.d.ts +1 -0
  75. package/dist/types/schemas/validate.d.ts +36 -0
  76. package/dist/types/tools/CodeExecutor.d.ts +0 -3
  77. package/dist/types/tools/ProgrammaticToolCalling.d.ts +0 -3
  78. package/dist/types/tools/ToolNode.d.ts +3 -1
  79. package/dist/types/tools/ToolSearch.d.ts +148 -0
  80. package/dist/types/types/graph.d.ts +71 -0
  81. package/dist/types/types/llm.d.ts +3 -1
  82. package/dist/types/types/tools.d.ts +42 -2
  83. package/package.json +13 -6
  84. package/src/agents/AgentContext.test.ts +312 -0
  85. package/src/agents/AgentContext.ts +144 -16
  86. package/src/common/enum.ts +5 -1
  87. package/src/graphs/Graph.ts +214 -13
  88. package/src/graphs/MultiAgentGraph.ts +350 -13
  89. package/src/index.ts +4 -1
  90. package/src/llm/bedrock/index.ts +221 -99
  91. package/src/llm/bedrock/llm.spec.ts +616 -0
  92. package/src/llm/bedrock/types.ts +51 -0
  93. package/src/llm/bedrock/utils/index.ts +18 -0
  94. package/src/llm/bedrock/utils/message_inputs.ts +563 -0
  95. package/src/llm/bedrock/utils/message_outputs.ts +310 -0
  96. package/src/messages/__tests__/tools.test.ts +21 -21
  97. package/src/messages/cache.test.ts +304 -0
  98. package/src/messages/cache.ts +183 -53
  99. package/src/messages/core.ts +1 -1
  100. package/src/messages/tools.ts +2 -2
  101. package/src/schemas/index.ts +2 -0
  102. package/src/schemas/validate.test.ts +358 -0
  103. package/src/schemas/validate.ts +238 -0
  104. package/src/scripts/caching.ts +27 -19
  105. package/src/scripts/code_exec_files.ts +58 -15
  106. package/src/scripts/code_exec_multi_session.ts +241 -0
  107. package/src/scripts/code_exec_session.ts +282 -0
  108. package/src/scripts/multi-agent-conditional.ts +1 -0
  109. package/src/scripts/multi-agent-supervisor.ts +1 -0
  110. package/src/scripts/programmatic_exec_agent.ts +4 -4
  111. package/src/scripts/test-handoff-preamble.ts +277 -0
  112. package/src/scripts/test-parallel-handoffs.ts +291 -0
  113. package/src/scripts/test-tools-before-handoff.ts +8 -4
  114. package/src/scripts/test_code_api.ts +361 -0
  115. package/src/scripts/thinking-bedrock.ts +159 -0
  116. package/src/scripts/thinking.ts +39 -18
  117. package/src/scripts/{tool_search_regex.ts → tool_search.ts} +5 -5
  118. package/src/scripts/tools.ts +7 -3
  119. package/src/specs/cache.simple.test.ts +396 -0
  120. package/src/stream.ts +4 -2
  121. package/src/tools/BrowserTools.ts +39 -17
  122. package/src/tools/CodeExecutor.ts +26 -23
  123. package/src/tools/ProgrammaticToolCalling.ts +18 -14
  124. package/src/tools/ToolNode.ts +114 -1
  125. package/src/tools/ToolSearch.ts +1041 -0
  126. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +0 -2
  127. package/src/tools/__tests__/{ToolSearchRegex.integration.test.ts → ToolSearch.integration.test.ts} +6 -6
  128. package/src/tools/__tests__/ToolSearch.test.ts +1003 -0
  129. package/src/types/graph.test.ts +183 -0
  130. package/src/types/graph.ts +73 -0
  131. package/src/types/llm.ts +3 -1
  132. package/src/types/tools.ts +51 -2
  133. package/dist/cjs/tools/ToolSearchRegex.cjs +0 -455
  134. package/dist/cjs/tools/ToolSearchRegex.cjs.map +0 -1
  135. package/dist/esm/tools/ToolSearchRegex.mjs +0 -448
  136. package/dist/esm/tools/ToolSearchRegex.mjs.map +0 -1
  137. package/dist/types/tools/ToolSearchRegex.d.ts +0 -80
  138. package/src/tools/ToolSearchRegex.ts +0 -535
  139. package/src/tools/__tests__/ToolSearchRegex.test.ts +0 -232
package/src/specs/cache.simple.test.ts ADDED
@@ -0,0 +1,396 @@
1
+ /* eslint-disable no-console */
2
+ /* eslint-disable @typescript-eslint/no-explicit-any */
3
+ import { config } from 'dotenv';
4
+ config();
5
+ import { Calculator } from '@/tools/Calculator';
6
+ import {
7
+ AIMessage,
8
+ BaseMessage,
9
+ HumanMessage,
10
+ UsageMetadata,
11
+ } from '@langchain/core/messages';
12
+ import type * as t from '@/types';
13
+ import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
14
+ import { ModelEndHandler, ToolEndHandler } from '@/events';
15
+ import { capitalizeFirstLetter } from './spec.utils';
16
+ import { GraphEvents, Providers } from '@/common';
17
+ import { getLLMConfig } from '@/utils/llmConfig';
18
+ import { getArgs } from '@/scripts/args';
19
+ import { Run } from '@/run';
20
+
21
+ /**
22
+ * These tests verify that prompt caching works correctly across multi-turn
23
+ * conversations and that messages are not mutated in place.
24
+ */
25
+ describe('Prompt Caching Integration Tests', () => {
26
+ jest.setTimeout(120000);
27
+
28
+ const setupTest = (): {
29
+ collectedUsage: UsageMetadata[];
30
+ contentParts: Array<t.MessageContentComplex | undefined>;
31
+ customHandlers: Record<string | GraphEvents, t.EventHandler>;
32
+ } => {
33
+ const collectedUsage: UsageMetadata[] = [];
34
+ const { contentParts, aggregateContent } = createContentAggregator();
35
+
36
+ const customHandlers: Record<string | GraphEvents, t.EventHandler> = {
37
+ [GraphEvents.TOOL_END]: new ToolEndHandler(),
38
+ [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
39
+ [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
40
+ [GraphEvents.ON_RUN_STEP_COMPLETED]: {
41
+ handle: (
42
+ event: GraphEvents.ON_RUN_STEP_COMPLETED,
43
+ data: t.StreamEventData
44
+ ): void => {
45
+ aggregateContent({
46
+ event,
47
+ data: data as unknown as { result: t.ToolEndEvent },
48
+ });
49
+ },
50
+ },
51
+ [GraphEvents.ON_RUN_STEP]: {
52
+ handle: (
53
+ event: GraphEvents.ON_RUN_STEP,
54
+ data: t.StreamEventData
55
+ ): void => {
56
+ aggregateContent({ event, data: data as t.RunStep });
57
+ },
58
+ },
59
+ [GraphEvents.ON_RUN_STEP_DELTA]: {
60
+ handle: (
61
+ event: GraphEvents.ON_RUN_STEP_DELTA,
62
+ data: t.StreamEventData
63
+ ): void => {
64
+ aggregateContent({ event, data: data as t.RunStepDeltaEvent });
65
+ },
66
+ },
67
+ [GraphEvents.ON_MESSAGE_DELTA]: {
68
+ handle: (
69
+ event: GraphEvents.ON_MESSAGE_DELTA,
70
+ data: t.StreamEventData
71
+ ): void => {
72
+ aggregateContent({ event, data: data as t.MessageDeltaEvent });
73
+ },
74
+ },
75
+ };
76
+
77
+ return { collectedUsage, contentParts, customHandlers };
78
+ };
79
+
80
+ const streamConfig = {
81
+ configurable: { thread_id: 'cache-test-thread' },
82
+ streamMode: 'values',
83
+ version: 'v2' as const,
84
+ };
85
+
86
+ describe('Anthropic Prompt Caching', () => {
87
+ const provider = Providers.ANTHROPIC;
88
+
89
+ test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
90
+ const { userName, location } = await getArgs();
91
+ const llmConfig = getLLMConfig(provider);
92
+ const { collectedUsage, customHandlers } = setupTest();
93
+
94
+ const run = await Run.create<t.IState>({
95
+ runId: 'cache-test-anthropic',
96
+ graphConfig: {
97
+ type: 'standard',
98
+ llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
99
+ tools: [new Calculator()],
100
+ instructions: 'You are a helpful assistant.',
101
+ additional_instructions: `User: ${userName}, Location: ${location}`,
102
+ },
103
+ returnContent: true,
104
+ customHandlers,
105
+ });
106
+
107
+ // Turn 1
108
+ const turn1Messages: BaseMessage[] = [
109
+ new HumanMessage('Hello, what is 2+2?'),
110
+ ];
111
+ const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
112
+
113
+ const turn1Result = await run.processStream(
114
+ { messages: turn1Messages },
115
+ streamConfig
116
+ );
117
+ expect(turn1Result).toBeDefined();
118
+
119
+ // Verify original message was NOT mutated
120
+ expect(JSON.stringify(turn1Messages[0].content)).toBe(
121
+ turn1ContentSnapshot
122
+ );
123
+ expect((turn1Messages[0] as any).content).not.toContain('cache_control');
124
+
125
+ const turn1RunMessages = run.getRunMessages();
126
+ expect(turn1RunMessages).toBeDefined();
127
+ expect(turn1RunMessages!.length).toBeGreaterThan(0);
128
+
129
+ // Turn 2 - build on conversation
130
+ const turn2Messages: BaseMessage[] = [
131
+ ...turn1Messages,
132
+ ...turn1RunMessages!,
133
+ new HumanMessage('Now multiply that by 10'),
134
+ ];
135
+ const turn2HumanContentSnapshot = JSON.stringify(
136
+ turn2Messages[turn2Messages.length - 1].content
137
+ );
138
+
139
+ const run2 = await Run.create<t.IState>({
140
+ runId: 'cache-test-anthropic-2',
141
+ graphConfig: {
142
+ type: 'standard',
143
+ llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
144
+ tools: [new Calculator()],
145
+ instructions: 'You are a helpful assistant.',
146
+ additional_instructions: `User: ${userName}, Location: ${location}`,
147
+ },
148
+ returnContent: true,
149
+ customHandlers,
150
+ });
151
+
152
+ const turn2Result = await run2.processStream(
153
+ { messages: turn2Messages },
154
+ streamConfig
155
+ );
156
+ expect(turn2Result).toBeDefined();
157
+
158
+ // Verify messages were NOT mutated
159
+ expect(
160
+ JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
161
+ ).toBe(turn2HumanContentSnapshot);
162
+
163
+ // Check that we got cache read tokens (indicating caching worked)
164
+ console.log(`${provider} Usage:`, collectedUsage);
165
+ expect(collectedUsage.length).toBeGreaterThan(0);
166
+
167
+ console.log(
168
+ `${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
169
+ );
170
+ });
171
+
172
+ test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
173
+ const llmConfig = getLLMConfig(provider);
174
+ const { customHandlers } = setupTest();
175
+
176
+ const run = await Run.create<t.IState>({
177
+ runId: 'cache-test-anthropic-tools',
178
+ graphConfig: {
179
+ type: 'standard',
180
+ llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
181
+ tools: [new Calculator()],
182
+ instructions:
183
+ 'You are a math assistant. Use the calculator tool for all calculations.',
184
+ },
185
+ returnContent: true,
186
+ customHandlers,
187
+ });
188
+
189
+ const messages: BaseMessage[] = [
190
+ new HumanMessage('Calculate 123 * 456 using the calculator'),
191
+ ];
192
+
193
+ const result = await run.processStream({ messages }, streamConfig);
194
+ expect(result).toBeDefined();
195
+
196
+ const runMessages = run.getRunMessages();
197
+ expect(runMessages).toBeDefined();
198
+
199
+ // Should have used the calculator tool
200
+ const hasToolUse = runMessages?.some(
201
+ (msg) =>
202
+ msg._getType() === 'ai' &&
203
+ ((msg as AIMessage).tool_calls?.length ?? 0) > 0
204
+ );
205
+ expect(hasToolUse).toBe(true);
206
+
207
+ console.log(
208
+ `${capitalizeFirstLetter(provider)} tool call with caching test passed`
209
+ );
210
+ });
211
+ });
212
+
213
+ describe('Bedrock Prompt Caching', () => {
214
+ const provider = Providers.BEDROCK;
215
+
216
+ test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
217
+ const { userName, location } = await getArgs();
218
+ const llmConfig = getLLMConfig(provider);
219
+ const { collectedUsage, customHandlers } = setupTest();
220
+
221
+ const run = await Run.create<t.IState>({
222
+ runId: 'cache-test-bedrock',
223
+ graphConfig: {
224
+ type: 'standard',
225
+ llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
226
+ tools: [new Calculator()],
227
+ instructions: 'You are a helpful assistant.',
228
+ additional_instructions: `User: ${userName}, Location: ${location}`,
229
+ },
230
+ returnContent: true,
231
+ customHandlers,
232
+ });
233
+
234
+ // Turn 1
235
+ const turn1Messages: BaseMessage[] = [
236
+ new HumanMessage('Hello, what is 5+5?'),
237
+ ];
238
+ const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
239
+
240
+ const turn1Result = await run.processStream(
241
+ { messages: turn1Messages },
242
+ streamConfig
243
+ );
244
+ expect(turn1Result).toBeDefined();
245
+
246
+ // Verify original message was NOT mutated
247
+ expect(JSON.stringify(turn1Messages[0].content)).toBe(
248
+ turn1ContentSnapshot
249
+ );
250
+
251
+ const turn1RunMessages = run.getRunMessages();
252
+ expect(turn1RunMessages).toBeDefined();
253
+ expect(turn1RunMessages!.length).toBeGreaterThan(0);
254
+
255
+ // Turn 2
256
+ const turn2Messages: BaseMessage[] = [
257
+ ...turn1Messages,
258
+ ...turn1RunMessages!,
259
+ new HumanMessage('Multiply that by 3'),
260
+ ];
261
+ const turn2HumanContentSnapshot = JSON.stringify(
262
+ turn2Messages[turn2Messages.length - 1].content
263
+ );
264
+
265
+ const run2 = await Run.create<t.IState>({
266
+ runId: 'cache-test-bedrock-2',
267
+ graphConfig: {
268
+ type: 'standard',
269
+ llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
270
+ tools: [new Calculator()],
271
+ instructions: 'You are a helpful assistant.',
272
+ additional_instructions: `User: ${userName}, Location: ${location}`,
273
+ },
274
+ returnContent: true,
275
+ customHandlers,
276
+ });
277
+
278
+ const turn2Result = await run2.processStream(
279
+ { messages: turn2Messages },
280
+ streamConfig
281
+ );
282
+ expect(turn2Result).toBeDefined();
283
+
284
+ // Verify messages were NOT mutated
285
+ expect(
286
+ JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
287
+ ).toBe(turn2HumanContentSnapshot);
288
+
289
+ console.log(`${provider} Usage:`, collectedUsage);
290
+ expect(collectedUsage.length).toBeGreaterThan(0);
291
+
292
+ console.log(
293
+ `${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
294
+ );
295
+ });
296
+
297
+ test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
298
+ const llmConfig = getLLMConfig(provider);
299
+ const { customHandlers } = setupTest();
300
+
301
+ const run = await Run.create<t.IState>({
302
+ runId: 'cache-test-bedrock-tools',
303
+ graphConfig: {
304
+ type: 'standard',
305
+ llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
306
+ tools: [new Calculator()],
307
+ instructions:
308
+ 'You are a math assistant. Use the calculator tool for all calculations.',
309
+ },
310
+ returnContent: true,
311
+ customHandlers,
312
+ });
313
+
314
+ const messages: BaseMessage[] = [
315
+ new HumanMessage('Calculate 789 * 123 using the calculator'),
316
+ ];
317
+
318
+ const result = await run.processStream({ messages }, streamConfig);
319
+ expect(result).toBeDefined();
320
+
321
+ const runMessages = run.getRunMessages();
322
+ expect(runMessages).toBeDefined();
323
+
324
+ // Should have used the calculator tool
325
+ const hasToolUse = runMessages?.some(
326
+ (msg) =>
327
+ msg._getType() === 'ai' &&
328
+ ((msg as AIMessage).tool_calls?.length ?? 0) > 0
329
+ );
330
+ expect(hasToolUse).toBe(true);
331
+
332
+ console.log(
333
+ `${capitalizeFirstLetter(provider)} tool call with caching test passed`
334
+ );
335
+ });
336
+ });
337
+
338
+ describe('Cross-provider message isolation', () => {
339
+ test('Messages processed by Anthropic should not affect Bedrock processing', async () => {
340
+ const anthropicConfig = getLLMConfig(Providers.ANTHROPIC);
341
+ const bedrockConfig = getLLMConfig(Providers.BEDROCK);
342
+ const { customHandlers: handlers1 } = setupTest();
343
+ const { customHandlers: handlers2 } = setupTest();
344
+
345
+ // Create a shared message array
346
+ const sharedMessages: BaseMessage[] = [
347
+ new HumanMessage('Hello, what is the capital of France?'),
348
+ ];
349
+ const originalContent = JSON.stringify(sharedMessages[0].content);
350
+
351
+ // Process with Anthropic first
352
+ const anthropicRun = await Run.create<t.IState>({
353
+ runId: 'cross-provider-anthropic',
354
+ graphConfig: {
355
+ type: 'standard',
356
+ llmConfig: { ...anthropicConfig, promptCache: true } as t.LLMConfig,
357
+ instructions: 'You are a helpful assistant.',
358
+ },
359
+ returnContent: true,
360
+ customHandlers: handlers1,
361
+ });
362
+
363
+ const anthropicResult = await anthropicRun.processStream(
364
+ { messages: sharedMessages },
365
+ streamConfig
366
+ );
367
+ expect(anthropicResult).toBeDefined();
368
+
369
+ // Verify message not mutated
370
+ expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
371
+
372
+ // Now process with Bedrock using the SAME messages
373
+ const bedrockRun = await Run.create<t.IState>({
374
+ runId: 'cross-provider-bedrock',
375
+ graphConfig: {
376
+ type: 'standard',
377
+ llmConfig: { ...bedrockConfig, promptCache: true } as t.LLMConfig,
378
+ instructions: 'You are a helpful assistant.',
379
+ },
380
+ returnContent: true,
381
+ customHandlers: handlers2,
382
+ });
383
+
384
+ const bedrockResult = await bedrockRun.processStream(
385
+ { messages: sharedMessages },
386
+ streamConfig
387
+ );
388
+ expect(bedrockResult).toBeDefined();
389
+
390
+ // Verify message STILL not mutated after both providers processed
391
+ expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
392
+
393
+ console.log('Cross-provider message isolation test passed');
394
+ });
395
+ });
396
+ });
package/src/stream.ts CHANGED
@@ -339,7 +339,8 @@ hasToolCallChunks: ${hasToolCallChunks}
339
339
  (c) =>
340
340
  (c.type?.startsWith(ContentTypes.THINKING) ?? false) ||
341
341
  (c.type?.startsWith(ContentTypes.REASONING) ?? false) ||
342
- (c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false)
342
+ (c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false) ||
343
+ c.type === 'redacted_thinking'
343
344
  )
344
345
  ) {
345
346
  await graph.dispatchReasoningDelta(stepId, {
@@ -365,7 +366,8 @@ hasToolCallChunks: ${hasToolCallChunks}
365
366
  Array.isArray(chunk.content) &&
366
367
  (chunk.content[0]?.type === ContentTypes.THINKING ||
367
368
  chunk.content[0]?.type === ContentTypes.REASONING ||
368
- chunk.content[0]?.type === ContentTypes.REASONING_CONTENT)
369
+ chunk.content[0]?.type === ContentTypes.REASONING_CONTENT ||
370
+ chunk.content[0]?.type === 'redacted_thinking')
369
371
  ) {
370
372
  reasoning_content = 'valid';
371
373
  } else if (
package/src/tools/BrowserTools.ts CHANGED
@@ -83,12 +83,16 @@ const BrowserClickSchema = z.object({
83
83
  const BrowserTypeSchema = z.object({
84
84
  index: z
85
85
  .number()
86
- .describe('The [index] of the INPUT element to type into. Target <input> or <textarea> elements. Check fieldLabel to identify the correct field.'),
86
+ .describe(
87
+ 'The [index] of the INPUT element to type into. Target <input> or <textarea> elements. Check fieldLabel to identify the correct field.'
88
+ ),
87
89
  text: z.string().describe('The text to type into the element'),
88
90
  pressEnter: z
89
91
  .boolean()
90
92
  .optional()
91
- .describe('Whether to press Enter after typing (useful for search forms and submitting)'),
93
+ .describe(
94
+ 'Whether to press Enter after typing (useful for search forms and submitting)'
95
+ ),
92
96
  });
93
97
 
94
98
  const BrowserNavigateSchema = z.object({
@@ -134,13 +138,17 @@ const BrowserGetPageStateSchema = z.object({});
134
138
  const BrowserKeypressSchema = z.object({
135
139
  keys: z
136
140
  .string()
137
- .describe('Keyboard keys to press. Use "+" to combine modifiers (e.g., "Control+Enter", "Control+a", "Escape", "Tab", "Enter"). Common shortcuts: Control+Enter (submit forms/send), Escape (close dialogs), Tab (next field).'),
141
+ .describe(
142
+ 'Keyboard keys to press. Use "+" to combine modifiers (e.g., "Control+Enter", "Control+a", "Escape", "Tab", "Enter"). Common shortcuts: Control+Enter (submit forms/send), Escape (close dialogs), Tab (next field).'
143
+ ),
138
144
  });
139
145
 
140
146
  const BrowserSwitchTabSchema = z.object({
141
147
  tabId: z
142
148
  .number()
143
- .describe('The tab ID to switch to. Use the tab IDs shown in the tabs list from page state.'),
149
+ .describe(
150
+ 'The tab ID to switch to. Use the tab IDs shown in the tabs list from page state.'
151
+ ),
144
152
  });
145
153
 
146
154
  /**
@@ -187,7 +195,9 @@ function formatResultForLLM(
187
195
  }
188
196
  if (result.elementList != null && result.elementList !== '') {
189
197
  // Add hint about fieldLabel and targeting inputs for form interactions
190
- parts.push(`\n**Interactive Elements** (for typing: target <input> elements with fieldLabel, NOT parent <div> containers):\n${result.elementList}`);
198
+ parts.push(
199
+ `\n**Interactive Elements** (for typing: target <input> elements with fieldLabel, NOT parent <div> containers):\n${result.elementList}`
200
+ );
191
201
  }
192
202
  if (result.screenshot != null && result.screenshot !== '') {
193
203
  parts.push('\n[Screenshot captured and displayed to user]');
@@ -263,7 +273,8 @@ export function createBrowserTools(
263
273
  tools.push(
264
274
  tool(createToolFunction('click'), {
265
275
  name: EBrowserTools.CLICK,
266
- description: 'Click element by [index]. Use fieldLabel attribute to identify correct element. For form fields, target <input> elements NOT parent <div> containers.',
276
+ description:
277
+ 'Click element by [index]. Use fieldLabel attribute to identify correct element. For form fields, target <input> elements NOT parent <div> containers.',
267
278
  schema: BrowserClickSchema,
268
279
  })
269
280
  );
@@ -272,7 +283,8 @@ export function createBrowserTools(
272
283
  tools.push(
273
284
  tool(createToolFunction('type'), {
274
285
  name: EBrowserTools.TYPE,
275
- description: 'Type text into <input> element by [index]. CRITICAL: Always target <input> or <textarea> tags (NOT parent <div> containers). Use fieldLabel to identify correct field (e.g., fieldLabel="To recipients" for To field).',
286
+ description:
287
+ 'Type text into <input> element by [index]. CRITICAL: Always target <input> or <textarea> tags (NOT parent <div> containers). Use fieldLabel to identify correct field (e.g., fieldLabel="To recipients" for To field).',
276
288
  schema: BrowserTypeSchema,
277
289
  })
278
290
  );
@@ -281,7 +293,8 @@ export function createBrowserTools(
281
293
  tools.push(
282
294
  tool(createToolFunction('navigate'), {
283
295
  name: EBrowserTools.NAVIGATE,
284
- description: 'Navigate to URL (include https://). Returns new page element list.',
296
+ description:
297
+ 'Navigate to URL (include https://). Returns new page element list.',
285
298
  schema: BrowserNavigateSchema,
286
299
  })
287
300
  );
@@ -290,7 +303,8 @@ export function createBrowserTools(
290
303
  tools.push(
291
304
  tool(createToolFunction('scroll'), {
292
305
  name: EBrowserTools.SCROLL,
293
- description: 'Scroll page (up/down/left/right). Returns updated element list.',
306
+ description:
307
+ 'Scroll page (up/down/left/right). Returns updated element list.',
294
308
  schema: BrowserScrollSchema,
295
309
  })
296
310
  );
@@ -299,7 +313,8 @@ export function createBrowserTools(
299
313
  tools.push(
300
314
  tool(createToolFunction('extract'), {
301
315
  name: EBrowserTools.EXTRACT,
302
- description: 'Extract page content. Returns URL, title, and element list.',
316
+ description:
317
+ 'Extract page content. Returns URL, title, and element list.',
303
318
  schema: BrowserExtractSchema,
304
319
  })
305
320
  );
@@ -308,7 +323,8 @@ export function createBrowserTools(
308
323
  tools.push(
309
324
  tool(createToolFunction('hover'), {
310
325
  name: EBrowserTools.HOVER,
311
- description: 'Hover element by [index] to reveal menus/tooltips. Returns updated element list.',
326
+ description:
327
+ 'Hover element by [index] to reveal menus/tooltips. Returns updated element list.',
312
328
  schema: BrowserHoverSchema,
313
329
  })
314
330
  );
@@ -317,7 +333,8 @@ export function createBrowserTools(
317
333
  tools.push(
318
334
  tool(createToolFunction('wait'), {
319
335
  name: EBrowserTools.WAIT,
320
- description: 'Wait for async content to load. Returns updated element list.',
336
+ description:
337
+ 'Wait for async content to load. Returns updated element list.',
321
338
  schema: BrowserWaitSchema,
322
339
  })
323
340
  );
@@ -326,7 +343,8 @@ export function createBrowserTools(
326
343
  tools.push(
327
344
  tool(createToolFunction('back'), {
328
345
  name: EBrowserTools.BACK,
329
- description: 'Go back in browser history. Returns previous page element list.',
346
+ description:
347
+ 'Go back in browser history. Returns previous page element list.',
330
348
  schema: BrowserBackSchema,
331
349
  })
332
350
  );
@@ -335,7 +353,8 @@ export function createBrowserTools(
335
353
  tools.push(
336
354
  tool(createToolFunction('screenshot'), {
337
355
  name: EBrowserTools.SCREENSHOT,
338
- description: 'Capture screenshot. Displayed to user. Use get_page_state for automation.',
356
+ description:
357
+ 'Capture screenshot. Displayed to user. Use get_page_state for automation.',
339
358
  schema: BrowserScreenshotSchema,
340
359
  })
341
360
  );
@@ -344,7 +363,8 @@ export function createBrowserTools(
344
363
  tools.push(
345
364
  tool(createToolFunction('get_page_state'), {
346
365
  name: EBrowserTools.GET_PAGE_STATE,
347
- description: 'Get page URL, title, and interactive elements with [index] for actions. Start here.',
366
+ description:
367
+ 'Get page URL, title, and interactive elements with [index] for actions. Start here.',
348
368
  schema: BrowserGetPageStateSchema,
349
369
  })
350
370
  );
@@ -353,7 +373,8 @@ export function createBrowserTools(
353
373
  tools.push(
354
374
  tool(createToolFunction('keypress'), {
355
375
  name: EBrowserTools.KEYPRESS,
356
- description: 'Send keyboard shortcut or key press. Use for: Control+Enter (send email/submit), Escape (close dialog/cancel), Tab (next field), Enter (confirm). The keys are sent to the currently focused element.',
376
+ description:
377
+ 'Send keyboard shortcut or key press. Use for: Control+Enter (send email/submit), Escape (close dialog/cancel), Tab (next field), Enter (confirm). The keys are sent to the currently focused element.',
357
378
  schema: BrowserKeypressSchema,
358
379
  })
359
380
  );
@@ -362,7 +383,8 @@ export function createBrowserTools(
362
383
  tools.push(
363
384
  tool(createToolFunction('switch_tab'), {
364
385
  name: EBrowserTools.SWITCH_TAB,
365
- description: 'Switch to a different browser tab by its ID. Tab IDs are shown in the page state. Use this to work with existing open tabs (e.g., use existing Gmail tab instead of opening a new one).',
386
+ description:
387
+ 'Switch to a different browser tab by its ID. Tab IDs are shown in the page state. Use this to work with existing open tabs (e.g., use existing Gmail tab instead of opening a new one).',
366
388
  schema: BrowserSwitchTabSchema,
367
389
  })
368
390
  );
package/src/tools/CodeExecutor.ts CHANGED
@@ -17,7 +17,7 @@ export const getCodeBaseURL = (): string =>
17
17
  const imageMessage = 'Image is already displayed to the user';
18
18
  const otherMessage = 'File is already downloaded by the user';
19
19
  const accessMessage =
20
- 'Note: Files are READ-ONLY. Save changes to NEW filenames. To access these files in future executions, provide the `session_id` as a parameter (not in your code).';
20
+ 'Note: Files from previous executions are automatically available and can be modified.';
21
21
  const emptyOutputMessage =
22
22
  'stdout: Empty. Ensure you\'re writing output explicitly.\n';
23
23
 
@@ -41,7 +41,8 @@ const CodeExecutionToolSchema = z.object({
41
41
  code: z.string()
42
42
  .describe(`The complete, self-contained code to execute, without any truncation or minimization.
43
43
  - The environment is stateless; variables and imports don't persist between executions.
44
- - When using \`session_id\`: Don't hardcode it in \`code\`, and write file modifications to NEW filenames (files are READ-ONLY).
44
+ - Generated files from previous executions are automatically available in "/mnt/data/".
45
+ - Files from previous executions are automatically available and can be modified in place.
45
46
  - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
46
47
  - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
47
48
  - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -50,17 +51,6 @@ const CodeExecutionToolSchema = z.object({
50
51
  - js: use the \`console\` or \`process\` methods for all outputs.
51
52
  - r: IMPORTANT: No X11 display available. ALL graphics MUST use Cairo library (library(Cairo)).
52
53
  - Other languages: use appropriate output functions.`),
53
- session_id: z
54
- .string()
55
- .optional()
56
- .describe(
57
- `Session ID from a previous response to access generated files.
58
- - Files load into the current working directory ("/mnt/data/")
59
- - Use relative paths ONLY
60
- - Files are READ-ONLY and cannot be modified in-place
61
- - To modify: read original file, write to NEW filename
62
- `.trim()
63
- ),
64
54
  args: z
65
55
  .array(z.string())
66
56
  .optional()
@@ -107,15 +97,33 @@ Rules:
107
97
  `.trim();
108
98
 
109
99
  return tool<typeof CodeExecutionToolSchema>(
110
- async ({ lang, code, session_id, ...rest }) => {
111
- const postData = {
100
+ async ({ lang, code, ...rest }, config) => {
101
+ /**
102
+ * Extract session context from config.toolCall (injected by ToolNode).
103
+ * - session_id: For API to associate with previous session
104
+ * - _injected_files: File refs to pass directly (avoids /files endpoint race condition)
105
+ */
106
+ const { session_id, _injected_files } = (config.toolCall ?? {}) as {
107
+ session_id?: string;
108
+ _injected_files?: t.CodeEnvFile[];
109
+ };
110
+
111
+ const postData: Record<string, unknown> = {
112
112
  lang,
113
113
  code,
114
114
  ...rest,
115
115
  ...params,
116
116
  };
117
117
 
118
- if (session_id != null && session_id.length > 0) {
118
+ /**
119
+ * File injection priority:
120
+ * 1. Use _injected_files from ToolNode (avoids /files endpoint race condition)
121
+ * 2. Fall back to fetching from /files endpoint if session_id provided but no injected files
122
+ */
123
+ if (_injected_files && _injected_files.length > 0) {
124
+ postData.files = _injected_files;
125
+ } else if (session_id != null && session_id.length > 0) {
126
+ /** Fallback: fetch from /files endpoint (may have race condition issues) */
119
127
  try {
120
128
  const filesEndpoint = `${baseEndpoint}/files/${session_id}?detail=full`;
121
129
  const fetchOptions: RequestInit = {
@@ -140,7 +148,6 @@ Rules:
140
148
  const files = await response.json();
141
149
  if (Array.isArray(files) && files.length > 0) {
142
150
  const fileReferences: t.CodeEnvFile[] = files.map((file) => {
143
- // Extract the ID from the file name (part after session ID prefix and before extension)
144
151
  const nameParts = file.name.split('/');
145
152
  const id = nameParts.length > 1 ? nameParts[1].split('.')[0] : '';
146
153
 
@@ -151,11 +158,7 @@ Rules:
151
158
  };
152
159
  });
153
160
 
154
- if (!postData.files) {
155
- postData.files = fileReferences;
156
- } else if (Array.isArray(postData.files)) {
157
- postData.files = [...postData.files, ...fileReferences];
158
- }
161
+ postData.files = fileReferences;
159
162
  }
160
163
  } catch {
161
164
  // eslint-disable-next-line no-console
@@ -204,7 +207,7 @@ Rules:
204
207
  }
205
208
  }
206
209
 
207
- formattedOutput += `\nsession_id: ${result.session_id}\n\n${accessMessage}`;
210
+ formattedOutput += `\n\n${accessMessage}`;
208
211
  return [
209
212
  formattedOutput.trim(),
210
213
  {