npm - illuma-agents - Versions diffs - 1.0.37 → 1.0.39 - Mend

illuma-agents 1.0.37 → 1.0.39

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

package/dist/cjs/agents/AgentContext.cjs +112 -14
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/common/enum.cjs +5 -1
package/dist/cjs/common/enum.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +148 -8
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/graphs/MultiAgentGraph.cjs +277 -11
package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
package/dist/cjs/llm/bedrock/index.cjs +128 -61
package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
package/dist/cjs/main.cjs +22 -7
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/messages/cache.cjs +140 -46
package/dist/cjs/messages/cache.cjs.map +1 -1
package/dist/cjs/messages/core.cjs +1 -1
package/dist/cjs/messages/core.cjs.map +1 -1
package/dist/cjs/messages/tools.cjs +2 -2
package/dist/cjs/messages/tools.cjs.map +1 -1
package/dist/cjs/schemas/validate.cjs +173 -0
package/dist/cjs/schemas/validate.cjs.map +1 -0
package/dist/cjs/stream.cjs +4 -2
package/dist/cjs/stream.cjs.map +1 -1
package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
package/dist/cjs/tools/CodeExecutor.cjs +22 -21
package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
package/dist/cjs/tools/ProgrammaticToolCalling.cjs +14 -11
package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +101 -2
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/cjs/tools/ToolSearch.cjs +862 -0
package/dist/cjs/tools/ToolSearch.cjs.map +1 -0
package/dist/esm/agents/AgentContext.mjs +112 -14
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/common/enum.mjs +5 -1
package/dist/esm/common/enum.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +149 -9
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/graphs/MultiAgentGraph.mjs +278 -12
package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
package/dist/esm/llm/bedrock/index.mjs +127 -60
package/dist/esm/llm/bedrock/index.mjs.map +1 -1
package/dist/esm/main.mjs +2 -1
package/dist/esm/main.mjs.map +1 -1
package/dist/esm/messages/cache.mjs +140 -46
package/dist/esm/messages/cache.mjs.map +1 -1
package/dist/esm/messages/core.mjs +1 -1
package/dist/esm/messages/core.mjs.map +1 -1
package/dist/esm/messages/tools.mjs +2 -2
package/dist/esm/messages/tools.mjs.map +1 -1
package/dist/esm/schemas/validate.mjs +167 -0
package/dist/esm/schemas/validate.mjs.map +1 -0
package/dist/esm/stream.mjs +4 -2
package/dist/esm/stream.mjs.map +1 -1
package/dist/esm/tools/BrowserTools.mjs.map +1 -1
package/dist/esm/tools/CodeExecutor.mjs +22 -21
package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
package/dist/esm/tools/ProgrammaticToolCalling.mjs +14 -11
package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +102 -3
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/esm/tools/ToolSearch.mjs +827 -0
package/dist/esm/tools/ToolSearch.mjs.map +1 -0
package/dist/types/agents/AgentContext.d.ts +51 -1
package/dist/types/common/enum.d.ts +6 -2
package/dist/types/graphs/Graph.d.ts +12 -0
package/dist/types/graphs/MultiAgentGraph.d.ts +16 -0
package/dist/types/index.d.ts +2 -1
package/dist/types/llm/bedrock/index.d.ts +89 -11
package/dist/types/llm/bedrock/types.d.ts +27 -0
package/dist/types/llm/bedrock/utils/index.d.ts +5 -0
package/dist/types/llm/bedrock/utils/message_inputs.d.ts +31 -0
package/dist/types/llm/bedrock/utils/message_outputs.d.ts +33 -0
package/dist/types/messages/cache.d.ts +4 -1
package/dist/types/schemas/index.d.ts +1 -0
package/dist/types/schemas/validate.d.ts +36 -0
package/dist/types/tools/CodeExecutor.d.ts +0 -3
package/dist/types/tools/ProgrammaticToolCalling.d.ts +0 -3
package/dist/types/tools/ToolNode.d.ts +3 -1
package/dist/types/tools/ToolSearch.d.ts +148 -0
package/dist/types/types/graph.d.ts +71 -0
package/dist/types/types/llm.d.ts +3 -1
package/dist/types/types/tools.d.ts +42 -2
package/package.json +13 -6
package/src/agents/AgentContext.test.ts +312 -0
package/src/agents/AgentContext.ts +144 -16
package/src/common/enum.ts +5 -1
package/src/graphs/Graph.ts +214 -13
package/src/graphs/MultiAgentGraph.ts +350 -13
package/src/index.ts +4 -1
package/src/llm/bedrock/index.ts +221 -99
package/src/llm/bedrock/llm.spec.ts +616 -0
package/src/llm/bedrock/types.ts +51 -0
package/src/llm/bedrock/utils/index.ts +18 -0
package/src/llm/bedrock/utils/message_inputs.ts +563 -0
package/src/llm/bedrock/utils/message_outputs.ts +310 -0
package/src/messages/__tests__/tools.test.ts +21 -21
package/src/messages/cache.test.ts +304 -0
package/src/messages/cache.ts +183 -53
package/src/messages/core.ts +1 -1
package/src/messages/tools.ts +2 -2
package/src/schemas/index.ts +2 -0
package/src/schemas/validate.test.ts +358 -0
package/src/schemas/validate.ts +238 -0
package/src/scripts/caching.ts +27 -19
package/src/scripts/code_exec_files.ts +58 -15
package/src/scripts/code_exec_multi_session.ts +241 -0
package/src/scripts/code_exec_session.ts +282 -0
package/src/scripts/multi-agent-conditional.ts +1 -0
package/src/scripts/multi-agent-supervisor.ts +1 -0
package/src/scripts/programmatic_exec_agent.ts +4 -4
package/src/scripts/test-handoff-preamble.ts +277 -0
package/src/scripts/test-parallel-handoffs.ts +291 -0
package/src/scripts/test-tools-before-handoff.ts +8 -4
package/src/scripts/test_code_api.ts +361 -0
package/src/scripts/thinking-bedrock.ts +159 -0
package/src/scripts/thinking.ts +39 -18
package/src/scripts/{tool_search_regex.ts → tool_search.ts} +5 -5
package/src/scripts/tools.ts +7 -3
package/src/specs/cache.simple.test.ts +396 -0
package/src/stream.ts +4 -2
package/src/tools/BrowserTools.ts +39 -17
package/src/tools/CodeExecutor.ts +26 -23
package/src/tools/ProgrammaticToolCalling.ts +18 -14
package/src/tools/ToolNode.ts +114 -1
package/src/tools/ToolSearch.ts +1041 -0
package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +0 -2
package/src/tools/__tests__/{ToolSearchRegex.integration.test.ts → ToolSearch.integration.test.ts} +6 -6
package/src/tools/__tests__/ToolSearch.test.ts +1003 -0
package/src/types/graph.test.ts +183 -0
package/src/types/graph.ts +73 -0
package/src/types/llm.ts +3 -1
package/src/types/tools.ts +51 -2
package/dist/cjs/tools/ToolSearchRegex.cjs +0 -455
package/dist/cjs/tools/ToolSearchRegex.cjs.map +0 -1
package/dist/esm/tools/ToolSearchRegex.mjs +0 -448
package/dist/esm/tools/ToolSearchRegex.mjs.map +0 -1
package/dist/types/tools/ToolSearchRegex.d.ts +0 -80
package/src/tools/ToolSearchRegex.ts +0 -535
package/src/tools/__tests__/ToolSearchRegex.test.ts +0 -232

package/src/specs/cache.simple.test.ts ADDED Viewed

@@ -0,0 +1,396 @@
+/* eslint-disable no-console */
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import { config } from 'dotenv';
+config();
+import { Calculator } from '@/tools/Calculator';
+import {
+  AIMessage,
+  BaseMessage,
+  HumanMessage,
+  UsageMetadata,
+} from '@langchain/core/messages';
+import type * as t from '@/types';
+import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
+import { ModelEndHandler, ToolEndHandler } from '@/events';
+import { capitalizeFirstLetter } from './spec.utils';
+import { GraphEvents, Providers } from '@/common';
+import { getLLMConfig } from '@/utils/llmConfig';
+import { getArgs } from '@/scripts/args';
+import { Run } from '@/run';
+/**
+ * These tests verify that prompt caching works correctly across multi-turn
+ * conversations and that messages are not mutated in place.
+ */
+describe('Prompt Caching Integration Tests', () => {
+  jest.setTimeout(120000);
+  const setupTest = (): {
+    collectedUsage: UsageMetadata[];
+    contentParts: Array<t.MessageContentComplex | undefined>;
+    customHandlers: Record<string | GraphEvents, t.EventHandler>;
+  } => {
+    const collectedUsage: UsageMetadata[] = [];
+    const { contentParts, aggregateContent } = createContentAggregator();
+    const customHandlers: Record<string | GraphEvents, t.EventHandler> = {
+      [GraphEvents.TOOL_END]: new ToolEndHandler(),
+      [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
+      [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
+      [GraphEvents.ON_RUN_STEP_COMPLETED]: {
+        handle: (
+          event: GraphEvents.ON_RUN_STEP_COMPLETED,
+          data: t.StreamEventData
+        ): void => {
+          aggregateContent({
+            event,
+            data: data as unknown as { result: t.ToolEndEvent },
+          });
+        },
+      },
+      [GraphEvents.ON_RUN_STEP]: {
+        handle: (
+          event: GraphEvents.ON_RUN_STEP,
+          data: t.StreamEventData
+        ): void => {
+          aggregateContent({ event, data: data as t.RunStep });
+        },
+      },
+      [GraphEvents.ON_RUN_STEP_DELTA]: {
+        handle: (
+          event: GraphEvents.ON_RUN_STEP_DELTA,
+          data: t.StreamEventData
+        ): void => {
+          aggregateContent({ event, data: data as t.RunStepDeltaEvent });
+        },
+      },
+      [GraphEvents.ON_MESSAGE_DELTA]: {
+        handle: (
+          event: GraphEvents.ON_MESSAGE_DELTA,
+          data: t.StreamEventData
+        ): void => {
+          aggregateContent({ event, data: data as t.MessageDeltaEvent });
+        },
+      },
+    };
+    return { collectedUsage, contentParts, customHandlers };
+  };
+  const streamConfig = {
+    configurable: { thread_id: 'cache-test-thread' },
+    streamMode: 'values',
+    version: 'v2' as const,
+  };
+  describe('Anthropic Prompt Caching', () => {
+    const provider = Providers.ANTHROPIC;
+    test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
+      const { userName, location } = await getArgs();
+      const llmConfig = getLLMConfig(provider);
+      const { collectedUsage, customHandlers } = setupTest();
+      const run = await Run.create<t.IState>({
+        runId: 'cache-test-anthropic',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+          tools: [new Calculator()],
+          instructions: 'You are a helpful assistant.',
+          additional_instructions: `User: ${userName}, Location: ${location}`,
+        },
+        returnContent: true,
+        customHandlers,
+      });
+      // Turn 1
+      const turn1Messages: BaseMessage[] = [
+        new HumanMessage('Hello, what is 2+2?'),
+      ];
+      const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
+      const turn1Result = await run.processStream(
+        { messages: turn1Messages },
+        streamConfig
+      );
+      expect(turn1Result).toBeDefined();
+      // Verify original message was NOT mutated
+      expect(JSON.stringify(turn1Messages[0].content)).toBe(
+        turn1ContentSnapshot
+      );
+      expect((turn1Messages[0] as any).content).not.toContain('cache_control');
+      const turn1RunMessages = run.getRunMessages();
+      expect(turn1RunMessages).toBeDefined();
+      expect(turn1RunMessages!.length).toBeGreaterThan(0);
+      // Turn 2 - build on conversation
+      const turn2Messages: BaseMessage[] = [
+        ...turn1Messages,
+        ...turn1RunMessages!,
+        new HumanMessage('Now multiply that by 10'),
+      ];
+      const turn2HumanContentSnapshot = JSON.stringify(
+        turn2Messages[turn2Messages.length - 1].content
+      );
+      const run2 = await Run.create<t.IState>({
+        runId: 'cache-test-anthropic-2',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+          tools: [new Calculator()],
+          instructions: 'You are a helpful assistant.',
+          additional_instructions: `User: ${userName}, Location: ${location}`,
+        },
+        returnContent: true,
+        customHandlers,
+      });
+      const turn2Result = await run2.processStream(
+        { messages: turn2Messages },
+        streamConfig
+      );
+      expect(turn2Result).toBeDefined();
+      // Verify messages were NOT mutated
+      expect(
+        JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
+      ).toBe(turn2HumanContentSnapshot);
+      // Check that we got cache read tokens (indicating caching worked)
+      console.log(`${provider} Usage:`, collectedUsage);
+      expect(collectedUsage.length).toBeGreaterThan(0);
+      console.log(
+        `${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
+      );
+    });
+    test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
+      const llmConfig = getLLMConfig(provider);
+      const { customHandlers } = setupTest();
+      const run = await Run.create<t.IState>({
+        runId: 'cache-test-anthropic-tools',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+          tools: [new Calculator()],
+          instructions:
+            'You are a math assistant. Use the calculator tool for all calculations.',
+        },
+        returnContent: true,
+        customHandlers,
+      });
+      const messages: BaseMessage[] = [
+        new HumanMessage('Calculate 123 * 456 using the calculator'),
+      ];
+      const result = await run.processStream({ messages }, streamConfig);
+      expect(result).toBeDefined();
+      const runMessages = run.getRunMessages();
+      expect(runMessages).toBeDefined();
+      // Should have used the calculator tool
+      const hasToolUse = runMessages?.some(
+        (msg) =>
+          msg._getType() === 'ai' &&
+          ((msg as AIMessage).tool_calls?.length ?? 0) > 0
+      );
+      expect(hasToolUse).toBe(true);
+      console.log(
+        `${capitalizeFirstLetter(provider)} tool call with caching test passed`
+      );
+    });
+  });
+  describe('Bedrock Prompt Caching', () => {
+    const provider = Providers.BEDROCK;
+    test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
+      const { userName, location } = await getArgs();
+      const llmConfig = getLLMConfig(provider);
+      const { collectedUsage, customHandlers } = setupTest();
+      const run = await Run.create<t.IState>({
+        runId: 'cache-test-bedrock',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+          tools: [new Calculator()],
+          instructions: 'You are a helpful assistant.',
+          additional_instructions: `User: ${userName}, Location: ${location}`,
+        },
+        returnContent: true,
+        customHandlers,
+      });
+      // Turn 1
+      const turn1Messages: BaseMessage[] = [
+        new HumanMessage('Hello, what is 5+5?'),
+      ];
+      const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
+      const turn1Result = await run.processStream(
+        { messages: turn1Messages },
+        streamConfig
+      );
+      expect(turn1Result).toBeDefined();
+      // Verify original message was NOT mutated
+      expect(JSON.stringify(turn1Messages[0].content)).toBe(
+        turn1ContentSnapshot
+      );
+      const turn1RunMessages = run.getRunMessages();
+      expect(turn1RunMessages).toBeDefined();
+      expect(turn1RunMessages!.length).toBeGreaterThan(0);
+      // Turn 2
+      const turn2Messages: BaseMessage[] = [
+        ...turn1Messages,
+        ...turn1RunMessages!,
+        new HumanMessage('Multiply that by 3'),
+      ];
+      const turn2HumanContentSnapshot = JSON.stringify(
+        turn2Messages[turn2Messages.length - 1].content
+      );
+      const run2 = await Run.create<t.IState>({
+        runId: 'cache-test-bedrock-2',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+          tools: [new Calculator()],
+          instructions: 'You are a helpful assistant.',
+          additional_instructions: `User: ${userName}, Location: ${location}`,
+        },
+        returnContent: true,
+        customHandlers,
+      });
+      const turn2Result = await run2.processStream(
+        { messages: turn2Messages },
+        streamConfig
+      );
+      expect(turn2Result).toBeDefined();
+      // Verify messages were NOT mutated
+      expect(
+        JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
+      ).toBe(turn2HumanContentSnapshot);
+      console.log(`${provider} Usage:`, collectedUsage);
+      expect(collectedUsage.length).toBeGreaterThan(0);
+      console.log(
+        `${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
+      );
+    });
+    test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
+      const llmConfig = getLLMConfig(provider);
+      const { customHandlers } = setupTest();
+      const run = await Run.create<t.IState>({
+        runId: 'cache-test-bedrock-tools',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
+          tools: [new Calculator()],
+          instructions:
+            'You are a math assistant. Use the calculator tool for all calculations.',
+        },
+        returnContent: true,
+        customHandlers,
+      });
+      const messages: BaseMessage[] = [
+        new HumanMessage('Calculate 789 * 123 using the calculator'),
+      ];
+      const result = await run.processStream({ messages }, streamConfig);
+      expect(result).toBeDefined();
+      const runMessages = run.getRunMessages();
+      expect(runMessages).toBeDefined();
+      // Should have used the calculator tool
+      const hasToolUse = runMessages?.some(
+        (msg) =>
+          msg._getType() === 'ai' &&
+          ((msg as AIMessage).tool_calls?.length ?? 0) > 0
+      );
+      expect(hasToolUse).toBe(true);
+      console.log(
+        `${capitalizeFirstLetter(provider)} tool call with caching test passed`
+      );
+    });
+  });
+  describe('Cross-provider message isolation', () => {
+    test('Messages processed by Anthropic should not affect Bedrock processing', async () => {
+      const anthropicConfig = getLLMConfig(Providers.ANTHROPIC);
+      const bedrockConfig = getLLMConfig(Providers.BEDROCK);
+      const { customHandlers: handlers1 } = setupTest();
+      const { customHandlers: handlers2 } = setupTest();
+      // Create a shared message array
+      const sharedMessages: BaseMessage[] = [
+        new HumanMessage('Hello, what is the capital of France?'),
+      ];
+      const originalContent = JSON.stringify(sharedMessages[0].content);
+      // Process with Anthropic first
+      const anthropicRun = await Run.create<t.IState>({
+        runId: 'cross-provider-anthropic',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...anthropicConfig, promptCache: true } as t.LLMConfig,
+          instructions: 'You are a helpful assistant.',
+        },
+        returnContent: true,
+        customHandlers: handlers1,
+      });
+      const anthropicResult = await anthropicRun.processStream(
+        { messages: sharedMessages },
+        streamConfig
+      );
+      expect(anthropicResult).toBeDefined();
+      // Verify message not mutated
+      expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
+      // Now process with Bedrock using the SAME messages
+      const bedrockRun = await Run.create<t.IState>({
+        runId: 'cross-provider-bedrock',
+        graphConfig: {
+          type: 'standard',
+          llmConfig: { ...bedrockConfig, promptCache: true } as t.LLMConfig,
+          instructions: 'You are a helpful assistant.',
+        },
+        returnContent: true,
+        customHandlers: handlers2,
+      });
+      const bedrockResult = await bedrockRun.processStream(
+        { messages: sharedMessages },
+        streamConfig
+      );
+      expect(bedrockResult).toBeDefined();
+      // Verify message STILL not mutated after both providers processed
+      expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
+      console.log('Cross-provider message isolation test passed');
+    });
+  });
+});

package/src/stream.ts CHANGED Viewed

@@ -339,7 +339,8 @@ hasToolCallChunks: ${hasToolCallChunks}
         (c) =>
           (c.type?.startsWith(ContentTypes.THINKING) ?? false) ||
           (c.type?.startsWith(ContentTypes.REASONING) ?? false) ||
-          (c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false)
+          (c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false) ||
+          c.type === 'redacted_thinking'
       )
     ) {
       await graph.dispatchReasoningDelta(stepId, {
@@ -365,7 +366,8 @@ hasToolCallChunks: ${hasToolCallChunks}
       Array.isArray(chunk.content) &&
       (chunk.content[0]?.type === ContentTypes.THINKING ||
         chunk.content[0]?.type === ContentTypes.REASONING ||
-        chunk.content[0]?.type === ContentTypes.REASONING_CONTENT)
+        chunk.content[0]?.type === ContentTypes.REASONING_CONTENT ||
+        chunk.content[0]?.type === 'redacted_thinking')
     ) {
       reasoning_content = 'valid';
     } else if (

package/src/tools/BrowserTools.ts CHANGED Viewed

@@ -83,12 +83,16 @@ const BrowserClickSchema = z.object({
 const BrowserTypeSchema = z.object({
   index: z
     .number()
-    .describe('The [index] of the INPUT element to type into. Target <input> or <textarea> elements. Check fieldLabel to identify the correct field.'),
+    .describe(
+      'The [index] of the INPUT element to type into. Target <input> or <textarea> elements. Check fieldLabel to identify the correct field.'
+    ),
   text: z.string().describe('The text to type into the element'),
   pressEnter: z
     .boolean()
     .optional()
-    .describe('Whether to press Enter after typing (useful for search forms and submitting)'),
+    .describe(
+      'Whether to press Enter after typing (useful for search forms and submitting)'
+    ),
 });
 const BrowserNavigateSchema = z.object({
@@ -134,13 +138,17 @@ const BrowserGetPageStateSchema = z.object({});
 const BrowserKeypressSchema = z.object({
   keys: z
     .string()
-    .describe('Keyboard keys to press. Use "+" to combine modifiers (e.g., "Control+Enter", "Control+a", "Escape", "Tab", "Enter"). Common shortcuts: Control+Enter (submit forms/send), Escape (close dialogs), Tab (next field).'),
+    .describe(
+      'Keyboard keys to press. Use "+" to combine modifiers (e.g., "Control+Enter", "Control+a", "Escape", "Tab", "Enter"). Common shortcuts: Control+Enter (submit forms/send), Escape (close dialogs), Tab (next field).'
+    ),
 });
 const BrowserSwitchTabSchema = z.object({
   tabId: z
     .number()
-    .describe('The tab ID to switch to. Use the tab IDs shown in the tabs list from page state.'),
+    .describe(
+      'The tab ID to switch to. Use the tab IDs shown in the tabs list from page state.'
+    ),
 });
 /**
@@ -187,7 +195,9 @@ function formatResultForLLM(
   }
   if (result.elementList != null && result.elementList !== '') {
     // Add hint about fieldLabel and targeting inputs for form interactions
-    parts.push(`\n**Interactive Elements** (for typing: target <input> elements with fieldLabel, NOT parent <div> containers):\n${result.elementList}`);
+    parts.push(
+      `\n**Interactive Elements** (for typing: target <input> elements with fieldLabel, NOT parent <div> containers):\n${result.elementList}`
+    );
   }
   if (result.screenshot != null && result.screenshot !== '') {
     parts.push('\n[Screenshot captured and displayed to user]');
@@ -263,7 +273,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('click'), {
       name: EBrowserTools.CLICK,
-      description: 'Click element by [index]. Use fieldLabel attribute to identify correct element. For form fields, target <input> elements NOT parent <div> containers.',
+      description:
+        'Click element by [index]. Use fieldLabel attribute to identify correct element. For form fields, target <input> elements NOT parent <div> containers.',
       schema: BrowserClickSchema,
     })
   );
@@ -272,7 +283,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('type'), {
       name: EBrowserTools.TYPE,
-      description: 'Type text into <input> element by [index]. CRITICAL: Always target <input> or <textarea> tags (NOT parent <div> containers). Use fieldLabel to identify correct field (e.g., fieldLabel="To recipients" for To field).',
+      description:
+        'Type text into <input> element by [index]. CRITICAL: Always target <input> or <textarea> tags (NOT parent <div> containers). Use fieldLabel to identify correct field (e.g., fieldLabel="To recipients" for To field).',
       schema: BrowserTypeSchema,
     })
   );
@@ -281,7 +293,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('navigate'), {
       name: EBrowserTools.NAVIGATE,
-      description: 'Navigate to URL (include https://). Returns new page element list.',
+      description:
+        'Navigate to URL (include https://). Returns new page element list.',
       schema: BrowserNavigateSchema,
     })
   );
@@ -290,7 +303,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('scroll'), {
       name: EBrowserTools.SCROLL,
-      description: 'Scroll page (up/down/left/right). Returns updated element list.',
+      description:
+        'Scroll page (up/down/left/right). Returns updated element list.',
       schema: BrowserScrollSchema,
     })
   );
@@ -299,7 +313,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('extract'), {
       name: EBrowserTools.EXTRACT,
-      description: 'Extract page content. Returns URL, title, and element list.',
+      description:
+        'Extract page content. Returns URL, title, and element list.',
       schema: BrowserExtractSchema,
     })
   );
@@ -308,7 +323,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('hover'), {
       name: EBrowserTools.HOVER,
-      description: 'Hover element by [index] to reveal menus/tooltips. Returns updated element list.',
+      description:
+        'Hover element by [index] to reveal menus/tooltips. Returns updated element list.',
       schema: BrowserHoverSchema,
     })
   );
@@ -317,7 +333,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('wait'), {
       name: EBrowserTools.WAIT,
-      description: 'Wait for async content to load. Returns updated element list.',
+      description:
+        'Wait for async content to load. Returns updated element list.',
       schema: BrowserWaitSchema,
     })
   );
@@ -326,7 +343,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('back'), {
       name: EBrowserTools.BACK,
-      description: 'Go back in browser history. Returns previous page element list.',
+      description:
+        'Go back in browser history. Returns previous page element list.',
       schema: BrowserBackSchema,
     })
   );
@@ -335,7 +353,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('screenshot'), {
       name: EBrowserTools.SCREENSHOT,
-      description: 'Capture screenshot. Displayed to user. Use get_page_state for automation.',
+      description:
+        'Capture screenshot. Displayed to user. Use get_page_state for automation.',
       schema: BrowserScreenshotSchema,
     })
   );
@@ -344,7 +363,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('get_page_state'), {
       name: EBrowserTools.GET_PAGE_STATE,
-      description: 'Get page URL, title, and interactive elements with [index] for actions. Start here.',
+      description:
+        'Get page URL, title, and interactive elements with [index] for actions. Start here.',
       schema: BrowserGetPageStateSchema,
     })
   );
@@ -353,7 +373,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('keypress'), {
       name: EBrowserTools.KEYPRESS,
-      description: 'Send keyboard shortcut or key press. Use for: Control+Enter (send email/submit), Escape (close dialog/cancel), Tab (next field), Enter (confirm). The keys are sent to the currently focused element.',
+      description:
+        'Send keyboard shortcut or key press. Use for: Control+Enter (send email/submit), Escape (close dialog/cancel), Tab (next field), Enter (confirm). The keys are sent to the currently focused element.',
       schema: BrowserKeypressSchema,
     })
   );
@@ -362,7 +383,8 @@ export function createBrowserTools(
   tools.push(
     tool(createToolFunction('switch_tab'), {
       name: EBrowserTools.SWITCH_TAB,
-      description: 'Switch to a different browser tab by its ID. Tab IDs are shown in the page state. Use this to work with existing open tabs (e.g., use existing Gmail tab instead of opening a new one).',
+      description:
+        'Switch to a different browser tab by its ID. Tab IDs are shown in the page state. Use this to work with existing open tabs (e.g., use existing Gmail tab instead of opening a new one).',
       schema: BrowserSwitchTabSchema,
     })
   );

package/src/tools/CodeExecutor.ts CHANGED Viewed

@@ -17,7 +17,7 @@ export const getCodeBaseURL = (): string =>
 const imageMessage = 'Image is already displayed to the user';
 const otherMessage = 'File is already downloaded by the user';
 const accessMessage =
-  'Note: Files are READ-ONLY. Save changes to NEW filenames. To access these files in future executions, provide the `session_id` as a parameter (not in your code).';
+  'Note: Files from previous executions are automatically available and can be modified.';
 const emptyOutputMessage =
   'stdout: Empty. Ensure you\'re writing output explicitly.\n';
@@ -41,7 +41,8 @@ const CodeExecutionToolSchema = z.object({
   code: z.string()
     .describe(`The complete, self-contained code to execute, without any truncation or minimization.
 - The environment is stateless; variables and imports don't persist between executions.
-- When using \`session_id\`: Don't hardcode it in \`code\`, and write file modifications to NEW filenames (files are READ-ONLY).
+- Generated files from previous executions are automatically available in "/mnt/data/".
+- Files from previous executions are automatically available and can be modified in place.
 - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
 - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
 - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -50,17 +51,6 @@ const CodeExecutionToolSchema = z.object({
 - js: use the \`console\` or \`process\` methods for all outputs.
 - r: IMPORTANT: No X11 display available. ALL graphics MUST use Cairo library (library(Cairo)).
 - Other languages: use appropriate output functions.`),
-  session_id: z
-    .string()
-    .optional()
-    .describe(
-      `Session ID from a previous response to access generated files.
-- Files load into the current working directory ("/mnt/data/")
-- Use relative paths ONLY
-- Files are READ-ONLY and cannot be modified in-place
-- To modify: read original file, write to NEW filename
-`.trim()
-    ),
   args: z
     .array(z.string())
     .optional()
@@ -107,15 +97,33 @@ Rules:
 `.trim();
   return tool<typeof CodeExecutionToolSchema>(
-    async ({ lang, code, session_id, ...rest }) => {
-      const postData = {
+    async ({ lang, code, ...rest }, config) => {
+      /**
+       * Extract session context from config.toolCall (injected by ToolNode).
+       * - session_id: For API to associate with previous session
+       * - _injected_files: File refs to pass directly (avoids /files endpoint race condition)
+       */
+      const { session_id, _injected_files } = (config.toolCall ?? {}) as {
+        session_id?: string;
+        _injected_files?: t.CodeEnvFile[];
+      };
+      const postData: Record<string, unknown> = {
         lang,
         code,
         ...rest,
         ...params,
       };
-      if (session_id != null && session_id.length > 0) {
+      /**
+       * File injection priority:
+       * 1. Use _injected_files from ToolNode (avoids /files endpoint race condition)
+       * 2. Fall back to fetching from /files endpoint if session_id provided but no injected files
+       */
+      if (_injected_files && _injected_files.length > 0) {
+        postData.files = _injected_files;
+      } else if (session_id != null && session_id.length > 0) {
+        /** Fallback: fetch from /files endpoint (may have race condition issues) */
         try {
           const filesEndpoint = `${baseEndpoint}/files/${session_id}?detail=full`;
           const fetchOptions: RequestInit = {
@@ -140,7 +148,6 @@ Rules:
           const files = await response.json();
           if (Array.isArray(files) && files.length > 0) {
             const fileReferences: t.CodeEnvFile[] = files.map((file) => {
-              // Extract the ID from the file name (part after session ID prefix and before extension)
               const nameParts = file.name.split('/');
               const id = nameParts.length > 1 ? nameParts[1].split('.')[0] : '';
@@ -151,11 +158,7 @@ Rules:
               };
             });
-            if (!postData.files) {
-              postData.files = fileReferences;
-            } else if (Array.isArray(postData.files)) {
-              postData.files = [...postData.files, ...fileReferences];
-            }
+            postData.files = fileReferences;
           }
         } catch {
           // eslint-disable-next-line no-console
@@ -204,7 +207,7 @@ Rules:
             }
           }
-          formattedOutput += `\nsession_id: ${result.session_id}\n\n${accessMessage}`;
+          formattedOutput += `\n\n${accessMessage}`;
           return [
             formattedOutput.trim(),
             {