npm - @librechat/agents - Versions diffs - 3.1.72 → 3.1.74 - Mend

@librechat/agents 3.1.72 → 3.1.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/dist/cjs/agents/AgentContext.cjs +62 -20
package/dist/cjs/agents/AgentContext.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +11 -1
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/main.cjs +1 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/messages/format.cjs +27 -1
package/dist/cjs/messages/format.cjs.map +1 -1
package/dist/cjs/tools/BashExecutor.cjs +21 -11
package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
package/dist/cjs/tools/CodeExecutor.cjs +37 -10
package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -11
package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +21 -2
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/esm/agents/AgentContext.mjs +62 -20
package/dist/esm/agents/AgentContext.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +11 -1
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/main.mjs +1 -1
package/dist/esm/messages/format.mjs +27 -1
package/dist/esm/messages/format.mjs.map +1 -1
package/dist/esm/tools/BashExecutor.mjs +22 -12
package/dist/esm/tools/BashExecutor.mjs.map +1 -1
package/dist/esm/tools/CodeExecutor.mjs +37 -11
package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -12
package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +21 -2
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/types/agents/AgentContext.d.ts +15 -0
package/dist/types/messages/format.d.ts +11 -1
package/dist/types/tools/CodeExecutor.d.ts +6 -0
package/dist/types/types/tools.d.ts +9 -0
package/package.json +1 -1
package/src/agents/AgentContext.ts +66 -27
package/src/agents/__tests__/AgentContext.test.ts +178 -0
package/src/graphs/Graph.ts +12 -1
package/src/messages/ensureThinkingBlock.test.ts +167 -0
package/src/messages/format.ts +29 -1
package/src/tools/BashExecutor.ts +37 -13
package/src/tools/CodeExecutor.ts +55 -11
package/src/tools/ProgrammaticToolCalling.ts +29 -14
package/src/tools/ToolNode.ts +21 -2
package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +60 -0
package/src/tools/__tests__/ToolNode.session.test.ts +124 -0
package/src/types/tools.ts +9 -0

package/src/agents/AgentContext.ts CHANGED

@@ -681,10 +681,47 @@ export class AgentContext {
     if (!this.toolDefinitions) {
       return [];
     }
-    return this.toolDefinitions.filter(
-      (def) =>
+    /**
+     * Mirror `getEventDrivenToolsForBinding`'s gate: a definition is only
+     * bound to the model when its `allowed_callers` include `'direct'` and
+     * (if deferred) it has been discovered. Filtering by `defer_loading`
+     * alone left programmatic-only definitions counted in
+     * `toolSchemaTokens` even though they were never bound.
+     */
+    return this.toolDefinitions.filter((def) => {
+      const allowedCallers = def.allowed_callers ?? ['direct'];
+      if (!allowedCallers.includes('direct')) {
+        return false;
+      }
+      return (
         def.defer_loading !== true || this.discoveredToolNames.has(def.name)
-    );
+      );
+    });
+  }
+  /**
+   * Single source of truth for "which entries of `this.tools` should be
+   * treated as actually bound". Callers:
+   *   - `getToolsForBinding` (non-event-driven branch)
+   *   - `getEventDrivenToolsForBinding` (appends instance tools alongside
+   *     schema-only definitions)
+   *   - `calculateInstructionTokens` (counts schema bytes for accounting)
+   *
+   * In event-driven mode (`toolDefinitions` present) instance tools are
+   * appended unfiltered; outside event-driven mode they pass through
+   * `filterToolsForBinding`. Centralizing the decision here prevents the
+   * accounting/binding paths from drifting apart, which was the root
+   * cause of the original miscount.
+   */
+  private getEffectiveInstanceTools(): t.GraphTools | undefined {
+    if (!this.tools) {
+      return undefined;
+    }
+    const isEventDriven = (this.toolDefinitions?.length ?? 0) > 0;
+    if (isEventDriven || !this.toolRegistry) {
+      return this.tools;
+    }
+    return this.filterToolsForBinding(this.tools);
   }
   /**
@@ -703,9 +740,17 @@ export class AgentContext {
      * populated after `fromConfig()` kicks off the initial calculation, so
      * callers that mutate `graphTools` must re-trigger this method to
      * refresh `toolSchemaTokens`.
+     *
+     * Use `getEffectiveInstanceTools()` so accounting reflects exactly the
+     * subset that `getToolsForBinding` would emit — preventing the
+     * worst-case-ceiling miscount that triggered spurious `empty_messages`
+     * preflight rejections at low `maxContextTokens`. Deferred and
+     * non-`'direct'` `toolDefinitions` are excluded by
+     * `getActiveToolDefinitions()` below.
      */
     const instanceTools: t.GraphTools = [
-      ...((this.tools as t.GenericTool[] | undefined) ?? []),
+      ...((this.getEffectiveInstanceTools() as t.GenericTool[] | undefined) ??
+        []),
       ...((this.graphTools as t.GenericTool[] | undefined) ?? []),
     ];
@@ -900,8 +945,16 @@ export class AgentContext {
    */
   getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
     const maxContextTokens = this.maxContextTokens ?? 0;
-    const toolCount =
-      (this.tools?.length ?? 0) + this.getActiveToolDefinitions().length;
+    /**
+     * Derive `toolCount` from `getToolsForBinding()` so the diagnostic stays
+     * aligned with what is actually bound to the model — and with what
+     * `calculateInstructionTokens` counts into `toolSchemaTokens`. Using raw
+     * `this.tools.length` would inflate the count whenever the registry
+     * marks instance tools as deferred-undiscovered or non-`'direct'`,
+     * producing the same misleading "N tools" diagnostic this fix is meant
+     * to eliminate.
+     */
+    const toolCount = this.getToolsForBinding()?.length ?? 0;
     const messageCount = messages?.length ?? 0;
     let messageTokens = 0;
@@ -1014,10 +1067,7 @@ export class AgentContext {
       return this.getEventDrivenToolsForBinding();
     }
-    const filtered =
-      !this.tools || !this.toolRegistry
-        ? this.tools
-        : this.filterToolsForBinding(this.tools);
+    const filtered = this.getEffectiveInstanceTools();
     if (this.graphTools && this.graphTools.length > 0) {
       return [...(filtered ?? []), ...this.graphTools];
@@ -1032,21 +1082,9 @@ export class AgentContext {
       return this.graphTools ?? [];
     }
-    const defsToInclude = this.toolDefinitions.filter((def) => {
-      const allowedCallers = def.allowed_callers ?? ['direct'];
-      if (!allowedCallers.includes('direct')) {
-        return false;
-      }
-      if (
-        def.defer_loading === true &&
-        !this.discoveredToolNames.has(def.name)
-      ) {
-        return false;
-      }
-      return true;
-    });
-    const schemaTools = createSchemaOnlyTools(defsToInclude) as t.GraphTools;
+    const schemaTools = createSchemaOnlyTools(
+      this.getActiveToolDefinitions()
+    ) as t.GraphTools;
     const allTools = [...schemaTools];
@@ -1054,8 +1092,9 @@ export class AgentContext {
       allTools.push(...this.graphTools);
     }
-    if (this.tools && this.tools.length > 0) {
-      allTools.push(...this.tools);
+    const instanceTools = this.getEffectiveInstanceTools();
+    if (instanceTools && instanceTools.length > 0) {
+      allTools.push(...instanceTools);
     }
     return allTools;

package/src/agents/__tests__/AgentContext.test.ts CHANGED

@@ -404,6 +404,141 @@ describe('AgentContext', () => {
       expect(ctxWithDeferred.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
     });
+    it('excludes programmatic-only toolDefinitions from toolSchemaTokens', async () => {
+      // getEventDrivenToolsForBinding excludes definitions whose
+      // allowed_callers omit 'direct'. Accounting must mirror that — a
+      // programmatic-only definition is never bound to the model and
+      // shouldn't inflate toolSchemaTokens.
+      const activeDef: t.LCTool = {
+        name: 'active_tool',
+        description: 'Always loaded',
+        parameters: { type: 'object', properties: {} },
+      };
+      const programmaticDef: t.LCTool = {
+        name: 'programmatic_tool',
+        description: 'Only callable via code execution',
+        parameters: { type: 'object', properties: {} },
+        allowed_callers: ['code_execution'],
+      };
+      const ctxBase = createBasicContext({
+        agentConfig: { toolDefinitions: [activeDef] },
+        tokenCounter: mockTokenCounter,
+      });
+      const ctxWithProgrammatic = createBasicContext({
+        agentConfig: { toolDefinitions: [activeDef, programmaticDef] },
+        tokenCounter: mockTokenCounter,
+      });
+      await ctxBase.tokenCalculationPromise;
+      await ctxWithProgrammatic.tokenCalculationPromise;
+      expect(ctxWithProgrammatic.toolSchemaTokens).toBe(
+        ctxBase.toolSchemaTokens
+      );
+    });
+    it('excludes deferred-undiscovered instance tools from toolSchemaTokens', async () => {
+      const activeTool = createMockTool('active_tool');
+      const deferredTool = createMockTool('deferred_tool');
+      const programmaticTool = createMockTool('programmatic_tool');
+      const toolRegistry: t.LCToolRegistry = new Map([
+        ['active_tool', { name: 'active_tool' }],
+        ['deferred_tool', { name: 'deferred_tool', defer_loading: true }],
+        [
+          'programmatic_tool',
+          {
+            name: 'programmatic_tool',
+            allowed_callers: ['code_execution'],
+          },
+        ],
+      ]);
+      const ctxBase = createBasicContext({
+        agentConfig: { tools: [activeTool], toolRegistry },
+        tokenCounter: mockTokenCounter,
+      });
+      const ctxWithExcluded = createBasicContext({
+        agentConfig: {
+          tools: [activeTool, deferredTool, programmaticTool],
+          toolRegistry,
+        },
+        tokenCounter: mockTokenCounter,
+      });
+      await ctxBase.tokenCalculationPromise;
+      await ctxWithExcluded.tokenCalculationPromise;
+      expect(ctxWithExcluded.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
+    });
+    it('includes deferred instance tools once discovered via discoveredTools input', async () => {
+      const tools = [createMockTool('deferred_tool')];
+      const toolRegistry: t.LCToolRegistry = new Map([
+        ['deferred_tool', { name: 'deferred_tool', defer_loading: true }],
+      ]);
+      const ctxUndiscovered = createBasicContext({
+        agentConfig: { tools, toolRegistry },
+        tokenCounter: mockTokenCounter,
+      });
+      const ctxDiscovered = createBasicContext({
+        agentConfig: {
+          tools,
+          toolRegistry,
+          discoveredTools: ['deferred_tool'],
+        },
+        tokenCounter: mockTokenCounter,
+      });
+      await ctxUndiscovered.tokenCalculationPromise;
+      await ctxDiscovered.tokenCalculationPromise;
+      expect(ctxUndiscovered.toolSchemaTokens).toBe(0);
+      expect(ctxDiscovered.toolSchemaTokens).toBeGreaterThan(0);
+    });
+    it('does not filter instance tools in event-driven mode (matches getEventDrivenToolsForBinding)', async () => {
+      // In event-driven mode, getEventDrivenToolsForBinding appends
+      // `this.tools` UNFILTERED. Accounting must do the same — otherwise we
+      // under-count and risk exceeding the model's context budget.
+      const activeDef: t.LCTool = {
+        name: 'active_def',
+        description: 'Always loaded',
+        parameters: { type: 'object', properties: {} },
+      };
+      const nativeTool = createMockTool('native_tool');
+      // Registry marks the native tool as deferred-undiscovered. In the
+      // non-event-driven path this would exclude it; in event-driven mode
+      // it is still bound and must still be counted.
+      const toolRegistry: t.LCToolRegistry = new Map([
+        ['native_tool', { name: 'native_tool', defer_loading: true }],
+      ]);
+      const ctxWithoutNative = createBasicContext({
+        agentConfig: {
+          toolDefinitions: [activeDef],
+          toolRegistry,
+        },
+        tokenCounter: mockTokenCounter,
+      });
+      const ctxWithNative = createBasicContext({
+        agentConfig: {
+          toolDefinitions: [activeDef],
+          tools: [nativeTool],
+          toolRegistry,
+        },
+        tokenCounter: mockTokenCounter,
+      });
+      await ctxWithoutNative.tokenCalculationPromise;
+      await ctxWithNative.tokenCalculationPromise;
+      expect(ctxWithNative.toolSchemaTokens).toBeGreaterThan(
+        ctxWithoutNative.toolSchemaTokens
+      );
+    });
     it('includes deferred toolDefinitions once discovered via discoveredTools input', async () => {
       const toolDefinitions: t.LCTool[] = [
         {
@@ -448,6 +583,36 @@ describe('AgentContext', () => {
       expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
     });
+    it('getTokenBudgetBreakdown toolCount excludes deferred-undiscovered instance tools', () => {
+      // Mirrors the toolDefinitions test for the instance-tools path so
+      // toolCount stays aligned with toolSchemaTokens (and with what
+      // getToolsForBinding actually emits) for non-event-driven runs.
+      const tools = [
+        createMockTool('active_tool'),
+        createMockTool('deferred_tool'),
+        createMockTool('programmatic_tool'),
+      ];
+      const toolRegistry: t.LCToolRegistry = new Map([
+        ['active_tool', { name: 'active_tool' }],
+        ['deferred_tool', { name: 'deferred_tool', defer_loading: true }],
+        [
+          'programmatic_tool',
+          {
+            name: 'programmatic_tool',
+            allowed_callers: ['code_execution'],
+          },
+        ],
+      ]);
+      const ctx = createBasicContext({
+        agentConfig: { tools, toolRegistry },
+      });
+      expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
+      ctx.markToolsAsDiscovered(['deferred_tool']);
+      expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
+    });
     it('getTokenBudgetBreakdown toolCount reflects newly discovered deferred tools', () => {
       const toolDefinitions: t.LCTool[] = [
         {
@@ -464,6 +629,19 @@ describe('AgentContext', () => {
       expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
     });
+    it('getTokenBudgetBreakdown toolCount includes graphTools', () => {
+      // graphTools (handoff/subagent) are bound to the model alongside
+      // instance tools. Now that toolCount derives from getToolsForBinding(),
+      // graphTools are reflected in the diagnostic just like they're
+      // counted in toolSchemaTokens. Locks in that alignment.
+      const ctx = createBasicContext({
+        agentConfig: { tools: [createMockTool('direct_tool')] },
+      });
+      ctx.graphTools = [createMockTool('handoff_tool')];
+      expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
+    });
     it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
       const toolDefinitions: t.LCTool[] = [
         {

package/src/graphs/Graph.ts CHANGED

@@ -898,10 +898,21 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       if (
         isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
       ) {
+        /**
+         * Pass `this.startIndex` so the function can distinguish CURRENT-run
+         * AI messages (the agent's own iterations — possibly without a
+         * leading thinking block, which Claude is allowed to skip) from
+         * historical context that genuinely needs the
+         * `[Previous agent context]` placeholder. Without this signal the
+         * function would convert the agent's own in-run tool_use messages,
+         * polluting the next iteration's prompt with a placeholder the
+         * model treats as suspicious injected content.
+         */
         finalMessages = ensureThinkingBlockInMessages(
           finalMessages,
           agentContext.provider,
-          config
+          config,
+          this.startIndex
         );
       }

package/src/messages/ensureThinkingBlock.test.ts CHANGED

@@ -1209,4 +1209,171 @@ describe('ensureThinkingBlockInMessages', () => {
       expect(outputImageBlock).not.toBe(originalImageBlock);
     });
   });
+  describe('runStartIndex (current-run boundary)', () => {
+    /**
+     * Claude is allowed to skip a thinking block before a tool_use (cf.
+     * PR #116). When the agent's own first iteration produces an
+     * `AI(tool_use, no thinking)`, the function would otherwise convert
+     * it to a `[Previous agent context]` HumanMessage — polluting the
+     * next iteration's prompt with text the model treats as suspicious
+     * injected content. The model then ignores its own real prior tool
+     * result and re-runs the tool to verify, often failing because the
+     * subsequent sandbox doesn't have the file.
+     *
+     * The `runStartIndex` parameter tells the function which messages
+     * are the agent's own in-run work: those at or after it must NEVER
+     * be converted, even if no thinking block appears in the chain.
+     */
+    test('preserves the agent first-iteration AI(tool_use) when its index is at runStartIndex', () => {
+      const messages = [
+        new HumanMessage({ content: 'fetch the data' }),
+        // No thinking block — Claude validly skipped it before tool_use
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c1', name: 'fetch', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'data', tool_call_id: 'c1' }),
+      ];
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 1
+      );
+      // All 3 preserved — the AI at index 1 is the agent's own work
+      expect(result).toHaveLength(3);
+      expect(result[1]).toBeInstanceOf(AIMessage);
+      expect((result[1] as AIMessage).tool_calls).toHaveLength(1);
+      expect(result[2]).toBeInstanceOf(ToolMessage);
+      // No placeholder leaked in
+      expect(getTextContent(result[1])).not.toContain(
+        '[Previous agent context]'
+      );
+    });
+    test('preserves multiple in-run AI(tool_use) iterations without thinking blocks', () => {
+      const messages = [
+        new HumanMessage({ content: 'do work' }),
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c1', name: 'step1', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r1', tool_call_id: 'c1' }),
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c2', name: 'step2', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r2', tool_call_id: 'c2' }),
+      ];
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 1
+      );
+      expect(result).toHaveLength(5);
+      expect(result[1]).toBeInstanceOf(AIMessage);
+      expect(result[3]).toBeInstanceOf(AIMessage);
+      // Neither AI was converted
+      expect(getTextContent(result[1])).not.toContain(
+        '[Previous agent context]'
+      );
+      expect(getTextContent(result[3])).not.toContain(
+        '[Previous agent context]'
+      );
+    });
+    test('still converts pre-runStartIndex history that lacks thinking blocks', () => {
+      // Real handoff scenario: a prior non-thinking agent's tool calls
+      // appear before this run started. They genuinely need the
+      // placeholder (the legacy reason this function exists).
+      const messages = [
+        new HumanMessage({ content: 'first request' }),
+        new AIMessage({
+          content: 'using tool',
+          tool_calls: [
+            { id: 'old', name: 'legacy', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'old result', tool_call_id: 'old' }),
+        // Current run starts here — say after a handoff. Index >= 3 is
+        // the new agent's own work.
+      ];
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 3
+      );
+      // The pre-run AI(tool_use)+Tool got converted to a placeholder
+      expect(result).toHaveLength(2);
+      expect(result[0]).toBeInstanceOf(HumanMessage);
+      expect(result[1]).toBeInstanceOf(HumanMessage);
+      expect(getTextContent(result[1])).toContain('[Previous agent context]');
+    });
+    test('falls back to chainHasThinkingBlock heuristic when runStartIndex is undefined (backward compat)', () => {
+      const messages = [
+        new HumanMessage({ content: 'do work' }),
+        // No reasoning + no runStartIndex hint → still gets converted
+        // (preserves the prior behavior for callers that haven't been
+        // updated to pass the boundary).
+        new AIMessage({
+          content: 'using tool',
+          tool_calls: [
+            { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
+      ];
+      const result = ensureThinkingBlockInMessages(messages, Providers.BEDROCK);
+      expect(result).toHaveLength(2);
+      expect(result[1]).toBeInstanceOf(HumanMessage);
+      expect(getTextContent(result[1])).toContain('[Previous agent context]');
+    });
+    test('runStartIndex of 0 is honored (whole array is the current run)', () => {
+      // Edge: a fresh run with no prior history at all. Everything is
+      // in-run and must be preserved even without thinking blocks.
+      const messages = [
+        new HumanMessage({ content: 'do work' }),
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
+      ];
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 0
+      );
+      expect(result).toHaveLength(3);
+      expect(result[1]).toBeInstanceOf(AIMessage);
+      expect(getTextContent(result[1])).not.toContain(
+        '[Previous agent context]'
+      );
+    });
+  });
 });

package/src/messages/format.ts CHANGED

@@ -1391,12 +1391,23 @@ function appendToolCalls(
  * @param messages - Array of messages to process
  * @param provider - The provider being used (unused but kept for future compatibility)
  * @param config - Optional RunnableConfig for structured agent logging
+ * @param runStartIndex - Index in `messages` where the CURRENT run's own
+ *   appended AI/Tool messages begin (i.e. anything at this index or later
+ *   was just produced by this run's own iterations, not historical
+ *   context). When provided, AI messages at or after this index are
+ *   never converted to `[Previous agent context]` placeholders — Claude
+ *   can validly skip a thinking block before a tool_use (cf. PR #116),
+ *   so the agent's own in-run iterations must not be misclassified as
+ *   foreign history. Without the signal the function falls back to its
+ *   prior heuristic (`chainHasThinkingBlock`), preserving backward
+ *   compatibility for callers that don't yet pass the boundary.
  * @returns The messages array with tool sequences converted to buffer strings if necessary
  */
 export function ensureThinkingBlockInMessages(
   messages: BaseMessage[],
   _provider: Providers,
-  config?: RunnableConfig
+  config?: RunnableConfig,
+  runStartIndex?: number
 ): BaseMessage[] {
   if (messages.length === 0) {
     return messages;
@@ -1483,6 +1494,23 @@ export function ensureThinkingBlockInMessages(
     // but follow-ups have content: "" with only tool_calls. These are the
     // same agent's turn and must NOT be converted to HumanMessages.
     if (hasToolUse && !hasThinkingBlock) {
+      // Current-run boundary check: anything at or after `runStartIndex`
+      // is the current run's own work — preserve it. Claude is allowed
+      // to skip a thinking block before a tool_use (cf. PR #116 in the
+      // agents repo), so the agent's own first-iteration AI message can
+      // legitimately have tool_calls without reasoning. Converting it to
+      // a `[Previous agent context]` placeholder pollutes the next
+      // iteration's prompt — the LLM sees the placeholder, treats it as
+      // suspicious injected content, ignores its own real prior tool
+      // result, and re-runs the tool to verify (which then often fails
+      // because subsequent calls land in fresh sandboxes without the
+      // file). Skip the conversion when we know this is in-run.
+      if (runStartIndex !== undefined && i >= runStartIndex) {
+        result.push(msg);
+        i++;
+        continue;
+      }
       // Walk backwards — if an earlier AI message in the same chain (before
       // the nearest HumanMessage) has a thinking/reasoning block, this is a
       // continuation of a thinking-enabled turn, not a non-thinking handoff.

package/src/tools/BashExecutor.ts CHANGED

@@ -3,17 +3,23 @@ import fetch, { RequestInit } from 'node-fetch';
 import { HttpsProxyAgent } from 'https-proxy-agent';
 import { tool, DynamicStructuredTool } from '@langchain/core/tools';
 import type * as t from '@/types';
-import { imageExtRegex, getCodeBaseURL } from './CodeExecutor';
+import { getCodeBaseURL, renderFileSection } from './CodeExecutor';
 import { Constants } from '@/common';
 config();
-const imageMessage = 'Image is already displayed to the user';
 const otherMessage = 'File is already downloaded by the user';
+const inheritedFileMessage =
+  'Available as an input — already known to the user';
 const accessMessage =
   'Note: Files from previous executions are automatically available and can be modified.';
 const emptyOutputMessage =
   'stdout: Empty. Ensure you\'re writing output explicitly.\n';
+const inheritedFilesHeader =
+  'Available files (inputs, not generated by this execution):';
+const generatedFilesHeader = 'Generated files:';
+const inheritedNote =
+  'Note: Files in "Available files" are inputs the user (or a skill) already provided to the sandbox. They were not produced by this execution and you should not present them as new outputs in your response.';
 const baseEndpoint = getCodeBaseURL();
 const EXEC_ENDPOINT = `${baseEndpoint}/exec`;
@@ -198,20 +204,38 @@ function createBashExecutionTool(
         }
         if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
         if (result.files && result.files.length > 0) {
-          formattedOutput += 'Generated files:\n';
+          /* Split inherited (read-only / unchanged-input passthroughs from
+           * codeapi) from genuine generated outputs. The LLM was previously
+           * shown skill files under "Generated files:" with the message
+           * "File is already downloaded by the user", which led it to
+           * (a) believe it had just produced files it merely referenced
+           * and (b) sometimes invent paths like /mnt/user-data/uploads/
+           * trying to find the "originals". Labeling them as inputs makes
+           * the mental model accurate. */
+          const inheritedFiles = result.files.filter(
+            (f) => f.inherited === true
+          );
+          const generatedFiles = result.files.filter(
+            (f) => f.inherited !== true
+          );
-          const fileCount = result.files.length;
-          for (let i = 0; i < fileCount; i++) {
-            const file = result.files[i];
-            const isImage = imageExtRegex.test(file.name);
-            formattedOutput += `- /mnt/data/${file.name} | ${isImage ? imageMessage : otherMessage}`;
+          formattedOutput += renderFileSection(
+            generatedFilesHeader,
+            generatedFiles,
+            otherMessage
+          );
+          formattedOutput += renderFileSection(
+            inheritedFilesHeader,
+            inheritedFiles,
+            inheritedFileMessage
+          );
-            if (i < fileCount - 1) {
-              formattedOutput += fileCount <= 3 ? ', ' : ',\n';
-            }
+          if (generatedFiles.length > 0) {
+            formattedOutput += `\n\n${accessMessage}`;
+          }
+          if (inheritedFiles.length > 0) {
+            formattedOutput += `\n\n${inheritedNote}`;
           }
-          formattedOutput += `\n\n${accessMessage}`;
           return [
             formattedOutput.trim(),
             {