npm - @librechat/agents - Versions diffs - 3.1.89 → 3.1.90 - Mend

@librechat/agents 3.1.89 → 3.1.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

package/dist/cjs/graphs/Graph.cjs +7 -0
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/hooks/executeHooks.cjs +14 -7
package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
package/dist/cjs/llm/anthropic/index.cjs +8 -2
package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +34 -0
package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
package/dist/cjs/main.cjs +9 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/tools/BashExecutor.cjs +10 -9
package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
package/dist/cjs/tools/BashProgrammaticToolCalling.cjs +12 -8
package/dist/cjs/tools/BashProgrammaticToolCalling.cjs.map +1 -1
package/dist/cjs/tools/CodeExecutor.cjs +35 -11
package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
package/dist/cjs/tools/CodeSessionFileSummary.cjs +63 -0
package/dist/cjs/tools/CodeSessionFileSummary.cjs.map +1 -0
package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -12
package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +8 -5
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/cjs/tools/subagent/SubagentExecutor.cjs +319 -29
package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +7 -0
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/hooks/executeHooks.mjs +14 -7
package/dist/esm/hooks/executeHooks.mjs.map +1 -1
package/dist/esm/llm/anthropic/index.mjs +9 -3
package/dist/esm/llm/anthropic/index.mjs.map +1 -1
package/dist/esm/llm/anthropic/utils/message_inputs.mjs +33 -1
package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
package/dist/esm/main.mjs +2 -1
package/dist/esm/main.mjs.map +1 -1
package/dist/esm/tools/BashExecutor.mjs +11 -10
package/dist/esm/tools/BashExecutor.mjs.map +1 -1
package/dist/esm/tools/BashProgrammaticToolCalling.mjs +13 -9
package/dist/esm/tools/BashProgrammaticToolCalling.mjs.map +1 -1
package/dist/esm/tools/CodeExecutor.mjs +29 -12
package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
package/dist/esm/tools/CodeSessionFileSummary.mjs +60 -0
package/dist/esm/tools/CodeSessionFileSummary.mjs.map +1 -0
package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -13
package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +8 -5
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/esm/tools/subagent/SubagentExecutor.mjs +320 -31
package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
package/dist/types/llm/anthropic/index.d.ts +3 -1
package/dist/types/llm/anthropic/utils/message_inputs.d.ts +4 -0
package/dist/types/tools/BashExecutor.d.ts +3 -3
package/dist/types/tools/CodeExecutor.d.ts +10 -3
package/dist/types/tools/CodeSessionFileSummary.d.ts +3 -0
package/dist/types/tools/ProgrammaticToolCalling.d.ts +4 -4
package/dist/types/tools/subagent/SubagentExecutor.d.ts +8 -5
package/dist/types/types/tools.d.ts +2 -3
package/package.json +1 -1
package/src/graphs/Graph.ts +7 -0
package/src/hooks/__tests__/executeHooks.test.ts +38 -0
package/src/hooks/executeHooks.ts +27 -7
package/src/llm/anthropic/index.ts +27 -3
package/src/llm/anthropic/llm.spec.ts +60 -1
package/src/llm/anthropic/utils/message_inputs.ts +46 -0
package/src/tools/BashExecutor.ts +21 -10
package/src/tools/BashProgrammaticToolCalling.ts +21 -9
package/src/tools/CodeExecutor.ts +55 -12
package/src/tools/CodeSessionFileSummary.ts +80 -0
package/src/tools/ProgrammaticToolCalling.ts +25 -12
package/src/tools/ToolNode.ts +8 -5
package/src/tools/__tests__/BashExecutor.test.ts +9 -0
package/src/tools/__tests__/CodeApiAuthHeaders.test.ts +43 -0
package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +100 -16
package/src/tools/__tests__/SubagentExecutor.test.ts +540 -6
package/src/tools/__tests__/ToolNode.outputReferences.test.ts +52 -0
package/src/tools/__tests__/subagentHooks.test.ts +237 -0
package/src/tools/subagent/SubagentExecutor.ts +514 -36
package/src/types/tools.ts +2 -3

package/src/llm/anthropic/index.ts CHANGED Viewed

@@ -17,9 +17,13 @@ import type {
   ChatAnthropicToolType,
   AnthropicMCPServerURLDefinition,
   AnthropicContextManagementConfigParam,
+  AnthropicRequestOptions,
 } from '@/llm/anthropic/types';
 import { _makeMessageChunkFromAnthropicEvent } from './utils/message_outputs';
-import { _convertMessagesToAnthropicPayload } from './utils/message_inputs';
+import {
+  _convertMessagesToAnthropicPayload,
+  stripUnsupportedAssistantPrefill,
+} from './utils/message_inputs';
 import { handleToolChoice } from './utils/tools';
 const DEFAULT_STREAM_DELAY = 25;
@@ -591,6 +595,26 @@ export class CustomAnthropic extends ChatAnthropicMessages {
     });
   }
+  protected override async createStreamWithRetry( // Streaming request hook: sanitizes the request, then delegates to the parent implementation.
+    request: AnthropicStreamingMessageCreateParams,
+    options?: AnthropicRequestOptions
+  ): ReturnType<ChatAnthropicMessages['createStreamWithRetry']> {
+    return super.createStreamWithRetry(
+      stripUnsupportedAssistantPrefill(request), // trailing assistant messages are dropped only for models that disallow prefill; otherwise the same request object is passed on
+      options
+    );
+  }
+  protected override async completionWithRetry( // Non-streaming request hook: sanitizes the request, then delegates to the parent implementation.
+    request: AnthropicMessageCreateParams,
+    options: AnthropicRequestOptions
+  ): ReturnType<ChatAnthropicMessages['completionWithRetry']> {
+    return super.completionWithRetry(
+      stripUnsupportedAssistantPrefill(request), // trailing assistant messages are dropped only for models that disallow prefill; otherwise the same request object is passed on
+      options
+    );
+  }
   async *_streamResponseChunks(
     messages: BaseMessage[],
     options: this['ParsedCallOptions'],
@@ -599,11 +623,11 @@ export class CustomAnthropic extends ChatAnthropicMessages {
     this.resetTokenEvents();
     const params = this.invocationParams(options);
     const formattedMessages = _convertMessagesToAnthropicPayload(messages);
-    const payload = {
+    const payload = stripUnsupportedAssistantPrefill({
       ...params,
       ...formattedMessages,
       stream: true,
-    } as const;
+    } as const);
     const coerceContentToString =
       !_toolsInParams(payload) &&
       !_documentsInParams(payload) &&

package/src/llm/anthropic/llm.spec.ts CHANGED Viewed

@@ -64,7 +64,11 @@ import type {
   ToolEndEvent,
   TPayload,
 } from '@/types';
-import { _convertMessagesToAnthropicPayload } from './utils/message_inputs';
+import {
+  _convertMessagesToAnthropicPayload,
+  modelDisallowsAssistantPrefill,
+  stripUnsupportedAssistantPrefill,
+} from './utils/message_inputs';
 import {
   _makeMessageChunkFromAnthropicEvent,
   getAnthropicUsageMetadata,
@@ -2637,6 +2641,61 @@ describe('Anthropic Reasoning with contentBlocks', () => {
   });
 });
+describe('Claude assistant prefill compatibility', () => { // Covers model-id detection and request sanitizing for assistant prefill.
+  test.each([
+    'claude-sonnet-4-6',
+    'claude-sonnet-4-6@20260217', // '@' suffix after the minor version
+    'claude-opus-4-7',
+    'claude-opus-4-10', // two-digit minor version
+    'global.anthropic.claude-opus-4-6-v1:0', // prefixed id with a '-v1:0' suffix
+    'anthropic/claude-sonnet-4.6', // dotted minor version
+    'anthropic/claude-sonnet-4.12',
+  ])('detects %s as not supporting assistant prefill', (model) => {
+    expect(modelDisallowsAssistantPrefill(model)).toBe(true);
+  });
+  test.each([
+    'claude-sonnet-4-5-20250929', // minor version 5 is below the threshold of 6
+    'claude-opus-4-20250514', // 8-digit date directly after "4-" must not be read as a minor version
+    'anthropic.claude-opus-4-20250514-v1:0',
+    'gpt-5.4', // not a Claude id
+  ])('leaves %s prefill support unchanged', (model) => {
+    expect(modelDisallowsAssistantPrefill(model)).toBe(false);
+  });
+  test('strips trailing assistant messages for Claude 4.6+ requests', () => {
+    const request = {
+      model: 'claude-opus-4-6',
+      max_tokens: 100,
+      messages: [
+        { role: 'user' as const, content: 'What changed?' },
+        { role: 'assistant' as const, content: 'Draft prefill' },
+        { role: 'assistant' as const, content: 'Another prefill' }, // two trailing assistant turns: both are expected to be removed
+      ],
+    };
+    const sanitized = stripUnsupportedAssistantPrefill(request);
+    expect(sanitized).not.toBe(request); // a new object is returned when something was stripped
+    expect(sanitized.messages).toEqual([
+      { role: 'user', content: 'What changed?' },
+    ]);
+  });
+  test('does not strip assistant messages for older Claude models', () => {
+    const request = {
+      model: 'claude-sonnet-4-5-20250929',
+      max_tokens: 100,
+      messages: [
+        { role: 'user' as const, content: 'Write JSON only.' },
+        { role: 'assistant' as const, content: '{' },
+      ],
+    };
+    expect(stripUnsupportedAssistantPrefill(request)).toBe(request); // identity check: the original object comes back untouched
+  });
+});
 const opus46Model = 'claude-opus-4-6';
 describe('Opus 4.6', () => {

package/src/llm/anthropic/utils/message_inputs.ts CHANGED Viewed

@@ -49,6 +49,10 @@ type GoogleFunctionCallBlock = MessageContentComplex & {
 };
 const ANTHROPIC_EMPTY_TEXT_PLACEHOLDER = '_';
+const CLAUDE_4_RELEASE_DATE_MODEL_PATTERN = // Claude 4 opus/sonnet/haiku ids where "4-" is followed directly by an 8-digit segment, e.g. claude-opus-4-20250514.
+  /claude-(?:opus|sonnet|haiku)-4-\d{8}(?:[-.@]|$)/i;
+const CLAUDE_4_MINOR_MODEL_PATTERN = // Claude 4 opus/sonnet/haiku ids with a numeric segment after "4-" or "4."; capture group 1 holds that number.
+  /claude-(?:opus|sonnet|haiku)-4[-.](\d+)(?:[-.@]|$)/i;
 function _formatImage(imageUrl: string) {
   const parsed = parseBase64DataUrl({ dataUrl: imageUrl });
@@ -796,6 +800,48 @@ export function _convertMessagesToAnthropicPayload(
   } as AnthropicMessageCreateParams;
 }
+export function modelDisallowsAssistantPrefill(model?: string): boolean { // True only for Claude 4 opus/sonnet/haiku ids whose minor version is 6 or higher.
+  const modelId = model ?? ''; // a missing model is tested as the empty string, which matches neither pattern
+  if (CLAUDE_4_RELEASE_DATE_MODEL_PATTERN.test(modelId)) { // checked first so an 8-digit date after "4-" is not parsed as a minor version below
+    return false;
+  }
+  const match = CLAUDE_4_MINOR_MODEL_PATTERN.exec(modelId);
+  if (!match) { // the id has no Claude 4 minor-version segment
+    return false;
+  }
+  return Number(match[1]) >= 6; // match[1] is the digit run captured after "4-" / "4."
+}
+export function stripUnsupportedAssistantPrefill< // Returns the request without its trailing assistant messages when the model disallows prefill.
+  T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
+>(request: T): T {
+  if (!modelDisallowsAssistantPrefill(request.model)) {
+    return request; // same object back, untouched
+  }
+  const messages = request.messages;
+  if (
+    messages.length <= 1 ||
+    messages[messages.length - 1]?.role !== 'assistant'
+  ) {
+    return request; // nothing to strip: at most one message, or the last message is not an assistant turn
+  }
+  const nextMessages = [...messages]; // shallow copy so the caller's array is not mutated by pop()
+  while (
+    nextMessages.length > 1 && // always keeps at least one message, even if that one is an assistant turn
+    nextMessages[nextMessages.length - 1]?.role === 'assistant'
+  ) {
+    nextMessages.pop();
+  }
+  return {
+    ...request, // new object; all other request fields are carried over
+    messages: nextMessages,
+  };
+}
 function mergeMessages(messages: AnthropicMessageCreateParams['messages']) {
   if (messages.length <= 1) {
     return messages;

package/src/tools/BashExecutor.ts CHANGED Viewed

@@ -4,6 +4,11 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
 import { tool, DynamicStructuredTool } from '@langchain/core/tools';
 import type * as t from '@/types';
 import {
+  BASH_SHELL_GUIDANCE,
+  CODE_ARTIFACT_PATH_GUIDANCE,
+  appendFailedExecutionFileReminder,
+  appendTmpScratchReminder,
+  appendCodeSessionFileSummary,
   emptyOutputMessage,
   buildCodeApiHttpErrorMessage,
   getCodeBaseURL,
@@ -23,8 +28,9 @@ export const BashExecutionToolSchema = {
       type: 'string',
       description: `The bash command or script to execute.
 - The environment is stateless; variables and state don't persist between executions.
-- Generated files from previous executions are automatically available in "/mnt/data/".
-- Files from previous executions are automatically available and can be modified in place.
+- Prior /mnt/data files are available and can be modified in place.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
+- ${BASH_SHELL_GUIDANCE}
 - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
 - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
 - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -46,6 +52,8 @@ Runs bash commands and returns stdout/stderr output from a stateless execution e
 Usage:
 - No network access available.
 - Generated files are automatically delivered; **DO NOT** provide download links.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
+- ${BASH_SHELL_GUIDANCE}
 - NEVER use this tool to execute malicious commands.
 `.trim();
@@ -105,7 +113,7 @@ export const BashExecutionToolDefinition = {
 } as const;
 function createBashExecutionTool(
-  params: t.BashExecutionToolParams = {}
+  params: t.BashExecutionToolParams | null = {}
 ): DynamicStructuredTool {
   return tool(
     async (rawInput, config) => {
@@ -166,11 +174,6 @@ function createBashExecutionTool(
         }
         const result: t.ExecuteResult = await response.json();
-        /* See `CodeExecutor.ts` — file listings were removed from the
-         * LLM-facing tool result. Bash especially benefits: models
-         * naturally `ls /mnt/data/` to discover what's available
-         * rather than relying on a prescriptive summary that
-         * misleads as often as it helps. */
         let formattedOutput = '';
         if (result.stdout) {
           formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -179,9 +182,13 @@ function createBashExecutionTool(
         }
         if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
+        const outputWithReminder = appendTmpScratchReminder(
+          formattedOutput,
+          command
+        );
         const hasFiles = result.files != null && result.files.length > 0;
         return [
-          formattedOutput.trim(),
+          appendCodeSessionFileSummary(outputWithReminder, result.files),
           (hasFiles
             ? { session_id: result.session_id, files: result.files }
             : {
@@ -189,8 +196,12 @@ function createBashExecutionTool(
             }) satisfies t.CodeExecutionArtifact,
         ];
       } catch (error) {
+        const messageWithReminder = appendFailedExecutionFileReminder(
+          (error as Error | undefined)?.message ?? '',
+          command
+        );
         throw new Error(
-          `Execution error:\n\n${(error as Error | undefined)?.message}`
+          `Execution error:\n\n${messageWithReminder}`
         );
       }
     },

package/src/tools/BashProgrammaticToolCalling.ts CHANGED Viewed

@@ -8,7 +8,12 @@ import {
   executeTools,
   formatCompletedResponse,
 } from './ProgrammaticToolCalling';
-import { getCodeBaseURL } from './CodeExecutor';
+import {
+  BASH_SHELL_GUIDANCE,
+  CODE_ARTIFACT_PATH_GUIDANCE,
+  appendFailedExecutionFileReminder,
+  getCodeBaseURL,
+} from './CodeExecutor';
 import {
   clampCodeApiRunTimeoutMs,
   createCodeApiRunTimeoutSchema,
@@ -62,11 +67,14 @@ You MUST complete your entire workflow in ONE code block.
 DO NOT split work across multiple calls expecting to reuse variables.`;
 const CORE_RULES = `Rules:
-- EVERYTHING in one call—no state persists between executions
+- One call: state does not persist
 - Tools are pre-defined as bash functions—DO NOT redefine them
 - Each tool function accepts a JSON string argument
+- Save tool output with raw=$(tool '{}'); printf '%s\n' "$raw" > /mnt/data/file.json; direct tool > file may be empty
+- jq: use fromjson? // . on saved tool stdout and again on JSON-string fields; check types since arrays may contain strings
 - Only echo/printf output returns to the model
-- Generated files are automatically available in /mnt/data/ for subsequent executions
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
+- ${BASH_SHELL_GUIDANCE}
 - timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
 const ADDITIONAL_RULES =
@@ -78,11 +86,11 @@ const EXAMPLES = `Example (Complete workflow in one call):
   echo "$data" | jq '.[] | .name'
 Example (Parallel calls):
-  web_search '{"query": "SF weather"}' > /tmp/sf.txt &
-  web_search '{"query": "NY weather"}' > /tmp/ny.txt &
+  { sf=$(web_search '{"query": "SF weather"}'); printf '%s\n' "$sf" > /mnt/data/sf.json; } &
+  { ny=$(web_search '{"query": "NY weather"}'); printf '%s\n' "$ny" > /mnt/data/ny.json; } &
   wait
-  echo "SF: $(cat /tmp/sf.txt)"
-  echo "NY: $(cat /tmp/ny.txt)"`;
+  echo "SF: $(jq -r . /mnt/data/sf.json)"
+  echo "NY: $(jq -r . /mnt/data/ny.json)"`;
 const CODE_PARAM_DESCRIPTION = `Bash code that calls tools programmatically. Tools are available as bash functions.
@@ -369,7 +377,7 @@ export function createBashProgrammaticToolCallingTool(
         // ====================================================================
         if (response.status === 'completed') {
-          return formatCompletedResponse(response);
+          return formatCompletedResponse(response, code);
         }
         if (response.status === 'error') {
@@ -383,8 +391,12 @@ export function createBashProgrammaticToolCallingTool(
         throw new Error(`Unexpected response status: ${response.status}`);
       } catch (error) {
+        const messageWithReminder = appendFailedExecutionFileReminder(
+          (error as Error).message,
+          code
+        );
         throw new Error(
-          `Bash programmatic execution failed: ${(error as Error).message}`
+          `Bash programmatic execution failed: ${messageWithReminder}`
         );
       }
     },

package/src/tools/CodeExecutor.ts CHANGED Viewed

@@ -4,8 +4,14 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
 import { tool, DynamicStructuredTool } from '@langchain/core/tools';
 import { getEnvironmentVariable } from '@langchain/core/utils/env';
 import type * as t from '@/types';
+import { appendCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
 import { EnvVar, Constants } from '@/common';
+export {
+  appendCodeSessionFileSummary,
+  stripCodeSessionFileSummary,
+} from '@/tools/CodeSessionFileSummary';
 config();
 export const getCodeBaseURL = (): string =>
@@ -15,6 +21,41 @@ export const getCodeBaseURL = (): string =>
 export const emptyOutputMessage =
   'stdout: Empty. Ensure you\'re writing output explicitly.\n';
+export const CODE_ARTIFACT_PATH_GUIDANCE = // Prompt fragment interpolated into the code/bash tool descriptions and rule lists.
+  'Persist handoff artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); failed executions do not register new files; `/tmp` and odd extensions are same-call scratch only, not later-call storage.';
+export const BASH_SHELL_GUIDANCE = // Prompt fragment interpolated into the bash tool descriptions and rule lists.
+  'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';
+const TMP_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/tmp(?:\/|\b)/; // "/tmp" not preceded by a word character and followed by "/" or a word boundary
+const MNT_DATA_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/mnt\/data(?:\/|\b)/; // same shape as above, for "/mnt/data"
+export const TMP_SCRATCH_OUTPUT_REMINDER = // Note appended to successful output by appendTmpScratchReminder.
+  'Note: /tmp files are same-call scratch only and were not persisted; use /mnt/data for files needed later.';
+export const FAILED_EXECUTION_FILE_REMINDER = // Note appended to error messages by appendFailedExecutionFileReminder.
+  'Note: any files written during this failed call were not registered for later calls; fix the error and rerun before relying on them.';
+export function appendTmpScratchReminder(output: string, code: string): string { // Appends the /tmp scratch note to `output` when the submitted `code` mentions a /tmp path.
+  if (!TMP_PATH_PATTERN.test(code)) {
+    return output; // returned as-is, without trimming
+  }
+  return `${output.trimEnd()}\n${TMP_SCRATCH_OUTPUT_REMINDER}\n`; // trailing whitespace is trimmed first so the note starts on its own line
+}
+export function appendFailedExecutionFileReminder( // Appends the failed-run file note to `output` when the submitted `code` mentions /mnt/data.
+  output: string,
+  code: string
+): string {
+  if (
+    !MNT_DATA_PATH_PATTERN.test(code) ||
+    output.includes(FAILED_EXECUTION_FILE_REMINDER) // note already present: do not append it a second time
+  ) {
+    return output; // returned as-is, without trimming
+  }
+  return `${output.trimEnd()}\n${FAILED_EXECUTION_FILE_REMINDER}\n`;
+}
 const SUPPORTED_LANGUAGES = [
   'py',
   'js',
@@ -44,8 +85,8 @@ export const CodeExecutionToolSchema = {
       type: 'string',
       description: `The complete, self-contained code to execute, without any truncation or minimization.
 - The environment is stateless; variables and imports don't persist between executions.
-- Generated files from previous executions are automatically available in "/mnt/data/".
-- Files from previous executions are automatically available and can be modified in place.
+- Prior /mnt/data files are available and can be modified in place.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
 - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
 - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
 - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -104,6 +145,7 @@ Runs code and returns stdout/stderr output from a stateless execution environmen
 Usage:
 - No network access available.
 - Generated files are automatically delivered; **DO NOT** provide download links.
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
 - NEVER use this tool to execute malicious code.
 `.trim();
@@ -116,7 +158,7 @@ export const CodeExecutionToolDefinition = {
 } as const;
 function createCodeExecutionTool(
-  params: t.CodeExecutionToolParams = {}
+  params: t.CodeExecutionToolParams | null = {}
 ): DynamicStructuredTool {
   return tool(
     async (rawInput, config) => {
@@ -187,13 +229,6 @@ function createCodeExecutionTool(
         }
         const result: t.ExecuteResult = await response.json();
-        /* Output is stdout/stderr only — file listings were removed
-         * because the LLM-facing summary (split inherited/generated
-         * with prescriptive notes) caused more confusion than help,
-         * especially for bash where models naturally explore
-         * `/mnt/data/` themselves. The artifact still carries every
-         * file so the host's session map stays in sync; the LLM
-         * doesn't see them in the tool result text. */
         let formattedOutput = '';
         if (result.stdout) {
           formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -202,9 +237,13 @@ function createCodeExecutionTool(
         }
         if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
+        const outputWithReminder = appendTmpScratchReminder(
+          formattedOutput,
+          code
+        );
         const hasFiles = result.files != null && result.files.length > 0;
         return [
-          formattedOutput.trim(),
+          appendCodeSessionFileSummary(outputWithReminder, result.files),
           (hasFiles
             ? { session_id: result.session_id, files: result.files }
             : {
@@ -212,8 +251,12 @@ function createCodeExecutionTool(
             }) satisfies t.CodeExecutionArtifact,
         ];
       } catch (error) {
+        const messageWithReminder = appendFailedExecutionFileReminder(
+          (error as Error | undefined)?.message ?? '',
+          code
+        );
         throw new Error(
-          `Execution error:\n\n${(error as Error | undefined)?.message}`
+          `Execution error:\n\n${messageWithReminder}`
         );
       }
     },

package/src/tools/CodeSessionFileSummary.ts ADDED Viewed

@@ -0,0 +1,80 @@
+import type * as t from '@/types';
+const IMAGE_FILE_EXTENSIONS = new Set([ // Lowercase, dot-prefixed extensions that isImageFile counts as images.
+  '.avif',
+  '.bmp',
+  '.gif',
+  '.ico',
+  '.jpeg',
+  '.jpg',
+  '.png',
+  '.tif',
+  '.tiff',
+  '.webp',
+]);
+const CODE_SESSION_FILE_SUMMARY_PATTERN = // Fully anchored match for the text built by buildCodeSessionFileSummary, with \d+ in place of the two counts.
+  /^Generated files:\nSession files: \d+ persisted file\(s\) are available in \/mnt\/data, including \d+ image\(s\)\. Use known \/mnt\/data paths directly in later code-tool calls\. The app displays files\/images automatically; do not invent download links or wrap generated images in Markdown\.$/;
+function getFileExtension(name: string): string { // Returns the lowercased extension (including its dot) of the last path segment, or '' when there is no dot.
+  const lastSlash = name.lastIndexOf('/');
+  const basename = lastSlash >= 0 ? name.slice(lastSlash + 1) : name; // only '/' is treated as a path separator
+  const lastDot = basename.lastIndexOf('.');
+  return lastDot >= 0 ? basename.slice(lastDot).toLowerCase() : ''; // a dot at index 0 counts, so ".png" yields ".png"
+}
+function isImageFile(file: Partial<t.FileRef> | null | undefined): boolean { // True when the file has a string name whose extension is in IMAGE_FILE_EXTENSIONS.
+  const name = file?.name;
+  return (
+    typeof name === 'string' && // null/undefined files and files without a name are never images
+    IMAGE_FILE_EXTENSIONS.has(getFileExtension(name))
+  );
+}
+function buildCodeSessionFileSummary( // Builds the fixed-wording summary text for the given file and image counts.
+  fileCount: number,
+  imageCount: number
+): string {
+  return (
+    'Generated files:\n' + // header line; stripCodeSessionFileSummary searches for this marker
+    `Session files: ${fileCount} persisted file(s) are available in /mnt/data, including ${imageCount} image(s). ` +
+    'Use known /mnt/data paths directly in later code-tool calls. ' +
+    'The app displays files/images automatically; do not invent download links or wrap generated images in Markdown.'
+  );
+}
+function isGeneratedFile(file: Partial<t.FileRef> | null | undefined): boolean { // A file counts as generated unless it is explicitly flagged inherited.
+  return file?.inherited !== true; // a null/undefined file or a missing flag also yields true
+}
+export function stripCodeSessionFileSummary(output: string): string { // Removes a trailing session-file summary from `output`; returns `output` unchanged when none is recognized.
+  const summaryStart = output.lastIndexOf('Generated files:'); // only the last occurrence of the marker is considered
+  if (summaryStart < 0) return output;
+  const beforeSummary = output.slice(0, summaryStart);
+  if (beforeSummary !== '' && !beforeSummary.endsWith('\n\n')) return output; // the marker must start the string or be preceded by "\n\n"
+  const maybeSummary = output.slice(summaryStart);
+  if (!CODE_SESSION_FILE_SUMMARY_PATTERN.test(maybeSummary)) return output; // the tail must match the generated wording exactly, through end of string
+  return beforeSummary.trimEnd();
+}
+export function appendCodeSessionFileSummary( // Returns trimmed `output`, followed by a file-count summary when `files` contains non-inherited entries.
+  output: string,
+  files: t.FileRefs | undefined
+): string {
+  if (files == null || files.length === 0) {
+    return output.trim(); // no files reported: trimmed output only
+  }
+  const generatedFiles = files.filter(isGeneratedFile);
+  if (generatedFiles.length === 0) {
+    return output.trim(); // every file is flagged inherited: no summary is added
+  }
+  const imageCount = generatedFiles.filter(isImageFile).length;
+  const summary = buildCodeSessionFileSummary(
+    generatedFiles.length, // counts cover generated files only, not inherited ones
+    imageCount
+  );
+  return `${output.trimEnd()}\n\n${summary}`.trim(); // the "\n\n" separator is what stripCodeSessionFileSummary checks for
+}

package/src/tools/ProgrammaticToolCalling.ts CHANGED Viewed

@@ -7,9 +7,13 @@ import type { ToolCall } from '@langchain/core/messages/tool';
 import type { ProgrammaticToolCallingJsonSchema } from './ptcTimeout';
 import type * as t from '@/types';
 import {
+  CODE_ARTIFACT_PATH_GUIDANCE,
+  appendCodeSessionFileSummary,
+  appendFailedExecutionFileReminder,
   buildCodeApiHttpErrorMessage,
   emptyOutputMessage,
   getCodeBaseURL,
+  appendTmpScratchReminder,
   resolveCodeApiAuthHeaders,
 } from './CodeExecutor';
 import {
@@ -36,15 +40,17 @@ You MUST complete your entire workflow in ONE code block: query → process →
 DO NOT split work across multiple calls expecting to reuse variables.`;
 const CORE_RULES = `Rules:
-- EVERYTHING in one call—no state persists between executions
-- Just write code with await—auto-wrapped in async context
-- DO NOT define async def main() or call asyncio.run()
+- One call: state does not persist
+- Auto-wrapped async; use await, no main()/asyncio.run()
 - Tools are pre-defined—DO NOT write function definitions
+- Call tools with keyword args only (await tool(arg=value), never pass a dict)
+- Tool results are decoded Python values (dict/list/str)
 - Only print() output returns to the model
+- ${CODE_ARTIFACT_PATH_GUIDANCE}
 - timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
-const ADDITIONAL_RULES = `- Generated files are automatically available in /mnt/data/ for subsequent executions
-- Tool names normalized: hyphens→underscores, keywords get \`_tool\` suffix`;
+const ADDITIONAL_RULES =
+  '- Tool names normalized: hyphens→underscores, keywords get `_tool` suffix';
 const EXAMPLES = `Example (Complete workflow in one call):
   # Query data
@@ -678,15 +684,16 @@ export async function executeTools(
 /**
  * Formats the completed response for the agent.
  *
- * Output is stdout/stderr only — see `CodeExecutor.ts`. The
- * artifact still carries every file so the host's session map
- * stays in sync; the LLM doesn't see them in the tool result text.
+ * Output includes stdout/stderr plus a compact session-file summary
+ * when artifacts were persisted. The artifact still carries every
+ * file so the host's session map stays in sync.
  *
  * @param response - The completed API response
  * @returns Tuple of [formatted string, artifact]
  */
 export function formatCompletedResponse(
-  response: t.ProgrammaticExecutionResponse
+  response: t.ProgrammaticExecutionResponse,
+  sourceCode = ''
 ): [string, t.ProgrammaticExecutionArtifact] {
   let formatted = '';
@@ -700,8 +707,10 @@ export function formatCompletedResponse(
     formatted += `stderr:\n${response.stderr}\n`;
   }
+  const outputWithReminder = appendTmpScratchReminder(formatted, sourceCode);
   return [
-    formatted.trim(),
+    appendCodeSessionFileSummary(outputWithReminder, response.files),
     {
       session_id: response.session_id,
       files: response.files,
@@ -859,7 +868,7 @@ export function createProgrammaticToolCallingTool(
         // ====================================================================
         if (response.status === 'completed') {
-          return formatCompletedResponse(response);
+          return formatCompletedResponse(response, code);
         }
         if (response.status === 'error') {
@@ -873,8 +882,12 @@ export function createProgrammaticToolCallingTool(
         throw new Error(`Unexpected response status: ${response.status}`);
       } catch (error) {
+        const messageWithReminder = appendFailedExecutionFileReminder(
+          (error as Error).message,
+          code
+        );
         throw new Error(
-          `Programmatic execution failed: ${(error as Error).message}`
+          `Programmatic execution failed: ${messageWithReminder}`
         );
       }
     },