@librechat/agents 3.1.89 → 3.1.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77):
  1. package/dist/cjs/graphs/Graph.cjs +7 -0
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/hooks/executeHooks.cjs +14 -7
  4. package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
  5. package/dist/cjs/llm/anthropic/index.cjs +8 -2
  6. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  7. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +34 -0
  8. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  9. package/dist/cjs/main.cjs +9 -0
  10. package/dist/cjs/main.cjs.map +1 -1
  11. package/dist/cjs/tools/BashExecutor.cjs +10 -9
  12. package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
  13. package/dist/cjs/tools/BashProgrammaticToolCalling.cjs +12 -8
  14. package/dist/cjs/tools/BashProgrammaticToolCalling.cjs.map +1 -1
  15. package/dist/cjs/tools/CodeExecutor.cjs +35 -11
  16. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  17. package/dist/cjs/tools/CodeSessionFileSummary.cjs +63 -0
  18. package/dist/cjs/tools/CodeSessionFileSummary.cjs.map +1 -0
  19. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -12
  20. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  21. package/dist/cjs/tools/ToolNode.cjs +8 -5
  22. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  23. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +319 -29
  24. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  25. package/dist/esm/graphs/Graph.mjs +7 -0
  26. package/dist/esm/graphs/Graph.mjs.map +1 -1
  27. package/dist/esm/hooks/executeHooks.mjs +14 -7
  28. package/dist/esm/hooks/executeHooks.mjs.map +1 -1
  29. package/dist/esm/llm/anthropic/index.mjs +9 -3
  30. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  31. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +33 -1
  32. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  33. package/dist/esm/main.mjs +2 -1
  34. package/dist/esm/main.mjs.map +1 -1
  35. package/dist/esm/tools/BashExecutor.mjs +11 -10
  36. package/dist/esm/tools/BashExecutor.mjs.map +1 -1
  37. package/dist/esm/tools/BashProgrammaticToolCalling.mjs +13 -9
  38. package/dist/esm/tools/BashProgrammaticToolCalling.mjs.map +1 -1
  39. package/dist/esm/tools/CodeExecutor.mjs +29 -12
  40. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  41. package/dist/esm/tools/CodeSessionFileSummary.mjs +60 -0
  42. package/dist/esm/tools/CodeSessionFileSummary.mjs.map +1 -0
  43. package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -13
  44. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  45. package/dist/esm/tools/ToolNode.mjs +8 -5
  46. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  47. package/dist/esm/tools/subagent/SubagentExecutor.mjs +320 -31
  48. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  49. package/dist/types/llm/anthropic/index.d.ts +3 -1
  50. package/dist/types/llm/anthropic/utils/message_inputs.d.ts +4 -0
  51. package/dist/types/tools/BashExecutor.d.ts +3 -3
  52. package/dist/types/tools/CodeExecutor.d.ts +10 -3
  53. package/dist/types/tools/CodeSessionFileSummary.d.ts +3 -0
  54. package/dist/types/tools/ProgrammaticToolCalling.d.ts +4 -4
  55. package/dist/types/tools/subagent/SubagentExecutor.d.ts +8 -5
  56. package/dist/types/types/tools.d.ts +2 -3
  57. package/package.json +1 -1
  58. package/src/graphs/Graph.ts +7 -0
  59. package/src/hooks/__tests__/executeHooks.test.ts +38 -0
  60. package/src/hooks/executeHooks.ts +27 -7
  61. package/src/llm/anthropic/index.ts +27 -3
  62. package/src/llm/anthropic/llm.spec.ts +60 -1
  63. package/src/llm/anthropic/utils/message_inputs.ts +46 -0
  64. package/src/tools/BashExecutor.ts +21 -10
  65. package/src/tools/BashProgrammaticToolCalling.ts +21 -9
  66. package/src/tools/CodeExecutor.ts +55 -12
  67. package/src/tools/CodeSessionFileSummary.ts +80 -0
  68. package/src/tools/ProgrammaticToolCalling.ts +25 -12
  69. package/src/tools/ToolNode.ts +8 -5
  70. package/src/tools/__tests__/BashExecutor.test.ts +9 -0
  71. package/src/tools/__tests__/CodeApiAuthHeaders.test.ts +43 -0
  72. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +100 -16
  73. package/src/tools/__tests__/SubagentExecutor.test.ts +540 -6
  74. package/src/tools/__tests__/ToolNode.outputReferences.test.ts +52 -0
  75. package/src/tools/__tests__/subagentHooks.test.ts +237 -0
  76. package/src/tools/subagent/SubagentExecutor.ts +514 -36
  77. package/src/types/tools.ts +2 -3
@@ -17,9 +17,13 @@ import type {
17
17
  ChatAnthropicToolType,
18
18
  AnthropicMCPServerURLDefinition,
19
19
  AnthropicContextManagementConfigParam,
20
+ AnthropicRequestOptions,
20
21
  } from '@/llm/anthropic/types';
21
22
  import { _makeMessageChunkFromAnthropicEvent } from './utils/message_outputs';
22
- import { _convertMessagesToAnthropicPayload } from './utils/message_inputs';
23
+ import {
24
+ _convertMessagesToAnthropicPayload,
25
+ stripUnsupportedAssistantPrefill,
26
+ } from './utils/message_inputs';
23
27
  import { handleToolChoice } from './utils/tools';
24
28
 
25
29
  const DEFAULT_STREAM_DELAY = 25;
@@ -591,6 +595,26 @@ export class CustomAnthropic extends ChatAnthropicMessages {
591
595
  });
592
596
  }
593
597
 
598
/**
 * Streaming request hook: runs the outgoing request through
 * `stripUnsupportedAssistantPrefill` and then delegates to the parent
 * implementation with the caller's options untouched.
 *
 * @param request - Streaming message-create parameters about to be sent.
 * @param options - Optional per-request options, forwarded as-is.
 * @returns Whatever the parent `createStreamWithRetry` returns.
 */
protected override async createStreamWithRetry(
  request: AnthropicStreamingMessageCreateParams,
  options?: AnthropicRequestOptions
): ReturnType<ChatAnthropicMessages['createStreamWithRetry']> {
  const sanitizedRequest = stripUnsupportedAssistantPrefill(request);
  return super.createStreamWithRetry(sanitizedRequest, options);
}
607
+
608
/**
 * Non-streaming request hook: runs the outgoing request through
 * `stripUnsupportedAssistantPrefill` and then delegates to the parent
 * implementation with the caller's options untouched.
 *
 * @param request - Message-create parameters about to be sent.
 * @param options - Per-request options, forwarded as-is.
 * @returns Whatever the parent `completionWithRetry` returns.
 */
protected override async completionWithRetry(
  request: AnthropicMessageCreateParams,
  options: AnthropicRequestOptions
): ReturnType<ChatAnthropicMessages['completionWithRetry']> {
  const sanitizedRequest = stripUnsupportedAssistantPrefill(request);
  return super.completionWithRetry(sanitizedRequest, options);
}
617
+
594
618
  async *_streamResponseChunks(
595
619
  messages: BaseMessage[],
596
620
  options: this['ParsedCallOptions'],
@@ -599,11 +623,11 @@ export class CustomAnthropic extends ChatAnthropicMessages {
599
623
  this.resetTokenEvents();
600
624
  const params = this.invocationParams(options);
601
625
  const formattedMessages = _convertMessagesToAnthropicPayload(messages);
602
- const payload = {
626
+ const payload = stripUnsupportedAssistantPrefill({
603
627
  ...params,
604
628
  ...formattedMessages,
605
629
  stream: true,
606
- } as const;
630
+ } as const);
607
631
  const coerceContentToString =
608
632
  !_toolsInParams(payload) &&
609
633
  !_documentsInParams(payload) &&
@@ -64,7 +64,11 @@ import type {
64
64
  ToolEndEvent,
65
65
  TPayload,
66
66
  } from '@/types';
67
- import { _convertMessagesToAnthropicPayload } from './utils/message_inputs';
67
+ import {
68
+ _convertMessagesToAnthropicPayload,
69
+ modelDisallowsAssistantPrefill,
70
+ stripUnsupportedAssistantPrefill,
71
+ } from './utils/message_inputs';
68
72
  import {
69
73
  _makeMessageChunkFromAnthropicEvent,
70
74
  getAnthropicUsageMetadata,
@@ -2637,6 +2641,61 @@ describe('Anthropic Reasoning with contentBlocks', () => {
2637
2641
  });
2638
2642
  });
2639
2643
 
2644
describe('Claude assistant prefill compatibility', () => {
  // Ids expected to be classified as rejecting a trailing assistant turn.
  const prefillRejectingModels = [
    'claude-sonnet-4-6',
    'claude-sonnet-4-6@20260217',
    'claude-opus-4-7',
    'claude-opus-4-10',
    'global.anthropic.claude-opus-4-6-v1:0',
    'anthropic/claude-sonnet-4.6',
    'anthropic/claude-sonnet-4.12',
  ];

  // Ids whose prefill handling must be left alone (older or dated Claude 4
  // ids, plus a non-Claude id).
  const prefillAcceptingModels = [
    'claude-sonnet-4-5-20250929',
    'claude-opus-4-20250514',
    'anthropic.claude-opus-4-20250514-v1:0',
    'gpt-5.4',
  ];

  test.each(prefillRejectingModels)(
    'detects %s as not supporting assistant prefill',
    (model) => {
      const disallowed = modelDisallowsAssistantPrefill(model);
      expect(disallowed).toBe(true);
    }
  );

  test.each(prefillAcceptingModels)(
    'leaves %s prefill support unchanged',
    (model) => {
      const disallowed = modelDisallowsAssistantPrefill(model);
      expect(disallowed).toBe(false);
    }
  );

  test('strips trailing assistant messages for Claude 4.6+ requests', () => {
    const userTurn = { role: 'user' as const, content: 'What changed?' };
    const request = {
      model: 'claude-opus-4-6',
      max_tokens: 100,
      messages: [
        userTurn,
        { role: 'assistant' as const, content: 'Draft prefill' },
        { role: 'assistant' as const, content: 'Another prefill' },
      ],
    };

    const sanitized = stripUnsupportedAssistantPrefill(request);

    // A new object is returned and only the leading user turn survives.
    expect(sanitized).not.toBe(request);
    expect(sanitized.messages).toEqual([
      { role: 'user', content: 'What changed?' },
    ]);
  });

  test('does not strip assistant messages for older Claude models', () => {
    const request = {
      model: 'claude-sonnet-4-5-20250929',
      max_tokens: 100,
      messages: [
        { role: 'user' as const, content: 'Write JSON only.' },
        { role: 'assistant' as const, content: '{' },
      ],
    };

    // The very same object comes back: nothing is copied or altered.
    const result = stripUnsupportedAssistantPrefill(request);
    expect(result).toBe(request);
  });
});
2698
+
2640
2699
  const opus46Model = 'claude-opus-4-6';
2641
2700
 
2642
2701
  describe('Opus 4.6', () => {
@@ -49,6 +49,10 @@ type GoogleFunctionCallBlock = MessageContentComplex & {
49
49
  };
50
50
 
51
51
  const ANTHROPIC_EMPTY_TEXT_PLACEHOLDER = '_';
52
/**
 * Matches Claude 4 model ids where an 8-digit release date follows the major
 * version directly (e.g. `claude-opus-4-20250514`), i.e. ids with no minor
 * version. The date must be followed by `-`, `.`, `@`, `:` or the end of the
 * id.
 */
const CLAUDE_4_RELEASE_DATE_MODEL_PATTERN =
  /claude-(?:opus|sonnet|haiku)-4-\d{8}(?:[-.@:]|$)/i;

/**
 * Matches Claude 4 model ids that carry a minor version (`4-6`, `4.6`, ...)
 * and captures the minor number in group 1.
 *
 * `:` is accepted as a terminator alongside `-`, `.` and `@`, so ids with a
 * colon-delimited suffix right after the version (for example
 * `anthropic/claude-sonnet-4.6:thinking`) are still recognized. The
 * release-date pattern accepts the same terminators, so a dated id followed
 * by `:` keeps being classified as dated rather than as a huge minor version.
 */
const CLAUDE_4_MINOR_MODEL_PATTERN =
  /claude-(?:opus|sonnet|haiku)-4[-.](\d+)(?:[-.@:]|$)/i;
52
56
 
53
57
  function _formatImage(imageUrl: string) {
54
58
  const parsed = parseBase64DataUrl({ dataUrl: imageUrl });
@@ -796,6 +800,48 @@ export function _convertMessagesToAnthropicPayload(
796
800
  } as AnthropicMessageCreateParams;
797
801
  }
798
802
 
803
/**
 * Tells whether `model` should be treated as not accepting an assistant
 * prefill: a Claude 4 id whose minor version is 6 or higher.
 *
 * @param model - Model id; `undefined` is handled like an empty string.
 * @returns `true` only when a minor version is found and it is at least 6.
 */
export function modelDisallowsAssistantPrefill(model?: string): boolean {
  const id = model ?? '';

  // Dated ids (`claude-opus-4-20250514`) are ruled out first; otherwise the
  // minor pattern would read the date as an enormous minor version.
  const minorMatch = CLAUDE_4_RELEASE_DATE_MODEL_PATTERN.test(id)
    ? null
    : CLAUDE_4_MINOR_MODEL_PATTERN.exec(id);

  return minorMatch != null && Number(minorMatch[1]) >= 6;
}
815
+
816
/**
 * Drops trailing assistant messages from a request whose model is flagged by
 * `modelDisallowsAssistantPrefill`.
 *
 * The input is never mutated. When nothing has to change, the same object is
 * returned; otherwise a shallow copy with a shortened `messages` array is
 * returned. At least one message is always kept, even if it is an assistant
 * message.
 *
 * @param request - Request carrying `messages` and, optionally, `model`.
 * @returns `request` itself, or a copy without the trailing assistant turns.
 */
export function stripUnsupportedAssistantPrefill<
  T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
>(request: T): T {
  if (!modelDisallowsAssistantPrefill(request.model)) {
    return request;
  }

  const original = request.messages;
  const lastRole = original[original.length - 1]?.role;
  if (original.length <= 1 || lastRole !== 'assistant') {
    return request;
  }

  // Walk back from the end past every assistant turn, stopping at one message.
  let keep = original.length;
  while (keep > 1 && original[keep - 1]?.role === 'assistant') {
    keep -= 1;
  }

  return { ...request, messages: original.slice(0, keep) };
}
844
+
799
845
  function mergeMessages(messages: AnthropicMessageCreateParams['messages']) {
800
846
  if (messages.length <= 1) {
801
847
  return messages;
@@ -4,6 +4,11 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
4
4
  import { tool, DynamicStructuredTool } from '@langchain/core/tools';
5
5
  import type * as t from '@/types';
6
6
  import {
7
+ BASH_SHELL_GUIDANCE,
8
+ CODE_ARTIFACT_PATH_GUIDANCE,
9
+ appendFailedExecutionFileReminder,
10
+ appendTmpScratchReminder,
11
+ appendCodeSessionFileSummary,
7
12
  emptyOutputMessage,
8
13
  buildCodeApiHttpErrorMessage,
9
14
  getCodeBaseURL,
@@ -23,8 +28,9 @@ export const BashExecutionToolSchema = {
23
28
  type: 'string',
24
29
  description: `The bash command or script to execute.
25
30
  - The environment is stateless; variables and state don't persist between executions.
26
- - Generated files from previous executions are automatically available in "/mnt/data/".
27
- - Files from previous executions are automatically available and can be modified in place.
31
+ - Prior /mnt/data files are available and can be modified in place.
32
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
33
+ - ${BASH_SHELL_GUIDANCE}
28
34
  - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
29
35
  - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
30
36
  - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -46,6 +52,8 @@ Runs bash commands and returns stdout/stderr output from a stateless execution e
46
52
  Usage:
47
53
  - No network access available.
48
54
  - Generated files are automatically delivered; **DO NOT** provide download links.
55
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
56
+ - ${BASH_SHELL_GUIDANCE}
49
57
  - NEVER use this tool to execute malicious commands.
50
58
  `.trim();
51
59
 
@@ -105,7 +113,7 @@ export const BashExecutionToolDefinition = {
105
113
  } as const;
106
114
 
107
115
  function createBashExecutionTool(
108
- params: t.BashExecutionToolParams = {}
116
+ params: t.BashExecutionToolParams | null = {}
109
117
  ): DynamicStructuredTool {
110
118
  return tool(
111
119
  async (rawInput, config) => {
@@ -166,11 +174,6 @@ function createBashExecutionTool(
166
174
  }
167
175
 
168
176
  const result: t.ExecuteResult = await response.json();
169
- /* See `CodeExecutor.ts` — file listings were removed from the
170
- * LLM-facing tool result. Bash especially benefits: models
171
- * naturally `ls /mnt/data/` to discover what's available
172
- * rather than relying on a prescriptive summary that
173
- * misleads as often as it helps. */
174
177
  let formattedOutput = '';
175
178
  if (result.stdout) {
176
179
  formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -179,9 +182,13 @@ function createBashExecutionTool(
179
182
  }
180
183
  if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
181
184
 
185
+ const outputWithReminder = appendTmpScratchReminder(
186
+ formattedOutput,
187
+ command
188
+ );
182
189
  const hasFiles = result.files != null && result.files.length > 0;
183
190
  return [
184
- formattedOutput.trim(),
191
+ appendCodeSessionFileSummary(outputWithReminder, result.files),
185
192
  (hasFiles
186
193
  ? { session_id: result.session_id, files: result.files }
187
194
  : {
@@ -189,8 +196,12 @@ function createBashExecutionTool(
189
196
  }) satisfies t.CodeExecutionArtifact,
190
197
  ];
191
198
  } catch (error) {
199
+ const messageWithReminder = appendFailedExecutionFileReminder(
200
+ (error as Error | undefined)?.message ?? '',
201
+ command
202
+ );
192
203
  throw new Error(
193
- `Execution error:\n\n${(error as Error | undefined)?.message}`
204
+ `Execution error:\n\n${messageWithReminder}`
194
205
  );
195
206
  }
196
207
  },
@@ -8,7 +8,12 @@ import {
8
8
  executeTools,
9
9
  formatCompletedResponse,
10
10
  } from './ProgrammaticToolCalling';
11
- import { getCodeBaseURL } from './CodeExecutor';
11
+ import {
12
+ BASH_SHELL_GUIDANCE,
13
+ CODE_ARTIFACT_PATH_GUIDANCE,
14
+ appendFailedExecutionFileReminder,
15
+ getCodeBaseURL,
16
+ } from './CodeExecutor';
12
17
  import {
13
18
  clampCodeApiRunTimeoutMs,
14
19
  createCodeApiRunTimeoutSchema,
@@ -62,11 +67,14 @@ You MUST complete your entire workflow in ONE code block.
62
67
  DO NOT split work across multiple calls expecting to reuse variables.`;
63
68
 
64
69
  const CORE_RULES = `Rules:
65
- - EVERYTHING in one call—no state persists between executions
70
+ - One call: state does not persist
66
71
  - Tools are pre-defined as bash functions—DO NOT redefine them
67
72
  - Each tool function accepts a JSON string argument
73
+ - Save tool output with raw=$(tool '{}'); printf '%s\n' "$raw" > /mnt/data/file.json; direct tool > file may be empty
74
+ - jq: use fromjson? // . on saved tool stdout and again on JSON-string fields; check types since arrays may contain strings
68
75
  - Only echo/printf output returns to the model
69
- - Generated files are automatically available in /mnt/data/ for subsequent executions
76
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
77
+ - ${BASH_SHELL_GUIDANCE}
70
78
  - timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
71
79
 
72
80
  const ADDITIONAL_RULES =
@@ -78,11 +86,11 @@ const EXAMPLES = `Example (Complete workflow in one call):
78
86
  echo "$data" | jq '.[] | .name'
79
87
 
80
88
  Example (Parallel calls):
81
- web_search '{"query": "SF weather"}' > /tmp/sf.txt &
82
- web_search '{"query": "NY weather"}' > /tmp/ny.txt &
89
+ { sf=$(web_search '{"query": "SF weather"}'); printf '%s\n' "$sf" > /mnt/data/sf.json; } &
90
+ { ny=$(web_search '{"query": "NY weather"}'); printf '%s\n' "$ny" > /mnt/data/ny.json; } &
83
91
  wait
84
- echo "SF: $(cat /tmp/sf.txt)"
85
- echo "NY: $(cat /tmp/ny.txt)"`;
92
+ echo "SF: $(jq -r . /mnt/data/sf.json)"
93
+ echo "NY: $(jq -r . /mnt/data/ny.json)"`;
86
94
 
87
95
  const CODE_PARAM_DESCRIPTION = `Bash code that calls tools programmatically. Tools are available as bash functions.
88
96
 
@@ -369,7 +377,7 @@ export function createBashProgrammaticToolCallingTool(
369
377
  // ====================================================================
370
378
 
371
379
  if (response.status === 'completed') {
372
- return formatCompletedResponse(response);
380
+ return formatCompletedResponse(response, code);
373
381
  }
374
382
 
375
383
  if (response.status === 'error') {
@@ -383,8 +391,12 @@ export function createBashProgrammaticToolCallingTool(
383
391
 
384
392
  throw new Error(`Unexpected response status: ${response.status}`);
385
393
  } catch (error) {
394
+ const messageWithReminder = appendFailedExecutionFileReminder(
395
+ (error as Error).message,
396
+ code
397
+ );
386
398
  throw new Error(
387
- `Bash programmatic execution failed: ${(error as Error).message}`
399
+ `Bash programmatic execution failed: ${messageWithReminder}`
388
400
  );
389
401
  }
390
402
  },
@@ -4,8 +4,14 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
4
4
  import { tool, DynamicStructuredTool } from '@langchain/core/tools';
5
5
  import { getEnvironmentVariable } from '@langchain/core/utils/env';
6
6
  import type * as t from '@/types';
7
+ import { appendCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
7
8
  import { EnvVar, Constants } from '@/common';
8
9
 
10
+ export {
11
+ appendCodeSessionFileSummary,
12
+ stripCodeSessionFileSummary,
13
+ } from '@/tools/CodeSessionFileSummary';
14
+
9
15
  config();
10
16
 
11
17
  export const getCodeBaseURL = (): string =>
@@ -15,6 +21,41 @@ export const getCodeBaseURL = (): string =>
15
21
  export const emptyOutputMessage =
16
22
  'stdout: Empty. Ensure you\'re writing output explicitly.\n';
17
23
 
24
/** Guidance sentence about where artifacts must be written to survive a call. */
export const CODE_ARTIFACT_PATH_GUIDANCE =
  'Persist handoff artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); failed executions do not register new files; `/tmp` and odd extensions are same-call scratch only, not later-call storage.';

/** Guidance sentence about writing files and running Python from bash. */
export const BASH_SHELL_GUIDANCE =
  'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';

// `/tmp` and `/mnt/data` as path tokens: the preceding character must not be
// part of an identifier, so `/mnt/data/tmpfile` or `foo/tmp` do not count.
const TMP_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/tmp(?:\/|\b)/;
const MNT_DATA_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/mnt\/data(?:\/|\b)/;

/** Note appended to successful output when the submitted code mentions `/tmp`. */
export const TMP_SCRATCH_OUTPUT_REMINDER =
  'Note: /tmp files are same-call scratch only and were not persisted; use /mnt/data for files needed later.';

/** Note appended to failure messages when the submitted code mentions `/mnt/data`. */
export const FAILED_EXECUTION_FILE_REMINDER =
  'Note: any files written during this failed call were not registered for later calls; fix the error and rerun before relying on them.';

/**
 * Appends the `/tmp` scratch reminder to `output` when `code` references a
 * `/tmp` path.
 *
 * @param output - Formatted tool output.
 * @param code - Source code or command that was executed.
 * @returns `output` unchanged when `code` has no `/tmp` path; otherwise the
 *   right-trimmed output followed by the reminder on its own line.
 */
export function appendTmpScratchReminder(output: string, code: string): string {
  if (!TMP_PATH_PATTERN.test(code)) {
    return output;
  }
  return `${output.trimEnd()}\n${TMP_SCRATCH_OUTPUT_REMINDER}\n`;
}

/**
 * Appends the failed-execution reminder to an error message when `code`
 * references a `/mnt/data` path and the reminder is not already present.
 *
 * This helper runs inside `catch` handlers, where the caught value is not
 * guaranteed to be an `Error`, so the message may be `undefined` at runtime
 * despite the declared type. A non-string message is treated as empty rather
 * than letting `includes`/`trimEnd` throw and mask the original failure.
 *
 * @param output - Error message text (normally `error.message`).
 * @param code - Source code or command that was executed.
 * @returns The message, with the reminder appended on its own line when it
 *   applies.
 */
export function appendFailedExecutionFileReminder(
  output: string,
  code: string
): string {
  const message = typeof output === 'string' ? output : '';
  if (
    !MNT_DATA_PATH_PATTERN.test(code) ||
    message.includes(FAILED_EXECUTION_FILE_REMINDER)
  ) {
    return message;
  }
  return `${message.trimEnd()}\n${FAILED_EXECUTION_FILE_REMINDER}\n`;
}
58
+
18
59
  const SUPPORTED_LANGUAGES = [
19
60
  'py',
20
61
  'js',
@@ -44,8 +85,8 @@ export const CodeExecutionToolSchema = {
44
85
  type: 'string',
45
86
  description: `The complete, self-contained code to execute, without any truncation or minimization.
46
87
  - The environment is stateless; variables and imports don't persist between executions.
47
- - Generated files from previous executions are automatically available in "/mnt/data/".
48
- - Files from previous executions are automatically available and can be modified in place.
88
+ - Prior /mnt/data files are available and can be modified in place.
89
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
49
90
  - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
50
91
  - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
51
92
  - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -104,6 +145,7 @@ Runs code and returns stdout/stderr output from a stateless execution environmen
104
145
  Usage:
105
146
  - No network access available.
106
147
  - Generated files are automatically delivered; **DO NOT** provide download links.
148
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
107
149
  - NEVER use this tool to execute malicious code.
108
150
  `.trim();
109
151
 
@@ -116,7 +158,7 @@ export const CodeExecutionToolDefinition = {
116
158
  } as const;
117
159
 
118
160
  function createCodeExecutionTool(
119
- params: t.CodeExecutionToolParams = {}
161
+ params: t.CodeExecutionToolParams | null = {}
120
162
  ): DynamicStructuredTool {
121
163
  return tool(
122
164
  async (rawInput, config) => {
@@ -187,13 +229,6 @@ function createCodeExecutionTool(
187
229
  }
188
230
 
189
231
  const result: t.ExecuteResult = await response.json();
190
- /* Output is stdout/stderr only — file listings were removed
191
- * because the LLM-facing summary (split inherited/generated
192
- * with prescriptive notes) caused more confusion than help,
193
- * especially for bash where models naturally explore
194
- * `/mnt/data/` themselves. The artifact still carries every
195
- * file so the host's session map stays in sync; the LLM
196
- * doesn't see them in the tool result text. */
197
232
  let formattedOutput = '';
198
233
  if (result.stdout) {
199
234
  formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -202,9 +237,13 @@ function createCodeExecutionTool(
202
237
  }
203
238
  if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
204
239
 
240
+ const outputWithReminder = appendTmpScratchReminder(
241
+ formattedOutput,
242
+ code
243
+ );
205
244
  const hasFiles = result.files != null && result.files.length > 0;
206
245
  return [
207
- formattedOutput.trim(),
246
+ appendCodeSessionFileSummary(outputWithReminder, result.files),
208
247
  (hasFiles
209
248
  ? { session_id: result.session_id, files: result.files }
210
249
  : {
@@ -212,8 +251,12 @@ function createCodeExecutionTool(
212
251
  }) satisfies t.CodeExecutionArtifact,
213
252
  ];
214
253
  } catch (error) {
254
+ const messageWithReminder = appendFailedExecutionFileReminder(
255
+ (error as Error | undefined)?.message ?? '',
256
+ code
257
+ );
215
258
  throw new Error(
216
- `Execution error:\n\n${(error as Error | undefined)?.message}`
259
+ `Execution error:\n\n${messageWithReminder}`
217
260
  );
218
261
  }
219
262
  },
@@ -0,0 +1,80 @@
1
+ import type * as t from '@/types';
2
+
3
/** Lower-case extensions (leading dot included) counted as images. */
const IMAGE_FILE_EXTENSIONS = new Set([
  '.avif',
  '.bmp',
  '.gif',
  '.ico',
  '.jpeg',
  '.jpg',
  '.png',
  '.tif',
  '.tiff',
  '.webp',
]);

/**
 * Matches exactly the text produced by `buildCodeSessionFileSummary` (with any
 * counts), anchored at both ends so only a summary sitting at the very end of
 * an output is recognized.
 */
const CODE_SESSION_FILE_SUMMARY_PATTERN =
  /^Generated files:\nSession files: \d+ persisted file\(s\) are available in \/mnt\/data, including \d+ image\(s\)\. Use known \/mnt\/data paths directly in later code-tool calls\. The app displays files\/images automatically; do not invent download links or wrap generated images in Markdown\.$/;

/**
 * Returns the lower-cased extension of the last path segment of `name`,
 * including the dot, or `''` when that segment contains no dot.
 */
function getFileExtension(name: string): string {
  const lastSlash = name.lastIndexOf('/');
  const basename = lastSlash >= 0 ? name.slice(lastSlash + 1) : name;
  const lastDot = basename.lastIndexOf('.');
  return lastDot >= 0 ? basename.slice(lastDot).toLowerCase() : '';
}

/** True when the entry has a string `name` with a known image extension. */
function isImageFile(file: Partial<t.FileRef> | null | undefined): boolean {
  const name = file?.name;
  return (
    typeof name === 'string' &&
    IMAGE_FILE_EXTENSIONS.has(getFileExtension(name))
  );
}

/** Builds the summary text; must stay in sync with the pattern above. */
function buildCodeSessionFileSummary(
  fileCount: number,
  imageCount: number
): string {
  return (
    'Generated files:\n' +
    `Session files: ${fileCount} persisted file(s) are available in /mnt/data, including ${imageCount} image(s). ` +
    'Use known /mnt/data paths directly in later code-tool calls. ' +
    'The app displays files/images automatically; do not invent download links or wrap generated images in Markdown.'
  );
}

/**
 * True for an actual entry that is not flagged `inherited`.
 *
 * Missing (`null`/`undefined`) entries are not files at all, so they are
 * rejected instead of being counted as freshly generated files.
 */
function isGeneratedFile(file: Partial<t.FileRef> | null | undefined): boolean {
  return file != null && file.inherited !== true;
}

/**
 * Removes a trailing session-file summary from `output`.
 *
 * The text is only altered when the last `Generated files:` occurrence starts
 * the output or follows a blank line, and everything from there to the end
 * matches the summary format exactly. In any other case `output` is returned
 * unchanged.
 *
 * @param output - Tool output that may end with a summary.
 * @returns The output without the summary, right-trimmed.
 */
export function stripCodeSessionFileSummary(output: string): string {
  const summaryStart = output.lastIndexOf('Generated files:');
  if (summaryStart < 0) return output;
  const beforeSummary = output.slice(0, summaryStart);
  if (beforeSummary !== '' && !beforeSummary.endsWith('\n\n')) return output;
  const maybeSummary = output.slice(summaryStart);
  if (!CODE_SESSION_FILE_SUMMARY_PATTERN.test(maybeSummary)) return output;
  return beforeSummary.trimEnd();
}

/**
 * Appends a compact summary of the files generated by an execution.
 *
 * @param output - Formatted stdout/stderr text.
 * @param files - File entries reported for the execution, if any.
 * @returns The trimmed output alone when there are no generated
 *   (non-inherited) files; otherwise the output, a blank line, and the
 *   summary with file and image counts.
 */
export function appendCodeSessionFileSummary(
  output: string,
  files: t.FileRefs | undefined
): string {
  if (files == null || files.length === 0) {
    return output.trim();
  }

  const generatedFiles = files.filter(isGeneratedFile);
  if (generatedFiles.length === 0) {
    return output.trim();
  }

  const imageCount = generatedFiles.filter(isImageFile).length;
  const summary = buildCodeSessionFileSummary(
    generatedFiles.length,
    imageCount
  );

  return `${output.trimEnd()}\n\n${summary}`.trim();
}
@@ -7,9 +7,13 @@ import type { ToolCall } from '@langchain/core/messages/tool';
7
7
  import type { ProgrammaticToolCallingJsonSchema } from './ptcTimeout';
8
8
  import type * as t from '@/types';
9
9
  import {
10
+ CODE_ARTIFACT_PATH_GUIDANCE,
11
+ appendCodeSessionFileSummary,
12
+ appendFailedExecutionFileReminder,
10
13
  buildCodeApiHttpErrorMessage,
11
14
  emptyOutputMessage,
12
15
  getCodeBaseURL,
16
+ appendTmpScratchReminder,
13
17
  resolveCodeApiAuthHeaders,
14
18
  } from './CodeExecutor';
15
19
  import {
@@ -36,15 +40,17 @@ You MUST complete your entire workflow in ONE code block: query → process →
36
40
  DO NOT split work across multiple calls expecting to reuse variables.`;
37
41
 
38
42
  const CORE_RULES = `Rules:
39
- - EVERYTHING in one call—no state persists between executions
40
- - Just write code with await—auto-wrapped in async context
41
- - DO NOT define async def main() or call asyncio.run()
43
+ - One call: state does not persist
44
+ - Auto-wrapped async; use await, no main()/asyncio.run()
42
45
  - Tools are pre-defined—DO NOT write function definitions
46
+ - Call tools with keyword args only (await tool(arg=value), never pass a dict)
47
+ - Tool results are decoded Python values (dict/list/str)
43
48
  - Only print() output returns to the model
49
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
44
50
  - timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
45
51
 
46
- const ADDITIONAL_RULES = `- Generated files are automatically available in /mnt/data/ for subsequent executions
47
- - Tool names normalized: hyphens→underscores, keywords get \`_tool\` suffix`;
52
+ const ADDITIONAL_RULES =
53
+ '- Tool names normalized: hyphens→underscores, keywords get `_tool` suffix';
48
54
 
49
55
  const EXAMPLES = `Example (Complete workflow in one call):
50
56
  # Query data
@@ -678,15 +684,16 @@ export async function executeTools(
678
684
  /**
679
685
  * Formats the completed response for the agent.
680
686
  *
681
- * Output is stdout/stderr only see `CodeExecutor.ts`. The
682
- * artifact still carries every file so the host's session map
683
- * stays in sync; the LLM doesn't see them in the tool result text.
687
+ * Output includes stdout/stderr plus a compact session-file summary
688
+ * when artifacts were persisted. The artifact still carries every
689
+ * file so the host's session map stays in sync.
684
690
  *
685
691
  * @param response - The completed API response
686
692
  * @returns Tuple of [formatted string, artifact]
687
693
  */
688
694
  export function formatCompletedResponse(
689
- response: t.ProgrammaticExecutionResponse
695
+ response: t.ProgrammaticExecutionResponse,
696
+ sourceCode = ''
690
697
  ): [string, t.ProgrammaticExecutionArtifact] {
691
698
  let formatted = '';
692
699
 
@@ -700,8 +707,10 @@ export function formatCompletedResponse(
700
707
  formatted += `stderr:\n${response.stderr}\n`;
701
708
  }
702
709
 
710
+ const outputWithReminder = appendTmpScratchReminder(formatted, sourceCode);
711
+
703
712
  return [
704
- formatted.trim(),
713
+ appendCodeSessionFileSummary(outputWithReminder, response.files),
705
714
  {
706
715
  session_id: response.session_id,
707
716
  files: response.files,
@@ -859,7 +868,7 @@ export function createProgrammaticToolCallingTool(
859
868
  // ====================================================================
860
869
 
861
870
  if (response.status === 'completed') {
862
- return formatCompletedResponse(response);
871
+ return formatCompletedResponse(response, code);
863
872
  }
864
873
 
865
874
  if (response.status === 'error') {
@@ -873,8 +882,12 @@ export function createProgrammaticToolCallingTool(
873
882
 
874
883
  throw new Error(`Unexpected response status: ${response.status}`);
875
884
  } catch (error) {
885
+ const messageWithReminder = appendFailedExecutionFileReminder(
886
+ (error as Error).message,
887
+ code
888
+ );
876
889
  throw new Error(
877
- `Programmatic execution failed: ${(error as Error).message}`
890
+ `Programmatic execution failed: ${messageWithReminder}`
878
891
  );
879
892
  }
880
893
  },