@librechat/agents 3.1.88 → 3.1.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/dist/cjs/graphs/Graph.cjs +25 -1
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/hooks/executeHooks.cjs +14 -7
  4. package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
  5. package/dist/cjs/llm/anthropic/index.cjs +8 -2
  6. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  7. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +34 -0
  8. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  9. package/dist/cjs/main.cjs +9 -0
  10. package/dist/cjs/main.cjs.map +1 -1
  11. package/dist/cjs/stream.cjs +115 -8
  12. package/dist/cjs/stream.cjs.map +1 -1
  13. package/dist/cjs/tools/BashExecutor.cjs +10 -9
  14. package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
  15. package/dist/cjs/tools/BashProgrammaticToolCalling.cjs +12 -8
  16. package/dist/cjs/tools/BashProgrammaticToolCalling.cjs.map +1 -1
  17. package/dist/cjs/tools/CodeExecutor.cjs +35 -11
  18. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  19. package/dist/cjs/tools/CodeSessionFileSummary.cjs +63 -0
  20. package/dist/cjs/tools/CodeSessionFileSummary.cjs.map +1 -0
  21. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -12
  22. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  23. package/dist/cjs/tools/ToolNode.cjs +32 -12
  24. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  25. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +319 -29
  26. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  27. package/dist/cjs/tools/toolOutputReferences.cjs +8 -0
  28. package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
  29. package/dist/cjs/utils/events.cjs +3 -1
  30. package/dist/cjs/utils/events.cjs.map +1 -1
  31. package/dist/esm/graphs/Graph.mjs +25 -1
  32. package/dist/esm/graphs/Graph.mjs.map +1 -1
  33. package/dist/esm/hooks/executeHooks.mjs +14 -7
  34. package/dist/esm/hooks/executeHooks.mjs.map +1 -1
  35. package/dist/esm/llm/anthropic/index.mjs +9 -3
  36. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  37. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +33 -1
  38. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  39. package/dist/esm/main.mjs +2 -1
  40. package/dist/esm/main.mjs.map +1 -1
  41. package/dist/esm/stream.mjs +115 -8
  42. package/dist/esm/stream.mjs.map +1 -1
  43. package/dist/esm/tools/BashExecutor.mjs +11 -10
  44. package/dist/esm/tools/BashExecutor.mjs.map +1 -1
  45. package/dist/esm/tools/BashProgrammaticToolCalling.mjs +13 -9
  46. package/dist/esm/tools/BashProgrammaticToolCalling.mjs.map +1 -1
  47. package/dist/esm/tools/CodeExecutor.mjs +29 -12
  48. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  49. package/dist/esm/tools/CodeSessionFileSummary.mjs +60 -0
  50. package/dist/esm/tools/CodeSessionFileSummary.mjs.map +1 -0
  51. package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -13
  52. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  53. package/dist/esm/tools/ToolNode.mjs +32 -12
  54. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  55. package/dist/esm/tools/subagent/SubagentExecutor.mjs +320 -31
  56. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  57. package/dist/esm/tools/toolOutputReferences.mjs +8 -1
  58. package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
  59. package/dist/esm/utils/events.mjs +3 -1
  60. package/dist/esm/utils/events.mjs.map +1 -1
  61. package/dist/types/graphs/Graph.d.ts +8 -0
  62. package/dist/types/llm/anthropic/index.d.ts +3 -1
  63. package/dist/types/llm/anthropic/utils/message_inputs.d.ts +4 -0
  64. package/dist/types/tools/BashExecutor.d.ts +3 -3
  65. package/dist/types/tools/CodeExecutor.d.ts +10 -3
  66. package/dist/types/tools/CodeSessionFileSummary.d.ts +3 -0
  67. package/dist/types/tools/ProgrammaticToolCalling.d.ts +4 -4
  68. package/dist/types/tools/subagent/SubagentExecutor.d.ts +8 -5
  69. package/dist/types/types/tools.d.ts +11 -3
  70. package/dist/types/utils/events.d.ts +1 -1
  71. package/package.json +1 -1
  72. package/src/__tests__/stream.eagerEventExecution.test.ts +1073 -221
  73. package/src/graphs/Graph.ts +27 -5
  74. package/src/hooks/__tests__/executeHooks.test.ts +38 -0
  75. package/src/hooks/executeHooks.ts +27 -7
  76. package/src/llm/anthropic/index.ts +27 -3
  77. package/src/llm/anthropic/llm.spec.ts +60 -1
  78. package/src/llm/anthropic/utils/message_inputs.ts +46 -0
  79. package/src/specs/subagent.test.ts +87 -1
  80. package/src/stream.ts +163 -12
  81. package/src/tools/BashExecutor.ts +21 -10
  82. package/src/tools/BashProgrammaticToolCalling.ts +21 -9
  83. package/src/tools/CodeExecutor.ts +55 -12
  84. package/src/tools/CodeSessionFileSummary.ts +80 -0
  85. package/src/tools/ProgrammaticToolCalling.ts +25 -12
  86. package/src/tools/ToolNode.ts +142 -116
  87. package/src/tools/__tests__/BashExecutor.test.ts +9 -0
  88. package/src/tools/__tests__/CodeApiAuthHeaders.test.ts +43 -0
  89. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +100 -16
  90. package/src/tools/__tests__/SubagentExecutor.test.ts +540 -6
  91. package/src/tools/__tests__/ToolNode.eagerEventExecution.test.ts +278 -14
  92. package/src/tools/__tests__/ToolNode.outputReferences.test.ts +52 -0
  93. package/src/tools/__tests__/subagentHooks.test.ts +237 -0
  94. package/src/tools/subagent/SubagentExecutor.ts +514 -36
  95. package/src/types/tools.ts +11 -3
  96. package/src/utils/events.ts +4 -2
package/src/stream.ts CHANGED
@@ -27,11 +27,16 @@ import {
27
27
  coerceRecordArgs,
28
28
  normalizeError,
29
29
  } from '@/tools/eagerEventExecution';
30
+ import {
31
+ calculateMaxToolResultChars,
32
+ truncateToolResultContent,
33
+ } from '@/utils/truncation';
30
34
  import {
31
35
  getStreamedToolCallSeal,
32
36
  getStreamedToolCallAdapter,
33
37
  type StreamedToolCallSeal,
34
38
  } from '@/tools/streamedToolCallSeals';
39
+ import { TOOL_OUTPUT_REF_PATTERN } from '@/tools/toolOutputReferences';
35
40
 
36
41
  const LOCAL_CODING_BUNDLE_NAME_SET: ReadonlySet<string> = new Set(
37
42
  LOCAL_CODING_BUNDLE_NAMES
@@ -98,11 +103,22 @@ function getNonEmptyValue(possibleValues: string[]): string | undefined {
98
103
  }
99
104
 
100
105
  function isBatchSensitiveToolExecution(graph: StandardGraph): boolean {
101
- return (
102
- graph.hookRegistry != null ||
103
- graph.humanInTheLoop?.enabled === true ||
104
- graph.toolOutputReferences?.enabled === true
105
- );
106
+ return graph.hookRegistry != null || graph.humanInTheLoop?.enabled === true;
107
+ }
108
+
109
+ function hasToolOutputReference(value: unknown): boolean {
110
+ if (typeof value === 'string') {
111
+ return TOOL_OUTPUT_REF_PATTERN.test(value);
112
+ }
113
+ if (Array.isArray(value)) {
114
+ return value.some((item) => hasToolOutputReference(item));
115
+ }
116
+ if (value !== null && typeof value === 'object') {
117
+ return Object.values(value as Record<string, unknown>).some((item) =>
118
+ hasToolOutputReference(item)
119
+ );
120
+ }
121
+ return false;
106
122
  }
107
123
 
108
124
  function isDirectGraphTool(
@@ -194,7 +210,10 @@ function isEagerToolExecutionEnabledForBatch(args: {
194
210
  ) {
195
211
  return false;
196
212
  }
197
- if (graph.handlerRegistry?.getHandler(GraphEvents.ON_TOOL_EXECUTE) == null) {
213
+ if (
214
+ graph.handlerRegistry?.getHandler(GraphEvents.ON_TOOL_EXECUTE) == null &&
215
+ graph.eventToolExecutionAvailable !== true
216
+ ) {
198
217
  return false;
199
218
  }
200
219
  return true;
@@ -257,13 +276,49 @@ function hasPotentialDirectToolInStreamContext(args: {
257
276
  if ((agentContext?.graphTools?.length ?? 0) > 0) {
258
277
  return true;
259
278
  }
279
+ return false;
280
+ }
281
+
282
+ function hasDirectToolCallChunkInBatch(args: {
283
+ graph: StandardGraph;
284
+ agentContext?: AgentContext;
285
+ toolCallChunks?: ToolCallChunk[];
286
+ }): boolean {
287
+ const { graph, agentContext, toolCallChunks } = args;
260
288
  return (
261
- agentContext?.toolDefinitions?.some((toolDefinition) =>
262
- toolDefinition.name.startsWith(Constants.LC_TRANSFER_TO_)
289
+ toolCallChunks?.some(
290
+ (toolCallChunk) =>
291
+ toolCallChunk.name != null &&
292
+ toolCallChunk.name !== '' &&
293
+ (isDirectGraphTool(toolCallChunk.name, agentContext) ||
294
+ isDirectLocalTool(toolCallChunk.name, graph))
263
295
  ) === true
264
296
  );
265
297
  }
266
298
 
299
+ function hasDirectToolCallChunkStateInStep(args: {
300
+ graph: StandardGraph;
301
+ agentContext?: AgentContext;
302
+ stepKey: string;
303
+ }): boolean {
304
+ const { graph, agentContext, stepKey } = args;
305
+ const prefix = `${stepKey}\u0000`;
306
+ for (const [key, state] of graph.eagerEventToolCallChunks) {
307
+ if (!key.startsWith(prefix)) {
308
+ continue;
309
+ }
310
+ const name = state.name;
311
+ if (
312
+ name != null &&
313
+ name !== '' &&
314
+ (isDirectGraphTool(name, agentContext) || isDirectLocalTool(name, graph))
315
+ ) {
316
+ return true;
317
+ }
318
+ }
319
+ return false;
320
+ }
321
+
267
322
  type EagerToolExecutionEntry = {
268
323
  id: string;
269
324
  toolName: string;
@@ -298,6 +353,12 @@ function createEagerToolExecutionPlan(args: {
298
353
  if (hasDirectToolCallInBatch({ graph, agentContext, toolCalls })) {
299
354
  return undefined;
300
355
  }
356
+ if (
357
+ graph.toolOutputReferences?.enabled === true &&
358
+ toolCalls.some((toolCall) => hasToolOutputReference(toolCall.args))
359
+ ) {
360
+ return undefined;
361
+ }
301
362
 
302
363
  const candidateToolCalls = skipExisting
303
364
  ? toolCalls.filter((toolCall) => {
@@ -369,6 +430,7 @@ function startEagerToolExecutions(args: {
369
430
  return;
370
431
  }
371
432
 
433
+ const records: t.EagerEventToolExecution[] = [];
372
434
  const promise: Promise<t.EagerEventToolExecutionOutcome> = new Promise<
373
435
  t.ToolExecuteResult[]
374
436
  >((resolve, reject) => {
@@ -407,20 +469,104 @@ function startEagerToolExecutions(args: {
407
469
  })
408
470
  .catch(reject);
409
471
  }).then(
410
- (results): t.EagerEventToolExecutionOutcome => ({ results }),
472
+ async (results): Promise<t.EagerEventToolExecutionOutcome> => {
473
+ await dispatchEagerToolCompletions({
474
+ graph,
475
+ agentContext,
476
+ records,
477
+ results,
478
+ });
479
+ return { results };
480
+ },
411
481
  (error): t.EagerEventToolExecutionOutcome => ({
412
482
  error: normalizeError(error),
413
483
  })
414
484
  );
415
485
 
416
486
  for (const entry of entries) {
417
- graph.eagerEventToolExecutions.set(entry.id, {
487
+ const record: t.EagerEventToolExecution = {
418
488
  toolCallId: entry.id,
419
489
  toolName: entry.toolName,
420
490
  args: entry.coercedArgs,
421
491
  request: entry.request,
422
492
  promise,
423
- });
493
+ };
494
+ records.push(record);
495
+ graph.eagerEventToolExecutions.set(entry.id, record);
496
+ }
497
+ }
498
+
499
+ async function dispatchEagerToolCompletions(args: {
500
+ graph: StandardGraph;
501
+ agentContext?: AgentContext;
502
+ records: t.EagerEventToolExecution[];
503
+ results: t.ToolExecuteResult[];
504
+ }): Promise<void> {
505
+ const { graph, agentContext, records, results } = args;
506
+ const recordById = new Map(
507
+ records.map((record) => [record.toolCallId, record])
508
+ );
509
+ const maxToolResultChars =
510
+ agentContext?.maxToolResultChars ??
511
+ calculateMaxToolResultChars(agentContext?.maxContextTokens);
512
+
513
+ for (const result of results) {
514
+ const record = recordById.get(result.toolCallId);
515
+ if (record == null) {
516
+ continue;
517
+ }
518
+ if (graph.eagerEventToolExecutions.get(result.toolCallId) !== record) {
519
+ continue;
520
+ }
521
+ const stepId =
522
+ record.request.stepId ??
523
+ graph.toolCallStepIds.get(result.toolCallId) ??
524
+ '';
525
+ if (stepId === '') {
526
+ continue;
527
+ }
528
+ const output =
529
+ result.status === 'error'
530
+ ? `Error: ${result.errorMessage ?? 'Unknown error'}\n Please fix your mistakes.`
531
+ : truncateToolResultContent(
532
+ typeof result.content === 'string'
533
+ ? result.content
534
+ : JSON.stringify(result.content),
535
+ maxToolResultChars
536
+ );
537
+
538
+ try {
539
+ const dispatched = await safeDispatchCustomEvent(
540
+ GraphEvents.ON_RUN_STEP_COMPLETED,
541
+ {
542
+ result: {
543
+ id: stepId,
544
+ index: record.request.turn ?? 0,
545
+ type: 'tool_call' as const,
546
+ eager: true,
547
+ tool_call: {
548
+ args: JSON.stringify(record.request.args),
549
+ name: record.toolName,
550
+ id: result.toolCallId,
551
+ output,
552
+ progress: 1,
553
+ } as t.ProcessedToolCall,
554
+ },
555
+ },
556
+ graph.config
557
+ );
558
+ if (dispatched === false) {
559
+ continue;
560
+ }
561
+ record.completionDispatched = true;
562
+ } catch (error) {
563
+ // Let ToolNode dispatch the completion through the normal path later.
564
+
565
+ console.warn(
566
+ `[stream] eager completion dispatch failed for toolCallId=${result.toolCallId}:`,
567
+ error instanceof Error ? error.message : error
568
+ );
569
+ }
424
570
  }
425
571
  }
426
572
 
@@ -699,6 +845,8 @@ function startReadyStreamedEagerToolExecutions(args: {
699
845
  } = args;
700
846
  if (
701
847
  hasPotentialDirectToolInStreamContext({ graph, agentContext }) ||
848
+ hasDirectToolCallChunkInBatch({ graph, agentContext, toolCallChunks }) ||
849
+ hasDirectToolCallChunkStateInStep({ graph, agentContext, stepKey }) ||
702
850
  !isEagerToolExecutionEnabledForBatch({ graph, metadata, agentContext })
703
851
  ) {
704
852
  return;
@@ -1265,9 +1413,12 @@ export function createContentAggregator(): t.ContentAggregatorResult {
1265
1413
 
1266
1414
  const existingContent = contentParts[index] as
1267
1415
  | (Omit<t.ToolCallContent, 'tool_call'> & {
1268
- tool_call?: t.ToolCallPart;
1416
+ tool_call?: t.ToolCallPart & t.PartMetadata;
1269
1417
  })
1270
1418
  | undefined;
1419
+ if (!finalUpdate && existingContent?.tool_call?.progress === 1) {
1420
+ return;
1421
+ }
1271
1422
 
1272
1423
  /** When args are a valid object, they are likely already invoked */
1273
1424
  let args =
package/src/tools/BashExecutor.ts CHANGED
@@ -4,6 +4,11 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
4
4
  import { tool, DynamicStructuredTool } from '@langchain/core/tools';
5
5
  import type * as t from '@/types';
6
6
  import {
7
+ BASH_SHELL_GUIDANCE,
8
+ CODE_ARTIFACT_PATH_GUIDANCE,
9
+ appendFailedExecutionFileReminder,
10
+ appendTmpScratchReminder,
11
+ appendCodeSessionFileSummary,
7
12
  emptyOutputMessage,
8
13
  buildCodeApiHttpErrorMessage,
9
14
  getCodeBaseURL,
@@ -23,8 +28,9 @@ export const BashExecutionToolSchema = {
23
28
  type: 'string',
24
29
  description: `The bash command or script to execute.
25
30
  - The environment is stateless; variables and state don't persist between executions.
26
- - Generated files from previous executions are automatically available in "/mnt/data/".
27
- - Files from previous executions are automatically available and can be modified in place.
31
+ - Prior /mnt/data files are available and can be modified in place.
32
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
33
+ - ${BASH_SHELL_GUIDANCE}
28
34
  - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
29
35
  - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
30
36
  - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -46,6 +52,8 @@ Runs bash commands and returns stdout/stderr output from a stateless execution e
46
52
  Usage:
47
53
  - No network access available.
48
54
  - Generated files are automatically delivered; **DO NOT** provide download links.
55
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
56
+ - ${BASH_SHELL_GUIDANCE}
49
57
  - NEVER use this tool to execute malicious commands.
50
58
  `.trim();
51
59
 
@@ -105,7 +113,7 @@ export const BashExecutionToolDefinition = {
105
113
  } as const;
106
114
 
107
115
  function createBashExecutionTool(
108
- params: t.BashExecutionToolParams = {}
116
+ params: t.BashExecutionToolParams | null = {}
109
117
  ): DynamicStructuredTool {
110
118
  return tool(
111
119
  async (rawInput, config) => {
@@ -166,11 +174,6 @@ function createBashExecutionTool(
166
174
  }
167
175
 
168
176
  const result: t.ExecuteResult = await response.json();
169
- /* See `CodeExecutor.ts` — file listings were removed from the
170
- * LLM-facing tool result. Bash especially benefits: models
171
- * naturally `ls /mnt/data/` to discover what's available
172
- * rather than relying on a prescriptive summary that
173
- * misleads as often as it helps. */
174
177
  let formattedOutput = '';
175
178
  if (result.stdout) {
176
179
  formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -179,9 +182,13 @@ function createBashExecutionTool(
179
182
  }
180
183
  if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
181
184
 
185
+ const outputWithReminder = appendTmpScratchReminder(
186
+ formattedOutput,
187
+ command
188
+ );
182
189
  const hasFiles = result.files != null && result.files.length > 0;
183
190
  return [
184
- formattedOutput.trim(),
191
+ appendCodeSessionFileSummary(outputWithReminder, result.files),
185
192
  (hasFiles
186
193
  ? { session_id: result.session_id, files: result.files }
187
194
  : {
@@ -189,8 +196,12 @@ function createBashExecutionTool(
189
196
  }) satisfies t.CodeExecutionArtifact,
190
197
  ];
191
198
  } catch (error) {
199
+ const messageWithReminder = appendFailedExecutionFileReminder(
200
+ (error as Error | undefined)?.message ?? '',
201
+ command
202
+ );
192
203
  throw new Error(
193
- `Execution error:\n\n${(error as Error | undefined)?.message}`
204
+ `Execution error:\n\n${messageWithReminder}`
194
205
  );
195
206
  }
196
207
  },
package/src/tools/BashProgrammaticToolCalling.ts CHANGED
@@ -8,7 +8,12 @@ import {
8
8
  executeTools,
9
9
  formatCompletedResponse,
10
10
  } from './ProgrammaticToolCalling';
11
- import { getCodeBaseURL } from './CodeExecutor';
11
+ import {
12
+ BASH_SHELL_GUIDANCE,
13
+ CODE_ARTIFACT_PATH_GUIDANCE,
14
+ appendFailedExecutionFileReminder,
15
+ getCodeBaseURL,
16
+ } from './CodeExecutor';
12
17
  import {
13
18
  clampCodeApiRunTimeoutMs,
14
19
  createCodeApiRunTimeoutSchema,
@@ -62,11 +67,14 @@ You MUST complete your entire workflow in ONE code block.
62
67
  DO NOT split work across multiple calls expecting to reuse variables.`;
63
68
 
64
69
  const CORE_RULES = `Rules:
65
- - EVERYTHING in one call—no state persists between executions
70
+ - One call: state does not persist
66
71
  - Tools are pre-defined as bash functions—DO NOT redefine them
67
72
  - Each tool function accepts a JSON string argument
73
+ - Save tool output with raw=$(tool '{}'); printf '%s\n' "$raw" > /mnt/data/file.json; direct tool > file may be empty
74
+ - jq: use fromjson? // . on saved tool stdout and again on JSON-string fields; check types since arrays may contain strings
68
75
  - Only echo/printf output returns to the model
69
- - Generated files are automatically available in /mnt/data/ for subsequent executions
76
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
77
+ - ${BASH_SHELL_GUIDANCE}
70
78
  - timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
71
79
 
72
80
  const ADDITIONAL_RULES =
@@ -78,11 +86,11 @@ const EXAMPLES = `Example (Complete workflow in one call):
78
86
  echo "$data" | jq '.[] | .name'
79
87
 
80
88
  Example (Parallel calls):
81
- web_search '{"query": "SF weather"}' > /tmp/sf.txt &
82
- web_search '{"query": "NY weather"}' > /tmp/ny.txt &
89
+ { sf=$(web_search '{"query": "SF weather"}'); printf '%s\n' "$sf" > /mnt/data/sf.json; } &
90
+ { ny=$(web_search '{"query": "NY weather"}'); printf '%s\n' "$ny" > /mnt/data/ny.json; } &
83
91
  wait
84
- echo "SF: $(cat /tmp/sf.txt)"
85
- echo "NY: $(cat /tmp/ny.txt)"`;
92
+ echo "SF: $(jq -r . /mnt/data/sf.json)"
93
+ echo "NY: $(jq -r . /mnt/data/ny.json)"`;
86
94
 
87
95
  const CODE_PARAM_DESCRIPTION = `Bash code that calls tools programmatically. Tools are available as bash functions.
88
96
 
@@ -369,7 +377,7 @@ export function createBashProgrammaticToolCallingTool(
369
377
  // ====================================================================
370
378
 
371
379
  if (response.status === 'completed') {
372
- return formatCompletedResponse(response);
380
+ return formatCompletedResponse(response, code);
373
381
  }
374
382
 
375
383
  if (response.status === 'error') {
@@ -383,8 +391,12 @@ export function createBashProgrammaticToolCallingTool(
383
391
 
384
392
  throw new Error(`Unexpected response status: ${response.status}`);
385
393
  } catch (error) {
394
+ const messageWithReminder = appendFailedExecutionFileReminder(
395
+ (error as Error).message,
396
+ code
397
+ );
386
398
  throw new Error(
387
- `Bash programmatic execution failed: ${(error as Error).message}`
399
+ `Bash programmatic execution failed: ${messageWithReminder}`
388
400
  );
389
401
  }
390
402
  },
package/src/tools/CodeExecutor.ts CHANGED
@@ -4,8 +4,14 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
4
4
  import { tool, DynamicStructuredTool } from '@langchain/core/tools';
5
5
  import { getEnvironmentVariable } from '@langchain/core/utils/env';
6
6
  import type * as t from '@/types';
7
+ import { appendCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
7
8
  import { EnvVar, Constants } from '@/common';
8
9
 
10
+ export {
11
+ appendCodeSessionFileSummary,
12
+ stripCodeSessionFileSummary,
13
+ } from '@/tools/CodeSessionFileSummary';
14
+
9
15
  config();
10
16
 
11
17
  export const getCodeBaseURL = (): string =>
@@ -15,6 +21,41 @@ export const getCodeBaseURL = (): string =>
15
21
  export const emptyOutputMessage =
16
22
  'stdout: Empty. Ensure you\'re writing output explicitly.\n';
17
23
 
24
+ export const CODE_ARTIFACT_PATH_GUIDANCE =
25
+ 'Persist handoff artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); failed executions do not register new files; `/tmp` and odd extensions are same-call scratch only, not later-call storage.';
26
+
27
+ export const BASH_SHELL_GUIDANCE =
28
+ 'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';
29
+
30
+ const TMP_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/tmp(?:\/|\b)/;
31
+ const MNT_DATA_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/mnt\/data(?:\/|\b)/;
32
+
33
+ export const TMP_SCRATCH_OUTPUT_REMINDER =
34
+ 'Note: /tmp files are same-call scratch only and were not persisted; use /mnt/data for files needed later.';
35
+
36
+ export const FAILED_EXECUTION_FILE_REMINDER =
37
+ 'Note: any files written during this failed call were not registered for later calls; fix the error and rerun before relying on them.';
38
+
39
+ export function appendTmpScratchReminder(output: string, code: string): string {
40
+ if (!TMP_PATH_PATTERN.test(code)) {
41
+ return output;
42
+ }
43
+ return `${output.trimEnd()}\n${TMP_SCRATCH_OUTPUT_REMINDER}\n`;
44
+ }
45
+
46
+ export function appendFailedExecutionFileReminder(
47
+ output: string,
48
+ code: string
49
+ ): string {
50
+ if (
51
+ !MNT_DATA_PATH_PATTERN.test(code) ||
52
+ output.includes(FAILED_EXECUTION_FILE_REMINDER)
53
+ ) {
54
+ return output;
55
+ }
56
+ return `${output.trimEnd()}\n${FAILED_EXECUTION_FILE_REMINDER}\n`;
57
+ }
58
+
18
59
  const SUPPORTED_LANGUAGES = [
19
60
  'py',
20
61
  'js',
@@ -44,8 +85,8 @@ export const CodeExecutionToolSchema = {
44
85
  type: 'string',
45
86
  description: `The complete, self-contained code to execute, without any truncation or minimization.
46
87
  - The environment is stateless; variables and imports don't persist between executions.
47
- - Generated files from previous executions are automatically available in "/mnt/data/".
48
- - Files from previous executions are automatically available and can be modified in place.
88
+ - Prior /mnt/data files are available and can be modified in place.
89
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
49
90
  - Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
50
91
  - Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
51
92
  - IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
@@ -104,6 +145,7 @@ Runs code and returns stdout/stderr output from a stateless execution environmen
104
145
  Usage:
105
146
  - No network access available.
106
147
  - Generated files are automatically delivered; **DO NOT** provide download links.
148
+ - ${CODE_ARTIFACT_PATH_GUIDANCE}
107
149
  - NEVER use this tool to execute malicious code.
108
150
  `.trim();
109
151
 
@@ -116,7 +158,7 @@ export const CodeExecutionToolDefinition = {
116
158
  } as const;
117
159
 
118
160
  function createCodeExecutionTool(
119
- params: t.CodeExecutionToolParams = {}
161
+ params: t.CodeExecutionToolParams | null = {}
120
162
  ): DynamicStructuredTool {
121
163
  return tool(
122
164
  async (rawInput, config) => {
@@ -187,13 +229,6 @@ function createCodeExecutionTool(
187
229
  }
188
230
 
189
231
  const result: t.ExecuteResult = await response.json();
190
- /* Output is stdout/stderr only — file listings were removed
191
- * because the LLM-facing summary (split inherited/generated
192
- * with prescriptive notes) caused more confusion than help,
193
- * especially for bash where models naturally explore
194
- * `/mnt/data/` themselves. The artifact still carries every
195
- * file so the host's session map stays in sync; the LLM
196
- * doesn't see them in the tool result text. */
197
232
  let formattedOutput = '';
198
233
  if (result.stdout) {
199
234
  formattedOutput += `stdout:\n${result.stdout}\n`;
@@ -202,9 +237,13 @@ function createCodeExecutionTool(
202
237
  }
203
238
  if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
204
239
 
240
+ const outputWithReminder = appendTmpScratchReminder(
241
+ formattedOutput,
242
+ code
243
+ );
205
244
  const hasFiles = result.files != null && result.files.length > 0;
206
245
  return [
207
- formattedOutput.trim(),
246
+ appendCodeSessionFileSummary(outputWithReminder, result.files),
208
247
  (hasFiles
209
248
  ? { session_id: result.session_id, files: result.files }
210
249
  : {
@@ -212,8 +251,12 @@ function createCodeExecutionTool(
212
251
  }) satisfies t.CodeExecutionArtifact,
213
252
  ];
214
253
  } catch (error) {
254
+ const messageWithReminder = appendFailedExecutionFileReminder(
255
+ (error as Error | undefined)?.message ?? '',
256
+ code
257
+ );
215
258
  throw new Error(
216
- `Execution error:\n\n${(error as Error | undefined)?.message}`
259
+ `Execution error:\n\n${messageWithReminder}`
217
260
  );
218
261
  }
219
262
  },
package/src/tools/CodeSessionFileSummary.ts ADDED
@@ -0,0 +1,80 @@
1
+ import type * as t from '@/types';
2
+
3
+ const IMAGE_FILE_EXTENSIONS = new Set([
4
+ '.avif',
5
+ '.bmp',
6
+ '.gif',
7
+ '.ico',
8
+ '.jpeg',
9
+ '.jpg',
10
+ '.png',
11
+ '.tif',
12
+ '.tiff',
13
+ '.webp',
14
+ ]);
15
+
16
+ const CODE_SESSION_FILE_SUMMARY_PATTERN =
17
+ /^Generated files:\nSession files: \d+ persisted file\(s\) are available in \/mnt\/data, including \d+ image\(s\)\. Use known \/mnt\/data paths directly in later code-tool calls\. The app displays files\/images automatically; do not invent download links or wrap generated images in Markdown\.$/;
18
+
19
+ function getFileExtension(name: string): string {
20
+ const lastSlash = name.lastIndexOf('/');
21
+ const basename = lastSlash >= 0 ? name.slice(lastSlash + 1) : name;
22
+ const lastDot = basename.lastIndexOf('.');
23
+ return lastDot >= 0 ? basename.slice(lastDot).toLowerCase() : '';
24
+ }
25
+
26
+ function isImageFile(file: Partial<t.FileRef> | null | undefined): boolean {
27
+ const name = file?.name;
28
+ return (
29
+ typeof name === 'string' &&
30
+ IMAGE_FILE_EXTENSIONS.has(getFileExtension(name))
31
+ );
32
+ }
33
+
34
+ function buildCodeSessionFileSummary(
35
+ fileCount: number,
36
+ imageCount: number
37
+ ): string {
38
+ return (
39
+ 'Generated files:\n' +
40
+ `Session files: ${fileCount} persisted file(s) are available in /mnt/data, including ${imageCount} image(s). ` +
41
+ 'Use known /mnt/data paths directly in later code-tool calls. ' +
42
+ 'The app displays files/images automatically; do not invent download links or wrap generated images in Markdown.'
43
+ );
44
+ }
45
+
46
+ function isGeneratedFile(file: Partial<t.FileRef> | null | undefined): boolean {
47
+ return file?.inherited !== true;
48
+ }
49
+
50
+ export function stripCodeSessionFileSummary(output: string): string {
51
+ const summaryStart = output.lastIndexOf('Generated files:');
52
+ if (summaryStart < 0) return output;
53
+ const beforeSummary = output.slice(0, summaryStart);
54
+ if (beforeSummary !== '' && !beforeSummary.endsWith('\n\n')) return output;
55
+ const maybeSummary = output.slice(summaryStart);
56
+ if (!CODE_SESSION_FILE_SUMMARY_PATTERN.test(maybeSummary)) return output;
57
+ return beforeSummary.trimEnd();
58
+ }
59
+
60
+ export function appendCodeSessionFileSummary(
61
+ output: string,
62
+ files: t.FileRefs | undefined
63
+ ): string {
64
+ if (files == null || files.length === 0) {
65
+ return output.trim();
66
+ }
67
+
68
+ const generatedFiles = files.filter(isGeneratedFile);
69
+ if (generatedFiles.length === 0) {
70
+ return output.trim();
71
+ }
72
+
73
+ const imageCount = generatedFiles.filter(isImageFile).length;
74
+ const summary = buildCodeSessionFileSummary(
75
+ generatedFiles.length,
76
+ imageCount
77
+ );
78
+
79
+ return `${output.trimEnd()}\n\n${summary}`.trim();
80
+ }