@librechat/agents 3.1.89 → 3.1.90
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +7 -0
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/hooks/executeHooks.cjs +14 -7
- package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/index.cjs +8 -2
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +34 -0
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/main.cjs +9 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/tools/BashExecutor.cjs +10 -9
- package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
- package/dist/cjs/tools/BashProgrammaticToolCalling.cjs +12 -8
- package/dist/cjs/tools/BashProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/CodeExecutor.cjs +35 -11
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/tools/CodeSessionFileSummary.cjs +63 -0
- package/dist/cjs/tools/CodeSessionFileSummary.cjs.map +1 -0
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -12
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +8 -5
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +319 -29
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +7 -0
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/hooks/executeHooks.mjs +14 -7
- package/dist/esm/hooks/executeHooks.mjs.map +1 -1
- package/dist/esm/llm/anthropic/index.mjs +9 -3
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +33 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/tools/BashExecutor.mjs +11 -10
- package/dist/esm/tools/BashExecutor.mjs.map +1 -1
- package/dist/esm/tools/BashProgrammaticToolCalling.mjs +13 -9
- package/dist/esm/tools/BashProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/CodeExecutor.mjs +29 -12
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/tools/CodeSessionFileSummary.mjs +60 -0
- package/dist/esm/tools/CodeSessionFileSummary.mjs.map +1 -0
- package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -13
- package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +8 -5
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +320 -31
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/types/llm/anthropic/index.d.ts +3 -1
- package/dist/types/llm/anthropic/utils/message_inputs.d.ts +4 -0
- package/dist/types/tools/BashExecutor.d.ts +3 -3
- package/dist/types/tools/CodeExecutor.d.ts +10 -3
- package/dist/types/tools/CodeSessionFileSummary.d.ts +3 -0
- package/dist/types/tools/ProgrammaticToolCalling.d.ts +4 -4
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +8 -5
- package/dist/types/types/tools.d.ts +2 -3
- package/package.json +1 -1
- package/src/graphs/Graph.ts +7 -0
- package/src/hooks/__tests__/executeHooks.test.ts +38 -0
- package/src/hooks/executeHooks.ts +27 -7
- package/src/llm/anthropic/index.ts +27 -3
- package/src/llm/anthropic/llm.spec.ts +60 -1
- package/src/llm/anthropic/utils/message_inputs.ts +46 -0
- package/src/tools/BashExecutor.ts +21 -10
- package/src/tools/BashProgrammaticToolCalling.ts +21 -9
- package/src/tools/CodeExecutor.ts +55 -12
- package/src/tools/CodeSessionFileSummary.ts +80 -0
- package/src/tools/ProgrammaticToolCalling.ts +25 -12
- package/src/tools/ToolNode.ts +8 -5
- package/src/tools/__tests__/BashExecutor.test.ts +9 -0
- package/src/tools/__tests__/CodeApiAuthHeaders.test.ts +43 -0
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +100 -16
- package/src/tools/__tests__/SubagentExecutor.test.ts +540 -6
- package/src/tools/__tests__/ToolNode.outputReferences.test.ts +52 -0
- package/src/tools/__tests__/subagentHooks.test.ts +237 -0
- package/src/tools/subagent/SubagentExecutor.ts +514 -36
- package/src/types/tools.ts +2 -3
|
@@ -17,9 +17,13 @@ import type {
|
|
|
17
17
|
ChatAnthropicToolType,
|
|
18
18
|
AnthropicMCPServerURLDefinition,
|
|
19
19
|
AnthropicContextManagementConfigParam,
|
|
20
|
+
AnthropicRequestOptions,
|
|
20
21
|
} from '@/llm/anthropic/types';
|
|
21
22
|
import { _makeMessageChunkFromAnthropicEvent } from './utils/message_outputs';
|
|
22
|
-
import {
|
|
23
|
+
import {
|
|
24
|
+
_convertMessagesToAnthropicPayload,
|
|
25
|
+
stripUnsupportedAssistantPrefill,
|
|
26
|
+
} from './utils/message_inputs';
|
|
23
27
|
import { handleToolChoice } from './utils/tools';
|
|
24
28
|
|
|
25
29
|
const DEFAULT_STREAM_DELAY = 25;
|
|
@@ -591,6 +595,26 @@ export class CustomAnthropic extends ChatAnthropicMessages {
|
|
|
591
595
|
});
|
|
592
596
|
}
|
|
593
597
|
|
|
598
|
+
/**
 * Streaming request hook: passes the outgoing request through
 * `stripUnsupportedAssistantPrefill` and then defers to the parent
 * implementation with the (possibly rewritten) request.
 *
 * @param request - Streaming message-create parameters about to be sent.
 * @param options - Optional request options forwarded unchanged to the parent.
 * @returns Whatever the parent `createStreamWithRetry` returns.
 */
protected override async createStreamWithRetry(
  request: AnthropicStreamingMessageCreateParams,
  options?: AnthropicRequestOptions
): ReturnType<ChatAnthropicMessages['createStreamWithRetry']> {
  const sanitizedRequest = stripUnsupportedAssistantPrefill(request);
  return super.createStreamWithRetry(sanitizedRequest, options);
}
|
|
607
|
+
|
|
608
|
+
/**
 * Non-streaming request hook: passes the outgoing request through
 * `stripUnsupportedAssistantPrefill` and then defers to the parent
 * implementation with the (possibly rewritten) request.
 *
 * @param request - Message-create parameters about to be sent.
 * @param options - Request options forwarded unchanged to the parent.
 * @returns Whatever the parent `completionWithRetry` returns.
 */
protected override async completionWithRetry(
  request: AnthropicMessageCreateParams,
  options: AnthropicRequestOptions
): ReturnType<ChatAnthropicMessages['completionWithRetry']> {
  const sanitizedRequest = stripUnsupportedAssistantPrefill(request);
  return super.completionWithRetry(sanitizedRequest, options);
}
|
|
617
|
+
|
|
594
618
|
async *_streamResponseChunks(
|
|
595
619
|
messages: BaseMessage[],
|
|
596
620
|
options: this['ParsedCallOptions'],
|
|
@@ -599,11 +623,11 @@ export class CustomAnthropic extends ChatAnthropicMessages {
|
|
|
599
623
|
this.resetTokenEvents();
|
|
600
624
|
const params = this.invocationParams(options);
|
|
601
625
|
const formattedMessages = _convertMessagesToAnthropicPayload(messages);
|
|
602
|
-
const payload = {
|
|
626
|
+
const payload = stripUnsupportedAssistantPrefill({
|
|
603
627
|
...params,
|
|
604
628
|
...formattedMessages,
|
|
605
629
|
stream: true,
|
|
606
|
-
} as const;
|
|
630
|
+
} as const);
|
|
607
631
|
const coerceContentToString =
|
|
608
632
|
!_toolsInParams(payload) &&
|
|
609
633
|
!_documentsInParams(payload) &&
|
|
@@ -64,7 +64,11 @@ import type {
|
|
|
64
64
|
ToolEndEvent,
|
|
65
65
|
TPayload,
|
|
66
66
|
} from '@/types';
|
|
67
|
-
import {
|
|
67
|
+
import {
|
|
68
|
+
_convertMessagesToAnthropicPayload,
|
|
69
|
+
modelDisallowsAssistantPrefill,
|
|
70
|
+
stripUnsupportedAssistantPrefill,
|
|
71
|
+
} from './utils/message_inputs';
|
|
68
72
|
import {
|
|
69
73
|
_makeMessageChunkFromAnthropicEvent,
|
|
70
74
|
getAnthropicUsageMetadata,
|
|
@@ -2637,6 +2641,61 @@ describe('Anthropic Reasoning with contentBlocks', () => {
|
|
|
2637
2641
|
});
|
|
2638
2642
|
});
|
|
2639
2643
|
|
|
2644
|
+
describe('Claude assistant prefill compatibility', () => {
  /** Model ids that must be reported as NOT supporting assistant prefill. */
  const prefillRejectingModels = [
    'claude-sonnet-4-6',
    'claude-sonnet-4-6@20260217',
    'claude-opus-4-7',
    'claude-opus-4-10',
    'global.anthropic.claude-opus-4-6-v1:0',
    'anthropic/claude-sonnet-4.6',
    'anthropic/claude-sonnet-4.12',
  ];

  /** Model ids whose prefill handling must be left untouched. */
  const prefillAcceptingModels = [
    'claude-sonnet-4-5-20250929',
    'claude-opus-4-20250514',
    'anthropic.claude-opus-4-20250514-v1:0',
    'gpt-5.4',
  ];

  test.each(prefillRejectingModels)(
    'detects %s as not supporting assistant prefill',
    (model) => {
      const disallowed = modelDisallowsAssistantPrefill(model);
      expect(disallowed).toBe(true);
    }
  );

  test.each(prefillAcceptingModels)(
    'leaves %s prefill support unchanged',
    (model) => {
      const disallowed = modelDisallowsAssistantPrefill(model);
      expect(disallowed).toBe(false);
    }
  );

  test('strips trailing assistant messages for Claude 4.6+ requests', () => {
    // Two consecutive trailing assistant turns: both are expected to be removed.
    const request = {
      model: 'claude-opus-4-6',
      max_tokens: 100,
      messages: [
        { role: 'user' as const, content: 'What changed?' },
        { role: 'assistant' as const, content: 'Draft prefill' },
        { role: 'assistant' as const, content: 'Another prefill' },
      ],
    };

    const sanitized = stripUnsupportedAssistantPrefill(request);

    // A rewritten request must be a new object, not the input itself.
    expect(sanitized).not.toBe(request);
    expect(sanitized.messages).toEqual([
      { role: 'user', content: 'What changed?' },
    ]);
  });

  test('does not strip assistant messages for older Claude models', () => {
    const request = {
      model: 'claude-sonnet-4-5-20250929',
      max_tokens: 100,
      messages: [
        { role: 'user' as const, content: 'Write JSON only.' },
        { role: 'assistant' as const, content: '{' },
      ],
    };

    // Untouched requests are returned by identity.
    const result = stripUnsupportedAssistantPrefill(request);
    expect(result).toBe(request);
  });
});
|
|
2698
|
+
|
|
2640
2699
|
const opus46Model = 'claude-opus-4-6';
|
|
2641
2700
|
|
|
2642
2701
|
describe('Opus 4.6', () => {
|
|
@@ -49,6 +49,10 @@ type GoogleFunctionCallBlock = MessageContentComplex & {
|
|
|
49
49
|
};
|
|
50
50
|
|
|
51
51
|
const ANTHROPIC_EMPTY_TEXT_PLACEHOLDER = '_';
|
|
52
|
+
const CLAUDE_4_RELEASE_DATE_MODEL_PATTERN =
|
|
53
|
+
/claude-(?:opus|sonnet|haiku)-4-\d{8}(?:[-.@]|$)/i;
|
|
54
|
+
const CLAUDE_4_MINOR_MODEL_PATTERN =
|
|
55
|
+
/claude-(?:opus|sonnet|haiku)-4[-.](\d+)(?:[-.@]|$)/i;
|
|
52
56
|
|
|
53
57
|
function _formatImage(imageUrl: string) {
|
|
54
58
|
const parsed = parseBase64DataUrl({ dataUrl: imageUrl });
|
|
@@ -796,6 +800,48 @@ export function _convertMessagesToAnthropicPayload(
|
|
|
796
800
|
} as AnthropicMessageCreateParams;
|
|
797
801
|
}
|
|
798
802
|
|
|
803
|
+
/**
 * Reports whether a model id names a Claude 4 model at minor version 6 or
 * higher, which this module treats as not accepting assistant prefill.
 *
 * Ids matching `CLAUDE_4_RELEASE_DATE_MODEL_PATTERN` (family `-4-` followed by
 * an eight-digit date) are checked first and always yield `false`; otherwise
 * the eight digits would be read as a minor version by
 * `CLAUDE_4_MINOR_MODEL_PATTERN`.
 *
 * @param model - Model identifier; `undefined` is treated as an empty string.
 * @returns `true` only when the minor-version pattern matches and the
 *   captured number is `>= 6`.
 */
export function modelDisallowsAssistantPrefill(model?: string): boolean {
  const modelId = model ?? '';

  // Dated base releases short-circuit before the minor-version lookup runs.
  const isDatedBaseRelease = CLAUDE_4_RELEASE_DATE_MODEL_PATTERN.test(modelId);
  const minorMatch = isDatedBaseRelease
    ? null
    : CLAUDE_4_MINOR_MODEL_PATTERN.exec(modelId);

  return minorMatch != null && Number(minorMatch[1]) >= 6;
}
|
|
815
|
+
|
|
816
|
+
/**
 * Removes trailing `assistant` messages from a request when
 * `modelDisallowsAssistantPrefill` reports that the request's model does not
 * accept them.
 *
 * The first message is never removed, so a conversation made up solely of
 * assistant turns keeps its first entry. The input is never mutated: when
 * nothing needs stripping the same `request` object is returned; otherwise a
 * shallow copy with a shortened `messages` array is returned.
 *
 * @param request - Request carrying `messages` and, optionally, `model`.
 * @returns `request` itself when unchanged, or a shallow copy without the
 *   trailing assistant messages.
 */
export function stripUnsupportedAssistantPrefill<
  T extends Pick<AnthropicMessageCreateParams, 'messages'> & { model?: string },
>(request: T): T {
  if (!modelDisallowsAssistantPrefill(request.model)) {
    return request;
  }

  const { messages } = request;

  // Walk back from the end to find how many leading messages survive;
  // index 0 is always kept.
  let keepCount = messages.length;
  while (keepCount > 1 && messages[keepCount - 1]?.role === 'assistant') {
    keepCount -= 1;
  }

  if (keepCount === messages.length) {
    // Nothing to strip: preserve object identity for callers that rely on it.
    return request;
  }

  return {
    ...request,
    messages: messages.slice(0, keepCount),
  };
}
|
|
844
|
+
|
|
799
845
|
function mergeMessages(messages: AnthropicMessageCreateParams['messages']) {
|
|
800
846
|
if (messages.length <= 1) {
|
|
801
847
|
return messages;
|
|
@@ -4,6 +4,11 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
|
|
|
4
4
|
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
|
|
5
5
|
import type * as t from '@/types';
|
|
6
6
|
import {
|
|
7
|
+
BASH_SHELL_GUIDANCE,
|
|
8
|
+
CODE_ARTIFACT_PATH_GUIDANCE,
|
|
9
|
+
appendFailedExecutionFileReminder,
|
|
10
|
+
appendTmpScratchReminder,
|
|
11
|
+
appendCodeSessionFileSummary,
|
|
7
12
|
emptyOutputMessage,
|
|
8
13
|
buildCodeApiHttpErrorMessage,
|
|
9
14
|
getCodeBaseURL,
|
|
@@ -23,8 +28,9 @@ export const BashExecutionToolSchema = {
|
|
|
23
28
|
type: 'string',
|
|
24
29
|
description: `The bash command or script to execute.
|
|
25
30
|
- The environment is stateless; variables and state don't persist between executions.
|
|
26
|
-
-
|
|
27
|
-
-
|
|
31
|
+
- Prior /mnt/data files are available and can be modified in place.
|
|
32
|
+
- ${CODE_ARTIFACT_PATH_GUIDANCE}
|
|
33
|
+
- ${BASH_SHELL_GUIDANCE}
|
|
28
34
|
- Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
|
|
29
35
|
- Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
|
|
30
36
|
- IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
|
|
@@ -46,6 +52,8 @@ Runs bash commands and returns stdout/stderr output from a stateless execution e
|
|
|
46
52
|
Usage:
|
|
47
53
|
- No network access available.
|
|
48
54
|
- Generated files are automatically delivered; **DO NOT** provide download links.
|
|
55
|
+
- ${CODE_ARTIFACT_PATH_GUIDANCE}
|
|
56
|
+
- ${BASH_SHELL_GUIDANCE}
|
|
49
57
|
- NEVER use this tool to execute malicious commands.
|
|
50
58
|
`.trim();
|
|
51
59
|
|
|
@@ -105,7 +113,7 @@ export const BashExecutionToolDefinition = {
|
|
|
105
113
|
} as const;
|
|
106
114
|
|
|
107
115
|
function createBashExecutionTool(
|
|
108
|
-
params: t.BashExecutionToolParams = {}
|
|
116
|
+
params: t.BashExecutionToolParams | null = {}
|
|
109
117
|
): DynamicStructuredTool {
|
|
110
118
|
return tool(
|
|
111
119
|
async (rawInput, config) => {
|
|
@@ -166,11 +174,6 @@ function createBashExecutionTool(
|
|
|
166
174
|
}
|
|
167
175
|
|
|
168
176
|
const result: t.ExecuteResult = await response.json();
|
|
169
|
-
/* See `CodeExecutor.ts` — file listings were removed from the
|
|
170
|
-
* LLM-facing tool result. Bash especially benefits: models
|
|
171
|
-
* naturally `ls /mnt/data/` to discover what's available
|
|
172
|
-
* rather than relying on a prescriptive summary that
|
|
173
|
-
* misleads as often as it helps. */
|
|
174
177
|
let formattedOutput = '';
|
|
175
178
|
if (result.stdout) {
|
|
176
179
|
formattedOutput += `stdout:\n${result.stdout}\n`;
|
|
@@ -179,9 +182,13 @@ function createBashExecutionTool(
|
|
|
179
182
|
}
|
|
180
183
|
if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
|
|
181
184
|
|
|
185
|
+
const outputWithReminder = appendTmpScratchReminder(
|
|
186
|
+
formattedOutput,
|
|
187
|
+
command
|
|
188
|
+
);
|
|
182
189
|
const hasFiles = result.files != null && result.files.length > 0;
|
|
183
190
|
return [
|
|
184
|
-
|
|
191
|
+
appendCodeSessionFileSummary(outputWithReminder, result.files),
|
|
185
192
|
(hasFiles
|
|
186
193
|
? { session_id: result.session_id, files: result.files }
|
|
187
194
|
: {
|
|
@@ -189,8 +196,12 @@ function createBashExecutionTool(
|
|
|
189
196
|
}) satisfies t.CodeExecutionArtifact,
|
|
190
197
|
];
|
|
191
198
|
} catch (error) {
|
|
199
|
+
const messageWithReminder = appendFailedExecutionFileReminder(
|
|
200
|
+
(error as Error | undefined)?.message ?? '',
|
|
201
|
+
command
|
|
202
|
+
);
|
|
192
203
|
throw new Error(
|
|
193
|
-
`Execution error:\n\n${
|
|
204
|
+
`Execution error:\n\n${messageWithReminder}`
|
|
194
205
|
);
|
|
195
206
|
}
|
|
196
207
|
},
|
|
@@ -8,7 +8,12 @@ import {
|
|
|
8
8
|
executeTools,
|
|
9
9
|
formatCompletedResponse,
|
|
10
10
|
} from './ProgrammaticToolCalling';
|
|
11
|
-
import {
|
|
11
|
+
import {
|
|
12
|
+
BASH_SHELL_GUIDANCE,
|
|
13
|
+
CODE_ARTIFACT_PATH_GUIDANCE,
|
|
14
|
+
appendFailedExecutionFileReminder,
|
|
15
|
+
getCodeBaseURL,
|
|
16
|
+
} from './CodeExecutor';
|
|
12
17
|
import {
|
|
13
18
|
clampCodeApiRunTimeoutMs,
|
|
14
19
|
createCodeApiRunTimeoutSchema,
|
|
@@ -62,11 +67,14 @@ You MUST complete your entire workflow in ONE code block.
|
|
|
62
67
|
DO NOT split work across multiple calls expecting to reuse variables.`;
|
|
63
68
|
|
|
64
69
|
const CORE_RULES = `Rules:
|
|
65
|
-
-
|
|
70
|
+
- One call: state does not persist
|
|
66
71
|
- Tools are pre-defined as bash functions—DO NOT redefine them
|
|
67
72
|
- Each tool function accepts a JSON string argument
|
|
73
|
+
- Save tool output with raw=$(tool '{}'); printf '%s\n' "$raw" > /mnt/data/file.json; direct tool > file may be empty
|
|
74
|
+
- jq: use fromjson? // . on saved tool stdout and again on JSON-string fields; check types since arrays may contain strings
|
|
68
75
|
- Only echo/printf output returns to the model
|
|
69
|
-
-
|
|
76
|
+
- ${CODE_ARTIFACT_PATH_GUIDANCE}
|
|
77
|
+
- ${BASH_SHELL_GUIDANCE}
|
|
70
78
|
- timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
|
|
71
79
|
|
|
72
80
|
const ADDITIONAL_RULES =
|
|
@@ -78,11 +86,11 @@ const EXAMPLES = `Example (Complete workflow in one call):
|
|
|
78
86
|
echo "$data" | jq '.[] | .name'
|
|
79
87
|
|
|
80
88
|
Example (Parallel calls):
|
|
81
|
-
web_search '{"query": "SF weather"}' > /
|
|
82
|
-
web_search '{"query": "NY weather"}' > /
|
|
89
|
+
{ sf=$(web_search '{"query": "SF weather"}'); printf '%s\n' "$sf" > /mnt/data/sf.json; } &
|
|
90
|
+
{ ny=$(web_search '{"query": "NY weather"}'); printf '%s\n' "$ny" > /mnt/data/ny.json; } &
|
|
83
91
|
wait
|
|
84
|
-
echo "SF: $(
|
|
85
|
-
echo "NY: $(
|
|
92
|
+
echo "SF: $(jq -r . /mnt/data/sf.json)"
|
|
93
|
+
echo "NY: $(jq -r . /mnt/data/ny.json)"`;
|
|
86
94
|
|
|
87
95
|
const CODE_PARAM_DESCRIPTION = `Bash code that calls tools programmatically. Tools are available as bash functions.
|
|
88
96
|
|
|
@@ -369,7 +377,7 @@ export function createBashProgrammaticToolCallingTool(
|
|
|
369
377
|
// ====================================================================
|
|
370
378
|
|
|
371
379
|
if (response.status === 'completed') {
|
|
372
|
-
return formatCompletedResponse(response);
|
|
380
|
+
return formatCompletedResponse(response, code);
|
|
373
381
|
}
|
|
374
382
|
|
|
375
383
|
if (response.status === 'error') {
|
|
@@ -383,8 +391,12 @@ export function createBashProgrammaticToolCallingTool(
|
|
|
383
391
|
|
|
384
392
|
throw new Error(`Unexpected response status: ${response.status}`);
|
|
385
393
|
} catch (error) {
|
|
394
|
+
const messageWithReminder = appendFailedExecutionFileReminder(
|
|
395
|
+
(error as Error).message,
|
|
396
|
+
code
|
|
397
|
+
);
|
|
386
398
|
throw new Error(
|
|
387
|
-
`Bash programmatic execution failed: ${
|
|
399
|
+
`Bash programmatic execution failed: ${messageWithReminder}`
|
|
388
400
|
);
|
|
389
401
|
}
|
|
390
402
|
},
|
|
@@ -4,8 +4,14 @@ import { HttpsProxyAgent } from 'https-proxy-agent';
|
|
|
4
4
|
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
|
|
5
5
|
import { getEnvironmentVariable } from '@langchain/core/utils/env';
|
|
6
6
|
import type * as t from '@/types';
|
|
7
|
+
import { appendCodeSessionFileSummary } from '@/tools/CodeSessionFileSummary';
|
|
7
8
|
import { EnvVar, Constants } from '@/common';
|
|
8
9
|
|
|
10
|
+
export {
|
|
11
|
+
appendCodeSessionFileSummary,
|
|
12
|
+
stripCodeSessionFileSummary,
|
|
13
|
+
} from '@/tools/CodeSessionFileSummary';
|
|
14
|
+
|
|
9
15
|
config();
|
|
10
16
|
|
|
11
17
|
export const getCodeBaseURL = (): string =>
|
|
@@ -15,6 +21,41 @@ export const getCodeBaseURL = (): string =>
|
|
|
15
21
|
export const emptyOutputMessage =
|
|
16
22
|
'stdout: Empty. Ensure you\'re writing output explicitly.\n';
|
|
17
23
|
|
|
24
|
+
export const CODE_ARTIFACT_PATH_GUIDANCE =
|
|
25
|
+
'Persist handoff artifacts in `/mnt/data` with standard extensions (.json/.txt/.csv/.tsv/.log/.parquet/.png/.jpg/.pdf/.xlsx); failed executions do not register new files; `/tmp` and odd extensions are same-call scratch only, not later-call storage.';
|
|
26
|
+
|
|
27
|
+
export const BASH_SHELL_GUIDANCE =
|
|
28
|
+
'Bash: multi-line files use heredoc/printf; run Python via python3 -c/heredoc, not bare Python.';
|
|
29
|
+
|
|
30
|
+
const TMP_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/tmp(?:\/|\b)/;
|
|
31
|
+
const MNT_DATA_PATH_PATTERN = /(^|[^A-Za-z0-9_])\/mnt\/data(?:\/|\b)/;
|
|
32
|
+
|
|
33
|
+
export const TMP_SCRATCH_OUTPUT_REMINDER =
|
|
34
|
+
'Note: /tmp files are same-call scratch only and were not persisted; use /mnt/data for files needed later.';
|
|
35
|
+
|
|
36
|
+
export const FAILED_EXECUTION_FILE_REMINDER =
|
|
37
|
+
'Note: any files written during this failed call were not registered for later calls; fix the error and rerun before relying on them.';
|
|
38
|
+
|
|
39
|
+
/**
 * Appends `TMP_SCRATCH_OUTPUT_REMINDER` to tool output when the executed
 * source matches `TMP_PATH_PATTERN`.
 *
 * @param output - Formatted tool output accumulated so far.
 * @param code - Source code/command that was executed.
 * @returns `output` unchanged when `code` does not match the pattern;
 *   otherwise `output` with trailing whitespace removed, followed by the
 *   reminder on its own line and a final newline.
 */
export function appendTmpScratchReminder(output: string, code: string): string {
  const referencesTmp = TMP_PATH_PATTERN.test(code);
  return referencesTmp
    ? `${output.trimEnd()}\n${TMP_SCRATCH_OUTPUT_REMINDER}\n`
    : output;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Appends `FAILED_EXECUTION_FILE_REMINDER` to a failure message when the
 * executed source matches `MNT_DATA_PATH_PATTERN` and the message does not
 * already contain the reminder.
 *
 * @param output - Error text to decorate.
 * @param code - Source code/command that was executed.
 * @returns `output` unchanged when no reminder is added; otherwise `output`
 *   with trailing whitespace removed, followed by the reminder on its own
 *   line and a final newline.
 */
export function appendFailedExecutionFileReminder(
  output: string,
  code: string
): string {
  // The pattern test runs first; the duplicate check only runs when it matches.
  const shouldAppend =
    MNT_DATA_PATH_PATTERN.test(code) &&
    !output.includes(FAILED_EXECUTION_FILE_REMINDER);

  if (shouldAppend) {
    return `${output.trimEnd()}\n${FAILED_EXECUTION_FILE_REMINDER}\n`;
  }
  return output;
}
|
|
58
|
+
|
|
18
59
|
const SUPPORTED_LANGUAGES = [
|
|
19
60
|
'py',
|
|
20
61
|
'js',
|
|
@@ -44,8 +85,8 @@ export const CodeExecutionToolSchema = {
|
|
|
44
85
|
type: 'string',
|
|
45
86
|
description: `The complete, self-contained code to execute, without any truncation or minimization.
|
|
46
87
|
- The environment is stateless; variables and imports don't persist between executions.
|
|
47
|
-
-
|
|
48
|
-
-
|
|
88
|
+
- Prior /mnt/data files are available and can be modified in place.
|
|
89
|
+
- ${CODE_ARTIFACT_PATH_GUIDANCE}
|
|
49
90
|
- Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
|
|
50
91
|
- Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
|
|
51
92
|
- IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
|
|
@@ -104,6 +145,7 @@ Runs code and returns stdout/stderr output from a stateless execution environmen
|
|
|
104
145
|
Usage:
|
|
105
146
|
- No network access available.
|
|
106
147
|
- Generated files are automatically delivered; **DO NOT** provide download links.
|
|
148
|
+
- ${CODE_ARTIFACT_PATH_GUIDANCE}
|
|
107
149
|
- NEVER use this tool to execute malicious code.
|
|
108
150
|
`.trim();
|
|
109
151
|
|
|
@@ -116,7 +158,7 @@ export const CodeExecutionToolDefinition = {
|
|
|
116
158
|
} as const;
|
|
117
159
|
|
|
118
160
|
function createCodeExecutionTool(
|
|
119
|
-
params: t.CodeExecutionToolParams = {}
|
|
161
|
+
params: t.CodeExecutionToolParams | null = {}
|
|
120
162
|
): DynamicStructuredTool {
|
|
121
163
|
return tool(
|
|
122
164
|
async (rawInput, config) => {
|
|
@@ -187,13 +229,6 @@ function createCodeExecutionTool(
|
|
|
187
229
|
}
|
|
188
230
|
|
|
189
231
|
const result: t.ExecuteResult = await response.json();
|
|
190
|
-
/* Output is stdout/stderr only — file listings were removed
|
|
191
|
-
* because the LLM-facing summary (split inherited/generated
|
|
192
|
-
* with prescriptive notes) caused more confusion than help,
|
|
193
|
-
* especially for bash where models naturally explore
|
|
194
|
-
* `/mnt/data/` themselves. The artifact still carries every
|
|
195
|
-
* file so the host's session map stays in sync; the LLM
|
|
196
|
-
* doesn't see them in the tool result text. */
|
|
197
232
|
let formattedOutput = '';
|
|
198
233
|
if (result.stdout) {
|
|
199
234
|
formattedOutput += `stdout:\n${result.stdout}\n`;
|
|
@@ -202,9 +237,13 @@ function createCodeExecutionTool(
|
|
|
202
237
|
}
|
|
203
238
|
if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
|
|
204
239
|
|
|
240
|
+
const outputWithReminder = appendTmpScratchReminder(
|
|
241
|
+
formattedOutput,
|
|
242
|
+
code
|
|
243
|
+
);
|
|
205
244
|
const hasFiles = result.files != null && result.files.length > 0;
|
|
206
245
|
return [
|
|
207
|
-
|
|
246
|
+
appendCodeSessionFileSummary(outputWithReminder, result.files),
|
|
208
247
|
(hasFiles
|
|
209
248
|
? { session_id: result.session_id, files: result.files }
|
|
210
249
|
: {
|
|
@@ -212,8 +251,12 @@ function createCodeExecutionTool(
|
|
|
212
251
|
}) satisfies t.CodeExecutionArtifact,
|
|
213
252
|
];
|
|
214
253
|
} catch (error) {
|
|
254
|
+
const messageWithReminder = appendFailedExecutionFileReminder(
|
|
255
|
+
(error as Error | undefined)?.message ?? '',
|
|
256
|
+
code
|
|
257
|
+
);
|
|
215
258
|
throw new Error(
|
|
216
|
-
`Execution error:\n\n${
|
|
259
|
+
`Execution error:\n\n${messageWithReminder}`
|
|
217
260
|
);
|
|
218
261
|
}
|
|
219
262
|
},
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type * as t from '@/types';
|
|
2
|
+
|
|
3
|
+
const IMAGE_FILE_EXTENSIONS = new Set([
|
|
4
|
+
'.avif',
|
|
5
|
+
'.bmp',
|
|
6
|
+
'.gif',
|
|
7
|
+
'.ico',
|
|
8
|
+
'.jpeg',
|
|
9
|
+
'.jpg',
|
|
10
|
+
'.png',
|
|
11
|
+
'.tif',
|
|
12
|
+
'.tiff',
|
|
13
|
+
'.webp',
|
|
14
|
+
]);
|
|
15
|
+
|
|
16
|
+
const CODE_SESSION_FILE_SUMMARY_PATTERN =
|
|
17
|
+
/^Generated files:\nSession files: \d+ persisted file\(s\) are available in \/mnt\/data, including \d+ image\(s\)\. Use known \/mnt\/data paths directly in later code-tool calls\. The app displays files\/images automatically; do not invent download links or wrap generated images in Markdown\.$/;
|
|
18
|
+
|
|
19
|
+
/**
 * Returns the lower-cased extension (including the leading dot) of the final
 * `/`-separated segment of `name`, or `''` when that segment has no dot.
 *
 * Only the last dot counts (`archive.tar.gz` → `.gz`), and a leading-dot name
 * such as `.gitignore` is returned whole.
 *
 * @param name - File name or `/`-separated path.
 * @returns The extension with its dot, lower-cased, or an empty string.
 */
function getFileExtension(name: string): string {
  // lastIndexOf yields -1 when there is no slash, so +1 starts at index 0.
  const basename = name.substring(name.lastIndexOf('/') + 1);
  const dotIndex = basename.lastIndexOf('.');
  if (dotIndex < 0) {
    return '';
  }
  return basename.slice(dotIndex).toLowerCase();
}
|
|
25
|
+
|
|
26
|
+
/**
 * Tells whether a file reference has a string `name` whose extension is
 * listed in `IMAGE_FILE_EXTENSIONS`.
 *
 * @param file - File reference, possibly partial, `null` or `undefined`.
 * @returns `true` when `name` is a string with a listed image extension;
 *   `false` for a missing reference or a non-string name.
 */
function isImageFile(file: Partial<t.FileRef> | null | undefined): boolean {
  const name = file?.name;
  if (typeof name !== 'string') {
    return false;
  }
  const extension = getFileExtension(name);
  return IMAGE_FILE_EXTENSIONS.has(extension);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Builds the fixed-format summary appended to code-tool output: a
 * `Generated files:` header line, then one line giving the file and image
 * counts plus usage guidance.
 *
 * @param fileCount - Number of files to report.
 * @param imageCount - Number of those files that are images.
 * @returns The summary text, with no trailing newline.
 */
function buildCodeSessionFileSummary(
  fileCount: number,
  imageCount: number
): string {
  const header = 'Generated files:\n';
  const counts = `Session files: ${fileCount} persisted file(s) are available in /mnt/data, including ${imageCount} image(s). `;
  const pathGuidance =
    'Use known /mnt/data paths directly in later code-tool calls. ';
  const displayGuidance =
    'The app displays files/images automatically; do not invent download links or wrap generated images in Markdown.';

  return [header, counts, pathGuidance, displayGuidance].join('');
}
|
|
45
|
+
|
|
46
|
+
/**
 * Tells whether a file reference counts as generated, i.e. its `inherited`
 * flag is anything other than exactly `true`.
 *
 * @param file - File reference, possibly partial, `null` or `undefined`.
 * @returns `false` only when `file.inherited === true`; a missing reference
 *   or missing flag yields `true`.
 */
function isGeneratedFile(file: Partial<t.FileRef> | null | undefined): boolean {
  const wasInherited = file?.inherited === true;
  return !wasInherited;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Removes a trailing session-file summary from tool output.
 *
 * The text is only altered when all of the following hold: it contains
 * `Generated files:`; everything before the last occurrence is either empty
 * or ends with a blank line (`\n\n`); and the text from that occurrence to
 * the end matches `CODE_SESSION_FILE_SUMMARY_PATTERN`. In every other case
 * the input is returned untouched.
 *
 * @param output - Tool output that may end with a summary.
 * @returns The output preceding the summary with trailing whitespace
 *   removed, or `output` unchanged when no summary is recognised.
 */
export function stripCodeSessionFileSummary(output: string): string {
  const markerIndex = output.lastIndexOf('Generated files:');
  if (markerIndex < 0) {
    return output;
  }

  const prefix = output.slice(0, markerIndex);
  const isSeparated = prefix === '' || prefix.endsWith('\n\n');
  if (!isSeparated) {
    return output;
  }

  const candidate = output.slice(markerIndex);
  return CODE_SESSION_FILE_SUMMARY_PATTERN.test(candidate)
    ? prefix.trimEnd()
    : output;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Appends a session-file summary to tool output when the execution reported
 * at least one generated (non-inherited) file.
 *
 * @param output - Formatted tool output (stdout/stderr text).
 * @param files - Files reported by the execution; may be `undefined` or empty.
 * @returns `output.trim()` when there are no generated files; otherwise the
 *   output, a blank line, and the summary, with the whole string trimmed.
 */
export function appendCodeSessionFileSummary(
  output: string,
  files: t.FileRefs | undefined
): string {
  // A missing or empty list filters down to no generated files as well.
  const generated = (files ?? []).filter(isGeneratedFile);
  if (generated.length === 0) {
    return output.trim();
  }

  let imageTotal = 0;
  for (const file of generated) {
    if (isImageFile(file)) {
      imageTotal += 1;
    }
  }

  const summary = buildCodeSessionFileSummary(generated.length, imageTotal);
  return `${output.trimEnd()}\n\n${summary}`.trim();
}
|
|
@@ -7,9 +7,13 @@ import type { ToolCall } from '@langchain/core/messages/tool';
|
|
|
7
7
|
import type { ProgrammaticToolCallingJsonSchema } from './ptcTimeout';
|
|
8
8
|
import type * as t from '@/types';
|
|
9
9
|
import {
|
|
10
|
+
CODE_ARTIFACT_PATH_GUIDANCE,
|
|
11
|
+
appendCodeSessionFileSummary,
|
|
12
|
+
appendFailedExecutionFileReminder,
|
|
10
13
|
buildCodeApiHttpErrorMessage,
|
|
11
14
|
emptyOutputMessage,
|
|
12
15
|
getCodeBaseURL,
|
|
16
|
+
appendTmpScratchReminder,
|
|
13
17
|
resolveCodeApiAuthHeaders,
|
|
14
18
|
} from './CodeExecutor';
|
|
15
19
|
import {
|
|
@@ -36,15 +40,17 @@ You MUST complete your entire workflow in ONE code block: query → process →
|
|
|
36
40
|
DO NOT split work across multiple calls expecting to reuse variables.`;
|
|
37
41
|
|
|
38
42
|
const CORE_RULES = `Rules:
|
|
39
|
-
-
|
|
40
|
-
-
|
|
41
|
-
- DO NOT define async def main() or call asyncio.run()
|
|
43
|
+
- One call: state does not persist
|
|
44
|
+
- Auto-wrapped async; use await, no main()/asyncio.run()
|
|
42
45
|
- Tools are pre-defined—DO NOT write function definitions
|
|
46
|
+
- Call tools with keyword args only (await tool(arg=value), never pass a dict)
|
|
47
|
+
- Tool results are decoded Python values (dict/list/str)
|
|
43
48
|
- Only print() output returns to the model
|
|
49
|
+
- ${CODE_ARTIFACT_PATH_GUIDANCE}
|
|
44
50
|
- timeout caps one sandbox run/replay iteration, not the total multi-round-trip workflow`;
|
|
45
51
|
|
|
46
|
-
const ADDITIONAL_RULES =
|
|
47
|
-
- Tool names normalized: hyphens→underscores, keywords get
|
|
52
|
+
const ADDITIONAL_RULES =
|
|
53
|
+
'- Tool names normalized: hyphens→underscores, keywords get `_tool` suffix';
|
|
48
54
|
|
|
49
55
|
const EXAMPLES = `Example (Complete workflow in one call):
|
|
50
56
|
# Query data
|
|
@@ -678,15 +684,16 @@ export async function executeTools(
|
|
|
678
684
|
/**
|
|
679
685
|
* Formats the completed response for the agent.
|
|
680
686
|
*
|
|
681
|
-
* Output
|
|
682
|
-
*
|
|
683
|
-
*
|
|
687
|
+
* Output includes stdout/stderr plus a compact session-file summary
|
|
688
|
+
* when artifacts were persisted. The artifact still carries every
|
|
689
|
+
* file so the host's session map stays in sync.
|
|
684
690
|
*
|
|
685
691
|
* @param response - The completed API response
|
|
686
692
|
* @returns Tuple of [formatted string, artifact]
|
|
687
693
|
*/
|
|
688
694
|
export function formatCompletedResponse(
|
|
689
|
-
response: t.ProgrammaticExecutionResponse
|
|
695
|
+
response: t.ProgrammaticExecutionResponse,
|
|
696
|
+
sourceCode = ''
|
|
690
697
|
): [string, t.ProgrammaticExecutionArtifact] {
|
|
691
698
|
let formatted = '';
|
|
692
699
|
|
|
@@ -700,8 +707,10 @@ export function formatCompletedResponse(
|
|
|
700
707
|
formatted += `stderr:\n${response.stderr}\n`;
|
|
701
708
|
}
|
|
702
709
|
|
|
710
|
+
const outputWithReminder = appendTmpScratchReminder(formatted, sourceCode);
|
|
711
|
+
|
|
703
712
|
return [
|
|
704
|
-
|
|
713
|
+
appendCodeSessionFileSummary(outputWithReminder, response.files),
|
|
705
714
|
{
|
|
706
715
|
session_id: response.session_id,
|
|
707
716
|
files: response.files,
|
|
@@ -859,7 +868,7 @@ export function createProgrammaticToolCallingTool(
|
|
|
859
868
|
// ====================================================================
|
|
860
869
|
|
|
861
870
|
if (response.status === 'completed') {
|
|
862
|
-
return formatCompletedResponse(response);
|
|
871
|
+
return formatCompletedResponse(response, code);
|
|
863
872
|
}
|
|
864
873
|
|
|
865
874
|
if (response.status === 'error') {
|
|
@@ -873,8 +882,12 @@ export function createProgrammaticToolCallingTool(
|
|
|
873
882
|
|
|
874
883
|
throw new Error(`Unexpected response status: ${response.status}`);
|
|
875
884
|
} catch (error) {
|
|
885
|
+
const messageWithReminder = appendFailedExecutionFileReminder(
|
|
886
|
+
(error as Error).message,
|
|
887
|
+
code
|
|
888
|
+
);
|
|
876
889
|
throw new Error(
|
|
877
|
-
`Programmatic execution failed: ${
|
|
890
|
+
`Programmatic execution failed: ${messageWithReminder}`
|
|
878
891
|
);
|
|
879
892
|
}
|
|
880
893
|
},
|