@librechat/agents 3.1.77 → 3.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/common/enum.cjs +54 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +155 -4
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +291 -0
- package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -0
- package/dist/cjs/main.cjs +90 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/anthropicToolCache.cjs +102 -0
- package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -0
- package/dist/cjs/messages/prune.cjs +27 -0
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/messages/recency.cjs +99 -0
- package/dist/cjs/messages/recency.cjs.map +1 -0
- package/dist/cjs/run.cjs +30 -0
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +100 -6
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +635 -23
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/local/CompileCheckTool.cjs +227 -0
- package/dist/cjs/tools/local/CompileCheckTool.cjs.map +1 -0
- package/dist/cjs/tools/local/FileCheckpointer.cjs +90 -0
- package/dist/cjs/tools/local/FileCheckpointer.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalCodingTools.cjs +1098 -0
- package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalExecutionEngine.cjs +1042 -0
- package/dist/cjs/tools/local/LocalExecutionEngine.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalExecutionTools.cjs +122 -0
- package/dist/cjs/tools/local/LocalExecutionTools.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs +453 -0
- package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs.map +1 -0
- package/dist/cjs/tools/local/attachments.cjs +183 -0
- package/dist/cjs/tools/local/attachments.cjs.map +1 -0
- package/dist/cjs/tools/local/bashAst.cjs +129 -0
- package/dist/cjs/tools/local/bashAst.cjs.map +1 -0
- package/dist/cjs/tools/local/editStrategies.cjs +188 -0
- package/dist/cjs/tools/local/editStrategies.cjs.map +1 -0
- package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs +141 -0
- package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs.map +1 -0
- package/dist/cjs/tools/local/syntaxCheck.cjs +182 -0
- package/dist/cjs/tools/local/syntaxCheck.cjs.map +1 -0
- package/dist/cjs/tools/local/textEncoding.cjs +30 -0
- package/dist/cjs/tools/local/textEncoding.cjs.map +1 -0
- package/dist/cjs/tools/local/workspaceFS.cjs +51 -0
- package/dist/cjs/tools/local/workspaceFS.cjs.map +1 -0
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +31 -0
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/esm/common/enum.mjs +53 -1
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +156 -5
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/hooks/createWorkspacePolicyHook.mjs +289 -0
- package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -0
- package/dist/esm/main.mjs +17 -2
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/anthropicToolCache.mjs +99 -0
- package/dist/esm/messages/anthropicToolCache.mjs.map +1 -0
- package/dist/esm/messages/prune.mjs +26 -1
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/messages/recency.mjs +97 -0
- package/dist/esm/messages/recency.mjs.map +1 -0
- package/dist/esm/run.mjs +30 -0
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +100 -6
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +635 -23
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/local/CompileCheckTool.mjs +223 -0
- package/dist/esm/tools/local/CompileCheckTool.mjs.map +1 -0
- package/dist/esm/tools/local/FileCheckpointer.mjs +87 -0
- package/dist/esm/tools/local/FileCheckpointer.mjs.map +1 -0
- package/dist/esm/tools/local/LocalCodingTools.mjs +1075 -0
- package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -0
- package/dist/esm/tools/local/LocalExecutionEngine.mjs +1022 -0
- package/dist/esm/tools/local/LocalExecutionEngine.mjs.map +1 -0
- package/dist/esm/tools/local/LocalExecutionTools.mjs +117 -0
- package/dist/esm/tools/local/LocalExecutionTools.mjs.map +1 -0
- package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs +448 -0
- package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs.map +1 -0
- package/dist/esm/tools/local/attachments.mjs +180 -0
- package/dist/esm/tools/local/attachments.mjs.map +1 -0
- package/dist/esm/tools/local/bashAst.mjs +126 -0
- package/dist/esm/tools/local/bashAst.mjs.map +1 -0
- package/dist/esm/tools/local/editStrategies.mjs +185 -0
- package/dist/esm/tools/local/editStrategies.mjs.map +1 -0
- package/dist/esm/tools/local/resolveLocalExecutionTools.mjs +137 -0
- package/dist/esm/tools/local/resolveLocalExecutionTools.mjs.map +1 -0
- package/dist/esm/tools/local/syntaxCheck.mjs +179 -0
- package/dist/esm/tools/local/syntaxCheck.mjs.map +1 -0
- package/dist/esm/tools/local/textEncoding.mjs +27 -0
- package/dist/esm/tools/local/textEncoding.mjs.map +1 -0
- package/dist/esm/tools/local/workspaceFS.mjs +49 -0
- package/dist/esm/tools/local/workspaceFS.mjs.map +1 -0
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +31 -0
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/types/common/enum.d.ts +39 -1
- package/dist/types/graphs/Graph.d.ts +34 -0
- package/dist/types/hooks/createWorkspacePolicyHook.d.ts +95 -0
- package/dist/types/hooks/index.d.ts +2 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/anthropicToolCache.d.ts +51 -0
- package/dist/types/messages/index.d.ts +2 -0
- package/dist/types/messages/prune.d.ts +11 -0
- package/dist/types/messages/recency.d.ts +64 -0
- package/dist/types/run.d.ts +21 -0
- package/dist/types/tools/ToolNode.d.ts +145 -2
- package/dist/types/tools/local/CompileCheckTool.d.ts +31 -0
- package/dist/types/tools/local/FileCheckpointer.d.ts +39 -0
- package/dist/types/tools/local/LocalCodingTools.d.ts +57 -0
- package/dist/types/tools/local/LocalExecutionEngine.d.ts +149 -0
- package/dist/types/tools/local/LocalExecutionTools.d.ts +9 -0
- package/dist/types/tools/local/LocalProgrammaticToolCalling.d.ts +21 -0
- package/dist/types/tools/local/attachments.d.ts +84 -0
- package/dist/types/tools/local/bashAst.d.ts +11 -0
- package/dist/types/tools/local/editStrategies.d.ts +28 -0
- package/dist/types/tools/local/index.d.ts +12 -0
- package/dist/types/tools/local/resolveLocalExecutionTools.d.ts +38 -0
- package/dist/types/tools/local/syntaxCheck.d.ts +42 -0
- package/dist/types/tools/local/textEncoding.d.ts +21 -0
- package/dist/types/tools/local/workspaceFS.d.ts +49 -0
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +29 -0
- package/dist/types/types/hitl.d.ts +56 -27
- package/dist/types/types/run.d.ts +8 -1
- package/dist/types/types/summarize.d.ts +30 -0
- package/dist/types/types/tools.d.ts +341 -6
- package/package.json +21 -2
- package/src/common/enum.ts +54 -0
- package/src/graphs/Graph.ts +173 -6
- package/src/hooks/__tests__/compactHooks.test.ts +38 -2
- package/src/hooks/__tests__/createWorkspacePolicyHook.test.ts +393 -0
- package/src/hooks/createWorkspacePolicyHook.ts +355 -0
- package/src/hooks/index.ts +6 -0
- package/src/index.ts +1 -0
- package/src/messages/__tests__/anthropicToolCache.test.ts +125 -0
- package/src/messages/__tests__/recency.test.ts +267 -0
- package/src/messages/anthropicToolCache.ts +116 -0
- package/src/messages/index.ts +2 -0
- package/src/messages/prune.ts +27 -1
- package/src/messages/recency.ts +155 -0
- package/src/run.ts +31 -0
- package/src/scripts/compare_pi_vs_ours.ts +840 -0
- package/src/scripts/local_engine.ts +166 -0
- package/src/scripts/local_engine_checkpointer.ts +205 -0
- package/src/scripts/local_engine_compile.ts +263 -0
- package/src/scripts/local_engine_hooks.ts +226 -0
- package/src/scripts/local_engine_image.ts +201 -0
- package/src/scripts/local_engine_ptc.ts +151 -0
- package/src/scripts/local_engine_workspace.ts +258 -0
- package/src/scripts/subagent-configurable-inheritance.ts +252 -0
- package/src/scripts/summarization-recency.ts +462 -0
- package/src/specs/prune.test.ts +39 -0
- package/src/summarization/__tests__/node.test.ts +499 -3
- package/src/summarization/node.ts +124 -7
- package/src/tools/ToolNode.ts +769 -20
- package/src/tools/__tests__/LocalExecutionTools.test.ts +2647 -0
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +175 -0
- package/src/tools/__tests__/SubagentExecutor.test.ts +148 -0
- package/src/tools/__tests__/ToolNode.outputReferences.test.ts +114 -0
- package/src/tools/__tests__/ToolNode.session.test.ts +84 -0
- package/src/tools/__tests__/directToolHITLResumeScope.test.ts +467 -0
- package/src/tools/__tests__/directToolHooks.test.ts +411 -0
- package/src/tools/__tests__/localToolNames.test.ts +73 -0
- package/src/tools/__tests__/workspaceSeam.test.ts +134 -0
- package/src/tools/local/CompileCheckTool.ts +278 -0
- package/src/tools/local/FileCheckpointer.ts +93 -0
- package/src/tools/local/LocalCodingTools.ts +1342 -0
- package/src/tools/local/LocalExecutionEngine.ts +1329 -0
- package/src/tools/local/LocalExecutionTools.ts +167 -0
- package/src/tools/local/LocalProgrammaticToolCalling.ts +594 -0
- package/src/tools/local/__tests__/FileCheckpointer.test.ts +120 -0
- package/src/tools/local/__tests__/editStrategies.test.ts +134 -0
- package/src/tools/local/attachments.ts +251 -0
- package/src/tools/local/bashAst.ts +151 -0
- package/src/tools/local/editStrategies.ts +188 -0
- package/src/tools/local/index.ts +12 -0
- package/src/tools/local/resolveLocalExecutionTools.ts +208 -0
- package/src/tools/local/syntaxCheck.ts +243 -0
- package/src/tools/local/textEncoding.ts +37 -0
- package/src/tools/local/workspaceFS.ts +89 -0
- package/src/tools/subagent/SubagentExecutor.ts +60 -0
- package/src/types/hitl.ts +56 -27
- package/src/types/run.ts +12 -1
- package/src/types/summarize.ts +31 -0
- package/src/types/tools.ts +359 -7
|
@@ -0,0 +1,2647 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { tmpdir } from 'os';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import { spawnSync } from 'child_process';
|
|
5
|
+
import {
|
|
6
|
+
mkdtemp,
|
|
7
|
+
rm,
|
|
8
|
+
symlink,
|
|
9
|
+
writeFile as fsWriteFile,
|
|
10
|
+
readFile as fsReadFile,
|
|
11
|
+
} from 'fs/promises';
|
|
12
|
+
import { tool } from '@langchain/core/tools';
|
|
13
|
+
import { AIMessage, ToolMessage } from '@langchain/core/messages';
|
|
14
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
15
|
+
import { describe, it, expect, afterEach, beforeEach, jest } from '@jest/globals';
|
|
16
|
+
import type { StructuredToolInterface } from '@langchain/core/tools';
|
|
17
|
+
import type * as t from '@/types';
|
|
18
|
+
import { Constants, Providers } from '@/common';
|
|
19
|
+
import { ToolNode } from '../ToolNode';
|
|
20
|
+
import {
|
|
21
|
+
executeLocalBash,
|
|
22
|
+
executeLocalCode,
|
|
23
|
+
validateBashCommand,
|
|
24
|
+
_resetLocalEngineWarningsForTests,
|
|
25
|
+
} from '../local/LocalExecutionEngine';
|
|
26
|
+
import { resolveLocalToolsForBinding } from '../local/resolveLocalExecutionTools';
|
|
27
|
+
import {
|
|
28
|
+
createLocalCodingToolBundle,
|
|
29
|
+
_resetRipgrepCacheForTests,
|
|
30
|
+
} from '../local/LocalCodingTools';
|
|
31
|
+
import {
|
|
32
|
+
runPostEditSyntaxCheck,
|
|
33
|
+
_resetSyntaxCheckProbeCacheForTests,
|
|
34
|
+
} from '../local/syntaxCheck';
|
|
35
|
+
import { createCompileCheckTool } from '../local/CompileCheckTool';
|
|
36
|
+
import { runBashAstChecks } from '../local/bashAst';
|
|
37
|
+
import { LocalFileCheckpointerImpl } from '../local/FileCheckpointer';
|
|
38
|
+
|
|
39
|
+
/** True when `python3 --version` can be spawned and exits with status 0. */
const { status: python3ExitStatus } = spawnSync('python3', ['--version']);
const hasPython3 = python3ExitStatus === 0;
|
|
40
|
+
|
|
41
|
+
/**
 * Scratch directories created so far by the running test. The file-level
 * `afterEach` hook drains this list and deletes every entry.
 */
const tempDirs: string[] = [];

/**
 * Creates a uniquely named directory under the OS temp root (prefix
 * `lc-local-tools-`) and records it in `tempDirs` for later cleanup.
 *
 * @returns The path of the newly created directory.
 */
async function createTempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'lc-local-tools-');
  const created = await mkdtemp(prefix);
  tempDirs.push(created);
  return created;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Builds a stand-in for a remotely executed bash tool. It is registered under
 * `Constants.BASH_TOOL` and always answers with a sentinel string, so a test
 * can detect whether this stub (rather than a replacement) handled the call.
 *
 * @returns The stub, cast to `StructuredToolInterface`.
 */
function createRemoteBashStub(): StructuredToolInterface {
  const stubSchema = z.object({ command: z.string() });
  const stub = tool(async () => 'remote bash should not run', {
    name: Constants.BASH_TOOL,
    description: 'Remote bash stub',
    schema: stubSchema,
  });
  return stub as unknown as StructuredToolInterface;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Normalises the two accepted result shapes into a plain message list.
 *
 * @param result Either an array of tool messages or an object carrying that
 *   array under `messages`.
 * @returns The tool messages contained in `result`.
 */
function messagesFromResult(
  result: ToolMessage[] | { messages: ToolMessage[] }
): ToolMessage[] {
  if (Array.isArray(result)) {
    return result;
  }
  return result.messages;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Creates an `AIMessage` with empty content and exactly one tool call.
 *
 * @param name Tool name; also used to derive the call id (`call_<name>`).
 * @param args Arguments attached to the tool call.
 * @returns The assembled message.
 */
function aiMessageWithToolCall(
  name: string,
  args: Record<string, string | number | boolean>
): AIMessage {
  const toolCall = { id: `call_${name}`, name, args };
  return new AIMessage({ content: '', tool_calls: [toolCall] });
}
|
|
81
|
+
|
|
82
|
+
// After every test: empty `tempDirs` and delete each recorded directory.
// `force: true` is passed so `rm` ignores paths that no longer exist.
afterEach(async () => {
  const pending = tempDirs.splice(0);
  const removals = pending.map((dir) =>
    rm(dir, { recursive: true, force: true })
  );
  await Promise.all(removals);
});
|
|
87
|
+
|
|
88
|
+
// Covers bash validation, replacement of remote tools by local ones, tool
// auto-binding, and programmatic tool calling when `engine: 'local'` is set.
describe('local execution tools', () => {
  // Registers a test only when `python3` is available. Without it the test is
  // reported as skipped instead of passing without running any assertion.
  const itWithPython3 = hasPython3 ? it : it.skip;

  it('blocks clearly destructive bash commands by default', async () => {
    const verdict = await validateBashCommand('rm -rf /');

    expect(verdict.valid).toBe(false);
    expect(verdict.errors.join('\n')).toContain('destructive command pattern');
  });

  it('replaces a configured remote bash tool when local mode is enabled', async () => {
    const cwd = await createTempDir();
    const node = new ToolNode({
      tools: [createRemoteBashStub()],
      toolExecution: {
        engine: 'local',
        local: { cwd, includeCodingTools: false },
      },
    });

    const output = await node.invoke({
      messages: [
        aiMessageWithToolCall(Constants.BASH_TOOL, {
          command: 'printf local-mode',
        }),
      ],
    });

    const [reply] = messagesFromResult(output as { messages: ToolMessage[] });
    const text = String(reply.content);
    expect(text).toContain('local-mode');
    // The stub's sentinel must be absent: the remote tool was not invoked.
    expect(text).not.toContain('remote bash should not run');
  });

  it('auto-binds the local coding suite in local mode', () => {
    const bound = resolveLocalToolsForBinding({
      toolExecution: { engine: 'local' },
    }) as t.GenericTool[];
    const boundNames = bound.map((localTool) => localTool.name);

    const expectedNames = [
      Constants.EXECUTE_CODE,
      Constants.BASH_TOOL,
      Constants.READ_FILE,
      'write_file',
      'edit_file',
      'grep_search',
      'glob_search',
      'list_directory',
    ];
    expect(boundNames).toEqual(expect.arrayContaining(expectedNames));
  });

  it('updates existing code tool bindings when auto-binding is disabled', () => {
    const resolved = resolveLocalToolsForBinding({
      tools: [createRemoteBashStub()],
      toolExecution: {
        engine: 'local',
        local: { includeCodingTools: false },
      },
    }) as t.GenericTool[];
    const [bashTool] = resolved;

    expect(bashTool.name).toBe(Constants.BASH_TOOL);
    expect(bashTool.description).toContain('local machine');
  });

  itWithPython3('can call local coding tools from local programmatic execution', async () => {
    const cwd = await createTempDir();
    const node = new ToolNode({
      tools: [],
      toolExecution: { engine: 'local', local: { cwd } },
    });

    // Python script that writes a file through the tool bridge, reads it
    // back, and prints what it read.
    const script = [
      'await write_file(file_path="ptc.txt", content="from local ptc")',
      'contents = await read_file(file_path="ptc.txt")',
      'print(contents)',
    ].join('\n');

    const output = await node.invoke({
      messages: [
        aiMessageWithToolCall(Constants.PROGRAMMATIC_TOOL_CALLING, {
          lang: 'py',
          code: script,
        }),
      ],
    });

    const [reply] = messagesFromResult(output as { messages: ToolMessage[] });
    expect(String(reply.content)).toContain('from local ptc');
  });

  itWithPython3('can run bash orchestration through run_tools_with_code in local mode', async () => {
    const cwd = await createTempDir();
    const node = new ToolNode({
      tools: [],
      toolExecution: { engine: 'local', local: { cwd } },
    });

    // No `lang` is passed here; each line invokes a tool by name with a
    // single-quoted JSON argument.
    const script = [
      'write_file \'{"file_path":"bash-ptc.txt","content":"from bash ptc"}\'',
      'read_file \'{"file_path":"bash-ptc.txt"}\'',
    ].join('\n');

    const output = await node.invoke({
      messages: [
        aiMessageWithToolCall(Constants.PROGRAMMATIC_TOOL_CALLING, {
          code: script,
        }),
      ],
    });

    const [reply] = messagesFromResult(output as { messages: ToolMessage[] });
    expect(String(reply.content)).toContain('from bash ptc');
  });
});
|
|
215
|
+
|
|
216
|
+
// Exercises `runBashAstChecks` in its 'auto', 'strict' and 'off' modes, and
// checks that `validateBashCommand` rejects a command carrying a deny finding.
describe('local engine bashAst', () => {
  const SUBSTITUTION_COMMAND = 'echo $(whoami)';
  const ENVIRON_COMMAND = 'cat /proc/1/environ';

  it('flags command substitution in auto mode', () => {
    const findings = runBashAstChecks(SUBSTITUTION_COMMAND, 'auto');
    const flagged = findings.some(
      (finding) => finding.code === 'cmd-subst-dollar-paren'
    );
    expect(flagged).toBe(true);
  });

  it('escalates command substitution to deny in strict mode', () => {
    const findings = runBashAstChecks(SUBSTITUTION_COMMAND, 'strict');
    const substitution = findings.find(
      (finding) => finding.code === 'cmd-subst-dollar-paren'
    );
    expect(substitution?.severity).toBe('deny');
  });

  it('always denies /proc/<pid>/environ access', () => {
    const findings = runBashAstChecks(ENVIRON_COMMAND, 'auto');
    const denied = findings.some(
      (finding) =>
        finding.code === 'proc-environ-read' && finding.severity === 'deny'
    );
    expect(denied).toBe(true);
  });

  it('never produces findings when off', () => {
    const findings = runBashAstChecks(SUBSTITUTION_COMMAND, 'off');
    expect(findings).toHaveLength(0);
  });

  it('blocks bash commands with a deny finding via validateBashCommand', async () => {
    const verdict = await validateBashCommand(ENVIRON_COMMAND, {
      bashAst: 'auto',
    });
    expect(verdict.valid).toBe(false);
    expect(verdict.errors.join('\n')).toContain('proc-environ-read');
  });
});
|
|
246
|
+
|
|
247
|
+
// Verifies when the "running without sandbox" console warning is emitted.
describe('local engine sandbox-off warning', () => {
  let warnSpy: jest.SpiedFunction<typeof console.warn>;

  // Recorded `console.warn` calls whose first argument mentions the missing
  // sandbox runtime.
  const sandboxOffWarnings = () =>
    warnSpy.mock.calls.filter((call) =>
      String(call[0]).includes('without @anthropic-ai/sandbox-runtime')
    );

  beforeEach(() => {
    // Reset the engine's warning state so every test starts from scratch,
    // then silence and record `console.warn`.
    _resetLocalEngineWarningsForTests();
    warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => undefined);
  });

  afterEach(() => {
    warnSpy.mockRestore();
  });

  it('warns once when running without sandbox', async () => {
    // Real (non-internal) executions should warn. The internal `bash -n`
    // syntax preflight inside validateBashCommand opts out (Codex P2):
    // otherwise the latch would flip on a probe and hide the warning when a
    // genuinely unsandboxed command runs later.
    await executeLocalBash('echo hi');
    await executeLocalBash('echo bye');

    expect(sandboxOffWarnings()).toHaveLength(1);
  });

  it('does NOT warn for internal probes when the run actually has sandbox enabled (Codex P2)', async () => {
    // Before the fix, validateBashCommand's `bash -n` preflight (which forces
    // `sandbox: false` for itself, since a syntax probe cannot be sandboxed)
    // emitted a misleading "sandbox is off" warning AND set
    // `sandboxOffWarned = true` even when the run had `sandbox.enabled: true`,
    // hiding the warning when a real unsandboxed execution happened later.
    // With the fix, internal probes pass `{ internal: true }` to
    // spawnLocalProcess and suppress both the message and the latch.
    await validateBashCommand('echo hi', { sandbox: { enabled: true } });

    expect(sandboxOffWarnings()).toHaveLength(0);
  });
});
|
|
288
|
+
|
|
289
|
+
// Covers capture/rewind on `LocalFileCheckpointerImpl` directly and through
// the checkpointer exposed by `createLocalCodingToolBundle`.
describe('LocalFileCheckpointer', () => {
  it('snapshots and restores existing files', async () => {
    const dir = await createTempDir();
    const target = join(dir, 'a.txt');
    await fsWriteFile(target, 'original', 'utf8');

    const checkpointer = new LocalFileCheckpointerImpl();
    await checkpointer.captureBeforeWrite(target);

    await fsWriteFile(target, 'modified', 'utf8');
    expect(await fsReadFile(target, 'utf8')).toBe('modified');

    const restoredCount = await checkpointer.rewind();
    expect(restoredCount).toBe(1);
    expect(await fsReadFile(target, 'utf8')).toBe('original');
  });

  it('deletes files that did not exist before the run', async () => {
    const dir = await createTempDir();
    const target = join(dir, 'new.txt');

    const checkpointer = new LocalFileCheckpointerImpl();
    // Captured while the file is still absent; it is created only afterwards.
    await checkpointer.captureBeforeWrite(target);
    await fsWriteFile(target, 'should-be-removed', 'utf8');

    await checkpointer.rewind();
    await expect(fsReadFile(target, 'utf8')).rejects.toThrow();
  });

  it('rewinds tools created via createLocalCodingToolBundle', async () => {
    const cwd = await createTempDir();
    const bundle = createLocalCodingToolBundle({
      cwd,
      fileCheckpointing: true,
    });

    // Explicit guards replace non-null assertions so that a missing
    // checkpointer or tool fails with a descriptive error.
    const { checkpointer } = bundle;
    expect(checkpointer).toBeDefined();
    if (checkpointer == null) {
      throw new Error('bundle.checkpointer is missing although fileCheckpointing is enabled');
    }

    const writeTool = bundle.tools.find(
      (candidate) => candidate.name === 'write_file'
    );
    expect(writeTool).toBeDefined();
    if (writeTool === undefined) {
      throw new Error('write_file tool is missing from the bundle');
    }

    // Two writes to the same, previously non-existent path.
    await writeTool.invoke({ file_path: 'cp.txt', content: 'first' });
    await writeTool.invoke({ file_path: 'cp.txt', content: 'second' });

    // Expect one restored path, and the file to be unreadable afterwards.
    const restoredCount = await checkpointer.rewind();
    expect(restoredCount).toBe(1);
    await expect(fsReadFile(join(cwd, 'cp.txt'), 'utf8')).rejects.toThrow();
  });
});
|
|
336
|
+
|
|
337
|
+
// Covers the read tool's handling of binary content, files above the
// configured read cap, and symlinks pointing outside the workspace.
describe('local read tool guards', () => {
  /**
   * Looks up a tool by name and throws a descriptive error when it is absent,
   * so a missing tool is not reported as an opaque access on `undefined`.
   */
  function requireTool<T extends { name: string }>(
    tools: readonly T[],
    name: string
  ): T {
    const found = tools.find((candidate) => candidate.name === name);
    if (found === undefined) {
      throw new Error(`expected tool "${name}" to be present in the bundle`);
    }
    return found;
  }

  it('refuses to read files containing NUL bytes', async () => {
    const cwd = await createTempDir();
    await fsWriteFile(join(cwd, 'binary.bin'), Buffer.from([0x00, 0x01, 0x02]));

    const { tools } = createLocalCodingToolBundle({ cwd });
    const readTool = requireTool(tools, Constants.READ_FILE);
    const output = await readTool.invoke({ file_path: 'binary.bin' });
    expect(String(output)).toContain('binary file');
  });

  it('returns a stub instead of OOMing on huge files', async () => {
    const cwd = await createTempDir();
    // 2048 bytes on disk against a 1024-byte cap.
    await fsWriteFile(join(cwd, 'big.txt'), 'x'.repeat(2048));

    const { tools } = createLocalCodingToolBundle({
      cwd,
      maxReadBytes: 1024,
    });
    const readTool = requireTool(tools, Constants.READ_FILE);
    const output = await readTool.invoke({ file_path: 'big.txt' });
    expect(String(output)).toContain('exceeds the 1024-byte read cap');
  });

  it('rejects symlink escapes', async () => {
    const cwd = await createTempDir();
    const outside = await createTempDir();
    await fsWriteFile(join(outside, 'secret.txt'), 'top-secret', 'utf8');
    // `escape` lives inside the workspace but points at a sibling directory.
    await symlink(outside, join(cwd, 'escape'));

    const { tools } = createLocalCodingToolBundle({ cwd });
    const readTool = requireTool(tools, Constants.READ_FILE);
    await expect(
      readTool.invoke({ file_path: 'escape/secret.txt' })
    ).rejects.toThrow(/symlink escape/);
  });
});
|
|
377
|
+
|
|
378
|
+
// Verifies that a request to the local tool bridge without credentials is
// answered with HTTP 401.
describe('local programmatic bridge auth', () => {
  // Registers a test only when `python3` is available. Without it the test is
  // reported as skipped instead of passing without running any assertion.
  const itWithPython3 = hasPython3 ? it : it.skip;

  itWithPython3('rejects unauthenticated requests to the local bridge', async () => {
    const cwd = await createTempDir();
    const node = new ToolNode({
      tools: [],
      toolExecution: { engine: 'local', local: { cwd } },
    });

    // Python probe: POSTs a `read_file` request to the bridge URL with only a
    // Content-Type header. It prints "LEAK" if the request succeeds and
    // "AUTH=<status>" if the bridge answers with an HTTP error.
    const probeScript = [
      'import os, json, urllib.request, urllib.error',
      'url = os.environ["BRIDGE_PROBE_URL"] if "BRIDGE_PROBE_URL" in os.environ else __LIBRECHAT_TOOL_BRIDGE',
      'body = json.dumps({"name":"read_file","input":{"file_path":"x"}}).encode("utf-8")',
      'try:',
      ' req = urllib.request.Request(url, data=body, headers={"Content-Type":"application/json"}, method="POST")',
      ' urllib.request.urlopen(req, timeout=5)',
      ' print("LEAK")',
      'except urllib.error.HTTPError as e:',
      ' print(f"AUTH={e.code}")',
    ].join('\n');

    const output = await node.invoke({
      messages: [
        aiMessageWithToolCall(Constants.PROGRAMMATIC_TOOL_CALLING, {
          lang: 'py',
          code: probeScript,
        }),
      ],
    });

    const [reply] = messagesFromResult(output as { messages: ToolMessage[] });
    const text = String(reply.content);
    expect(text).toContain('AUTH=401');
    expect(text).not.toContain('LEAK');
  });
});
|
|
416
|
+
|
|
417
|
+
// Covers the edit tool's fallback matching strategies, the diff included in
// its result, and preservation of CRLF line endings and a UTF-8 BOM.
describe('local edit fuzzy matching', () => {
  /**
   * Looks up a tool by name and throws a descriptive error when it is absent,
   * so a missing tool is not reported as an opaque access on `undefined`.
   */
  function requireTool<T extends { name: string }>(
    tools: readonly T[],
    name: string
  ): T {
    const found = tools.find((candidate) => candidate.name === name);
    if (found === undefined) {
      throw new Error(`expected tool "${name}" to be present in the bundle`);
    }
    return found;
  }

  it('falls back to line-trimmed when trailing whitespace differs', async () => {
    const cwd = await createTempDir();
    const file = join(cwd, 'a.ts');
    // The file on disk has trailing whitespace on its lines.
    await fsWriteFile(
      file,
      'function greet(name: string) { \n return `Hello, ${name}!`; \n}\n',
      'utf8'
    );

    const { tools } = createLocalCodingToolBundle({ cwd });
    const editTool = requireTool(tools, 'edit_file');
    const output = await editTool.invoke({
      file_path: 'a.ts',
      // The LLM emits a version with the trailing whitespace stripped.
      old_text:
        'function greet(name: string) {\n return `Hello, ${name}!`;\n}',
      new_text:
        'function greet(name: string) {\n return `Hi, ${name}!`;\n}',
    });

    expect(String(output)).toContain('strategies: line-trimmed');
    const after = await fsReadFile(file, 'utf8');
    expect(after).toContain('Hi, ${name}!');
  });

  it('falls back to indentation-flexible when LLM strips leading indent', async () => {
    const cwd = await createTempDir();
    const file = join(cwd, 'a.ts');
    await fsWriteFile(
      file,
      'class Foo {\n method() {\n return 1;\n }\n}\n',
      'utf8'
    );

    const { tools } = createLocalCodingToolBundle({ cwd });
    const editTool = requireTool(tools, 'edit_file');
    const output = await editTool.invoke({
      file_path: 'a.ts',
      // The LLM dropped the leading indent that `method()` has in the file.
      old_text: 'method() {\n return 1;\n}',
      new_text: 'method() {\n return 42;\n}',
    });

    expect(String(output)).toMatch(
      /strategies: (indentation-flexible|whitespace-normalized)/
    );
    const after = await fsReadFile(file, 'utf8');
    expect(after).toContain('return 42;');
  });

  it('returns a unified diff in the tool result', async () => {
    const cwd = await createTempDir();
    await fsWriteFile(join(cwd, 'a.txt'), 'first\nsecond\nthird\n', 'utf8');

    const { tools } = createLocalCodingToolBundle({ cwd });
    const editTool = requireTool(tools, 'edit_file');
    const output = await editTool.invoke({
      file_path: 'a.txt',
      old_text: 'second',
      new_text: 'SECOND',
    });

    const text = String(output);
    expect(text).toContain('Diff:');
    expect(text).toContain('-second');
    expect(text).toContain('+SECOND');
  });

  it('preserves CRLF line endings on edit', async () => {
    const cwd = await createTempDir();
    const file = join(cwd, 'a.txt');
    await fsWriteFile(file, 'one\r\ntwo\r\nthree\r\n', 'utf8');

    const { tools } = createLocalCodingToolBundle({ cwd });
    const editTool = requireTool(tools, 'edit_file');
    await editTool.invoke({
      file_path: 'a.txt',
      old_text: 'two',
      new_text: 'TWO',
    });

    const raw = await fsReadFile(file, 'utf8');
    expect(raw).toBe('one\r\nTWO\r\nthree\r\n');
  });

  it('preserves UTF-8 BOM on overwrite', async () => {
    const cwd = await createTempDir();
    const file = join(cwd, 'a.txt');
    // Written as an escape sequence: a literal U+FEFF is invisible and easily
    // lost in editors and diff tools. An empty string here would make the
    // `startsWith` check below trivially true.
    const BOM = '\uFEFF';
    await fsWriteFile(file, BOM + 'hello\n', 'utf8');

    const { tools } = createLocalCodingToolBundle({ cwd });
    const writeTool = requireTool(tools, 'write_file');
    await writeTool.invoke({ file_path: 'a.txt', content: 'goodbye\n' });

    const raw = await fsReadFile(file, 'utf8');
    expect(raw.startsWith(BOM)).toBe(true);
    expect(raw.slice(BOM.length)).toBe('goodbye\n');
  });
});
|
|
512
|
+
|
|
513
|
+
describe('local read attachments', () => {
|
|
514
|
+
// Smallest valid 1x1 PNG.
|
|
515
|
+
const tinyPng = Buffer.from(
|
|
516
|
+
'89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4890000000a49444154789c63000100000005000165be7e6e0000000049454e44ae426082',
|
|
517
|
+
'hex'
|
|
518
|
+
);
|
|
519
|
+
|
|
520
|
+
it('returns binary stub by default', async () => {
|
|
521
|
+
const cwd = await createTempDir();
|
|
522
|
+
const file = join(cwd, 'tiny.png');
|
|
523
|
+
await fsWriteFile(file, tinyPng);
|
|
524
|
+
const bundle = createLocalCodingToolBundle({ cwd });
|
|
525
|
+
const readTool = bundle.tools.find((tt) => tt.name === Constants.READ_FILE);
|
|
526
|
+
const result = await readTool!.invoke({ file_path: 'tiny.png' });
|
|
527
|
+
expect(String(result)).toContain('binary file');
|
|
528
|
+
});
|
|
529
|
+
|
|
530
|
+
it('returns an image_url content block when attachReadAttachments=images-only', async () => {
|
|
531
|
+
const cwd = await createTempDir();
|
|
532
|
+
const file = join(cwd, 'tiny.png');
|
|
533
|
+
await fsWriteFile(file, tinyPng);
|
|
534
|
+
|
|
535
|
+
const bundle = createLocalCodingToolBundle({
|
|
536
|
+
cwd,
|
|
537
|
+
attachReadAttachments: 'images-only',
|
|
538
|
+
});
|
|
539
|
+
const readTool = bundle.tools.find((tt) => tt.name === Constants.READ_FILE);
|
|
540
|
+
// Invoking via a tool_call envelope (rather than raw args) is what
|
|
541
|
+
// makes the LangChain tool wrap the result as a ToolMessage with
|
|
542
|
+
// `.content` and `.artifact` populated.
|
|
543
|
+
const message = (await readTool!.invoke({
|
|
544
|
+
id: 'call_image',
|
|
545
|
+
name: Constants.READ_FILE,
|
|
546
|
+
args: { file_path: 'tiny.png' },
|
|
547
|
+
type: 'tool_call',
|
|
548
|
+
})) as { content: unknown; artifact: unknown };
|
|
549
|
+
expect(Array.isArray(message.content)).toBe(true);
|
|
550
|
+
const blocks = message.content as Array<{
|
|
551
|
+
type: string;
|
|
552
|
+
image_url?: { url: string };
|
|
553
|
+
}>;
|
|
554
|
+
const imageBlock = blocks.find((b) => b.type === 'image_url');
|
|
555
|
+
expect(imageBlock?.image_url?.url).toMatch(/^data:image\/png;base64,/);
|
|
556
|
+
expect(blocks.find((b) => b.type === 'text')).toBeDefined();
|
|
557
|
+
expect(message.artifact).toMatchObject({
|
|
558
|
+
mime: 'image/png',
|
|
559
|
+
attachment: 'image',
|
|
560
|
+
});
|
|
561
|
+
});
|
|
562
|
+
|
|
563
|
+
it('refuses oversize images even when embedding is on', async () => {
|
|
564
|
+
const cwd = await createTempDir();
|
|
565
|
+
const file = join(cwd, 'big.png');
|
|
566
|
+
// Build an oversize "PNG". Arbitrary bytes would not work here:
|
|
567
|
+
// file-type won't recognise them, classifyAttachment returns
|
|
568
|
+
// 'binary', and the oversize gate is never reached. Instead,
|
|
569
|
+
// keep the real tiny PNG at the front (so it should still sniff
|
|
570
|
+
// as an image) and zero-pad it to 200 bytes. With the cap set to
|
|
571
|
+
// 100 bytes below, the read must refuse to embed the file.
|
|
572
|
+
const padded = Buffer.concat([
|
|
573
|
+
tinyPng,
|
|
574
|
+
Buffer.alloc(200 - tinyPng.length, 0),
|
|
575
|
+
]);
|
|
576
|
+
await fsWriteFile(file, padded);
|
|
577
|
+
const bundle = createLocalCodingToolBundle({
|
|
578
|
+
cwd,
|
|
579
|
+
attachReadAttachments: 'images-only',
|
|
580
|
+
maxAttachmentBytes: 100,
|
|
581
|
+
});
|
|
582
|
+
const readTool = bundle.tools.find((tt) => tt.name === Constants.READ_FILE);
|
|
583
|
+
const result = await readTool!.invoke({ file_path: 'big.png' });
|
|
584
|
+
expect(String(result)).toMatch(/Refusing to embed/);
|
|
585
|
+
});
|
|
586
|
+
|
|
587
|
+
it('still reads text files normally when embedding is on', async () => {
|
|
588
|
+
const cwd = await createTempDir();
|
|
589
|
+
const file = join(cwd, 'a.txt');
|
|
590
|
+
await fsWriteFile(file, 'hello world\n', 'utf8');
|
|
591
|
+
const bundle = createLocalCodingToolBundle({
|
|
592
|
+
cwd,
|
|
593
|
+
attachReadAttachments: 'images-only',
|
|
594
|
+
});
|
|
595
|
+
const readTool = bundle.tools.find((tt) => tt.name === Constants.READ_FILE);
|
|
596
|
+
const result = await readTool!.invoke({ file_path: 'a.txt' });
|
|
597
|
+
expect(String(result)).toContain('hello world');
|
|
598
|
+
});
|
|
599
|
+
});
|
|
600
|
+
|
|
601
|
+
describe('post-edit syntax check', () => {
|
|
602
|
+
beforeEach(() => {
|
|
603
|
+
_resetSyntaxCheckProbeCacheForTests();
|
|
604
|
+
});
|
|
605
|
+
|
|
606
|
+
it('flags broken JS via node --check', async () => {
|
|
607
|
+
const cwd = await createTempDir();
|
|
608
|
+
const file = join(cwd, 'broken.js');
|
|
609
|
+
await fsWriteFile(file, 'function (\n', 'utf8');
|
|
610
|
+
const outcome = await runPostEditSyntaxCheck(file, {});
|
|
611
|
+
expect(outcome).not.toBeNull();
|
|
612
|
+
expect(outcome!.ok).toBe(false);
|
|
613
|
+
if (outcome!.ok === false) {
|
|
614
|
+
expect(outcome!.checker).toBe('node --check');
|
|
615
|
+
expect(outcome!.output.length).toBeGreaterThan(0);
|
|
616
|
+
}
|
|
617
|
+
});
|
|
618
|
+
|
|
619
|
+
it('passes valid JS', async () => {
|
|
620
|
+
const cwd = await createTempDir();
|
|
621
|
+
const file = join(cwd, 'good.js');
|
|
622
|
+
await fsWriteFile(file, 'console.log(1)\n', 'utf8');
|
|
623
|
+
const outcome = await runPostEditSyntaxCheck(file, {});
|
|
624
|
+
expect(outcome?.ok).toBe(true);
|
|
625
|
+
});
|
|
626
|
+
|
|
627
|
+
it('flags broken JSON via JSON.parse', async () => {
|
|
628
|
+
const cwd = await createTempDir();
|
|
629
|
+
const file = join(cwd, 'broken.json');
|
|
630
|
+
await fsWriteFile(file, '{ "x": ', 'utf8');
|
|
631
|
+
const outcome = await runPostEditSyntaxCheck(file, {});
|
|
632
|
+
expect(outcome?.ok).toBe(false);
|
|
633
|
+
if (outcome!.ok === false) {
|
|
634
|
+
expect(outcome!.checker).toBe('JSON.parse');
|
|
635
|
+
}
|
|
636
|
+
});
|
|
637
|
+
|
|
638
|
+
it('returns null for unknown extensions', async () => {
|
|
639
|
+
const cwd = await createTempDir();
|
|
640
|
+
const file = join(cwd, 'random.xyz');
|
|
641
|
+
await fsWriteFile(file, 'whatever\n', 'utf8');
|
|
642
|
+
const outcome = await runPostEditSyntaxCheck(file, {});
|
|
643
|
+
expect(outcome).toBeNull();
|
|
644
|
+
});
|
|
645
|
+
|
|
646
|
+
it('write_file appends syntax-check warning when postEditSyntaxCheck=auto', async () => {
|
|
647
|
+
const cwd = await createTempDir();
|
|
648
|
+
const bundle = createLocalCodingToolBundle({
|
|
649
|
+
cwd,
|
|
650
|
+
postEditSyntaxCheck: 'auto',
|
|
651
|
+
});
|
|
652
|
+
const writeTool = bundle.tools.find((tt) => tt.name === 'write_file');
|
|
653
|
+
const message = (await writeTool!.invoke({
|
|
654
|
+
id: 'call_w',
|
|
655
|
+
name: 'write_file',
|
|
656
|
+
args: { file_path: 'broken.js', content: 'function (\n' },
|
|
657
|
+
type: 'tool_call',
|
|
658
|
+
})) as { content: string; artifact: { syntax_error?: string } };
|
|
659
|
+
expect(message.content).toContain('[syntax-check warning');
|
|
660
|
+
expect(message.artifact.syntax_error).toBe('node --check');
|
|
661
|
+
});
|
|
662
|
+
|
|
663
|
+
it('write_file in strict mode throws on syntax error', async () => {
|
|
664
|
+
const cwd = await createTempDir();
|
|
665
|
+
const bundle = createLocalCodingToolBundle({
|
|
666
|
+
cwd,
|
|
667
|
+
postEditSyntaxCheck: 'strict',
|
|
668
|
+
});
|
|
669
|
+
const writeTool = bundle.tools.find((tt) => tt.name === 'write_file');
|
|
670
|
+
await expect(
|
|
671
|
+
writeTool!.invoke({
|
|
672
|
+
id: 'call_w',
|
|
673
|
+
name: 'write_file',
|
|
674
|
+
args: { file_path: 'broken.js', content: 'function (\n' },
|
|
675
|
+
type: 'tool_call',
|
|
676
|
+
})
|
|
677
|
+
).rejects.toThrow(/syntax check failed/);
|
|
678
|
+
});
|
|
679
|
+
});
|
|
680
|
+
|
|
681
|
+
describe('compile_check', () => {
|
|
682
|
+
it('reports "no recognised project marker" when there are none', async () => {
|
|
683
|
+
const cwd = await createTempDir();
|
|
684
|
+
const checkTool = createCompileCheckTool({ cwd });
|
|
685
|
+
const message = (await checkTool.invoke({
|
|
686
|
+
id: 'call_c',
|
|
687
|
+
name: 'compile_check',
|
|
688
|
+
args: {},
|
|
689
|
+
type: 'tool_call',
|
|
690
|
+
})) as { content: string; artifact: { ran: boolean; kind: string } };
|
|
691
|
+
expect(message.content).toContain('no recognised project marker');
|
|
692
|
+
expect(message.artifact.ran).toBe(false);
|
|
693
|
+
expect(message.artifact.kind).toBe('unknown');
|
|
694
|
+
});
|
|
695
|
+
|
|
696
|
+
it('honours an explicit command override and reports exit code', async () => {
|
|
697
|
+
const cwd = await createTempDir();
|
|
698
|
+
const checkTool = createCompileCheckTool({ cwd });
|
|
699
|
+
const message = (await checkTool.invoke({
|
|
700
|
+
id: 'call_c2',
|
|
701
|
+
name: 'compile_check',
|
|
702
|
+
args: { command: 'echo hello && false' },
|
|
703
|
+
type: 'tool_call',
|
|
704
|
+
})) as { content: string; artifact: { passed: boolean; exit_code: number | null } };
|
|
705
|
+
expect(message.content).toContain('FAILED');
|
|
706
|
+
expect(message.content).toContain('hello');
|
|
707
|
+
expect(message.artifact.passed).toBe(false);
|
|
708
|
+
expect(message.artifact.exit_code).not.toBe(0);
|
|
709
|
+
});
|
|
710
|
+
});
|
|
711
|
+
|
|
712
|
+
describe('local search fallback', () => {
|
|
713
|
+
beforeEach(() => {
|
|
714
|
+
_resetRipgrepCacheForTests();
|
|
715
|
+
});
|
|
716
|
+
|
|
717
|
+
it('finds matches via the Node fallback when ripgrep is missing', async () => {
|
|
718
|
+
const cwd = await createTempDir();
|
|
719
|
+
await fsWriteFile(join(cwd, 'a.ts'), 'const needle = 42;\n', 'utf8');
|
|
720
|
+
await fsWriteFile(join(cwd, 'b.ts'), 'const haystack = 1;\n', 'utf8');
|
|
721
|
+
|
|
722
|
+
const bundle = createLocalCodingToolBundle({
|
|
723
|
+
cwd,
|
|
724
|
+
env: { PATH: '/nonexistent' },
|
|
725
|
+
});
|
|
726
|
+
const grepTool = bundle.tools.find((t_) => t_.name === 'grep_search');
|
|
727
|
+
const result = await grepTool!.invoke({ pattern: 'needle' });
|
|
728
|
+
expect(String(result)).toContain('a.ts');
|
|
729
|
+
expect(String(result)).toContain('needle');
|
|
730
|
+
});
|
|
731
|
+
});
|
|
732
|
+
|
|
733
|
+
describe('codex review fixes', () => {
|
|
734
|
+
describe('executeLocalCode bash args (Codex P2 #1)', () => {
|
|
735
|
+
it('passes input.args as positional shell parameters when lang is bash', async () => {
|
|
736
|
+
const cwd = await createTempDir();
|
|
737
|
+
const result = await executeLocalCode(
|
|
738
|
+
{
|
|
739
|
+
lang: 'bash',
|
|
740
|
+
// Echo every positional arg space-separated. With the bug,
|
|
741
|
+
// $@ is empty because args were dropped.
|
|
742
|
+
code: 'echo "args:$@"',
|
|
743
|
+
args: ['hello', 'world'],
|
|
744
|
+
},
|
|
745
|
+
{ cwd }
|
|
746
|
+
);
|
|
747
|
+
expect(result.exitCode).toBe(0);
|
|
748
|
+
expect(result.stdout.trim()).toBe('args:hello world');
|
|
749
|
+
});
|
|
750
|
+
|
|
751
|
+
it('still works when lang is bash and args is missing', async () => {
|
|
752
|
+
const cwd = await createTempDir();
|
|
753
|
+
const result = await executeLocalCode(
|
|
754
|
+
{ lang: 'bash', code: 'echo plain' },
|
|
755
|
+
{ cwd }
|
|
756
|
+
);
|
|
757
|
+
expect(result.exitCode).toBe(0);
|
|
758
|
+
expect(result.stdout.trim()).toBe('plain');
|
|
759
|
+
});
|
|
760
|
+
});
|
|
761
|
+
|
|
762
|
+
describe('ripgrep cache backend scope (Codex P2 #2)', () => {
|
|
763
|
+
it('does not bleed an "rg available" verdict from one backend to another', async () => {
|
|
764
|
+
// Backend A: pretends rg works (returns a fake spawn whose
|
|
765
|
+
// process exits 0 on every call). The cache should record true
|
|
766
|
+
// for THIS backend.
|
|
767
|
+
const okBackend = jest.fn((cmd: string, _args: string[], _opts: unknown) => {
|
|
768
|
+
const ok = require('child_process').spawn('echo', [cmd]);
|
|
769
|
+
return ok;
|
|
770
|
+
}) as unknown as t.LocalSpawn;
|
|
771
|
+
// Backend B: pretends rg does not exist (returns a child that
|
|
772
|
+
// exits 127, the "command not found" code).
|
|
773
|
+
const missingBackend = jest.fn(
|
|
774
|
+
(_cmd: string, _args: string[], _opts: unknown) => {
|
|
775
|
+
const child = require('child_process').spawn(
|
|
776
|
+
'sh',
|
|
777
|
+
['-c', 'exit 127']
|
|
778
|
+
);
|
|
779
|
+
return child;
|
|
780
|
+
}
|
|
781
|
+
) as unknown as t.LocalSpawn;
|
|
782
|
+
|
|
783
|
+
_resetRipgrepCacheForTests();
|
|
784
|
+
|
|
785
|
+
// Build two bundles with distinct backends.
|
|
786
|
+
const cwdA = await createTempDir();
|
|
787
|
+
const cwdB = await createTempDir();
|
|
788
|
+
await fsWriteFile(join(cwdA, 'a.ts'), 'needle\n', 'utf8');
|
|
789
|
+
await fsWriteFile(join(cwdB, 'b.ts'), 'needle\n', 'utf8');
|
|
790
|
+
|
|
791
|
+
const bundleA = createLocalCodingToolBundle({
|
|
792
|
+
cwd: cwdA,
|
|
793
|
+
exec: { spawn: okBackend },
|
|
794
|
+
});
|
|
795
|
+
const bundleB = createLocalCodingToolBundle({
|
|
796
|
+
cwd: cwdB,
|
|
797
|
+
exec: { spawn: missingBackend },
|
|
798
|
+
});
|
|
799
|
+
|
|
800
|
+
// Run grep against A first — populates cache for A's backend.
|
|
801
|
+
await bundleA.tools.find((t_) => t_.name === 'grep_search')!.invoke({
|
|
802
|
+
pattern: 'needle',
|
|
803
|
+
});
|
|
804
|
+
// Run grep against B — must NOT see cached "true" from A's
|
|
805
|
+
// backend. With the bug, B would try to spawn rg, fail, and
|
|
806
|
+
// throw instead of falling back to the Node walker.
|
|
807
|
+
const bResult = await bundleB.tools
|
|
808
|
+
.find((t_) => t_.name === 'grep_search')!
|
|
809
|
+
.invoke({ pattern: 'needle' });
|
|
810
|
+
expect(String(bResult)).toContain('needle');
|
|
811
|
+
});
|
|
812
|
+
});
|
|
813
|
+
|
|
814
|
+
describe('additionalRoots resolved against workspace root (Codex P2 #3)', () => {
|
|
815
|
+
it('treats relative additionalRoots as siblings of root, not of process.cwd', async () => {
|
|
816
|
+
const parent = await createTempDir();
|
|
817
|
+
const fs = await import('fs/promises');
|
|
818
|
+
await fs.mkdir(join(parent, 'app'), { recursive: true });
|
|
819
|
+
await fs.mkdir(join(parent, 'shared'), { recursive: true });
|
|
820
|
+
await fsWriteFile(join(parent, 'shared/lib.ts'), 'X\n', 'utf8');
|
|
821
|
+
|
|
822
|
+
const bundle = createLocalCodingToolBundle({
|
|
823
|
+
workspace: {
|
|
824
|
+
root: join(parent, 'app'),
|
|
825
|
+
additionalRoots: ['../shared'],
|
|
826
|
+
},
|
|
827
|
+
});
|
|
828
|
+
const readTool = bundle.tools.find((t_) => t_.name === Constants.READ_FILE);
|
|
829
|
+
// Without the fix, the relative root '../shared' would resolve against
|
|
830
|
+
// process.cwd (this test runner), so the absolute path read below would
|
|
831
|
+
// miss the boundary check and throw "Path is outside the local workspace".
|
|
832
|
+
const result = await readTool!.invoke({
|
|
833
|
+
id: 'c',
|
|
834
|
+
name: Constants.READ_FILE,
|
|
835
|
+
args: { file_path: join(parent, 'shared/lib.ts') },
|
|
836
|
+
type: 'tool_call',
|
|
837
|
+
});
|
|
838
|
+
expect(JSON.stringify(result)).toContain('X');
|
|
839
|
+
});
|
|
840
|
+
});
|
|
841
|
+
});
|
|
842
|
+
|
|
843
|
+
describe('codex review fixes (round 2)', () => {
|
|
844
|
+
describe('streaming output cap (Codex P1)', () => {
|
|
845
|
+
const { spawnLocalProcess, _resetLocalEngineWarningsForTests: _ } = require('../local/LocalExecutionEngine');
|
|
846
|
+
|
|
847
|
+
it('hard-kills the child when total streamed bytes exceed maxSpawnedBytes', async () => {
|
|
848
|
+
// Cap at 64 KiB. `yes` would otherwise run unbounded.
|
|
849
|
+
const start = Date.now();
|
|
850
|
+
const result = await spawnLocalProcess('yes', [], {
|
|
851
|
+
timeoutMs: 30_000,
|
|
852
|
+
maxSpawnedBytes: 64 * 1024,
|
|
853
|
+
sandbox: { enabled: false },
|
|
854
|
+
});
|
|
855
|
+
const elapsed = Date.now() - start;
|
|
856
|
+
// Killed promptly (much sooner than the 30s timeout).
|
|
857
|
+
expect(elapsed).toBeLessThan(5000);
|
|
858
|
+
// Process was killed by the overflow guard, not by timeout.
|
|
859
|
+
expect(result.timedOut).toBe(false);
|
|
860
|
+
expect(result.exitCode).not.toBe(0);
|
|
861
|
+
// We DID see some output before the kill.
|
|
862
|
+
expect(result.stdout.length).toBeGreaterThan(0);
|
|
863
|
+
});
|
|
864
|
+
|
|
865
|
+
it('spills overflow to a temp file (full output recoverable post-cap)', async () => {
|
|
866
|
+
// Generate ~200 KiB of output against a small inline cap (maxOutputChars: 8_000) → spill.
|
|
867
|
+
const result = await spawnLocalProcess(
|
|
868
|
+
'bash',
|
|
869
|
+
['-c', 'head -c 200000 /dev/urandom | base64 | head -c 200000'],
|
|
870
|
+
{
|
|
871
|
+
timeoutMs: 10_000,
|
|
872
|
+
maxOutputChars: 8_000, // inline cap = 16 KiB; ~200 KiB → overflow
|
|
873
|
+
maxSpawnedBytes: 1024 * 1024, // 1 MiB hard cap
|
|
874
|
+
sandbox: { enabled: false },
|
|
875
|
+
}
|
|
876
|
+
);
|
|
877
|
+
expect(result.exitCode).toBe(0);
|
|
878
|
+
expect(result.fullOutputPath).toBeTruthy();
|
|
879
|
+
const fs = await import('fs/promises');
|
|
880
|
+
const spilled = await fs.readFile(result.fullOutputPath as string, 'utf8');
|
|
881
|
+
// The spill file holds more bytes than the in-memory truncation.
|
|
882
|
+
expect(spilled.length).toBeGreaterThan(result.stdout.length);
|
|
883
|
+
});
|
|
884
|
+
|
|
885
|
+
it('does not create a spill file for small outputs', async () => {
|
|
886
|
+
const result = await spawnLocalProcess('bash', ['-c', 'echo small'], {
|
|
887
|
+
timeoutMs: 5_000,
|
|
888
|
+
sandbox: { enabled: false },
|
|
889
|
+
});
|
|
890
|
+
expect(result.fullOutputPath).toBeUndefined();
|
|
891
|
+
expect(result.stdout.trim()).toBe('small');
|
|
892
|
+
});
|
|
893
|
+
});
|
|
894
|
+
|
|
895
|
+
describe('bash_tool args (Codex P2)', () => {
|
|
896
|
+
it('populates positional shell parameters from input.args', async () => {
|
|
897
|
+
const cwd = await createTempDir();
|
|
898
|
+
const bundle = createLocalCodingToolBundle({ cwd });
|
|
899
|
+
const bashTool = bundle.tools.find(
|
|
900
|
+
(tt) => tt.name === Constants.BASH_TOOL
|
|
901
|
+
);
|
|
902
|
+
const result = await bashTool!.invoke({
|
|
903
|
+
id: 'b1',
|
|
904
|
+
name: Constants.BASH_TOOL,
|
|
905
|
+
args: { command: 'echo "first=$1 second=$2"', args: ['hello', 'world'] },
|
|
906
|
+
type: 'tool_call',
|
|
907
|
+
});
|
|
908
|
+
const text = JSON.stringify(result);
|
|
909
|
+
expect(text).toContain('first=hello second=world');
|
|
910
|
+
});
|
|
911
|
+
|
|
912
|
+
it('still works when args is missing', async () => {
|
|
913
|
+
const cwd = await createTempDir();
|
|
914
|
+
const bundle = createLocalCodingToolBundle({ cwd });
|
|
915
|
+
const bashTool = bundle.tools.find(
|
|
916
|
+
(tt) => tt.name === Constants.BASH_TOOL
|
|
917
|
+
);
|
|
918
|
+
const result = await bashTool!.invoke({
|
|
919
|
+
id: 'b2',
|
|
920
|
+
name: Constants.BASH_TOOL,
|
|
921
|
+
args: { command: 'echo plain' },
|
|
922
|
+
type: 'tool_call',
|
|
923
|
+
});
|
|
924
|
+
expect(JSON.stringify(result)).toContain('plain');
|
|
925
|
+
});
|
|
926
|
+
});
|
|
927
|
+
});
|
|
928
|
+
|
|
929
|
+
describe('codex review fixes (round 3)', () => {
|
|
930
|
+
describe('validateBashCommand honours configured shell (Codex P1 #6)', () => {
|
|
931
|
+
it('routes the -n preflight through `local.shell` when set', async () => {
|
|
932
|
+
// Spawn calls go through the config'd backend; intercept and
|
|
933
|
+
// assert which shell binary the syntax check picks.
|
|
934
|
+
const calls: string[] = [];
|
|
935
|
+
const intercept: t.LocalSpawn = ((
|
|
936
|
+
command: string,
|
|
937
|
+
args: string[],
|
|
938
|
+
opts: import('child_process').SpawnOptions
|
|
939
|
+
) => {
|
|
940
|
+
calls.push(command);
|
|
941
|
+
// Fall through to a real spawn so the call resolves cleanly.
|
|
942
|
+
const { spawn: realSpawn } = require('child_process') as typeof import('child_process');
|
|
943
|
+
return realSpawn(command, args, opts);
|
|
944
|
+
}) as unknown as t.LocalSpawn;
|
|
945
|
+
|
|
946
|
+
const result = await validateBashCommand('echo ok', {
|
|
947
|
+
shell: '/bin/sh',
|
|
948
|
+
exec: { spawn: intercept },
|
|
949
|
+
});
|
|
950
|
+
expect(result.valid).toBe(true);
|
|
951
|
+
// The very first call is the syntax-check spawn; assert it used
|
|
952
|
+
// /bin/sh and not the DEFAULT_SHELL fallback.
|
|
953
|
+
expect(calls[0]).toBe('/bin/sh');
|
|
954
|
+
});
|
|
955
|
+
});
|
|
956
|
+
|
|
957
|
+
describe('syntax-check probe cache is backend-keyed (Codex P2 #7)', () => {
|
|
958
|
+
it('does not bleed an "rg/node/python available" verdict from one backend to another', async () => {
|
|
959
|
+
_resetSyntaxCheckProbeCacheForTests();
|
|
960
|
+
|
|
961
|
+
// Backend A: probes succeed (real spawn).
|
|
962
|
+
const realSpawn = (require('child_process') as typeof import('child_process')).spawn;
|
|
963
|
+
const okBackend: t.LocalSpawn = ((
|
|
964
|
+
cmd: string,
|
|
965
|
+
args: string[],
|
|
966
|
+
opts: import('child_process').SpawnOptions
|
|
967
|
+
) => realSpawn(cmd, args, opts)) as unknown as t.LocalSpawn;
|
|
968
|
+
// Backend B: probes always fail with exit 127.
|
|
969
|
+
const missingBackend: t.LocalSpawn = ((
|
|
970
|
+
_cmd: string,
|
|
971
|
+
_args: string[],
|
|
972
|
+
opts: import('child_process').SpawnOptions
|
|
973
|
+
) => realSpawn('sh', ['-c', 'exit 127'], opts)) as unknown as t.LocalSpawn;
|
|
974
|
+
|
|
975
|
+
const cwdA = await createTempDir();
|
|
976
|
+
const cwdB = await createTempDir();
|
|
977
|
+
// Write a broken JS file we want syntax-checked.
|
|
978
|
+
await fsWriteFile(join(cwdA, 'a.js'), 'function (\n', 'utf8');
|
|
979
|
+
await fsWriteFile(join(cwdB, 'b.js'), 'function (\n', 'utf8');
|
|
980
|
+
|
|
981
|
+
// Run on backend A — succeeds, populates A's probe cache for `node`.
|
|
982
|
+
const a = await runPostEditSyntaxCheck(join(cwdA, 'a.js'), {
|
|
983
|
+
cwd: cwdA,
|
|
984
|
+
exec: { spawn: okBackend },
|
|
985
|
+
});
|
|
986
|
+
expect(a?.ok).toBe(false);
|
|
987
|
+
|
|
988
|
+
// Run on backend B — must NOT see A's cached "node available".
|
|
989
|
+
// With the bug, B would assume `node` works (skipping the probe),
|
|
990
|
+
// try to run `node --check`, get exit 127 from the missingBackend,
|
|
991
|
+
// and return ok=false with a misleading checker.
|
|
992
|
+
// With the fix: B's own probe runs, sees node is missing on this
|
|
993
|
+
// backend, and skips the syntax check (returns ok=true).
|
|
994
|
+
const b = await runPostEditSyntaxCheck(join(cwdB, 'b.js'), {
|
|
995
|
+
cwd: cwdB,
|
|
996
|
+
exec: { spawn: missingBackend },
|
|
997
|
+
});
|
|
998
|
+
expect(b?.ok).toBe(true);
|
|
999
|
+
});
|
|
1000
|
+
});
|
|
1001
|
+
|
|
1002
|
+
describe('grep passes pattern via -e (Codex P2 #8)', () => {
|
|
1003
|
+
it('handles dash-prefixed patterns without rg interpreting them as flags', async () => {
|
|
1004
|
+
const cwd = await createTempDir();
|
|
1005
|
+
// File contains a literal "-foo" we want to find.
|
|
1006
|
+
await fsWriteFile(
|
|
1007
|
+
join(cwd, 'flags.txt'),
|
|
1008
|
+
'before\n-foo bar\nafter\n',
|
|
1009
|
+
'utf8'
|
|
1010
|
+
);
|
|
1011
|
+
const bundle = createLocalCodingToolBundle({ cwd });
|
|
1012
|
+
const grepTool = bundle.tools.find((t_) => t_.name === 'grep_search');
|
|
1013
|
+
const result = await grepTool!.invoke({
|
|
1014
|
+
id: 'g1',
|
|
1015
|
+
name: 'grep_search',
|
|
1016
|
+
args: { pattern: '-foo' },
|
|
1017
|
+
type: 'tool_call',
|
|
1018
|
+
});
|
|
1019
|
+
const text = JSON.stringify(result);
|
|
1020
|
+
// Pre-fix, rg would parse "-foo" as a flag and bail out.
|
|
1021
|
+
// Post-fix, "-foo" is matched and the line shows up.
|
|
1022
|
+
expect(text).toContain('-foo bar');
|
|
1023
|
+
});
|
|
1024
|
+
});
|
|
1025
|
+
});
|
|
1026
|
+
|
|
1027
|
+
describe('codex review fixes (round 4)', () => {
|
|
1028
|
+
describe('quoted destructive targets (Codex P1 #9)', () => {
|
|
1029
|
+
it('blocks rm -rf "/" (target inside double quotes)', async () => {
|
|
1030
|
+
const result = await validateBashCommand('rm -rf "/"');
|
|
1031
|
+
expect(result.valid).toBe(false);
|
|
1032
|
+
expect(result.errors.join('\n')).toContain('destructive command pattern');
|
|
1033
|
+
});
|
|
1034
|
+
|
|
1035
|
+
it('blocks rm -rf "$HOME" (env-quoted target)', async () => {
|
|
1036
|
+
const result = await validateBashCommand('rm -rf "$HOME"');
|
|
1037
|
+
expect(result.valid).toBe(false);
|
|
1038
|
+
expect(result.errors.join('\n')).toContain('destructive command pattern');
|
|
1039
|
+
});
|
|
1040
|
+
|
|
1041
|
+
it('blocks rm -rf \'/\' (target inside single quotes)', async () => {
|
|
1042
|
+
const result = await validateBashCommand("rm -rf '/'");
|
|
1043
|
+
expect(result.valid).toBe(false);
|
|
1044
|
+
expect(result.errors.join('\n')).toContain('destructive command pattern');
|
|
1045
|
+
});
|
|
1046
|
+
|
|
1047
|
+
it('blocks chmod -R 777 "/"', async () => {
|
|
1048
|
+
const result = await validateBashCommand('chmod -R 777 "/"');
|
|
1049
|
+
expect(result.valid).toBe(false);
|
|
1050
|
+
expect(result.errors.join('\n')).toContain('destructive command pattern');
|
|
1051
|
+
});
|
|
1052
|
+
|
|
1053
|
+
it('still blocks unquoted forms (no regression)', async () => {
|
|
1054
|
+
const result = await validateBashCommand('rm -rf /');
|
|
1055
|
+
expect(result.valid).toBe(false);
|
|
1056
|
+
});
|
|
1057
|
+
|
|
1058
|
+
it('does not flag the print-only case echo "rm -rf /"', async () => {
|
|
1059
|
+
// The destructive-target inside `echo "..."` is wrapped by the
|
|
1060
|
+
// OUTER quotes only — there's no quote pair around the `/`
|
|
1061
|
+
// itself — so the quoted-pattern pass should not match.
|
|
1062
|
+
const result = await validateBashCommand('echo "rm -rf /"');
|
|
1063
|
+
expect(result.valid).toBe(true);
|
|
1064
|
+
});
|
|
1065
|
+
});
|
|
1066
|
+
});
|
|
1067
|
+
|
|
1068
|
+
describe('codex review fixes (round 5)', () => {
|
|
1069
|
+
describe('maxSpawnedBytes=0 disables the cap (Codex P2 #11)', () => {
|
|
1070
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
1071
|
+
const { spawnLocalProcess } = require('../local/LocalExecutionEngine');
|
|
1072
|
+
|
|
1073
|
+
it('does not kill on first byte when maxSpawnedBytes is 0', async () => {
|
|
1074
|
+
// Without the fix, `totalSpawnedBytes > 0` triggers on the first
|
|
1075
|
+
// byte and the process tree gets killed before `echo` can finish.
|
|
1076
|
+
const result = await spawnLocalProcess('bash', ['-c', 'echo hello'], {
|
|
1077
|
+
timeoutMs: 5_000,
|
|
1078
|
+
maxSpawnedBytes: 0,
|
|
1079
|
+
sandbox: { enabled: false },
|
|
1080
|
+
});
|
|
1081
|
+
expect(result.exitCode).toBe(0);
|
|
1082
|
+
expect(result.timedOut).toBe(false);
|
|
1083
|
+
expect(result.stdout.trim()).toBe('hello');
|
|
1084
|
+
});
|
|
1085
|
+
|
|
1086
|
+
it('lets a moderately noisy command run to completion when cap is 0', async () => {
|
|
1087
|
+
// Emit ~40 KiB. Default cap (50 MiB) would also let this through,
|
|
1088
|
+
// but the explicit 0 must not flip into the kill path.
|
|
1089
|
+
const result = await spawnLocalProcess(
|
|
1090
|
+
'bash',
|
|
1091
|
+
['-c', 'head -c 40000 /dev/urandom | base64 | head -c 40000'],
|
|
1092
|
+
{
|
|
1093
|
+
timeoutMs: 10_000,
|
|
1094
|
+
maxOutputChars: 200_000,
|
|
1095
|
+
maxSpawnedBytes: 0,
|
|
1096
|
+
sandbox: { enabled: false },
|
|
1097
|
+
}
|
|
1098
|
+
);
|
|
1099
|
+
expect(result.exitCode).toBe(0);
|
|
1100
|
+
expect(result.timedOut).toBe(false);
|
|
1101
|
+
expect(result.stdout.length).toBeGreaterThan(0);
|
|
1102
|
+
});
|
|
1103
|
+
});
|
|
1104
|
+
|
|
1105
|
+
describe('spill path is ESM-safe (Codex P1 #12)', () => {
|
|
1106
|
+
// The spill path used to do `require('fs')` inside an ESM-shipped
|
|
1107
|
+
// module — fine in CJS test runs, would throw `ReferenceError` in
|
|
1108
|
+
// any ESM consumer that triggered the overflow path. Pin the
|
|
1109
|
+
// happy path here; the static `createWriteStream` import means a
|
|
1110
|
+
// ReferenceError would surface as a test failure regardless of
|
|
1111
|
+
// which build runs the test.
|
|
1112
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
1113
|
+
const { spawnLocalProcess } = require('../local/LocalExecutionEngine');
|
|
1114
|
+
|
|
1115
|
+
it('writes a spill file without a runtime require', async () => {
|
|
1116
|
+
const result = await spawnLocalProcess(
|
|
1117
|
+
'bash',
|
|
1118
|
+
['-c', 'head -c 40000 /dev/urandom | base64 | head -c 40000'],
|
|
1119
|
+
{
|
|
1120
|
+
timeoutMs: 10_000,
|
|
1121
|
+
// tiny inline cap → guaranteed overflow → ensureSpill() runs
|
|
1122
|
+
maxOutputChars: 4_000,
|
|
1123
|
+
maxSpawnedBytes: 1024 * 1024,
|
|
1124
|
+
sandbox: { enabled: false },
|
|
1125
|
+
}
|
|
1126
|
+
);
|
|
1127
|
+
expect(result.exitCode).toBe(0);
|
|
1128
|
+
expect(result.fullOutputPath).toBeTruthy();
|
|
1129
|
+
const fs = await import('fs/promises');
|
|
1130
|
+
const spilled = await fs.readFile(
|
|
1131
|
+
result.fullOutputPath as string,
|
|
1132
|
+
'utf8'
|
|
1133
|
+
);
|
|
1134
|
+
expect(spilled.length).toBeGreaterThan(result.stdout.length);
|
|
1135
|
+
});
|
|
1136
|
+
});
|
|
1137
|
+
|
|
1138
|
+
describe('sandbox config: loopback bridge access (Codex P1 #14)', () => {
|
|
1139
|
+
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
1140
|
+
const { buildSandboxRuntimeConfig } = require('../local/LocalExecutionEngine');
|
|
1141
|
+
|
|
1142
|
+
it('seeds allowedDomains with loopback hosts so the bridge works under sandbox', () => {
|
|
1143
|
+
const cfg = buildSandboxRuntimeConfig({}, '/tmp/ws', () => []);
|
|
1144
|
+
expect(cfg.network.allowedDomains).toEqual(
|
|
1145
|
+
expect.arrayContaining(['127.0.0.1', 'localhost', '::1'])
|
|
1146
|
+
);
|
|
1147
|
+
});
|
|
1148
|
+
|
|
1149
|
+
it('keeps user-supplied allowedDomains and does not duplicate loopback', () => {
|
|
1150
|
+
const cfg = buildSandboxRuntimeConfig(
|
|
1151
|
+
{ sandbox: { network: { allowedDomains: ['api.example.com', '127.0.0.1'] } } },
|
|
1152
|
+
'/tmp/ws',
|
|
1153
|
+
() => []
|
|
1154
|
+
);
|
|
1155
|
+
const occurrences = cfg.network.allowedDomains.filter(
|
|
1156
|
+
(d: string) => d === '127.0.0.1'
|
|
1157
|
+
).length;
|
|
1158
|
+
expect(occurrences).toBe(1);
|
|
1159
|
+
expect(cfg.network.allowedDomains).toContain('api.example.com');
|
|
1160
|
+
});
|
|
1161
|
+
|
|
1162
|
+
it('respects deniedDomains overriding the loopback seed', () => {
|
|
1163
|
+
const cfg = buildSandboxRuntimeConfig(
|
|
1164
|
+
{ sandbox: { network: { deniedDomains: ['127.0.0.1'] } } },
|
|
1165
|
+
'/tmp/ws',
|
|
1166
|
+
() => []
|
|
1167
|
+
);
|
|
1168
|
+
expect(cfg.network.allowedDomains).not.toContain('127.0.0.1');
|
|
1169
|
+
// The other loopback aliases still get seeded — the host opted
|
|
1170
|
+
// out of just `127.0.0.1`, not all loopback.
|
|
1171
|
+
expect(cfg.network.allowedDomains).toEqual(
|
|
1172
|
+
expect.arrayContaining(['localhost', '::1'])
|
|
1173
|
+
);
|
|
1174
|
+
});
|
|
1175
|
+
});
|
|
1176
|
+
|
|
1177
|
+
describe('sandbox allowWrite includes additionalRoots (Codex P2 #15)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { buildSandboxRuntimeConfig } = require('../local/LocalExecutionEngine');

  // Providers for the "runtime default write paths" callback argument.
  const withRuntimeDefault = () => ['/tmp/runtime-default'];
  const withoutRuntimeDefaults = () => [];

  it('adds workspace.additionalRoots to allowWrite alongside cwd', () => {
    const appRoot = '/tmp/repo/app';
    const localConfig = {
      cwd: appRoot,
      workspace: {
        root: appRoot,
        additionalRoots: ['/tmp/repo/shared'],
      },
    };

    const { allowWrite } = buildSandboxRuntimeConfig(
      localConfig,
      appRoot,
      withRuntimeDefault
    ).filesystem;

    expect(allowWrite).toEqual(
      expect.arrayContaining([
        '/tmp/repo/app',
        '/tmp/repo/shared',
        '/tmp/runtime-default',
      ])
    );
  });

  it('resolves relative additionalRoots against the workspace root', () => {
    const appRoot = '/tmp/repo/app';
    const localConfig = {
      cwd: appRoot,
      workspace: {
        root: appRoot,
        additionalRoots: ['../shared'],
      },
    };

    const { allowWrite } = buildSandboxRuntimeConfig(
      localConfig,
      appRoot,
      withoutRuntimeDefaults
    ).filesystem;

    // `../shared` is anchored to the root: /tmp/repo/app -> /tmp/repo/shared.
    expect(allowWrite).toContain('/tmp/repo/shared');
  });

  it('falls back to cwd-only when no additionalRoots are configured', () => {
    const { allowWrite } = buildSandboxRuntimeConfig(
      { cwd: '/tmp/ws' },
      '/tmp/ws',
      withRuntimeDefault
    ).filesystem;

    // Exact match: cwd first, then the runtime default, nothing else.
    expect(allowWrite).toEqual(['/tmp/ws', '/tmp/runtime-default']);
  });

  it('honours an explicit allowWrite override (no auto-seeding)', () => {
    const localConfig = {
      cwd: '/tmp/ws',
      workspace: {
        root: '/tmp/ws',
        additionalRoots: ['/tmp/extra'],
      },
      sandbox: { filesystem: { allowWrite: ['/explicit/path'] } },
    };

    const { allowWrite } = buildSandboxRuntimeConfig(
      localConfig,
      '/tmp/ws',
      withRuntimeDefault
    ).filesystem;

    // Neither cwd, additionalRoots nor runtime defaults may be appended.
    expect(allowWrite).toEqual(['/explicit/path']);
  });
});
|
|
1246
|
+
|
|
1247
|
+
describe('glob_search surfaces ripgrep failures (Codex P2 #13)', () => {
  it('returns an explicit error (not "No files found.") when rg exits non-zero', async () => {
    _resetRipgrepCacheForTests();

    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { spawn: realSpawn } = require('child_process') as typeof import('child_process');

    // Shell script standing in for a failing `rg --files`: stderr + exit 2.
    const failingRgScript = 'printf \'rg: bad glob target\\n\' >&2; exit 2';

    // Spawn backend that makes the `rg --version` availability probe succeed
    // but fails every other rg invocation. Before the fix, glob_search
    // discarded exitCode/stderr and answered "No files found." regardless.
    const fakeRgBackend: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd !== 'rg') {
        return realSpawn(cmd, args, opts);
      }
      const script = args[0] === '--version' ? 'exit 0' : failingRgScript;
      return realSpawn('sh', ['-c', script], opts);
    }) as unknown as t.LocalSpawn;

    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: fakeRgBackend },
    });
    const globTool = tools.find((tt) => tt.name === Constants.GLOB_SEARCH);

    const outcome = await globTool!.invoke({
      id: 'g1',
      name: Constants.GLOB_SEARCH,
      args: { pattern: '**/*' },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).not.toContain('No files found.');
    expect(serialized).toContain('glob_search failed');
    expect(serialized).toContain('bad glob target');
  });
});
|
|
1297
|
+
|
|
1298
|
+
describe('grep_search surfaces ripgrep failures (Codex P2 #23)', () => {
  it('returns an explicit error (not "No matches found.") when rg exits non-zero', async () => {
    _resetRipgrepCacheForTests();

    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { spawn: realSpawn } = require('child_process') as typeof import('child_process');

    // Shell script standing in for a failing rg search: stderr + exit 2.
    const failingRgScript = 'printf \'rg: io error reading dir\\n\' >&2; exit 2';

    // The `rg --version` probe succeeds, every other rg call fails. Before
    // the fix the grep_search rg branch dropped exitCode and reported zero
    // matches on a real rg error.
    const fakeRgBackend: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd !== 'rg') {
        return realSpawn(cmd, args, opts);
      }
      const script = args[0] === '--version' ? 'exit 0' : failingRgScript;
      return realSpawn('sh', ['-c', script], opts);
    }) as unknown as t.LocalSpawn;

    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: fakeRgBackend },
    });
    const grepTool = tools.find((tt) => tt.name === Constants.GREP_SEARCH);

    const outcome = await grepTool!.invoke({
      id: 'gr1',
      name: Constants.GREP_SEARCH,
      args: { pattern: 'needle' },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).not.toContain('No matches found.');
    expect(serialized).toContain('grep_search failed');
    expect(serialized).toContain('io error reading dir');
  });
});
|
|
1348
|
+
});
|
|
1349
|
+
|
|
1350
|
+
describe('codex review fixes (round 6)', () => {
|
|
1351
|
+
describe('destructive guard handles `--` end-of-options (Codex P1 #20)', () => {
  /** Asserts that the validator rejects `command` as a destructive pattern. */
  const expectDestructiveRejection = async (command: string): Promise<void> => {
    const { valid, errors } = await validateBashCommand(command);
    expect(valid).toBe(false);
    expect(errors.join('\n')).toContain('destructive command pattern');
  };

  it('blocks rm -rf -- "/" (-- between flags and quoted target)', async () => {
    await expectDestructiveRejection('rm -rf -- "/"');
  });

  it('blocks rm -rf -- / (-- between flags and bare target)', async () => {
    await expectDestructiveRejection('rm -rf -- /');
  });

  it('blocks chmod -R 777 -- "/"', async () => {
    await expectDestructiveRejection('chmod -R 777 -- "/"');
  });

  it('blocks rm -rf -- "$HOME"', async () => {
    await expectDestructiveRejection('rm -rf -- "$HOME"');
  });

  it('still allows benign `--` usage (no destructive target)', async () => {
    // `--` followed by ordinary arguments and no destructive target
    // must keep validating.
    const { valid } = await validateBashCommand('find . -- -name "*.ts"');
    expect(valid).toBe(true);
  });
});
|
|
1382
|
+
|
|
1383
|
+
describe('compile_check enforces validateBashCommand + readOnly (Codex P1 #21)', () => {
  type CompileTool = ReturnType<typeof createCompileCheckTool>;

  /** Invokes compile_check with a command override and returns the serialized result. */
  const runOverride = async (
    compile: CompileTool,
    id: string,
    command: string
  ): Promise<string> => {
    const outcome = await compile.invoke({
      id,
      name: Constants.COMPILE_CHECK,
      args: { command },
      type: 'tool_call',
    });
    return JSON.stringify(outcome);
  };

  it('refuses a destructive command override (rm -rf "/")', async () => {
    const cwd = await createTempDir();
    const compile = createCompileCheckTool({ cwd });

    const serialized = await runOverride(compile, 'cc1', 'rm -rf "/"');

    expect(serialized).toContain('compile_check refused to run');
    expect(serialized).toContain('destructive command pattern');
  });

  it('refuses a mutating command override under readOnly: true', async () => {
    const cwd = await createTempDir();
    const compile = createCompileCheckTool({ cwd, readOnly: true });

    // `touch` is in mutatingCommandPattern — fine outside readOnly,
    // blocked under readOnly.
    const serialized = await runOverride(
      compile,
      'cc2',
      'touch /tmp/lc-cc-should-not-create'
    );

    expect(serialized).toContain('compile_check refused to run');
    expect(serialized).toMatch(/read-only|mutate/i);
  });

  it('still allows benign override commands (echo)', async () => {
    const cwd = await createTempDir();
    const compile = createCompileCheckTool({ cwd });

    const serialized = await runOverride(compile, 'cc3', 'echo hello');

    expect(serialized).not.toContain('refused to run');
  });
});
|
|
1427
|
+
});
|
|
1428
|
+
|
|
1429
|
+
describe('comprehensive review (round 7) — manual finding C', () => {
|
|
1430
|
+
describe('nested-shell destructive payload (manual #C)', () => {
  /** Asserts that a destructive payload wrapped in a nested shell is rejected. */
  const expectNestedRejection = async (command: string): Promise<void> => {
    const { valid, errors } = await validateBashCommand(command);
    expect(valid).toBe(false);
    expect(errors.join('\n')).toMatch(/destructive command pattern/);
  };

  it('blocks bash -lc "rm -rf $HOME"', async () => {
    await expectNestedRejection('bash -lc "rm -rf $HOME"');
  });

  it('blocks sh -c "chmod -R 777 /"', async () => {
    // The command under test single-quotes the payload.
    await expectNestedRejection("sh -c 'chmod -R 777 /'");
  });

  it('blocks eval "rm -rf /"', async () => {
    // The command under test single-quotes the payload.
    await expectNestedRejection("eval 'rm -rf /'");
  });

  it('still allows benign nested shell (echo)', async () => {
    const { valid } = await validateBashCommand('bash -lc "echo hello"');
    expect(valid).toBe(true);
  });
});
|
|
1454
|
+
});
|
|
1455
|
+
|
|
1456
|
+
describe('comprehensive review (round 7) — manual finding D', () => {
|
|
1457
|
+
describe('fallback grep DoS guardrails', () => {
  // Each test forces the fallback by making ripgrep look unavailable.
  // Clear the cached availability result afterwards so that state does
  // not leak into later tests.
  afterEach(() => {
    _resetRipgrepCacheForTests();
  });

  /**
   * Builds a grep_search tool whose spawn backend fails every `rg`
   * invocation (exit 127), so the availability probe fails and the Node
   * fallback is taken regardless of whether ripgrep is installed on the
   * machine running the suite.
   *
   * Previously these tests only asserted inside
   * `if (text.includes('node-fallback'))`, which made them vacuous on any
   * host with ripgrep installed.
   */
  const createFallbackGrepTool = async () => {
    _resetRipgrepCacheForTests();
    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;
    const noRgBackend: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd === 'rg') {
        return realSpawn('sh', ['-c', 'exit 127'], opts);
      }
      return realSpawn(cmd, args, opts);
    }) as unknown as t.LocalSpawn;

    const cwd = await createTempDir();
    const bundle = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: noRgBackend },
    });
    return bundle.tools.find((tt) => tt.name === Constants.GREP_SEARCH);
  };

  it('rejects oversize patterns before compile', async () => {
    const grepTool = await createFallbackGrepTool();
    const result = await grepTool!.invoke({
      id: 'g-long',
      name: Constants.GREP_SEARCH,
      // 2 KiB pattern — over the 1 KiB cap.
      args: { pattern: 'a'.repeat(2048) },
      type: 'tool_call',
    });
    const text = JSON.stringify(result);
    expect(text).toContain('grep_search refused the pattern');
    expect(text).toContain('exceeds');
  });

  it('rejects nested-quantifier patterns (catastrophic backtracking)', async () => {
    const grepTool = await createFallbackGrepTool();
    const result = await grepTool!.invoke({
      id: 'g-evil',
      name: Constants.GREP_SEARCH,
      args: { pattern: '(a+)+$' },
      type: 'tool_call',
    });
    const text = JSON.stringify(result);
    expect(text).toContain('catastrophic backtracking');
  });
});
|
|
1500
|
+
});
|
|
1501
|
+
|
|
1502
|
+
describe('comprehensive review (round 7) — manual finding E', () => {
|
|
1503
|
+
describe('fileCheckpointer exposed via ToolNode auto-bind path', () => {
  it('Run/ToolNode-style bind makes the checkpointer reachable when fileCheckpointing is true', () => {
    const checkpointingNode = new ToolNode({
      tools: [],
      toolExecution: {
        engine: 'local',
        local: { fileCheckpointing: true },
      },
    });

    const checkpointer = checkpointingNode.getFileCheckpointer();

    // The checkpointer exists and exposes both halves of its API.
    expect(checkpointer).toBeDefined();
    expect(typeof checkpointer?.captureBeforeWrite).toBe('function');
    expect(typeof checkpointer?.rewind).toBe('function');
  });

  it('returns undefined when fileCheckpointing is not enabled', () => {
    const plainNode = new ToolNode({
      tools: [],
      toolExecution: { engine: 'local' },
    });

    expect(plainNode.getFileCheckpointer()).toBeUndefined();
  });
});
|
|
1526
|
+
|
|
1527
|
+
describe('fileCheckpointer reachable through Run.getFileCheckpointer / Run.rewindFiles (audit-of-audit follow-up)', () => {
|
|
1528
|
+
// The round-7 fix exposed `getFileCheckpointer()` on ToolNode but
|
|
1529
|
+
// the normal `Run.create(...)` path constructs the ToolNode inline
|
|
1530
|
+
// inside StandardGraph and dropped the reference, so the public
|
|
1531
|
+
// `RunConfig.toolExecution.local.fileCheckpointing` flag was still
|
|
1532
|
+
// a no-op for Run callers (only direct `new ToolNode(...)` users
|
|
1533
|
+
// could reach it). Pin the round-trip: a Run constructed through
|
|
1534
|
+
// the standard config path must surface the same checkpointer the
|
|
1535
|
+
// graph wired into its ToolNode, and `Run.rewindFiles()` must
|
|
1536
|
+
// restore captured paths.
|
|
1537
|
+
it('exposes the checkpointer via Run.getFileCheckpointer + restores through Run.rewindFiles', async () => {
  const { Run } = await import('@/run');
  const { readFile, writeFile } = await import('fs/promises');

  // Seed a file with known contents inside a fresh workspace.
  const cwd = await createTempDir();
  const trackedFile = join(cwd, 'tracked.txt');
  await writeFile(trackedFile, 'before\n');

  const run = await Run.create<t.IState>({
    runId: 'run-checkpoint-roundtrip',
    graphConfig: {
      type: 'standard',
      llmConfig: { provider: Providers.OPENAI, model: 'gpt-4o' },
    },
    toolExecution: {
      engine: 'local',
      local: { cwd, fileCheckpointing: true },
    },
  });

  // The checkpointer must be reachable directly from the Run instance.
  const checkpointer = run.getFileCheckpointer();
  expect(checkpointer).toBeDefined();

  // Snapshot, overwrite, then roll back through the Run-level API.
  await checkpointer!.captureBeforeWrite(trackedFile);
  await writeFile(trackedFile, 'mutated\n');
  const restoredCount = await run.rewindFiles();

  expect(restoredCount).toBeGreaterThanOrEqual(1);
  expect(await readFile(trackedFile, 'utf8')).toBe('before\n');
});
|
|
1570
|
+
|
|
1571
|
+
it('Run.rewindFiles returns 0 when fileCheckpointing is disabled', async () => {
  const { Run } = await import('@/run');

  // Local engine without the `local.fileCheckpointing` flag.
  const runWithoutCheckpointing = await Run.create<t.IState>({
    runId: 'run-no-checkpoint',
    graphConfig: {
      type: 'standard',
      llmConfig: { provider: Providers.OPENAI, model: 'gpt-4o' },
    },
    toolExecution: { engine: 'local' },
  });

  // No checkpointer is exposed, and rewinding restores nothing.
  expect(runWithoutCheckpointing.getFileCheckpointer()).toBeUndefined();
  const restoredCount = await runWithoutCheckpointing.rewindFiles();
  expect(restoredCount).toBe(0);
});
|
|
1584
|
+
|
|
1585
|
+
it('checkpointer survives Graph.clearHeavyState so post-completion rewind works (Codex P1 #32)', async () => {
  // The original round-7 wiring nulled `_fileCheckpointer` in
  // clearHeavyState — but processStream calls clearHeavyState in its
  // finally block, so the host could never reach rewindFiles AFTER the
  // run completed (which is exactly when rollback is most often needed).
  // Pin that calling clearHeavyState directly DOES NOT drop the
  // checkpointer.
  const { Run } = await import('@/run');
  const fs = await import('fs/promises');
  const cwd = await createTempDir();
  const file = join(cwd, 'after-completion.txt');
  await fs.writeFile(file, 'pre-run\n');

  const run = await Run.create<t.IState>({
    runId: 'run-cp-survives-clear',
    graphConfig: {
      type: 'standard',
      llmConfig: { provider: Providers.OPENAI, model: 'gpt-4o' },
    },
    toolExecution: {
      engine: 'local',
      local: { cwd, fileCheckpointing: true },
    },
  });
  const cp = run.getFileCheckpointer();
  expect(cp).toBeDefined();

  await cp!.captureBeforeWrite(file);
  await fs.writeFile(file, 'mutated-by-tool\n');

  // Simulate end-of-run cleanup (what processStream's finally block
  // does). Fail loudly if there is no Graph: with `run.Graph?.…` the
  // cleanup would be silently skipped and the test would pass without
  // ever exercising clearHeavyState.
  const graph = run.Graph;
  if (graph == null) {
    throw new Error(
      'Run.create did not construct a Graph; clearHeavyState was never exercised'
    );
  }
  graph.clearHeavyState();

  // Same checkpointer instance must still be reachable AFTER
  // clearHeavyState — that's the whole point of the fix.
  expect(run.getFileCheckpointer()).toBe(cp);

  // Host calls rewindFiles after processStream returned.
  const restored = await run.rewindFiles();
  expect(restored).toBeGreaterThanOrEqual(1);
  expect(await fs.readFile(file, 'utf8')).toBe('pre-run\n');
});
|
|
1628
|
+
});
|
|
1629
|
+
});
|
|
1630
|
+
|
|
1631
|
+
describe('comprehensive review (round 8) — Codex P1 #24 / P1 #25', () => {
|
|
1632
|
+
describe('JSON post-edit syntax check uses WorkspaceFS (Codex P1 #24)', () => {
  /** Stub for WorkspaceFS members the JSON checker is not expected to call. */
  const notImplemented = async (): Promise<never> => {
    throw new Error('not implemented');
  };

  /**
   * Builds an object with the WorkspaceFS members these tests need. Only
   * `readFile` is supplied by the caller; everything else is an inert stub.
   */
  const stubWorkspaceFs = (
    readFile: (p: string, _enc?: 'utf8') => Promise<string>
  ) => ({
    readFile,
    writeFile: async () => undefined,
    stat: notImplemented,
    readdir: async () => [],
    mkdir: async () => undefined,
    realpath: async (p: string) => p,
    unlink: async () => undefined,
    open: notImplemented,
  });

  it('routes the JSON read through `local.exec.fs` instead of host fs', async () => {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { runPostEditSyntaxCheck } = require('../local/syntaxCheck');

    // Record every path the checker reads through the injected fs. If the
    // checker went to the host filesystem instead, `reads` would stay
    // empty, and per the original test notes the missing host file would
    // be treated as `ok: true` — a false pass. The `reads` assertion below
    // is what rules that out.
    const reads: string[] = [];
    const fakeFs = stubWorkspaceFs(async (p: string, _enc?: 'utf8') => {
      reads.push(p);
      return '{"valid": true}';
    });

    const verdict = await runPostEditSyntaxCheck('/virtual/file.json', {
      exec: { fs: fakeFs as unknown as never },
    });

    expect(verdict?.ok).toBe(true);
    expect(reads).toEqual(['/virtual/file.json']);
  });

  it('flags invalid JSON returned by the WorkspaceFS', async () => {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { runPostEditSyntaxCheck } = require('../local/syntaxCheck');

    // The injected fs hands back malformed JSON for any path.
    const fakeFs = stubWorkspaceFs(async () => '{ invalid: json');

    const verdict = await runPostEditSyntaxCheck('/virtual/bad.json', {
      exec: { fs: fakeFs as unknown as never },
    });

    expect(verdict?.ok).toBe(false);
    expect(verdict?.checker).toBe('JSON.parse');
  });
});
|
|
1695
|
+
|
|
1696
|
+
describe('compile_check detect uses WorkspaceFS (Codex P1 #25)', () => {
  it('routes project-marker probes through `local.exec.fs`', async () => {
    // Custom FS that pretends `tsconfig.json` exists at the cwd and records
    // every path it is asked to stat. If detect bypassed this fs and used
    // host fs/promises, no tsconfig.json probe would be recorded here.
    const stats: string[] = [];
    const fakeFs = {
      readFile: async () => '',
      writeFile: async () => undefined,
      stat: async (p: string) => {
        stats.push(p);
        if (p.endsWith('tsconfig.json')) {
          return {
            isFile: () => true,
            isDirectory: () => false,
            size: 0,
          };
        }
        throw new Error('ENOENT');
      },
      readdir: async () => [],
      mkdir: async () => undefined,
      realpath: async (p: string) => p,
      unlink: async () => undefined,
      open: async () => {
        throw new Error('not implemented');
      },
    };

    const compile = createCompileCheckTool({
      cwd: '/virtual/repo',
      exec: { fs: fakeFs as unknown as never },
    });

    // Invoke WITHOUT a `command` override so the tool has to auto-detect
    // the project type. (An earlier version of this test first invoked the
    // tool with an override, whose result was discarded and which, per its
    // own notes, never reached detect — that dead call has been removed.)
    await compile.invoke({
      id: 'cc',
      name: Constants.COMPILE_CHECK,
      args: {},
      type: 'tool_call',
    });

    // The marker probes happen BEFORE the spawn, so even if the spawn
    // fails the stats list captures what detect saw.
    expect(stats.some((p) => p.endsWith('tsconfig.json'))).toBe(true);
  });
});
|
|
1763
|
+
});
|
|
1764
|
+
|
|
1765
|
+
describe('comprehensive review (round 9) — Codex P1 (overflow-killed) + audit findings', () => {
|
|
1766
|
+
describe('overflow-killed processes report as failures (Codex P1)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { spawnLocalProcess } = require('../local/LocalExecutionEngine');

  it('reports overflowKilled=true and a non-null exit code when maxSpawnedBytes is exceeded', async () => {
    // `yes` never stops writing, so a 16 KiB cap trips the overflow guard
    // long before the 30 s timeout could.
    const { overflowKilled, exitCode, timedOut } = await spawnLocalProcess(
      'yes',
      [],
      {
        timeoutMs: 30_000,
        maxSpawnedBytes: 16 * 1024,
        sandbox: { enabled: false },
      }
    );

    expect(overflowKilled).toBe(true);
    // A SIGKILL'd child has exitCode=null in Node; the engine is expected
    // to substitute a non-zero status (137 = 128 + SIGKILL) so callers see
    // a failure.
    expect(exitCode).not.toBeNull();
    expect(exitCode).not.toBe(0);
    expect(timedOut).toBe(false);
  });

  it('formatLocalOutput surfaces the killed flag', async () => {
    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      maxSpawnedBytes: 16 * 1024,
      timeoutMs: 30_000,
      sandbox: { enabled: false },
    });
    const bashTool = tools.find((tt) => tt.name === Constants.BASH_TOOL);

    const outcome = await bashTool!.invoke({
      id: 'b1',
      name: Constants.BASH_TOOL,
      args: { command: 'yes' },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).toContain('killed: true');
    expect(serialized).toContain('local.maxSpawnedBytes');
  });
});
|
|
1812
|
+
|
|
1813
|
+
describe('signal-killed processes report as failures (Codex P2 — generalizes the overflow fix)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { spawnLocalProcess } = require('../local/LocalExecutionEngine');

  // Prints a line, then the shell sends SIGKILL to its own pid.
  const selfKillingScript = 'echo started; kill -9 $$';

  it('synthesizes a non-zero exit code and surfaces the signal name on `kill -9 $$`', async () => {
    const { exitCode, signal } = await spawnLocalProcess(
      'bash',
      ['-c', selfKillingScript],
      { timeoutMs: 5_000, sandbox: { enabled: false } }
    );

    // Whichever process Node attributes the SIGKILL to, the reported exit
    // code must be neither null nor zero.
    expect(exitCode).not.toBeNull();
    expect(exitCode).not.toBe(0);
    // The signal name must be reported and look like a POSIX signal.
    expect(signal).toMatch(/^SIG/);
  });

  it('formatLocalOutput surfaces the signal kill', async () => {
    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      timeoutMs: 5_000,
      sandbox: { enabled: false },
    });
    const bashTool = tools.find((tt) => tt.name === Constants.BASH_TOOL);

    const outcome = await bashTool!.invoke({
      id: 'sig1',
      name: Constants.BASH_TOOL,
      args: { command: selfKillingScript },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).toContain('killed: true');
    expect(serialized).toMatch(/signal=SIG/);
  });
});
|
|
1856
|
+
|
|
1857
|
+
describe('fallback-grep nested-quantifier heuristic catches double-nested groups (audit #1)', () => {
  it('rejects `((a+)+)` (the textbook ReDoS pattern)', async () => {
    _resetRipgrepCacheForTests();

    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { spawn: realSpawn } = require('child_process') as typeof import('child_process');

    // Every `rg` invocation — including the `--version` probe — exits 127,
    // so ripgrep is reported unavailable and the fallback regex compile
    // path is the one under test.
    const noRgBackend: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) =>
      cmd === 'rg'
        ? realSpawn('sh', ['-c', 'exit 127'], opts)
        : realSpawn(cmd, args, opts)) as unknown as t.LocalSpawn;

    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: noRgBackend },
    });
    const grepTool = tools.find((tt) => tt.name === Constants.GREP_SEARCH);

    const outcome = await grepTool!.invoke({
      id: 'gr-evil',
      name: Constants.GREP_SEARCH,
      args: { pattern: '((a+)+)' },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).toContain('grep_search refused the pattern');
    expect(serialized).toContain('catastrophic backtracking');
  });
});
|
|
1897
|
+
|
|
1898
|
+
describe('resolveLocalExecutionTools no longer overwrites bundle tools (audit #4)', () => {
  it('CODE_EXECUTION_TOOLS loop does not re-create tools when coding-tools bundle ran first', () => {
    // Background: the coding-tools bundle creates bash_tool/execute_code/etc.
    // and a later CODE_EXECUTION_TOOLS loop used to replace those instances
    // with fresh ones. This test does not compare tool identities; it only
    // verifies that each bundle member is registered in the node's tool map.
    const node1 = new ToolNode({
      tools: [],
      toolExecution: { engine: 'local' },
    });

    // `toolMap` is reached through a structural cast rather than a public API.
    const { toolMap } = node1 as unknown as { toolMap: Map<string, unknown> };

    expect(toolMap.has(Constants.BASH_TOOL)).toBe(true);

    const bundleNames = [
      Constants.BASH_TOOL,
      Constants.EXECUTE_CODE,
      Constants.PROGRAMMATIC_TOOL_CALLING,
      Constants.BASH_PROGRAMMATIC_TOOL_CALLING,
    ];
    bundleNames.forEach((name) => {
      expect(toolMap.has(name)).toBe(true);
    });
  });
});
|
|
1929
|
+
});
|
|
1930
|
+
|
|
1931
|
+
describe('comprehensive review (round 10) — Codex P1 #28 / P2 #29', () => {
|
|
1932
|
+
describe('SIGKILL escalation defeats SIGTERM-trapping processes (Codex P1 #28)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { spawnLocalProcess } = require('../local/LocalExecutionEngine');

  it('escalates to SIGKILL when timeoutMs elapses and the child traps SIGTERM', async () => {
    // The child ignores SIGTERM and spins forever. Pre-fix, killProcessTree
    // only sent SIGTERM, so the child survived, `close` never fired, and the
    // spawn promise hung past timeoutMs. With the fix, SIGKILL escalation
    // kicks in 2s after the SIGTERM and the child dies unconditionally.
    const sigtermTrappingLoop = "trap '' TERM; while true; do sleep 0.1; done";

    const startedAt = Date.now();
    const outcome = await spawnLocalProcess('bash', ['-c', sigtermTrappingLoop], {
      timeoutMs: 1500,
      sandbox: { enabled: false },
    });
    const elapsedMs = Date.now() - startedAt;

    // Sanity: the spawn has to have actually terminated. With the bug the
    // promise hangs and the test hits its own timeout (10s, set below).
    // Generous upper bound: timeout (1.5s) + escalation (2s) + spawn
    // overhead, so assert under 6s.
    expect(elapsedMs).toBeLessThan(6000);
    expect(outcome.timedOut).toBe(true);
    // The signal field is populated: SIGKILL after escalation, or possibly
    // SIGTERM if the trap didn't take effect on a particular host.
    expect(outcome.signal).toMatch(/^SIG/);
  }, 10_000);
});
|
|
1961
|
+
|
|
1962
|
+
describe('ripgrep cache also keys on env (Codex P1 #34)', () => {
  it('does not bleed an "rg available" verdict from one env to another on the same backend', async () => {
    _resetRipgrepCacheForTests();
    // The same backend instance serves both Runs; only `local.env` varies.
    // Pre-fix the WeakMap cache was keyed on the spawn function alone, so
    // the second Run inherited the first's verdict and tried to use rg
    // under an env without it, failing with ENOENT.
    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;
    const fsp = await import('fs/promises');

    // Backend that reports success for `rg --version` ONLY when the spawned
    // process's env has PATH=/with/rg, and 127 otherwise. This is the
    // structural shape of "rg is on PATH for env A but not env B".
    //
    // The stand-in shell is spawned by absolute path: Node resolves the
    // command through `options.env.PATH` when `env` is supplied, and the
    // fake PATH values used here contain no `sh`.
    const envSensitive: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd === 'rg' && args[0] === '--version') {
        const env = (opts.env ?? {}) as NodeJS.ProcessEnv;
        const exitCode = env.PATH === '/with/rg' ? 0 : 127;
        return realSpawn('/bin/sh', ['-c', `exit ${exitCode}`], opts);
      }
      return realSpawn(cmd, args, opts);
    }) as unknown as t.LocalSpawn;

    const cwdA = await createTempDir();
    const cwdB = await createTempDir();
    await fsp.writeFile(join(cwdA, 'a.ts'), 'needle\n');
    await fsp.writeFile(join(cwdB, 'b.ts'), 'needle\n');

    // Run A: env says rg is available, so the cache records `true` for
    // (backend, env-A).
    const bundleA = createLocalCodingToolBundle({
      cwd: cwdA,
      exec: { spawn: envSensitive },
      env: { PATH: '/with/rg' },
    });
    await bundleA.tools
      .find((tt) => tt.name === Constants.GREP_SEARCH)!
      .invoke({
        id: 'gA',
        name: Constants.GREP_SEARCH,
        args: { pattern: 'needle' },
        type: 'tool_call',
      });

    // Run B: same backend, DIFFERENT env (PATH excludes rg). It must run a
    // fresh probe and fall back to the Node walker, NOT reuse Run A's
    // cached "true". Pre-fix this would attempt to spawn rg with the wrong
    // PATH and surface a tool failure.
    const bundleB = createLocalCodingToolBundle({
      cwd: cwdB,
      exec: { spawn: envSensitive },
      env: { PATH: '/without/rg' },
    });
    const bResult = await bundleB.tools
      .find((tt) => tt.name === Constants.GREP_SEARCH)!
      .invoke({
        id: 'gB',
        name: Constants.GREP_SEARCH,
        args: { pattern: 'needle' },
        type: 'tool_call',
      });

    // The result must show the match (Node fallback ran successfully) and
    // name the fallback engine, not a ripgrep failure.
    const text = JSON.stringify(bResult);
    expect(text).toContain('needle');
    expect(text).toContain('node-fallback');
  });
});
|
|
2043
|
+
|
|
2044
|
+
describe('compile-style runtimes honor local.shell (Codex P2 #29)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { executeLocalCode } = require('../local/LocalExecutionEngine');

  it('routes the rust runtime through `local.shell` instead of bare `bash`', async () => {
    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;

    // Record every command the engine asks the backend to spawn so the
    // shell chosen for the `rs` runtime can be inspected afterwards.
    const spawnedCommands: string[] = [];
    const recordingSpawn: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      spawnedCommands.push(cmd);
      // Never actually compile rust: every spawn is replaced by a
      // trivially succeeding `sh`.
      return realSpawn('sh', ['-c', 'exit 0'], opts);
    }) as unknown as t.LocalSpawn;

    const program = { lang: 'rs', code: 'fn main() {}', args: [] };
    const localConfig = {
      shell: '/bin/sh',
      exec: { spawn: recordingSpawn },
      sandbox: { enabled: false },
    };
    await executeLocalCode(program, localConfig);

    // The rust path's compile-and-run command should have been dispatched
    // via the configured `/bin/sh`, not `bash` / `bash.exe`.
    expect(spawnedCommands[0]).toBe('/bin/sh');
  });
});
|
|
2076
|
+
});
|
|
2077
|
+
|
|
2078
|
+
describe('comprehensive review (round 12) — Codex P1 #36', () => {
|
|
2079
|
+
describe('granular workspace flags override the legacy allowOutsideWorkspace', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { getWriteRoots, getReadRoots } = require('../local/LocalExecutionEngine');

  it('workspace.allowWriteOutside=false beats allowOutsideWorkspace=true (Codex P1 #36)', () => {
    // Pre-fix the OR short-circuited on the legacy flag and returned null
    // (skip clamp) even though the host had explicitly tightened the
    // granular flag during migration.
    const writeRoots = getWriteRoots({
      cwd: '/tmp/ws',
      workspace: { root: '/tmp/ws', allowWriteOutside: false },
      allowOutsideWorkspace: true,
    });
    expect(writeRoots).not.toBeNull();
    expect(writeRoots).toContain('/tmp/ws');
  });

  it('workspace.allowReadOutside=false beats allowOutsideWorkspace=true', () => {
    const readRoots = getReadRoots({
      cwd: '/tmp/ws',
      workspace: { root: '/tmp/ws', allowReadOutside: false },
      allowOutsideWorkspace: true,
    });
    expect(readRoots).not.toBeNull();
    expect(readRoots).toContain('/tmp/ws');
  });

  it('workspace.allowWriteOutside=true still permits writes outside', () => {
    // A null result is what the tests here treat as "no clamp".
    expect(
      getWriteRoots({
        cwd: '/tmp/ws',
        workspace: { root: '/tmp/ws', allowWriteOutside: true },
      })
    ).toBeNull();
  });

  it('legacy allowOutsideWorkspace=true still works when granular flag is unset', () => {
    expect(
      getWriteRoots({
        cwd: '/tmp/ws',
        workspace: { root: '/tmp/ws' },
        allowOutsideWorkspace: true,
      })
    ).toBeNull();
  });

  it('default (no flags) returns the workspace boundary for both read and write', () => {
    const defaults = { cwd: '/tmp/ws', workspace: { root: '/tmp/ws' } };
    const expectedRoots = ['/tmp/ws'];
    expect(getWriteRoots(defaults)).toEqual(expectedRoots);
    expect(getReadRoots(defaults)).toEqual(expectedRoots);
  });
});
|
|
2129
|
+
});
|
|
2130
|
+
|
|
2131
|
+
describe('comprehensive review (round 14) — Codex P1 #37 + P2 #38/#40/#41', () => {
|
|
2132
|
+
describe('destructive path normalization (Codex P1 #37)', () => {
  // [command, human-readable label]: destructive ops whose target is a
  // home-directory spelling with a trailing slash, bare or quoted.
  const trailingSlashCommands: Array<[string, string]> = [
    ['rm -rf $HOME/', 'trailing slash on $HOME'],
    ['rm -rf ~/', 'trailing slash on ~'],
    ['rm -rf ${HOME}/', 'trailing slash on ${HOME}'],
    ['rm -rf "$HOME/"', 'quoted $HOME with trailing slash'],
    ['rm -rf "~/"', 'quoted ~ with trailing slash'],
    ['rm -rf "${HOME}/"', 'quoted ${HOME} with trailing slash'],
    ['chmod -R 777 ~/', 'chmod with trailing slash'],
    ['chmod -R 777 "$HOME/"', 'quoted chmod with trailing slash'],
  ];

  it.each(trailingSlashCommands)('blocks %s (%s)', async (cmd) => {
    const verdict = await validateBashCommand(cmd);
    expect(verdict.valid).toBe(false);
    const joinedErrors = verdict.errors.join('\n');
    expect(joinedErrors).toMatch(/destructive command pattern/);
  });

  it('still allows benign trailing-slash commands', async () => {
    // A trailing slash alone must not trip the validator.
    const verdict = await validateBashCommand('ls $HOME/');
    expect(verdict.valid).toBe(true);
  });
});
|
|
2154
|
+
|
|
2155
|
+
describe('destructive wildcard targets (Codex P1 [42])', () => {
  // [command, human-readable label]: destructive ops aimed at a glob over
  // the home directory or the current directory.
  const wildcardCommands: Array<[string, string]> = [
    ['rm -rf $HOME/*', 'glob over $HOME contents'],
    ['rm -rf ~/*', 'glob over ~ contents'],
    ['rm -rf ${HOME}/*', 'glob over ${HOME} contents'],
    ['rm -rf ./*', 'glob over current dir contents'],
    ['rm -rf .*', 'dotfile glob in current dir'],
    ['rm -rf $HOME*', 'prefix glob against $HOME base'],
    ['chmod -R 777 ~/*', 'chmod with glob'],
  ];

  it.each(wildcardCommands)('blocks %s (%s)', async (cmd) => {
    const verdict = await validateBashCommand(cmd);
    expect(verdict.valid).toBe(false);
    const joinedErrors = verdict.errors.join('\n');
    expect(joinedErrors).toMatch(/destructive command pattern/);
  });

  it('does not flag benign glob commands (no rm/chmod/chown)', async () => {
    // The same glob target under a non-destructive command stays valid.
    const verdict = await validateBashCommand('ls $HOME/*');
    expect(verdict.valid).toBe(true);
  });
});
|
|
2176
|
+
|
|
2177
|
+
describe('destructive dot-glob targets (Codex P1 [47])', () => {
  // [command, human-readable label]: destructive ops aimed at a `.*`
  // dotfile glob under the home directory or the filesystem root.
  const dotGlobCommands: Array<[string, string]> = [
    ['rm -rf $HOME/.*', 'dotfile glob under $HOME'],
    ['rm -rf ~/.*', 'dotfile glob under ~'],
    ['rm -rf ${HOME}/.*', 'dotfile glob under ${HOME}'],
    ['rm -rf /.*', 'dotfile glob under root'],
    ['rm -rf "$HOME/.*"', 'quoted dotfile glob under $HOME'],
    ['chmod -R 777 ~/.*', 'chmod dotfile glob'],
  ];

  it.each(dotGlobCommands)('blocks %s (%s)', async (cmd) => {
    const verdict = await validateBashCommand(cmd);
    expect(verdict.valid).toBe(false);
    const joinedErrors = verdict.errors.join('\n');
    expect(joinedErrors).toMatch(/destructive command pattern/);
  });

  it('blocks the positional-arg dot-glob form too', async () => {
    // Same target, but delivered through `$1` rather than written inline
    // in the command string.
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { executeLocalBashWithArgs } = require('../local/LocalExecutionEngine');
    const pending = executeLocalBashWithArgs('rm -rf "$1"', ['/.*'], {
      sandbox: { enabled: false },
      timeoutMs: 5000,
    });
    await expect(pending).rejects.toThrow(
      /destructive command pattern.*protected target/i
    );
  });
});
|
|
2203
|
+
|
|
2204
|
+
describe('strict postEditSyntaxCheck reverts the write on failure (Codex P2 [49])', () => {
  // Error every strict-mode failure below is expected to raise.
  const revertedError = /syntax check failed.*reverted/i;

  // Build a strict-mode coding bundle rooted at `cwd` and pick one tool
  // out of it by name.
  const strictTool = (cwd: string, toolName: string) =>
    createLocalCodingToolBundle({
      cwd,
      postEditSyntaxCheck: 'strict',
    }).tools.find((tt) => tt.name === toolName);

  it('write_file: reverts the file contents to pre-write state when strict check fails', async () => {
    const cwd = await createTempDir();
    const fsp = await import('fs/promises');
    const target = join(cwd, 'a.js');
    await fsp.writeFile(target, '// good\nconsole.log("ok");\n');

    const writeTool = strictTool(cwd, Constants.WRITE_FILE);
    // Bad JS content (missing closing brace): node --check will reject it,
    // and strict mode must throw AND restore the file.
    const attempt = writeTool!.invoke({
      id: 'wf-strict',
      name: Constants.WRITE_FILE,
      args: { file_path: target, content: 'function broken( {\n' },
      type: 'tool_call',
    });
    await expect(attempt).rejects.toThrow(revertedError);

    // Critical assertion: the file on disk is back to its pre-write
    // content. Pre-fix it would still hold the broken content.
    const onDisk = await fsp.readFile(target, 'utf8');
    expect(onDisk).toBe('// good\nconsole.log("ok");\n');
  });

  it('write_file: deletes a brand-new file when strict check fails on first write', async () => {
    const cwd = await createTempDir();
    const fsp = await import('fs/promises');
    const target = join(cwd, 'never-existed.js');

    const writeTool = strictTool(cwd, Constants.WRITE_FILE);
    const attempt = writeTool!.invoke({
      id: 'wf-strict-new',
      name: Constants.WRITE_FILE,
      args: { file_path: target, content: 'function broken( {\n' },
      type: 'tool_call',
    });
    await expect(attempt).rejects.toThrow(revertedError);

    // A brand-new file must be removed on revert, so stat has to fail.
    await expect(fsp.stat(target)).rejects.toThrow();
  });

  it('edit_file: reverts to pre-edit content when strict check fails', async () => {
    const cwd = await createTempDir();
    const fsp = await import('fs/promises');
    const target = join(cwd, 'b.js');
    const original = 'function ok() { return 1; }\n';
    await fsp.writeFile(target, original);

    const editTool = strictTool(cwd, Constants.EDIT_FILE);
    // The replacement leaves an unbalanced call expression behind.
    const attempt = editTool!.invoke({
      id: 'ef-strict',
      name: Constants.EDIT_FILE,
      args: {
        file_path: target,
        old_text: 'return 1;',
        new_text: 'return broken(',
      },
      type: 'tool_call',
    });
    await expect(attempt).rejects.toThrow(revertedError);

    const onDisk = await fsp.readFile(target, 'utf8');
    expect(onDisk).toBe(original);
  });
});
|
|
2289
|
+
|
|
2290
|
+
describe('fallbackGrep skip sentinels do not count as matches (Codex P2 [43])', () => {
  it('reports `matches: 0` when only oversize files are present', async () => {
    _resetRipgrepCacheForTests();
    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;

    // Every `rg` spawn is swapped for a shell that exits 127; anything
    // else passes through to the real spawn.
    const noRgBackend: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      const rgRequested = cmd === 'rg';
      return rgRequested
        ? realSpawn('sh', ['-c', 'exit 127'], opts)
        : realSpawn(cmd, args, opts);
    }) as unknown as t.LocalSpawn;

    const cwd = await createTempDir();
    const fsp = await import('fs/promises');
    // Two oversize (6 MB) files and nothing that contains the pattern.
    const oversizeBytes = 6 * 1024 * 1024;
    await fsp.writeFile(join(cwd, 'big1.txt'), Buffer.alloc(oversizeBytes, 'a'));
    await fsp.writeFile(join(cwd, 'big2.txt'), Buffer.alloc(oversizeBytes, 'a'));

    const { tools } = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: noRgBackend },
    });
    const grepTool = tools.find((tt) => tt.name === Constants.GREP_SEARCH);
    const toolResult = await grepTool!.invoke({
      id: 'g43',
      name: Constants.GREP_SEARCH,
      args: { pattern: 'needle' },
      type: 'tool_call',
    });

    // The whole result is serialized and searched for the artifact
    // fields. Expected artifact shape:
    // { matches: 0, skipped: 2, engine: 'node-fallback' }
    const serialized = JSON.stringify(toolResult);
    expect(serialized).toContain('"matches":0');
    expect(serialized).toContain('"skipped":2');
  });
});
|
|
2339
|
+
|
|
2340
|
+
describe('Send-input direct path threads additionalContextsSink (Codex P2 [44])', () => {
  it('materializes hook additionalContext as a HumanMessage on the Send branch', async () => {
    // The Send-input branch dispatches a single direct tool. It had its own
    // runDirectToolWithLifecycleHooks call site that didn't pass the sink,
    // so PreToolUse additionalContext was dropped on this
    // otherwise-supported input shape.
    const { tool } = await import('@langchain/core/tools');
    const { z } = await import('zod');
    const { HookRegistry } = await import('@/hooks');
    const { HumanMessage } = await import('@langchain/core/messages');

    const echoTool = tool(async () => 'ECHO', {
      name: 'echo',
      description: 'send-input echo',
      schema: z.object({}).passthrough(),
    });

    // One PreToolUse hook that allows the call and attaches context.
    const hooks = new HookRegistry();
    const allowWithContext = async () => ({
      decision: 'allow',
      additionalContext: 'SEND-CTX: policy note via Send branch',
    });
    hooks.register('PreToolUse', { hooks: [allowWithContext] });

    const node = new ToolNode({
      tools: [echoTool],
      eventDrivenMode: true,
      hookRegistry: hooks,
      directToolNames: new Set(['echo']),
    });

    // Send-shaped input: { lg_tool_call: ToolCall }
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const sendInput = { lg_tool_call: { id: 'send_1', name: 'echo', args: {} } } as any;
    const output = (await node.invoke(sendInput)) as
      | { messages: BaseMessage[] }
      | BaseMessage[];
    const messages = Array.isArray(output) ? output : output.messages;

    const carriesSendContext = (m: BaseMessage): boolean =>
      m instanceof HumanMessage &&
      typeof m.content === 'string' &&
      m.content.includes('SEND-CTX');
    expect(messages.find(carriesSendContext)).toBeDefined();
  });
});
|
|
2387
|
+
|
|
2388
|
+
describe('bash args validated against destructive-target patterns (Codex P1 [45])', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { executeLocalBashWithArgs } = require('../local/LocalExecutionEngine');

  // A fresh options object per call: sandbox off, 5s timeout.
  const hostOptions = () => ({
    sandbox: { enabled: false },
    timeoutMs: 5000,
  });

  // Assert that running `command` with `args` is rejected by the
  // destructive-target validation.
  const expectRejected = async (command: string, args: string[]): Promise<void> => {
    await expect(
      executeLocalBashWithArgs(command, args, hostOptions())
    ).rejects.toThrow(/destructive command pattern.*protected target/i);
  };

  it('blocks `rm -rf "$1"` + args=["/"]', async () => {
    await expectRejected('rm -rf "$1"', ['/']);
  });

  it('blocks `chmod -R 777 "$1"` + args=["~/"]', async () => {
    await expectRejected('chmod -R 777 "$1"', ['~/']);
  });

  it('blocks `rm -rf "$@"` + args=["$HOME"]', async () => {
    await expectRejected('rm -rf "$@"', ['$HOME']);
  });

  it('allows benign positional arg use (echo + protected-shape arg)', async () => {
    // `echo` is not in the destructive-op set, so a "/" arg is fine.
    const outcome = await executeLocalBashWithArgs('echo "$1"', ['/'], hostOptions());
    expect(outcome.exitCode).toBe(0);
  });

  it('allows destructive op with non-protected args', async () => {
    // `rm` of a clearly non-protected path inside a tmpdir is fine.
    const cwd = await createTempDir();
    const fsp = await import('fs/promises');
    const doomed = join(cwd, 'goner.txt');
    await fsp.writeFile(doomed, 'bye\n');

    const outcome = await executeLocalBashWithArgs('rm -f "$1"', [doomed], {
      cwd,
      ...hostOptions(),
    });
    expect(outcome.exitCode).toBe(0);
  });
});
|
|
2442
|
+
|
|
2443
|
+
describe('direct-path additionalContext is marked as system metadata (Codex P2 [46])', () => {
  it('attaches `additional_kwargs.role: "system"` to the materialized HumanMessage', async () => {
    const { tool } = await import('@langchain/core/tools');
    const { z } = await import('zod');
    const { HookRegistry } = await import('@/hooks');
    const { HumanMessage, AIMessage } = await import(
      '@langchain/core/messages'
    );

    const echoTool = tool(async () => 'OK', {
      name: 'echo',
      description: 'noop',
      schema: z.object({}).passthrough(),
    });

    // One PreToolUse hook that allows the call and attaches context.
    const hooks = new HookRegistry();
    const allowWithPolicy = async () => ({
      decision: 'allow',
      additionalContext: 'POLICY: be careful',
    });
    hooks.register('PreToolUse', { hooks: [allowWithPolicy] });

    const node = new ToolNode({
      tools: [echoTool],
      eventDrivenMode: true,
      hookRegistry: hooks,
      directToolNames: new Set(['echo']),
    });

    // Messages-shaped input carrying a single tool call for `echo`.
    const aiTurn = new AIMessage({
      content: '',
      tool_calls: [{ id: 'c46', name: 'echo', args: {} }],
    });
    const output = (await node.invoke({ messages: [aiTurn] })) as
      | { messages: BaseMessage[] }
      | BaseMessage[];
    const messages = Array.isArray(output) ? output : output.messages;

    const policyNote = messages.find(
      (m): m is InstanceType<typeof HumanMessage> =>
        m instanceof HumanMessage &&
        typeof m.content === 'string' &&
        m.content.includes('POLICY')
    );
    expect(policyNote).toBeDefined();
    // Same marker the event-driven path sets; the direct path now matches.
    expect(policyNote?.additional_kwargs).toMatchObject({
      role: 'system',
      source: 'hook',
    });
  });
});
|
|
2495
|
+
|
|
2496
|
+
describe('resolveWorkspacePathSafe routes through WorkspaceFS.realpath (Codex P2 #38)', () => {
  it('honors a custom workspace fs realpath impl', async () => {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { resolveWorkspacePathSafe } = require('../local/LocalExecutionEngine');

    // Every path handed to the fake `realpath` lands here.
    const realpathCalls: string[] = [];

    // The custom realpath that the safe-path resolver MUST use. It returns
    // paths unchanged so the lexical containment check succeeds for
    // in-workspace targets.
    const recordingRealpath = async (p: string): Promise<string> => {
      realpathCalls.push(p);
      return p;
    };

    // Stub filesystem: only `realpath` matters to this test; the other
    // members are inert placeholders.
    const fakeFs = {
      readFile: async () => '',
      writeFile: async () => undefined,
      stat: async () => ({
        isFile: () => true,
        isDirectory: () => false,
        size: 0,
      }),
      readdir: async () => [],
      mkdir: async () => undefined,
      realpath: recordingRealpath,
      unlink: async () => undefined,
      open: async () => {
        throw new Error('not implemented');
      },
    };

    await resolveWorkspacePathSafe('/virtual/ws/file.ts', {
      cwd: '/virtual/ws',
      workspace: { root: '/virtual/ws' },
      exec: { fs: fakeFs as unknown as never },
    });

    // The WorkspaceFS realpath must have been called at least once (for
    // either the root or the candidate path). Pre-fix the host
    // fs/promises.realpath was used instead.
    expect(realpathCalls.length).toBeGreaterThan(0);
    const allVirtual = realpathCalls.every((p) => p.startsWith('/virtual/'));
    expect(allVirtual).toBe(true);
  });
});
|
|
2537
|
+
|
|
2538
|
+
describe('syntax-check probe cache also keys on env (Codex P2 #40)', () => {
  it('does not bleed `hasNode` verdict from one env to another on the same backend', async () => {
    _resetSyntaxCheckProbeCacheForTests();
    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;
    const calls: Array<{ cmd: string; env?: NodeJS.ProcessEnv }> = [];

    // Backend that reports `node --version` success ONLY when env.PATH
    // includes 'with-node'. Mirrors P1 #34's shape.
    //
    // The stand-in shell is spawned by absolute path: Node resolves the
    // command through `options.env.PATH` when `env` is supplied, and the
    // fake PATH values used here contain no `sh`.
    const envSensitive: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      calls.push({ cmd, env: opts.env as NodeJS.ProcessEnv });
      if (cmd === 'node' && args[0] === '--version') {
        const env = (opts.env ?? {}) as NodeJS.ProcessEnv;
        const exitCode = env.PATH?.includes('with-node') === true ? 0 : 127;
        return realSpawn('/bin/sh', ['-c', `exit ${exitCode}`], opts);
      }
      // Run all other spawns through a no-op so we don't hit real
      // node/python/bash on the host.
      return realSpawn('/bin/sh', ['-c', 'exit 0'], opts);
    }) as unknown as t.LocalSpawn;

    // Number of `node` spawns recorded so far under the env-B PATH.
    const countEnvBNodeSpawns = (): number =>
      calls.filter(
        (c) => c.cmd === 'node' && c.env?.PATH?.includes('without-node') === true
      ).length;

    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { runPostEditSyntaxCheck } = require('../local/syntaxCheck');
    const cwd = await createTempDir();
    const file = join(cwd, 'a.js');
    await (await import('fs/promises')).writeFile(file, 'function (\n');

    // Run A: env says node IS available, so the probe records `true` for
    // (backend, envA).
    await runPostEditSyntaxCheck(file, {
      exec: { spawn: envSensitive },
      env: { PATH: '/with-node' },
    });

    // Run B: env says node is NOT available. Pre-fix the cache would reuse
    // the (backend) entry and try to actually syntax-check via the missing
    // node. Now: separate cache slot for envB, its own probe, records
    // `false`, skips the check.
    const probeCallsBefore = countEnvBNodeSpawns();
    await runPostEditSyntaxCheck(file, {
      exec: { spawn: envSensitive },
      env: { PATH: '/without-node' },
    });
    const probeCallsAfter = countEnvBNodeSpawns();

    // A fresh probe must have run for envB (count went up).
    expect(probeCallsAfter).toBeGreaterThan(probeCallsBefore);
  });
});
|
|
2597
|
+
|
|
2598
|
+
describe('fallbackGrep skips files larger than the per-file cap (Codex P2 #41)', () => {
  it('emits a sentinel and continues instead of reading multi-MB files into memory', async () => {
    _resetRipgrepCacheForTests();
    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;

    // Force the Node fallback by making rg unavailable: every `rg` spawn
    // is swapped for a shell that exits 127.
    const noRgBackend: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      const rgRequested = cmd === 'rg';
      return rgRequested
        ? realSpawn('sh', ['-c', 'exit 127'], opts)
        : realSpawn(cmd, args, opts);
    }) as unknown as t.LocalSpawn;

    const cwd = await createTempDir();
    const fsp = await import('fs/promises');
    // One small file that matches the search and one 6 MB file (over the
    // 5 MB cap): the fallback must skip the big one with a sentinel and
    // still find the small-file match.
    await fsp.writeFile(join(cwd, 'small.txt'), 'needle\n');
    const oversize = Buffer.alloc(6 * 1024 * 1024, 'a');
    await fsp.writeFile(join(cwd, 'big.txt'), oversize);

    const { tools } = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: noRgBackend },
    });
    const grepTool = tools.find((tt) => tt.name === Constants.GREP_SEARCH);
    const toolResult = await grepTool!.invoke({
      id: 'g41',
      name: Constants.GREP_SEARCH,
      args: { pattern: 'needle' },
      type: 'tool_call',
    });

    const serialized = JSON.stringify(toolResult);
    // The small-file match landed.
    expect(serialized).toContain('needle');
    // The big file got the skip sentinel (didn't OOM, didn't read it into
    // memory).
    expect(serialized).toContain('skipped');
  });
});
|
|
2647
|
+
});
|