@librechat/agents 3.1.77 → 3.1.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/dist/cjs/common/enum.cjs +54 -0
  2. package/dist/cjs/common/enum.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +155 -4
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +291 -0
  6. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -0
  7. package/dist/cjs/main.cjs +90 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/anthropicToolCache.cjs +102 -0
  10. package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -0
  11. package/dist/cjs/messages/prune.cjs +27 -0
  12. package/dist/cjs/messages/prune.cjs.map +1 -1
  13. package/dist/cjs/messages/recency.cjs +99 -0
  14. package/dist/cjs/messages/recency.cjs.map +1 -0
  15. package/dist/cjs/run.cjs +30 -0
  16. package/dist/cjs/run.cjs.map +1 -1
  17. package/dist/cjs/summarization/node.cjs +100 -6
  18. package/dist/cjs/summarization/node.cjs.map +1 -1
  19. package/dist/cjs/tools/ToolNode.cjs +635 -23
  20. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  21. package/dist/cjs/tools/local/CompileCheckTool.cjs +227 -0
  22. package/dist/cjs/tools/local/CompileCheckTool.cjs.map +1 -0
  23. package/dist/cjs/tools/local/FileCheckpointer.cjs +90 -0
  24. package/dist/cjs/tools/local/FileCheckpointer.cjs.map +1 -0
  25. package/dist/cjs/tools/local/LocalCodingTools.cjs +1098 -0
  26. package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -0
  27. package/dist/cjs/tools/local/LocalExecutionEngine.cjs +1042 -0
  28. package/dist/cjs/tools/local/LocalExecutionEngine.cjs.map +1 -0
  29. package/dist/cjs/tools/local/LocalExecutionTools.cjs +122 -0
  30. package/dist/cjs/tools/local/LocalExecutionTools.cjs.map +1 -0
  31. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs +453 -0
  32. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs.map +1 -0
  33. package/dist/cjs/tools/local/attachments.cjs +183 -0
  34. package/dist/cjs/tools/local/attachments.cjs.map +1 -0
  35. package/dist/cjs/tools/local/bashAst.cjs +129 -0
  36. package/dist/cjs/tools/local/bashAst.cjs.map +1 -0
  37. package/dist/cjs/tools/local/editStrategies.cjs +188 -0
  38. package/dist/cjs/tools/local/editStrategies.cjs.map +1 -0
  39. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs +141 -0
  40. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs.map +1 -0
  41. package/dist/cjs/tools/local/syntaxCheck.cjs +182 -0
  42. package/dist/cjs/tools/local/syntaxCheck.cjs.map +1 -0
  43. package/dist/cjs/tools/local/textEncoding.cjs +30 -0
  44. package/dist/cjs/tools/local/textEncoding.cjs.map +1 -0
  45. package/dist/cjs/tools/local/workspaceFS.cjs +51 -0
  46. package/dist/cjs/tools/local/workspaceFS.cjs.map +1 -0
  47. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +31 -0
  48. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  49. package/dist/esm/common/enum.mjs +53 -1
  50. package/dist/esm/common/enum.mjs.map +1 -1
  51. package/dist/esm/graphs/Graph.mjs +156 -5
  52. package/dist/esm/graphs/Graph.mjs.map +1 -1
  53. package/dist/esm/hooks/createWorkspacePolicyHook.mjs +289 -0
  54. package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -0
  55. package/dist/esm/main.mjs +17 -2
  56. package/dist/esm/main.mjs.map +1 -1
  57. package/dist/esm/messages/anthropicToolCache.mjs +99 -0
  58. package/dist/esm/messages/anthropicToolCache.mjs.map +1 -0
  59. package/dist/esm/messages/prune.mjs +26 -1
  60. package/dist/esm/messages/prune.mjs.map +1 -1
  61. package/dist/esm/messages/recency.mjs +97 -0
  62. package/dist/esm/messages/recency.mjs.map +1 -0
  63. package/dist/esm/run.mjs +30 -0
  64. package/dist/esm/run.mjs.map +1 -1
  65. package/dist/esm/summarization/node.mjs +100 -6
  66. package/dist/esm/summarization/node.mjs.map +1 -1
  67. package/dist/esm/tools/ToolNode.mjs +635 -23
  68. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  69. package/dist/esm/tools/local/CompileCheckTool.mjs +223 -0
  70. package/dist/esm/tools/local/CompileCheckTool.mjs.map +1 -0
  71. package/dist/esm/tools/local/FileCheckpointer.mjs +87 -0
  72. package/dist/esm/tools/local/FileCheckpointer.mjs.map +1 -0
  73. package/dist/esm/tools/local/LocalCodingTools.mjs +1075 -0
  74. package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -0
  75. package/dist/esm/tools/local/LocalExecutionEngine.mjs +1022 -0
  76. package/dist/esm/tools/local/LocalExecutionEngine.mjs.map +1 -0
  77. package/dist/esm/tools/local/LocalExecutionTools.mjs +117 -0
  78. package/dist/esm/tools/local/LocalExecutionTools.mjs.map +1 -0
  79. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs +448 -0
  80. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs.map +1 -0
  81. package/dist/esm/tools/local/attachments.mjs +180 -0
  82. package/dist/esm/tools/local/attachments.mjs.map +1 -0
  83. package/dist/esm/tools/local/bashAst.mjs +126 -0
  84. package/dist/esm/tools/local/bashAst.mjs.map +1 -0
  85. package/dist/esm/tools/local/editStrategies.mjs +185 -0
  86. package/dist/esm/tools/local/editStrategies.mjs.map +1 -0
  87. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs +137 -0
  88. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs.map +1 -0
  89. package/dist/esm/tools/local/syntaxCheck.mjs +179 -0
  90. package/dist/esm/tools/local/syntaxCheck.mjs.map +1 -0
  91. package/dist/esm/tools/local/textEncoding.mjs +27 -0
  92. package/dist/esm/tools/local/textEncoding.mjs.map +1 -0
  93. package/dist/esm/tools/local/workspaceFS.mjs +49 -0
  94. package/dist/esm/tools/local/workspaceFS.mjs.map +1 -0
  95. package/dist/esm/tools/subagent/SubagentExecutor.mjs +31 -0
  96. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  97. package/dist/types/common/enum.d.ts +39 -1
  98. package/dist/types/graphs/Graph.d.ts +34 -0
  99. package/dist/types/hooks/createWorkspacePolicyHook.d.ts +95 -0
  100. package/dist/types/hooks/index.d.ts +2 -0
  101. package/dist/types/index.d.ts +1 -0
  102. package/dist/types/messages/anthropicToolCache.d.ts +51 -0
  103. package/dist/types/messages/index.d.ts +2 -0
  104. package/dist/types/messages/prune.d.ts +11 -0
  105. package/dist/types/messages/recency.d.ts +64 -0
  106. package/dist/types/run.d.ts +21 -0
  107. package/dist/types/tools/ToolNode.d.ts +145 -2
  108. package/dist/types/tools/local/CompileCheckTool.d.ts +31 -0
  109. package/dist/types/tools/local/FileCheckpointer.d.ts +39 -0
  110. package/dist/types/tools/local/LocalCodingTools.d.ts +57 -0
  111. package/dist/types/tools/local/LocalExecutionEngine.d.ts +149 -0
  112. package/dist/types/tools/local/LocalExecutionTools.d.ts +9 -0
  113. package/dist/types/tools/local/LocalProgrammaticToolCalling.d.ts +21 -0
  114. package/dist/types/tools/local/attachments.d.ts +84 -0
  115. package/dist/types/tools/local/bashAst.d.ts +11 -0
  116. package/dist/types/tools/local/editStrategies.d.ts +28 -0
  117. package/dist/types/tools/local/index.d.ts +12 -0
  118. package/dist/types/tools/local/resolveLocalExecutionTools.d.ts +38 -0
  119. package/dist/types/tools/local/syntaxCheck.d.ts +42 -0
  120. package/dist/types/tools/local/textEncoding.d.ts +21 -0
  121. package/dist/types/tools/local/workspaceFS.d.ts +49 -0
  122. package/dist/types/tools/subagent/SubagentExecutor.d.ts +29 -0
  123. package/dist/types/types/hitl.d.ts +56 -27
  124. package/dist/types/types/run.d.ts +8 -1
  125. package/dist/types/types/summarize.d.ts +30 -0
  126. package/dist/types/types/tools.d.ts +341 -6
  127. package/package.json +21 -2
  128. package/src/common/enum.ts +54 -0
  129. package/src/graphs/Graph.ts +173 -6
  130. package/src/hooks/__tests__/compactHooks.test.ts +38 -2
  131. package/src/hooks/__tests__/createWorkspacePolicyHook.test.ts +393 -0
  132. package/src/hooks/createWorkspacePolicyHook.ts +355 -0
  133. package/src/hooks/index.ts +6 -0
  134. package/src/index.ts +1 -0
  135. package/src/messages/__tests__/anthropicToolCache.test.ts +125 -0
  136. package/src/messages/__tests__/recency.test.ts +267 -0
  137. package/src/messages/anthropicToolCache.ts +116 -0
  138. package/src/messages/index.ts +2 -0
  139. package/src/messages/prune.ts +27 -1
  140. package/src/messages/recency.ts +155 -0
  141. package/src/run.ts +31 -0
  142. package/src/scripts/compare_pi_vs_ours.ts +840 -0
  143. package/src/scripts/local_engine.ts +166 -0
  144. package/src/scripts/local_engine_checkpointer.ts +205 -0
  145. package/src/scripts/local_engine_compile.ts +263 -0
  146. package/src/scripts/local_engine_hooks.ts +226 -0
  147. package/src/scripts/local_engine_image.ts +201 -0
  148. package/src/scripts/local_engine_ptc.ts +151 -0
  149. package/src/scripts/local_engine_workspace.ts +258 -0
  150. package/src/scripts/subagent-configurable-inheritance.ts +252 -0
  151. package/src/scripts/summarization-recency.ts +462 -0
  152. package/src/specs/prune.test.ts +39 -0
  153. package/src/summarization/__tests__/node.test.ts +499 -3
  154. package/src/summarization/node.ts +124 -7
  155. package/src/tools/ToolNode.ts +769 -20
  156. package/src/tools/__tests__/LocalExecutionTools.test.ts +2647 -0
  157. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +175 -0
  158. package/src/tools/__tests__/SubagentExecutor.test.ts +148 -0
  159. package/src/tools/__tests__/ToolNode.outputReferences.test.ts +114 -0
  160. package/src/tools/__tests__/ToolNode.session.test.ts +84 -0
  161. package/src/tools/__tests__/directToolHITLResumeScope.test.ts +467 -0
  162. package/src/tools/__tests__/directToolHooks.test.ts +411 -0
  163. package/src/tools/__tests__/localToolNames.test.ts +73 -0
  164. package/src/tools/__tests__/workspaceSeam.test.ts +134 -0
  165. package/src/tools/local/CompileCheckTool.ts +278 -0
  166. package/src/tools/local/FileCheckpointer.ts +93 -0
  167. package/src/tools/local/LocalCodingTools.ts +1342 -0
  168. package/src/tools/local/LocalExecutionEngine.ts +1329 -0
  169. package/src/tools/local/LocalExecutionTools.ts +167 -0
  170. package/src/tools/local/LocalProgrammaticToolCalling.ts +594 -0
  171. package/src/tools/local/__tests__/FileCheckpointer.test.ts +120 -0
  172. package/src/tools/local/__tests__/editStrategies.test.ts +134 -0
  173. package/src/tools/local/attachments.ts +251 -0
  174. package/src/tools/local/bashAst.ts +151 -0
  175. package/src/tools/local/editStrategies.ts +188 -0
  176. package/src/tools/local/index.ts +12 -0
  177. package/src/tools/local/resolveLocalExecutionTools.ts +208 -0
  178. package/src/tools/local/syntaxCheck.ts +243 -0
  179. package/src/tools/local/textEncoding.ts +37 -0
  180. package/src/tools/local/workspaceFS.ts +89 -0
  181. package/src/tools/subagent/SubagentExecutor.ts +60 -0
  182. package/src/types/hitl.ts +56 -27
  183. package/src/types/run.ts +12 -1
  184. package/src/types/summarize.ts +31 -0
  185. package/src/types/tools.ts +359 -7
@@ -0,0 +1,2647 @@
1
+ import { z } from 'zod';
2
+ import { tmpdir } from 'os';
3
+ import { join } from 'path';
4
+ import { spawnSync } from 'child_process';
5
+ import {
6
+ mkdtemp,
7
+ rm,
8
+ symlink,
9
+ writeFile as fsWriteFile,
10
+ readFile as fsReadFile,
11
+ } from 'fs/promises';
12
+ import { tool } from '@langchain/core/tools';
13
+ import { AIMessage, ToolMessage } from '@langchain/core/messages';
14
+ import type { BaseMessage } from '@langchain/core/messages';
15
+ import { describe, it, expect, afterEach, beforeEach, jest } from '@jest/globals';
16
+ import type { StructuredToolInterface } from '@langchain/core/tools';
17
+ import type * as t from '@/types';
18
+ import { Constants, Providers } from '@/common';
19
+ import { ToolNode } from '../ToolNode';
20
+ import {
21
+ executeLocalBash,
22
+ executeLocalCode,
23
+ validateBashCommand,
24
+ _resetLocalEngineWarningsForTests,
25
+ } from '../local/LocalExecutionEngine';
26
+ import { resolveLocalToolsForBinding } from '../local/resolveLocalExecutionTools';
27
+ import {
28
+ createLocalCodingToolBundle,
29
+ _resetRipgrepCacheForTests,
30
+ } from '../local/LocalCodingTools';
31
+ import {
32
+ runPostEditSyntaxCheck,
33
+ _resetSyntaxCheckProbeCacheForTests,
34
+ } from '../local/syntaxCheck';
35
+ import { createCompileCheckTool } from '../local/CompileCheckTool';
36
+ import { runBashAstChecks } from '../local/bashAst';
37
+ import { LocalFileCheckpointerImpl } from '../local/FileCheckpointer';
38
+
39
+ const hasPython3 = spawnSync('python3', ['--version']).status === 0; // true only when `python3 --version` exits with status 0; Python-based tests return early when false
40
+
41
+ const tempDirs: string[] = []; // temp directories created by createTempDir(); drained and removed in the top-level afterEach
42
+
43
+ async function createTempDir(): Promise<string> { // Creates a unique temp directory and registers it for cleanup after the test.
44
+ const dir = await mkdtemp(join(tmpdir(), 'lc-local-tools-')); // mkdtemp appends random characters to the 'lc-local-tools-' prefix
45
+ tempDirs.push(dir); // remembered so the top-level afterEach can delete it
46
+ return dir;
47
+ }
48
+
49
+ function createRemoteBashStub(): StructuredToolInterface { // Builds a stand-in tool named Constants.BASH_TOOL whose handler only returns a sentinel string.
50
+ return tool(
51
+ async () => 'remote bash should not run', // sentinel: tests assert this text never appears in tool output
52
+ {
53
+ name: Constants.BASH_TOOL,
54
+ description: 'Remote bash stub',
55
+ schema: z.object({ command: z.string() }),
56
+ }
57
+ ) as unknown as StructuredToolInterface;
58
+ }
59
+
60
+ function messagesFromResult( // Normalises a result to a ToolMessage[]: arrays pass through, objects yield their `messages` property.
61
+ result: ToolMessage[] | { messages: ToolMessage[] }
62
+ ): ToolMessage[] {
63
+ return Array.isArray(result) ? result : result.messages;
64
+ }
65
+
66
+ function aiMessageWithToolCall( // Builds an AIMessage with empty content and exactly one tool call.
67
+ name: string,
68
+ args: Record<string, string | number | boolean>
69
+ ): AIMessage {
70
+ return new AIMessage({
71
+ content: '',
72
+ tool_calls: [
73
+ {
74
+ id: `call_${name}`, // call id is derived from the tool name
75
+ name,
76
+ args,
77
+ },
78
+ ],
79
+ });
80
+ }
81
+
82
+ afterEach(async () => { // After each test, removes every temp directory registered so far.
83
+ await Promise.all(
84
+ tempDirs.splice(0).map((dir) => rm(dir, { recursive: true, force: true })) // splice(0) empties tempDirs and hands back its entries for deletion
85
+ );
86
+ });
87
+
88
+ describe('local execution tools', () => { // Covers bash validation, ToolNode invocation, and tool-binding resolution with toolExecution.engine set to 'local'.
89
+ it('blocks clearly destructive bash commands by default', async () => {
90
+ const result = await validateBashCommand('rm -rf /'); // no options passed, so this exercises the default validation
91
+
92
+ expect(result.valid).toBe(false);
93
+ expect(result.errors.join('\n')).toContain('destructive command pattern');
94
+ });
95
+
96
+ it('replaces a configured remote bash tool when local mode is enabled', async () => {
97
+ const cwd = await createTempDir();
98
+ const node = new ToolNode({
99
+ tools: [createRemoteBashStub()], // the stub must NOT be the one that ends up handling the call
100
+ toolExecution: {
101
+ engine: 'local',
102
+ local: {
103
+ cwd,
104
+ includeCodingTools: false,
105
+ },
106
+ },
107
+ });
108
+
109
+ const result = await node.invoke({
110
+ messages: [
111
+ aiMessageWithToolCall(Constants.BASH_TOOL, {
112
+ command: 'printf local-mode',
113
+ }),
114
+ ],
115
+ });
116
+
117
+ const [message] = messagesFromResult(result as { messages: ToolMessage[] });
118
+ expect(String(message.content)).toContain('local-mode'); // output of the printf command
119
+ expect(String(message.content)).not.toContain('remote bash should not run'); // the stub's sentinel must be absent
120
+ });
121
+
122
+ it('auto-binds the local coding suite in local mode', () => {
123
+ const tools = resolveLocalToolsForBinding({
124
+ toolExecution: { engine: 'local' },
125
+ }) as t.GenericTool[];
126
+ const names = tools.map((localTool) => localTool.name);
127
+
128
+ expect(names).toEqual(
129
+ expect.arrayContaining([ // subset check: additional tool names are allowed
130
+ Constants.EXECUTE_CODE,
131
+ Constants.BASH_TOOL,
132
+ Constants.READ_FILE,
133
+ 'write_file',
134
+ 'edit_file',
135
+ 'grep_search',
136
+ 'glob_search',
137
+ 'list_directory',
138
+ ])
139
+ );
140
+ });
141
+
142
+ it('updates existing code tool bindings when auto-binding is disabled', () => {
143
+ const [bashTool] = resolveLocalToolsForBinding({
144
+ tools: [createRemoteBashStub()],
145
+ toolExecution: {
146
+ engine: 'local',
147
+ local: { includeCodingTools: false },
148
+ },
149
+ }) as t.GenericTool[];
150
+
151
+ expect(bashTool.name).toBe(Constants.BASH_TOOL);
152
+ expect(bashTool.description).toContain('local machine'); // description differs from the stub's 'Remote bash stub'
153
+ });
154
+
155
+ it('can call local coding tools from local programmatic execution', async () => {
156
+ if (!hasPython3) { // NOTE(review): returns early, so the test reports as passed (not skipped) when python3 is missing
157
+ return;
158
+ }
159
+
160
+ const cwd = await createTempDir();
161
+ const node = new ToolNode({
162
+ tools: [],
163
+ toolExecution: {
164
+ engine: 'local',
165
+ local: { cwd },
166
+ },
167
+ });
168
+
169
+ const result = await node.invoke({
170
+ messages: [
171
+ aiMessageWithToolCall(Constants.PROGRAMMATIC_TOOL_CALLING, {
172
+ lang: 'py',
173
+ code: [
174
+ 'await write_file(file_path="ptc.txt", content="from local ptc")',
175
+ 'contents = await read_file(file_path="ptc.txt")',
176
+ 'print(contents)',
177
+ ].join('\n'),
178
+ }),
179
+ ],
180
+ });
181
+
182
+ const [message] = messagesFromResult(result as { messages: ToolMessage[] });
183
+ expect(String(message.content)).toContain('from local ptc');
184
+ });
185
+
186
+ it('can run bash orchestration through run_tools_with_code in local mode', async () => {
187
+ if (!hasPython3) { // NOTE(review): same early-return guard as above
188
+ return;
189
+ }
190
+
191
+ const cwd = await createTempDir();
192
+ const node = new ToolNode({
193
+ tools: [],
194
+ toolExecution: {
195
+ engine: 'local',
196
+ local: { cwd },
197
+ },
198
+ });
199
+
200
+ const result = await node.invoke({
201
+ messages: [
202
+ aiMessageWithToolCall(Constants.PROGRAMMATIC_TOOL_CALLING, { // no `lang` given here, unlike the Python test above
203
+ code: [
204
+ 'write_file \'{"file_path":"bash-ptc.txt","content":"from bash ptc"}\'',
205
+ 'read_file \'{"file_path":"bash-ptc.txt"}\'',
206
+ ].join('\n'),
207
+ }),
208
+ ],
209
+ });
210
+
211
+ const [message] = messagesFromResult(result as { messages: ToolMessage[] });
212
+ expect(String(message.content)).toContain('from bash ptc');
213
+ });
214
+ });
215
+
216
+ describe('local engine bashAst', () => { // Checks runBashAstChecks findings per mode ('auto', 'strict', 'off') and their effect on validateBashCommand.
217
+ it('flags command substitution in auto mode', () => {
218
+ const findings = runBashAstChecks('echo $(whoami)', 'auto');
219
+ expect(findings.some((f) => f.code === 'cmd-subst-dollar-paren')).toBe(true); // only presence is checked here, not severity
220
+ });
221
+
222
+ it('escalates command substitution to deny in strict mode', () => {
223
+ const findings = runBashAstChecks('echo $(whoami)', 'strict');
224
+ const subst = findings.find((f) => f.code === 'cmd-subst-dollar-paren');
225
+ expect(subst?.severity).toBe('deny');
226
+ });
227
+
228
+ it('always denies /proc/<pid>/environ access', () => {
229
+ const findings = runBashAstChecks('cat /proc/1/environ', 'auto'); // 'deny' is expected even in the non-strict 'auto' mode
230
+ expect(findings.some((f) => f.code === 'proc-environ-read' && f.severity === 'deny')).toBe(true);
231
+ });
232
+
233
+ it('never produces findings when off', () => {
234
+ const findings = runBashAstChecks('echo $(whoami)', 'off');
235
+ expect(findings).toHaveLength(0);
236
+ });
237
+
238
+ it('blocks bash commands with a deny finding via validateBashCommand', async () => {
239
+ const result = await validateBashCommand('cat /proc/1/environ', {
240
+ bashAst: 'auto',
241
+ });
242
+ expect(result.valid).toBe(false);
243
+ expect(result.errors.join('\n')).toContain('proc-environ-read'); // the finding code is surfaced in the error text
244
+ });
245
+ });
246
+
247
+ describe('local engine sandbox-off warning', () => { // Verifies when console.warn emits the "without @anthropic-ai/sandbox-runtime" notice.
248
+ let warnSpy: jest.SpiedFunction<typeof console.warn>;
249
+
250
+ beforeEach(() => {
251
+ _resetLocalEngineWarningsForTests(); // reset the engine's warning state so each test starts fresh
252
+ warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => undefined); // silence and capture warnings
253
+ });
254
+
255
+ afterEach(() => {
256
+ warnSpy.mockRestore();
257
+ });
258
+
259
+ it('warns once when running without sandbox', async () => {
260
+ // Real (non-internal) executions should warn; the internal
261
+ // `bash -n` syntax preflight inside validateBashCommand opts out
262
+ // (Codex P2 — otherwise the latch would flip on a probe and hide
263
+ // the warning when a genuinely-unsandboxed command later runs).
264
+ await executeLocalBash('echo hi');
265
+ await executeLocalBash('echo bye');
266
+ const sandboxOffMessages = warnSpy.mock.calls.filter((call) =>
267
+ String(call[0]).includes('without @anthropic-ai/sandbox-runtime')
268
+ );
269
+ expect(sandboxOffMessages).toHaveLength(1); // two executions, a single warning
270
+ });
271
+
272
+ it('does NOT warn for internal probes when the run actually has sandbox enabled (Codex P2)', async () => {
273
+ // Pre-fix: validateBashCommand's bash -n preflight (which forces
274
+ // sandbox: false for itself, since you can't sandbox a syntax
275
+ // probe) would emit a misleading "sandbox is off" warning AND
276
+ // flip `sandboxOffWarned = true` even when the run had
277
+ // `sandbox.enabled: true` — hiding the warning when a real
278
+ // unsandboxed execution later happened. With the fix internal
279
+ // probes pass `{ internal: true }` to spawnLocalProcess and
280
+ // suppress both the message and the latch.
281
+ await validateBashCommand('echo hi', { sandbox: { enabled: true } });
282
+ const sandboxOffMessages = warnSpy.mock.calls.filter((call) =>
283
+ String(call[0]).includes('without @anthropic-ai/sandbox-runtime')
284
+ );
285
+ expect(sandboxOffMessages).toHaveLength(0);
286
+ });
287
+ });
288
+
289
+ describe('LocalFileCheckpointer', () => { // Exercises captureBeforeWrite()/rewind() directly and through the coding tool bundle.
290
+ it('snapshots and restores existing files', async () => {
291
+ const dir = await createTempDir();
292
+ const file = join(dir, 'a.txt');
293
+ await fsWriteFile(file, 'original', 'utf8');
294
+
295
+ const cp = new LocalFileCheckpointerImpl();
296
+ await cp.captureBeforeWrite(file); // capture happens before the file is modified below
297
+
298
+ await fsWriteFile(file, 'modified', 'utf8');
299
+ expect(await fsReadFile(file, 'utf8')).toBe('modified');
300
+
301
+ const restored = await cp.rewind();
302
+ expect(restored).toBe(1); // rewind() reports 1 for the single captured file
303
+ expect(await fsReadFile(file, 'utf8')).toBe('original');
304
+ });
305
+
306
+ it('deletes files that did not exist before the run', async () => {
307
+ const dir = await createTempDir();
308
+ const file = join(dir, 'new.txt');
309
+
310
+ const cp = new LocalFileCheckpointerImpl();
311
+ await cp.captureBeforeWrite(file); // the file does not exist yet at capture time
312
+ await fsWriteFile(file, 'should-be-removed', 'utf8');
313
+
314
+ await cp.rewind();
315
+ await expect(fsReadFile(file, 'utf8')).rejects.toThrow(); // reading fails once the file is gone again
316
+ });
317
+
318
+ it('rewinds tools created via createLocalCodingToolBundle', async () => {
319
+ const cwd = await createTempDir();
320
+ const bundle = createLocalCodingToolBundle({
321
+ cwd,
322
+ fileCheckpointing: true,
323
+ });
324
+ expect(bundle.checkpointer).toBeDefined();
325
+
326
+ const writeTool = bundle.tools.find((tool_) => tool_.name === 'write_file');
327
+ expect(writeTool).toBeDefined();
328
+ await writeTool!.invoke({ file_path: 'cp.txt', content: 'first' });
329
+ await writeTool!.invoke({ file_path: 'cp.txt', content: 'second' }); // second write targets the same path
330
+
331
+ const restored = await bundle.checkpointer!.rewind();
332
+ expect(restored).toBe(1); // two writes to one path still rewind as a single entry
333
+ await expect(fsReadFile(join(cwd, 'cp.txt'), 'utf8')).rejects.toThrow(); // cp.txt did not exist before the writes
334
+ });
335
+ });
336
+
337
+ describe('local read tool guards', () => { // The read tool's handling of binary content, oversized files, and symlinks that lead outside cwd.
338
+ it('refuses to read files containing NUL bytes', async () => {
339
+ const cwd = await createTempDir();
340
+ const binary = join(cwd, 'binary.bin');
341
+ await fsWriteFile(binary, Buffer.from([0x00, 0x01, 0x02])); // first byte is NUL
342
+
343
+ const bundle = createLocalCodingToolBundle({ cwd });
344
+ const readTool = bundle.tools.find((t_) => t_.name === Constants.READ_FILE);
345
+ const result = await readTool!.invoke({ file_path: 'binary.bin' });
346
+ expect(String(result)).toContain('binary file'); // a message is returned; the call does not throw
347
+ });
348
+
349
+ it('returns a stub instead of OOMing on huge files', async () => {
350
+ const cwd = await createTempDir();
351
+ const big = join(cwd, 'big.txt');
352
+ await fsWriteFile(big, 'x'.repeat(2048)); // 2048 bytes, twice the cap configured below
353
+
354
+ const bundle = createLocalCodingToolBundle({
355
+ cwd,
356
+ maxReadBytes: 1024,
357
+ });
358
+ const readTool = bundle.tools.find((t_) => t_.name === Constants.READ_FILE);
359
+ const result = await readTool!.invoke({ file_path: 'big.txt' });
360
+ expect(String(result)).toContain('exceeds the 1024-byte read cap');
361
+ });
362
+
363
+ it('rejects symlink escapes', async () => {
364
+ const cwd = await createTempDir();
365
+ const outside = await createTempDir(); // a second temp dir that is not inside cwd
366
+ const secret = join(outside, 'secret.txt');
367
+ await fsWriteFile(secret, 'top-secret', 'utf8');
368
+ await symlink(outside, join(cwd, 'escape')); // cwd/escape -> outside
369
+
370
+ const bundle = createLocalCodingToolBundle({ cwd });
371
+ const readTool = bundle.tools.find((t_) => t_.name === Constants.READ_FILE);
372
+ await expect(
373
+ readTool!.invoke({ file_path: 'escape/secret.txt' })
374
+ ).rejects.toThrow(/symlink escape/); // unlike the two guards above, this one is expected to throw
375
+ });
376
+ });
377
+
378
+ describe('local programmatic bridge auth', () => { // Runs Python that POSTs to the tool bridge without credentials and expects HTTP 401.
379
+ it('rejects unauthenticated requests to the local bridge', async () => {
380
+ if (!hasPython3) { // NOTE(review): returns early, so the test reports as passed (not skipped) when python3 is missing
381
+ return;
382
+ }
383
+ const cwd = await createTempDir();
384
+ const node = new ToolNode({
385
+ tools: [],
386
+ toolExecution: {
387
+ engine: 'local',
388
+ local: { cwd },
389
+ },
390
+ });
391
+
392
+ const result = await node.invoke({
393
+ messages: [
394
+ aiMessageWithToolCall(Constants.PROGRAMMATIC_TOOL_CALLING, {
395
+ lang: 'py',
396
+ code: [ // the script sends only a Content-Type header; it prints LEAK on success and AUTH=<status> on HTTPError
397
+ 'import os, json, urllib.request, urllib.error',
398
+ 'url = os.environ["BRIDGE_PROBE_URL"] if "BRIDGE_PROBE_URL" in os.environ else __LIBRECHAT_TOOL_BRIDGE',
399
+ 'body = json.dumps({"name":"read_file","input":{"file_path":"x"}}).encode("utf-8")',
400
+ 'try:',
401
+ ' req = urllib.request.Request(url, data=body, headers={"Content-Type":"application/json"}, method="POST")',
402
+ ' urllib.request.urlopen(req, timeout=5)',
403
+ ' print("LEAK")',
404
+ 'except urllib.error.HTTPError as e:',
405
+ ' print(f"AUTH={e.code}")',
406
+ ].join('\n'),
407
+ }),
408
+ ],
409
+ });
410
+
411
+ const [message] = messagesFromResult(result as { messages: ToolMessage[] });
412
+ expect(String(message.content)).toContain('AUTH=401');
413
+ expect(String(message.content)).not.toContain('LEAK');
414
+ });
415
+ });
416
+
417
+ describe('local edit fuzzy matching', () => {
418
+ it('falls back to line-trimmed when trailing whitespace differs', async () => { // old_text lacks the file's trailing spaces, so an exact match is not possible
419
+ const cwd = await createTempDir();
420
+ const file = join(cwd, 'a.ts');
421
+ // Real file has trailing whitespace on every line.
422
+ await fsWriteFile(
423
+ file,
424
+ 'function greet(name: string) { \n return `Hello, ${name}!`; \n}\n',
425
+ 'utf8'
426
+ );
427
+
428
+ const bundle = createLocalCodingToolBundle({ cwd });
429
+ const editTool = bundle.tools.find((tt) => tt.name === 'edit_file');
430
+ const result = await editTool!.invoke({
431
+ file_path: 'a.ts',
432
+ // LLM emits a trailing-whitespace-stripped version.
433
+ old_text:
434
+ 'function greet(name: string) {\n return `Hello, ${name}!`;\n}',
435
+ new_text:
436
+ 'function greet(name: string) {\n return `Hi, ${name}!`;\n}',
437
+ });
438
+ expect(String(result)).toContain('strategies: line-trimmed'); // the tool result names the matching strategy that succeeded
439
+ const after = await fsReadFile(file, 'utf8');
440
+ expect(after).toContain('Hi, ${name}!'); // plain string: `${name}` is literal text here, not interpolation
441
+ });
442
+
443
+ it('falls back to indentation-flexible when LLM strips leading indent', async () => { // old_text has no leading indent, so an exact match is not possible
444
+ const cwd = await createTempDir();
445
+ const file = join(cwd, 'a.ts');
446
+ await fsWriteFile(
447
+ file,
448
+ 'class Foo {\n method() {\n return 1;\n }\n}\n',
449
+ 'utf8'
450
+ );
451
+
452
+ const bundle = createLocalCodingToolBundle({ cwd });
453
+ const editTool = bundle.tools.find((tt) => tt.name === 'edit_file');
454
+ const result = await editTool!.invoke({
455
+ file_path: 'a.ts',
456
+ // LLM stripped the 4-space indent
457
+ old_text: 'method() {\n return 1;\n}',
458
+ new_text: 'method() {\n return 42;\n}',
459
+ });
460
+ expect(String(result)).toMatch(
461
+ /strategies: (indentation-flexible|whitespace-normalized)/ // either of these two strategy names is accepted
462
+ );
463
+ const after = await fsReadFile(file, 'utf8');
464
+ expect(after).toContain('return 42;');
465
+ });
466
+
467
+ it('returns a unified diff in the tool result', async () => { // the edit_file result text should include a diff of the change
468
+ const cwd = await createTempDir();
469
+ const file = join(cwd, 'a.txt');
470
+ await fsWriteFile(file, 'first\nsecond\nthird\n', 'utf8');
471
+ const bundle = createLocalCodingToolBundle({ cwd });
472
+ const editTool = bundle.tools.find((tt) => tt.name === 'edit_file');
473
+ const result = await editTool!.invoke({
474
+ file_path: 'a.txt',
475
+ old_text: 'second',
476
+ new_text: 'SECOND',
477
+ });
478
+ const text = String(result);
479
+ expect(text).toContain('Diff:');
480
+ expect(text).toContain('-second'); // removed line
481
+ expect(text).toContain('+SECOND'); // added line
482
+ });
483
+
484
+ it('preserves CRLF line endings on edit', async () => { // an edit must leave the file's \r\n endings intact
485
+ const cwd = await createTempDir();
486
+ const file = join(cwd, 'a.txt');
487
+ await fsWriteFile(file, 'one\r\ntwo\r\nthree\r\n', 'utf8');
488
+ const bundle = createLocalCodingToolBundle({ cwd });
489
+ const editTool = bundle.tools.find((tt) => tt.name === 'edit_file');
490
+ await editTool!.invoke({
491
+ file_path: 'a.txt',
492
+ old_text: 'two',
493
+ new_text: 'TWO',
494
+ });
495
+ const raw = await fsReadFile(file, 'utf8');
496
+ expect(raw).toBe('one\r\nTWO\r\nthree\r\n'); // exact comparison: only 'two' changed
497
+ });
498
+
499
+ it('preserves UTF-8 BOM on overwrite', async () => { // Seeds a BOM-prefixed file, overwrites it via write_file, and expects the BOM to stay in front of the new content.
500
+ const cwd = await createTempDir();
501
+ const file = join(cwd, 'a.txt');
502
+ const BOM = '\uFEFF'; // explicit escape for the byte-order mark; an empty or invisible literal would make the checks below meaningless
503
+ await fsWriteFile(file, BOM + 'hello\n', 'utf8');
504
+ const bundle = createLocalCodingToolBundle({ cwd });
505
+ const writeTool = bundle.tools.find((tt) => tt.name === 'write_file');
506
+ await writeTool!.invoke({ file_path: 'a.txt', content: 'goodbye\n' }); // the new content itself carries no BOM
507
+ const raw = await fsReadFile(file, 'utf8'); // a 'utf8' read keeps a leading U+FEFF in the returned string
508
+ expect(raw.startsWith(BOM)).toBe(true);
509
+ expect(raw.slice(BOM.length)).toBe('goodbye\n'); // everything after the BOM is exactly the new content
510
+ });
511
+ });
512
+
513
+ describe('local read attachments', () => {
514
+ // Smallest valid 1x1 PNG.
515
+ const tinyPng = Buffer.from(
516
+ '89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4890000000a49444154789c63000100000005000165be7e6e0000000049454e44ae426082',
517
+ 'hex'
518
+ );
519
+
520
+ it('returns binary stub by default', async () => {
521
+ const cwd = await createTempDir();
522
+ const file = join(cwd, 'tiny.png');
523
+ await fsWriteFile(file, tinyPng);
524
+ const bundle = createLocalCodingToolBundle({ cwd });
525
+ const readTool = bundle.tools.find((tt) => tt.name === Constants.READ_FILE);
526
+ const result = await readTool!.invoke({ file_path: 'tiny.png' });
527
+ expect(String(result)).toContain('binary file');
528
+ });
529
+
530
+ it('returns an image_url content block when attachReadAttachments=images-only', async () => {
531
+ const cwd = await createTempDir();
532
+ const file = join(cwd, 'tiny.png');
533
+ await fsWriteFile(file, tinyPng);
534
+
535
+ const bundle = createLocalCodingToolBundle({
536
+ cwd,
537
+ attachReadAttachments: 'images-only',
538
+ });
539
+ const readTool = bundle.tools.find((tt) => tt.name === Constants.READ_FILE);
540
+ // Invoking via a tool_call envelope (rather than raw args) is what
541
+ // makes the LangChain tool wrap the result as a ToolMessage with
542
+ // `.content` and `.artifact` populated.
543
+ const message = (await readTool!.invoke({
544
+ id: 'call_image',
545
+ name: Constants.READ_FILE,
546
+ args: { file_path: 'tiny.png' },
547
+ type: 'tool_call',
548
+ })) as { content: unknown; artifact: unknown };
549
+ expect(Array.isArray(message.content)).toBe(true);
550
+ const blocks = message.content as Array<{
551
+ type: string;
552
+ image_url?: { url: string };
553
+ }>;
554
+ const imageBlock = blocks.find((b) => b.type === 'image_url');
555
+ expect(imageBlock?.image_url?.url).toMatch(/^data:image\/png;base64,/);
556
+ expect(blocks.find((b) => b.type === 'text')).toBeDefined();
557
+ expect(message.artifact).toMatchObject({
558
+ mime: 'image/png',
559
+ attachment: 'image',
560
+ });
561
+ });
562
+
563
it('refuses oversize images even when embedding is on', async () => {
  // Fixture: the real tiny PNG bytes followed by zero padding up to 200
  // bytes, read with `maxAttachmentBytes` set to 100 so the file is over
  // the configured cap.
  const paddedSize = 200;
  const workDir = await createTempDir();
  const zeroFill = Buffer.alloc(paddedSize - tinyPng.length, 0);
  await fsWriteFile(join(workDir, 'big.png'), Buffer.concat([tinyPng, zeroFill]));

  const { tools } = createLocalCodingToolBundle({
    cwd: workDir,
    attachReadAttachments: 'images-only',
    maxAttachmentBytes: 100,
  });
  const reader = tools.find((candidate) => candidate.name === Constants.READ_FILE);

  const outcome = await reader!.invoke({ file_path: 'big.png' });
  expect(String(outcome)).toMatch(/Refusing to embed/);
});
586
+
587
it('still reads text files normally when embedding is on', async () => {
  // Plain text must come back as text even with image embedding enabled.
  const workDir = await createTempDir();
  await fsWriteFile(join(workDir, 'a.txt'), 'hello world\n', 'utf8');

  const { tools } = createLocalCodingToolBundle({
    cwd: workDir,
    attachReadAttachments: 'images-only',
  });
  const reader = tools.find((candidate) => candidate.name === Constants.READ_FILE);

  const outcome = await reader!.invoke({ file_path: 'a.txt' });
  expect(String(outcome)).toContain('hello world');
});
599
+ });
600
+
601
describe('post-edit syntax check', () => {
  /**
   * Writes `contents` (UTF-8) to `name` inside a freshly created temp
   * directory and resolves to the absolute path of the new file.
   */
  const stageFile = async (name: string, contents: string): Promise<string> => {
    const dir = await createTempDir();
    const target = join(dir, name);
    await fsWriteFile(target, contents, 'utf8');
    return target;
  };

  // Each test starts with an empty probe cache.
  beforeEach(() => {
    _resetSyntaxCheckProbeCacheForTests();
  });

  it('flags broken JS via node --check', async () => {
    const target = await stageFile('broken.js', 'function (\n');
    const verdict = await runPostEditSyntaxCheck(target, {});
    expect(verdict).not.toBeNull();
    expect(verdict!.ok).toBe(false);
    // The guard narrows the result type so `checker` / `output` are readable.
    if (verdict!.ok === false) {
      expect(verdict!.checker).toBe('node --check');
      expect(verdict!.output.length).toBeGreaterThan(0);
    }
  });

  it('passes valid JS', async () => {
    const target = await stageFile('good.js', 'console.log(1)\n');
    const verdict = await runPostEditSyntaxCheck(target, {});
    expect(verdict?.ok).toBe(true);
  });

  it('flags broken JSON via JSON.parse', async () => {
    const target = await stageFile('broken.json', '{ "x": ');
    const verdict = await runPostEditSyntaxCheck(target, {});
    expect(verdict?.ok).toBe(false);
    if (verdict!.ok === false) {
      expect(verdict!.checker).toBe('JSON.parse');
    }
  });

  it('returns null for unknown extensions', async () => {
    const target = await stageFile('random.xyz', 'whatever\n');
    const verdict = await runPostEditSyntaxCheck(target, {});
    expect(verdict).toBeNull();
  });

  it('write_file appends syntax-check warning when postEditSyntaxCheck=auto', async () => {
    const workDir = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd: workDir,
      postEditSyntaxCheck: 'auto',
    });
    const writer = tools.find((candidate) => candidate.name === 'write_file');

    // In 'auto' mode the call resolves; the problem is reported in the
    // message content and on the artifact.
    const reply = (await writer!.invoke({
      id: 'call_w',
      name: 'write_file',
      args: { file_path: 'broken.js', content: 'function (\n' },
      type: 'tool_call',
    })) as { content: string; artifact: { syntax_error?: string } };

    expect(reply.content).toContain('[syntax-check warning');
    expect(reply.artifact.syntax_error).toBe('node --check');
  });

  it('write_file in strict mode throws on syntax error', async () => {
    const workDir = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd: workDir,
      postEditSyntaxCheck: 'strict',
    });
    const writer = tools.find((candidate) => candidate.name === 'write_file');

    // In 'strict' mode the same broken write must reject.
    const attempt = writer!.invoke({
      id: 'call_w',
      name: 'write_file',
      args: { file_path: 'broken.js', content: 'function (\n' },
      type: 'tool_call',
    });
    await expect(attempt).rejects.toThrow(/syntax check failed/);
  });
});
680
+
681
describe('compile_check', () => {
  it('reports "no recognised project marker" when there are none', async () => {
    // An empty temp directory: nothing for the tool to detect.
    const workDir = await createTempDir();
    const tool = createCompileCheckTool({ cwd: workDir });

    const reply = (await tool.invoke({
      id: 'call_c',
      name: 'compile_check',
      args: {},
      type: 'tool_call',
    })) as { content: string; artifact: { ran: boolean; kind: string } };

    expect(reply.content).toContain('no recognised project marker');
    expect(reply.artifact.ran).toBe(false);
    expect(reply.artifact.kind).toBe('unknown');
  });

  it('honours an explicit command override and reports exit code', async () => {
    const workDir = await createTempDir();
    const tool = createCompileCheckTool({ cwd: workDir });

    // The override prints "hello" and then fails via `false`, so both the
    // captured output and the failure status should be visible.
    const reply = (await tool.invoke({
      id: 'call_c2',
      name: 'compile_check',
      args: { command: 'echo hello && false' },
      type: 'tool_call',
    })) as {
      content: string;
      artifact: { passed: boolean; exit_code: number | null };
    };

    expect(reply.content).toContain('FAILED');
    expect(reply.content).toContain('hello');
    expect(reply.artifact.passed).toBe(false);
    expect(reply.artifact.exit_code).not.toBe(0);
  });
});
711
+
712
describe('local search fallback', () => {
  // Start every test with an empty ripgrep cache.
  beforeEach(() => {
    _resetRipgrepCacheForTests();
  });

  it('finds matches via the Node fallback when ripgrep is missing', async () => {
    const workDir = await createTempDir();
    await fsWriteFile(join(workDir, 'a.ts'), 'const needle = 42;\n', 'utf8');
    await fsWriteFile(join(workDir, 'b.ts'), 'const haystack = 1;\n', 'utf8');

    // PATH is pointed at a directory that does not exist.
    const { tools } = createLocalCodingToolBundle({
      cwd: workDir,
      env: { PATH: '/nonexistent' },
    });
    const searcher = tools.find((candidate) => candidate.name === 'grep_search');

    const rendered = String(await searcher!.invoke({ pattern: 'needle' }));
    expect(rendered).toContain('a.ts');
    expect(rendered).toContain('needle');
  });
});
732
+
733
+ describe('codex review fixes', () => {
734
describe('executeLocalCode bash args (Codex P2 #1)', () => {
  it('passes input.args as positional shell parameters when lang is bash', async () => {
    const workDir = await createTempDir();

    // The script echoes all positional parameters after an "args:" prefix.
    // If `args` were dropped, `$@` would expand to nothing.
    const run = await executeLocalCode(
      { lang: 'bash', code: 'echo "args:$@"', args: ['hello', 'world'] },
      { cwd: workDir }
    );

    expect(run.exitCode).toBe(0);
    expect(run.stdout.trim()).toBe('args:hello world');
  });

  it('still works when lang is bash and args is missing', async () => {
    const workDir = await createTempDir();

    const run = await executeLocalCode(
      { lang: 'bash', code: 'echo plain' },
      { cwd: workDir }
    );

    expect(run.exitCode).toBe(0);
    expect(run.stdout.trim()).toBe('plain');
  });
});
761
+
762
describe('ripgrep cache backend scope (Codex P2 #2)', () => {
  it('does not bleed an "rg available" verdict from one backend to another', async () => {
    const childProcess = require('child_process');

    // Backend A stands in for "rg works": whatever is requested, it spawns
    // `echo <cmd>`, so every child exits 0.
    const availableBackend = jest.fn(
      (cmd: string, _args: string[], _opts: unknown) =>
        childProcess.spawn('echo', [cmd])
    ) as unknown as t.LocalSpawn;

    // Backend B stands in for "rg is not installed": every child exits
    // with 127, the "command not found" code.
    const absentBackend = jest.fn(
      (_cmd: string, _args: string[], _opts: unknown) =>
        childProcess.spawn('sh', ['-c', 'exit 127'])
    ) as unknown as t.LocalSpawn;

    _resetRipgrepCacheForTests();

    // Two workspaces, each wired to its own backend.
    const dirA = await createTempDir();
    const dirB = await createTempDir();
    await fsWriteFile(join(dirA, 'a.ts'), 'needle\n', 'utf8');
    await fsWriteFile(join(dirB, 'b.ts'), 'needle\n', 'utf8');

    const toolsA = createLocalCodingToolBundle({
      cwd: dirA,
      exec: { spawn: availableBackend },
    }).tools;
    const toolsB = createLocalCodingToolBundle({
      cwd: dirB,
      exec: { spawn: absentBackend },
    }).tools;

    // Searching through A first records the verdict for A's backend.
    const grepA = toolsA.find((candidate) => candidate.name === 'grep_search');
    await grepA!.invoke({ pattern: 'needle' });

    // B must not inherit A's cached "true". If it did, B would try to
    // spawn rg, fail, and throw rather than fall back to the Node walker.
    const grepB = toolsB.find((candidate) => candidate.name === 'grep_search');
    const outcomeB = await grepB!.invoke({ pattern: 'needle' });
    expect(String(outcomeB)).toContain('needle');
  });
});
813
+
814
describe('additionalRoots resolved against workspace root (Codex P2 #3)', () => {
  it('treats relative additionalRoots as siblings of root, not of process.cwd', async () => {
    // Layout: <parent>/app is the workspace root, <parent>/shared is the
    // extra root, referenced from the config as '../shared'.
    const parentDir = await createTempDir();
    const { mkdir } = await import('fs/promises');
    const appDir = join(parentDir, 'app');
    await mkdir(appDir, { recursive: true });
    await mkdir(join(parentDir, 'shared'), { recursive: true });
    await fsWriteFile(join(parentDir, 'shared/lib.ts'), 'X\n', 'utf8');

    const { tools } = createLocalCodingToolBundle({
      workspace: {
        root: appDir,
        additionalRoots: ['../shared'],
      },
    });
    const reader = tools.find((candidate) => candidate.name === Constants.READ_FILE);

    // Were '../shared' resolved against process.cwd (the test runner's
    // directory), this read would fail the boundary check with
    // "Path is outside the local workspace".
    const outcome = await reader!.invoke({
      id: 'c',
      name: Constants.READ_FILE,
      args: { file_path: join(parentDir, 'shared/lib.ts') },
      type: 'tool_call',
    });
    expect(JSON.stringify(outcome)).toContain('X');
  });
});
841
+ });
842
+
843
+ describe('codex review fixes (round 2)', () => {
844
describe('streaming output cap (Codex P1)', () => {
  // Loaded lazily via require, like the other engine-level suites in this
  // file. Only `spawnLocalProcess` is needed here; the previously
  // destructured `_resetLocalEngineWarningsForTests` alias was never used.
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { spawnLocalProcess } = require('../local/LocalExecutionEngine');

  it('hard-kills the child when total streamed bytes exceed maxSpawnedBytes', async () => {
    // Cap at 64 KiB. `yes` would otherwise run unbounded.
    const start = Date.now();
    const result = await spawnLocalProcess('yes', [], {
      timeoutMs: 30_000,
      maxSpawnedBytes: 64 * 1024,
      sandbox: { enabled: false },
    });
    const elapsed = Date.now() - start;
    // Killed promptly (much sooner than the 30s timeout).
    expect(elapsed).toBeLessThan(5000);
    // Process was killed by the overflow guard, not by timeout.
    expect(result.timedOut).toBe(false);
    expect(result.exitCode).not.toBe(0);
    // We DID see some output before the kill.
    expect(result.stdout.length).toBeGreaterThan(0);
  });

  it('spills overflow to a temp file (full output recoverable post-cap)', async () => {
    // Generate ~200 KiB of output. `maxOutputChars` is kept small so the
    // output overflows the inline buffer and is spilled to disk, while
    // `maxSpawnedBytes` stays large enough that the process is not killed.
    const result = await spawnLocalProcess(
      'bash',
      ['-c', 'head -c 200000 /dev/urandom | base64 | head -c 200000'],
      {
        timeoutMs: 10_000,
        maxOutputChars: 8_000, // small inline budget; ~200 KiB → overflow
        maxSpawnedBytes: 1024 * 1024, // 1 MiB hard cap
        sandbox: { enabled: false },
      }
    );
    expect(result.exitCode).toBe(0);
    expect(result.fullOutputPath).toBeTruthy();
    const fs = await import('fs/promises');
    const spilled = await fs.readFile(result.fullOutputPath as string, 'utf8');
    // The spill file holds more bytes than the in-memory truncation.
    expect(spilled.length).toBeGreaterThan(result.stdout.length);
  });

  it('does not create a spill file for small outputs', async () => {
    const result = await spawnLocalProcess('bash', ['-c', 'echo small'], {
      timeoutMs: 5_000,
      sandbox: { enabled: false },
    });
    expect(result.fullOutputPath).toBeUndefined();
    expect(result.stdout.trim()).toBe('small');
  });
});
894
+
895
describe('bash_tool args (Codex P2)', () => {
  it('populates positional shell parameters from input.args', async () => {
    const workDir = await createTempDir();
    const { tools } = createLocalCodingToolBundle({ cwd: workDir });
    const shellTool = tools.find(
      (candidate) => candidate.name === Constants.BASH_TOOL
    );

    // `$1` and `$2` should be filled from the `args` array of the call.
    const outcome = await shellTool!.invoke({
      id: 'b1',
      name: Constants.BASH_TOOL,
      args: { command: 'echo "first=$1 second=$2"', args: ['hello', 'world'] },
      type: 'tool_call',
    });

    const serialized = JSON.stringify(outcome);
    expect(serialized).toContain('first=hello second=world');
  });

  it('still works when args is missing', async () => {
    const workDir = await createTempDir();
    const { tools } = createLocalCodingToolBundle({ cwd: workDir });
    const shellTool = tools.find(
      (candidate) => candidate.name === Constants.BASH_TOOL
    );

    const outcome = await shellTool!.invoke({
      id: 'b2',
      name: Constants.BASH_TOOL,
      args: { command: 'echo plain' },
      type: 'tool_call',
    });

    expect(JSON.stringify(outcome)).toContain('plain');
  });
});
927
+ });
928
+
929
+ describe('codex review fixes (round 3)', () => {
930
describe('validateBashCommand honours configured shell (Codex P1 #6)', () => {
  it('routes the -n preflight through `local.shell` when set', async () => {
    const { spawn: realSpawn } = require('child_process') as typeof import('child_process');

    // Recording spawn backend: remembers each requested binary, then
    // delegates to the real spawn so the call resolves cleanly.
    const spawnedBinaries: string[] = [];
    const recorder: t.LocalSpawn = ((
      command: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      spawnedBinaries.push(command);
      return realSpawn(command, args, opts);
    }) as unknown as t.LocalSpawn;

    const verdict = await validateBashCommand('echo ok', {
      shell: '/bin/sh',
      exec: { spawn: recorder },
    });

    expect(verdict.valid).toBe(true);
    // The first spawn is the syntax check; it must use the configured
    // /bin/sh rather than the DEFAULT_SHELL fallback.
    expect(spawnedBinaries[0]).toBe('/bin/sh');
  });
});
956
+
957
describe('syntax-check probe cache is backend-keyed (Codex P2 #7)', () => {
  it('does not bleed an "rg/node/python available" verdict from one backend to another', async () => {
    _resetSyntaxCheckProbeCacheForTests();

    const { spawn: realSpawn } = require('child_process') as typeof import('child_process');

    // Backend A forwards every call to the real spawn, so probes succeed.
    const workingBackend: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => realSpawn(cmd, args, opts)) as unknown as t.LocalSpawn;

    // Backend B ignores the request and always yields a child that exits 127.
    const brokenBackend: t.LocalSpawn = ((
      _cmd: string,
      _args: string[],
      opts: import('child_process').SpawnOptions
    ) => realSpawn('sh', ['-c', 'exit 127'], opts)) as unknown as t.LocalSpawn;

    // One syntactically broken JS file per workspace.
    const dirA = await createTempDir();
    const dirB = await createTempDir();
    const fileA = join(dirA, 'a.js');
    const fileB = join(dirB, 'b.js');
    await fsWriteFile(fileA, 'function (\n', 'utf8');
    await fsWriteFile(fileB, 'function (\n', 'utf8');

    // A: the check runs and reports the broken file, caching "node
    // available" for A's backend.
    const verdictA = await runPostEditSyntaxCheck(fileA, {
      cwd: dirA,
      exec: { spawn: workingBackend },
    });
    expect(verdictA?.ok).toBe(false);

    // B: must run its own probe. Reusing A's cached verdict would make B
    // skip the probe, run `node --check`, get exit 127 from its backend,
    // and report ok=false with a misleading checker. With a per-backend
    // cache the probe fails on B, so the syntax check is skipped and the
    // result is ok=true.
    const verdictB = await runPostEditSyntaxCheck(fileB, {
      cwd: dirB,
      exec: { spawn: brokenBackend },
    });
    expect(verdictB?.ok).toBe(true);
  });
});
1001
+
1002
describe('grep passes pattern via -e (Codex P2 #8)', () => {
  it('handles dash-prefixed patterns without rg interpreting them as flags', async () => {
    const workDir = await createTempDir();
    // The middle line carries the literal "-foo" the search must find.
    const fixture = join(workDir, 'flags.txt');
    await fsWriteFile(fixture, 'before\n-foo bar\nafter\n', 'utf8');

    const { tools } = createLocalCodingToolBundle({ cwd: workDir });
    const searcher = tools.find((candidate) => candidate.name === 'grep_search');

    const outcome = await searcher!.invoke({
      id: 'g1',
      name: 'grep_search',
      args: { pattern: '-foo' },
      type: 'tool_call',
    });

    // If "-foo" were parsed as a flag, rg would bail out; passed as a
    // pattern it matches and the line appears in the result.
    expect(JSON.stringify(outcome)).toContain('-foo bar');
  });
});
1025
+ });
1026
+
1027
+ describe('codex review fixes (round 4)', () => {
1028
describe('quoted destructive targets (Codex P1 #9)', () => {
  /**
   * Asserts that `command` is rejected and that the reported errors
   * mention the destructive-command pattern.
   */
  const expectDestructiveRejection = async (command: string): Promise<void> => {
    const verdict = await validateBashCommand(command);
    expect(verdict.valid).toBe(false);
    expect(verdict.errors.join('\n')).toContain('destructive command pattern');
  };

  it('blocks rm -rf "/" (target inside double quotes)', async () => {
    await expectDestructiveRejection('rm -rf "/"');
  });

  it('blocks rm -rf "$HOME" (env-quoted target)', async () => {
    await expectDestructiveRejection('rm -rf "$HOME"');
  });

  it('blocks rm -rf \'/\' (target inside single quotes)', async () => {
    await expectDestructiveRejection("rm -rf '/'");
  });

  it('blocks chmod -R 777 "/"', async () => {
    await expectDestructiveRejection('chmod -R 777 "/"');
  });

  it('still blocks unquoted forms (no regression)', async () => {
    // Only the verdict is checked here, not the error text.
    const verdict = await validateBashCommand('rm -rf /');
    expect(verdict.valid).toBe(false);
  });

  it('does not flag the print-only case echo "rm -rf /"', async () => {
    // Here the quotes belong to `echo`'s argument as a whole; there is no
    // quote pair around the `/` itself, so the quoted-target pass should
    // not fire.
    const verdict = await validateBashCommand('echo "rm -rf /"');
    expect(verdict.valid).toBe(true);
  });
});
1066
+ });
1067
+
1068
+ describe('codex review fixes (round 5)', () => {
1069
describe('maxSpawnedBytes=0 disables the cap (Codex P2 #11)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const engine = require('../local/LocalExecutionEngine');

  it('does not kill on first byte when maxSpawnedBytes is 0', async () => {
    // A naive `totalSpawnedBytes > 0` comparison would fire on the very
    // first byte and kill the process tree before `echo` finishes.
    const run = await engine.spawnLocalProcess('bash', ['-c', 'echo hello'], {
      timeoutMs: 5_000,
      maxSpawnedBytes: 0,
      sandbox: { enabled: false },
    });

    expect(run.exitCode).toBe(0);
    expect(run.timedOut).toBe(false);
    expect(run.stdout.trim()).toBe('hello');
  });

  it('lets a moderately noisy command run to completion when cap is 0', async () => {
    // Roughly 40 KiB of output. The default cap (50 MiB) would let this
    // through as well; the point is that an explicit 0 must not take the
    // kill path.
    const script = 'head -c 40000 /dev/urandom | base64 | head -c 40000';
    const run = await engine.spawnLocalProcess('bash', ['-c', script], {
      timeoutMs: 10_000,
      maxOutputChars: 200_000,
      maxSpawnedBytes: 0,
      sandbox: { enabled: false },
    });

    expect(run.exitCode).toBe(0);
    expect(run.timedOut).toBe(false);
    expect(run.stdout.length).toBeGreaterThan(0);
  });
});
1104
+
1105
describe('spill path is ESM-safe (Codex P1 #12)', () => {
  // Background: the spill path once called `require('fs')` from inside a
  // module that is also shipped as ESM. That works under CJS test runs but
  // throws `ReferenceError` for an ESM consumer that hits the overflow
  // path. This suite pins the happy path; with `createWriteStream`
  // imported statically, such a ReferenceError would show up as a test
  // failure whichever build runs the test.
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const engine = require('../local/LocalExecutionEngine');

  it('writes a spill file without a runtime require', async () => {
    const script = 'head -c 40000 /dev/urandom | base64 | head -c 40000';
    const run = await engine.spawnLocalProcess('bash', ['-c', script], {
      timeoutMs: 10_000,
      // A tiny inline cap forces an overflow, so ensureSpill() runs.
      maxOutputChars: 4_000,
      maxSpawnedBytes: 1024 * 1024,
      sandbox: { enabled: false },
    });

    expect(run.exitCode).toBe(0);
    expect(run.fullOutputPath).toBeTruthy();

    const { readFile } = await import('fs/promises');
    const spilled = await readFile(run.fullOutputPath as string, 'utf8');
    expect(spilled.length).toBeGreaterThan(run.stdout.length);
  });
});
1137
+
1138
describe('sandbox config: loopback bridge access (Codex P1 #14)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const engine = require('../local/LocalExecutionEngine');

  // Third argument used throughout: a callback returning an empty list.
  const noDefaults = (): string[] => [];

  it('seeds allowedDomains with loopback hosts so the bridge works under sandbox', () => {
    const config = engine.buildSandboxRuntimeConfig({}, '/tmp/ws', noDefaults);
    const loopbackHosts = ['127.0.0.1', 'localhost', '::1'];
    expect(config.network.allowedDomains).toEqual(
      expect.arrayContaining(loopbackHosts)
    );
  });

  it('keeps user-supplied allowedDomains and does not duplicate loopback', () => {
    const userConfig = {
      sandbox: { network: { allowedDomains: ['api.example.com', '127.0.0.1'] } },
    };
    const config = engine.buildSandboxRuntimeConfig(userConfig, '/tmp/ws', noDefaults);

    // 127.0.0.1 is both user-supplied and a loopback seed; it must appear once.
    let loopbackCount = 0;
    for (const domain of config.network.allowedDomains as string[]) {
      if (domain === '127.0.0.1') {
        loopbackCount += 1;
      }
    }
    expect(loopbackCount).toBe(1);
    expect(config.network.allowedDomains).toContain('api.example.com');
  });

  it('respects deniedDomains overriding the loopback seed', () => {
    const userConfig = {
      sandbox: { network: { deniedDomains: ['127.0.0.1'] } },
    };
    const config = engine.buildSandboxRuntimeConfig(userConfig, '/tmp/ws', noDefaults);

    expect(config.network.allowedDomains).not.toContain('127.0.0.1');
    // Only `127.0.0.1` was denied, so the remaining loopback aliases are
    // still expected in the seed.
    expect(config.network.allowedDomains).toEqual(
      expect.arrayContaining(['localhost', '::1'])
    );
  });
});
1176
+
1177
describe('sandbox allowWrite includes additionalRoots (Codex P2 #15)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const engine = require('../local/LocalExecutionEngine');

  it('adds workspace.additionalRoots to allowWrite alongside cwd', () => {
    const appRoot = '/tmp/repo/app';
    const config = engine.buildSandboxRuntimeConfig(
      {
        cwd: appRoot,
        workspace: { root: appRoot, additionalRoots: ['/tmp/repo/shared'] },
      },
      appRoot,
      () => ['/tmp/runtime-default']
    );

    // cwd, the extra root, and the callback-provided path are all expected.
    expect(config.filesystem.allowWrite).toEqual(
      expect.arrayContaining([
        '/tmp/repo/app',
        '/tmp/repo/shared',
        '/tmp/runtime-default',
      ])
    );
  });

  it('resolves relative additionalRoots against the workspace root', () => {
    const appRoot = '/tmp/repo/app';
    const config = engine.buildSandboxRuntimeConfig(
      {
        cwd: appRoot,
        workspace: { root: appRoot, additionalRoots: ['../shared'] },
      },
      appRoot,
      () => []
    );

    // '../shared' anchored at /tmp/repo/app resolves to /tmp/repo/shared.
    expect(config.filesystem.allowWrite).toContain('/tmp/repo/shared');
  });

  it('falls back to cwd-only when no additionalRoots are configured', () => {
    const config = engine.buildSandboxRuntimeConfig(
      { cwd: '/tmp/ws' },
      '/tmp/ws',
      () => ['/tmp/runtime-default']
    );

    // Exact match: cwd first, then the callback-provided path.
    expect(config.filesystem.allowWrite).toEqual([
      '/tmp/ws',
      '/tmp/runtime-default',
    ]);
  });

  it('honours an explicit allowWrite override (no auto-seeding)', () => {
    const config = engine.buildSandboxRuntimeConfig(
      {
        cwd: '/tmp/ws',
        workspace: { root: '/tmp/ws', additionalRoots: ['/tmp/extra'] },
        sandbox: { filesystem: { allowWrite: ['/explicit/path'] } },
      },
      '/tmp/ws',
      () => ['/tmp/runtime-default']
    );

    // With an explicit list, nothing else is added.
    expect(config.filesystem.allowWrite).toEqual(['/explicit/path']);
  });
});
1246
+
1247
describe('glob_search surfaces ripgrep failures (Codex P2 #13)', () => {
  it('returns an explicit error (not "No files found.") when rg exits non-zero', async () => {
    _resetRipgrepCacheForTests();

    const { spawn: realSpawn } =
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process');

    // Spawn backend that makes rg look installed (`rg --version` exits 0)
    // while every other rg invocation writes to stderr and exits 2.
    // Non-rg commands pass through to the real spawn. The regression
    // under test: glob_search used to ignore exitCode/stderr and answer
    // "No files found." regardless.
    const failingRg: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd !== 'rg') {
        return realSpawn(cmd, args, opts);
      }
      const script =
        args[0] === '--version'
          ? 'exit 0'
          : 'printf \'rg: bad glob target\\n\' >&2; exit 2';
      return realSpawn('sh', ['-c', script], opts);
    }) as unknown as t.LocalSpawn;

    const workDir = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd: workDir,
      exec: { spawn: failingRg },
    });
    const globber = tools.find(
      (candidate) => candidate.name === Constants.GLOB_SEARCH
    );

    const outcome = await globber!.invoke({
      id: 'g1',
      name: Constants.GLOB_SEARCH,
      args: { pattern: '**/*' },
      type: 'tool_call',
    });

    const serialized = JSON.stringify(outcome);
    expect(serialized).not.toContain('No files found.');
    expect(serialized).toContain('glob_search failed');
    expect(serialized).toContain('bad glob target');
  });
});
1297
+
1298
describe('grep_search surfaces ripgrep failures (Codex P2 #23)', () => {
  it('returns an explicit error (not "No matches found.") when rg exits non-zero', async () => {
    _resetRipgrepCacheForTests();

    const { spawn: realSpawn } =
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process');

    // Spawn backend: `rg --version` exits 0 so rg looks installed, any
    // other rg call prints an error to stderr and exits 2, and non-rg
    // commands go to the real spawn. The regression under test: the rg
    // branch of grep_search used to drop exitCode and report zero
    // matches on a genuine rg error.
    const failingRg: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd !== 'rg') {
        return realSpawn(cmd, args, opts);
      }
      const script =
        args[0] === '--version'
          ? 'exit 0'
          : 'printf \'rg: io error reading dir\\n\' >&2; exit 2';
      return realSpawn('sh', ['-c', script], opts);
    }) as unknown as t.LocalSpawn;

    const workDir = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd: workDir,
      exec: { spawn: failingRg },
    });
    const searcher = tools.find(
      (candidate) => candidate.name === Constants.GREP_SEARCH
    );

    const outcome = await searcher!.invoke({
      id: 'gr1',
      name: Constants.GREP_SEARCH,
      args: { pattern: 'needle' },
      type: 'tool_call',
    });

    const serialized = JSON.stringify(outcome);
    expect(serialized).not.toContain('No matches found.');
    expect(serialized).toContain('grep_search failed');
    expect(serialized).toContain('io error reading dir');
  });
});
1348
+ });
1349
+
1350
+ describe('codex review fixes (round 6)', () => {
1351
describe('destructive guard handles `--` end-of-options (Codex P1 #20)', () => {
  /**
   * Asserts that `command` is rejected and that the reported errors
   * mention the destructive-command pattern.
   */
  const expectDestructiveRejection = async (command: string): Promise<void> => {
    const verdict = await validateBashCommand(command);
    expect(verdict.valid).toBe(false);
    expect(verdict.errors.join('\n')).toContain('destructive command pattern');
  };

  it('blocks rm -rf -- "/" (-- between flags and quoted target)', async () => {
    await expectDestructiveRejection('rm -rf -- "/"');
  });

  it('blocks rm -rf -- / (-- between flags and bare target)', async () => {
    await expectDestructiveRejection('rm -rf -- /');
  });

  it('blocks chmod -R 777 -- "/"', async () => {
    await expectDestructiveRejection('chmod -R 777 -- "/"');
  });

  it('blocks rm -rf -- "$HOME"', async () => {
    await expectDestructiveRejection('rm -rf -- "$HOME"');
  });

  it('still allows benign `--` usage (no destructive target)', async () => {
    // A `--` that is not followed by a destructive target must still validate.
    const verdict = await validateBashCommand('find . -- -name "*.ts"');
    expect(verdict.valid).toBe(true);
  });
});
1382
+
1383
describe('compile_check enforces validateBashCommand + readOnly (Codex P1 #21)', () => {
  it('refuses a destructive command override (rm -rf "/")', async () => {
    const workDir = await createTempDir();
    const checker = createCompileCheckTool({ cwd: workDir });

    const outcome = await checker.invoke({
      id: 'cc1',
      name: Constants.COMPILE_CHECK,
      args: { command: 'rm -rf "/"' },
      type: 'tool_call',
    });

    const serialized = JSON.stringify(outcome);
    expect(serialized).toContain('compile_check refused to run');
    expect(serialized).toContain('destructive command pattern');
  });

  it('refuses a mutating command override under readOnly: true', async () => {
    const workDir = await createTempDir();
    const checker = createCompileCheckTool({ cwd: workDir, readOnly: true });

    const outcome = await checker.invoke({
      id: 'cc2',
      name: Constants.COMPILE_CHECK,
      // `touch` is matched by mutatingCommandPattern: acceptable without
      // readOnly, rejected with it.
      args: { command: 'touch /tmp/lc-cc-should-not-create' },
      type: 'tool_call',
    });

    const serialized = JSON.stringify(outcome);
    expect(serialized).toContain('compile_check refused to run');
    expect(serialized).toMatch(/read-only|mutate/i);
  });

  it('still allows benign override commands (echo)', async () => {
    const workDir = await createTempDir();
    const checker = createCompileCheckTool({ cwd: workDir });

    const outcome = await checker.invoke({
      id: 'cc3',
      name: Constants.COMPILE_CHECK,
      args: { command: 'echo hello' },
      type: 'tool_call',
    });

    expect(JSON.stringify(outcome)).not.toContain('refused to run');
  });
});
1427
+ });
1428
+
1429
+ describe('comprehensive review (round 7) — manual finding C', () => {
describe('nested-shell destructive payload (manual #C)', () => {
  /**
   * `[test title, command]` pairs where the destructive command is wrapped
   * in a nested shell (`bash -lc`, `sh -c`) or `eval`.
   */
  const wrappedPayloads: Array<[string, string]> = [
    ['blocks bash -lc "rm -rf $HOME"', 'bash -lc "rm -rf $HOME"'],
    ['blocks sh -c "chmod -R 777 /"', "sh -c 'chmod -R 777 /'"],
    ['blocks eval "rm -rf /"', "eval 'rm -rf /'"],
  ];

  for (const [title, command] of wrappedPayloads) {
    it(title, async () => {
      const verdict = await validateBashCommand(command);
      expect(verdict.valid).toBe(false);
      expect(verdict.errors.join('\n')).toMatch(/destructive command pattern/);
    });
  }

  it('still allows benign nested shell (echo)', async () => {
    // Wrapping alone must not trip the guard.
    const verdict = await validateBashCommand('bash -lc "echo hello"');
    expect(verdict.valid).toBe(true);
  });
});
1454
+ });
1455
+
1456
+ describe('comprehensive review (round 7) — manual finding D', () => {
describe('fallback grep DoS guardrails', () => {
  /**
   * Builds a spawn backend whose every `rg` invocation exits 127, so the
   * grep tool has to take its Node fallback path. All other commands are
   * forwarded to the real `child_process.spawn`.
   *
   * Without this the assertions below could only run behind an
   * `if (text.includes('node-fallback'))` guard, which made both tests pass
   * without checking anything on hosts that have ripgrep installed.
   */
  const createNoRipgrepSpawn = (): t.LocalSpawn => {
    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;
    return ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd === 'rg') {
        return realSpawn('sh', ['-c', 'exit 127'], opts);
      }
      return realSpawn(cmd, args, opts);
    }) as unknown as t.LocalSpawn;
  };

  /**
   * Runs grep_search for `pattern` in an empty temp dir with ripgrep forced
   * unavailable and returns the JSON-serialized tool result.
   *
   * @param id - tool-call id passed to `invoke`.
   * @param pattern - the search pattern under test.
   */
  const grepWithoutRipgrep = async (
    id: string,
    pattern: string
  ): Promise<string> => {
    // Drop any cached ripgrep-availability verdict before probing again.
    _resetRipgrepCacheForTests();
    const cwd = await createTempDir();
    const bundle = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: createNoRipgrepSpawn() },
    });
    const grepTool = bundle.tools.find(
      (tt) => tt.name === Constants.GREP_SEARCH
    );
    const result = await grepTool!.invoke({
      id,
      name: Constants.GREP_SEARCH,
      args: { pattern },
      type: 'tool_call',
    });
    return JSON.stringify(result);
  };

  it('rejects oversize patterns before compile', async () => {
    // 2 KiB pattern — over the 1 KiB cap.
    const text = await grepWithoutRipgrep('g-long', 'a'.repeat(2048));
    expect(text).toContain('grep_search refused the pattern');
    expect(text).toContain('exceeds');
  });

  it('rejects nested-quantifier patterns (catastrophic backtracking)', async () => {
    const text = await grepWithoutRipgrep('g-evil', '(a+)+$');
    expect(text).toContain('catastrophic backtracking');
  });
});
1500
+ });
1501
+
1502
+ describe('comprehensive review (round 7) — manual finding E', () => {
describe('fileCheckpointer exposed via ToolNode auto-bind path', () => {
  it('Run/ToolNode-style bind makes the checkpointer reachable when fileCheckpointing is true', () => {
    // Local engine with checkpointing on: the node must return an object
    // that exposes both `captureBeforeWrite` and `rewind`.
    const toolNode = new ToolNode({
      tools: [],
      toolExecution: {
        engine: 'local',
        local: { fileCheckpointing: true },
      },
    });
    const checkpointer = toolNode.getFileCheckpointer();

    expect(checkpointer).toBeDefined();
    expect(typeof checkpointer?.captureBeforeWrite).toBe('function');
    expect(typeof checkpointer?.rewind).toBe('function');
  });

  it('returns undefined when fileCheckpointing is not enabled', () => {
    // Same engine, flag omitted: nothing to hand back.
    const toolNode = new ToolNode({
      tools: [],
      toolExecution: { engine: 'local' },
    });

    expect(toolNode.getFileCheckpointer()).toBeUndefined();
  });
});
1526
+
describe('fileCheckpointer reachable through Run.getFileCheckpointer / Run.rewindFiles (audit-of-audit follow-up)', () => {
  // Background: `getFileCheckpointer()` existed on ToolNode, but a Run built
  // through `Run.create(...)` could not reach it, so
  // `RunConfig.toolExecution.local.fileCheckpointing` did nothing for Run
  // callers. These tests pin the round-trip through the standard config
  // path: the Run surfaces a checkpointer and `Run.rewindFiles()` restores
  // captured paths.

  /**
   * Creates a standard-graph Run on the local execution engine.
   *
   * @param runId - id given to the Run.
   * @param local - optional `toolExecution.local` settings; when omitted
   *   only `{ engine: 'local' }` is passed.
   */
  const createLocalRun = async (
    runId: string,
    local?: { cwd: string; fileCheckpointing: boolean }
  ) => {
    const { Run } = await import('@/run');
    return Run.create<t.IState>({
      runId,
      graphConfig: {
        type: 'standard',
        llmConfig: { provider: Providers.OPENAI, model: 'gpt-4o' },
      },
      toolExecution:
        local === undefined ? { engine: 'local' } : { engine: 'local', local },
    });
  };

  it('exposes the checkpointer via Run.getFileCheckpointer + restores through Run.rewindFiles', async () => {
    const fsp = await import('fs/promises');
    const cwd = await createTempDir();
    const trackedPath = join(cwd, 'tracked.txt');
    await fsp.writeFile(trackedPath, 'before\n');

    const run = await createLocalRun('run-checkpoint-roundtrip', {
      cwd,
      fileCheckpointing: true,
    });

    // The checkpointer must be reachable straight off the Run.
    const checkpointer = run.getFileCheckpointer();
    expect(checkpointer).toBeDefined();

    // Capture, mutate, then rewind through the Run-level API.
    await checkpointer!.captureBeforeWrite(trackedPath);
    await fsp.writeFile(trackedPath, 'mutated\n');
    const restoredCount = await run.rewindFiles();

    expect(restoredCount).toBeGreaterThanOrEqual(1);
    expect(await fsp.readFile(trackedPath, 'utf8')).toBe('before\n');
  });

  it('Run.rewindFiles returns 0 when fileCheckpointing is disabled', async () => {
    const run = await createLocalRun('run-no-checkpoint');

    expect(run.getFileCheckpointer()).toBeUndefined();
    expect(await run.rewindFiles()).toBe(0);
  });

  it('checkpointer survives Graph.clearHeavyState so post-completion rewind works (Codex P1 #32)', async () => {
    // Earlier wiring dropped the checkpointer inside clearHeavyState, which
    // processStream calls in its finally block, so a host could not rewind
    // after the run finished. Calling clearHeavyState directly must NOT
    // drop the checkpointer.
    const fsp = await import('fs/promises');
    const cwd = await createTempDir();
    const trackedPath = join(cwd, 'after-completion.txt');
    await fsp.writeFile(trackedPath, 'pre-run\n');

    const run = await createLocalRun('run-cp-survives-clear', {
      cwd,
      fileCheckpointing: true,
    });
    const checkpointer = run.getFileCheckpointer();
    expect(checkpointer).toBeDefined();

    await checkpointer!.captureBeforeWrite(trackedPath);
    await fsp.writeFile(trackedPath, 'mutated-by-tool\n');

    // Stand-in for the end-of-run cleanup.
    run.Graph?.clearHeavyState();

    // The very same instance has to be returned afterwards.
    expect(run.getFileCheckpointer()).toBe(checkpointer);

    // Rewind issued after the cleanup still restores the file.
    const restoredCount = await run.rewindFiles();
    expect(restoredCount).toBeGreaterThanOrEqual(1);
    expect(await fsp.readFile(trackedPath, 'utf8')).toBe('pre-run\n');
  });
});
1629
+ });
1630
+
1631
+ describe('comprehensive review (round 8) — Codex P1 #24 / P1 #25', () => {
describe('JSON post-edit syntax check uses WorkspaceFS (Codex P1 #24)', () => {
  /**
   * Builds an object with the WorkspaceFS method names used in these tests.
   * Only `readFile` is supplied by the caller; `stat` and `open` throw
   * "not implemented" and the remaining members are inert stubs.
   *
   * @param readFile - implementation used for the JSON read.
   */
  const makeFakeFs = (
    readFile: (p: string, _enc?: 'utf8') => Promise<string>
  ) => ({
    readFile,
    writeFile: async () => undefined,
    stat: async () => {
      throw new Error('not implemented');
    },
    readdir: async () => [],
    mkdir: async () => undefined,
    realpath: async (p: string) => p,
    unlink: async () => undefined,
    open: async () => {
      throw new Error('not implemented');
    },
  });

  it('routes the JSON read through `local.exec.fs` instead of host fs', async () => {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { runPostEditSyntaxCheck } = require('../local/syntaxCheck');

    // Every path the checker reads through the injected fs is recorded.
    // If the checker went to the host filesystem instead, this list would
    // stay empty and the final assertion would fail even though the
    // result might still report `ok: true`.
    const readPaths: string[] = [];
    const fakeFs = makeFakeFs(async (p: string): Promise<string> => {
      readPaths.push(p);
      return '{"valid": true}';
    });

    const outcome = await runPostEditSyntaxCheck('/virtual/file.json', {
      exec: { fs: fakeFs as unknown as never },
    });

    expect(outcome?.ok).toBe(true);
    expect(readPaths).toEqual(['/virtual/file.json']);
  });

  it('flags invalid JSON returned by the WorkspaceFS', async () => {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const { runPostEditSyntaxCheck } = require('../local/syntaxCheck');
    const fakeFs = makeFakeFs(async () => '{ invalid: json');

    const outcome = await runPostEditSyntaxCheck('/virtual/bad.json', {
      exec: { fs: fakeFs as unknown as never },
    });

    expect(outcome?.ok).toBe(false);
    expect(outcome?.checker).toBe('JSON.parse');
  });
});
1695
+
describe('compile_check detect uses WorkspaceFS (Codex P1 #25)', () => {
  it('routes project-marker probes through `local.exec.fs`', async () => {
    // Fake fs that reports a `tsconfig.json` and nothing else. Every path
    // passed to `stat` is recorded so the test can prove the probe went
    // through the injected fs rather than the host filesystem.
    const stats: string[] = [];
    const fakeFs = {
      readFile: async () => '',
      writeFile: async () => undefined,
      stat: async (p: string) => {
        stats.push(p);
        if (p.endsWith('tsconfig.json')) {
          return {
            isFile: () => true,
            isDirectory: () => false,
            size: 0,
          };
        }
        throw new Error('ENOENT');
      },
      readdir: async () => [],
      mkdir: async () => undefined,
      realpath: async (p: string) => p,
      unlink: async () => undefined,
      open: async () => {
        throw new Error('not implemented');
      },
    };

    const compile = createCompileCheckTool({
      cwd: '/virtual/repo',
      exec: { fs: fakeFs as unknown as never },
    });

    // Invoke with NO `command` override so the auto-detect path is the one
    // being exercised. (An earlier version of this test first made an
    // override call whose result was discarded; it asserted nothing and
    // has been removed.) The outcome of whatever command the tool then
    // runs is deliberately not asserted; only the recorded probes matter.
    await compile.invoke({
      id: 'cc2',
      name: Constants.COMPILE_CHECK,
      args: {},
      type: 'tool_call',
    });

    expect(stats.some((p) => p.endsWith('tsconfig.json'))).toBe(true);
  });
});
1763
+ });
1764
+
1765
+ describe('comprehensive review (round 9) — Codex P1 (overflow-killed) + audit findings', () => {
describe('overflow-killed processes report as failures (Codex P1)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { spawnLocalProcess } = require('../local/LocalExecutionEngine');

  it('reports overflowKilled=true and a non-null exit code when maxSpawnedBytes is exceeded', async () => {
    // `yes` never stops writing, so a 16 KiB cap is hit almost at once.
    // The result must carry the overflow flag and a usable exit code
    // rather than `exitCode: null`.
    const outcome = await spawnLocalProcess('yes', [], {
      timeoutMs: 30_000,
      maxSpawnedBytes: 16 * 1024,
      sandbox: { enabled: false },
    });

    expect(outcome.overflowKilled).toBe(true);
    // The exit code has to be present and non-zero so callers see a
    // failure status.
    expect(outcome.exitCode).not.toBeNull();
    expect(outcome.exitCode).not.toBe(0);
    // The kill came from the byte cap, not from the timeout.
    expect(outcome.timedOut).toBe(false);
  });

  it('formatLocalOutput surfaces the killed flag', async () => {
    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      maxSpawnedBytes: 16 * 1024,
      timeoutMs: 30_000,
      sandbox: { enabled: false },
    });
    const bashTool = tools.find((tt) => tt.name === Constants.BASH_TOOL);

    const outcome = await bashTool!.invoke({
      id: 'b1',
      name: Constants.BASH_TOOL,
      args: { command: 'yes' },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).toContain('killed: true');
    expect(serialized).toContain('local.maxSpawnedBytes');
  });
});
1812
+
describe('signal-killed processes report as failures (Codex P2 — generalizes the overflow fix)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { spawnLocalProcess } = require('../local/LocalExecutionEngine');

  it('synthesizes a non-zero exit code and surfaces the signal name on `kill -9 $$`', async () => {
    // The script SIGKILLs itself after printing one line. A run that ends
    // this way must not look like a clean exit.
    const scriptArgs = ['-c', 'echo started; kill -9 $$'];
    const outcome = await spawnLocalProcess('bash', scriptArgs, {
      timeoutMs: 5_000,
      sandbox: { enabled: false },
    });

    // Whichever process the signal is reported on, the exit code has to
    // be present and non-zero.
    expect(outcome.exitCode).not.toBeNull();
    expect(outcome.exitCode).not.toBe(0);
    // The signal name is exposed and starts with `SIG`.
    expect(outcome.signal).toMatch(/^SIG/);
  });

  it('formatLocalOutput surfaces the signal kill', async () => {
    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      timeoutMs: 5_000,
      sandbox: { enabled: false },
    });
    const bashTool = tools.find((tt) => tt.name === Constants.BASH_TOOL);

    const outcome = await bashTool!.invoke({
      id: 'sig1',
      name: Constants.BASH_TOOL,
      args: { command: 'echo started; kill -9 $$' },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).toContain('killed: true');
    expect(serialized).toMatch(/signal=SIG/);
  });
});
1856
+
describe('fallback-grep nested-quantifier heuristic catches double-nested groups (audit #1)', () => {
  it('rejects `((a+)+)` (the textbook ReDoS pattern)', async () => {
    _resetRipgrepCacheForTests();

    const { spawn: realSpawn } =
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process');

    // Backend that makes every `rg` invocation exit 127, so the tool has
    // to use its fallback regex compilation. Everything else is passed
    // through to the real spawn unchanged.
    const withoutRipgrep: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) =>
      cmd === 'rg'
        ? realSpawn('sh', ['-c', 'exit 127'], opts)
        : realSpawn(cmd, args, opts)) as unknown as t.LocalSpawn;

    const cwd = await createTempDir();
    const { tools } = createLocalCodingToolBundle({
      cwd,
      exec: { spawn: withoutRipgrep },
    });
    const grepTool = tools.find((tt) => tt.name === Constants.GREP_SEARCH);

    const outcome = await grepTool!.invoke({
      id: 'gr-evil',
      name: Constants.GREP_SEARCH,
      args: { pattern: '((a+)+)' },
      type: 'tool_call',
    });
    const serialized = JSON.stringify(outcome);

    expect(serialized).toContain('grep_search refused the pattern');
    expect(serialized).toContain('catastrophic backtracking');
  });
});
1897
+
describe('resolveLocalExecutionTools no longer overwrites bundle tools (audit #4)', () => {
  it('CODE_EXECUTION_TOOLS loop does not re-create tools when coding-tools bundle ran first', () => {
    // Background: the bundle path creates bash_tool/execute_code/etc.;
    // previously the CODE_EXECUTION_TOOLS loop replaced those instances
    // with fresh ones that would not share the bundle's checkpointer.
    //
    // What this test actually pins: after constructing a local-engine
    // ToolNode, every bundle member is registered in the node's tool map.
    // It does not compare instance identity.
    const node = new ToolNode({
      tools: [],
      toolExecution: { engine: 'local' },
    });

    // `toolMap` is not part of the public surface, hence the cast.
    const toolMap = (node as unknown as { toolMap: Map<string, unknown> })
      .toolMap;

    const bundleNames = [
      Constants.BASH_TOOL,
      Constants.EXECUTE_CODE,
      Constants.PROGRAMMATIC_TOOL_CALLING,
      Constants.BASH_PROGRAMMATIC_TOOL_CALLING,
    ];
    for (const name of bundleNames) {
      expect(toolMap.has(name)).toBe(true);
    }
  });
});
1929
+ });
1930
+
1931
+ describe('comprehensive review (round 10) — Codex P1 #28 / P2 #29', () => {
describe('SIGKILL escalation defeats SIGTERM-trapping processes (Codex P1 #28)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { spawnLocalProcess } = require('../local/LocalExecutionEngine');

  it('escalates to SIGKILL when timeoutMs elapses and the child traps SIGTERM', async () => {
    // The child ignores SIGTERM and loops forever. If only SIGTERM were
    // sent it would keep running and the spawn promise would never
    // settle; escalation to SIGKILL is what lets this test finish.
    const stubbornScript = "trap '' TERM; while true; do sleep 0.1; done";

    const startedAt = Date.now();
    const outcome = await spawnLocalProcess('bash', ['-c', stubbornScript], {
      timeoutMs: 1500,
      sandbox: { enabled: false },
    });
    const elapsedMs = Date.now() - startedAt;

    // Upper bound: 1.5s timeout + escalation delay + spawn overhead,
    // asserted generously at under 6s.
    expect(elapsedMs).toBeLessThan(6000);
    expect(outcome.timedOut).toBe(true);
    // Some signal name must be reported (SIGKILL after escalation, or
    // SIGTERM if the trap did not take effect on this host).
    expect(outcome.signal).toMatch(/^SIG/);
  }, 10_000);
});
1961
+
describe('ripgrep cache also keys on env (Codex P1 #34)', () => {
  it('does not bleed an "rg available" verdict from one env to another on the same backend', async () => {
    _resetRipgrepCacheForTests();

    const realSpawn = (
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process')
    ).spawn;
    const fsp = await import('fs/promises');

    // The stub shell is addressed by ABSOLUTE path on purpose. Node looks
    // a bare command up through `options.env.PATH` when `env` is given, so
    // with the fake PATH values used below a bare `sh` would not be found
    // and the "rg is available" branch could never report success — the
    // test would then pass without ever recording a positive verdict.
    const STUB_SHELL = '/bin/sh';

    // Fully stubbed ripgrep: no real `rg` binary is ever executed.
    //   PATH === '/with/rg'  → probe exits 0, searches exit 1 (no match)
    //   any other env        → every `rg` call exits 127 (not found)
    // Non-`rg` commands go to the real spawn untouched.
    const envSensitive: t.LocalSpawn = ((
      cmd: string,
      args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      if (cmd !== 'rg') {
        return realSpawn(cmd, args, opts);
      }
      const env = (opts.env ?? {}) as NodeJS.ProcessEnv;
      let exitCode = 127;
      if (env.PATH === '/with/rg') {
        exitCode = args[0] === '--version' ? 0 : 1;
      }
      return realSpawn(STUB_SHELL, ['-c', `exit ${exitCode}`], opts);
    }) as unknown as t.LocalSpawn;

    const cwdA = await createTempDir();
    const cwdB = await createTempDir();
    await fsp.writeFile(join(cwdA, 'a.ts'), 'needle\n');
    await fsp.writeFile(join(cwdB, 'b.ts'), 'needle\n');

    // Run A: env says rg is available, so a positive verdict is recorded
    // for this backend under env A. The search result itself is not
    // asserted.
    const bundleA = createLocalCodingToolBundle({
      cwd: cwdA,
      exec: { spawn: envSensitive },
      env: { PATH: '/with/rg' },
    });
    await bundleA.tools.find((t_) => t_.name === 'grep_search')!.invoke({
      id: 'gA',
      name: 'grep_search',
      args: { pattern: 'needle' },
      type: 'tool_call',
    });

    // Run B: SAME backend, DIFFERENT env (rg "missing"). It must probe
    // again and use the Node fallback instead of reusing Run A's verdict.
    // If the verdict leaked, the stubbed `rg` search would exit 127 and
    // the assertions below would fail.
    const bundleB = createLocalCodingToolBundle({
      cwd: cwdB,
      exec: { spawn: envSensitive },
      env: { PATH: '/without/rg' },
    });
    const bResult = await bundleB.tools
      .find((t_) => t_.name === 'grep_search')!
      .invoke({
        id: 'gB',
        name: 'grep_search',
        args: { pattern: 'needle' },
        type: 'tool_call',
      });
    const text = JSON.stringify(bResult);

    // The match is present and the fallback engine is reported.
    expect(text).toContain('needle');
    expect(text).toContain('node-fallback');
  });
});
2043
+
describe('compile-style runtimes honor local.shell (Codex P2 #29)', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { executeLocalCode } = require('../local/LocalExecutionEngine');

  it('routes the rust runtime through `local.shell` instead of bare `bash`', async () => {
    const { spawn: realSpawn } =
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      require('child_process') as typeof import('child_process');

    // Records the command name of every spawn request, then runs a
    // trivial `sh -c 'exit 0'` in its place so nothing is compiled.
    const spawnedCommands: string[] = [];
    const recordingSpawn: t.LocalSpawn = ((
      cmd: string,
      _args: string[],
      opts: import('child_process').SpawnOptions
    ) => {
      spawnedCommands.push(cmd);
      return realSpawn('sh', ['-c', 'exit 0'], opts);
    }) as unknown as t.LocalSpawn;

    await executeLocalCode(
      { lang: 'rs', code: 'fn main() {}', args: [] },
      {
        shell: '/bin/sh',
        exec: { spawn: recordingSpawn },
        sandbox: { enabled: false },
      }
    );

    // The first spawn must target the configured shell, not `bash`.
    expect(spawnedCommands[0]).toBe('/bin/sh');
  });
});
2076
+ });
2077
+
2078
+ describe('comprehensive review (round 12) — Codex P1 #36', () => {
describe('granular workspace flags override the legacy allowOutsideWorkspace', () => {
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const engine = require('../local/LocalExecutionEngine');
  const { getWriteRoots, getReadRoots } = engine;

  // Workspace root shared by every case below.
  const WS = '/tmp/ws';

  it('workspace.allowWriteOutside=false beats allowOutsideWorkspace=true (Codex P1 #36)', () => {
    // The explicit granular `false` must win over the legacy `true`: a
    // non-null root list containing the workspace is expected.
    const writeRoots = getWriteRoots({
      cwd: WS,
      workspace: { root: WS, allowWriteOutside: false },
      allowOutsideWorkspace: true,
    });
    expect(writeRoots).not.toBeNull();
    expect(writeRoots).toContain('/tmp/ws');
  });

  it('workspace.allowReadOutside=false beats allowOutsideWorkspace=true', () => {
    const readRoots = getReadRoots({
      cwd: WS,
      workspace: { root: WS, allowReadOutside: false },
      allowOutsideWorkspace: true,
    });
    expect(readRoots).not.toBeNull();
    expect(readRoots).toContain('/tmp/ws');
  });

  it('workspace.allowWriteOutside=true still permits writes outside', () => {
    const writeRoots = getWriteRoots({
      cwd: WS,
      workspace: { root: WS, allowWriteOutside: true },
    });
    expect(writeRoots).toBeNull();
  });

  it('legacy allowOutsideWorkspace=true still works when granular flag is unset', () => {
    const writeRoots = getWriteRoots({
      cwd: WS,
      workspace: { root: WS },
      allowOutsideWorkspace: true,
    });
    expect(writeRoots).toBeNull();
  });

  it('default (no flags) returns the workspace boundary for both read and write', () => {
    const bareConfig = { cwd: WS, workspace: { root: WS } };
    expect(getWriteRoots(bareConfig)).toEqual(['/tmp/ws']);
    expect(getReadRoots(bareConfig)).toEqual(['/tmp/ws']);
  });
});
2129
+ });
2130
+
2131
+ describe('comprehensive review (round 14) — Codex P1 #37 + P2 #38/#40/#41', () => {
describe('destructive path normalization (Codex P1 #37)', () => {
  /**
   * Asserts that `validateBashCommand` rejects `command` with a
   * "destructive command pattern" error.
   */
  const expectRejected = async (command: string): Promise<void> => {
    const verdict = await validateBashCommand(command);
    expect(verdict.valid).toBe(false);
    expect(verdict.errors.join('\n')).toMatch(/destructive command pattern/);
  };

  // `[command, description]` — home-directory targets written with a
  // trailing slash, both bare and quoted.
  const trailingSlashTargets: Array<[string, string]> = [
    ['rm -rf $HOME/', 'trailing slash on $HOME'],
    ['rm -rf ~/', 'trailing slash on ~'],
    ['rm -rf ${HOME}/', 'trailing slash on ${HOME}'],
    ['rm -rf "$HOME/"', 'quoted $HOME with trailing slash'],
    ['rm -rf "~/"', 'quoted ~ with trailing slash'],
    ['rm -rf "${HOME}/"', 'quoted ${HOME} with trailing slash'],
    ['chmod -R 777 ~/', 'chmod with trailing slash'],
    ['chmod -R 777 "$HOME/"', 'quoted chmod with trailing slash'],
  ];

  it.each(trailingSlashTargets)('blocks %s (%s)', async (cmd) => {
    await expectRejected(cmd);
  });

  it('still allows benign trailing-slash commands', async () => {
    // Same target shape, but listed rather than removed.
    const verdict = await validateBashCommand('ls $HOME/');
    expect(verdict.valid).toBe(true);
  });
});
2154
+
describe('destructive wildcard targets (Codex P1 [42])', () => {
  /**
   * Asserts that `validateBashCommand` rejects `command` with a
   * "destructive command pattern" error.
   */
  const expectRejected = async (command: string): Promise<void> => {
    const verdict = await validateBashCommand(command);
    expect(verdict.valid).toBe(false);
    expect(verdict.errors.join('\n')).toMatch(/destructive command pattern/);
  };

  // `[command, description]` — glob targets over the home directory and
  // the current directory.
  const globTargets: Array<[string, string]> = [
    ['rm -rf $HOME/*', 'glob over $HOME contents'],
    ['rm -rf ~/*', 'glob over ~ contents'],
    ['rm -rf ${HOME}/*', 'glob over ${HOME} contents'],
    ['rm -rf ./*', 'glob over current dir contents'],
    ['rm -rf .*', 'dotfile glob in current dir'],
    ['rm -rf $HOME*', 'prefix glob against $HOME base'],
    ['chmod -R 777 ~/*', 'chmod with glob'],
  ];

  it.each(globTargets)('blocks %s (%s)', async (cmd) => {
    await expectRejected(cmd);
  });

  it('does not flag benign glob commands (no rm/chmod/chown)', async () => {
    const verdict = await validateBashCommand('ls $HOME/*');
    expect(verdict.valid).toBe(true);
  });
});
2176
+
2177
+ describe('destructive dot-glob targets (Codex P1 [47])', () => {
2178
+ const cases: Array<[string, string]> = [
2179
+ ['rm -rf $HOME/.*', 'dotfile glob under $HOME'],
2180
+ ['rm -rf ~/.*', 'dotfile glob under ~'],
2181
+ ['rm -rf ${HOME}/.*', 'dotfile glob under ${HOME}'],
2182
+ ['rm -rf /.*', 'dotfile glob under root'],
2183
+ ['rm -rf "$HOME/.*"', 'quoted dotfile glob under $HOME'],
2184
+ ['chmod -R 777 ~/.*', 'chmod dotfile glob'],
2185
+ ];
2186
+ it.each(cases)('blocks %s (%s)', async (cmd) => {
2187
+ const result = await validateBashCommand(cmd);
2188
+ expect(result.valid).toBe(false);
2189
+ expect(result.errors.join('\n')).toMatch(/destructive command pattern/);
2190
+ });
2191
+
2192
+ it('blocks the positional-arg dot-glob form too', async () => {
2193
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
2194
+ const { executeLocalBashWithArgs } = require('../local/LocalExecutionEngine');
2195
+ await expect(
2196
+ executeLocalBashWithArgs('rm -rf "$1"', ['/.*'], {
2197
+ sandbox: { enabled: false },
2198
+ timeoutMs: 5000,
2199
+ })
2200
+ ).rejects.toThrow(/destructive command pattern.*protected target/i);
2201
+ });
2202
+ });
2203
+
2204
+ describe('strict postEditSyntaxCheck reverts the write on failure (Codex P2 [49])', () => {
2205
+ it('write_file: reverts the file contents to pre-write state when strict check fails', async () => {
2206
+ const cwd = await createTempDir();
2207
+ const fsp = await import('fs/promises');
2208
+ const file = join(cwd, 'a.js');
2209
+ await fsp.writeFile(file, '// good\nconsole.log("ok");\n');
2210
+
2211
+ const bundle = createLocalCodingToolBundle({
2212
+ cwd,
2213
+ postEditSyntaxCheck: 'strict',
2214
+ });
2215
+ const writeTool = bundle.tools.find(
2216
+ (tt) => tt.name === Constants.WRITE_FILE
2217
+ );
2218
+ // Bad JS content (missing closing brace) — node --check will
2219
+ // reject this and strict mode must throw AND restore the file.
2220
+ await expect(
2221
+ writeTool!.invoke({
2222
+ id: 'wf-strict',
2223
+ name: Constants.WRITE_FILE,
2224
+ args: { file_path: file, content: 'function broken( {\n' },
2225
+ type: 'tool_call',
2226
+ })
2227
+ ).rejects.toThrow(/syntax check failed.*reverted/i);
2228
+ // Critical assertion: file on disk is restored to the
2229
+ // pre-write content. Pre-fix it would still hold the broken
2230
+ // content.
2231
+ expect(await fsp.readFile(file, 'utf8')).toBe(
2232
+ '// good\nconsole.log("ok");\n'
2233
+ );
2234
+ });
2235
+
2236
+ it('write_file: deletes a brand-new file when strict check fails on first write', async () => {
2237
+ const cwd = await createTempDir();
2238
+ const fsp = await import('fs/promises');
2239
+ const file = join(cwd, 'never-existed.js');
2240
+
2241
+ const bundle = createLocalCodingToolBundle({
2242
+ cwd,
2243
+ postEditSyntaxCheck: 'strict',
2244
+ });
2245
+ const writeTool = bundle.tools.find(
2246
+ (tt) => tt.name === Constants.WRITE_FILE
2247
+ );
2248
+ await expect(
2249
+ writeTool!.invoke({
2250
+ id: 'wf-strict-new',
2251
+ name: Constants.WRITE_FILE,
2252
+ args: { file_path: file, content: 'function broken( {\n' },
2253
+ type: 'tool_call',
2254
+ })
2255
+ ).rejects.toThrow(/syntax check failed.*reverted/i);
2256
+ // Brand-new file must be removed on revert.
2257
+ await expect(fsp.stat(file)).rejects.toThrow();
2258
+ });
2259
+
2260
+ it('edit_file: reverts to pre-edit content when strict check fails', async () => {
2261
+ const cwd = await createTempDir();
2262
+ const fsp = await import('fs/promises');
2263
+ const file = join(cwd, 'b.js');
2264
+ const original = 'function ok() { return 1; }\n';
2265
+ await fsp.writeFile(file, original);
2266
+
2267
+ const bundle = createLocalCodingToolBundle({
2268
+ cwd,
2269
+ postEditSyntaxCheck: 'strict',
2270
+ });
2271
+ const editTool = bundle.tools.find(
2272
+ (tt) => tt.name === Constants.EDIT_FILE
2273
+ );
2274
+ await expect(
2275
+ editTool!.invoke({
2276
+ id: 'ef-strict',
2277
+ name: Constants.EDIT_FILE,
2278
+ args: {
2279
+ file_path: file,
2280
+ old_text: 'return 1;',
2281
+ new_text: 'return broken(',
2282
+ },
2283
+ type: 'tool_call',
2284
+ })
2285
+ ).rejects.toThrow(/syntax check failed.*reverted/i);
2286
+ expect(await fsp.readFile(file, 'utf8')).toBe(original);
2287
+ });
2288
+ });
2289
+
2290
+ describe('fallbackGrep skip sentinels do not count as matches (Codex P2 [43])', () => {
2291
+ it('reports `matches: 0` when only oversize files are present', async () => {
2292
+ _resetRipgrepCacheForTests();
2293
+ const realSpawn = (
2294
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
2295
+ require('child_process') as typeof import('child_process')
2296
+ ).spawn;
2297
+ const noRgBackend: t.LocalSpawn = ((
2298
+ cmd: string,
2299
+ args: string[],
2300
+ opts: import('child_process').SpawnOptions
2301
+ ) => {
2302
+ if (cmd === 'rg') return realSpawn('sh', ['-c', 'exit 127'], opts);
2303
+ return realSpawn(cmd, args, opts);
2304
+ }) as unknown as t.LocalSpawn;
2305
+
2306
+ const cwd = await createTempDir();
2307
+ const fsp = await import('fs/promises');
2308
+ // Two oversize files, no real matches.
2309
+ await fsp.writeFile(
2310
+ join(cwd, 'big1.txt'),
2311
+ Buffer.alloc(6 * 1024 * 1024, 'a')
2312
+ );
2313
+ await fsp.writeFile(
2314
+ join(cwd, 'big2.txt'),
2315
+ Buffer.alloc(6 * 1024 * 1024, 'a')
2316
+ );
2317
+
2318
+ const bundle = createLocalCodingToolBundle({
2319
+ cwd,
2320
+ exec: { spawn: noRgBackend },
2321
+ });
2322
+ const grepTool = bundle.tools.find(
2323
+ (tt) => tt.name === Constants.GREP_SEARCH
2324
+ );
2325
+ const result = await grepTool!.invoke({
2326
+ id: 'g43',
2327
+ name: Constants.GREP_SEARCH,
2328
+ args: { pattern: 'needle' },
2329
+ type: 'tool_call',
2330
+ });
2331
+ // Result is [text, artifact]; pull the artifact off the
2332
+ // ToolMessage shape.
2333
+ const text = JSON.stringify(result);
2334
+ // Artifact shape: { matches: 0, skipped: 2, engine: 'node-fallback' }
2335
+ expect(text).toContain('"matches":0');
2336
+ expect(text).toContain('"skipped":2');
2337
+ });
2338
+ });
2339
+
2340
+ describe('Send-input direct path threads additionalContextsSink (Codex P2 [44])', () => {
2341
+ it('materializes hook additionalContext as a HumanMessage on the Send branch', async () => {
2342
+ // The Send-input branch dispatches a single direct tool. It
2343
+ // had its own runDirectToolWithLifecycleHooks call site that
2344
+ // didn't pass the sink, so PreToolUse additionalContext was
2345
+ // dropped on this otherwise-supported input shape.
2346
+ const { tool } = await import('@langchain/core/tools');
2347
+ const { z } = await import('zod');
2348
+ const { HookRegistry } = await import('@/hooks');
2349
+ const { HumanMessage } = await import('@langchain/core/messages');
2350
+
2351
+ const echo = tool(async () => 'ECHO', {
2352
+ name: 'echo',
2353
+ description: 'send-input echo',
2354
+ schema: z.object({}).passthrough(),
2355
+ });
2356
+ const registry = new HookRegistry();
2357
+ registry.register('PreToolUse', {
2358
+ hooks: [
2359
+ async () => ({
2360
+ decision: 'allow',
2361
+ additionalContext: 'SEND-CTX: policy note via Send branch',
2362
+ }),
2363
+ ],
2364
+ });
2365
+
2366
+ const node = new ToolNode({
2367
+ tools: [echo],
2368
+ eventDrivenMode: true,
2369
+ hookRegistry: registry,
2370
+ directToolNames: new Set(['echo']),
2371
+ });
2372
+ // Construct a Send-shaped input: { lg_tool_call: ToolCall }
2373
+ const result = (await node.invoke({
2374
+ lg_tool_call: { id: 'send_1', name: 'echo', args: {} },
2375
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
2376
+ } as any)) as { messages: BaseMessage[] } | BaseMessage[];
2377
+ const messages = Array.isArray(result) ? result : result.messages;
2378
+ const found = messages.find(
2379
+ (m) =>
2380
+ m instanceof HumanMessage &&
2381
+ typeof m.content === 'string' &&
2382
+ m.content.includes('SEND-CTX')
2383
+ );
2384
+ expect(found).toBeDefined();
2385
+ });
2386
+ });
2387
+
2388
+ describe('bash args validated against destructive-target patterns (Codex P1 [45])', () => {
2389
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
2390
+ const { executeLocalBashWithArgs } = require('../local/LocalExecutionEngine');
2391
+
2392
+ it('blocks `rm -rf "$1"` + args=["/"]', async () => {
2393
+ await expect(
2394
+ executeLocalBashWithArgs('rm -rf "$1"', ['/'], {
2395
+ sandbox: { enabled: false },
2396
+ timeoutMs: 5000,
2397
+ })
2398
+ ).rejects.toThrow(/destructive command pattern.*protected target/i);
2399
+ });
2400
+
2401
+ it('blocks `chmod -R 777 "$1"` + args=["~/"]', async () => {
2402
+ await expect(
2403
+ executeLocalBashWithArgs('chmod -R 777 "$1"', ['~/'], {
2404
+ sandbox: { enabled: false },
2405
+ timeoutMs: 5000,
2406
+ })
2407
+ ).rejects.toThrow(/destructive command pattern.*protected target/i);
2408
+ });
2409
+
2410
+ it('blocks `rm -rf "$@"` + args=["$HOME"]', async () => {
2411
+ await expect(
2412
+ executeLocalBashWithArgs('rm -rf "$@"', ['$HOME'], {
2413
+ sandbox: { enabled: false },
2414
+ timeoutMs: 5000,
2415
+ })
2416
+ ).rejects.toThrow(/destructive command pattern.*protected target/i);
2417
+ });
2418
+
2419
+ it('allows benign positional arg use (echo + protected-shape arg)', async () => {
2420
+ // `echo` is not in the destructive-op set so a "/" arg is fine.
2421
+ const result = await executeLocalBashWithArgs('echo "$1"', ['/'], {
2422
+ sandbox: { enabled: false },
2423
+ timeoutMs: 5000,
2424
+ });
2425
+ expect(result.exitCode).toBe(0);
2426
+ });
2427
+
2428
+ it('allows destructive op with non-protected args', async () => {
2429
+ // `rm` of a clearly non-protected path inside a tmpdir is fine.
2430
+ const cwd = await createTempDir();
2431
+ const fsp = await import('fs/promises');
2432
+ const f = join(cwd, 'goner.txt');
2433
+ await fsp.writeFile(f, 'bye\n');
2434
+ const result = await executeLocalBashWithArgs('rm -f "$1"', [f], {
2435
+ cwd,
2436
+ sandbox: { enabled: false },
2437
+ timeoutMs: 5000,
2438
+ });
2439
+ expect(result.exitCode).toBe(0);
2440
+ });
2441
+ });
2442
+
2443
+ describe('direct-path additionalContext is marked as system metadata (Codex P2 [46])', () => {
2444
+ it('attaches `additional_kwargs.role: "system"` to the materialized HumanMessage', async () => {
2445
+ const { tool } = await import('@langchain/core/tools');
2446
+ const { z } = await import('zod');
2447
+ const { HookRegistry } = await import('@/hooks');
2448
+ const { HumanMessage, AIMessage } = await import(
2449
+ '@langchain/core/messages'
2450
+ );
2451
+
2452
+ const echo = tool(async () => 'OK', {
2453
+ name: 'echo',
2454
+ description: 'noop',
2455
+ schema: z.object({}).passthrough(),
2456
+ });
2457
+ const registry = new HookRegistry();
2458
+ registry.register('PreToolUse', {
2459
+ hooks: [
2460
+ async () => ({
2461
+ decision: 'allow',
2462
+ additionalContext: 'POLICY: be careful',
2463
+ }),
2464
+ ],
2465
+ });
2466
+ const node = new ToolNode({
2467
+ tools: [echo],
2468
+ eventDrivenMode: true,
2469
+ hookRegistry: registry,
2470
+ directToolNames: new Set(['echo']),
2471
+ });
2472
+ const ai = new AIMessage({
2473
+ content: '',
2474
+ tool_calls: [{ id: 'c46', name: 'echo', args: {} }],
2475
+ });
2476
+ const result = (await node.invoke({ messages: [ai] })) as
2477
+ | { messages: BaseMessage[] }
2478
+ | BaseMessage[];
2479
+ const messages = Array.isArray(result) ? result : result.messages;
2480
+ const human = messages.find(
2481
+ (m): m is InstanceType<typeof HumanMessage> =>
2482
+ m instanceof HumanMessage &&
2483
+ typeof m.content === 'string' &&
2484
+ m.content.includes('POLICY')
2485
+ );
2486
+ expect(human).toBeDefined();
2487
+ // The marker the event-driven path sets — direct path now
2488
+ // matches it.
2489
+ expect(human?.additional_kwargs).toMatchObject({
2490
+ role: 'system',
2491
+ source: 'hook',
2492
+ });
2493
+ });
2494
+ });
2495
+
2496
+ describe('resolveWorkspacePathSafe routes through WorkspaceFS.realpath (Codex P2 #38)', () => {
2497
+ it('honors a custom workspace fs realpath impl', async () => {
2498
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
2499
+ const { resolveWorkspacePathSafe } = require('../local/LocalExecutionEngine');
2500
+ const calls: string[] = [];
2501
+ const fakeFs = {
2502
+ readFile: async () => '',
2503
+ writeFile: async () => undefined,
2504
+ stat: async () => ({
2505
+ isFile: () => true,
2506
+ isDirectory: () => false,
2507
+ size: 0,
2508
+ }),
2509
+ readdir: async () => [],
2510
+ mkdir: async () => undefined,
2511
+ // The custom realpath that the safe-path resolver MUST use.
2512
+ // Returns paths unchanged so the lexical containment check
2513
+ // succeeds for in-workspace targets.
2514
+ realpath: async (p: string): Promise<string> => {
2515
+ calls.push(p);
2516
+ return p;
2517
+ },
2518
+ unlink: async () => undefined,
2519
+ open: async () => {
2520
+ throw new Error('not implemented');
2521
+ },
2522
+ };
2523
+
2524
+ await resolveWorkspacePathSafe('/virtual/ws/file.ts', {
2525
+ cwd: '/virtual/ws',
2526
+ workspace: { root: '/virtual/ws' },
2527
+ exec: { fs: fakeFs as unknown as never },
2528
+ });
2529
+
2530
+ // Must have called the WorkspaceFS realpath at least once
2531
+ // (for either the root or the candidate path). Pre-fix the
2532
+ // host fs/promises.realpath was used instead.
2533
+ expect(calls.length).toBeGreaterThan(0);
2534
+ expect(calls.every((p) => p.startsWith('/virtual/'))).toBe(true);
2535
+ });
2536
+ });
2537
+
2538
+ describe('syntax-check probe cache also keys on env (Codex P2 #40)', () => {
2539
+ it('does not bleed `hasNode` verdict from one env to another on the same backend', async () => {
2540
+ _resetSyntaxCheckProbeCacheForTests();
2541
+ const realSpawn = (
2542
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
2543
+ require('child_process') as typeof import('child_process')
2544
+ ).spawn;
2545
+ const calls: Array<{ cmd: string; env?: NodeJS.ProcessEnv }> = [];
2546
+ // Backend that returns `node --version` success ONLY when
2547
+ // env.PATH includes 'with-node'. Mirrors P1 #34's shape.
2548
+ const envSensitive: t.LocalSpawn = ((
2549
+ cmd: string,
2550
+ args: string[],
2551
+ opts: import('child_process').SpawnOptions
2552
+ ) => {
2553
+ calls.push({ cmd, env: opts.env as NodeJS.ProcessEnv });
2554
+ if (cmd === 'node' && args[0] === '--version') {
2555
+ const env = (opts.env ?? {}) as NodeJS.ProcessEnv;
2556
+ if (env.PATH?.includes('with-node') === true) {
2557
+ return realSpawn('sh', ['-c', 'exit 0'], opts);
2558
+ }
2559
+ return realSpawn('sh', ['-c', 'exit 127'], opts);
2560
+ }
2561
+ // Run all other spawns through a no-op so we don't hit
2562
+ // real node/python/bash on the host.
2563
+ return realSpawn('sh', ['-c', 'exit 0'], opts);
2564
+ }) as unknown as t.LocalSpawn;
2565
+
2566
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
2567
+ const { runPostEditSyntaxCheck } = require('../local/syntaxCheck');
2568
+ const cwd = await createTempDir();
2569
+ const file = join(cwd, 'a.js');
2570
+ await (await import('fs/promises')).writeFile(file, 'function (\n');
2571
+
2572
+ // Run A: env says node IS available — probe records `true`
2573
+ // for (backend, envA).
2574
+ await runPostEditSyntaxCheck(file, {
2575
+ exec: { spawn: envSensitive },
2576
+ env: { PATH: '/with-node' },
2577
+ });
2578
+
2579
+ // Run B: env says node is NOT available. Pre-fix the cache
2580
+ // would reuse the (backend) entry and try to actually
2581
+ // syntax-check via the missing node. Now: separate cache slot
2582
+ // for envB → its own probe → records `false` → skips check.
2583
+ const probeCallsBefore = calls.filter(
2584
+ (c) => c.cmd === 'node' && c.env?.PATH?.includes('without-node') === true
2585
+ ).length;
2586
+ await runPostEditSyntaxCheck(file, {
2587
+ exec: { spawn: envSensitive },
2588
+ env: { PATH: '/without-node' },
2589
+ });
2590
+ const probeCallsAfter = calls.filter(
2591
+ (c) => c.cmd === 'node' && c.env?.PATH?.includes('without-node') === true
2592
+ ).length;
2593
+ // A fresh probe must have run for envB (count went up).
2594
+ expect(probeCallsAfter).toBeGreaterThan(probeCallsBefore);
2595
+ });
2596
+ });
2597
+
2598
+ describe('fallbackGrep skips files larger than the per-file cap (Codex P2 #41)', () => {
2599
+ it('emits a sentinel and continues instead of reading multi-MB files into memory', async () => {
2600
+ _resetRipgrepCacheForTests();
2601
+ const realSpawn = (
2602
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
2603
+ require('child_process') as typeof import('child_process')
2604
+ ).spawn;
2605
+ // Force the Node fallback by making rg unavailable.
2606
+ const noRgBackend: t.LocalSpawn = ((
2607
+ cmd: string,
2608
+ args: string[],
2609
+ opts: import('child_process').SpawnOptions
2610
+ ) => {
2611
+ if (cmd === 'rg') {
2612
+ return realSpawn('sh', ['-c', 'exit 127'], opts);
2613
+ }
2614
+ return realSpawn(cmd, args, opts);
2615
+ }) as unknown as t.LocalSpawn;
2616
+
2617
+ const cwd = await createTempDir();
2618
+ const fsp = await import('fs/promises');
2619
+ // Write a small file (matches the search) and a 6 MB file
2620
+ // (over the 5 MB cap) — the fallback must skip the big one
2621
+ // with a sentinel and still find the small-file match.
2622
+ await fsp.writeFile(join(cwd, 'small.txt'), 'needle\n');
2623
+ const big = Buffer.alloc(6 * 1024 * 1024, 'a');
2624
+ await fsp.writeFile(join(cwd, 'big.txt'), big);
2625
+
2626
+ const bundle = createLocalCodingToolBundle({
2627
+ cwd,
2628
+ exec: { spawn: noRgBackend },
2629
+ });
2630
+ const grepTool = bundle.tools.find(
2631
+ (tt) => tt.name === Constants.GREP_SEARCH
2632
+ );
2633
+ const result = await grepTool!.invoke({
2634
+ id: 'g41',
2635
+ name: Constants.GREP_SEARCH,
2636
+ args: { pattern: 'needle' },
2637
+ type: 'tool_call',
2638
+ });
2639
+ const text = JSON.stringify(result);
2640
+ // Small-file match landed.
2641
+ expect(text).toContain('needle');
2642
+ // Big-file got the skip sentinel (didn't OOM, didn't read
2643
+ // into memory).
2644
+ expect(text).toContain('skipped');
2645
+ });
2646
+ });
2647
+ });