@librechat/agents 3.1.77 → 3.1.78-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/dist/cjs/common/enum.cjs +54 -0
  2. package/dist/cjs/common/enum.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +148 -4
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +291 -0
  6. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -0
  7. package/dist/cjs/main.cjs +90 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/anthropicToolCache.cjs +102 -0
  10. package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -0
  11. package/dist/cjs/messages/prune.cjs +27 -0
  12. package/dist/cjs/messages/prune.cjs.map +1 -1
  13. package/dist/cjs/messages/recency.cjs +99 -0
  14. package/dist/cjs/messages/recency.cjs.map +1 -0
  15. package/dist/cjs/run.cjs +30 -0
  16. package/dist/cjs/run.cjs.map +1 -1
  17. package/dist/cjs/summarization/node.cjs +100 -6
  18. package/dist/cjs/summarization/node.cjs.map +1 -1
  19. package/dist/cjs/tools/ToolNode.cjs +635 -23
  20. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  21. package/dist/cjs/tools/local/CompileCheckTool.cjs +227 -0
  22. package/dist/cjs/tools/local/CompileCheckTool.cjs.map +1 -0
  23. package/dist/cjs/tools/local/FileCheckpointer.cjs +90 -0
  24. package/dist/cjs/tools/local/FileCheckpointer.cjs.map +1 -0
  25. package/dist/cjs/tools/local/LocalCodingTools.cjs +1098 -0
  26. package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -0
  27. package/dist/cjs/tools/local/LocalExecutionEngine.cjs +1042 -0
  28. package/dist/cjs/tools/local/LocalExecutionEngine.cjs.map +1 -0
  29. package/dist/cjs/tools/local/LocalExecutionTools.cjs +122 -0
  30. package/dist/cjs/tools/local/LocalExecutionTools.cjs.map +1 -0
  31. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs +453 -0
  32. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs.map +1 -0
  33. package/dist/cjs/tools/local/attachments.cjs +183 -0
  34. package/dist/cjs/tools/local/attachments.cjs.map +1 -0
  35. package/dist/cjs/tools/local/bashAst.cjs +129 -0
  36. package/dist/cjs/tools/local/bashAst.cjs.map +1 -0
  37. package/dist/cjs/tools/local/editStrategies.cjs +188 -0
  38. package/dist/cjs/tools/local/editStrategies.cjs.map +1 -0
  39. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs +141 -0
  40. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs.map +1 -0
  41. package/dist/cjs/tools/local/syntaxCheck.cjs +182 -0
  42. package/dist/cjs/tools/local/syntaxCheck.cjs.map +1 -0
  43. package/dist/cjs/tools/local/textEncoding.cjs +30 -0
  44. package/dist/cjs/tools/local/textEncoding.cjs.map +1 -0
  45. package/dist/cjs/tools/local/workspaceFS.cjs +51 -0
  46. package/dist/cjs/tools/local/workspaceFS.cjs.map +1 -0
  47. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +1 -0
  48. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  49. package/dist/esm/common/enum.mjs +53 -1
  50. package/dist/esm/common/enum.mjs.map +1 -1
  51. package/dist/esm/graphs/Graph.mjs +149 -5
  52. package/dist/esm/graphs/Graph.mjs.map +1 -1
  53. package/dist/esm/hooks/createWorkspacePolicyHook.mjs +289 -0
  54. package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -0
  55. package/dist/esm/main.mjs +17 -2
  56. package/dist/esm/main.mjs.map +1 -1
  57. package/dist/esm/messages/anthropicToolCache.mjs +99 -0
  58. package/dist/esm/messages/anthropicToolCache.mjs.map +1 -0
  59. package/dist/esm/messages/prune.mjs +26 -1
  60. package/dist/esm/messages/prune.mjs.map +1 -1
  61. package/dist/esm/messages/recency.mjs +97 -0
  62. package/dist/esm/messages/recency.mjs.map +1 -0
  63. package/dist/esm/run.mjs +30 -0
  64. package/dist/esm/run.mjs.map +1 -1
  65. package/dist/esm/summarization/node.mjs +100 -6
  66. package/dist/esm/summarization/node.mjs.map +1 -1
  67. package/dist/esm/tools/ToolNode.mjs +635 -23
  68. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  69. package/dist/esm/tools/local/CompileCheckTool.mjs +223 -0
  70. package/dist/esm/tools/local/CompileCheckTool.mjs.map +1 -0
  71. package/dist/esm/tools/local/FileCheckpointer.mjs +87 -0
  72. package/dist/esm/tools/local/FileCheckpointer.mjs.map +1 -0
  73. package/dist/esm/tools/local/LocalCodingTools.mjs +1075 -0
  74. package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -0
  75. package/dist/esm/tools/local/LocalExecutionEngine.mjs +1022 -0
  76. package/dist/esm/tools/local/LocalExecutionEngine.mjs.map +1 -0
  77. package/dist/esm/tools/local/LocalExecutionTools.mjs +117 -0
  78. package/dist/esm/tools/local/LocalExecutionTools.mjs.map +1 -0
  79. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs +448 -0
  80. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs.map +1 -0
  81. package/dist/esm/tools/local/attachments.mjs +180 -0
  82. package/dist/esm/tools/local/attachments.mjs.map +1 -0
  83. package/dist/esm/tools/local/bashAst.mjs +126 -0
  84. package/dist/esm/tools/local/bashAst.mjs.map +1 -0
  85. package/dist/esm/tools/local/editStrategies.mjs +185 -0
  86. package/dist/esm/tools/local/editStrategies.mjs.map +1 -0
  87. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs +137 -0
  88. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs.map +1 -0
  89. package/dist/esm/tools/local/syntaxCheck.mjs +179 -0
  90. package/dist/esm/tools/local/syntaxCheck.mjs.map +1 -0
  91. package/dist/esm/tools/local/textEncoding.mjs +27 -0
  92. package/dist/esm/tools/local/textEncoding.mjs.map +1 -0
  93. package/dist/esm/tools/local/workspaceFS.mjs +49 -0
  94. package/dist/esm/tools/local/workspaceFS.mjs.map +1 -0
  95. package/dist/esm/tools/subagent/SubagentExecutor.mjs +1 -0
  96. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  97. package/dist/types/common/enum.d.ts +39 -1
  98. package/dist/types/graphs/Graph.d.ts +34 -0
  99. package/dist/types/hooks/createWorkspacePolicyHook.d.ts +95 -0
  100. package/dist/types/hooks/index.d.ts +2 -0
  101. package/dist/types/index.d.ts +1 -0
  102. package/dist/types/messages/anthropicToolCache.d.ts +51 -0
  103. package/dist/types/messages/index.d.ts +2 -0
  104. package/dist/types/messages/prune.d.ts +11 -0
  105. package/dist/types/messages/recency.d.ts +64 -0
  106. package/dist/types/run.d.ts +21 -0
  107. package/dist/types/tools/ToolNode.d.ts +145 -2
  108. package/dist/types/tools/local/CompileCheckTool.d.ts +31 -0
  109. package/dist/types/tools/local/FileCheckpointer.d.ts +39 -0
  110. package/dist/types/tools/local/LocalCodingTools.d.ts +57 -0
  111. package/dist/types/tools/local/LocalExecutionEngine.d.ts +149 -0
  112. package/dist/types/tools/local/LocalExecutionTools.d.ts +9 -0
  113. package/dist/types/tools/local/LocalProgrammaticToolCalling.d.ts +21 -0
  114. package/dist/types/tools/local/attachments.d.ts +84 -0
  115. package/dist/types/tools/local/bashAst.d.ts +11 -0
  116. package/dist/types/tools/local/editStrategies.d.ts +28 -0
  117. package/dist/types/tools/local/index.d.ts +12 -0
  118. package/dist/types/tools/local/resolveLocalExecutionTools.d.ts +38 -0
  119. package/dist/types/tools/local/syntaxCheck.d.ts +42 -0
  120. package/dist/types/tools/local/textEncoding.d.ts +21 -0
  121. package/dist/types/tools/local/workspaceFS.d.ts +49 -0
  122. package/dist/types/types/hitl.d.ts +56 -27
  123. package/dist/types/types/run.d.ts +8 -1
  124. package/dist/types/types/summarize.d.ts +30 -0
  125. package/dist/types/types/tools.d.ts +341 -6
  126. package/package.json +21 -2
  127. package/src/common/enum.ts +54 -0
  128. package/src/graphs/Graph.ts +164 -6
  129. package/src/hooks/__tests__/compactHooks.test.ts +38 -2
  130. package/src/hooks/__tests__/createWorkspacePolicyHook.test.ts +393 -0
  131. package/src/hooks/createWorkspacePolicyHook.ts +355 -0
  132. package/src/hooks/index.ts +6 -0
  133. package/src/index.ts +1 -0
  134. package/src/messages/__tests__/anthropicToolCache.test.ts +125 -0
  135. package/src/messages/__tests__/recency.test.ts +267 -0
  136. package/src/messages/anthropicToolCache.ts +116 -0
  137. package/src/messages/index.ts +2 -0
  138. package/src/messages/prune.ts +27 -1
  139. package/src/messages/recency.ts +155 -0
  140. package/src/run.ts +31 -0
  141. package/src/scripts/compare_pi_vs_ours.ts +840 -0
  142. package/src/scripts/local_engine.ts +166 -0
  143. package/src/scripts/local_engine_checkpointer.ts +205 -0
  144. package/src/scripts/local_engine_compile.ts +263 -0
  145. package/src/scripts/local_engine_hooks.ts +226 -0
  146. package/src/scripts/local_engine_image.ts +201 -0
  147. package/src/scripts/local_engine_ptc.ts +151 -0
  148. package/src/scripts/local_engine_workspace.ts +258 -0
  149. package/src/scripts/summarization-recency.ts +462 -0
  150. package/src/specs/prune.test.ts +39 -0
  151. package/src/summarization/__tests__/node.test.ts +499 -3
  152. package/src/summarization/node.ts +124 -7
  153. package/src/tools/ToolNode.ts +769 -20
  154. package/src/tools/__tests__/LocalExecutionTools.test.ts +2647 -0
  155. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +175 -0
  156. package/src/tools/__tests__/ToolNode.outputReferences.test.ts +114 -0
  157. package/src/tools/__tests__/ToolNode.session.test.ts +84 -0
  158. package/src/tools/__tests__/directToolHITLResumeScope.test.ts +467 -0
  159. package/src/tools/__tests__/directToolHooks.test.ts +411 -0
  160. package/src/tools/__tests__/localToolNames.test.ts +73 -0
  161. package/src/tools/__tests__/workspaceSeam.test.ts +134 -0
  162. package/src/tools/local/CompileCheckTool.ts +278 -0
  163. package/src/tools/local/FileCheckpointer.ts +93 -0
  164. package/src/tools/local/LocalCodingTools.ts +1342 -0
  165. package/src/tools/local/LocalExecutionEngine.ts +1329 -0
  166. package/src/tools/local/LocalExecutionTools.ts +167 -0
  167. package/src/tools/local/LocalProgrammaticToolCalling.ts +594 -0
  168. package/src/tools/local/__tests__/FileCheckpointer.test.ts +120 -0
  169. package/src/tools/local/__tests__/editStrategies.test.ts +134 -0
  170. package/src/tools/local/attachments.ts +251 -0
  171. package/src/tools/local/bashAst.ts +151 -0
  172. package/src/tools/local/editStrategies.ts +188 -0
  173. package/src/tools/local/index.ts +12 -0
  174. package/src/tools/local/resolveLocalExecutionTools.ts +208 -0
  175. package/src/tools/local/syntaxCheck.ts +243 -0
  176. package/src/tools/local/textEncoding.ts +37 -0
  177. package/src/tools/local/workspaceFS.ts +89 -0
  178. package/src/types/hitl.ts +56 -27
  179. package/src/types/run.ts +12 -1
  180. package/src/types/summarize.ts +31 -0
  181. package/src/types/tools.ts +359 -7
@@ -0,0 +1,1342 @@
1
+ import { basename, dirname } from 'path';
2
+ import { tool } from '@langchain/core/tools';
3
+ import { createTwoFilesPatch } from 'diff';
4
+ import type { DynamicStructuredTool } from '@langchain/core/tools';
5
+ import type * as t from '@/types';
6
+ import {
7
+ createLocalBashExecutionTool,
8
+ createLocalCodeExecutionTool,
9
+ } from './LocalExecutionTools';
10
+ import {
11
+ createLocalBashProgrammaticToolCallingTool,
12
+ createLocalProgrammaticToolCallingTool,
13
+ } from './LocalProgrammaticToolCalling';
14
+ import {
15
+ getSpawn,
16
+ getWorkspaceFS,
17
+ resolveWorkspacePathSafe,
18
+ spawnLocalProcess,
19
+ truncateLocalOutput,
20
+ } from './LocalExecutionEngine';
21
+ import { createLocalFileCheckpointer } from './FileCheckpointer';
22
+ import { applyEdit, locateEdit } from './editStrategies';
23
+ import { decodeFile, encodeFile } from './textEncoding';
24
+ import { classifyAttachment, imageAttachmentContent } from './attachments';
25
+ import { runPostEditSyntaxCheck } from './syntaxCheck';
26
+ import {
27
+ createCompileCheckTool,
28
+ createCompileCheckToolDefinition,
29
+ } from './CompileCheckTool';
30
+ import { Constants } from '@/common';
31
+
32
// Hard caps applied by the local coding tools so one tool call cannot
// flood the model context or read unbounded data off disk.
const MAX_READ_CHARS = 256000;
const DEFAULT_MAX_RESULTS = 200;
const DEFAULT_MAX_READ_BYTES = 10 * 1024 * 1024;
// Number of leading bytes sampled when sniffing a file for binary content.
const BINARY_DETECTION_BYTES = 8000;

/**
 * Tool name aliases retained for back-compat with consumers that imported
 * the per-file `Local*ToolName` constants. The canonical names live on
 * `Constants.*` (see `src/common/enum.ts`); these aliases just point at
 * them so a typo upstream gets caught at the type level.
 */
export const LocalWriteFileToolName = Constants.WRITE_FILE;
export const LocalEditFileToolName = Constants.EDIT_FILE;
export const LocalGrepSearchToolName = Constants.GREP_SEARCH;
export const LocalGlobSearchToolName = Constants.GLOB_SEARCH;
export const LocalListDirectoryToolName = Constants.LIST_DIRECTORY;
48
+
49
/** JSON-schema parameters for the local `read_file` tool. */
export const LocalReadFileToolSchema: t.JsonSchemaType = {
  type: 'object',
  properties: {
    file_path: {
      type: 'string',
      description: 'Path to a local file, relative to the configured cwd unless absolute paths are allowed.',
    },
    offset: {
      type: 'integer',
      description: 'Optional 1-indexed line offset for large files.',
    },
    limit: {
      type: 'integer',
      description: 'Optional maximum number of lines to return.',
    },
  },
  required: ['file_path'],
};

/** JSON-schema parameters for the local `write_file` tool. */
export const LocalWriteFileToolSchema: t.JsonSchemaType = {
  type: 'object',
  properties: {
    file_path: {
      type: 'string',
      description: 'Path to write, relative to the configured cwd unless absolute paths are allowed.',
    },
    content: {
      type: 'string',
      description: 'Complete file contents to write.',
    },
  },
  required: ['file_path', 'content'],
};

/**
 * JSON-schema parameters for the local `edit_file` tool. Accepts either a
 * single old_text/new_text pair, a batch `edits` array, or both — only
 * `file_path` is required here; the handler validates that at least one
 * edit was actually supplied.
 */
export const LocalEditFileToolSchema: t.JsonSchemaType = {
  type: 'object',
  properties: {
    file_path: {
      type: 'string',
      description: 'Path to edit, relative to the configured cwd unless absolute paths are allowed.',
    },
    old_text: {
      type: 'string',
      description: 'Exact text to replace. Must appear exactly once.',
    },
    new_text: {
      type: 'string',
      description: 'Replacement text.',
    },
    edits: {
      type: 'array',
      description: 'Optional batch of exact replacements. Each old_text must appear exactly once in the original file.',
      items: {
        type: 'object',
        properties: {
          old_text: { type: 'string' },
          new_text: { type: 'string' },
        },
        required: ['old_text', 'new_text'],
      },
    },
  },
  required: ['file_path'],
};

/** JSON-schema parameters for the local `grep_search` tool. */
export const LocalGrepSearchToolSchema: t.JsonSchemaType = {
  type: 'object',
  properties: {
    pattern: {
      type: 'string',
      description: 'Regex pattern to search for.',
    },
    path: {
      type: 'string',
      description: 'Directory or file to search. Defaults to cwd.',
    },
    glob: {
      type: 'string',
      description: 'Optional file glob passed to rg -g.',
    },
    max_results: {
      type: 'integer',
      description: 'Maximum matching lines to return.',
    },
  },
  required: ['pattern'],
};

/** JSON-schema parameters for the local `glob_search` tool. */
export const LocalGlobSearchToolSchema: t.JsonSchemaType = {
  type: 'object',
  properties: {
    pattern: {
      type: 'string',
      description: 'File glob pattern, for example "src/**/*.ts".',
    },
    path: {
      type: 'string',
      description: 'Directory to search. Defaults to cwd.',
    },
    max_results: {
      type: 'integer',
      description: 'Maximum file paths to return.',
    },
  },
  required: ['pattern'],
};

/** JSON-schema parameters for the local `list_directory` tool (no required fields). */
export const LocalListDirectoryToolSchema: t.JsonSchemaType = {
  type: 'object',
  properties: {
    path: {
      type: 'string',
      description: 'Directory to list. Defaults to cwd.',
    },
  },
};
165
+
166
/**
 * Render a numbered line window of `content` for read_file output.
 *
 * @param content - Full decoded file text.
 * @param offset - Optional 1-indexed first line of the window; values
 *   below 1 clamp to line 1.
 * @param limit - Optional maximum number of lines; `null`/`undefined`
 *   or a non-positive value means "to end of file".
 * @returns The selected lines, each prefixed with a 6-column
 *   right-aligned line number and a tab, plus a `truncated` flag that
 *   is set when either `limit` cut the window short or the numbered
 *   text exceeded the MAX_READ_CHARS character cap.
 */
function lineWindow(
  content: string,
  offset?: number,
  limit?: number
): { text: string; truncated: boolean } {
  const start = Math.max((offset ?? 1) - 1, 0);
  // Avoid splitting the whole file when the caller asked for a small
  // window. For a 10 MB file with `offset: 1, limit: 10`, the prior
  // `content.split('\n')` allocated millions of strings to throw all
  // but 10 away. We walk newline indices directly: O(start + limit)
  // instead of O(file). When `limit` is omitted, fall back to the
  // simple split — it's the same amount of work either way.
  if (limit == null || limit <= 0) {
    const lines = content.split('\n');
    const selected = lines.slice(start);
    const numbered = selected
      .map(
        (line, index) =>
          `${String(start + index + 1).padStart(6, ' ')}\t${line}`
      )
      .join('\n');
    return {
      text: truncateLocalOutput(numbered, MAX_READ_CHARS),
      truncated: numbered.length > MAX_READ_CHARS,
    };
  }
  // Walk to the start line by counting newlines.
  let cursor = 0;
  for (let i = 0; i < start; i++) {
    const next = content.indexOf('\n', cursor);
    if (next === -1) {
      // File has fewer lines than `offset` — return empty window.
      return { text: '', truncated: false };
    }
    cursor = next + 1;
  }
  // Collect up to `limit` lines from `cursor`.
  const out: string[] = [];
  let pos = cursor;
  // `exhausted` stays true only when the window reached end-of-file;
  // it flips to false when the limit fired with content remaining.
  let exhausted = true;
  for (let k = 0; k < limit; k++) {
    const next = content.indexOf('\n', pos);
    if (next === -1) {
      // Last line has no trailing newline — take the tail and stop.
      out.push(content.slice(pos));
      break;
    }
    out.push(content.slice(pos, next));
    pos = next + 1;
    if (k === limit - 1 && pos < content.length) {
      exhausted = false;
    }
  }
  const numbered = out
    .map(
      (text, index) =>
        `${String(start + index + 1).padStart(6, ' ')}\t${text}`
    )
    .join('\n');
  return {
    text: truncateLocalOutput(numbered, MAX_READ_CHARS),
    truncated: !exhausted || numbered.length > MAX_READ_CHARS,
  };
}
229
+
230
// Cap on the unified diff embedded in write_file/edit_file summaries.
const MAX_DIFF_CHARS = 4000;

// Result of an optional post-edit syntax check: the configured mode
// ('auto' reports, 'strict' gates) plus the checker outcome, or
// `undefined` when the check is disabled or produced no outcome.
type SyntaxRun =
  | {
      mode: 'auto' | 'strict';
      outcome: import('./syntaxCheck').SyntaxCheckOutcome;
    }
  | undefined;
238
+
239
+ async function maybeRunSyntaxCheck(
240
+ path: string,
241
+ config: t.LocalExecutionConfig
242
+ ): Promise<SyntaxRun> {
243
+ const mode = config.postEditSyntaxCheck ?? 'off';
244
+ if (mode === 'off') return undefined;
245
+ const outcome = await runPostEditSyntaxCheck(path, config);
246
+ if (outcome == null) return undefined;
247
+ return { mode, outcome };
248
+ }
249
+
250
+ function appendSyntaxCheckSummary(
251
+ base: string,
252
+ run: SyntaxRun
253
+ ): string {
254
+ if (run == null) return base;
255
+ if (run.outcome.ok) return base;
256
+ const banner =
257
+ run.mode === 'strict'
258
+ ? `\n\n[syntax-check FAILED via ${run.outcome.checker}]\n`
259
+ : `\n\n[syntax-check warning via ${run.outcome.checker}]\n`;
260
+ return `${base}${banner}${run.outcome.output}`;
261
+ }
262
+
263
+ /**
264
+ * Revert a write_file/edit_file mutation in `postEditSyntaxCheck:
265
+ * 'strict'` mode after the post-write syntax check failed. Strict
266
+ * mode advertises a safety gate, so leaving the corrupted file on
267
+ * disk + throwing is a half-broken contract — the model "reacts" to
268
+ * the error but the next call sees broken on-disk state. Codex P2
269
+ * [49]. Best-effort: a swallowed error here means the workspace is
270
+ * still in the bad post-write state, but we still throw the
271
+ * original syntax-check error so the caller knows.
272
+ *
273
+ * - If the file existed pre-write: restore the previous bytes with
274
+ * the original encoding.
275
+ * - If the file is brand-new: unlink it.
276
+ */
277
+ async function revertStrictWrite(
278
+ fs: import('./workspaceFS').WorkspaceFS,
279
+ path: string,
280
+ existed: boolean,
281
+ before: string,
282
+ encoding: { text: string; hasBom: boolean; newline: '\n' | '\r\n' }
283
+ ): Promise<void> {
284
+ try {
285
+ if (existed) {
286
+ // encodeFile uses encoding.{hasBom,newline} to restore the
287
+ // on-disk shape; the `text` field is overridden by the
288
+ // explicit `before` arg we pass in.
289
+ await fs.writeFile(
290
+ path,
291
+ encodeFile(before, { ...encoding, text: before }),
292
+ 'utf8'
293
+ );
294
+ } else {
295
+ await fs.unlink(path);
296
+ }
297
+ } catch {
298
+ /* best-effort: caller still sees the original syntax error */
299
+ }
300
+ }
301
+
302
+ function summariseDiff(
303
+ filePath: string,
304
+ before: string,
305
+ after: string
306
+ ): string {
307
+ if (before === after) {
308
+ return '(no textual changes)';
309
+ }
310
+ const name = basename(filePath);
311
+ const patch = createTwoFilesPatch(name, name, before, after, '', '', {
312
+ context: 3,
313
+ });
314
+ if (patch.length <= MAX_DIFF_CHARS) {
315
+ return patch;
316
+ }
317
+ return (
318
+ patch.slice(0, MAX_DIFF_CHARS) +
319
+ `\n[... diff truncated, ${patch.length - MAX_DIFF_CHARS} more chars ...]`
320
+ );
321
+ }
322
+
323
+ function normalizeEdits(input: {
324
+ old_text?: string;
325
+ new_text?: string;
326
+ edits?: Array<{ old_text?: string; new_text?: string }>;
327
+ }): Array<{ oldText: string; newText: string }> {
328
+ const edits = Array.isArray(input.edits)
329
+ ? input.edits.map((edit) => ({
330
+ oldText: edit.old_text ?? '',
331
+ newText: edit.new_text ?? '',
332
+ }))
333
+ : [];
334
+
335
+ if (input.old_text != null || input.new_text != null) {
336
+ edits.push({
337
+ oldText: input.old_text ?? '',
338
+ newText: input.new_text ?? '',
339
+ });
340
+ }
341
+
342
+ return edits;
343
+ }
344
+
345
+ function toolDefinition(
346
+ name: string,
347
+ description: string,
348
+ parameters: t.JsonSchemaType
349
+ ): t.LCTool {
350
+ return {
351
+ name,
352
+ description,
353
+ parameters,
354
+ allowed_callers: ['direct', 'code_execution'],
355
+ responseFormat: Constants.CONTENT_AND_ARTIFACT,
356
+ toolType: 'builtin',
357
+ };
358
+ }
359
+
360
+ async function looksBinary(
361
+ path: string,
362
+ fs: import('./workspaceFS').WorkspaceFS
363
+ ): Promise<boolean> {
364
+ let handle;
365
+ try {
366
+ handle = await fs.open(path, 'r');
367
+ const sample = Buffer.alloc(BINARY_DETECTION_BYTES);
368
+ const { bytesRead } = await handle.read(
369
+ sample,
370
+ 0,
371
+ BINARY_DETECTION_BYTES,
372
+ 0
373
+ );
374
+ for (let i = 0; i < bytesRead; i++) {
375
+ if (sample[i] === 0) {
376
+ return true;
377
+ }
378
+ }
379
+ return false;
380
+ } finally {
381
+ await handle?.close();
382
+ }
383
+ }
384
+
385
// Default cap on attachment payloads (images/PDFs) embedded by read_file.
const DEFAULT_MAX_ATTACHMENT_BYTES = 5 * 1024 * 1024;

/**
 * Build the local `read_file` tool.
 *
 * Flow: resolve the path against the workspace (read scope) → stat and
 * reject non-files → enforce the byte read cap → binary sniff. Binary
 * files either become attachments (when `attachReadAttachments` is on)
 * or are refused; text files are returned as a numbered line window.
 * Every return is a `[content, artifact]` pair (content-and-artifact
 * response format).
 *
 * @param config - Local execution config (workspace root, read caps,
 *   attachment mode). Defaults to `{}`.
 */
export function createLocalReadFileTool(
  config: t.LocalExecutionConfig = {}
): DynamicStructuredTool {
  const fs = getWorkspaceFS(config);
  return tool(
    async (rawInput) => {
      const input = rawInput as {
        file_path: string;
        offset?: number;
        limit?: number;
      };
      // Path traversal / workspace-escape checks live in the resolver.
      const path = await resolveWorkspacePathSafe(input.file_path, config, 'read');
      const fileStat = await fs.stat(path);
      if (!fileStat.isFile()) {
        throw new Error(`Path is not a file: ${input.file_path}`);
      }
      // Clamp the configured cap to at least 1 byte so a zero/negative
      // config value can't make every read fail the comparison below.
      const maxBytes = Math.max(
        config.maxReadBytes ?? DEFAULT_MAX_READ_BYTES,
        1
      );
      if (fileStat.size > maxBytes) {
        // Over-cap files get a stub telling the model how to proceed,
        // not an error — the tool call itself succeeded.
        const stub = `File is ${fileStat.size} bytes, exceeds the ${maxBytes}-byte read cap. Read a slice via bash (e.g. head/sed) or raise local.maxReadBytes.`;
        return [stub, { path, bytes: fileStat.size, truncated: true }];
      }

      if (await looksBinary(path, fs)) {
        const attachmentMode = config.attachReadAttachments ?? 'off';
        if (attachmentMode !== 'off') {
          const attachment = await classifyAttachment({
            path,
            bytes: fileStat.size,
            mode: attachmentMode,
            maxBytes:
              config.maxAttachmentBytes ?? DEFAULT_MAX_ATTACHMENT_BYTES,
            // Route through the configured WorkspaceFS so a custom
            // engine sees the same path semantics as `read_file`
            // itself (manual review finding F).
            fs,
          });
          // Images become an image_url content block for vision models.
          if (attachment.kind === 'image') {
            return [
              imageAttachmentContent(path, attachment),
              {
                path,
                bytes: fileStat.size,
                mime: attachment.mime,
                attachment: 'image',
              },
            ];
          }
          // PDFs are embedded as a base64 data URL alongside a text note.
          if (attachment.kind === 'pdf') {
            return [
              [
                {
                  type: 'text',
                  text: `Read ${path} (application/pdf, ${fileStat.size} bytes). PDF attached as base64 data URL; vision-capable models that accept PDF will render it.`,
                },
                {
                  type: 'image_url',
                  image_url: { url: attachment.dataUrl },
                },
              ],
              {
                path,
                bytes: fileStat.size,
                mime: attachment.mime,
                attachment: 'pdf',
              },
            ];
          }
          // Attachable type but over the attachment byte cap.
          if (attachment.kind === 'oversize') {
            return [
              `Refusing to embed ${attachment.mime} attachment (${attachment.bytes} bytes exceeds ${attachment.maxBytes}-byte cap).`,
              {
                path,
                bytes: fileStat.size,
                mime: attachment.mime,
                attachment: 'oversize',
              },
            ];
          }
          // Non-attachable binary: refuse, but report the sniffed mime.
          if (attachment.kind === 'binary') {
            return [
              `Refusing to read binary file (${fileStat.size} bytes, ${attachment.mime}): ${path}`,
              {
                path,
                bytes: fileStat.size,
                mime: attachment.mime,
                binary: true,
              },
            ];
          }
          // text-or-unknown falls through to the text-read path below.
        } else {
          // Attachments disabled: refuse binaries outright.
          return [
            `Refusing to read binary file (${fileStat.size} bytes): ${path}`,
            { path, bytes: fileStat.size, binary: true },
          ];
        }
      }

      // Text path: number the requested line window and flag truncation.
      const content = await fs.readFile(path, 'utf8');
      const result = lineWindow(content, input.offset, input.limit);
      return [
        result.truncated ? `${result.text}\n[truncated]` : result.text,
        { path, bytes: fileStat.size },
      ];
    },
    {
      name: Constants.READ_FILE,
      description:
        'Read a local text file from the configured working directory with line numbers. ' +
        'When `attachReadAttachments` is enabled (e.g. images-only), reading an image returns an ' +
        '`image_url` content block so vision-capable models can see the file directly.',
      schema: LocalReadFileToolSchema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}
506
+
507
/**
 * Build the local `write_file` tool: create or overwrite a text file.
 *
 * Behavior notes grounded in the body below:
 * - Blocks entirely when `config.readOnly` is true.
 * - Captures a checkpoint before mutating, when a checkpointer is wired.
 * - When overwriting, decodes the existing file first so the new content
 *   is re-encoded with the original BOM/newline shape; new files default
 *   to LF without BOM (the initial `encoding` value).
 * - In `postEditSyntaxCheck: 'strict'` mode, a failed check reverts the
 *   write and throws, so strict mode is a real gate.
 *
 * NOTE(review): any read failure on the existing file (not just
 * missing-file) is treated as "new file" by the catch below — e.g. a
 * permissions error would silently take the create path. Presumably
 * acceptable best-effort; confirm intended.
 *
 * @param config - Local execution config. Defaults to `{}`.
 * @param checkpointer - Optional pre-write snapshot hook.
 */
export function createLocalWriteFileTool(
  config: t.LocalExecutionConfig = {},
  checkpointer?: t.LocalFileCheckpointer
): DynamicStructuredTool {
  const fs = getWorkspaceFS(config);
  return tool(
    async (rawInput) => {
      const input = rawInput as { file_path: string; content: string };
      if (config.readOnly === true) {
        throw new Error('write_file is blocked in read-only local mode.');
      }
      const path = await resolveWorkspacePathSafe(input.file_path, config, 'write');
      // Snapshot before any mutation so a later undo can restore it.
      if (checkpointer != null) {
        await checkpointer.captureBeforeWrite(path);
      }

      // Defaults model the new-file case: empty text, LF, no BOM.
      let before = '';
      let encoding = { text: '', hasBom: false, newline: '\n' as const } as
        | ReturnType<typeof decodeFile>
        | { text: string; hasBom: false; newline: '\n' };
      let existed = false;
      try {
        const raw = await fs.readFile(path, 'utf8');
        const decoded = decodeFile(raw);
        before = decoded.text;
        encoding = decoded;
        existed = true;
      } catch {
        // Read failed — treat as a brand-new file (see NOTE above).
        existed = false;
      }

      await fs.mkdir(dirname(path), { recursive: true });
      // Re-encode with the preserved BOM/newline shape before writing.
      const finalText = encodeFile(input.content, encoding);
      await fs.writeFile(path, finalText, 'utf8');

      const syntax = await maybeRunSyntaxCheck(path, config);

      const diff = existed
        ? summariseDiff(path, before, input.content)
        : `(new file, ${input.content.length} chars)`;
      const baseSummary = existed
        ? `Overwrote ${path} (${input.content.length} chars). Diff:\n${diff}`
        : `Created ${path} (${input.content.length} chars).`;
      const summary = appendSyntaxCheckSummary(baseSummary, syntax);
      if (syntax?.outcome.ok === false && syntax.mode === 'strict') {
        // Roll back the write so strict mode is an actual gate, not
        // "fail the call AND leave the corrupted file on disk".
        // Codex P2 [49].
        await revertStrictWrite(fs, path, existed, before, encoding);
        throw new Error(
          `write_file syntax check failed (${syntax.outcome.checker}); reverted to pre-write state.\n${syntax.outcome.output}`
        );
      }
      return [
        summary,
        {
          path,
          // NOTE(review): `finalText.length` is a char count, not a
          // byte count, for multi-byte UTF-8 content.
          bytes: finalText.length,
          new_file: !existed,
          newline: encoding.newline === '\r\n' ? 'CRLF' : 'LF',
          had_bom: encoding.hasBom,
          ...(syntax != null && syntax.outcome.ok === false
            ? { syntax_error: syntax.outcome.checker }
            : {}),
        },
      ];
    },
    {
      name: LocalWriteFileToolName,
      description:
        'Create or overwrite a local text file in the configured working directory. ' +
        'Preserves the existing BOM and line endings when overwriting; defaults to LF without BOM for new files. ' +
        'Returns a unified diff of the changes when overwriting.',
      schema: LocalWriteFileToolSchema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}
585
+
586
/**
 * Build the local `edit_file` tool: apply one or more exact-text
 * replacements to an existing file.
 *
 * Behavior notes grounded in the body below:
 * - Blocks entirely when `config.readOnly` is true.
 * - Edits are normalized (batch `edits` first, single pair last) and
 *   applied sequentially, each located against the text produced by the
 *   preceding edits; a miss aborts with a message naming which edit
 *   failed and what matching strategies were tried.
 * - All edits are located in-memory before the checkpoint/write, so a
 *   failed match leaves the file untouched.
 * - BOM/newline shape of the original file is preserved on write.
 * - In `postEditSyntaxCheck: 'strict'` mode, a failed check restores
 *   the pre-edit text and throws.
 *
 * @param config - Local execution config. Defaults to `{}`.
 * @param checkpointer - Optional pre-write snapshot hook.
 */
export function createLocalEditFileTool(
  config: t.LocalExecutionConfig = {},
  checkpointer?: t.LocalFileCheckpointer
): DynamicStructuredTool {
  const fs = getWorkspaceFS(config);
  return tool(
    async (rawInput) => {
      const input = rawInput as {
        file_path: string;
        old_text?: string;
        new_text?: string;
        edits?: Array<{ old_text?: string; new_text?: string }>;
      };
      if (config.readOnly === true) {
        throw new Error('edit_file is blocked in read-only local mode.');
      }
      const edits = normalizeEdits(input);
      if (edits.length === 0) {
        throw new Error('edit_file requires old_text/new_text or edits[].');
      }

      const path = await resolveWorkspacePathSafe(input.file_path, config, 'write');
      const raw = await fs.readFile(path, 'utf8');
      const encoding = decodeFile(raw);
      const original = encoding.text;

      // Apply edits in order; each subsequent edit matches against the
      // partially-edited text, not the original.
      let next = original;
      const strategiesUsed: string[] = [];
      for (let i = 0; i < edits.length; i++) {
        const edit = edits[i];
        const match = locateEdit(next, edit.oldText);
        if (match == null) {
          throw new Error(
            `Edit ${i + 1}/${edits.length}: could not locate old_text in ${input.file_path}. ` +
            'Tried exact, line-trimmed, whitespace-normalized, and indentation-flexible matching. ' +
            'Re-read the file and copy the literal lines.'
          );
        }
        strategiesUsed.push(match.strategy);
        next = applyEdit(next, match, edit.newText);
      }

      // Checkpoint only after every edit located successfully — no
      // point snapshotting for a call that won't mutate the file.
      if (checkpointer != null) {
        await checkpointer.captureBeforeWrite(path);
      }
      const finalText = encodeFile(next, encoding);
      await fs.writeFile(path, finalText, 'utf8');

      const syntax = await maybeRunSyntaxCheck(path, config);

      const diff = summariseDiff(path, original, next);
      // Surface the matching strategies only when any edit needed a
      // non-exact (fuzzy) match.
      const fuzzy = strategiesUsed.some((s) => s !== 'exact');
      const baseSummary =
        `Applied ${edits.length} edit(s) to ${path}` +
        (fuzzy ? ` (strategies: ${strategiesUsed.join(', ')})` : '') +
        `. Diff:\n${diff}`;
      const summary = appendSyntaxCheckSummary(baseSummary, syntax);
      if (syntax?.outcome.ok === false && syntax.mode === 'strict') {
        // Restore the pre-edit bytes so strict mode is an actual
        // gate (Codex P2 [49]). edit_file always operates on an
        // existing file, so `existed = true` here.
        await revertStrictWrite(fs, path, true, original, encoding);
        throw new Error(
          `edit_file syntax check failed (${syntax.outcome.checker}); reverted to pre-edit state.\n${syntax.outcome.output}`
        );
      }
      return [
        summary,
        {
          path,
          edits: edits.length,
          strategies: strategiesUsed,
          newline: encoding.newline === '\r\n' ? 'CRLF' : 'LF',
          had_bom: encoding.hasBom,
          ...(syntax != null && syntax.outcome.ok === false
            ? { syntax_error: syntax.outcome.checker }
            : {}),
        },
      ];
    },
    {
      name: LocalEditFileToolName,
      description:
        'Apply exact text replacements to a local file. The matcher tries exact, line-trimmed, whitespace-normalized, and indentation-flexible strategies in order so common LLM whitespace mistakes are recoverable. Each old_text must still match exactly one location. Returns a unified diff of the changes.',
      schema: LocalEditFileToolSchema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}
675
+
676
/**
 * Ripgrep availability cache, keyed per backend × env.
 *
 * Outer key is the *effective execution backend* — whatever function
 * `getSpawn(config)` returns. Without the backend key, a Run that
 * probes `rg` over Node's `child_process.spawn` would poison
 * subsequent Runs whose `local.exec.spawn` routes to a remote sandbox
 * or container without rg installed: the cached `true` would skip the
 * probe, the rg invocation would throw, and the Node fallback wouldn't
 * be reached. Per-backend caching avoids that without paying for a
 * spawn-per-search.
 *
 * Inner key is a stable JSON hash of the effective env (see
 * envCacheKey). Keying by spawn backend alone misses the case where
 * two Runs share a backend but vary `local.env` (especially PATH): a
 * stale cache then claims `rg` is available, the rg path runs, and the
 * spawn fails with ENOENT instead of falling back to the Node walker
 * (Codex P1 #34).
 *
 * Declared `let` (not `const`) so `_resetRipgrepCacheForTests` can
 * replace it wholesale.
 */
let ripgrepAvailabilityByBackend = new WeakMap<
  t.LocalSpawn,
  Map<string, Promise<boolean>>
>();
698
+
699
+ function envCacheKey(env: NodeJS.ProcessEnv | undefined): string {
700
+ // PATH is the only env entry that affects command lookup, but
701
+ // hashing the whole env keeps the key correct for hosts that
702
+ // vary anything else relevant. Stable JSON via sorted keys so
703
+ // {A:1,B:2} and {B:2,A:1} produce the same hash.
704
+ if (env == null) return '';
705
+ const sorted: Record<string, string | undefined> = {};
706
+ for (const k of Object.keys(env).sort()) {
707
+ sorted[k] = env[k];
708
+ }
709
+ return JSON.stringify(sorted);
710
+ }
711
+
712
+ async function isRipgrepAvailable(
713
+ config: t.LocalExecutionConfig
714
+ ): Promise<boolean> {
715
+ const backend = getSpawn(config);
716
+ let envMap = ripgrepAvailabilityByBackend.get(backend);
717
+ if (envMap == null) {
718
+ envMap = new Map();
719
+ ripgrepAvailabilityByBackend.set(backend, envMap);
720
+ }
721
+ const envKey = envCacheKey(config.env);
722
+ let probePromise = envMap.get(envKey);
723
+ if (probePromise == null) {
724
+ probePromise = spawnLocalProcess(
725
+ 'rg',
726
+ ['--version'],
727
+ { ...config, timeoutMs: 5000, sandbox: { enabled: false } },
728
+ { internal: true }
729
+ )
730
+ .then((probe) => probe != null && probe.exitCode === 0)
731
+ .catch(() => false);
732
+ envMap.set(envKey, probePromise);
733
+ }
734
+ return probePromise;
735
+ }
736
+
737
+ /**
738
+ * Test-only reset hook. Clears the ripgrep-availability cache so
739
+ * tests can swap in mocked spawn backends and reprobe deterministically.
740
+ *
741
+ * @internal Not part of the public SDK surface; the leading underscore
742
+ * and `@internal` tag together signal that consumers should not call
743
+ * this. Tests import it via the module path directly.
744
+ */
745
+ export function _resetRipgrepCacheForTests(): void {
746
+ ripgrepAvailabilityByBackend = new WeakMap();
747
+ }
748
+
749
+ // Skipped by the Node-fallback walker (used when ripgrep is
750
+ // unavailable). Covers common build outputs, virtualenvs, and
751
+ // caches so a `grep_search`/`glob_search` on a large monorepo or a
752
+ // Python project with `.venv/` doesn't read every file under those
753
+ // trees. ripgrep itself respects .gitignore so it doesn't need this
754
+ // list. Audit follow-up from the comprehensive review (finding #3).
755
+ const SKIP_DIRS = new Set([
756
+ '.git',
757
+ '.svn',
758
+ '.hg',
759
+ 'node_modules',
760
+ '.next',
761
+ '.nuxt',
762
+ '.cache',
763
+ '.parcel-cache',
764
+ '.turbo',
765
+ 'dist',
766
+ 'build',
767
+ 'out',
768
+ 'target',
769
+ 'vendor',
770
+ 'coverage',
771
+ '.nyc_output',
772
+ '__pycache__',
773
+ '.venv',
774
+ 'venv',
775
+ 'env',
776
+ '.tox',
777
+ '.mypy_cache',
778
+ '.pytest_cache',
779
+ '.ruff_cache',
780
+ ]);
781
+
782
+ function globToRegExp(pattern: string): RegExp {
783
+ let result = '^';
784
+ for (let i = 0; i < pattern.length; i++) {
785
+ const c = pattern[i];
786
+ if (c === '*') {
787
+ if (pattern[i + 1] === '*') {
788
+ result += '.*';
789
+ i += 1;
790
+ if (pattern[i + 1] === '/') {
791
+ i += 1;
792
+ }
793
+ } else {
794
+ result += '[^/]*';
795
+ }
796
+ } else if (c === '?') {
797
+ result += '[^/]';
798
+ } else if ('.+^$|(){}[]\\'.includes(c)) {
799
+ result += '\\' + c;
800
+ } else {
801
+ result += c;
802
+ }
803
+ }
804
+ result += '$';
805
+ return new RegExp(result);
806
+ }
807
+
808
+ async function* walkFiles(
809
+ root: string,
810
+ fs: import('./workspaceFS').WorkspaceFS
811
+ ): AsyncGenerator<string> {
812
+ const stack: string[] = [root];
813
+ while (stack.length > 0) {
814
+ const dir = stack.pop() as string;
815
+ let entries;
816
+ try {
817
+ entries = await fs.readdir(dir, { withFileTypes: true });
818
+ } catch {
819
+ continue;
820
+ }
821
+ for (const entry of entries) {
822
+ if (entry.name.startsWith('.git') || SKIP_DIRS.has(entry.name)) {
823
+ continue;
824
+ }
825
+ const full = `${dir}/${entry.name}`;
826
+ if (entry.isDirectory()) {
827
+ stack.push(full);
828
+ } else if (entry.isFile()) {
829
+ yield full;
830
+ }
831
+ }
832
+ }
833
+ }
834
+
835
+ /**
836
+ * Catastrophic-backtracking guardrails for the fallback grep path.
837
+ *
838
+ * Without ripgrep we run the model-supplied pattern through Node's
839
+ * `RegExp` engine, which uses a backtracking implementation. Patterns
840
+ * with nested unbounded quantifiers (`(a+)+`, `(.*)*`, etc.) can
841
+ * monopolise the event loop for arbitrary wall-clock time on
842
+ * pathological input, and `setTimeout` cannot interrupt a synchronous
843
+ * `RegExp.exec`. Manual review (finding D) flagged this as a real DoS.
844
+ *
845
+ * Mitigations applied here, in order of severity:
846
+ * 1. Cap pattern length so an obviously oversize regex is rejected
847
+ * before compile.
848
+ * 2. Reject patterns that contain a nested unbounded quantifier of
849
+ * the form `(...+|*)([+*]|{n,})` — the standard pathological
850
+ * shape. Still a heuristic (not a full safety proof), but blocks
851
+ * every common DoS construction we've seen in coding-agent logs.
852
+ * 3. Wall-clock budget for the overall search: each file's regex
853
+ * pass is checked against a deadline; once exceeded the search
854
+ * bails with a partial result. Doesn't interrupt a stuck
855
+ * `exec()` call, but stops a slow pattern from making the whole
856
+ * Run hang once the first hung file finishes.
857
+ *
858
+ * Hosts that need bulletproof regex safety should install `rg` —
859
+ * ripgrep uses RE2 internally and has no backtracking.
860
+ */
861
+ const MAX_FALLBACK_PATTERN_LENGTH = 1024;
862
+ const FALLBACK_GREP_BUDGET_MS = 5000;
863
+ // Per-file byte cap. Codex P2 #41 — without it, the whole-file
864
+ // `readFile` + `split('\n')` for a multi-GB log is an unbounded
865
+ // allocation that the wall-clock budget (checked between files)
866
+ // can't interrupt. Hosts that need to grep large files should
867
+ // install ripgrep.
868
+ const FALLBACK_GREP_MAX_FILE_BYTES = 5 * 1024 * 1024;
869
+
870
+ /**
871
+ * Heuristic: walks `pattern` to find any `(<contents>)<quant>` where
872
+ * `<contents>` itself has an unbounded quantifier. Catches the
873
+ * classic `(a+)+` form AND the double-nested `((a+)+)` form (which a
874
+ * single-pass regex misses because `[^)]*` stops at the first inner
875
+ * close-paren). Misses sufficiently obfuscated cases — bulletproof
876
+ * ReDoS detection requires a real parser. The 5 s wall-clock budget
877
+ * is the hard backstop for anything this slip past.
878
+ */
879
+ function hasNestedUnboundedQuantifier(pattern: string): boolean {
880
+ for (let i = 1; i < pattern.length - 1; i++) {
881
+ if (pattern[i] !== ')') continue;
882
+ if (pattern[i - 1] === '\\') continue;
883
+ const next = pattern[i + 1];
884
+ if (next !== '+' && next !== '*' && next !== '{') continue;
885
+ // Walk back to find the matching opening paren (respecting depth
886
+ // and `\(` escapes).
887
+ let depth = 1;
888
+ let j = i - 1;
889
+ while (j >= 0) {
890
+ const c = pattern[j];
891
+ const escaped = j > 0 && pattern[j - 1] === '\\';
892
+ if (!escaped) {
893
+ if (c === ')') depth++;
894
+ else if (c === '(') {
895
+ depth--;
896
+ if (depth === 0) break;
897
+ }
898
+ }
899
+ j--;
900
+ }
901
+ if (j < 0) continue;
902
+ const inner = pattern.slice(j + 1, i);
903
+ if (/(?<!\\)[+*]/.test(inner)) return true;
904
+ }
905
+ return false;
906
+ }
907
+
908
+ class FallbackGrepError extends Error {
909
+ readonly kind: 'pattern-too-long' | 'unsafe-pattern' | 'invalid-pattern';
910
+ constructor(
911
+ kind: 'pattern-too-long' | 'unsafe-pattern' | 'invalid-pattern',
912
+ message: string
913
+ ) {
914
+ super(message);
915
+ this.kind = kind;
916
+ }
917
+ }
918
+
919
+ function compileFallbackRegex(pattern: string): RegExp {
920
+ if (pattern.length > MAX_FALLBACK_PATTERN_LENGTH) {
921
+ throw new FallbackGrepError(
922
+ 'pattern-too-long',
923
+ `Pattern exceeds ${MAX_FALLBACK_PATTERN_LENGTH}-char fallback cap (install ripgrep for unbounded patterns).`
924
+ );
925
+ }
926
+ if (hasNestedUnboundedQuantifier(pattern)) {
927
+ throw new FallbackGrepError(
928
+ 'unsafe-pattern',
929
+ 'Pattern contains a nested unbounded quantifier (e.g. `(a+)+` or `((a+)+)`) which can cause catastrophic backtracking in the Node fallback. Install ripgrep for RE2-safe matching.'
930
+ );
931
+ }
932
+ try {
933
+ return new RegExp(pattern);
934
+ } catch (e) {
935
+ throw new FallbackGrepError(
936
+ 'invalid-pattern',
937
+ `Invalid regex: ${(e as Error).message}`
938
+ );
939
+ }
940
+ }
941
+
942
/**
 * Structured fallback-grep result. `matches` holds real
 * `file:line:text` hits; `skipped` holds diagnostic sentinels for
 * files the scanner refused (e.g. oversize). Kept separate so
 * diagnostics never inflate the match count or consume the
 * max-results budget (Codex P2 [43]).
 */
type FallbackGrepResult = { matches: string[]; skipped: string[] };
945
+
946
/**
 * Pure-Node grep used when ripgrep is unavailable.
 *
 * Walks `root` (honouring SKIP_DIRS), optionally filters files by a
 * root-relative glob, and scans each file line-by-line with a
 * guard-railed regex (see compileFallbackRegex). Binary files
 * (containing NUL) and unreadable files are skipped silently;
 * oversize files are skipped with a diagnostic sentinel. Returns
 * partial results once the wall-clock budget is exceeded rather than
 * hanging the Run.
 *
 * @param root - Absolute directory to search.
 * @param pattern - Regex source; validated by compileFallbackRegex.
 * @param globFilter - Optional glob restricting which files are scanned.
 * @param maxResults - Cap on real matches (diagnostics not counted).
 * @param fs - Workspace filesystem facade.
 * @throws FallbackGrepError when the pattern is oversize, unsafe, or invalid.
 */
async function fallbackGrep(
  root: string,
  pattern: string,
  globFilter: string | undefined,
  maxResults: number,
  fs: import('./workspaceFS').WorkspaceFS
): Promise<FallbackGrepResult> {
  const rx = compileFallbackRegex(pattern);
  const deadline = Date.now() + FALLBACK_GREP_BUDGET_MS;
  const globRx =
    globFilter != null && globFilter !== '' ? globToRegExp(globFilter) : undefined;
  const matches: string[] = [];
  // Track skipped (oversize) files separately so they don't consume
  // the maxResults budget. Codex P2 [43]: round 14's fix pushed skip
  // sentinels into `matches`, so a directory of one oversize non-
  // matching file falsely reported `matches: 1`, and enough oversize
  // files could fill the budget before any real match was scanned.
  // Now diagnostics are appended after real matches and independent
  // of the budget.
  const skippedDiagnostics: string[] = [];
  for await (const file of walkFiles(root, fs)) {
    if (Date.now() > deadline) {
      // Wall-clock budget exceeded — return partial results rather
      // than letting a slow pattern hang the Run.
      return { matches, skipped: skippedDiagnostics };
    }
    if (globRx != null) {
      // Glob is matched against the root-relative path, mirroring rg.
      const rel = file.startsWith(root + '/') ? file.slice(root.length + 1) : file;
      if (!globRx.test(rel)) {
        continue;
      }
    }
    // Skip files larger than the per-file cap and remember them as
    // diagnostics (NOT as matches). Codex P2 [41]: pre-fix
    // `fs.readFile` then a per-line split allocated the whole file
    // plus an array of every line, which a single multi-GB log could
    // turn into an OOM even after the regex DoS guards.
    let stat;
    try {
      stat = await fs.stat(file);
    } catch {
      continue;
    }
    if (stat.size > FALLBACK_GREP_MAX_FILE_BYTES) {
      skippedDiagnostics.push(
        `${file}:0:[skipped: file > ${FALLBACK_GREP_MAX_FILE_BYTES} bytes; install ripgrep for unbounded grep]`
      );
      continue;
    }
    let content;
    try {
      content = await fs.readFile(file, 'utf8');
    } catch {
      continue;
    }
    if (content.includes('\0')) {
      // NUL byte — treat as binary and skip.
      continue;
    }
    // Re-check the deadline AFTER the read — a slow disk on one file
    // can blow the budget without us noticing.
    if (Date.now() > deadline) {
      return { matches, skipped: skippedDiagnostics };
    }
    const lines = content.split('\n');
    for (let i = 0; i < lines.length; i++) {
      if (rx.test(lines[i])) {
        matches.push(`${file}:${i + 1}:${lines[i]}`);
        if (matches.length >= maxResults) {
          return { matches, skipped: skippedDiagnostics };
        }
      }
    }
  }
  return { matches, skipped: skippedDiagnostics };
}
1021
+
1022
+ async function fallbackGlob(
1023
+ root: string,
1024
+ pattern: string,
1025
+ maxResults: number,
1026
+ fs: import('./workspaceFS').WorkspaceFS
1027
+ ): Promise<string[]> {
1028
+ const rx = globToRegExp(pattern);
1029
+ const out: string[] = [];
1030
+ for await (const file of walkFiles(root, fs)) {
1031
+ const rel = file.startsWith(root + '/') ? file.slice(root.length + 1) : file;
1032
+ if (rx.test(rel)) {
1033
+ out.push(file);
1034
+ if (out.length >= maxResults) {
1035
+ break;
1036
+ }
1037
+ }
1038
+ }
1039
+ return out;
1040
+ }
1041
+
1042
/**
 * Builds the `grep_search` tool: regex search over workspace files.
 *
 * Uses ripgrep when available on the effective spawn backend (probed
 * and cached per backend × env by isRipgrepAvailable); otherwise falls
 * back to the pure-Node scanner with ReDoS guardrails (fallbackGrep).
 * Returns CONTENT_AND_ARTIFACT: a text listing of matches plus a
 * structured artifact with match counts, the engine used, and error
 * detail on failure. The search path is resolved and access-checked
 * via resolveWorkspacePathSafe before any scanning happens.
 */
export function createLocalGrepSearchTool(
  config: t.LocalExecutionConfig = {}
): DynamicStructuredTool {
  const fs = getWorkspaceFS(config);
  return tool(
    async (rawInput) => {
      const input = rawInput as {
        pattern: string;
        path?: string;
        glob?: string;
        max_results?: number;
      };
      const target = await resolveWorkspacePathSafe(input.path ?? '.', config, 'read');
      // Clamp to at least one result so a zero/negative cap can't
      // suppress all output.
      const maxResults = Math.max(input.max_results ?? DEFAULT_MAX_RESULTS, 1);

      if (await isRipgrepAvailable(config)) {
        // Pass the pattern through `-e` so dash-prefixed patterns
        // like `-foo` are treated as the search regex, not as a
        // (probably-unknown) flag. `rg --help` explicitly requires
        // `-e/--regexp` (or `--`) for that case. Same trick avoids
        // any future flag-conflict if a user query happens to look
        // like an rg long option.
        const args = [
          '--line-number',
          '--column',
          '--hidden',
          '--glob',
          '!.git/**',
          ...(input.glob != null && input.glob !== '' ? ['--glob', input.glob] : []),
          '-e',
          input.pattern,
          target,
        ];
        const result = await spawnLocalProcess('rg', args, {
          ...config,
          timeoutMs: config.timeoutMs ?? 30000,
        });
        // ripgrep exit codes:
        //   0 → at least one match
        //   1 → no matches (clean — "No matches found.")
        //   2 → real error (bad regex, unreadable target, etc.)
        // Without this branch (Codex P2 #23 — same fix shape
        // glob_search got from P2 #13), exit-2 errors silently mapped
        // to `matches: 0`, so the agent treated tooling failures as a
        // genuine absence of matches.
        if (result.timedOut || (result.exitCode != null && result.exitCode > 1)) {
          const detail = result.stderr.trim() || `rg exited ${result.exitCode}`;
          return [
            `grep_search failed: ${detail}`,
            {
              matches: 0,
              engine: 'ripgrep',
              error: detail,
              exitCode: result.exitCode,
            },
          ];
        }
        const lines = result.stdout.split('\n').filter(Boolean).slice(0, maxResults);
        const output =
          lines.length > 0
            ? lines.join('\n')
            : result.stderr.trim() || 'No matches found.';
        return [output, { matches: lines.length, engine: 'ripgrep' }];
      }

      try {
        const { matches, skipped } = await fallbackGrep(
          target,
          input.pattern,
          input.glob,
          maxResults,
          fs
        );
        // Display: real matches first, skip diagnostics appended.
        // Artifact count: ONLY real matches (Codex P2 [43] —
        // skip sentinels used to inflate the count and the budget).
        const display =
          matches.length > 0
            ? [...matches, ...skipped].join('\n')
            : skipped.length > 0
              ? skipped.join('\n')
              : 'No matches found.';
        return [
          display,
          {
            matches: matches.length,
            skipped: skipped.length,
            engine: 'node-fallback',
          },
        ];
      } catch (e) {
        // Guardrail refusals become a structured tool result so the
        // model can rephrase the pattern; anything else propagates.
        if (e instanceof FallbackGrepError) {
          return [
            `grep_search refused the pattern: ${e.message}`,
            {
              matches: 0,
              engine: 'node-fallback',
              error: e.message,
              kind: e.kind,
            },
          ];
        }
        throw e;
      }
    },
    {
      name: LocalGrepSearchToolName,
      description:
        'Search local files for a regex pattern (ripgrep when available, Node fallback otherwise).',
      schema: LocalGrepSearchToolSchema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}
1156
+
1157
/**
 * Builds the `glob_search` tool: find workspace files matching a glob.
 *
 * Prefers `rg --files --glob` when ripgrep is available; otherwise
 * uses the Node walker plus globToRegExp fallback. Returns
 * CONTENT_AND_ARTIFACT: newline-joined paths plus a structured
 * artifact listing the matched files and the engine used. The search
 * path is resolved and access-checked via resolveWorkspacePathSafe.
 */
export function createLocalGlobSearchTool(
  config: t.LocalExecutionConfig = {}
): DynamicStructuredTool {
  const fs = getWorkspaceFS(config);
  return tool(
    async (rawInput) => {
      const input = rawInput as {
        pattern: string;
        path?: string;
        max_results?: number;
      };
      const target = await resolveWorkspacePathSafe(input.path ?? '.', config, 'read');
      // Clamp to at least one result so a zero/negative cap can't
      // suppress all output.
      const maxResults = Math.max(input.max_results ?? DEFAULT_MAX_RESULTS, 1);

      if (await isRipgrepAvailable(config)) {
        const result = await spawnLocalProcess(
          'rg',
          ['--files', '--hidden', '--glob', '!.git/**', '--glob', input.pattern, target],
          { ...config, timeoutMs: config.timeoutMs ?? 30000 }
        );
        // rg --files exit codes:
        //   0 → at least one file matched
        //   1 → no files matched (clean — "No files found.")
        //   2 → real error (bad glob, unreadable target, etc.)
        // Without this branch, exit-2 errors used to silently map to
        // "No files found." — the agent then treats a tooling failure
        // as a real absence of matches.
        if (result.timedOut || (result.exitCode != null && result.exitCode > 1)) {
          const detail = result.stderr.trim() || `rg exited ${result.exitCode}`;
          return [
            `glob_search failed: ${detail}`,
            {
              files: [],
              engine: 'ripgrep',
              error: detail,
              exitCode: result.exitCode,
            },
          ];
        }
        const lines = result.stdout
          .split('\n')
          .filter(Boolean)
          .slice(0, maxResults);
        return [
          lines.length > 0 ? lines.join('\n') : 'No files found.',
          { files: lines, engine: 'ripgrep' },
        ];
      }

      const files = await fallbackGlob(target, input.pattern, maxResults, fs);
      return [
        files.length > 0 ? files.join('\n') : 'No files found.',
        { files, engine: 'node-fallback' },
      ];
    },
    {
      name: LocalGlobSearchToolName,
      description:
        'Find local files matching a glob pattern (ripgrep when available, Node fallback otherwise).',
      schema: LocalGlobSearchToolSchema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}
1221
+
1222
+ export function createLocalListDirectoryTool(
1223
+ config: t.LocalExecutionConfig = {}
1224
+ ): DynamicStructuredTool {
1225
+ const fs = getWorkspaceFS(config);
1226
+ return tool(
1227
+ async (rawInput) => {
1228
+ const input = rawInput as { path?: string };
1229
+ const path = await resolveWorkspacePathSafe(input.path ?? '.', config, 'read');
1230
+ const entries = await fs.readdir(path, { withFileTypes: true });
1231
+ const output = entries
1232
+ .map((entry) => `${entry.isDirectory() ? 'dir ' : 'file'}\t${entry.name}`)
1233
+ .join('\n');
1234
+ return [output || 'Directory is empty.', { path, count: entries.length }];
1235
+ },
1236
+ {
1237
+ name: LocalListDirectoryToolName,
1238
+ description: 'List files and directories in a local directory.',
1239
+ schema: LocalListDirectoryToolSchema,
1240
+ responseFormat: Constants.CONTENT_AND_ARTIFACT,
1241
+ }
1242
+ );
1243
+ }
1244
+
1245
/**
 * Result shape of `createLocalCodingToolBundle`: the tool array plus
 * the file checkpointer (when enabled) so callers can rewind writes.
 */
export type LocalCodingToolBundle = {
  tools: DynamicStructuredTool[];
  /**
   * Present when `config.fileCheckpointing === true` or a `checkpointer`
   * was passed in. Callers can call `rewind()` to restore captured
   * pre-write contents.
   */
  checkpointer?: t.LocalFileCheckpointer;
};
1254
+
1255
+ export function createLocalCodingTools(
1256
+ config: t.LocalExecutionConfig = {},
1257
+ options: { checkpointer?: t.LocalFileCheckpointer } = {}
1258
+ ): DynamicStructuredTool[] {
1259
+ const checkpointer =
1260
+ options.checkpointer ??
1261
+ (config.fileCheckpointing === true
1262
+ ? createLocalFileCheckpointer({ fs: config.exec?.fs })
1263
+ : undefined);
1264
+ return [
1265
+ createLocalReadFileTool(config),
1266
+ createLocalWriteFileTool(config, checkpointer),
1267
+ createLocalEditFileTool(config, checkpointer),
1268
+ createLocalGrepSearchTool(config),
1269
+ createLocalGlobSearchTool(config),
1270
+ createLocalListDirectoryTool(config),
1271
+ createCompileCheckTool(config),
1272
+ createLocalBashExecutionTool({ config }),
1273
+ createLocalCodeExecutionTool(config),
1274
+ createLocalProgrammaticToolCallingTool(config),
1275
+ createLocalBashProgrammaticToolCallingTool(config),
1276
+ ];
1277
+ }
1278
+
1279
+ /**
1280
+ * Variant of `createLocalCodingTools` that returns the bundle alongside
1281
+ * the file checkpointer so callers can later call
1282
+ * `bundle.checkpointer?.rewind()`.
1283
+ */
1284
+ export function createLocalCodingToolBundle(
1285
+ config: t.LocalExecutionConfig = {},
1286
+ options: { checkpointer?: t.LocalFileCheckpointer } = {}
1287
+ ): LocalCodingToolBundle {
1288
+ const checkpointer =
1289
+ options.checkpointer ??
1290
+ (config.fileCheckpointing === true
1291
+ ? createLocalFileCheckpointer({ fs: config.exec?.fs })
1292
+ : undefined);
1293
+ return {
1294
+ tools: createLocalCodingTools(config, { checkpointer }),
1295
+ checkpointer,
1296
+ };
1297
+ }
1298
+
1299
+ export function createLocalCodingToolDefinitions(): t.LCTool[] {
1300
+ return [
1301
+ toolDefinition(
1302
+ Constants.READ_FILE,
1303
+ 'Read a local text file from the configured working directory with line numbers.',
1304
+ LocalReadFileToolSchema as t.JsonSchemaType
1305
+ ),
1306
+ toolDefinition(
1307
+ LocalWriteFileToolName,
1308
+ 'Create or overwrite a local text file in the configured working directory.',
1309
+ LocalWriteFileToolSchema as t.JsonSchemaType
1310
+ ),
1311
+ toolDefinition(
1312
+ LocalEditFileToolName,
1313
+ 'Apply exact text replacements to a local file.',
1314
+ LocalEditFileToolSchema as t.JsonSchemaType
1315
+ ),
1316
+ toolDefinition(
1317
+ LocalGrepSearchToolName,
1318
+ 'Search local files with ripgrep and return matching lines.',
1319
+ LocalGrepSearchToolSchema as t.JsonSchemaType
1320
+ ),
1321
+ toolDefinition(
1322
+ LocalGlobSearchToolName,
1323
+ 'Find local files matching a glob pattern.',
1324
+ LocalGlobSearchToolSchema as t.JsonSchemaType
1325
+ ),
1326
+ toolDefinition(
1327
+ LocalListDirectoryToolName,
1328
+ 'List files and directories in a local directory.',
1329
+ LocalListDirectoryToolSchema as t.JsonSchemaType
1330
+ ),
1331
+ createCompileCheckToolDefinition(),
1332
+ ];
1333
+ }
1334
+
1335
+ export function createLocalCodingToolRegistry(): t.LCToolRegistry {
1336
+ return new Map(
1337
+ createLocalCodingToolDefinitions().map((definition) => [
1338
+ definition.name,
1339
+ definition,
1340
+ ])
1341
+ );
1342
+ }