@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,2862 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Runtime tool registry — one registration per tool.
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // This is the single place that owns, per tool, EVERYTHING needed to recover,
8
+ // gate, and run a call — for BOTH transport paths:
9
+ //
10
+ // * parseXml(text) — XML/tag path (the ~25 regexes once inlined in
11
+ // extractToolCalls now live next to their tool).
12
+ // * fromParams(params) — native function-calling path (was mapInvokeToCall).
13
+ // * permission(ctx, args) — the gate descriptor (was the describePermission
14
+ // switch). Returns null for ungated read-only ops.
15
+ // * execute(ctx, args, opts) — the operation (was the agentExecFile branch).
16
+ //
17
+ // Both transports resolve to the SAME entry and produce the SAME [action, ...args]
18
+ // tuple, and dispatch (agentExecFile / describePermission in lib/tools.js) is a
19
+ // registry lookup keyed on the tool's canonical action.
20
+ //
21
+ // `ctx` is a dependency bag built once by createToolExecutor (lib/tools.js) and
22
+ // passed in at call time. It carries the factory-scoped collaborators (colors,
23
+ // permissionManager, getConfig) and the tools.js-internal helpers (isPathSafe,
24
+ // the sandbox/secret guards, _log, …). Passing them in — rather than requiring
25
+ // lib/tools.js here — is what keeps this module free of the tools.js ↔ registry
26
+ // require cycle. Executor/permission bodies were moved VERBATIM from agentExecFile
27
+ // / describePermission; the `const { … } = ctx` preamble re-binds the same names
28
+ // so the bodies below are unchanged.
29
+ //
30
+ // Adding a tool is now ONE registration object here + its TOOL_SPECS schema + its
31
+ // TAG_REGISTRY classification. The first two are asserted in lockstep by the
32
+ // load-time parity check in lib/constants.js (which also requires execute +
33
+ // permission on every non-wrapper entry).
34
+ //
35
+ // IMPORTANT — parse ORDER: extractToolCalls runs entries in array order; the
36
+ // per-format ordering is pinned by test/extract-tool-calls.test.js.
37
+
38
+ const fs = require('fs');
39
+ const fsp = require('fs/promises');
40
+ const path = require('path');
41
+ const os = require('os');
42
+ const http = require('http');
43
+ const https = require('https');
44
+ const { spawnSync } = require('child_process');
45
+ const { extractContent, capToTokens, defaultEstimate, markupEstimate, MARKUP_CHARS_PER_TOKEN, classifyContentType } = require('./web-extract');
46
+ const { summarizeWebContent } = require('./web-summarize');
47
+
48
+ // Resolve the User-Agent for the fetch tools (Task W.3 Part 2). A fixed,
49
+ // realistic browser UA defeats SIMPLE UA-based bot-blocking (sites that 403/406
50
+ // an empty/curl-like UA). Operator-overridable via config.web.user_agent;
51
+ // deliberately NOT model-selectable — the agent does not control how the tool
52
+ // presents itself to the outside, so there is no UA parameter in the tool spec.
53
+ // Reads the already-normalized config (getConfig() returns web.user_agent set to
54
+ // the override or the default) but falls back defensively to DEFAULT_USER_AGENT
55
+ // for any partially-built config. The constant is required LAZILY because
56
+ // constants.js requires this module at load time (circular dep) — a top-level
57
+ // destructure would capture `undefined`; by call time constants is fully loaded.
58
/**
 * Pick the User-Agent string used by the fetch tools.
 *
 * @param {object} [cfg] - Config object; only `cfg.web.user_agent` is read.
 * @returns {string} The trimmed `web.user_agent` when it is a non-blank string,
 *   otherwise `DEFAULT_USER_AGENT` from ./constants.
 */
function _resolveUserAgent(cfg) {
  const hasWebSection = Boolean(cfg && cfg.web && typeof cfg.web === 'object');
  const override = hasWebSection ? cfg.web.user_agent : undefined;
  if (typeof override === 'string') {
    const trimmed = override.trim();
    if (trimmed) return trimmed;
  }
  // Resolved at call time rather than at module load (see the circular-require
  // note in the comment above this function).
  return require('./constants').DEFAULT_USER_AGENT;
}
64
+
65
+ // http_get per-call options (Task W.1 / W.1b). The agent may override the global
66
+ // web-fetch behavior for a single fetch via a three-level `mode` enum:
67
+ // mode="summarized" (default) → extract → Markdown → secondary-LLM summary.
68
+ // mode="extracted" → extract → Markdown, NO summary (exact snippets).
69
+ // mode="raw" → bypass extraction entirely; return the ORIGINAL
70
+ // fetched HTML/content (token-capped, fenced) — for analyzing a page's
71
+ // markup/CSS/JS/structure, the one task extraction destroys.
72
+ // intent="…" → the reason for fetching, focusing the summary.
73
+ // Deprecated boolean aliases (kept for back-compat): summarize="false" and
74
+ // raw="true" both map to `extracted`. Precedence: an explicit `mode` always
75
+ // beats the legacy booleans; with neither, the global config default applies.
76
// The three processing depths http_get accepts for its `mode` option.
const WEB_FETCH_MODES = ['summarized', 'extracted', 'raw'];

/**
 * Interpret a loosely-typed flag value.
 *
 * @param {*} v - Any value; it is stringified, trimmed and lower-cased.
 * @returns {boolean|undefined} `true` for true/1/yes/on, `false` for
 *   false/0/no/off, `undefined` for null/undefined or anything unrecognized.
 */
function _httpGetBool(v) {
  if (v == null) return undefined;
  switch (String(v).trim().toLowerCase()) {
    case 'true':
    case '1':
    case 'yes':
    case 'on':
      return true;
    case 'false':
    case '0':
    case 'no':
    case 'off':
      return false;
    default:
      return undefined;
  }
}

/**
 * Normalize a `mode` value to one of WEB_FETCH_MODES.
 *
 * @param {*} v - Candidate mode; compared case-insensitively after trimming.
 * @returns {string|undefined} The canonical lower-case mode, or `undefined`
 *   when the value is null/undefined or not a known mode.
 */
function _httpGetMode(v) {
  if (v == null) return undefined;
  const candidate = String(v).trim().toLowerCase();
  return WEB_FETCH_MODES.indexOf(candidate) === -1 ? undefined : candidate;
}
92
+
93
+ // Map a legacy boolean pair to a mode (explicit `mode` is resolved by the caller
94
+ // first and takes precedence). summarize=false / raw=true → extracted.
95
/**
 * Translate the deprecated `summarize` / `raw` flags into a fetch mode.
 *
 * `summarize` decides whenever it is set; `raw` is consulted only when
 * `summarize` is undefined.
 *
 * @param {boolean|undefined} summarize
 * @param {boolean|undefined} raw
 * @returns {'summarized'|'extracted'|undefined} `undefined` when neither flag
 *   is set.
 */
function _legacyBoolsToMode(summarize, raw) {
  if (summarize !== undefined) {
    if (summarize) return 'summarized';
    return 'extracted';
  }
  if (raw === undefined) return undefined;
  if (raw) return 'extracted';
  return 'summarized';
}
100
+
101
+ // Validate + normalize a URL for the fetch tools (http_get / download).
102
+ //
103
+ // `new URL(...)` — and `http.get`/`https.get`'s own internal parse — throws
104
+ // SYNCHRONOUSLY for a malformed URL, before any request starts. That throw
105
+ // happens OUTSIDE the request-level `.on('error')` handlers (which only catch
106
+ // async network failures: EHOSTUNREACH, DNS, timeout, …), so a bad URL would
107
+ // escape the executor as an uncaught exception and crash the whole session
108
+ // instead of becoming a recoverable tool error. The model routinely produces
109
+ // malformed/guessed URLs (invented domains, non-ASCII hosts, stray chars), so
110
+ // every fetch must validate up front and turn ANY bad input into a clean tool
111
+ // error in the SAME `{ error, error_code }` shape the network-failure path
112
+ // returns — so the agent handles it identically to EHOSTUNREACH/timeout.
113
+ //
114
+ // Returns `{ url }` (the normalized href) on success, or `{ error, error_code }`
115
+ // on failure. Only http/https schemes are allowed; everything else (file:, ftp:,
116
+ // javascript:, data:, …) is refused (these parse cleanly but must never be
117
+ // fetched). `base` (optional) resolves a relative URL — used for redirect
118
+ // `Location` headers, which are often relative.
119
/**
 * Validate and normalize a URL before it is handed to a fetch.
 *
 * Never throws: every failure is returned as an `{ error, error_code }` object.
 *
 * @param {*} raw - Candidate URL; must be a non-blank string.
 * @param {string} [base] - Optional base used to resolve a relative `raw`.
 * @returns {{url: string}|{error: string, error_code: string}} The normalized
 *   href, or an error with code `ERR_INVALID_URL` (not a string, blank, or
 *   unparsable) or `ERR_INVALID_PROTOCOL` (anything other than http/https).
 */
function _validateFetchUrl(raw, base) {
  const fail = (error, error_code) => ({ error, error_code });

  if (typeof raw !== 'string') {
    const seen = raw === null ? 'null' : typeof raw;
    return fail(`Invalid URL: expected a string, got ${seen}`, 'ERR_INVALID_URL');
  }

  const candidate = raw.trim();
  if (candidate === '') {
    return fail('Invalid URL: empty URL', 'ERR_INVALID_URL');
  }

  let parsed;
  try {
    // `new URL` throws synchronously on malformed input; convert that to a result.
    parsed = base ? new URL(candidate, base) : new URL(candidate);
  } catch (err) {
    return fail(`Invalid URL: ${err.message}`, 'ERR_INVALID_URL');
  }

  const allowed = parsed.protocol === 'http:' || parsed.protocol === 'https:';
  if (!allowed) {
    return fail(
      `Invalid URL: unsupported protocol "${parsed.protocol}" (only http and https are allowed)`,
      'ERR_INVALID_PROTOCOL',
    );
  }
  return { url: parsed.href };
}
144
+
145
/**
 * Parse http_get's per-call options out of an XML attribute string.
 *
 * An explicit, recognized `mode` wins; otherwise the legacy `summarize` / `raw`
 * flags are mapped to a mode. `intent` is kept only when non-empty.
 *
 * @param {string} [attrStr] - Raw attribute text of the tag (may be empty).
 * @returns {{mode?: string, intent?: string}} Only the options that were set.
 */
function _httpGetOpts(attrStr) {
  const source = String(attrStr || '');

  // Look up name="…" first, then name='…'; undefined when neither is present.
  const readAttr = (name) => {
    for (const quote of ['"', "'"]) {
      const hit = source.match(new RegExp(`${name}=${quote}([^${quote}]*)${quote}`));
      if (hit) return hit[1];
    }
    return undefined;
  };

  const result = {};
  const resolvedMode = _httpGetMode(readAttr('mode'))
    || _legacyBoolsToMode(_httpGetBool(readAttr('summarize')), _httpGetBool(readAttr('raw')));
  if (resolvedMode) result.mode = resolvedMode;

  const intent = readAttr('intent');
  if (intent != null && intent !== '') result.intent = intent;
  return result;
}
162
+
163
/**
 * Build http_get's per-call options from native function-calling params.
 *
 * An explicit, recognized `mode` wins; otherwise the legacy `summarize` / `raw`
 * flags are mapped to a mode. The flags go through `_httpGetBool`, so a
 * stringified boolean ("false", "1", …) is honored exactly as it is on the
 * XML attribute path instead of being silently ignored.
 *
 * @param {object} [p] - Tool-call params; a missing or non-object value is
 *   treated as "no options" rather than throwing.
 * @returns {{mode?: string, intent?: string}} Only the options that were set;
 *   `intent` is trimmed and dropped when blank.
 */
function _httpGetOptsFromParams(p) {
  const params = p && typeof p === 'object' ? p : {};
  const opts = {};
  const mode = _httpGetMode(params.mode);
  if (mode) {
    opts.mode = mode;
  } else {
    const legacy = _legacyBoolsToMode(_httpGetBool(params.summarize), _httpGetBool(params.raw));
    if (legacy) opts.mode = legacy;
  }
  if (typeof params.intent === 'string' && params.intent.trim()) opts.intent = params.intent.trim();
  return opts;
}
176
+
177
+ // web_search per-call options (Task W.2b). Only `count` today — bounded so a
178
+ // huge value never leaves the client; the backend clamps further. Returns
179
+ // `undefined` for a missing/invalid/zero count so the backend default applies.
180
// Largest `count` this client will send for a web_search call.
const _WEB_SEARCH_MAX_COUNT = 10;

/**
 * Normalize a requested result count.
 *
 * @param {*} v - Requested count (number or numeric string).
 * @returns {number|undefined} The integer parsed by `parseInt(v, 10)`, capped at
 *   `_WEB_SEARCH_MAX_COUNT`; `undefined` for null/undefined/'' or any value that
 *   does not parse to a positive integer.
 */
function _clampSearchCount(v) {
  if (v == null || v === '') return undefined;
  const parsed = parseInt(v, 10);
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? Math.min(parsed, _WEB_SEARCH_MAX_COUNT) : undefined;
}
187
+
188
/**
 * Parse web_search's per-call options out of an XML attribute string.
 *
 * @param {string} [attrStr] - Raw attribute text of the tag (may be empty).
 * @returns {{count?: number}} `{ count }` when a usable `count` attribute is
 *   present (double- or single-quoted), otherwise `{}`.
 */
function _webSearchOpts(attrStr) {
  const source = String(attrStr || '');
  const hit = /count="([^"]*)"/.exec(source) || /count='([^']*)'/.exec(source);
  const count = _clampSearchCount(hit ? hit[1] : undefined);
  if (!count) return {};
  return { count };
}
194
+
195
/**
 * Build web_search's per-call options from native function-calling params.
 *
 * @param {object} [p] - Tool-call params; a falsy value yields no options.
 * @returns {{count?: number}} `{ count }` when `p.count` normalizes to a usable
 *   value, otherwise `{}`.
 */
function _webSearchOptsFromParams(p) {
  const requested = p ? p.count : p;
  const count = _clampSearchCount(requested);
  if (!count) return {};
  return { count };
}
199
+
200
+ // The web-fetch pipeline (Task W.1 / W.1b), shared by http_get's execute. Turns
201
+ // a fetched body into the content that enters the main context. The `mode` enum
202
+ // selects the depth of processing:
203
+ // raw → bypass extraction ENTIRELY; return the ORIGINAL fetched content
204
+ // (token-capped). For analyzing a page's HTML/CSS/JS/structure —
205
+ // the one task extraction destroys (Task W.1b).
206
+ // extracted → extract main content → Markdown (HTML only; json/text/markdown
207
+ // pass through untouched so they are never mangled), token-cap it,
208
+ // NO secondary summary.
209
+ // summarized → as `extracted`, then summarize via a secondary cheap LLM call —
210
+ // only the summary enters context; the extracted full text never
211
+ // does.
212
+ // Context protection (token-cap via web.max_content_tokens) applies in EVERY
213
+ // mode, including raw (raw HTML is token-heavier, so it matters more, not less).
214
+ // Containment: a summarizer failure falls back to the capped extracted Markdown,
215
+ // NEVER the raw page. Network-free here (the LLM call is the injected webChat).
216
/**
 * Turn a fetched response body into the result object for http_get.
 *
 * - `mode === 'raw'`: no extraction. The body is classified with
 *   `classifyContentType` and token-capped, using the markup-aware estimate
 *   when the kind is 'html' and the default estimate otherwise.
 * - any other mode: `extractContent` produces Markdown, which is token-capped
 *   with the default estimate. When the mode is 'summarized', the kind is
 *   'html', the capped text is non-blank and `webChat` is a function, the body
 *   is replaced by the result of `summarizeWebContent`; if that call throws,
 *   the capped Markdown is returned with `summary_error` set.
 *
 * @param {object} args
 * @param {string} args.rawBody - Fetched body.
 * @param {string} args.contentType - Passed through to classification/extraction.
 * @param {string} args.url - Passed through to classification/extraction.
 * @param {number} args.statusCode - Reported as `status_code`.
 * @param {number} args.totalBytes - Reported as `bytes`.
 * @param {*} args.transferCapped - Coerced to boolean as `transfer_capped`.
 * @param {string} args.mode - 'raw', 'extracted' or 'summarized'.
 * @param {string} [args.intent] - Forwarded to the summarizer.
 * @param {string} [args.summaryModel] - Forwarded to the summarizer as `model`.
 * @param {number} args.maxContentTokens - Token budget for `capToTokens`.
 * @param {Function} [args.webChat] - LLM call used by the summarizer.
 * @param {*} [args.signal] - Forwarded to the summarizer.
 * @returns {Promise<object>} Result with `status_code`, `bytes`, `kind`, `mode`,
 *   `extracted`, `summarized`, `content_tokens`, `content_truncated`,
 *   `transfer_capped` and `body` (plus `title` / `summary_error` when relevant).
 */
async function processWebContent({
  rawBody, contentType, url, statusCode, totalBytes, transferCapped,
  mode, intent, summaryModel, maxContentTokens, webChat, signal,
}) {
  if (mode === 'raw') {
    const kind = classifyContentType(contentType, url, rawBody);
    // Markup gets its own estimator and chars-per-token budget; everything
    // else is capped with the default estimate.
    const estimatorArgs = kind === 'html'
      ? [markupEstimate, MARKUP_CHARS_PER_TOKEN]
      : [defaultEstimate];
    const rawCap = capToTokens(rawBody, maxContentTokens, ...estimatorArgs);
    return {
      status_code: statusCode,
      bytes: totalBytes,
      kind,
      mode: 'raw',
      extracted: false,
      summarized: false,
      content_tokens: rawCap.tokens,
      content_truncated: rawCap.truncated,
      transfer_capped: !!transferCapped,
      body: rawCap.text,
    };
  }

  const extraction = extractContent({ body: rawBody, contentType, url });
  const cap = capToTokens(extraction.markdown, maxContentTokens, defaultEstimate);
  const common = {
    status_code: statusCode,
    bytes: totalBytes,
    kind: extraction.kind,
    mode,
    title: extraction.title || undefined,
    extracted: extraction.extracted,
    content_tokens: cap.tokens,
    content_truncated: cap.truncated,
    transfer_capped: !!transferCapped,
  };

  // Only non-blank HTML-derived content is eligible for a summary.
  const summarizable = extraction.kind === 'html' && cap.text.trim().length > 0;
  const shouldSummarize = mode === 'summarized' && summarizable && typeof webChat === 'function';
  if (!shouldSummarize) {
    return { ...common, body: cap.text, summarized: false };
  }

  try {
    const summary = await summarizeWebContent({
      markdown: cap.text, intent, chat: webChat, model: summaryModel, signal,
    });
    return { ...common, body: summary, summarized: true };
  } catch (err) {
    // Summarizer failure degrades to the capped Markdown, never the raw body.
    return { ...common, body: cap.text, summarized: false, summary_error: err.message };
  }
}
278
+
279
+ // ── XML parse helpers (moved from lib/tools.js) ────────────────────────────
280
+
281
/**
 * Run one regex template against `text` twice, once per quote style.
 *
 * Every `Q` in `template` is replaced by `"` for the first pass and `'` for
 * the second; the pattern is compiled with the global flag.
 *
 * @param {string} text - Text to scan.
 * @param {string} template - Regex source using `Q` as the quote placeholder.
 * @returns {RegExpMatchArray[]} All double-quote matches followed by all
 *   single-quote matches.
 */
function _matchDual(text, template) {
  return ['"', "'"].flatMap((quote) => {
    const pattern = new RegExp(template.replace(/Q/g, quote), 'g');
    return Array.from(text.matchAll(pattern));
  });
}
289
+
290
+ // 1-based starting line numbers of every literal occurrence of `needle` in
291
+ // `data`. Used by replace_in_file's uniqueness guard to tell the model WHERE the
292
+ // ambiguous matches are so it can add disambiguating context. Capped so a needle
293
+ // that matches hundreds of times doesn't produce a giant error string.
294
/**
 * 1-based starting line numbers of the literal, non-overlapping occurrences of
 * `needle` in `data`.
 *
 * @param {string} data - Text to scan.
 * @param {string} needle - Literal text to look for. An empty or non-string
 *   needle yields `[]`: an empty string "matches" at every offset without
 *   advancing, which would otherwise fill the result with `cap` bogus hits.
 * @param {number} [cap=10] - Stop once this many occurrences were collected.
 * @returns {number[]} Line numbers in ascending order, at most `cap` entries.
 */
function _literalOccurrenceLines(data, needle, cap = 10) {
  const lines = [];
  if (typeof needle !== 'string' || needle.length === 0) return lines;

  // Count newlines incrementally: `line` is the line number of offset `counted`.
  let line = 1;
  let counted = 0;
  let from = 0;
  let idx;
  while ((idx = data.indexOf(needle, from)) !== -1) {
    for (let i = counted; i < idx; i++) {
      if (data.charCodeAt(i) === 10) line++;
    }
    counted = idx;
    lines.push(line);
    from = idx + needle.length;
    if (lines.length >= cap) break;
  }
  return lines;
}
305
+
306
/**
 * Strip a single wrapping element from a tag body.
 *
 * When the trimmed input is exactly `<name …>content</name>` (same name on both
 * ends), the trimmed `content` is returned; otherwise the input is returned
 * unchanged (not trimmed, not stringified).
 *
 * @param {*} inner - Tag body; null/undefined are returned as-is.
 * @returns {*} The unwrapped content, or the original value.
 */
function _unwrapInnerTag(inner) {
  if (inner == null) return inner;
  const wrapped = /^<(\w+)(?:\s[^>]*)?>([\s\S]*)<\/\1>$/.exec(String(inner).trim());
  return wrapped ? wrapped[2].trim() : inner;
}
313
+
314
+ // read_file pagination rail (Task W.7). Parses both forms in one pass and
315
+ // resolves the optional start_line/end_line/show_line_numbers attributes onto the
316
+ // tuple ['read', path, startLine|null, endLine|null, showLineNumbers]. Absent
317
+ // range → null (parity with fromParams), so the formatter's defaults apply. Path
318
+ // comes from the `path` attr or the inline body (the historical two forms).
319
/**
 * Extract every `<read_file>` call from `text`.
 *
 * Handles both the self-closing form and the open/close form. The path comes
 * from the `path` attribute when present, otherwise from the (unwrapped,
 * trimmed) tag body; tags with no path at all are skipped.
 *
 * @param {string} text - Model output to scan.
 * @returns {Array<Array>} One `['read', path, startLine|null, endLine|null,
 *   showLineNumbers]` tuple per tag, in document order. The line bounds are
 *   `parseInt` results or null; `showLineNumbers` is true only for the exact
 *   attribute values 'true', '1' or 'yes'.
 */
function _parseReadTag(text) {
  const tagRe = /<read_file\b([^>]*?)(?:\/>|>([\s\S]*?)<\/read_file>)/g;
  const toInt = (value) => {
    if (value == null) return null;
    const parsed = parseInt(value, 10);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const calls = [];
  for (const [, rawAttrs, rawBody] of text.matchAll(tagRe)) {
    const attrs = rawAttrs || '';
    const body = rawBody != null ? rawBody : '';
    // Double-quoted value first, then single-quoted; null when absent.
    const readAttr = (key) => {
      const hit = attrs.match(new RegExp(`${key}="([^"]*)"`)) || attrs.match(new RegExp(`${key}='([^']*)'`));
      return hit ? hit[1] : null;
    };

    let target = readAttr('path');
    if (target == null) {
      const fromBody = _unwrapInnerTag(body).trim();
      target = fromBody || null;
    }
    if (target == null) continue;

    const flag = readAttr('show_line_numbers');
    const showLineNumbers = flag === 'true' || flag === '1' || flag === 'yes';
    calls.push(['read', target, toInt(readAttr('start_line')), toInt(readAttr('end_line')), showLineNumbers]);
  }
  return calls;
}
339
+
340
/**
 * Collect calls written as `<tag>value</tag>` for any tag in an alternation.
 *
 * @param {string} text - Model output to scan.
 * @param {string} tagAlternation - Regex alternation of accepted tag names
 *   (e.g. `foo|bar`), used for both the opening and the closing tag.
 * @param {string} action - Action name placed first in each tuple.
 * @param {Array} [extraArgs=[]] - Values appended to every tuple.
 * @returns {Array<Array>} `[action, value, ...extraArgs]` per match, where
 *   `value` is the unwrapped, trimmed tag body.
 */
function _inline(text, tagAlternation, action, extraArgs = []) {
  const group = `(?:${tagAlternation})`;
  const re = new RegExp(`<${group}>([\\s\\S]*?)<\\/${group}>`, 'g');
  return Array.from(
    text.matchAll(re),
    (m) => [action, _unwrapInnerTag(m[1]).trim(), ...extraArgs],
  );
}
346
+
347
+ // The full ctx destructure, reused at the top of every execute/permission so the
348
+ // moved bodies see the same free names they had inside the createToolExecutor
349
+ // closure. Unused names in any given body are harmless.
350
+ // const CTX = (ctx) => ... (we inline the destructure literally for clarity)
351
+
352
+ // ── write/append share one body in agentExecFile; keep that sharing here ────
353
/**
 * Shared executor for write_file and append_file.
 *
 * Checks run in this order, each returning early with a logged tool call:
 * read-only block, protected config path, path-safety check, dry-run. Otherwise
 * the prior file content is captured (missing file → ''), the parent directory
 * is created if needed, and the content is written or appended.
 *
 * @param {object} ctx - Dependency bag; the names destructured below are read.
 * @param {string} action - 'write' to replace the file; anything else appends.
 * @param {Array} args - `[filePath, content]`; both default to null.
 * @param {object} [options] - Accepted for interface parity; not used here.
 * @returns {Promise<object>} On success `{ status: 'ok', path, bytes,
 *   _diffBefore, _diffAfter }`, where `bytes` is the UTF-8 byte length of a
 *   string `content` (not its UTF-16 length). On dry-run `{ status: 'dry-run',
 *   message, path }`. On failure `{ error }` or the object produced by the
 *   blocking guard.
 */
async function _execWriteAppend(ctx, action, args, options) {
  const [filePath = null, content = null] = args;
  const { _log, logToolCall, isPathSafe, isProtectedConfigPath, _sandboxError, _protectedConfigWriteError, _dryRun, _skippedOps, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;

  const isWrite = action === 'write';
  const tag = isWrite ? 'write_file' : 'append_file';

  const blocked = permissionManager.readonlyBlock(tag);
  if (blocked) {
    logToolCall(tag, { path: filePath, content }, false, 'denied');
    return blocked;
  }

  if (isProtectedConfigPath(filePath)) {
    logToolCall(tag, { path: filePath }, false, 'denied');
    return _protectedConfigWriteError(filePath);
  }

  if (!isPathSafe(filePath)) {
    logToolCall(tag, { path: filePath }, false, 'denied');
    return _sandboxError(filePath);
  }

  if (_dryRun) {
    const verb = isWrite ? 'write' : 'append';
    _skippedOps.push({ category: 'file', symbol: '✎', desc: `${verb} ${filePath}` });
    logToolCall(tag, { path: filePath }, false, 'dry-run');
    return { status: 'dry-run', message: 'dry-run: write skipped', path: filePath };
  }

  try {
    const payload = content || '';
    // Prior content feeds the before/after pair returned for diff rendering;
    // a file that cannot be read is treated as empty.
    let before = '';
    try { before = await fsp.readFile(filePath, 'utf8'); } catch {}
    const after = isWrite ? payload : (before + payload);

    const dir = path.dirname(filePath);
    if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
    if (isWrite) await fsp.writeFile(filePath, payload);
    else await fsp.appendFile(filePath, payload);

    const verb = isWrite ? 'Wrote' : 'Appended to';
    _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}${verb} ${filePath}${RST}`);
    logToolCall(tag, { path: filePath, content }, true, 'ok');

    // Strings are written as UTF-8, so report encoded bytes; `.length` would
    // under-count any non-ASCII text. Non-string payloads keep `.length`.
    const bytes = typeof payload === 'string' ? Buffer.byteLength(payload, 'utf8') : payload.length;
    return { status: 'ok', path: filePath, bytes, _diffBefore: before, _diffAfter: after };
  } catch (error) {
    _log(`  ${FG_RED}✗ ${error.message}${RST}`);
    logToolCall(tag, { path: filePath, content }, true, 'error');
    return { error: error.message };
  }
}
406
+
407
/**
 * Permission descriptor for write_file / append_file.
 *
 * In dry-run mode nothing is gated and `null` is returned. Otherwise, when no
 * UI is active (`ctx._uiActive` falsy), a diff of the current file content
 * against the content the operation would produce is pushed to
 * `writer.scrollback` before the descriptor is returned.
 *
 * @param {object} ctx - Reads `_dryRun`, `_uiActive`, `renderDiff`, `writer`.
 * @param {string} action - 'write' to replace the file; anything else appends.
 * @param {Array} args - `[filePath, content]`.
 * @returns {Promise<{actionType: string, description: string, tag: string}|null>}
 */
async function _permWriteAppend(ctx, action, args) {
  const { _dryRun, _uiActive, renderDiff, writer } = ctx;
  const [filePath, content] = args;
  const isWrite = action === 'write';

  if (_dryRun) return null;

  if (!_uiActive) {
    // No UI is active on this path, so the diff is surfaced here instead.
    let existing = '';
    try { existing = await fsp.readFile(filePath, 'utf8'); } catch {}
    const addition = content || '';
    const finalContent = isWrite ? addition : (existing + addition);
    writer.scrollback(renderDiff(existing, finalContent, filePath));
  }

  const verb = isWrite ? 'Write' : 'Append to';
  const sizeNote = content ? ` (${content.length} chars)` : '';
  return {
    actionType: 'file',
    description: `${verb} ${filePath}${sizeNote}`,
    tag: isWrite ? 'write_file' : 'append_file',
  };
}
433
+
434
+ // ── grep / glob (Task 2.1) ─────────────────────────────────────────────────
435
+ //
436
+ // Canonical search semantics. The pure-Node implementation is the REFERENCE;
437
+ // ripgrep is invoked with a flag set chosen to reproduce it byte-for-byte (the
438
+ // equivalence is pinned by test/grep-glob.test.js):
439
+ // * recurse from baseDir
440
+ // * always skip directories named node_modules or .git
441
+ // * skip hidden entries (names beginning with ".")
442
+ // * honor a .gitignore in baseDir if present (common-subset rules below)
443
+ // * skip binary files (a NUL byte in the first 8 KB)
444
+ // * emit one record per matching LINE: { file, line, text }, sorted by
445
+ // (file, line); file is baseDir-relative POSIX. Output never carries the
446
+ // engine identity, so rg and Node results are deep-equal.
447
+
448
// Result-size limits for grep and glob (presumably upper bounds on returned
// matches / files — confirm against the engine code that reads them).
const GREP_MAX_MATCHES = 1000;
const GLOB_MAX_FILES = 5000;

// Directory names the search always skips.
const GREP_SKIP_DIRS = new Set(['node_modules', '.git']);

// How many leading bytes are inspected for a NUL when sniffing binary files (8 KB).
const BINARY_SNIFF_BYTES = 8192;
452
+
453
+ // grep output modes (Task W.5), Claude-Code-style. The model selects one via the
454
+ // `output_mode` parameter; the mode is shaped at serialization time
455
+ // (lib/agent.js formatFileResult) from the same engine result:
456
+ // * content — file:line:text per match (default; "show me the lines")
457
+ // * files_with_matches — unique file paths only ("which files")
458
+ // * count — match counts per file + total ("how many")
459
// Output modes the grep tool accepts for `output_mode`.
const GREP_OUTPUT_MODES = ['content', 'files_with_matches', 'count'];

/**
 * Normalize a requested grep output mode.
 *
 * @param {*} m - Requested mode.
 * @returns {string} `m` when it is one of GREP_OUTPUT_MODES (exact match),
 *   otherwise 'content'.
 */
function _normGrepMode(m) {
  if (GREP_OUTPUT_MODES.indexOf(m) !== -1) return m;
  return 'content';
}
463
+ // head_limit / offset normalization (Task W.5). A positive integer bounds /
464
+ // skips results; anything else falls back (limit → default, offset → 0).
465
// Normalize a `head_limit` value to a positive integer.
//   v    — a number, or anything parseInt can read (e.g. the string "25")
//   dflt — returned whenever v does not yield an integer >= 1
// The value is floored BEFORE the positivity check so a fraction such as 0.5
// falls back to the default instead of collapsing to a limit of 0.
function _normHeadLimit(v, dflt) {
  const n = Math.floor(typeof v === 'number' ? v : parseInt(v, 10));
  return Number.isFinite(n) && n > 0 ? n : dflt;
}
469
// Normalize an `offset` value: a finite number above zero is floored and
// returned; everything else (including unparsable input) yields 0.
function _normOffset(v) {
  const parsed = typeof v === 'number' ? v : parseInt(v, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return 0;
  return Math.floor(parsed);
}
473
+
474
// ripgrep detection, performed once and cached. SEMALT_NO_RG forces the Node
// fallback; SEMALT_RG_BIN points at an alternate binary (both used by tests).
let _rgChecked = false;
let _rgBin = null;

// Returns the ripgrep binary name/path when `<bin> --version` exits 0, else
// null. The probe runs only on the first call; later calls return the cache.
function _detectRipgrep() {
  if (!_rgChecked) {
    _rgChecked = true;
    if (process.env.SEMALT_NO_RG) {
      _rgBin = null;
    } else {
      const candidate = process.env.SEMALT_RG_BIN || 'rg';
      try {
        const probe = spawnSync(candidate, ['--version'], { encoding: 'utf8' });
        if (probe && probe.status === 0) _rgBin = candidate;
      } catch { /* rg not on PATH */ }
    }
  }
  return _rgBin;
}
489
+
490
// Convert a path written with the platform separator into '/'-separated form.
function _toPosix(p) {
  const segments = p.split(path.sep);
  return segments.join('/');
}
491
+
492
// Glob → anchored RegExp. Mirrors the search_files conversion so the two file
// matchers agree: * → one path segment, ** → any depth. In addition `?` matches
// exactly one non-separator character; it used to reach the RegExp unescaped,
// where it acted as a quantifier (`a?b`) or threw "Nothing to repeat" (`?.js`).
// All other regex metacharacters in the glob are matched literally.
function _globToRegExp(glob) {
  // Escape everything that is special in a RegExp except the glob wildcards.
  let s = glob.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  // Park `**` on a NUL placeholder so the single-star rewrite cannot touch it.
  s = s.replace(/\*\*/g, '\x00');
  s = s.replace(/\*/g, '[^/]*');
  // Must run before the placeholder expansion below, which itself emits `?`.
  s = s.replace(/\?/g, '[^/]');
  // `**/` may match zero directories; a bare `**` matches anything.
  s = s.replace(/\x00\//g, '(?:.*/)?');
  s = s.replace(/\x00/g, '.*');
  return new RegExp(`^${s}$`);
}
502
+
503
// .gitignore, common subset: blank/comment lines; basename globs (no slash,
// matched at any depth); anchored path globs (a slash anywhere but trailing);
// dir-only (trailing slash); negation (!). Only the baseDir .gitignore is read
// (no nested files). Last matching rule wins.
//
// Returns an array of { negate, dirOnly, anchored, re } rules in file order, or
// an empty array when baseDir/.gitignore cannot be read.
function _loadGitignore(baseDir) {
  let contents;
  try {
    contents = fs.readFileSync(path.join(baseDir, '.gitignore'), 'utf8');
  } catch {
    return [];
  }
  const parsed = [];
  for (const rawLine of contents.split('\n')) {
    // Drop a CR left by CRLF files, then surrounding whitespace.
    let pat = rawLine.replace(/\r$/, '').replace(/^\s+|\s+$/g, '');
    if (!pat || pat.startsWith('#')) continue;
    const negate = pat.startsWith('!');
    if (negate) pat = pat.slice(1);
    const dirOnly = pat.endsWith('/');
    if (dirOnly) pat = pat.slice(0, -1);
    const rooted = pat.startsWith('/');
    if (rooted) pat = pat.slice(1);
    if (!pat) continue; // nothing left after stripping the markers
    parsed.push({
      negate,
      dirOnly,
      anchored: rooted || pat.includes('/'),
      re: _globToRegExp(pat),
    });
  }
  return parsed;
}
526
+
527
// Decide whether an entry is ignored by the loaded rules.
//   rules — output of _loadGitignore
//   rel   — baseDir-relative POSIX path of the entry; name — its basename
//   isDir — whether the entry is a directory (dir-only rules skip files)
// Anchored rules are tested against `rel`, the rest against `name`. Every rule
// is evaluated in order and the last one that matches decides the outcome.
function _gitignored(rules, rel, name, isDir) {
  return rules.reduce((ignored, rule) => {
    if (rule.dirOnly && !isDir) return ignored;
    const subject = rule.anchored ? rel : name;
    return rule.re.test(subject) ? !rule.negate : ignored;
  }, false);
}
536
+
537
// Binary sniff: true when a NUL byte occurs within the first
// BINARY_SNIFF_BYTES bytes of `buf` (or within the whole buffer if shorter).
function _isBinaryBuf(buf) {
  const limit = Math.min(buf.length, BINARY_SNIFF_BYTES);
  let idx = 0;
  while (idx < limit) {
    if (buf[idx] === 0) return true;
    idx += 1;
  }
  return false;
}
542
+
543
// Iterative DFS over baseDir applying the canonical skip rules; calls
// onFile(rel, name, absPath) for each surviving file. The abort signal is
// consulted each time a directory is taken off the stack. Returns false if
// aborted, true otherwise.
//
// Skipped: names starting with ".", directories listed in GREP_SKIP_DIRS,
// anything matched by `rules`, entries that are neither directories nor regular
// files, and directories that cannot be read.
function _walkTree(baseDir, { rules = [], signal = null, onFile }) {
  const pending = [{ dir: baseDir, rel: '' }];
  while (pending.length > 0) {
    if (signal && signal.aborted) return false;
    const { dir, rel } = pending.pop();
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      continue; // unreadable directory: skip it, keep walking
    }
    for (const entry of entries) {
      const name = entry.name;
      if (name.startsWith('.')) continue; // hidden
      const isDir = entry.isDirectory();
      const childRel = rel ? `${rel}/${name}` : name;
      if (isDir) {
        const pruned = GREP_SKIP_DIRS.has(name)
          || (rules.length && _gitignored(rules, childRel, name, true));
        if (!pruned) pending.push({ dir: path.join(dir, name), rel: childRel });
        continue;
      }
      if (!entry.isFile()) continue; // symlinks / sockets / etc.
      if (rules.length && _gitignored(rules, childRel, name, false)) continue;
      onFile(childRel, name, path.join(dir, name));
    }
  }
  return true;
}
572
+
573
// Pure-Node grep engine. Walks baseDir with the canonical skip rules plus the
// baseDir .gitignore, optionally filters files by `pathGlob` (matched against
// the basename when the glob has no slash, otherwise against the relative
// path), skips binary and unreadable files, and records every matching line.
// Returns { matches: [{ file, line, text }] } (unsorted), { aborted: true }, or
// { error } when `pattern` is not a valid RegExp.
function _grepNode({ pattern, pathGlob, ignoreCase, baseDir, signal }) {
  let matcher;
  try {
    matcher = new RegExp(pattern, ignoreCase ? 'i' : '');
  } catch (err) {
    return { error: `Invalid regex pattern: ${err.message}` };
  }
  const rules = _loadGitignore(baseDir);
  const pathFilter = pathGlob ? _globToRegExp(pathGlob) : null;
  const filterOnBasename = pathGlob && !pathGlob.includes('/');
  const matches = [];

  const scanFile = (rel, name, abs) => {
    if (pathFilter && !pathFilter.test(filterOnBasename ? name : rel)) return;
    let buf;
    try { buf = fs.readFileSync(abs); } catch { return; }
    if (_isBinaryBuf(buf)) return;
    const data = buf.toString('utf8');
    const lines = data.split('\n');
    // A trailing newline terminates the last line; drop the phantom empty
    // element split() produces so line counting matches ripgrep.
    if (data.endsWith('\n')) lines.pop();
    const posix = _toPosix(rel);
    lines.forEach((text, idx) => {
      if (matcher.test(text)) matches.push({ file: posix, line: idx + 1, text });
    });
  };

  const completed = _walkTree(baseDir, { rules, signal, onFile: scanFile });
  return completed ? { matches } : { aborted: true };
}
603
+
604
// ripgrep-backed grep engine. Runs `rg --json` in baseDir and converts its
// "match" events into the same { file, line, text } records _grepNode emits.
// Returns { matches }, { aborted: true } when the signal is already aborted, or
// { error } when the process cannot be spawned or rg exits with status 2.
function _grepRg({ pattern, pathGlob, ignoreCase, baseDir, signal }) {
  if (signal && signal.aborted) return { aborted: true };
  const bin = _detectRipgrep() || process.env.SEMALT_RG_BIN || 'rg';
  // These flags make rg honor the baseDir .gitignore without a git repo while
  // ignoring parent/global/.ignore files, and unconditionally drop node_modules
  // — exactly the canonical Node semantics above. Hidden entries and binary
  // files are skipped by rg's defaults.
  const args = [
    '--json', '--no-require-git', '--no-ignore-parent', '--no-ignore-global', '--no-ignore-dot',
    '-g', '!node_modules',
    ...(ignoreCase ? ['-i'] : []),
    ...(pathGlob ? ['-g', pathGlob] : []),
    '--regexp', pattern, '--', '.',
  ];
  const proc = spawnSync(bin, args, { cwd: baseDir, encoding: 'utf8', maxBuffer: 64 * 1024 * 1024 });
  if (proc.error) return { error: proc.error.message };
  if (proc.status === 2) return { error: (proc.stderr || 'ripgrep error').trim() };
  // status 0 = matches, 1 = no matches — both are success.
  const matches = [];
  for (const rawLine of (proc.stdout || '').split('\n')) {
    if (!rawLine) continue;
    let event;
    try { event = JSON.parse(rawLine); } catch { continue; }
    if (event.type !== 'match') continue;
    const payload = event.data;
    if (!payload.lines || typeof payload.lines.text !== 'string') continue; // non-UTF8 line
    const reported = payload.path && payload.path.text ? payload.path.text : '';
    const file = reported.startsWith('./') ? reported.slice(2) : reported;
    const rawText = payload.lines.text;
    const text = rawText.endsWith('\n') ? rawText.slice(0, -1) : rawText;
    matches.push({ file: _toPosix(file), line: payload.line_number, text });
  }
  return { matches };
}
637
+
638
// Turn a raw engine result into the public grep result. Error / aborted results
// pass through untouched (a missing result becomes { error: 'grep failed' }).
// Otherwise the matches are copied, sorted by (file, line), capped at
// GREP_MAX_MATCHES, and returned as { matches, pattern, count } with
// `truncated: true` added only when the cap cut something off.
function _finalizeGrep(raw, pattern) {
  if (!raw) return { error: 'grep failed' };
  if (raw.error || raw.aborted) return raw;
  const byFileThenLine = (a, b) => {
    if (a.file < b.file) return -1;
    if (a.file > b.file) return 1;
    return a.line - b.line;
  };
  const sorted = [...raw.matches].sort(byFileThenLine);
  const truncated = sorted.length > GREP_MAX_MATCHES;
  const matches = truncated ? sorted.slice(0, GREP_MAX_MATCHES) : sorted;
  const out = { matches, pattern, count: matches.length };
  if (truncated) out.truncated = true;
  return out;
}
648
+
649
// Resolve the grep `path` argument into a concrete search plan. `path` is no
// longer a glob-only filter: it may denote an existing FILE (search just that
// file, like search_in_file), an existing DIRECTORY (use it as the walk root),
// or — when it is NOT an existing filesystem path — a GLOB filter applied to the
// cwd walk (the legacy behavior, preserved for callers like `path="*.js"`). A
// `path` that is a literal that does not exist falls into the glob branch and
// simply matches zero candidate files; the wrapper's safety net then turns that
// into a clear diagnostic instead of a silent {count:0}. Relative paths resolve
// against the process cwd (statSync semantics), matching read_file/search_in_file.
//
// Plans returned:
//   { mode: 'none', baseDir: '.', pathGlob: null }   — no path given
//   { mode: 'file', file, display }                   — existing regular file
//   { mode: 'dir',  baseDir: path, pathGlob: null }   — existing directory
//   { mode: 'glob', baseDir: '.', pathGlob: path }    — anything else
function _resolveGrepPath(pathArg) {
  if (pathArg == null || pathArg === '') return { mode: 'none', baseDir: '.', pathGlob: null };
  let stat;
  try {
    stat = fs.statSync(pathArg);
  } catch {
    stat = null; // not an existing path: treated as a glob below
  }
  if (stat) {
    if (stat.isFile()) return { mode: 'file', file: pathArg, display: pathArg };
    if (stat.isDirectory()) return { mode: 'dir', baseDir: pathArg, pathGlob: null };
  }
  return { mode: 'glob', baseDir: '.', pathGlob: pathArg };
}
666
+
667
// Grep a single explicit file (FILE-target mode). Engine-independent — an
// explicitly named file is read directly (like search_in_file), which also means
// it is searched even if .gitignore'd, since the model asked for THIS file. Binary
// files yield no matches. Returns the same { matches:[{file,line,text}] } shape as
// _grepNode so it flows through _finalizeGrep identically.
//
// `display` is the path reported in each match (converted to POSIX form). Other
// outcomes: { error } for an invalid pattern or an unreadable file, and
// { aborted: true } when the signal is already aborted.
function _grepFile({ pattern, ignoreCase, file, display, signal }) {
  let matcher;
  try {
    matcher = new RegExp(pattern, ignoreCase ? 'i' : '');
  } catch (err) {
    return { error: `Invalid regex pattern: ${err.message}` };
  }
  if (signal && signal.aborted) return { aborted: true };

  let buf;
  try {
    buf = fs.readFileSync(file);
  } catch (err) {
    return { error: err.message };
  }
  if (_isBinaryBuf(buf)) return { matches: [] };

  const data = buf.toString('utf8');
  // A trailing newline terminates the last line rather than starting a new one.
  const lines = (data.endsWith('\n') ? data.slice(0, -1) : data).split('\n');
  const posix = _toPosix(display);
  const matches = lines.reduce((acc, text, idx) => {
    if (matcher.test(text)) acc.push({ file: posix, line: idx + 1, text });
    return acc;
  }, []);
  return { matches };
}
690
+
691
// Does the walk root contain ANY file grep would consider searching (passing the
// gitignore + skip-dir + pathGlob filters)? Used ONLY by the safety net, and only
// when a result came back empty — it answers "was there anything to search at
// all?" so a glob that matched real files (true negative) is never demoted to an
// error. Deliberately does NOT read file contents (no binary sniff): over-counting
// errs toward returning {count:0}, the safe direction.
//
// Short-circuits after the first hit: the walk is handed a signal-like object
// whose `aborted` turns true once a candidate is found (or when the caller's own
// signal aborts), so _walkTree stops at its next per-directory check instead of
// scanning the rest of the tree. Returns true when a candidate exists.
function _grepHasCandidate({ baseDir, pathGlob, signal }) {
  const rules = _loadGitignore(baseDir);
  const pf = pathGlob ? _globToRegExp(pathGlob) : null;
  const pfBasename = pathGlob && !pathGlob.includes('/');
  let found = false;
  // _walkTree only ever reads `.aborted`, so a getter is enough to fold the
  // "already found" condition into the caller's cancellation signal.
  const stop = {
    get aborted() { return found || Boolean(signal && signal.aborted); },
  };
  _walkTree(baseDir, {
    rules,
    signal: stop,
    onFile: (rel, name) => {
      if (found) return; // remaining entries of the directory being listed
      if (pf && !pf.test(pfBasename ? name : rel)) return;
      found = true;
    },
  });
  return found;
}
713
+
714
// engine: 'auto' (rg if available, else Node), 'rg', or 'node'. Exported for
// the parity tests, which drive both engines and assert deep equality.
//
// `path` (when supplied) is the path-aware target: it is resolved via
// _resolveGrepPath into a FILE read / DIRECTORY walk root / GLOB filter, and feeds
// BOTH engines identically (FILE mode is a direct read, trivially engine-agnostic).
// The legacy `pathGlob`/`baseDir` params are still honored directly for callers
// that pre-resolve (the parity tests). The safety net fires ONLY on the `path`
// pathway, so existing pathGlob/baseDir callers are byte-for-byte unaffected.
//
// Returns the _finalizeGrep result, or { error } / { aborted: true }. In 'auto'
// mode a ripgrep error is retried once with the Node engine.
function _grepSearch({ pattern, path: pathArg = null, pathGlob = null, ignoreCase = false, baseDir = '.', engine = 'auto', signal = null }) {
  if (typeof pattern !== 'string' || pattern === '') return { error: 'grep: pattern is required' };

  const pathSupplied = pathArg != null && pathArg !== '';
  if (pathSupplied) {
    const plan = _resolveGrepPath(pathArg);
    if (plan.mode === 'file') {
      const fileRaw = _grepFile({ pattern, ignoreCase, file: plan.file, display: plan.display, signal });
      const failed = fileRaw && (fileRaw.error || fileRaw.aborted);
      // an existing file with 0 hits is a true negative, not an error
      return failed ? fileRaw : _finalizeGrep(fileRaw, pattern);
    }
    baseDir = plan.baseDir;
    pathGlob = plan.pathGlob;
  }

  const request = { pattern, pathGlob, ignoreCase, baseDir, signal };
  let raw;
  if (engine === 'rg' || (engine === 'auto' && !!_detectRipgrep())) {
    raw = _grepRg(request);
    if (raw && raw.error && engine === 'auto') raw = _grepNode(request);
  } else {
    raw = _grepNode(request);
  }
  if (raw && (raw.error || raw.aborted)) return raw;

  const out = _finalizeGrep(raw, pattern);
  // Safety net (FIX 2): a `path` was supplied but there was NOTHING to search
  // (path doesn't exist, or the glob/dir matched zero candidate files). Surface a
  // diagnostic instead of a silent {count:0} that reads as "pattern absent". Gated
  // strictly on zero CANDIDATE FILES — a glob that matched real files where the
  // pattern just isn't present still returns {count:0} (a true negative).
  const cameBackEmpty = out && !out.error && out.count === 0;
  if (cameBackEmpty && pathSupplied && !_grepHasCandidate({ baseDir, pathGlob, signal })) {
    return { error: `grep: path "${pathArg}" did not resolve to any file within the search root ${baseDir}` };
  }
  return out;
}
759
+
760
// List files under baseDir whose path matches the glob `pattern` (matched
// against the basename when the pattern has no slash, otherwise against the
// baseDir-relative path). Returns
//   { files: [{ path, size, mtime }], pattern, dir, count, truncated? }
// sorted by path, { aborted: true } when the walk is aborted, or { error } when
// `pattern` is missing. Files that cannot be stat'ed are skipped.
//
// `truncated` is reported only when a matching file was actually left out: the
// flag is raised when a match arrives AFTER GLOB_MAX_FILES records were
// collected, not when the cap is merely reached.
function _globSearch({ pattern, baseDir = '.', signal = null }) {
  if (typeof pattern !== 'string' || pattern === '') return { error: 'glob: pattern is required' };
  const re = _globToRegExp(pattern);
  const byBasename = !pattern.includes('/');
  const files = [];
  let truncated = false;
  // glob does not apply .gitignore (only node_modules/.git/hidden are skipped).
  const ok = _walkTree(baseDir, {
    rules: [],
    signal,
    onFile: (rel, name, abs) => {
      if (truncated) return;
      if (!re.test(byBasename ? name : rel)) return;
      let st;
      try { st = fs.statSync(abs); } catch { return; }
      if (files.length >= GLOB_MAX_FILES) { truncated = true; return; }
      files.push({ path: _toPosix(rel), size: st.size, mtime: st.mtime.toISOString() });
    },
  });
  if (!ok) return { aborted: true };
  files.sort((a, b) => (a.path < b.path ? -1 : a.path > b.path ? 1 : 0));
  const out = { files, pattern, dir: baseDir, count: files.length };
  if (truncated) out.truncated = true;
  return out;
}
785
+
786
// Shared XML parser for the attribute-or-inline <grep>/<glob> tags.
//
// Scans `text` for `<tag …/>` and `<tag …>body</tag>` and returns one positional
// call tuple per tag:
//   grep → ['grep', pattern, path|null, ignoreCase, output_mode|null, head_limit, offset]
//   glob → ['glob', pattern, path|dir|'.', head_limit, offset]
// The pattern comes from the `pattern` attribute, else from the trimmed body;
// tags with neither are skipped. head_limit / offset are passed through as raw
// attribute strings (null when absent).
//
// Two parsing rules keep regex-like patterns intact:
//   * quoted attribute values are consumed as units when locating the end of the
//     opening tag, so a `>` inside a value (pattern="=>") no longer ends the tag;
//   * attributes are tokenized left to right as name="value" / name='value'
//     pairs, so a name is never matched inside another attribute's value or as
//     the tail of a longer name. The first occurrence of a name wins.
function _parseSearchTag(text, tag) {
  const out = [];
  const tagRe = new RegExp(`<${tag}\\b((?:"[^"]*"|'[^']*'|[^>"'])*?)(?:\\/>|>([\\s\\S]*?)<\\/${tag}>)`, 'g');
  const attrRe = /([\w.:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  for (const m of text.matchAll(tagRe)) {
    const attrs = new Map();
    for (const a of (m[1] || '').matchAll(attrRe)) {
      if (!attrs.has(a[1])) attrs.set(a[1], a[2] !== undefined ? a[2] : a[3]);
    }
    const attr = (k) => (attrs.has(k) ? attrs.get(k) : null);
    const body = m[2] != null ? m[2] : '';
    let pattern = attr('pattern');
    if (pattern == null) { const b = body.trim(); pattern = b || null; }
    if (pattern == null) continue;
    if (tag === 'grep') {
      const ic = attr('ignore_case');
      out.push(['grep', pattern, attr('path') || null, ic === 'true' || ic === '1' || ic === 'yes',
        attr('output_mode') || null, attr('head_limit'), attr('offset')]);
    } else {
      out.push(['glob', pattern, attr('path') || attr('dir') || '.', attr('head_limit'), attr('offset')]);
    }
  }
  return out;
}
810
+
811
+ // ── Native git tools (Task 5.1) ────────────────────────────────────────────
812
+ //
813
+ // First-class git operations: structured results for the common verbs, the long
814
+ // tail left to the (sandboxed) generic shell. Every git tool shells out through
815
+ // ctx.agentExecShell — the SAME sandbox + deny-list chokepoint as <shell> — so it
816
+ // gets NO privileged path around confinement (constraint #5). Read-only tools
817
+ // (status/diff/log, and the list ops of branch/worktree) return a null permission
818
+ // descriptor; mutating tools (add/commit/branch-create/checkout/worktree-add+remove)
819
+ // require approval, honor --readonly via permissionManager.readonlyBlock, and are
820
+ // subject to the per-pattern rules + deny-list. Checkpoints (Task 4.3) snapshot
821
+ // FILE-TOOL mutations only — git operations are NOT reversible via /rewind, and
822
+ // git_checkout can discard uncommitted work that checkpoints never captured.
823
+
824
// Shell-quote one argument so the command string we hand to agentExecShell is
// safe regardless of metacharacters in branch names / paths / commit messages.
// Platform-aware: cmd.exe double-quote convention on Windows, POSIX single-quote
// elsewhere. The deny-list + sandbox remain the security boundary; this only
// prevents accidental word-splitting of the structured arguments.
// null / undefined are quoted as the empty string.
function _shQuote(arg) {
  const raw = arg == null ? '' : String(arg);
  if (process.platform !== 'win32') {
    // Close the quote, emit an escaped single quote, reopen the quote.
    return `'${raw.replace(/'/g, "'\\''")}'`;
  }
  // Embedded double quotes are doubled.
  return `"${raw.replace(/"/g, '""')}"`;
}

// Build the full `git …` command line with every argv element quoted.
function _gitCommand(argv) {
  const quoted = argv.map((part) => _shQuote(part));
  return `git ${quoted.join(' ')}`;
}
838
+
839
// Run a git invocation through the shared shell chokepoint and return the raw
// shell result ({ exit_code, stdout, stderr, blocked, sandbox }).
// `argv` holds the git arguments (without the leading "git"); `options` is
// forwarded to ctx.agentExecShell, defaulting to an empty object.
function _runGit(ctx, argv, options) {
  const command = _gitCommand(argv);
  return ctx.agentExecShell(command, options || {});
}
844
+
845
// Map a failed git invocation to a structured { error } — degrading gracefully
// for the "not a repo" and "git absent" cases rather than surfacing raw noise.
//
// Resolution order:
//   1. blocked by the shell chokepoint → its stderr (or a generic message)
//   2. "not a git repository" anywhere in the output → fixed hint
//   3. git missing: exit code 127, or a shell "not found" style message that git
//      itself did not print
//   4. otherwise stderr, else stdout, else "git exited with code N"
//
// Step 3 is skipped for the text patterns when the output carries one of git's
// own message prefixes (fatal:/error:/warning:/hint:/usage:). Git reports
// missing paths as "fatal: …: No such file or directory"; that proves git ran
// and must not be reported as "git is not installed".
function _gitFailure(res) {
  if (res && res.blocked) return { error: (res.stderr || 'git command blocked').trim() };
  const err = (res && res.stderr ? res.stderr : '').trim();
  const stdout = res && res.stdout ? res.stdout : '';
  const combined = `${err}\n${stdout}`.toLowerCase();
  if (/not a git repository/.test(combined)) {
    return { error: 'Not a git repository. Run this inside a git working tree.' };
  }
  const gitSpoke = /^(?:fatal|error|warning|hint|usage): /m.test(combined);
  const looksMissing = /command not found|not recognized as an internal|no such file or directory/.test(combined);
  if ((res && res.exit_code === 127) || (looksMissing && !gitSpoke)) {
    return { error: 'git is not installed or not found on PATH.' };
  }
  return { error: err || stdout.trim() || `git exited with code ${res ? res.exit_code : '?'}` };
}
859
+
860
// Record a git tool call via ctx.logToolCall(action, { args }, success, status),
// where success is true for every status other than 'error'. Any failure while
// logging is swallowed so auditing can never break the tool itself.
function _gitLog(ctx, action, args, status) {
  const succeeded = status !== 'error';
  try {
    ctx.logToolCall(action, { args }, succeeded, status);
  } catch {
    /* audit best-effort */
  }
}
863
+
864
// Parse the `## …` branch header of `git status --porcelain=v1 --branch`.
// Returns the branch name, taken from:
//   "## No commits yet on <name>"  → <name>
//   "## HEAD …"                    → 'HEAD (detached)'
//   "## <name>...<upstream> [..]"  → <name> (text before the first "...")
//   "## <name> …"                  → <name> (text before the first space)
function _parseStatusBranch(header) {
  const FRESH_PREFIX = 'No commits yet on ';
  const body = String(header).replace(/^##\s*/, '');
  if (body.startsWith(FRESH_PREFIX)) return body.slice(FRESH_PREFIX.length).trim();
  if (body.startsWith('HEAD ')) return 'HEAD (detached)';
  const [beforeUpstream] = body.split('...');
  const [name] = beforeUpstream.split(' ');
  return name.trim();
}
875
+
876
// Parse a unified diff into an array of per-file records
//   [{ file, additions, deletions, hunks: [{ header, lines }] }].
// A record starts at every `diff --git` line; its name comes from the ` b/<path>`
// part of that line and is refined by a `+++ b/<path>` header when present.
// Hunk bodies are kept verbatim in `lines`; additions / deletions count the body
// lines starting with `+` / `-`.
//
// File headers (`--- …` / `+++ …`) are recognized only BEFORE the first hunk of
// a file. Inside a hunk a line such as `--- old` (a removed line whose content
// begins with "-- ") or `+++ b/x` (an added line beginning with "++ ") is
// ordinary content: it must be counted and must not rename the file.
function _parseDiff(raw) {
  const files = [];
  let cur = null;
  let hunk = null;
  for (const line of String(raw).split('\n')) {
    if (line.startsWith('diff --git ')) {
      cur = { file: null, additions: 0, deletions: 0, hunks: [] };
      hunk = null;
      files.push(cur);
      const m = line.match(/ b\/(.+)$/);
      if (m) cur.file = m[1];
      continue;
    }
    if (!cur) continue; // preamble before the first file
    if (line.startsWith('@@')) {
      hunk = { header: line, lines: [] };
      cur.hunks.push(hunk);
      continue;
    }
    if (!hunk) {
      // Still in the file header: only the new-side name is of interest; the
      // remaining header lines (index, mode, `--- a/…`, `+++ /dev/null`) are skipped.
      if (line.startsWith('+++ b/')) cur.file = line.slice(6);
      continue;
    }
    hunk.lines.push(line);
    if (line.startsWith('+')) cur.additions++;
    else if (line.startsWith('-')) cur.deletions++;
  }
  return files;
}
905
+
906
// Parse `git worktree list --porcelain` into [{ path, head, branch }].
// A `worktree <path>` line opens a record; the `HEAD <sha>`, `branch <ref>` and
// `detached` lines that follow fill it in. `refs/heads/` is stripped from branch
// refs and a detached worktree gets the branch '(detached)'. Lines seen before
// the first `worktree` line, and any other line, are ignored.
function _parseWorktrees(raw) {
  const worktrees = [];
  let current = null;
  String(raw).split('\n').forEach((line) => {
    if (line.startsWith('worktree ')) {
      current = { path: line.slice('worktree '.length), head: null, branch: null };
      worktrees.push(current);
      return;
    }
    if (!current) return;
    if (line.startsWith('HEAD ')) {
      current.head = line.slice('HEAD '.length);
    } else if (line.startsWith('branch ')) {
      current.branch = line.slice('branch '.length).replace(/^refs\/heads\//, '');
    } else if (line === 'detached') {
      current.branch = '(detached)';
    }
  });
  return worktrees;
}
924
+
925
+ // XML attribute extractor (dual-quote) + a small typed-attr parser shared by the
926
+ // git tags. `spec` = { str: [...], bool: [...], num: [...], inline: 'key'? }.
927
// Return the value of attribute `key` from a tag's raw attribute string, or null
// when the attribute is absent. Both name="value" and name='value' are accepted.
//
// The string is tokenized left to right into name=value pairs and `key` is
// compared against whole names. A quoted value is consumed as one token, so
// `key` can neither match the tail of a longer name (rename="x" for "name") nor
// text inside another attribute's value (message='set name="x"'). The first
// occurrence of `key` wins.
function _gitAttr(attrStr, key) {
  const tokenRe = /([\w.:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  for (const m of attrStr.matchAll(tokenRe)) {
    if (m[1] === key) return m[2] !== undefined ? m[2] : m[3];
  }
  return null;
}
931
// Boolean attribute values counted as true: "true", "1", "yes", and the empty
// string (an attribute written as name="").
function _gitTruthy(v) {
  return ['true', '1', 'yes', ''].includes(v);
}
932
// Parse every `<tag …/>` / `<tag …>body</tag>` occurrence in `text` into a
// [tag, opts] tuple. `spec` selects which attributes are read and how:
//   spec.str    — copied as strings
//   spec.bool   — converted with _gitTruthy
//   spec.num    — parsed as base-10 integers (non-numeric values are dropped)
//   spec.inline — name of the option that receives the trimmed tag body when
//                 the attribute of the same name was not supplied
// Attributes that are absent are left out of `opts`.
//
// Quoted attribute values are consumed as units when locating the end of the
// opening tag, so a `>` inside a value (message="a -> b") does not cut the tag
// short.
function _parseGitTag(text, tag, spec) {
  const out = [];
  const re = new RegExp(`<${tag}\\b((?:"[^"]*"|'[^']*'|[^>"'])*?)(?:\\/>|>([\\s\\S]*?)<\\/${tag}>)`, 'g');
  for (const m of text.matchAll(re)) {
    const attrStr = m[1] || '';
    const body = m[2] != null ? m[2] : '';
    const opts = {};
    for (const k of spec.str || []) { const v = _gitAttr(attrStr, k); if (v != null) opts[k] = v; }
    for (const k of spec.bool || []) { const v = _gitAttr(attrStr, k); if (v != null) opts[k] = _gitTruthy(v); }
    for (const k of spec.num || []) {
      const v = _gitAttr(attrStr, k);
      if (v != null && v !== '') { const n = parseInt(v, 10); if (!Number.isNaN(n)) opts[k] = n; }
    }
    if (spec.inline) { const b = body.trim(); if (b && opts[spec.inline] == null) opts[spec.inline] = b; }
    out.push([tag, opts]);
  }
  return out;
}
950
+
951
+ const GIT_TOOL_REGISTRY = [
952
+ {
953
+ tool: 'git_status',
954
+ specNames: ['git_status'],
955
+ tags: ['git_status'],
956
+ parseXml: (text) => _parseGitTag(text, 'git_status', {}),
957
+ fromParams: () => ['git_status', {}],
958
+ permission: () => null,
959
+ execute: async (ctx, args, options) => {
960
+ const res = await _runGit(ctx, ['status', '--porcelain=v1', '--branch'], options);
961
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_status', args, 'error'); return _gitFailure(res); }
962
+ const staged = [];
963
+ const unstaged = [];
964
+ const untracked = [];
965
+ let branch = null;
966
+ for (const line of res.stdout.split('\n')) {
967
+ if (!line) continue;
968
+ if (line.startsWith('## ')) { branch = _parseStatusBranch(line); continue; }
969
+ const x = line[0];
970
+ const y = line[1];
971
+ const p = line.slice(3);
972
+ if (line.startsWith('??')) { untracked.push(p); continue; }
973
+ if (x && x !== ' ' && x !== '?') staged.push({ path: p, status: x });
974
+ if (y && y !== ' ' && y !== '?') unstaged.push({ path: p, status: y });
975
+ }
976
+ const clean = staged.length === 0 && unstaged.length === 0 && untracked.length === 0;
977
+ const summary = `On branch ${branch || '(unknown)'} — ${clean ? 'clean' : `staged: ${staged.length}, unstaged: ${unstaged.length}, untracked: ${untracked.length}`}`;
978
+ _gitLog(ctx, 'git_status', args, 'ok');
979
+ return { status: 'ok', branch, staged, unstaged, untracked, clean, summary };
980
+ },
981
+ },
982
+ {
983
+ tool: 'git_diff',
984
+ specNames: ['git_diff'],
985
+ tags: ['git_diff'],
986
+ parseXml: (text) => _parseGitTag(text, 'git_diff', { str: ['path'], bool: ['staged'] }),
987
+ fromParams: (p) => ['git_diff', { ...((p.staged || p.cached) ? { staged: true } : {}), ...(p.path ? { path: String(p.path) } : {}) }],
988
+ permission: () => null,
989
+ execute: async (ctx, args, options) => {
990
+ const o = args[0] || {};
991
+ const argv = ['diff'];
992
+ if (o.staged) argv.push('--cached');
993
+ if (o.path) argv.push('--', String(o.path));
994
+ const res = await _runGit(ctx, argv, options);
995
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_diff', args, 'error'); return _gitFailure(res); }
996
+ const files = _parseDiff(res.stdout);
997
+ const additions = files.reduce((s, f) => s + f.additions, 0);
998
+ const deletions = files.reduce((s, f) => s + f.deletions, 0);
999
+ const summary = files.length
1000
+ ? `${files.length} file(s) changed, +${additions} -${deletions}`
1001
+ : 'No changes';
1002
+ _gitLog(ctx, 'git_diff', args, 'ok');
1003
+ return { status: 'ok', staged: !!o.staged, files, additions, deletions, raw: res.stdout, summary };
1004
+ },
1005
+ },
1006
+ {
1007
+ tool: 'git_log',
1008
+ specNames: ['git_log'],
1009
+ tags: ['git_log'],
1010
+ parseXml: (text) => _parseGitTag(text, 'git_log', { str: ['path'], num: ['count'] }),
1011
+ fromParams: (p) => {
1012
+ const count = Number.isInteger(p.count) ? p.count : (p.count != null && p.count !== '' ? parseInt(p.count, 10) : undefined);
1013
+ return ['git_log', { ...(count ? { count } : {}), ...(p.path ? { path: String(p.path) } : {}) }];
1014
+ },
1015
+ permission: () => null,
1016
+ execute: async (ctx, args, options) => {
1017
+ const o = args[0] || {};
1018
+ const count = Number.isInteger(o.count) && o.count > 0 ? o.count : 20;
1019
+ const SEP = '\x1f';
1020
+ const argv = ['log', '-n', String(count), `--pretty=format:%H${SEP}%an${SEP}%ae${SEP}%ad${SEP}%s`, '--date=iso'];
1021
+ if (o.path) argv.push('--', String(o.path));
1022
+ const res = await _runGit(ctx, argv, options);
1023
+ if (res.blocked || res.exit_code !== 0) {
1024
+ const combined = `${res.stderr || ''}\n${res.stdout || ''}`.toLowerCase();
1025
+ // A fresh repo with no commits is a valid empty result, not an error.
1026
+ if (/does not have any commits yet|bad default revision|unknown revision|ambiguous argument 'head'/.test(combined)) {
1027
+ _gitLog(ctx, 'git_log', args, 'ok');
1028
+ return { status: 'ok', commits: [], count: 0, summary: 'No commits yet' };
1029
+ }
1030
+ _gitLog(ctx, 'git_log', args, 'error');
1031
+ return _gitFailure(res);
1032
+ }
1033
+ const commits = [];
1034
+ for (const line of res.stdout.split('\n')) {
1035
+ if (!line) continue;
1036
+ const [hash, author, email, date, ...rest] = line.split(SEP);
1037
+ commits.push({ hash, short: (hash || '').slice(0, 7), author, email, date, subject: rest.join(SEP) });
1038
+ }
1039
+ _gitLog(ctx, 'git_log', args, 'ok');
1040
+ return { status: 'ok', commits, count: commits.length, summary: `${commits.length} commit(s)` };
1041
+ },
1042
+ },
1043
+ {
1044
+ tool: 'git_add',
1045
+ specNames: ['git_add'],
1046
+ tags: ['git_add'],
1047
+ parseXml: (text) => _parseGitTag(text, 'git_add', { str: ['paths'], bool: ['all'] }),
1048
+ fromParams: (p) => ['git_add', { ...(p.paths != null ? { paths: p.paths } : {}), ...(p.all ? { all: true } : {}) }],
1049
+ permission: () => ({ actionType: 'git', description: 'git add (stage changes)', tag: 'git_add' }),
1050
+ execute: async (ctx, args, options) => {
1051
+ const o = args[0] || {};
1052
+ const blocked = ctx.permissionManager.readonlyBlock('git_add');
1053
+ if (blocked) { _gitLog(ctx, 'git_add', args, 'error'); return blocked; }
1054
+ let paths = [];
1055
+ if (Array.isArray(o.paths)) paths = o.paths.map(String).filter(Boolean);
1056
+ else if (typeof o.paths === 'string' && o.paths.trim()) paths = o.paths.trim().split(/\s+/);
1057
+ if (!o.all && paths.length === 0) {
1058
+ _gitLog(ctx, 'git_add', args, 'error');
1059
+ return { error: 'git_add requires `paths` (one or more files) or `all: true`.' };
1060
+ }
1061
+ const argv = o.all ? ['add', '-A'] : ['add', '--', ...paths];
1062
+ const res = await _runGit(ctx, argv, options);
1063
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_add', args, 'error'); return _gitFailure(res); }
1064
+ const added = o.all ? ['-A (all)'] : paths;
1065
+ _gitLog(ctx, 'git_add', args, 'ok');
1066
+ return { status: 'ok', added, summary: `Staged ${o.all ? 'all changes' : paths.join(', ')}` };
1067
+ },
1068
+ },
1069
+ {
1070
+ tool: 'git_commit',
1071
+ specNames: ['git_commit'],
1072
+ tags: ['git_commit'],
1073
+ parseXml: (text) => _parseGitTag(text, 'git_commit', { str: ['message'], bool: ['all'], inline: 'message' }),
1074
+ fromParams: (p) => ['git_commit', { message: p.message != null ? String(p.message) : '', ...(p.all ? { all: true } : {}) }],
1075
+ permission: () => ({ actionType: 'git', description: 'git commit', tag: 'git_commit' }),
1076
+ execute: async (ctx, args, options) => {
1077
+ const o = args[0] || {};
1078
+ const blocked = ctx.permissionManager.readonlyBlock('git_commit');
1079
+ if (blocked) { _gitLog(ctx, 'git_commit', args, 'error'); return blocked; }
1080
+ const message = (o.message == null ? '' : String(o.message)).trim();
1081
+ if (!message) {
1082
+ _gitLog(ctx, 'git_commit', args, 'error');
1083
+ return { error: 'git_commit requires a non-empty commit message.' };
1084
+ }
1085
+ const argv = ['commit', '-m', message];
1086
+ if (o.all) argv.push('-a');
1087
+ const res = await _runGit(ctx, argv, options);
1088
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_commit', args, 'error'); return _gitFailure(res); }
1089
+ const hashRes = await _runGit(ctx, ['rev-parse', 'HEAD'], options);
1090
+ const branchRes = await _runGit(ctx, ['rev-parse', '--abbrev-ref', 'HEAD'], options);
1091
+ const hash = (hashRes.stdout || '').trim();
1092
+ const branch = (branchRes.stdout || '').trim();
1093
+ _gitLog(ctx, 'git_commit', args, 'ok');
1094
+ return { status: 'ok', hash, short: hash.slice(0, 7), branch, summary: `Committed ${hash.slice(0, 7)} on ${branch}` };
1095
+ },
1096
+ },
1097
+ {
1098
+ tool: 'git_branch',
1099
+ specNames: ['git_branch'],
1100
+ tags: ['git_branch'],
1101
+ parseXml: (text) => _parseGitTag(text, 'git_branch', { str: ['name'], bool: ['delete', 'force'] }),
1102
+ fromParams: (p) => ['git_branch', { ...(p.name ? { name: String(p.name) } : {}), ...((p.delete || p.remove) ? { delete: true } : {}), ...(p.force ? { force: true } : {}) }],
1103
+ // op-dependent: listing branches is read-only (null); create/delete is mutating.
1104
+ permission: (ctx, args) => {
1105
+ const o = args[0] || {};
1106
+ if (!o.name) return null;
1107
+ return { actionType: 'git', description: `git branch ${o.delete ? 'delete' : 'create'} ${o.name}`, tag: 'git_branch' };
1108
+ },
1109
+ execute: async (ctx, args, options) => {
1110
+ const o = args[0] || {};
1111
+ if (!o.name) {
1112
+ const res = await _runGit(ctx, ['branch', '--no-color'], options);
1113
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_branch', args, 'error'); return _gitFailure(res); }
1114
+ const branches = [];
1115
+ let current = null;
1116
+ for (const line of res.stdout.split('\n')) {
1117
+ if (!line.trim()) continue;
1118
+ const isCurrent = line.startsWith('*');
1119
+ const name = line.replace(/^\*?\s+/, '').trim();
1120
+ if (!name || name.startsWith('(')) continue; // skip "(HEAD detached …)"
1121
+ if (isCurrent) current = name;
1122
+ branches.push({ name, current: isCurrent });
1123
+ }
1124
+ _gitLog(ctx, 'git_branch', args, 'ok');
1125
+ return { status: 'ok', branches, current, summary: `${branches.length} branch(es), on ${current || '(detached)'}` };
1126
+ }
1127
+ const blocked = ctx.permissionManager.readonlyBlock('git_branch');
1128
+ if (blocked) { _gitLog(ctx, 'git_branch', args, 'error'); return blocked; }
1129
+ const argv = o.delete ? ['branch', o.force ? '-D' : '-d', String(o.name)] : ['branch', String(o.name)];
1130
+ const res = await _runGit(ctx, argv, options);
1131
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_branch', args, 'error'); return _gitFailure(res); }
1132
+ _gitLog(ctx, 'git_branch', args, 'ok');
1133
+ return o.delete
1134
+ ? { status: 'ok', deleted: String(o.name), summary: `Deleted branch ${o.name}` }
1135
+ : { status: 'ok', created: String(o.name), summary: `Created branch ${o.name}` };
1136
+ },
1137
+ },
1138
+ {
1139
+ tool: 'git_checkout',
1140
+ specNames: ['git_checkout'],
1141
+ tags: ['git_checkout'],
1142
+ parseXml: (text) => _parseGitTag(text, 'git_checkout', { str: ['name'], bool: ['create', 'force'] }),
1143
+ fromParams: (p) => ['git_checkout', { name: p.name != null ? String(p.name) : '', ...(p.create ? { create: true } : {}), ...(p.force ? { force: true } : {}) }],
1144
+ permission: (ctx, args) => {
1145
+ const o = args[0] || {};
1146
+ // Destructive-git ↔ checkpoint honesty: a checkout can discard uncommitted
1147
+ // working-tree changes that checkpoints never snapshot (not rewindable).
1148
+ return { actionType: 'git', description: `git checkout ${o.create ? '-b ' : ''}${o.name || ''} (may discard uncommitted changes — NOT recoverable via /rewind)`, tag: 'git_checkout' };
1149
+ },
1150
+ execute: async (ctx, args, options) => {
1151
+ const o = args[0] || {};
1152
+ const blocked = ctx.permissionManager.readonlyBlock('git_checkout');
1153
+ if (blocked) { _gitLog(ctx, 'git_checkout', args, 'error'); return blocked; }
1154
+ const name = (o.name == null ? '' : String(o.name)).trim();
1155
+ if (!name) { _gitLog(ctx, 'git_checkout', args, 'error'); return { error: 'git_checkout requires a target `name` (branch or ref).' }; }
1156
+ const argv = ['checkout'];
1157
+ if (o.force) argv.push('-f');
1158
+ if (o.create) argv.push('-b');
1159
+ argv.push(name);
1160
+ const res = await _runGit(ctx, argv, options);
1161
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_checkout', args, 'error'); return _gitFailure(res); }
1162
+ const branchRes = await _runGit(ctx, ['rev-parse', '--abbrev-ref', 'HEAD'], options);
1163
+ const branch = (branchRes.stdout || '').trim() || name;
1164
+ _gitLog(ctx, 'git_checkout', args, 'ok');
1165
+ return { status: 'ok', branch, created: !!o.create, summary: `Switched to ${branch}` };
1166
+ },
1167
+ },
1168
+ {
1169
+ tool: 'git_worktree',
1170
+ specNames: ['git_worktree'],
1171
+ tags: ['git_worktree'],
1172
+ parseXml: (text) => _parseGitTag(text, 'git_worktree', { str: ['op', 'path', 'branch'], bool: ['force'] }),
1173
+ fromParams: (p) => ['git_worktree', { op: p.op || 'list', ...(p.path ? { path: String(p.path) } : {}), ...(p.branch ? { branch: String(p.branch) } : {}), ...(p.force ? { force: true } : {}) }],
1174
+ // op-dependent: list is read-only (null); add/remove are mutating.
1175
+ permission: (ctx, args) => {
1176
+ const o = args[0] || {};
1177
+ const op = o.op || 'list';
1178
+ if (op === 'list') return null;
1179
+ return { actionType: 'git', description: `git worktree ${op} ${o.path || ''}`, tag: 'git_worktree' };
1180
+ },
1181
+ execute: async (ctx, args, options) => {
1182
+ const o = args[0] || {};
1183
+ const op = o.op || 'list';
1184
+ if (op === 'list') {
1185
+ const res = await _runGit(ctx, ['worktree', 'list', '--porcelain'], options);
1186
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
1187
+ const worktrees = _parseWorktrees(res.stdout);
1188
+ _gitLog(ctx, 'git_worktree', args, 'ok');
1189
+ return { status: 'ok', op: 'list', worktrees, summary: `${worktrees.length} worktree(s)` };
1190
+ }
1191
+ const blocked = ctx.permissionManager.readonlyBlock('git_worktree');
1192
+ if (blocked) { _gitLog(ctx, 'git_worktree', args, 'error'); return blocked; }
1193
+ if (op === 'add') {
1194
+ if (!o.path) { _gitLog(ctx, 'git_worktree', args, 'error'); return { error: 'git_worktree add requires a `path`.' }; }
1195
+ const argv = ['worktree', 'add'];
1196
+ if (o.branch) argv.push('-b', String(o.branch));
1197
+ argv.push(String(o.path));
1198
+ const res = await _runGit(ctx, argv, options);
1199
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
1200
+ _gitLog(ctx, 'git_worktree', args, 'ok');
1201
+ return { status: 'ok', op: 'add', path: String(o.path), branch: o.branch ? String(o.branch) : null, summary: `Added worktree at ${o.path}` };
1202
+ }
1203
+ if (op === 'remove') {
1204
+ if (!o.path) { _gitLog(ctx, 'git_worktree', args, 'error'); return { error: 'git_worktree remove requires a `path`.' }; }
1205
+ const argv = ['worktree', 'remove'];
1206
+ if (o.force) argv.push('--force');
1207
+ argv.push(String(o.path));
1208
+ const res = await _runGit(ctx, argv, options);
1209
+ if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
1210
+ _gitLog(ctx, 'git_worktree', args, 'ok');
1211
+ return { status: 'ok', op: 'remove', path: String(o.path), summary: `Removed worktree at ${o.path}` };
1212
+ }
1213
+ _gitLog(ctx, 'git_worktree', args, 'error');
1214
+ return { error: `git_worktree: unknown op "${op}" (expected list | add | remove).` };
1215
+ },
1216
+ },
1217
+ ];
1218
+
1219
+ const TOOL_REGISTRY = [
1220
+ {
1221
+ tool: 'shell',
1222
+ specNames: ['exec', 'shell'],
1223
+ tags: ['exec', 'shell', 'run_command', 'run'],
1224
+ parseXml: (text) => _inline(text, 'shell|exec|run_command|run', 'shell'),
1225
+ fromParams: (p) => (p.command ? ['shell', p.command] : null),
1226
+ // shell is executed through agentExecShell (deny-list chokepoint), not the
1227
+ // agentExecFile dispatch — this execute exists for registry completeness.
1228
+ execute: (ctx, args, options) => ctx.agentExecShell(args[0], options || {}),
1229
+ permission: (ctx, args) => ({ actionType: 'shell', description: args[0] || '', tag: 'exec' }),
1230
+ },
1231
+ {
1232
+ tool: 'read',
1233
+ specNames: ['read_file'],
1234
+ tags: ['read_file'],
1235
+ parseXml: (text) => _parseReadTag(text),
1236
+ fromParams: (p) => (p.path
1237
+ ? ['read', p.path, p.start_line ?? null, p.end_line ?? null, !!p.show_line_numbers]
1238
+ : null),
1239
+ permission: () => null,
1240
+ execute: async (ctx, args, options) => {
1241
+ const signal = (options && options.signal) || null;
1242
+ const [arg0 = null] = args;
1243
+ const { _log, logToolCall, isProtectedSecretPath, _secretReadError, getConfig, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1244
+ const filePath = arg0;
1245
+ if (isProtectedSecretPath(filePath)) {
1246
+ logToolCall('read_file', { path: filePath }, false, 'denied');
1247
+ return _secretReadError(filePath);
1248
+ }
1249
+ const startedAt = Date.now();
1250
+ const stat = await fsp.stat(filePath).catch(() => null);
1251
+ if (stat) {
1252
+ const cfg = getConfig ? getConfig() : {};
1253
+ // Byte BACKSTOP only (Task W.7). Pagination (formatReadResult) is now the
1254
+ // primary context bound — a large line-readable file paginates rather than
1255
+ // hard-refusing. This ceiling (default 50 MB) just rules out slurping a
1256
+ // multi-GB file whole into memory; an operator can lower max_file_size_kb
1257
+ // to hard-refuse smaller files.
1258
+ const defKb = require('./constants').DEFAULT_READ_MAX_FILE_KB;
1259
+ const maxKb = cfg.max_file_size_kb || defKb;
1260
+ const maxBytes = maxKb * 1024;
1261
+ if (stat.size > maxBytes) {
1262
+ const kb = (stat.size / 1024).toFixed(0);
1263
+ logToolCall('read_file', { path: filePath }, false, 'error');
1264
+ return { error: `File too large: ${kb} KB exceeds max_file_size_kb=${maxKb}` };
1265
+ }
1266
+ }
1267
+ if (signal && signal.aborted) {
1268
+ logToolCall('read_file', { path: filePath }, true, 'aborted');
1269
+ return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
1270
+ }
1271
+ try {
1272
+ const data = await fsp.readFile(filePath, { encoding: 'utf8', signal: signal || undefined });
1273
+ const lines = data.split('\n').length;
1274
+ if (lines > 10) {
1275
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath} (${lines} lines, ${data.length} chars)${RST}`);
1276
+ } else {
1277
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath}${RST}`);
1278
+ }
1279
+ logToolCall('read_file', { path: filePath }, true, 'ok');
1280
+ return { content: data, path: filePath, bytes: Buffer.byteLength(data, 'utf8') };
1281
+ } catch (error) {
1282
+ if (error && (error.name === 'AbortError' || error.code === 'ABORT_ERR')) {
1283
+ logToolCall('read_file', { path: filePath }, true, 'aborted');
1284
+ return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
1285
+ }
1286
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1287
+ logToolCall('read_file', { path: filePath }, true, 'error');
1288
+ return { error: error.message };
1289
+ }
1290
+ },
1291
+ },
1292
+ {
1293
+ tool: 'view_image',
1294
+ specNames: ['view_image'],
1295
+ tags: ['view_image'],
1296
+ // Both XML forms accepted: the attribute form `<view_image path="…"/>` and
1297
+ // the inline form `<view_image>PATH</view_image>` (like read_file/download).
1298
+ parseXml: (text) => {
1299
+ const out = [];
1300
+ for (const m of _matchDual(text, '<view_image\\s+path=Q([^Q]+)Q\\s*(?:><\\/view_image>|\\/>)')) {
1301
+ out.push(['view_image', m[1]]);
1302
+ }
1303
+ for (const m of text.matchAll(/<view_image>([\s\S]*?)<\/view_image>/g)) {
1304
+ out.push(['view_image', _unwrapInnerTag(m[1]).trim()]);
1305
+ }
1306
+ return out;
1307
+ },
1308
+ fromParams: (p) => (p.path ? ['view_image', String(p.path)] : null),
1309
+ // Read-only: a local file read, so no permission gate (parity with read_file /
1310
+ // grep, permission: () => null). Path safety + the size cap are enforced by
1311
+ // readImage (via isPathSafe) inside execute, exactly as the /image command does.
1312
+ permission: () => null,
1313
+ // Stage a LOCAL image into vision context. Reuses readImage — the SAME encoder
1314
+ // the /image slash command uses (read through isPathSafe, size-capped, magic-byte
1315
+ // media-type detect, base64). The returned `image` record is collected by the
1316
+ // agent loop and attached to the tool-result message's `images[]`, so api.js
1317
+ // buildProviderMessages turns it into a provider image block on the next turn.
1318
+ execute: async (ctx, args) => {
1319
+ const [arg0 = null] = args;
1320
+ const { _log, logToolCall, isPathSafe, getConfig, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1321
+ const filePath = arg0;
1322
+ try {
1323
+ const { readImage } = require('./images');
1324
+ const cfg = getConfig ? getConfig() : {};
1325
+ const img = readImage(filePath, { maxBytes: cfg.image_max_bytes, isPathSafe });
1326
+ const kb = Math.max(1, Math.round(img.bytes / 1024));
1327
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Viewing image ${filePath} (${img.media_type}, ${kb} KB)${RST}`);
1328
+ logToolCall('view_image', { path: filePath }, true, 'ok');
1329
+ // `image` carries the base64 bytes for staging; it never enters the
1330
+ // model-facing text (formatFileResult builds a short confirmation line).
1331
+ return { status: 'ok', path: filePath, media_type: img.media_type, bytes: img.bytes, image: img };
1332
+ } catch (error) {
1333
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1334
+ logToolCall('view_image', { path: filePath }, true, 'error');
1335
+ return { error: error.message };
1336
+ }
1337
+ },
1338
+ },
1339
+ {
1340
+ tool: 'write',
1341
+ specNames: ['write_file', 'create_file'],
1342
+ tags: ['write_file', 'create_file'],
1343
+ parseXml: (text) => {
1344
+ const out = [];
1345
+ // QUIRK: attribute-form content (m[2]) is captured RAW — not trimmed —
1346
+ // unlike inline-tag bodies which go through _unwrapInnerTag().trim().
1347
+ // Preserved deliberately; pinned by test/extract-tool-calls.test.js
1348
+ // ("QUIRK: attribute-form content is NOT trimmed (unlike inline tags)").
1349
+ // Any change to this is out of scope for the tool-registry refactor.
1350
+ for (const m of _matchDual(text, '<write_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/write_file>')) out.push(['write', m[1], m[2]]);
1351
+ for (const m of _matchDual(text, '<create_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/create_file>')) out.push(['write', m[1], m[2]]);
1352
+ return out;
1353
+ },
1354
+ fromParams: (p) => (p.path ? ['write', p.path, p.content != null ? p.content : ''] : null),
1355
+ permission: (ctx, args) => _permWriteAppend(ctx, 'write', args),
1356
+ execute: (ctx, args, options) => _execWriteAppend(ctx, 'write', args, options),
1357
+ },
1358
+ {
1359
+ tool: 'append',
1360
+ specNames: ['append_file'],
1361
+ tags: ['append_file'],
1362
+ // QUIRK: as with write_file, append content is captured raw (not trimmed).
1363
+ parseXml: (text) => _matchDual(text, '<append_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/append_file>').map((m) => ['append', m[1], m[2]]),
1364
+ fromParams: (p) => (p.path ? ['append', p.path, p.content != null ? p.content : ''] : null),
1365
+ permission: (ctx, args) => _permWriteAppend(ctx, 'append', args),
1366
+ execute: (ctx, args, options) => _execWriteAppend(ctx, 'append', args, options),
1367
+ },
1368
+ {
1369
+ tool: 'list_dir',
1370
+ specNames: ['list_dir'],
1371
+ tags: ['list_dir'],
1372
+ parseXml: (text) => _inline(text, 'list_dir', 'list_dir'),
1373
+ fromParams: (p) => ['list_dir', p.path || '.'],
1374
+ permission: () => null,
1375
+ execute: async (ctx, args) => {
1376
+ const [arg0 = null] = args;
1377
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1378
+ const dirPath = arg0;
1379
+ try {
1380
+ const entries = await fsp.readdir(dirPath, { withFileTypes: true });
1381
+ const items = entries.map((e) => {
1382
+ if (e.isSymbolicLink()) return `[L] ${e.name}`;
1383
+ if (e.isDirectory()) return `[D] ${e.name}`;
1384
+ return `[F] ${e.name}`;
1385
+ });
1386
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${dirPath} (${items.length} items)${RST}`);
1387
+ logToolCall('list_dir', { path: dirPath }, true, 'ok');
1388
+ return { items, path: dirPath };
1389
+ } catch (error) {
1390
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1391
+ logToolCall('list_dir', { path: dirPath }, true, 'error');
1392
+ return { error: error.message };
1393
+ }
1394
+ },
1395
+ },
1396
+ {
1397
+ tool: 'search_files',
1398
+ specNames: ['search_files'],
1399
+ tags: ['search_files'],
1400
+ parseXml: (text) => {
1401
+ const out = _inline(text, 'search_files', 'search_files', ['.']);
1402
+ for (const m of _matchDual(text, '<search_files\\s+pattern=Q([^Q]+)Q(?:\\s+dir=Q([^Q]*)Q)?\\s*(?:><\\/search_files>|\\/>)')) {
1403
+ out.push(['search_files', m[1], m[2] || '.']);
1404
+ }
1405
+ return out;
1406
+ },
1407
+ fromParams: (p) => ['search_files', p.pattern || '*', p.dir || '.'],
1408
+ permission: () => null,
1409
+ execute: async (ctx, args, options) => {
1410
+ const signal = (options && options.signal) || null;
1411
+ const [arg0 = null, arg1 = null] = args;
1412
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1413
+ const pattern = arg0;
1414
+ const searchDir = arg1 || '.';
1415
+ const startedAt = Date.now();
1416
+ try {
1417
+ let regStr = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
1418
+ regStr = regStr.replace(/\*\*/g, '\x00');
1419
+ regStr = regStr.replace(/\*/g, '[^/]*');
1420
+ regStr = regStr.replace(/\x00\//g, '(?:.*/)?');
1421
+ regStr = regStr.replace(/\x00/g, '.*');
1422
+ const regex = new RegExp(`^${regStr}$`);
1423
+ const matchName = !pattern.includes('/');
1424
+ const files = [];
1425
+ async function walk(dir, rel) {
1426
+ if (signal && signal.aborted) return;
1427
+ let entries;
1428
+ try { entries = await fsp.readdir(dir, { withFileTypes: true }); } catch { return; }
1429
+ for (const entry of entries) {
1430
+ if (signal && signal.aborted) return;
1431
+ const relPath = rel ? `${rel}/${entry.name}` : entry.name;
1432
+ if (regex.test(matchName ? entry.name : relPath)) files.push(relPath);
1433
+ if (entry.isDirectory()) await walk(path.join(dir, entry.name), relPath);
1434
+ }
1435
+ }
1436
+ await walk(searchDir, '');
1437
+ if (signal && signal.aborted) {
1438
+ logToolCall('search_files', { pattern, dir: searchDir }, true, 'aborted');
1439
+ return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
1440
+ }
1441
+ files.sort();
1442
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${files.length} file(s) matching "${pattern}"${RST}`);
1443
+ logToolCall('search_files', { pattern, dir: searchDir }, true, 'ok');
1444
+ return { files, pattern, dir: searchDir };
1445
+ } catch (error) {
1446
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1447
+ logToolCall('search_files', { pattern, dir: searchDir }, true, 'error');
1448
+ return { error: error.message };
1449
+ }
1450
+ },
1451
+ },
1452
+ {
1453
+ tool: 'grep',
1454
+ specNames: ['grep'],
1455
+ tags: ['grep'],
1456
+ parseXml: (text) => _parseSearchTag(text, 'grep'),
1457
+ fromParams: (p) => (p.pattern
1458
+ ? ['grep', p.pattern, p.path || null, !!p.ignore_case, p.output_mode || null, p.head_limit ?? null, p.offset ?? null]
1459
+ : null),
1460
+ permission: () => null,
1461
+ execute: async (ctx, args, options) => {
1462
+ const signal = (options && options.signal) || null;
1463
+ const [pattern = null, pathArg = null, ignoreCase = false, outputMode = null, headLimit, offset] = args;
1464
+ const { _log, logToolCall, isProtectedSecretPath, _secretReadError, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1465
+ // Path-as-target now reads files directly, so apply the SAME secret-file read
1466
+ // guard read_file/search_in_file use (the OS sandbox remains the outer
1467
+ // confinement; this just refuses the credential/history files by name).
1468
+ if (pathArg != null && pathArg !== '' && isProtectedSecretPath(pathArg)) {
1469
+ logToolCall('grep', { pattern, path: pathArg }, false, 'denied');
1470
+ return _secretReadError(pathArg);
1471
+ }
1472
+ const res = _grepSearch({ pattern, path: pathArg, ignoreCase, engine: 'auto', signal });
1473
+ if (res.aborted) { logToolCall('grep', { pattern }, true, 'aborted'); return res; }
1474
+ if (res.error) {
1475
+ _log(` ${FG_RED}✗ ${res.error}${RST}`);
1476
+ logToolCall('grep', { pattern }, true, 'error');
1477
+ return res;
1478
+ }
1479
+ // Shape the serialization controls onto the result (Task W.5). The engine
1480
+ // returns the full (engine-capped) match set; output_mode + head_limit +
1481
+ // offset bound what reaches the model in formatFileResult (lib/agent.js).
1482
+ res.output_mode = _normGrepMode(outputMode);
1483
+ res.head_limit = _normHeadLimit(headLimit, require('./constants').DEFAULT_GREP_HEAD_LIMIT);
1484
+ res.offset = _normOffset(offset);
1485
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}grep "${pattern}" — ${res.count} match(es)${RST}`);
1486
+ logToolCall('grep', { pattern, path: pathArg }, true, 'ok');
1487
+ return res;
1488
+ },
1489
+ },
1490
+ {
1491
+ tool: 'glob',
1492
+ specNames: ['glob'],
1493
+ tags: ['glob'],
1494
+ parseXml: (text) => _parseSearchTag(text, 'glob'),
1495
+ fromParams: (p) => (p.pattern ? ['glob', p.pattern, p.path || p.dir || '.', p.head_limit ?? null, p.offset ?? null] : null),
1496
+ permission: () => null,
1497
+ execute: async (ctx, args, options) => {
1498
+ const signal = (options && options.signal) || null;
1499
+ const [pattern = null, base = '.', headLimit, offset] = args;
1500
+ const { _log, logToolCall, isPathSafe, _sandboxError, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1501
+ if (!isPathSafe(base)) {
1502
+ logToolCall('glob', { pattern, dir: base }, false, 'denied');
1503
+ return _sandboxError(base);
1504
+ }
1505
+ const res = _globSearch({ pattern, baseDir: base, signal });
1506
+ if (res.aborted) { logToolCall('glob', { pattern }, true, 'aborted'); return res; }
1507
+ if (res.error) {
1508
+ _log(` ${FG_RED}✗ ${res.error}${RST}`);
1509
+ logToolCall('glob', { pattern }, true, 'error');
1510
+ return res;
1511
+ }
1512
+ // head_limit + offset bound the file list that reaches the model (Task W.5);
1513
+ // the engine returns the full (engine-capped) list, serialized in formatFileResult.
1514
+ res.head_limit = _normHeadLimit(headLimit, require('./constants').DEFAULT_GLOB_HEAD_LIMIT);
1515
+ res.offset = _normOffset(offset);
1516
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}glob "${pattern}" — ${res.count} file(s)${RST}`);
1517
+ logToolCall('glob', { pattern, dir: base }, true, 'ok');
1518
+ return res;
1519
+ },
1520
+ },
1521
+ {
1522
+ tool: 'delete_file',
1523
+ specNames: ['delete_file'],
1524
+ tags: ['delete_file'],
1525
+ parseXml: (text) => _inline(text, 'delete_file', 'delete_file'),
1526
+ fromParams: (p) => (p.path ? ['delete_file', p.path] : null),
1527
+ permission: (ctx, args) => {
1528
+ const { _log, FG_YELLOW, BOLD, RST } = ctx;
1529
+ const filePath = args[0];
1530
+ _log(` ${FG_YELLOW}${BOLD}⚠ Deleting: ${filePath}${RST}`);
1531
+ return { actionType: 'file', description: `Delete ${filePath}`, tag: 'delete_file' };
1532
+ },
1533
+ execute: async (ctx, args) => {
1534
+ const [arg0 = null] = args;
1535
+ const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1536
+ const filePath = arg0;
1537
+ const blocked = permissionManager.readonlyBlock('delete_file');
1538
+ if (blocked) {
1539
+ logToolCall('delete_file', { path: filePath }, false, 'denied');
1540
+ return blocked;
1541
+ }
1542
+ if (!isPathSafe(filePath)) {
1543
+ logToolCall('delete_file', { path: filePath }, false, 'denied');
1544
+ return _sandboxError(filePath);
1545
+ }
1546
+ try {
1547
+ await fsp.unlink(filePath);
1548
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Deleted ${filePath}${RST}`);
1549
+ logToolCall('delete_file', { path: filePath }, true, 'ok');
1550
+ return { status: 'ok', path: filePath };
1551
+ } catch (error) {
1552
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1553
+ logToolCall('delete_file', { path: filePath }, true, 'error');
1554
+ return { error: error.message };
1555
+ }
1556
+ },
1557
+ },
1558
+ {
1559
+ tool: 'make_dir',
1560
+ specNames: ['make_dir'],
1561
+ tags: ['make_dir'],
1562
+ parseXml: (text) => _inline(text, 'make_dir', 'make_dir'),
1563
+ fromParams: (p) => (p.path ? ['make_dir', p.path] : null),
1564
+ permission: (ctx, args) => ({ actionType: 'file', description: `Create directory ${args[0]}`, tag: 'make_dir' }),
1565
+ execute: async (ctx, args) => {
1566
+ const [arg0 = null] = args;
1567
+ const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1568
+ const dirPath = arg0;
1569
+ const blocked = permissionManager.readonlyBlock('make_dir');
1570
+ if (blocked) {
1571
+ logToolCall('make_dir', { path: dirPath }, false, 'denied');
1572
+ return blocked;
1573
+ }
1574
+ if (!isPathSafe(dirPath)) {
1575
+ logToolCall('make_dir', { path: dirPath }, false, 'denied');
1576
+ return _sandboxError(dirPath);
1577
+ }
1578
+ try {
1579
+ await fsp.mkdir(dirPath, { recursive: true });
1580
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Created directory ${dirPath}${RST}`);
1581
+ logToolCall('make_dir', { path: dirPath }, true, 'ok');
1582
+ return { status: 'ok', path: dirPath };
1583
+ } catch (error) {
1584
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1585
+ logToolCall('make_dir', { path: dirPath }, true, 'error');
1586
+ return { error: error.message };
1587
+ }
1588
+ },
1589
+ },
1590
+ {
1591
+ tool: 'remove_dir',
1592
+ specNames: ['remove_dir'],
1593
+ tags: ['remove_dir'],
1594
+ parseXml: (text) => _inline(text, 'remove_dir', 'remove_dir'),
1595
+ fromParams: (p) => (p.path ? ['remove_dir', p.path] : null),
1596
+ permission: (ctx, args) => ({ actionType: 'file', description: `Remove directory ${args[0]}`, tag: 'remove_dir' }),
1597
+ execute: async (ctx, args) => {
1598
+ const [arg0 = null] = args;
1599
+ const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1600
+ const dirPath = arg0;
1601
+ const blocked = permissionManager.readonlyBlock('remove_dir');
1602
+ if (blocked) {
1603
+ logToolCall('remove_dir', { path: dirPath }, false, 'denied');
1604
+ return blocked;
1605
+ }
1606
+ if (!isPathSafe(dirPath)) {
1607
+ logToolCall('remove_dir', { path: dirPath }, false, 'denied');
1608
+ return _sandboxError(dirPath);
1609
+ }
1610
+ try {
1611
+ await fsp.rm(dirPath, { recursive: true, force: true });
1612
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Removed directory ${dirPath}${RST}`);
1613
+ logToolCall('remove_dir', { path: dirPath }, true, 'ok');
1614
+ return { status: 'ok', path: dirPath };
1615
+ } catch (error) {
1616
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1617
+ logToolCall('remove_dir', { path: dirPath }, true, 'error');
1618
+ return { error: error.message };
1619
+ }
1620
+ },
1621
+ },
1622
+ {
1623
+ tool: 'move_file',
1624
+ specNames: ['move_file'],
1625
+ tags: ['move_file'],
1626
+ parseXml: (text) => _matchDual(text, '<move_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/move_file>|\\/>)').map((m) => ['move_file', m[1], m[2]]),
1627
+ fromParams: (p) => (p.src && p.dst ? ['move_file', p.src, p.dst] : null),
1628
+ permission: (ctx, args) => {
1629
+ const { _log, FG_YELLOW, BOLD, RST } = ctx;
1630
+ const src = args[0];
1631
+ const dst = args[1];
1632
+ _log(` ${FG_YELLOW}${BOLD}⚠ Moving: ${src} → ${dst}${RST}`);
1633
+ return { actionType: 'file', description: `Move ${src} to ${dst}`, tag: 'move_file' };
1634
+ },
1635
+ execute: async (ctx, args) => {
1636
+ const [arg0 = null, arg1 = null] = args;
1637
+ const { _log, logToolCall, isPathSafe, isProtectedSecretPath, isProtectedConfigPath, _sandboxError, _secretReadError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1638
+ const src = arg0;
1639
+ const dst = arg1;
1640
+ const blocked = permissionManager.readonlyBlock('move_file');
1641
+ if (blocked) {
1642
+ logToolCall('move_file', { src, dst }, false, 'denied');
1643
+ return blocked;
1644
+ }
1645
+ if (isProtectedSecretPath(src)) {
1646
+ logToolCall('move_file', { src, dst }, false, 'denied');
1647
+ return _secretReadError(src);
1648
+ }
1649
+ if (isProtectedConfigPath(dst)) {
1650
+ logToolCall('move_file', { src, dst }, false, 'denied');
1651
+ return _protectedConfigWriteError(dst);
1652
+ }
1653
+ if (!isPathSafe(dst)) {
1654
+ logToolCall('move_file', { src, dst }, false, 'denied');
1655
+ return _sandboxError(dst);
1656
+ }
1657
+ try {
1658
+ const dstDir = path.dirname(dst);
1659
+ if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
1660
+ try {
1661
+ await fsp.rename(src, dst);
1662
+ } catch (renameErr) {
1663
+ if (renameErr.code !== 'EXDEV') throw renameErr;
1664
+ await fsp.cp(src, dst, { recursive: true });
1665
+ await fsp.rm(src, { recursive: true, force: true });
1666
+ }
1667
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Moved ${src} → ${dst}${RST}`);
1668
+ logToolCall('move_file', { src, dst }, true, 'ok');
1669
+ return { status: 'ok', src, dst };
1670
+ } catch (error) {
1671
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1672
+ logToolCall('move_file', { src, dst }, true, 'error');
1673
+ return { error: error.message };
1674
+ }
1675
+ },
1676
+ },
1677
+ {
1678
+ tool: 'copy_file',
1679
+ specNames: ['copy_file'],
1680
+ tags: ['copy_file'],
1681
+ parseXml: (text) => _matchDual(text, '<copy_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/copy_file>|\\/>)').map((m) => ['copy_file', m[1], m[2]]),
1682
+ fromParams: (p) => (p.src && p.dst ? ['copy_file', p.src, p.dst] : null),
1683
+ permission: (ctx, args) => ({ actionType: 'file', description: `Copy ${args[0]} to ${args[1]}`, tag: 'copy_file' }),
1684
+ execute: async (ctx, args) => {
1685
+ const [arg0 = null, arg1 = null] = args;
1686
+ const { _log, logToolCall, isPathSafe, isProtectedSecretPath, isProtectedConfigPath, _sandboxError, _secretReadError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1687
+ const src = arg0;
1688
+ const dst = arg1;
1689
+ const blocked = permissionManager.readonlyBlock('copy_file');
1690
+ if (blocked) {
1691
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1692
+ return blocked;
1693
+ }
1694
+ if (isProtectedSecretPath(src)) {
1695
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1696
+ return _secretReadError(src);
1697
+ }
1698
+ if (isProtectedConfigPath(dst)) {
1699
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1700
+ return _protectedConfigWriteError(dst);
1701
+ }
1702
+ if (!isPathSafe(dst)) {
1703
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1704
+ return _sandboxError(dst);
1705
+ }
1706
+ try {
1707
+ const dstDir = path.dirname(dst);
1708
+ if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
1709
+ await fsp.cp(src, dst, { recursive: true });
1710
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Copied ${src} → ${dst}${RST}`);
1711
+ logToolCall('copy_file', { src, dst }, true, 'ok');
1712
+ return { status: 'ok', src, dst };
1713
+ } catch (error) {
1714
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1715
+ logToolCall('copy_file', { src, dst }, true, 'error');
1716
+ return { error: error.message };
1717
+ }
1718
+ },
1719
+ },
1720
+ {
1721
+ tool: 'edit_file',
1722
+ specNames: ['edit_file'],
1723
+ tags: ['edit_file'],
1724
+ // Optional `end_line` (W.5 trailing-arg discipline: absent → the 4-element
1725
+ // single-line tuple, byte-for-byte the prior behavior). When present, lines
1726
+ // `line..end_line` are replaced wholesale by the content — a regex-free way
1727
+ // to swap a block, pairing with read_file's start_line/end_line + line
1728
+ // numbers (read a numbered slice, then replace that exact range).
1729
+ parseXml: (text) => _matchDual(text, '<edit_file\\s+path=Q([^Q]+)Q\\s+line=Q(\\d+)Q(?:\\s+end_line=Q(\\d+)Q)?>([\\s\\S]*?)<\\/edit_file>').map((m) => {
1730
+ const call = ['edit_file', m[1], parseInt(m[2], 10), m[4]];
1731
+ if (m[3] != null) call.push(parseInt(m[3], 10));
1732
+ return call;
1733
+ }),
1734
+ fromParams: (p) => {
1735
+ if (!(p.path && p.line !== undefined)) return null;
1736
+ const call = ['edit_file', p.path, parseInt(p.line, 10), p.content != null ? p.content : ''];
1737
+ if (p.end_line != null) call.push(parseInt(p.end_line, 10));
1738
+ return call;
1739
+ },
1740
+ permission: (ctx, args) => ({ actionType: 'file', description: args[4] != null ? `Edit lines ${args[1]}-${args[4]} in ${args[0]}` : `Edit line ${args[1]} in ${args[0]}`, tag: 'edit_file' }),
1741
+ execute: async (ctx, args) => {
1742
+ const [arg0 = null, arg1 = null, arg2 = null, arg3] = args;
1743
+ const { _log, logToolCall, isProtectedConfigPath, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1744
+ const filePath = arg0;
1745
+ const lineNum = arg1;
1746
+ const newContent = arg2;
1747
+ const endLine = arg3; // undefined → single-line edit (unchanged)
1748
+ const blocked = permissionManager.readonlyBlock('edit_file');
1749
+ if (blocked) {
1750
+ logToolCall('edit_file', { path: filePath, line: lineNum }, false, 'denied');
1751
+ return blocked;
1752
+ }
1753
+ if (isProtectedConfigPath(filePath)) {
1754
+ logToolCall('edit_file', { path: filePath, line: lineNum }, false, 'denied');
1755
+ return _protectedConfigWriteError(filePath);
1756
+ }
1757
+ try {
1758
+ const data = await fsp.readFile(filePath, 'utf8');
1759
+ const lines = data.split('\n');
1760
+ if (lineNum < 1 || lineNum > lines.length) {
1761
+ logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
1762
+ return { error: `Line ${lineNum} out of range (file has ${lines.length} lines)` };
1763
+ }
1764
+ if (endLine == null) {
1765
+ // Single-line replace — exactly the prior behavior (no regression).
1766
+ lines[lineNum - 1] = newContent;
1767
+ const after = lines.join('\n');
1768
+ await fsp.writeFile(filePath, after);
1769
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Edited line ${lineNum} in ${filePath}${RST}`);
1770
+ logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'ok');
1771
+ // _diffBefore/_diffAfter feed the execution-time diff renderer (onToolEnd).
1772
+ return { status: 'ok', path: filePath, line: lineNum, _diffBefore: data, _diffAfter: after };
1773
+ }
1774
+ // Line-range replace: swap lines lineNum..endLine for newContent (which
1775
+ // may itself be multi-line). No regex involved → no ReDoS surface.
1776
+ if (endLine < lineNum || endLine > lines.length) {
1777
+ logToolCall('edit_file', { path: filePath, line: lineNum, end_line: endLine }, true, 'error');
1778
+ return { error: `Line range ${lineNum}-${endLine} out of range (file has ${lines.length} lines)` };
1779
+ }
1780
+ lines.splice(lineNum - 1, endLine - lineNum + 1, ...newContent.split('\n'));
1781
+ const afterRange = lines.join('\n');
1782
+ await fsp.writeFile(filePath, afterRange);
1783
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Edited lines ${lineNum}-${endLine} in ${filePath}${RST}`);
1784
+ logToolCall('edit_file', { path: filePath, line: lineNum, end_line: endLine }, true, 'ok');
1785
+ return { status: 'ok', path: filePath, line: lineNum, end_line: endLine, _diffBefore: data, _diffAfter: afterRange };
1786
+ } catch (error) {
1787
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1788
+ logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
1789
+ return { error: error.message };
1790
+ }
1791
+ },
1792
+ },
1793
+ {
1794
+ tool: 'search_in_file',
1795
+ specNames: ['search_in_file'],
1796
+ tags: ['search_in_file'],
1797
+ parseXml: (text) => _matchDual(text, '<search_in_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/search_in_file>').map((m) => ['search_in_file', m[1], m[2].trim()]),
1798
+ fromParams: (p) => (p.path && p.pattern ? ['search_in_file', p.path, p.pattern] : null),
1799
+ permission: () => null,
1800
+ execute: async (ctx, args) => {
1801
+ const [arg0 = null, arg1 = null] = args;
1802
+ const { _log, logToolCall, isProtectedSecretPath, _secretReadError, _checkRegexSafety, _isLiteralPattern, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1803
+ const filePath = arg0;
1804
+ const pattern = arg1;
1805
+ if (isProtectedSecretPath(filePath)) {
1806
+ logToolCall('search_in_file', { path: filePath, pattern }, false, 'denied');
1807
+ return _secretReadError(filePath);
1808
+ }
1809
+ try {
1810
+ const data = await fsp.readFile(filePath, 'utf8');
1811
+ const guardErr = _checkRegexSafety(pattern, data);
1812
+ if (guardErr) {
1813
+ logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
1814
+ return guardErr;
1815
+ }
1816
+ // A metacharacter-free pattern is matched literally (substring) — same
1817
+ // line results as a regex for plain text, but unbounded by length so a
1818
+ // long pasted block can be located. Genuine regexes still compile.
1819
+ const isLiteral = _isLiteralPattern(pattern);
1820
+ const test = isLiteral ? (content) => content.includes(pattern) : ((regex) => (content) => regex.test(content))(new RegExp(pattern));
1821
+ const matches = data.split('\n')
1822
+ .map((content, idx) => test(content) ? { line: idx + 1, content } : null)
1823
+ .filter(Boolean);
1824
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${matches.length} match(es) in ${filePath}${RST}`);
1825
+ logToolCall('search_in_file', { path: filePath, pattern }, true, 'ok');
1826
+ return { matches, path: filePath };
1827
+ } catch (error) {
1828
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1829
+ logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
1830
+ return { error: error.message };
1831
+ }
1832
+ },
1833
+ },
1834
+ {
1835
+ tool: 'replace_in_file',
1836
+ specNames: ['replace_in_file'],
1837
+ tags: ['replace_in_file'],
1838
+ // LITERAL by default (Claude Code Edit model): the search text is matched
1839
+ // VERBATIM, byte-for-byte, no matter what regex-special chars it contains —
1840
+ // so a copied code block with ( ) { } . [ ] just works. Regex is opt-in via
1841
+ // `regex="true"`. Matching must be UNIQUE: 0 matches → error (no-op masked as
1842
+ // success was the silent-corruption trap), >1 matches → error unless
1843
+ // `replace_all="true"`. Inline body = regex flags (only meaningful with
1844
+ // regex="true"). Attr-based parse (like read_file) so the optional flags
1845
+ // appear in any order.
1846
+ parseXml: (text) => {
1847
+ const out = [];
1848
+ const re = /<replace_in_file\b([^>]*?)>([\s\S]*?)<\/replace_in_file>/g;
1849
+ for (const m of text.matchAll(re)) {
1850
+ const attrStr = m[1] || '';
1851
+ const body = m[2] != null ? m[2] : '';
1852
+ const attr = (k) => {
1853
+ const mm = attrStr.match(new RegExp(`${k}="([^"]*)"`)) || attrStr.match(new RegExp(`${k}='([^']*)'`));
1854
+ return mm ? mm[1] : null;
1855
+ };
1856
+ const p = attr('path');
1857
+ const search = attr('search');
1858
+ if (p == null || search == null) continue;
1859
+ const replace = attr('replace') != null ? attr('replace') : '';
1860
+ out.push(['replace_in_file', p, search, replace, body.trim(), attr('regex') === 'true', attr('replace_all') === 'true']);
1861
+ }
1862
+ return out;
1863
+ },
1864
+ fromParams: (p) => (p.path && p.search !== undefined ? ['replace_in_file', p.path, p.search, p.replace != null ? p.replace : '', p.flags || '', p.regex === true || p.regex === 'true', p.replace_all === true || p.replace_all === 'true'] : null),
1865
+ permission: (ctx, args) => ({ actionType: 'file', description: `Replace in ${args[0]}`, tag: 'replace_in_file' }),
1866
+ execute: async (ctx, args) => {
1867
+ const [arg0 = null, arg1 = null, arg2 = null, arg3 = null, arg4 = false, arg5 = false] = args;
1868
+ const { _log, logToolCall, isProtectedConfigPath, _protectedConfigWriteError, _checkRegexSafety, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1869
+ const filePath = arg0;
1870
+ const searchStr = arg1;
1871
+ const replaceStr = arg2;
1872
+ const flags = arg3 || '';
1873
+ const useRegex = arg4 === true;
1874
+ const replaceAll = arg5 === true;
1875
+ const blocked = permissionManager.readonlyBlock('replace_in_file');
1876
+ if (blocked) {
1877
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, false, 'denied');
1878
+ return blocked;
1879
+ }
1880
+ if (isProtectedConfigPath(filePath)) {
1881
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, false, 'denied');
1882
+ return _protectedConfigWriteError(filePath);
1883
+ }
1884
+ try {
1885
+ const data = await fsp.readFile(filePath, 'utf8');
1886
+ if (searchStr == null || searchStr === '') {
1887
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1888
+ return { error: 'replace_in_file: search string is empty — nothing to match.' };
1889
+ }
1890
+ let newData;
1891
+ let count;
1892
+ let remaining; // post-replace occurrences of the search string in newData
1893
+ if (!useRegex) {
1894
+ // LITERAL path (default): O(dataLen) substring match — no regex
1895
+ // compiled, so a long block is matched verbatim with no ReDoS surface
1896
+ // and no length bound. The replacement is raw text (no $1/$& handling).
1897
+ const parts = data.split(searchStr);
1898
+ count = parts.length - 1;
1899
+ // ---- Uniqueness / occurrence guard (BEFORE writing) ----
1900
+ if (count === 0) {
1901
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1902
+ return { error: `replace_in_file: search string not found in ${filePath} — file unchanged. Verify exact text including whitespace/indentation.` };
1903
+ }
1904
+ if (count > 1 && !replaceAll) {
1905
+ const lines = _literalOccurrenceLines(data, searchStr);
1906
+ const at = lines.length ? ` (matches start at line${lines.length > 1 ? 's' : ''} ${lines.join(', ')}${count > lines.length ? ', …' : ''})` : '';
1907
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1908
+ return { error: `replace_in_file: found ${count} matches but replace_all is not set${at}. Add surrounding context to uniquely identify ONE occurrence, or set replace_all:true to replace all ${count}.` };
1909
+ }
1910
+ if (replaceAll) {
1911
+ newData = parts.join(replaceStr);
1912
+ } else {
1913
+ // count === 1: replace the single occurrence.
1914
+ const idx = data.indexOf(searchStr);
1915
+ newData = data.slice(0, idx) + replaceStr + data.slice(idx + searchStr.length);
1916
+ count = 1;
1917
+ }
1918
+ remaining = newData.split(searchStr).length - 1;
1919
+ } else {
1920
+ // REGEX path (opt-in via regex:true): ReDoS guard applies. `g` flag OR
1921
+ // replace_all replaces all; otherwise the match must be unique.
1922
+ const guardErr = _checkRegexSafety(searchStr, data, false);
1923
+ if (guardErr) {
1924
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1925
+ return guardErr;
1926
+ }
1927
+ const safeFlags = flags.replace(/[^gimsuy]/g, '');
1928
+ const globalFlags = safeFlags.replace('g', '') + 'g';
1929
+ const isGlobal = safeFlags.includes('g') || replaceAll;
1930
+ count = (data.match(new RegExp(searchStr, globalFlags)) || []).length;
1931
+ // ---- Uniqueness / occurrence guard (BEFORE writing) ----
1932
+ if (count === 0) {
1933
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1934
+ return { error: `replace_in_file: pattern not found in ${filePath} — file unchanged. Verify the regex (or drop regex:true to match literally).` };
1935
+ }
1936
+ if (count > 1 && !isGlobal) {
1937
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1938
+ return { error: `replace_in_file: pattern matched ${count} times but neither the "g" flag nor replace_all is set. Refine the pattern to match ONE occurrence, or set replace_all:true to replace all ${count}.` };
1939
+ }
1940
+ const regex = new RegExp(searchStr, (isGlobal ? globalFlags : safeFlags.replace('g', '')) || undefined);
1941
+ newData = data.replace(regex, replaceStr);
1942
+ count = isGlobal ? count : 1;
1943
+ remaining = (newData.match(new RegExp(searchStr, globalFlags)) || []).length;
1944
+ }
1945
+ await fsp.writeFile(filePath, newData);
1946
+ const result = { status: 'ok', path: filePath, count, _diffBefore: data, _diffAfter: newData };
1947
+ // POST-REPLACE VERIFICATION: if the search string still appears (e.g. the
1948
+ // replacement contains it, or matches overlapped), surface a warning
1949
+ // instead of reporting clean success.
1950
+ if (remaining > 0) {
1951
+ result.warning = `replace_in_file: replaced ${count} occurrence(s), but the search string still appears ${remaining} time(s) in ${filePath} (the replacement may contain the search text, or matches overlapped).`;
1952
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath} (${remaining} still present)${RST}`);
1953
+ } else {
1954
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath}${RST}`);
1955
+ }
1956
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'ok');
1957
+ // _diffBefore/_diffAfter feed the execution-time diff renderer (onToolEnd).
1958
+ return result;
1959
+ } catch (error) {
1960
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1961
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1962
+ return { error: error.message };
1963
+ }
1964
+ },
1965
+ },
1966
+ {
1967
+ tool: 'download',
1968
+ specNames: ['download'],
1969
+ tags: ['download'],
1970
+ // Optional `path` destination (Pre-Task 4.0b). Both the attribute form
1971
+ // (`<download path="dest">URL</download>`) and the plain form
1972
+ // (`<download>URL</download>`, defaulting to the CWD) are accepted.
1973
+ parseXml: (text) => {
1974
+ const out = [];
1975
+ for (const m of _matchDual(text, '<download\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/download>')) {
1976
+ out.push(['download', _unwrapInnerTag(m[2]).trim(), m[1]]);
1977
+ }
1978
+ for (const m of text.matchAll(/<download>([\s\S]*?)<\/download>/g)) {
1979
+ out.push(['download', _unwrapInnerTag(m[1]).trim()]);
1980
+ }
1981
+ return out;
1982
+ },
1983
+ fromParams: (p) => {
1984
+ if (!p.url) return null;
1985
+ const dest = p.path || p.dest;
1986
+ return dest ? ['download', p.url, dest] : ['download', p.url];
1987
+ },
1988
+ permission: (ctx, args) => ({ actionType: 'net', description: `Download ${args[0]}`, tag: 'download' }),
1989
+ execute: async (ctx, args, options) => {
1990
+ const signal = (options && options.signal) || null;
1991
+ const [arg0 = null, arg1 = null] = args;
1992
+ const {
1993
+ _log, logToolCall, _dryRun, _skippedOps,
1994
+ isPathSafe, _sandboxError, isProtectedSecretPath, _secretReadError,
1995
+ isProtectedConfigPath, _protectedConfigWriteError,
1996
+ permissionManager, getConfig,
1997
+ FG_GREEN, FG_GRAY, FG_RED, RST,
1998
+ } = ctx;
1999
+ const url = arg0;
2000
+ const dest = arg1 || null;
2001
+ if (_dryRun) {
2002
+ _skippedOps.push({ category: 'net', symbol: '↓', desc: `download ${url}` });
2003
+ logToolCall('download', { url }, false, 'dry-run');
2004
+ return { status: 'dry-run', message: 'dry-run: network call skipped' };
2005
+ }
2006
+ // Validate/normalize the URL BEFORE building the request or resolving a
2007
+ // destination — a malformed URL (or non-http(s) scheme, empty, non-string)
2008
+ // is a clean tool error, never an uncaught throw out of the executor.
2009
+ const validatedDlUrl = _validateFetchUrl(url);
2010
+ if (validatedDlUrl.error) {
2011
+ _log(` ${FG_RED}✗ ${validatedDlUrl.error}${RST}`);
2012
+ logToolCall('download', { url }, true, 'error');
2013
+ return validatedDlUrl;
2014
+ }
2015
+ const normalizedDlUrl = validatedDlUrl.url;
2016
+ // Resolve the destination: an explicit path (relative → CWD, or absolute),
2017
+ // otherwise the URL basename into the CWD (historical default).
2018
+ let outPath;
2019
+ if (dest) {
2020
+ outPath = path.resolve(dest);
2021
+ } else {
2022
+ let fileName;
2023
+ try {
2024
+ fileName = path.basename(new URL(normalizedDlUrl).pathname) || 'download';
2025
+ } catch {
2026
+ fileName = 'download';
2027
+ }
2028
+ outPath = path.join(process.cwd(), fileName);
2029
+ }
2030
+ // Confinement (Pre-Task 4.0b): download is a write path and must honor the
2031
+ // same guards as every other mutating file tool — --readonly, the
2032
+ // secret-file guard, and isPathSafe (CWD confinement / --allow-anywhere).
2033
+ const blocked = permissionManager.readonlyBlock('download');
2034
+ if (blocked) {
2035
+ logToolCall('download', { url, path: outPath }, false, 'denied');
2036
+ return blocked;
2037
+ }
2038
+ if (isProtectedSecretPath(outPath)) {
2039
+ logToolCall('download', { url, path: outPath }, false, 'denied');
2040
+ return _secretReadError(outPath);
2041
+ }
2042
+ if (isProtectedConfigPath(outPath)) {
2043
+ logToolCall('download', { url, path: outPath }, false, 'denied');
2044
+ return _protectedConfigWriteError(outPath);
2045
+ }
2046
+ if (!isPathSafe(outPath)) {
2047
+ logToolCall('download', { url, path: outPath }, false, 'denied');
2048
+ return _sandboxError(outPath);
2049
+ }
2050
+ const cfg = getConfig ? getConfig() : {};
2051
+ const maxBytes = Math.max(1024, cfg.download_max_bytes || 104857600);
2052
+ const userAgent = _resolveUserAgent(cfg);
2053
+ const startedAt = Date.now();
2054
+ return new Promise((resolve) => {
2055
+ let abortedByUser = false;
2056
+ let cappedExceeded = false;
2057
+ let onAbort = null;
2058
+ let activeReq = null;
2059
+ let activeFile = null;
2060
+ const detachAbort = () => {
2061
+ if (onAbort && signal) {
2062
+ try { signal.removeEventListener('abort', onAbort); } catch {}
2063
+ onAbort = null;
2064
+ }
2065
+ };
2066
+ const finishAborted = () => {
2067
+ fs.unlink(outPath, () => {});
2068
+ logToolCall('download', { url }, true, 'aborted');
2069
+ resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
2070
+ };
2071
+ if (signal) {
2072
+ if (signal.aborted) {
2073
+ abortedByUser = true;
2074
+ finishAborted();
2075
+ return;
2076
+ }
2077
+ onAbort = () => {
2078
+ abortedByUser = true;
2079
+ try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
2080
+ try { if (activeFile) activeFile.destroy(); } catch {}
2081
+ };
2082
+ signal.addEventListener('abort', onAbort, { once: true });
2083
+ }
2084
+
2085
+ function doDownload(target, redirectsLeft) {
2086
+ const proto = target.startsWith('https') ? https : http;
2087
+ let req;
2088
+ try {
2089
+ req = proto.get(target, { headers: { 'User-Agent': userAgent } }, (res) => {
2090
+ if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
2091
+ res.resume();
2092
+ // A redirect Location may be relative or malformed — resolve +
2093
+ // validate it against the current target rather than throwing.
2094
+ const nextUrl = _validateFetchUrl(res.headers.location, target);
2095
+ if (nextUrl.error) {
2096
+ detachAbort();
2097
+ _log(` ${FG_RED}✗ ${nextUrl.error}${RST}`);
2098
+ logToolCall('download', { url: target }, true, 'error');
2099
+ return resolve(nextUrl);
2100
+ }
2101
+ return doDownload(nextUrl.url, redirectsLeft - 1);
2102
+ }
2103
+ if (res.statusCode >= 400) {
2104
+ res.resume();
2105
+ const msg = `HTTP ${res.statusCode}`;
2106
+ detachAbort();
2107
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
2108
+ logToolCall('download', { url }, true, 'error');
2109
+ return resolve({ error: msg });
2110
+ }
2111
+ const file = fs.createWriteStream(outPath);
2112
+ activeFile = file;
2113
+ let downloadedBytes = 0;
2114
+ // Manual stream (instead of res.pipe) so we can enforce the byte cap
2115
+ // mid-flight: on exceeding it, abort the request, destroy the file,
2116
+ // remove the partial artifact, and resolve once cleanup completes so
2117
+ // no truncated file is ever left behind.
2118
+ res.on('data', (chunk) => {
2119
+ if (cappedExceeded || abortedByUser) return;
2120
+ downloadedBytes += chunk.length;
2121
+ if (downloadedBytes > maxBytes) {
2122
+ cappedExceeded = true;
2123
+ try { if (activeReq) activeReq.destroy(); } catch {}
2124
+ try { res.destroy(); } catch {}
2125
+ detachAbort();
2126
+ const msg = `Download aborted: exceeded byte cap (${maxBytes} bytes)`;
2127
+ file.destroy();
2128
+ file.once('close', () => {
2129
+ fs.unlink(outPath, () => {
2130
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
2131
+ logToolCall('download', { url, path: outPath }, true, 'error');
2132
+ resolve({ error: msg, capped: true, bytes: downloadedBytes });
2133
+ });
2134
+ });
2135
+ return;
2136
+ }
2137
+ if (!file.write(chunk)) {
2138
+ res.pause();
2139
+ file.once('drain', () => { if (!cappedExceeded && !abortedByUser) res.resume(); });
2140
+ }
2141
+ });
2142
+ res.on('end', () => {
2143
+ if (cappedExceeded || abortedByUser) return;
2144
+ file.end();
2145
+ });
2146
+ res.on('error', (err) => {
2147
+ if (cappedExceeded) return;
2148
+ if (abortedByUser) { detachAbort(); finishAborted(); return; }
2149
+ file.destroy();
2150
+ fs.unlink(outPath, () => {});
2151
+ detachAbort();
2152
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
2153
+ logToolCall('download', { url }, true, 'error');
2154
+ resolve({ error: err.message });
2155
+ });
2156
+ file.on('finish', () => {
2157
+ if (cappedExceeded || abortedByUser) return;
2158
+ file.close();
2159
+ detachAbort();
2160
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Downloaded to ${outPath}${RST}`);
2161
+ logToolCall('download', { url }, true, 'ok');
2162
+ resolve({ status: 'ok', path: outPath, bytes: downloadedBytes });
2163
+ });
2164
+ file.on('error', (err) => {
2165
+ if (cappedExceeded) return;
2166
+ if (abortedByUser) {
2167
+ detachAbort();
2168
+ finishAborted();
2169
+ return;
2170
+ }
2171
+ fs.unlink(outPath, () => {});
2172
+ detachAbort();
2173
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
2174
+ logToolCall('download', { url }, true, 'error');
2175
+ resolve({ error: err.message });
2176
+ });
2177
+ });
2178
+ } catch (err) {
2179
+ // Defense-in-depth: the URL is validated before we get here, but any
2180
+ // synchronous throw from proto.get must still become a tool error.
2181
+ detachAbort();
2182
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
2183
+ logToolCall('download', { url: target }, true, 'error');
2184
+ resolve({ error: `Invalid URL: ${err.message}`, error_code: err.code || 'ERR_INVALID_URL' });
2185
+ return;
2186
+ }
2187
+ activeReq = req;
2188
+ req.on('error', (err) => {
2189
+ if (cappedExceeded) return;
2190
+ if (abortedByUser) {
2191
+ detachAbort();
2192
+ finishAborted();
2193
+ return;
2194
+ }
2195
+ fs.unlink(outPath, () => {});
2196
+ detachAbort();
2197
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
2198
+ logToolCall('download', { url }, true, 'error');
2199
+ resolve({ error: err.message });
2200
+ });
2201
+ req.setTimeout(120000, () => {
2202
+ req.destroy();
2203
+ fs.unlink(outPath, () => {});
2204
+ detachAbort();
2205
+ logToolCall('download', { url }, true, 'error');
2206
+ resolve({ error: 'Request timeout' });
2207
+ });
2208
+ }
2209
+ doDownload(normalizedDlUrl, 5);
2210
+ });
2211
+ },
2212
+ },
2213
+ {
2214
+ tool: 'upload',
2215
+ specNames: ['upload'],
2216
+ tags: ['upload'],
2217
+ // QUIRK: upload content (base64) is captured raw (not trimmed), like write.
2218
+ parseXml: (text) => _matchDual(text, '<upload\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/upload>').map((m) => ['upload', m[1], m[2]]),
2219
+ fromParams: (p) => (p.path ? ['upload', p.path, p.content != null ? p.content : ''] : null),
2220
+ permission: (ctx, args) => ({ actionType: 'file', description: `Upload to ${args[0]}`, tag: 'upload' }),
2221
+ execute: async (ctx, args) => {
2222
+ const [arg0 = null, arg1 = null] = args;
2223
+ const { _log, logToolCall, isPathSafe, isProtectedConfigPath, _sandboxError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2224
+ const filePath = arg0;
2225
+ const encodedContent = arg1 || '';
2226
+ const blocked = permissionManager.readonlyBlock('upload');
2227
+ if (blocked) {
2228
+ logToolCall('upload', { path: filePath }, false, 'denied');
2229
+ return blocked;
2230
+ }
2231
+ if (isProtectedConfigPath(filePath)) {
2232
+ logToolCall('upload', { path: filePath }, false, 'denied');
2233
+ return _protectedConfigWriteError(filePath);
2234
+ }
2235
+ if (!isPathSafe(filePath)) {
2236
+ logToolCall('upload', { path: filePath }, false, 'denied');
2237
+ return _sandboxError(filePath);
2238
+ }
2239
+ try {
2240
+ const dir = path.dirname(filePath);
2241
+ if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
2242
+ const buffer = Buffer.from(encodedContent.trim(), 'base64');
2243
+ await fsp.writeFile(filePath, buffer);
2244
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Uploaded ${buffer.length} bytes to ${filePath}${RST}`);
2245
+ logToolCall('upload', { path: filePath }, true, 'ok');
2246
+ return { status: 'ok', path: filePath, bytes: buffer.length };
2247
+ } catch (error) {
2248
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2249
+ logToolCall('upload', { path: filePath }, true, 'error');
2250
+ return { error: error.message };
2251
+ }
2252
+ },
2253
+ },
2254
+ {
2255
+ tool: 'file_stat',
2256
+ specNames: ['file_stat'],
2257
+ tags: ['file_stat'],
2258
+ parseXml: (text) => _inline(text, 'file_stat', 'file_stat'),
2259
+ fromParams: (p) => (p.path ? ['file_stat', p.path] : null),
2260
+ permission: () => null,
2261
+ execute: async (ctx, args) => {
2262
+ const [arg0 = null] = args;
2263
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2264
+ const filePath = arg0;
2265
+ try {
2266
+ const stat = await fsp.stat(filePath);
2267
+ const type = stat.isDirectory() ? 'directory' : stat.isSymbolicLink() ? 'symlink' : 'file';
2268
+ const size_kb = (stat.size / 1024).toFixed(2);
2269
+ const mode = '0o' + stat.mode.toString(8);
2270
+ const mtime = stat.mtime.toISOString();
2271
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stat ${filePath}${RST}`);
2272
+ logToolCall('file_stat', { path: filePath }, true, 'ok');
2273
+ return { path: filePath, size_kb, mtime, type, mode };
2274
+ } catch (error) {
2275
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2276
+ logToolCall('file_stat', { path: filePath }, true, 'error');
2277
+ return { error: error.message };
2278
+ }
2279
+ },
2280
+ },
2281
+ {
2282
+ tool: 'http_get',
2283
+ specNames: ['http_get'],
2284
+ tags: ['http_get'],
2285
+ parseXml: (text) => {
2286
+ const out = [];
2287
+ for (const m of text.matchAll(/<http_get\b([^>]*?)(?:><\/http_get>|\/>)/g)) {
2288
+ const attrStr = m[1];
2289
+ const urlMatch = attrStr.match(/url="([^"]+)"/) || attrStr.match(/url='([^']+)'/);
2290
+ if (urlMatch) out.push(['http_get', urlMatch[1], _httpGetOpts(attrStr)]);
2291
+ }
2292
+ for (const m of text.matchAll(/<http_get>([\s\S]*?)<\/http_get>/g)) {
2293
+ const inner = m[1].trim();
2294
+ if (!inner) continue;
2295
+ const urlAttr = inner.match(/url="([^"]+)"/) || inner.match(/url='([^']+)'/);
2296
+ out.push(['http_get', urlAttr ? urlAttr[1] : _unwrapInnerTag(inner).trim(), _httpGetOpts(inner)]);
2297
+ }
2298
+ return out;
2299
+ },
2300
+ fromParams: (p) => (p.url ? ['http_get', p.url, _httpGetOptsFromParams(p)] : null),
2301
+ permission: (ctx, args) => ({ actionType: 'net', description: `HTTP GET ${args[0]}`, tag: 'http_get' }),
2302
+ execute: async (ctx, args, options) => {
2303
+ const signal = (options && options.signal) || null;
2304
+ const [arg0 = null, callOpts = {}] = args;
2305
+ const { _log, logToolCall, _dryRun, _skippedOps, getConfig, webChat, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2306
+ const url = arg0;
2307
+ if (_dryRun) {
2308
+ _skippedOps.push({ category: 'net', symbol: '↓', desc: `GET ${url}` });
2309
+ logToolCall('http_get', { url }, false, 'dry-run');
2310
+ return { status: 'dry-run', message: 'dry-run: network call skipped' };
2311
+ }
2312
+ // Validate/normalize the URL BEFORE constructing any request. A malformed
2313
+ // URL (or a non-http(s) scheme, empty/whitespace, non-string) is a clean
2314
+ // tool error the agent can recover from — never an uncaught throw out of
2315
+ // the executor. Same shape as the request-error path below.
2316
+ const validatedUrl = _validateFetchUrl(url);
2317
+ if (validatedUrl.error) {
2318
+ _log(` ${FG_RED}✗ ${validatedUrl.error}${RST}`);
2319
+ logToolCall('http_get', { url }, true, 'error');
2320
+ return validatedUrl;
2321
+ }
2322
+ const normalizedUrl = validatedUrl.url;
2323
+ const httpCfg = getConfig ? getConfig() : {};
2324
+ const reqTimeoutMs = Math.max(15000, httpCfg.request_timeout_ms || 15000);
2325
+ // Byte cap is now ONLY a transfer/disk guard — the context-protection
2326
+ // mechanism is the post-extraction TOKEN budget (web.max_content_tokens).
2327
+ const maxBytes = Math.max(1024, httpCfg.http_fetch_max_bytes || 262144);
2328
+ const userAgent = _resolveUserAgent(httpCfg);
2329
+ const webCfg = (httpCfg.web && typeof httpCfg.web === 'object') ? httpCfg.web : {};
2330
+ const maxContentTokens = Number.isFinite(webCfg.max_content_tokens) && webCfg.max_content_tokens > 0
2331
+ ? webCfg.max_content_tokens : 6000;
2332
+ const summaryModel = typeof webCfg.summary_model === 'string' && webCfg.summary_model.trim()
2333
+ ? webCfg.summary_model.trim() : undefined;
2334
+ // Resolve the web-fetch mode (Task W.1b). Precedence: an explicit per-call
2335
+ // `mode` (the canonical enum the parser emits) beats the deprecated legacy
2336
+ // booleans (summarize/raw — which may still arrive directly on callOpts from
2337
+ // older callers), which beat the global config default (web.summarize mapped
2338
+ // to summarized/extracted). Summary needs an injected LLM call (webChat);
2339
+ // without one (headless/oneshot without an api client) the summarized branch
2340
+ // degrades to extracted Markdown, never the raw page.
2341
+ const mode = (callOpts && WEB_FETCH_MODES.includes(callOpts.mode) && callOpts.mode)
2342
+ || _legacyBoolsToMode(
2343
+ typeof (callOpts && callOpts.summarize) === 'boolean' ? callOpts.summarize : undefined,
2344
+ typeof (callOpts && callOpts.raw) === 'boolean' ? callOpts.raw : undefined,
2345
+ )
2346
+ || (webCfg.summarize !== false ? 'summarized' : 'extracted');
2347
+ const intent = callOpts && typeof callOpts.intent === 'string' ? callOpts.intent : '';
2348
+ const startedAt = Date.now();
2349
+ return new Promise((resolve) => {
2350
+ let abortedByUser = false;
2351
+ let onAbort = null;
2352
+ let activeReq = null;
2353
+ const detachAbort = () => {
2354
+ if (onAbort && signal) {
2355
+ try { signal.removeEventListener('abort', onAbort); } catch {}
2356
+ onAbort = null;
2357
+ }
2358
+ };
2359
+ const finishAborted = () => {
2360
+ logToolCall('http_get', { url }, true, 'aborted');
2361
+ resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
2362
+ };
2363
+ if (signal) {
2364
+ if (signal.aborted) {
2365
+ abortedByUser = true;
2366
+ finishAborted();
2367
+ return;
2368
+ }
2369
+ onAbort = () => {
2370
+ abortedByUser = true;
2371
+ try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
2372
+ };
2373
+ signal.addEventListener('abort', onAbort, { once: true });
2374
+ }
2375
+
2376
/**
 * Perform one GET against `target` and settle the enclosing http_get promise.
 *
 * Redirects (301/302/303/307/308) are followed while `redirectsLeft` is
 * positive. At most `maxBytes` of the body are kept; the kept bytes are handed
 * to processWebContent and its result resolves the promise. Every failure is
 * resolved as a `{ error, error_code }` tool error — nothing is thrown.
 *
 * @param {string} target URL to fetch for this hop.
 * @param {number} redirectsLeft Redirect hops still allowed after this one.
 */
function doGet(target, redirectsLeft) {
  const proto = target.startsWith('https') ? https : http;

  // Per-request latch. Once this hop has produced a result (or handed off to
  // a redirect hop) every later event from the same request is ignored, so a
  // timeout followed by the resulting socket error cannot log/settle twice.
  let settled = false;

  // Shared failure path for the request AND the response stream.
  const failTransport = (err) => {
    if (settled) return;
    settled = true;
    detachAbort();
    if (abortedByUser) {
      finishAborted();
      return;
    }
    _log(` ${FG_RED}✗ ${err.message}${RST}`);
    logToolCall('http_get', { url: target }, true, 'error');
    resolve({ error: err.message, error_code: err.code });
  };

  let req;
  try {
    req = proto.get(target, { headers: { 'User-Agent': userAgent } }, (res) => {
      if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
        res.resume();
        // A redirect Location may be relative or malformed — resolve it
        // against the current target and validate, so a bad redirect is a
        // clean tool error rather than a synchronous throw in this callback.
        const nextUrl = _validateFetchUrl(res.headers.location, target);
        // This hop is finished either way; stale events from it must not
        // settle the promise on behalf of the next hop.
        settled = true;
        if (nextUrl.error) {
          detachAbort();
          _log(` ${FG_RED}✗ ${nextUrl.error}${RST}`);
          logToolCall('http_get', { url: target }, true, 'error');
          return resolve(nextUrl);
        }
        return doGet(nextUrl.url, redirectsLeft - 1);
      }

      // Once a response exists, a dropped connection is reported on `res`,
      // not on `req`. Without these listeners 'end' never fires and the
      // promise would never settle.
      res.on('error', failTransport);
      res.on('close', () => {
        if (res.complete) return;
        const err = new Error('Connection closed before the response completed');
        err.code = 'ECONNRESET';
        failTransport(err);
      });

      const bufs = [];
      let totalBytes = 0;
      let capped = false;
      res.on('data', (chunk) => {
        // totalBytes counts everything received; bufs holds at most maxBytes.
        totalBytes += chunk.length;
        if (!capped) {
          if (totalBytes <= maxBytes) {
            bufs.push(chunk);
          } else {
            const keep = maxBytes - (totalBytes - chunk.length);
            if (keep > 0) bufs.push(chunk.slice(0, keep));
            capped = true;
          }
        }
      });
      res.on('end', () => {
        // On a user abort the error path reports the result, not this one.
        if (abortedByUser || settled) return;
        settled = true;
        detachAbort();
        const kept = Buffer.concat(bufs);
        const keptBytes = kept.length;
        const rawBody = kept.toString('utf8');
        const contentType = res.headers && res.headers['content-type'];
        const statusCode = res.statusCode;
        _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}HTTP GET ${target} (${statusCode}, ${totalBytes} bytes${capped ? `, transfer-capped to ${keptBytes}` : ''})${RST}`);
        logToolCall('http_get', { url: target }, true, statusCode < 400 ? 'ok' : 'error');
        // Stage 1+2+3: extract main content → Markdown → (optional) summary.
        // The RAW page never enters the main context — only the processed
        // result does. Fully contained: any pipeline error degrades to the
        // capped extracted Markdown (and as a last resort the crude-stripped
        // text), NEVER the raw HTML.
        (async () => {
          let result;
          try {
            result = await processWebContent({
              rawBody, contentType, url: target, statusCode,
              totalBytes, transferCapped: capped,
              mode, intent, summaryModel, maxContentTokens,
              webChat, signal,
            });
          } catch (err) {
            // Defensive: extraction itself should not throw, but if it does,
            // fall back to a crude tag-strip rather than dumping raw HTML.
            const { stripTagsCrude } = require('./web-extract');
            const safe = capToTokens(stripTagsCrude(rawBody), maxContentTokens, defaultEstimate);
            result = { status_code: statusCode, body: safe.text, bytes: totalBytes,
              kind: 'text', extracted: false, summarized: false, processing_error: err.message };
          }
          resolve(result);
        })();
      });
    });
  } catch (err) {
    // Defense-in-depth: the URL is validated before we get here, but any
    // synchronous throw from proto.get must still become a tool error,
    // never escape the executor.
    detachAbort();
    _log(` ${FG_RED}✗ ${err.message}${RST}`);
    logToolCall('http_get', { url: target }, true, 'error');
    resolve({ error: `Invalid URL: ${err.message}`, error_code: err.code || 'ERR_INVALID_URL' });
    return;
  }
  // Published so the abort listener can destroy whichever hop is in flight.
  activeReq = req;
  req.on('error', failTransport);
  req.setTimeout(reqTimeoutMs, () => {
    if (settled) return;
    settled = true;
    req.destroy();
    detachAbort();
    logToolCall('http_get', { url: target }, true, 'error');
    resolve({ error: 'Request timeout', error_code: 'ETIMEDOUT' });
  });
}
2475
+ doGet(normalizedUrl, 5);
2476
+ });
2477
+ },
2478
+ },
2479
+ {
2480
+ // Web search (Task W.2b). Calls the backend POST /api/search via the
2481
+ // injected ctx.webSearch (api client's dashboardSearch → SearXNG) and
2482
+ // returns a COMPACT { title, url, snippet } list — never page content
2483
+ // (that is http_get's job). The spec steers the model to read the snippets,
2484
+ // pick the relevant result(s), and fetch only those with http_get, instead
2485
+ // of blindly multi-fetching. The backend is on another machine and may be
2486
+ // down/unreachable/erroring — every failure mode is caught and surfaced as a
2487
+ // clean tool error; NOTHING throws out of the executor (the http_get-fix
2488
+ // lesson). Results are untrusted external content, fenced in lib/agent.js.
2489
+ tool: 'web_search',
2490
+ specNames: ['web_search'],
2491
+ tags: ['web_search'],
2492
+ parseXml: (text) => {
2493
+ const out = [];
2494
+ for (const m of text.matchAll(/<web_search\b([^>]*?)(?:><\/web_search>|\/>)/g)) {
2495
+ const attrStr = m[1];
2496
+ const qMatch = attrStr.match(/query="([^"]*)"/) || attrStr.match(/query='([^']*)'/);
2497
+ if (qMatch) out.push(['web_search', qMatch[1], _webSearchOpts(attrStr)]);
2498
+ }
2499
+ for (const m of text.matchAll(/<web_search>([\s\S]*?)<\/web_search>/g)) {
2500
+ const inner = m[1].trim();
2501
+ if (!inner) continue;
2502
+ const qAttr = inner.match(/query="([^"]*)"/) || inner.match(/query='([^']*)'/);
2503
+ out.push(['web_search', qAttr ? qAttr[1] : inner, _webSearchOpts(inner)]);
2504
+ }
2505
+ return out;
2506
+ },
2507
+ fromParams: (p) => (p.query ? ['web_search', String(p.query), _webSearchOptsFromParams(p)] : null),
2508
+ // A network read like http_get — same descriptor shape (net, gated; not a
2509
+ // privileged path). Performs no mutation.
2510
+ permission: (ctx, args) => ({ actionType: 'net', description: `Web search: ${args[0]}`, tag: 'web_search' }),
2511
+ execute: async (ctx, args, options) => {
2512
+ const signal = (options && options.signal) || null;
2513
+ const [arg0 = '', callOpts = {}] = args;
2514
+ const { _log, logToolCall, _dryRun, _skippedOps, webSearch, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2515
+ const query = typeof arg0 === 'string' ? arg0.trim() : '';
2516
+ if (!query) {
2517
+ logToolCall('web_search', { query: arg0 }, true, 'error');
2518
+ return { error: 'web search unavailable: empty query' };
2519
+ }
2520
+ if (_dryRun) {
2521
+ _skippedOps.push({ category: 'net', symbol: '⌕', desc: `search ${query}` });
2522
+ logToolCall('web_search', { query }, false, 'dry-run');
2523
+ return { status: 'dry-run', message: 'dry-run: web search skipped' };
2524
+ }
2525
+ if (typeof webSearch !== 'function') {
2526
+ logToolCall('web_search', { query }, true, 'error');
2527
+ return { error: 'web search unavailable: no backend client configured (available in interactive chat / the SDK with dashboard auth)' };
2528
+ }
2529
+ // Bound count BEFORE the backend call; the backend clamps further but a
2530
+ // huge value should never leave the client. An invalid/zero count is
2531
+ // dropped so the backend default applies.
2532
+ const count = _clampSearchCount(callOpts && callOpts.count);
2533
+ const limit = count || 10;
2534
+ try {
2535
+ const resp = await webSearch(query, count ? { count, signal } : { signal });
2536
+ const raw = resp && Array.isArray(resp.results) ? resp.results : [];
2537
+ const results = raw.slice(0, limit).map((r) => ({
2538
+ title: r && typeof r.title === 'string' ? r.title : '',
2539
+ url: r && typeof r.url === 'string' ? r.url : '',
2540
+ snippet: r && typeof r.snippet === 'string' ? r.snippet : '',
2541
+ }));
2542
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}web search "${query}" (${results.length} result${results.length === 1 ? '' : 's'})${RST}`);
2543
+ logToolCall('web_search', { query }, true, 'ok');
2544
+ return { query, count: results.length, results };
2545
+ } catch (err) {
2546
+ const reason = (err && err.message) ? err.message : String(err || 'unknown error');
2547
+ _log(` ${FG_RED}✗ web search unavailable: ${reason}${RST}`);
2548
+ logToolCall('web_search', { query }, true, 'error');
2549
+ return { error: `web search unavailable: ${reason}` };
2550
+ }
2551
+ },
2552
+ },
2553
+ {
2554
+ tool: 'ask_user',
2555
+ specNames: ['ask_user'],
2556
+ tags: ['ask_user'],
2557
+ parseXml: (text) => _matchDual(text, '<ask_user\\s+question=Q([^Q]+)Q\\s*(?:><\\/ask_user>|\\/>)').map((m) => ['ask_user', m[1]]),
2558
+ fromParams: (p) => (p.question ? ['ask_user', p.question] : null),
2559
+ // No permission descriptor: ask_user has no system side effects — it only
2560
+ // prompts the interactive user, which IS the interaction. A separate "may I
2561
+ // ask you?" gate would be pure friction (a double prompt before the real
2562
+ // question/menu). A null descriptor also makes it available during plan mode
2563
+ // (the plan-mode gate withholds only effectful tools, i.e. non-null
2564
+ // descriptors), so the agent can ask clarifying questions while planning.
2565
+ permission: () => null,
2566
+ execute: async (ctx, args) => {
2567
+ const [arg0 = null] = args;
2568
+ const { _log, logToolCall, _parseAskMenu, permissionManager, writer, FG_YELLOW, FG_GRAY, RST, DIM } = ctx;
2569
+ const question = arg0;
2570
+ // Display-only split: the menu gets ONLY the numbered options; the modal
2571
+ // header gets ONLY the prose prompt (no duplication). The model-facing
2572
+ // result below still carries the FULL original `question` (agent.js builds
2573
+ // "User answered \"<question>\": <answer>" from it). Edge: a question with
2574
+ // no prose before the numbers yields an empty prompt — fall back to the raw
2575
+ // question so the modal never renders an empty header.
2576
+ const { prompt, options } = _parseAskMenu(question);
2577
+ if (options.length >= 2) {
2578
+ const selected = await permissionManager.captureSelect({ prompt: prompt || question, options });
2579
+ logToolCall('ask_user', { question }, true, 'ok');
2580
+ return { question, answer: selected || options[0] };
2581
+ }
2582
+ if (!process.stdout.isTTY || process.stdin.isRaw) {
2583
+ writer.scrollback(`\n ${FG_YELLOW}?${RST} ${question}\n ${DIM}[auto-answering 'y']${RST}`);
2584
+ logToolCall('ask_user', { question }, true, 'ok');
2585
+ return { question, answer: 'y' };
2586
+ }
2587
+ process.stdout.write(`\n ${FG_YELLOW}?${RST} ${question}\n ${FG_GRAY}>${RST} `);
2588
+ const buf = Buffer.alloc(4096);
2589
+ let input = '';
2590
+ while (true) {
2591
+ const n = fs.readSync(0, buf, 0, 1);
2592
+ if (n === 0) break;
2593
+ const ch = buf[0];
2594
+ if (ch === 0x0a) break;
2595
+ if (ch === 0x0d) continue;
2596
+ input += String.fromCharCode(ch);
2597
+ }
2598
+ _log();
2599
+ logToolCall('ask_user', { question }, true, 'ok');
2600
+ return { question, answer: input };
2601
+ },
2602
+ },
2603
+ {
2604
+ tool: 'store_memory',
2605
+ specNames: ['store_memory'],
2606
+ tags: ['store_memory'],
2607
+ // QUIRK: store_memory value is captured raw (not trimmed), like write.
2608
+ parseXml: (text) => _matchDual(text, '<store_memory\\s+key=Q([^Q]+)Q>([\\s\\S]*?)<\\/store_memory>').map((m) => ['store_memory', m[1], m[2]]),
2609
+ fromParams: (p) => (p.key ? ['store_memory', p.key, p.value != null ? p.value : ''] : null),
2610
+ permission: (ctx, args) => ({ actionType: 'memory', description: `Store memory: ${args[0]}`, tag: 'store_memory' }),
2611
+ execute: async (ctx, args) => {
2612
+ const [arg0 = null, arg1 = null] = args;
2613
+ const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2614
+ const key = arg0;
2615
+ const value = arg1 || '';
2616
+ try {
2617
+ let mem = {};
2618
+ try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
2619
+ mem[key] = value;
2620
+ await fsp.mkdir(path.dirname(MEMORY_PATH), { recursive: true });
2621
+ await fsp.writeFile(MEMORY_PATH, JSON.stringify(mem, null, 2));
2622
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stored memory: ${key}${RST}`);
2623
+ logToolCall('store_memory', { key }, true, 'ok');
2624
+ return { status: 'ok', key };
2625
+ } catch (error) {
2626
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2627
+ logToolCall('store_memory', { key }, true, 'error');
2628
+ return { error: error.message };
2629
+ }
2630
+ },
2631
+ },
2632
+ {
2633
+ tool: 'recall_memory',
2634
+ specNames: ['recall_memory'],
2635
+ tags: ['recall_memory'],
2636
+ parseXml: (text) => _matchDual(text, '<recall_memory\\s+key=Q([^Q]+)Q\\s*(?:><\\/recall_memory>|\\/>)').map((m) => ['recall_memory', m[1]]),
2637
+ fromParams: (p) => (p.key ? ['recall_memory', p.key] : null),
2638
+ permission: () => null,
2639
+ execute: async (ctx, args) => {
2640
+ const [arg0 = null] = args;
2641
+ const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2642
+ const key = arg0;
2643
+ try {
2644
+ let mem = {};
2645
+ try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
2646
+ const found = key in mem;
2647
+ const value = found ? mem[key] : null;
2648
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Recalled memory: ${key}${RST}`);
2649
+ logToolCall('recall_memory', { key }, true, 'ok');
2650
+ return { key, value, found };
2651
+ } catch (error) {
2652
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2653
+ logToolCall('recall_memory', { key }, true, 'error');
2654
+ return { error: error.message };
2655
+ }
2656
+ },
2657
+ },
2658
+ {
2659
+ tool: 'list_memories',
2660
+ specNames: ['list_memories'],
2661
+ tags: ['list_memories'],
2662
+ parseXml: (text) => [...text.matchAll(/<list_memories\s*(?:><\/list_memories>|\/>)/g)].map(() => ['list_memories']),
2663
+ fromParams: () => ['list_memories'],
2664
+ permission: () => null,
2665
+ execute: async (ctx) => {
2666
+ const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2667
+ try {
2668
+ let mem = {};
2669
+ try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
2670
+ const keys = Object.keys(mem);
2671
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${keys.length} memory key(s)${RST}`);
2672
+ logToolCall('list_memories', {}, true, 'ok');
2673
+ return { keys };
2674
+ } catch (error) {
2675
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2676
+ logToolCall('list_memories', {}, true, 'error');
2677
+ return { error: error.message };
2678
+ }
2679
+ },
2680
+ },
2681
+ {
2682
+ tool: 'get_env',
2683
+ specNames: ['get_env'],
2684
+ tags: ['get_env'],
2685
+ parseXml: (text) => _inline(text, 'get_env', 'get_env'),
2686
+ fromParams: (p) => (p.name ? ['get_env', p.name] : null),
2687
+ permission: () => null,
2688
+ execute: async (ctx, args) => {
2689
+ const [arg0 = null] = args;
2690
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
2691
+ const varName = arg0;
2692
+ const value = process.env[varName];
2693
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Got env ${varName}${RST}`);
2694
+ logToolCall('get_env', { name: varName }, true, 'ok');
2695
+ return { name: varName, value: value !== undefined ? value : null };
2696
+ },
2697
+ },
2698
+ {
2699
+ tool: 'set_env',
2700
+ specNames: ['set_env'],
2701
+ tags: ['set_env'],
2702
+ parseXml: (text) => _matchDual(text, '<set_env\\s+name=Q([^Q]+)Q\\s+value=Q([^Q]*)Q\\s*(?:><\\/set_env>|\\/>)').map((m) => ['set_env', m[1], m[2]]),
2703
+ fromParams: (p) => (p.name ? ['set_env', p.name, p.value != null ? p.value : ''] : null),
2704
+ permission: (ctx, args) => ({ actionType: 'env', description: `Set env ${args[0]}=${args[1] || ''}`, tag: 'set_env' }),
2705
+ execute: async (ctx, args) => {
2706
+ const [arg0 = null, arg1 = null] = args;
2707
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
2708
+ const varName = arg0;
2709
+ const value = arg1 || '';
2710
+ process.env[varName] = value;
2711
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Set env ${varName}${RST}`);
2712
+ logToolCall('set_env', { name: varName }, true, 'ok');
2713
+ return { status: 'ok', name: varName };
2714
+ },
2715
+ },
2716
+ {
2717
+ tool: 'system_info',
2718
+ specNames: ['system_info'],
2719
+ tags: ['system_info'],
2720
+ parseXml: (text) => [...text.matchAll(/<system_info\s*(?:><\/system_info>|\/>)/g)].map(() => ['system_info']),
2721
+ fromParams: () => ['system_info'],
2722
+ permission: () => null,
2723
+ execute: async (ctx) => {
2724
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
2725
+ const info = {
2726
+ platform: os.platform(),
2727
+ arch: os.arch(),
2728
+ hostname: os.hostname(),
2729
+ user: process.env.USER || process.env.USERNAME || '',
2730
+ total_mem_mb: Math.round(os.totalmem() / 1024 / 1024),
2731
+ free_mem_mb: Math.round(os.freemem() / 1024 / 1024),
2732
+ node_version: process.version,
2733
+ cwd: process.cwd(),
2734
+ };
2735
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}System info: ${info.platform}/${info.arch}${RST}`);
2736
+ logToolCall('system_info', {}, true, 'ok');
2737
+ return info;
2738
+ },
2739
+ },
2740
+ ...GIT_TOOL_REGISTRY,
2741
+ ];
2742
+
2743
// Lookup indexes over the static registry, built once at module load.
// _byName:   lower-cased spec name (TOOL_SPECS key / native function name) → entry.
// _byAction: canonical action (tuple[0], i.e. entry.tool) → entry, used for
//            executor / permission dispatch.
const _byName = new Map();
const _byAction = new Map();
TOOL_REGISTRY.forEach((entry) => {
  for (const specName of entry.specNames) {
    _byName.set(specName.toLowerCase(), entry);
  }
  _byAction.set(entry.tool, entry);
});
2751
+
2752
+ // ── Dynamic (runtime-registered) tools — MCP, Task 3.3 ─────────────────────
2753
+ //
2754
+ // Tools discovered at runtime (MCP servers) are registered here, SEPARATE from
2755
+ // the static TOOL_REGISTRY array above. This separation is deliberate: the
2756
+ // load-time parity check in lib/constants.js validates only the static set
2757
+ // (TAG_REGISTRY ↔ TOOL_SPECS ↔ TOOL_REGISTRY), and it runs once at module load,
2758
+ // before any MCP server has connected. Keeping dynamic tools out of that array
2759
+ // means MCP tools never break the parity invariant.
2760
+ //
2761
+ // Dispatch (entryForAction) and native mapping (fromInvoke) consult this map
2762
+ // AFTER the static one, so a dynamic tool can never shadow a built-in. Each
2763
+ // entry has the same shape as a static one — { tool, fromParams, execute,
2764
+ // permission, parseXml?, spec? } — so it dispatches through the agent loop
2765
+ // identically. `spec` (an OpenAI-format { description, parameters }) is surfaced
2766
+ // to the native function-calling `tools` array via dynamicToolSpecs().
2767
const _dynamic = new Map(); // canonical name (== entry.tool) → entry

/**
 * Find a runtime-registered tool by name.
 * Tries the name exactly as given first, then its lower-cased string form.
 *
 * @param {*} name Tool name; null/undefined never match.
 * @returns {object|null} The registered entry, or null when there is none.
 */
function _lookupDynamic(name) {
  if (name === null || name === undefined) return null;
  const exact = _dynamic.get(name);
  if (exact) return exact;
  const folded = _dynamic.get(String(name).toLowerCase());
  return folded || null;
}
2773
+
2774
/**
 * Register (or replace) a runtime tool in the dynamic registry.
 *
 * @param {object} entry Tool entry; must carry a non-empty string `tool` and
 *   the functions `execute`, `fromParams` and `permission`.
 * @throws {Error} When `tool` or any of the three functions is missing.
 */
function registerDynamicTool(entry) {
  const hasName = Boolean(entry) && typeof entry.tool === 'string' && entry.tool !== '';
  if (!hasName) {
    throw new Error('registerDynamicTool: entry.tool (canonical name) is required');
  }
  // Checked in this order so the first missing member is the one reported.
  for (const member of ['execute', 'fromParams', 'permission']) {
    if (typeof entry[member] !== 'function') {
      throw new Error(`registerDynamicTool(${entry.tool}): ${member}() is required`);
    }
  }
  _dynamic.set(entry.tool, entry);
}
2789
+
2790
/**
 * Remove a runtime tool by its exact registered name.
 *
 * @param {string} name Name the tool was registered under.
 * @returns {boolean} true when an entry was removed, false when none existed.
 */
function unregisterDynamicTool(name) {
  const removed = _dynamic.delete(name);
  return removed;
}
2793
+
2794
/** Drop every runtime-registered tool; the static registry is not touched. */
function clearDynamicTools() {
  _dynamic.clear();
}
2797
+
2798
/**
 * Snapshot of all runtime-registered tool entries, in registration order.
 *
 * @returns {object[]} A fresh array; mutating it does not affect the registry.
 */
function dynamicToolEntries() {
  return Array.from(_dynamic.values());
}
2801
+
2802
// Builds { name → { description, parameters } } from the dynamic tools that
// carry a spec; entries without one are left out. The result is merged into
// the native function-calling tools array in lib/api.js.
function dynamicToolSpecs() {
  const specs = {};
  _dynamic.forEach((entry) => {
    if (entry.spec) specs[entry.tool] = entry.spec;
  });
  return specs;
}
2811
+
2812
/**
 * Translate a native function call (tool name + params object) into the
 * canonical action tuple by delegating to the matching entry's fromParams().
 * Static tools are matched case-insensitively first; dynamic tools second.
 *
 * @param {*} toolName Function name from the model; any type is tolerated.
 * @param {object} [params] Call arguments; a missing value becomes {}.
 * @returns {Array|null} The entry's fromParams() result, or null when no tool
 *   matches the name.
 */
function fromInvoke(toolName, params) {
  // Coerce instead of calling .toLowerCase() directly: a truthy non-string
  // name (e.g. a number from a malformed tool call) used to throw here, while
  // the dynamic lookup already tolerates it.
  const staticKey = toolName ? String(toolName).toLowerCase() : '';
  const entry = _byName.get(staticKey) || _lookupDynamic(toolName);
  if (!entry) return null;
  return entry.fromParams(params || {});
}
2817
+
2818
/**
 * Resolve a canonical action name to its registry entry.
 * The static registry is consulted first, then the dynamic one.
 *
 * @param {string} action Canonical action (tuple[0]).
 * @returns {object|null} The matching entry, or null when unknown.
 */
function entryForAction(action) {
  const builtIn = _byAction.get(action);
  if (builtIn) return builtIn;
  return _lookupDynamic(action) || null;
}
2821
+
2822
// Names of the static (load-time-parity-checked) tools only. Dynamic tools
// MUST NOT appear here, or the lib/constants.js parity assertion would see
// phantom entries.
function registryToolNames() {
  return Array.from(_byName.keys());
}
2827
+
2828
+ module.exports = {
2829
+ TOOL_REGISTRY,
2830
+ fromInvoke,
2831
+ entryForAction,
2832
+ registryToolNames,
2833
+ // Dynamic (runtime) tool registry — MCP (Task 3.3).
2834
+ registerDynamicTool,
2835
+ unregisterDynamicTool,
2836
+ clearDynamicTools,
2837
+ dynamicToolEntries,
2838
+ dynamicToolSpecs,
2839
+ // Exported for the grep/glob characterization + parity tests (Task 2.1).
2840
+ // The execute() bodies above use these same functions; tests drive both
2841
+ // engines explicitly to prove rg- and Node-path outputs are identical.
2842
+ _grepSearch,
2843
+ _globSearch,
2844
+ _detectRipgrep,
2845
+ // Path-aware grep target resolution + single-file grep (file/dir/glob path
2846
+ // semantics + the unresolvable-path safety net). Exported for focused tests.
2847
+ _resolveGrepPath,
2848
+ _grepFile,
2849
+ _grepHasCandidate,
2850
+ // grep output modes + bound normalizers (Task W.5).
2851
+ GREP_OUTPUT_MODES,
2852
+ _normGrepMode,
2853
+ _normHeadLimit,
2854
+ _normOffset,
2855
+ // Exported for the web-fetch mode-resolution tests (Task W.1b).
2856
+ _httpGetOpts,
2857
+ _httpGetOptsFromParams,
2858
+ processWebContent,
2859
+ WEB_FETCH_MODES,
2860
+ // Exported for the URL-validation tests (fetch-url-validation.test.js).
2861
+ _validateFetchUrl,
2862
+ };