@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/.claude/settings.local.json +6 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1584 -26
  4. package/README.md +147 -3
  5. package/examples/embed.js +74 -0
  6. package/index.js +251 -10
  7. package/lib/agent.js +711 -104
  8. package/lib/api.js +213 -49
  9. package/lib/args.js +74 -2
  10. package/lib/audit.js +23 -1
  11. package/lib/background.js +584 -0
  12. package/lib/checkpoints.js +757 -0
  13. package/lib/commands/auth.js +94 -0
  14. package/lib/commands/chat-session.js +306 -0
  15. package/lib/commands/chat-slash.js +399 -0
  16. package/lib/commands/chat-turn.js +446 -0
  17. package/lib/commands/chat.js +403 -0
  18. package/lib/commands/custom.js +157 -0
  19. package/lib/commands/history-utils.js +66 -0
  20. package/lib/commands/index.js +268 -0
  21. package/lib/commands/mcp.js +113 -0
  22. package/lib/commands/oneshot.js +193 -0
  23. package/lib/commands/registry.js +269 -0
  24. package/lib/commands/tasks.js +89 -0
  25. package/lib/compact.js +87 -0
  26. package/lib/config.js +333 -11
  27. package/lib/constants.js +372 -3
  28. package/lib/deny.js +199 -0
  29. package/lib/doctor.js +160 -0
  30. package/lib/headless.js +167 -0
  31. package/lib/hooks.js +286 -0
  32. package/lib/images.js +264 -0
  33. package/lib/internals.js +49 -0
  34. package/lib/mcp/boundary.js +131 -0
  35. package/lib/mcp/client.js +270 -0
  36. package/lib/mcp/oauth.js +134 -0
  37. package/lib/memory.js +209 -0
  38. package/lib/metrics.js +37 -2
  39. package/lib/payload.js +54 -0
  40. package/lib/permission-rules.js +401 -0
  41. package/lib/permissions.js +100 -10
  42. package/lib/pricing.js +67 -0
  43. package/lib/proc.js +62 -0
  44. package/lib/prompts.js +84 -5
  45. package/lib/sandbox.js +568 -0
  46. package/lib/sdk.js +328 -0
  47. package/lib/secrets.js +211 -0
  48. package/lib/skills.js +223 -0
  49. package/lib/subagents.js +516 -0
  50. package/lib/tool_registry.js +2558 -0
  51. package/lib/tool_specs.js +222 -2
  52. package/lib/tools.js +272 -1020
  53. package/lib/ui/format.js +22 -1
  54. package/lib/ui/input-field.js +16 -7
  55. package/lib/ui/status-bar.js +79 -11
  56. package/lib/ui/theme.js +1 -0
  57. package/lib/ui/web-activity.js +218 -0
  58. package/lib/verify.js +229 -0
  59. package/lib/web-extract.js +213 -0
  60. package/lib/web-summarize.js +68 -0
  61. package/package.json +19 -4
  62. package/scripts/lint.js +57 -0
  63. package/test/agent-loop.test.js +389 -0
  64. package/test/background.test.js +414 -0
  65. package/test/chat.test.js +114 -0
  66. package/test/checkpoints-agent.test.js +181 -0
  67. package/test/checkpoints.test.js +650 -0
  68. package/test/command-registry.test.js +160 -0
  69. package/test/compact.test.js +116 -0
  70. package/test/completion-lazy.test.js +52 -0
  71. package/test/config-merge.test.js +324 -0
  72. package/test/config-quarantine.test.js +128 -0
  73. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  74. package/test/config-write-guard-skip.test.js +46 -0
  75. package/test/config-write-guard.test.js +153 -0
  76. package/test/context-split.test.js +215 -0
  77. package/test/cost-doctor.test.js +142 -0
  78. package/test/custom-commands-chat.test.js +106 -0
  79. package/test/custom-commands.test.js +230 -0
  80. package/test/deny-windows.test.js +120 -0
  81. package/test/deny.test.js +83 -0
  82. package/test/download-allow-anywhere.test.js +66 -0
  83. package/test/download-confine.test.js +153 -0
  84. package/test/executors.test.js +362 -0
  85. package/test/extract-tool-calls.test.js +315 -0
  86. package/test/fetch-url-validation.test.js +219 -0
  87. package/test/fixtures/tool-calls.js +57 -0
  88. package/test/fixtures/web-page.js +91 -0
  89. package/test/git-tools.test.js +384 -0
  90. package/test/grep-glob-serialize.test.js +242 -0
  91. package/test/grep-glob.test.js +268 -0
  92. package/test/harness/README.md +57 -0
  93. package/test/harness/chat-harness.js +142 -0
  94. package/test/harness/memwarn-headless-child.js +65 -0
  95. package/test/harness/mock-llm.js +120 -0
  96. package/test/harness/mock-mcp-server.js +142 -0
  97. package/test/harness/sse-server.js +69 -0
  98. package/test/headless.test.js +203 -0
  99. package/test/history-utils.test.js +88 -0
  100. package/test/hooks-agent.test.js +238 -0
  101. package/test/hooks-verify-sandbox.test.js +232 -0
  102. package/test/hooks.test.js +216 -0
  103. package/test/http-get-user-agent.test.js +142 -0
  104. package/test/images-api.test.js +208 -0
  105. package/test/images.test.js +238 -0
  106. package/test/max-iterations.test.js +216 -0
  107. package/test/mcp-boundary.test.js +57 -0
  108. package/test/mcp-client.test.js +267 -0
  109. package/test/mcp-oauth.test.js +86 -0
  110. package/test/memory-truncation-warning.test.js +222 -0
  111. package/test/memory.test.js +198 -0
  112. package/test/native-dispatch.test.js +356 -0
  113. package/test/output-chokepoint.test.js +188 -0
  114. package/test/path-guards.test.js +134 -0
  115. package/test/payload.test.js +99 -0
  116. package/test/permission-rules-agent.test.js +210 -0
  117. package/test/permission-rules.test.js +297 -0
  118. package/test/permissions.test.js +163 -0
  119. package/test/plan-mode.test.js +167 -0
  120. package/test/read-paginate.test.js +275 -0
  121. package/test/readonly-tools.test.js +177 -0
  122. package/test/result-cap.test.js +233 -0
  123. package/test/sandbox-agent.test.js +147 -0
  124. package/test/sandbox-integration.test.js +216 -0
  125. package/test/sandbox.test.js +408 -0
  126. package/test/sdk.test.js +234 -0
  127. package/test/shell-output-cap.test.js +181 -0
  128. package/test/skills-chat.test.js +110 -0
  129. package/test/skills.test.js +295 -0
  130. package/test/smoke.test.js +68 -0
  131. package/test/status-bar-pause.test.js +164 -0
  132. package/test/stream-parser.test.js +147 -0
  133. package/test/subagents-agent.test.js +178 -0
  134. package/test/subagents.test.js +222 -0
  135. package/test/tool-registry.test.js +85 -0
  136. package/test/trim-budget.test.js +101 -0
  137. package/test/verify-agent.test.js +317 -0
  138. package/test/verify.test.js +141 -0
  139. package/test/web-activity-ordering.test.js +194 -0
  140. package/test/web-activity.test.js +207 -0
  141. package/test/web-data-extraction-guidance.test.js +71 -0
  142. package/test/web-extract.test.js +185 -0
  143. package/test/web-fetch-agent.test.js +291 -0
  144. package/test/web-fetch-mode.test.js +193 -0
  145. package/test/web-search.test.js +380 -0
  146. package/lib/commands.js +0 -1438
@@ -0,0 +1,2558 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Runtime tool registry — one registration per tool.
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // This is the single place that owns, per tool, EVERYTHING needed to recover,
8
+ // gate, and run a call — for BOTH transport paths:
9
+ //
10
+ // * parseXml(text) — XML/tag path (the ~25 regexes once inlined in
11
+ // extractToolCalls now live next to their tool).
12
+ // * fromParams(params) — native function-calling path (was mapInvokeToCall).
13
+ // * permission(ctx, args) — the gate descriptor (was the describePermission
14
+ // switch). Returns null for ungated read-only ops.
15
+ // * execute(ctx, args, opts) — the operation (was the agentExecFile branch).
16
+ //
17
+ // Both transports resolve to the SAME entry and produce the SAME [action, ...args]
18
+ // tuple, and dispatch (agentExecFile / describePermission in lib/tools.js) is a
19
+ // registry lookup keyed on the tool's canonical action.
20
+ //
21
+ // `ctx` is a dependency bag built once by createToolExecutor (lib/tools.js) and
22
+ // passed in at call time. It carries the factory-scoped collaborators (colors,
23
+ // permissionManager, getConfig) and the tools.js-internal helpers (isPathSafe,
24
+ // the sandbox/secret guards, _log, …). Passing them in — rather than requiring
25
+ // lib/tools.js here — is what keeps this module free of the tools.js ↔ registry
26
+ // require cycle. Executor/permission bodies were moved VERBATIM from agentExecFile
27
+ // / describePermission; the `const { … } = ctx` preamble re-binds the same names
28
+ // so the bodies below are unchanged.
29
+ //
30
+ // Adding a tool is now ONE registration object here + its TOOL_SPECS schema + its
31
+ // TAG_REGISTRY classification. The first two are asserted in lockstep by the
32
+ // load-time parity check in lib/constants.js (which also requires execute +
33
+ // permission on every non-wrapper entry).
34
+ //
35
+ // IMPORTANT — parse ORDER: extractToolCalls runs entries in array order; the
36
+ // per-format ordering is pinned by test/extract-tool-calls.test.js.
37
+
38
+ const fs = require('fs');
39
+ const fsp = require('fs/promises');
40
+ const path = require('path');
41
+ const os = require('os');
42
+ const http = require('http');
43
+ const https = require('https');
44
+ const { spawnSync } = require('child_process');
45
+ const { extractContent, capToTokens, defaultEstimate, markupEstimate, MARKUP_CHARS_PER_TOKEN, classifyContentType } = require('./web-extract');
46
+ const { summarizeWebContent } = require('./web-summarize');
47
+
48
// Choose the User-Agent sent by the fetch tools (Task W.3 Part 2).
//
// A fixed, realistic browser UA defeats SIMPLE UA-based bot-blocking (sites that
// 403/406 an empty or curl-like UA). Operators may override it through
// config.web.user_agent; the model cannot — there is deliberately no UA
// parameter in the tool spec.
//
// cfg: the config object (may be partially built or missing).
// Returns the trimmed override when it is a non-empty string, otherwise
// DEFAULT_USER_AGENT from lib/constants.js.
function _resolveUserAgent(cfg) {
  const webSection = cfg && cfg.web && typeof cfg.web === 'object' ? cfg.web : null;
  if (webSection && typeof webSection.user_agent === 'string') {
    const override = webSection.user_agent.trim();
    if (override) return override;
  }
  // Required lazily: constants.js requires this module at load time (circular
  // dependency), so a top-level destructure would capture `undefined`. By call
  // time constants.js has finished loading.
  return require('./constants').DEFAULT_USER_AGENT;
}
64
+
65
+ // http_get per-call options (Task W.1 / W.1b). The agent may override the global
66
+ // web-fetch behavior for a single fetch via a three-level `mode` enum:
67
+ // mode="summarized" (default) → extract → Markdown → secondary-LLM summary.
68
+ // mode="extracted" → extract → Markdown, NO summary (exact snippets).
69
+ // mode="raw" → bypass extraction entirely; return the ORIGINAL
70
+ // fetched HTML/content (token-capped, fenced) — for analyzing a page's
71
+ // markup/CSS/JS/structure, the one task extraction destroys.
72
+ // intent="…" → the reason for fetching, focusing the summary.
73
+ // Deprecated boolean aliases (kept for back-compat): summarize="false" and
74
+ // raw="true" both map to `extracted`. Precedence: an explicit `mode` always
75
+ // beats the legacy booleans; with neither, the global config default applies.
76
// Accepted values of the http_get `mode` option, as lowercase strings.
const WEB_FETCH_MODES = [
  'summarized',
  'extracted',
  'raw',
];
77
+
78
// Parse a loosely-typed boolean option value.
//
// v: any value; it is stringified, trimmed and lower-cased before matching.
// Returns true for true/1/yes/on, false for false/0/no/off, and undefined for
// null/undefined or any unrecognized spelling (so callers can tell "not given"
// apart from an explicit false).
function _httpGetBool(v) {
  if (v == null) return undefined;
  switch (String(v).trim().toLowerCase()) {
    case 'true':
    case '1':
    case 'yes':
    case 'on':
      return true;
    case 'false':
    case '0':
    case 'no':
    case 'off':
      return false;
    default:
      return undefined;
  }
}
85
+
86
// Canonicalize a `mode` value against WEB_FETCH_MODES.
//
// v: any value; compared after stringifying, trimming and lower-casing.
// Returns the matching mode string, or undefined when v is null/undefined or
// not one of the known modes.
function _httpGetMode(v) {
  if (v == null) return undefined;
  const candidate = String(v).trim().toLowerCase();
  const position = WEB_FETCH_MODES.indexOf(candidate);
  return position === -1 ? undefined : WEB_FETCH_MODES[position];
}
92
+
93
// Translate the deprecated summarize/raw boolean pair into a mode string.
// An explicit `mode` is resolved by the caller beforehand and wins over these.
//
// summarize: truthy → 'summarized', falsy → 'extracted'; consulted first.
// raw:       only consulted when summarize is undefined;
//            truthy → 'extracted', falsy → 'summarized'.
// Returns undefined when both are undefined.
function _legacyBoolsToMode(summarize, raw) {
  if (summarize === undefined && raw === undefined) return undefined;
  const wantsSummary = summarize !== undefined ? Boolean(summarize) : !raw;
  return wantsSummary ? 'summarized' : 'extracted';
}
100
+
101
// Validate and normalize a URL for the fetch tools (http_get / download).
//
// Why up-front validation: `new URL(...)` throws SYNCHRONOUSLY on malformed
// input. Left unguarded, that throw would surface outside the request-level
// `.on('error')` handlers (which only see async network failures) and escape
// the executor as an uncaught exception. Here every bad input becomes a plain
// `{ error, error_code }` result instead, the same shape the network-failure
// path returns.
//
// raw:  the candidate URL; must be a string (surrounding whitespace is trimmed).
// base: optional base URL used to resolve a relative `raw` (e.g. a redirect
//       `Location` header).
// Returns `{ url }` with the normalized href on success, or
// `{ error, error_code }` where error_code is 'ERR_INVALID_URL' (non-string,
// empty, or unparseable) or 'ERR_INVALID_PROTOCOL' (anything other than
// http:/https: — file:, ftp:, javascript:, data:, … parse cleanly but are
// refused).
function _validateFetchUrl(raw, base) {
  const fail = (error, error_code) => ({ error, error_code });

  if (typeof raw !== 'string') {
    const seen = raw === null ? 'null' : typeof raw;
    return fail(`Invalid URL: expected a string, got ${seen}`, 'ERR_INVALID_URL');
  }

  const candidate = raw.trim();
  if (candidate === '') return fail('Invalid URL: empty URL', 'ERR_INVALID_URL');

  let target;
  try {
    target = base ? new URL(candidate, base) : new URL(candidate);
  } catch (err) {
    return fail(`Invalid URL: ${err.message}`, 'ERR_INVALID_URL');
  }

  const { protocol } = target;
  if (protocol === 'http:' || protocol === 'https:') return { url: target.href };
  return fail(
    `Invalid URL: unsupported protocol "${protocol}" (only http and https are allowed)`,
    'ERR_INVALID_PROTOCOL',
  );
}
144
+
145
// Parse the per-call http_get options out of a tag's attribute string (XML path).
//
// attrStr: the raw attribute text of the tag (anything falsy is treated as '').
// Returns an options object that may carry:
//   mode   — one of WEB_FETCH_MODES; an explicit valid `mode` attribute wins,
//            otherwise the legacy summarize/raw booleans are mapped to a mode.
//   intent — the non-empty `intent` attribute, verbatim.
// Keys are omitted when the attribute is absent or unusable.
//
// Attributes are tokenized in one left-to-right pass (name = "value" or
// name = 'value'). Matching whole attributes — instead of searching for
// `name="` anywhere in the string — means a name is never matched as the tail
// of a longer one (`xmode`), nor found inside another attribute's quoted value
// (e.g. intent="set raw='true' later"). The first occurrence of a name wins.
function _httpGetOpts(attrStr) {
  const attrs = new Map();
  const attrRe = /([A-Za-z_][\w-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  for (const m of String(attrStr || '').matchAll(attrRe)) {
    if (!attrs.has(m[1])) attrs.set(m[1], m[2] !== undefined ? m[2] : m[3]);
  }
  const get = (name) => attrs.get(name);

  const opts = {};
  const mode = _httpGetMode(get('mode'));
  if (mode) {
    opts.mode = mode;
  } else {
    const legacy = _legacyBoolsToMode(_httpGetBool(get('summarize')), _httpGetBool(get('raw')));
    if (legacy) opts.mode = legacy;
  }
  const intent = get('intent');
  if (intent != null && intent !== '') opts.intent = intent;
  return opts;
}
162
+
163
// Build the per-call http_get options from native function-calling params.
//
// p: the params object; a missing or non-object value is treated as empty
//    (matching _webSearchOptsFromParams, which also tolerates a missing params
//    object) instead of throwing on property access.
// Returns an options object that may carry:
//   mode   — one of WEB_FETCH_MODES; an explicit valid `mode` wins, otherwise
//            the legacy `summarize` / `raw` flags are mapped to a mode. Only
//            real booleans are honored for the legacy flags.
//   intent — the trimmed `intent` string when non-empty.
function _httpGetOptsFromParams(p) {
  const params = p && typeof p === 'object' ? p : {};
  const opts = {};
  const mode = _httpGetMode(params.mode);
  if (mode) {
    opts.mode = mode;
  } else {
    const summarize = typeof params.summarize === 'boolean' ? params.summarize : undefined;
    const raw = typeof params.raw === 'boolean' ? params.raw : undefined;
    const legacy = _legacyBoolsToMode(summarize, raw);
    if (legacy) opts.mode = legacy;
  }
  if (typeof params.intent === 'string' && params.intent.trim()) opts.intent = params.intent.trim();
  return opts;
}
176
+
177
// web_search per-call options (Task W.2b). `count` is the only option today.
// It is bounded on the client so a huge value never leaves it; the backend
// clamps further.
const _WEB_SEARCH_MAX_COUNT = 10;

// Normalize a requested result count.
//
// v: number or numeric string (parsed base-10, so '3 results' reads as 3).
// Returns the count capped at _WEB_SEARCH_MAX_COUNT, or undefined for a
// missing, empty, non-numeric, zero or negative value so the backend default
// applies.
function _clampSearchCount(v) {
  if (v == null || v === '') return undefined;
  const parsed = Number.parseInt(v, 10);
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? Math.min(parsed, _WEB_SEARCH_MAX_COUNT) : undefined;
}
187
+
188
// Parse the per-call web_search options out of a tag's attribute string (XML path).
//
// attrStr: the raw attribute text of the tag (anything falsy is treated as '').
// Returns `{ count }` when a usable `count` attribute is present (clamped by
// _clampSearchCount), otherwise `{}`.
//
// Attributes are tokenized left to right and compared by their full name, so
// `count` is not matched as the tail of another name (`account`, `max_count`)
// or inside another attribute's quoted value. The first `count` wins.
function _webSearchOpts(attrStr) {
  const attrRe = /([A-Za-z_][\w-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  let rawCount;
  for (const m of String(attrStr || '').matchAll(attrRe)) {
    if (m[1] === 'count') {
      rawCount = m[2] !== undefined ? m[2] : m[3];
      break;
    }
  }
  const count = _clampSearchCount(rawCount);
  return count ? { count } : {};
}
194
+
195
// Build the per-call web_search options from native function-calling params.
//
// p: the params object; a falsy value is passed straight to _clampSearchCount,
//    which yields undefined for it.
// Returns `{ count }` when a usable count was supplied, otherwise `{}`.
function _webSearchOptsFromParams(p) {
  const requested = p ? p.count : p;
  const count = _clampSearchCount(requested);
  if (!count) return {};
  return { count };
}
199
+
200
// The web-fetch pipeline (Task W.1 / W.1b), shared by http_get's execute: turns
// a fetched body into the content that enters the main context.
//
// `mode` selects the depth of processing:
//   raw        → no extraction at all; the ORIGINAL fetched content is returned,
//                token-capped. HTML is capped with the markup-aware estimate
//                (Task W.4 Part 2) because markup tokenizes denser than prose;
//                other kinds use the prose estimate.
//   extracted  → extractContent() output (Markdown), token-capped, no summary.
//   summarized → as `extracted`, then — for HTML with non-blank content and a
//                callable `webChat` only — replaced by a secondary-LLM summary.
//
// The token cap (maxContentTokens) applies in every mode. If the summarizer
// throws, the result degrades to the capped extracted Markdown (never the raw
// page) and carries `summary_error`. This function performs no network I/O of
// its own; the LLM call is the injected `webChat`. The untrusted-content fence
// is applied by the caller (lib/agent.js), not here.
//
// Returns an object with status_code, bytes, kind, mode, extracted,
// summarized, content_tokens, content_truncated, transfer_capped and body;
// non-raw results also carry `title` (undefined when empty).
async function processWebContent({
  rawBody, contentType, url, statusCode, totalBytes, transferCapped,
  mode, intent, summaryModel, maxContentTokens, webChat, signal,
}) {
  if (mode === 'raw') {
    const rawKind = classifyContentType(contentType, url, rawBody);
    const rawCap = rawKind === 'html'
      ? capToTokens(rawBody, maxContentTokens, markupEstimate, MARKUP_CHARS_PER_TOKEN)
      : capToTokens(rawBody, maxContentTokens, defaultEstimate);
    return {
      status_code: statusCode,
      bytes: totalBytes,
      kind: rawKind,
      mode: 'raw',
      extracted: false,
      summarized: false,
      content_tokens: rawCap.tokens,
      content_truncated: rawCap.truncated,
      transfer_capped: !!transferCapped,
      body: rawCap.text,
    };
  }

  const extraction = extractContent({ body: rawBody, contentType, url });
  const limited = capToTokens(extraction.markdown, maxContentTokens, defaultEstimate);
  const common = {
    status_code: statusCode,
    bytes: totalBytes,
    kind: extraction.kind,
    mode,
    title: extraction.title || undefined,
    extracted: extraction.extracted,
    content_tokens: limited.tokens,
    content_truncated: limited.truncated,
    transfer_capped: !!transferCapped,
  };
  // Every non-raw result is `common` + body + summary flags, in that key order.
  const finish = (body, flags) => ({ ...common, body, ...flags });

  // Only HTML is summarized — JSON / plain text / Markdown pass through so
  // structured data is never smoothed over.
  const summarizable = extraction.kind === 'html' && limited.text.trim().length > 0;
  const shouldSummarize = mode === 'summarized' && summarizable && typeof webChat === 'function';
  if (!shouldSummarize) return finish(limited.text, { summarized: false });

  try {
    const summary = await summarizeWebContent({
      markdown: limited.text, intent, chat: webChat, model: summaryModel, signal,
    });
    return finish(summary, { summarized: true });
  } catch (err) {
    // Summary errored / timed out → fall back to the capped extracted Markdown.
    return finish(limited.text, { summarized: false, summary_error: err.message });
  }
}
278
+
279
+ // ── XML parse helpers (moved from lib/tools.js) ────────────────────────────
280
+
281
// Run one regex template against `text` for both quote styles.
//
// template: regex source in which every capital `Q` stands for the quote
//           character; it is instantiated once with " and once with '.
// Returns all match arrays: every double-quote match first (in text order),
// then every single-quote match.
function _matchDual(text, template) {
  return ['"', "'"].flatMap((quote) => {
    const pattern = new RegExp(template.replace(/Q/g, quote), 'g');
    return Array.from(text.matchAll(pattern));
  });
}
289
+
290
// Strip one wrapping element from a tag body, if the whole (trimmed) body is a
// single `<name ...>…</name>` pair with matching open/close names.
//
// Returns the trimmed inner text when such a wrapper is found; otherwise the
// original `inner` value untouched (including null/undefined, and NOT trimmed).
function _unwrapInnerTag(inner) {
  if (inner == null) return inner;
  const wrapped = /^<(\w+)(?:\s[^>]*)?>([\s\S]*)<\/\1>$/.exec(String(inner).trim());
  return wrapped ? wrapped[2].trim() : inner;
}
297
+
298
// read_file pagination rail (Task W.7). Parses both tag forms in one pass —
// self-closing `<read_file …/>` and `<read_file …>body</read_file>` — into
// tuples ['read', path, startLine|null, endLine|null, showLineNumbers].
//
// * path comes from the `path` attribute, or — when that attribute is absent or
//   blank — from the tag body (unwrapped via _unwrapInnerTag and trimmed). A tag
//   with no usable path is skipped, so an empty `path=""` never produces a read
//   of the empty path.
// * start_line / end_line are parsed base-10; absent or non-numeric → null
//   (parity with fromParams), so the formatter's defaults apply.
// * show_line_numbers is true only for the exact values 'true', '1' or 'yes'.
//
// Attributes are tokenized by full name (first occurrence wins), so `path` is
// not matched as the tail of a longer attribute name such as `filepath`.
function _parseReadTag(text) {
  const tagRe = /<read_file\b([^>]*?)(?:\/>|>([\s\S]*?)<\/read_file>)/g;
  const attrRe = /([A-Za-z_][\w-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  const toInt = (v) => {
    if (v == null) return null;
    const n = parseInt(v, 10);
    return Number.isFinite(n) ? n : null;
  };

  const out = [];
  for (const m of text.matchAll(tagRe)) {
    const attrs = new Map();
    for (const a of (m[1] || '').matchAll(attrRe)) {
      if (!attrs.has(a[1])) attrs.set(a[1], a[2] !== undefined ? a[2] : a[3]);
    }
    const attr = (k) => (attrs.has(k) ? attrs.get(k) : null);

    let p = attr('path');
    if (p == null || p.trim() === '') {
      const body = m[2] != null ? m[2] : '';
      p = _unwrapInnerTag(body).trim() || null;
    }
    if (p == null) continue;

    const sln = attr('show_line_numbers');
    out.push([
      'read',
      p,
      toInt(attr('start_line')),
      toInt(attr('end_line')),
      sln === 'true' || sln === '1' || sln === 'yes',
    ]);
  }
  return out;
}
323
+
324
// Collect every attribute-less `<tag>body</tag>` occurrence for a set of tag
// names and turn each into a call tuple.
//
// tagAlternation: regex alternation of tag names (e.g. 'foo|bar'); the opening
//                 and closing names are matched independently.
// action:         first element of each produced tuple.
// extraArgs:      appended after the body in each tuple.
// Returns [[action, body, ...extraArgs], …] in text order, where body is the
// tag content passed through _unwrapInnerTag and trimmed.
function _inline(text, tagAlternation, action, extraArgs = []) {
  const tagGroup = `(?:${tagAlternation})`;
  const pattern = new RegExp(`<${tagGroup}>([\\s\\S]*?)<\\/${tagGroup}>`, 'g');
  return Array.from(
    text.matchAll(pattern),
    (hit) => [action, _unwrapInnerTag(hit[1]).trim(), ...extraArgs],
  );
}
330
+
331
+ // The full ctx destructure, reused at the top of every execute/permission so the
332
+ // moved bodies see the same free names they had inside the createToolExecutor
333
+ // closure. Unused names in any given body are harmless.
334
+ // const CTX = (ctx) => ... (we inline the destructure literally for clarity)
335
+
336
+ // ── write/append share one body in agentExecFile; keep that sharing here ────
337
// Execute a write_file / append_file call (the two share one body).
//
// ctx:     dependency bag; this function reads _log, logToolCall, isPathSafe,
//          isProtectedConfigPath, _sandboxError, _protectedConfigWriteError,
//          _dryRun, _skippedOps, permissionManager and the color codes.
// action:  'write' replaces the file; anything else appends.
// args:    [filePath, content]; a missing/empty content writes ''.
// options: accepted for signature parity with the other executors; unused here.
//
// Checks run in this order, each returning early with a logged denial:
// read-only mode, protected config path, sandbox path check, then dry-run
// (which records the skipped op and touches nothing on disk).
//
// Returns { status: 'ok', path, bytes } on success, where `bytes` is the UTF-8
// byte length of the content written (not its UTF-16 length, which undercounts
// any non-ASCII text); the guard's own result on a denial; a dry-run
// descriptor; or { error } when the filesystem call fails.
async function _execWriteAppend(ctx, action, args, options) { // eslint-disable-line no-unused-vars
  const [filePath = null, content = null] = args;
  const {
    _log, logToolCall, isPathSafe, isProtectedConfigPath, _sandboxError,
    _protectedConfigWriteError, _dryRun, _skippedOps, permissionManager,
    FG_GREEN, FG_GRAY, FG_RED, RST,
  } = ctx;

  const isWrite = action === 'write';
  const tag = isWrite ? 'write_file' : 'append_file';

  const blocked = permissionManager.readonlyBlock(tag);
  if (blocked) {
    logToolCall(tag, { path: filePath, content }, false, 'denied');
    return blocked;
  }

  if (isProtectedConfigPath(filePath)) {
    logToolCall(tag, { path: filePath }, false, 'denied');
    return _protectedConfigWriteError(filePath);
  }

  if (!isPathSafe(filePath)) {
    logToolCall(tag, { path: filePath }, false, 'denied');
    return _sandboxError(filePath);
  }

  if (_dryRun) {
    const verb = isWrite ? 'write' : 'append';
    _skippedOps.push({ category: 'file', symbol: '✎', desc: `${verb} ${filePath}` });
    logToolCall(tag, { path: filePath }, false, 'dry-run');
    return { status: 'dry-run', message: 'dry-run: write skipped', path: filePath };
  }

  try {
    const payload = content || '';
    // Create missing parent directories; '.' (bare filename) needs none.
    const dir = path.dirname(filePath);
    if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
    if (isWrite) await fsp.writeFile(filePath, payload);
    else await fsp.appendFile(filePath, payload);
    const verb = isWrite ? 'Wrote' : 'Appended to';
    _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}${verb} ${filePath}${RST}`);
    logToolCall(tag, { path: filePath, content }, true, 'ok');
    return { status: 'ok', path: filePath, bytes: Buffer.byteLength(payload) };
  } catch (error) {
    _log(`  ${FG_RED}✗ ${error.message}${RST}`);
    logToolCall(tag, { path: filePath, content }, true, 'error');
    return { error: error.message };
  }
}
384
+
385
// Build the permission descriptor for a write_file / append_file call and
// render the diff preview.
//
// ctx:    dependency bag; reads _dryRun, _uiActive, renderDiff,
//         DIFF_BUBBLE_INSET and writer.
// action: 'write' (content replaces the file) or anything else (content is
//         appended to the current file text).
// args:   [filePath, content].
//
// The current file text is read best-effort: any read failure (typically the
// file not existing yet) is treated as an empty file. When the UI is not
// active the diff is pushed to scrollback immediately; when it is active the
// diff is rendered inset and appended to the description instead.
//
// Returns null in dry-run mode (after the diff has been rendered), otherwise
// { actionType: 'file', description, tag }.
async function _permWriteAppend(ctx, action, args) {
  const { _dryRun, _uiActive, renderDiff, DIFF_BUBBLE_INSET, writer } = ctx;
  const [filePath, content] = args;
  const isWrite = action === 'write';

  let before = '';
  try {
    before = await fsp.readFile(filePath, 'utf8');
  } catch {
    // Unreadable / missing file → diff against empty content.
  }
  const addition = content || '';
  const after = isWrite ? addition : before + addition;

  let diffOutput;
  if (_uiActive) {
    diffOutput = renderDiff(before, after, filePath, { inset: DIFF_BUBBLE_INSET });
  } else {
    diffOutput = renderDiff(before, after, filePath);
    writer.scrollback(diffOutput);
  }

  if (_dryRun) return null;

  const pieces = [`${isWrite ? 'Write' : 'Append to'} ${filePath}`];
  if (content) pieces.push(` (${content.length} chars)`);
  if (_uiActive) pieces.push(`\n${diffOutput}`);
  return {
    actionType: 'file',
    description: pieces.join(''),
    tag: isWrite ? 'write_file' : 'append_file',
  };
}
407
+
408
+ // ── grep / glob (Task 2.1) ─────────────────────────────────────────────────
409
+ //
410
+ // Canonical search semantics. The pure-Node implementation is the REFERENCE;
411
+ // ripgrep is invoked with a flag set chosen to reproduce it byte-for-byte (the
412
+ // equivalence is pinned by test/grep-glob.test.js):
413
+ // * recurse from baseDir
414
+ // * always skip directories named node_modules or .git
415
+ // * skip hidden entries (names beginning with ".")
416
+ // * honor a .gitignore in baseDir if present (common-subset rules below)
417
+ // * skip binary files (a NUL byte in the first 8 KB)
418
+ // * emit one record per matching LINE: { file, line, text }, sorted by
419
+ // (file, line); file is baseDir-relative POSIX. Output never carries the
420
+ // engine identity, so rg and Node results are deep-equal.
421
+
422
// Upper bounds for search results.
const GREP_MAX_MATCHES = 1000;
const GLOB_MAX_FILES = 5000;

// Directory names that are never descended into.
const GREP_SKIP_DIRS = new Set([
  'node_modules',
  '.git',
]);

// How many leading bytes of a file are inspected for a NUL byte (8 KB).
const BINARY_SNIFF_BYTES = 8 * 1024;
426
+
427
// grep output modes (Task W.5), Claude-Code-style. The model picks one through
// the `output_mode` parameter; the mode is applied at serialization time
// (lib/agent.js formatFileResult) to the same engine result:
//   * content            — file:line:text per match (default; "show me the lines")
//   * files_with_matches — unique file paths only ("which files")
//   * count              — match counts per file + total ("how many")
const GREP_OUTPUT_MODES = ['content', 'files_with_matches', 'count'];

// Return `m` when it is exactly one of GREP_OUTPUT_MODES, else 'content'.
function _normGrepMode(m) {
  const known = GREP_OUTPUT_MODES.indexOf(m) !== -1;
  return known ? m : 'content';
}
437
+ // head_limit / offset normalization (Task W.5). A positive integer bounds /
438
+ // skips results; anything else falls back (limit → default, offset → 0).
439
// Normalize a head_limit value to a positive integer.
//
// v:    number, or a string parsed base-10.
// dflt: returned whenever v does not yield a positive integer.
// Fractional numbers are floored BEFORE the positivity check, so a value in
// (0, 1) falls back to dflt rather than collapsing to a limit of 0.
function _normHeadLimit(v, dflt) {
  const parsed = typeof v === 'number' ? v : parseInt(v, 10);
  if (!Number.isFinite(parsed)) return dflt;
  const whole = Math.floor(parsed);
  return whole > 0 ? whole : dflt;
}
443
// Normalize an offset value: a finite positive number (or base-10 numeric
// string) is floored and returned; everything else yields 0.
function _normOffset(v) {
  const parsed = typeof v === 'number' ? v : parseInt(v, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return 0;
  return Math.floor(parsed);
}
447
+
448
// ripgrep detection. The probe runs at most once per process; its outcome is
// cached in the two module-level variables below.
//   SEMALT_NO_RG  — when set (non-empty), skip the probe and report "no rg".
//   SEMALT_RG_BIN — alternate binary to probe instead of `rg`.
// (Both are used by tests.)
let _rgChecked = false;
let _rgBin = null;

// Returns the ripgrep binary name/path if `<bin> --version` exited with status
// 0, otherwise null.
function _detectRipgrep() {
  if (_rgChecked) return _rgBin;
  _rgChecked = true;

  if (process.env.SEMALT_NO_RG) {
    _rgBin = null;
  } else {
    const candidate = process.env.SEMALT_RG_BIN || 'rg';
    try {
      const probe = spawnSync(candidate, ['--version'], { encoding: 'utf8' });
      if (probe && probe.status === 0) _rgBin = candidate;
    } catch { /* rg not on PATH */ }
  }
  return _rgBin;
}
463
+
464
// Convert a path using the platform separator into forward-slash (POSIX) form.
function _toPosix(p) {
  const segments = p.split(path.sep);
  return segments.join('/');
}
465
+
466
// Glob → anchored RegExp.
//   *    → any run of characters within one path segment (no '/')
//   ?    → exactly one character within a path segment (no '/')
//   **/  → any number of leading directories, including none
//   **   → anything, across segments
// Every other regex metacharacter is matched literally.
//
// `?` must be translated explicitly: passed through unescaped it acts as a
// regex quantifier, so `file?.txt` would match `fil.txt`, and a pattern
// starting with `?` would make `new RegExp` throw.
// NOTE(review): the original comment says this mirrors the search_files
// conversion; confirm search_files treats `?` the same way.
function _globToRegExp(glob) {
  let s = glob.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  // Park `**` on a NUL sentinel so the single-star rule cannot touch it.
  s = s.replace(/\*\*/g, '\x00');
  s = s.replace(/\*/g, '[^/]*');
  // Translate `?` before the sentinel is expanded: the `**/` expansion itself
  // contains a literal regex `?` that must survive.
  s = s.replace(/\?/g, '[^/]');
  s = s.replace(/\x00\//g, '(?:.*/)?');
  s = s.replace(/\x00/g, '.*');
  return new RegExp(`^${s}$`);
}
476
+
477
// Load the ignore rules from `<baseDir>/.gitignore` (common subset only; nested
// .gitignore files are not read).
//
// Per line, after trimming surrounding whitespace:
//   * blank lines and lines starting with '#' are skipped
//   * a leading '!' marks a negation rule
//   * a trailing '/' marks a directory-only rule
//   * a leading '/', or a '/' anywhere in what remains, makes the rule anchored
//     (tested against the baseDir-relative path instead of the basename)
// Returns the rules in file order as { negate, dirOnly, anchored, re }, or []
// when the file cannot be read. Callers apply them "last match wins".
function _loadGitignore(baseDir) {
  let contents;
  try {
    contents = fs.readFileSync(path.join(baseDir, '.gitignore'), 'utf8');
  } catch {
    return [];
  }

  const rules = [];
  for (const rawLine of contents.split('\n')) {
    let pattern = rawLine.trim();
    if (pattern === '' || pattern[0] === '#') continue;

    const negate = pattern[0] === '!';
    if (negate) pattern = pattern.slice(1);

    const dirOnly = pattern.endsWith('/');
    if (dirOnly) pattern = pattern.slice(0, -1);

    const rooted = pattern.startsWith('/');
    if (rooted) pattern = pattern.slice(1);

    // Nothing left once the markers are stripped (e.g. a bare "!" or "/").
    if (pattern === '') continue;

    rules.push({
      negate,
      dirOnly,
      anchored: rooted || pattern.includes('/'),
      re: _globToRegExp(pattern),
    });
  }
  return rules;
}
500
+
501
// Decide whether an entry is ignored by the loaded .gitignore rules.
//
// rules: output of _loadGitignore, in file order.
// rel:   baseDir-relative POSIX path of the entry.
// name:  the entry's basename.
// isDir: whether the entry is a directory (directory-only rules are skipped
//        for files).
// Anchored rules are tested against `rel`, the rest against `name`. The LAST
// matching rule decides: a plain rule ignores, a negated rule un-ignores.
function _gitignored(rules, rel, name, isDir) {
  return rules.reduce((verdict, rule) => {
    if (rule.dirOnly && !isDir) return verdict;
    const subject = rule.anchored ? rel : name;
    return rule.re.test(subject) ? !rule.negate : verdict;
  }, false);
}
510
+
511
// Binary sniff: true when a NUL byte occurs within the first
// BINARY_SNIFF_BYTES bytes of `buf` (or within the whole buffer if shorter).
function _isBinaryBuf(buf) {
  const limit = buf.length < BINARY_SNIFF_BYTES ? buf.length : BINARY_SNIFF_BYTES;
  let idx = 0;
  while (idx < limit) {
    if (buf[idx] === 0) return true;
    idx += 1;
  }
  return false;
}
516
+
517
// Iterative depth-first walk of baseDir applying the canonical skip rules.
//
// Skipped: entries whose name starts with '.', directories named in
// GREP_SKIP_DIRS, anything matched by the .gitignore `rules`, and every entry
// that is neither a directory nor a regular file (symlinks, sockets, …).
// Directories that cannot be listed are silently passed over.
//
// onFile(rel, name, absPath) is invoked synchronously for each surviving file;
// `rel` is the baseDir-relative path joined with '/'.
// The abort signal is checked once per directory, before it is listed.
// Returns false if the walk stopped on an aborted signal, true otherwise.
function _walkTree(baseDir, { rules = [], signal = null, onFile }) {
  const pending = [{ dir: baseDir, rel: '' }];
  const hasRules = rules.length > 0;

  while (pending.length > 0) {
    if (signal && signal.aborted) return false;

    const current = pending.pop();
    let listing;
    try {
      listing = fs.readdirSync(current.dir, { withFileTypes: true });
    } catch {
      continue;
    }

    for (const entry of listing) {
      const { name } = entry;
      if (name.startsWith('.')) continue; // hidden

      const relPath = current.rel ? `${current.rel}/${name}` : name;
      const absPath = path.join(current.dir, name);

      if (entry.isDirectory()) {
        const pruned = GREP_SKIP_DIRS.has(name)
          || (hasRules && _gitignored(rules, relPath, name, true));
        if (!pruned) pending.push({ dir: absPath, rel: relPath });
      } else if (entry.isFile() && !(hasRules && _gitignored(rules, relPath, name, false))) {
        onFile(relPath, name, absPath);
      }
    }
  }
  return true;
}
546
+
547
// Pure-Node grep engine. Compiles `pattern` as a JavaScript RegExp (with the
// `i` flag when ignoreCase is set), walks baseDir honoring its .gitignore, and
// tests every line of each non-binary file. pathGlob, when given, filters files:
// a glob without a slash is matched against the basename, otherwise against the
// baseDir-relative path.
// Returns { matches: [{ file, line, text }] } with 1-based line numbers,
// { error } for an invalid pattern, or { aborted: true } if the walk aborted.
function _grepNode({ pattern, pathGlob, ignoreCase, baseDir, signal }) {
  let lineRe;
  try {
    lineRe = new RegExp(pattern, ignoreCase ? 'i' : '');
  } catch (err) {
    return { error: `Invalid regex pattern: ${err.message}` };
  }
  const rules = _loadGitignore(baseDir);
  const fileFilter = pathGlob ? _globToRegExp(pathGlob) : null;
  const filterOnBasename = pathGlob && !pathGlob.includes('/');
  const matches = [];

  const scanFile = (rel, name, abs) => {
    if (fileFilter && !fileFilter.test(filterOnBasename ? name : rel)) return;
    let bytes;
    try {
      bytes = fs.readFileSync(abs);
    } catch {
      return; // unreadable file: skip it
    }
    if (_isBinaryBuf(bytes)) return;
    const content = bytes.toString('utf8');
    const rows = content.split('\n');
    // A trailing newline terminates the last line; drop the phantom empty
    // element split() produces so line counting matches ripgrep.
    if (content.endsWith('\n')) rows.pop();
    const file = _toPosix(rel);
    rows.forEach((text, idx) => {
      if (lineRe.test(text)) matches.push({ file, line: idx + 1, text });
    });
  };

  const completed = _walkTree(baseDir, { rules, signal, onFile: scanFile });
  return completed ? { matches } : { aborted: true };
}
577
+
578
// ripgrep-backed grep engine. Spawns rg synchronously with --json output in
// baseDir and converts its `match` records into { file, line, text } objects.
// Returns { matches }, { error } (spawn failure or abnormal rg exit), or
// { aborted: true } when the signal was already aborted before spawning.
function _grepRg({ pattern, pathGlob, ignoreCase, baseDir, signal }) {
  if (signal && signal.aborted) return { aborted: true };
  const bin = _detectRipgrep() || process.env.SEMALT_RG_BIN || 'rg';
  // These flags make rg honor the baseDir .gitignore without a git repo while
  // ignoring parent/global/.ignore files, and unconditionally drop node_modules
  // — exactly the canonical Node semantics above. Hidden entries and binary
  // files are skipped by rg's defaults.
  const args = ['--json', '--no-require-git', '--no-ignore-parent', '--no-ignore-global', '--no-ignore-dot', '-g', '!node_modules'];
  if (ignoreCase) args.push('-i');
  if (pathGlob) args.push('-g', pathGlob);
  // --regexp keeps a pattern that starts with '-' from being read as a flag.
  args.push('--regexp', pattern, '--', '.');
  const r = spawnSync(bin, args, { cwd: baseDir, encoding: 'utf8', maxBuffer: 64 * 1024 * 1024 });
  if (r.error) return { error: r.error.message };
  // status 0 = matches, 1 = no matches — both are success. Anything else is a
  // failure: 2 is rg's own error code, and null means the child was killed by
  // a signal, in which case stdout may hold only a partial result set that
  // must not be reported as complete.
  if (r.status !== 0 && r.status !== 1) {
    const detail = (r.stderr || '').trim();
    if (detail) return { error: detail };
    // Always return a non-empty message so callers testing `raw.error` see it.
    return {
      error: r.status === 2
        ? 'ripgrep error'
        : `ripgrep terminated abnormally (status ${r.status}, signal ${r.signal})`,
    };
  }
  const matches = [];
  if (r.stdout) {
    for (const ln of r.stdout.split('\n')) {
      if (!ln) continue;
      let obj;
      try { obj = JSON.parse(ln); } catch { continue; }
      if (!obj || obj.type !== 'match') continue;
      const d = obj.data;
      // Skip malformed records and non-UTF8 lines (rg emits `bytes`, not `text`).
      if (!d || !d.lines || typeof d.lines.text !== 'string') continue;
      let file = d.path && d.path.text ? d.path.text : '';
      if (file.startsWith('./')) file = file.slice(2);
      let text = d.lines.text;
      if (text.endsWith('\n')) text = text.slice(0, -1);
      matches.push({ file: _toPosix(file), line: d.line_number, text });
    }
  }
  return { matches };
}
611
+
612
// Turn a raw engine result into the public grep result. Error and aborted
// results pass through untouched (a missing result becomes a generic error).
// Otherwise a copy of the matches is sorted by file, then line, capped at
// GREP_MAX_MATCHES, and returned as { matches, pattern, count } with
// `truncated: true` added only when the cap was hit.
function _finalizeGrep(raw, pattern) {
  if (!raw) return { error: 'grep failed' };
  if (raw.error || raw.aborted) return raw;
  const ordered = [...raw.matches].sort((left, right) => {
    if (left.file < right.file) return -1;
    if (left.file > right.file) return 1;
    return left.line - right.line;
  });
  const truncated = ordered.length > GREP_MAX_MATCHES;
  const matches = truncated ? ordered.slice(0, GREP_MAX_MATCHES) : ordered;
  const result = { matches, pattern, count: matches.length };
  if (truncated) result.truncated = true;
  return result;
}
622
+
623
// Grep entry point. engine: 'auto' (rg if available, else Node), 'rg', or
// 'node'. In 'auto' mode an rg error falls back to the Node engine; an explicit
// 'rg' request surfaces the error instead. Exported for the parity tests, which
// drive both engines and assert deep equality.
function _grepSearch({ pattern, pathGlob = null, ignoreCase = false, baseDir = '.', engine = 'auto', signal = null }) {
  if (typeof pattern !== 'string' || pattern === '') return { error: 'grep: pattern is required' };
  const request = { pattern, pathGlob, ignoreCase, baseDir, signal };
  const isAuto = engine === 'auto';
  const wantsRg = engine === 'rg' || (isAuto && !!_detectRipgrep());
  let raw = wantsRg ? _grepRg(request) : _grepNode(request);
  if (wantsRg && isAuto && raw && raw.error) {
    raw = _grepNode(request);
  }
  if (raw && (raw.error || raw.aborted)) return raw;
  return _finalizeGrep(raw, pattern);
}
640
+
641
// Find files under baseDir whose name or path matches a glob. A pattern without
// a slash is matched against basenames, otherwise against the baseDir-relative
// path. Collection stops adding entries once GLOB_MAX_FILES have been gathered.
// Returns { files: [{ path, size, mtime }], pattern, dir, count } sorted by
// path (plus `truncated: true` when the cap was reached), { error } for a
// missing pattern, or { aborted: true } if the walk was aborted.
function _globSearch({ pattern, baseDir = '.', signal = null }) {
  if (typeof pattern !== 'string' || pattern === '') return { error: 'glob: pattern is required' };
  const matcher = _globToRegExp(pattern);
  const matchOnBasename = !pattern.includes('/');
  const files = [];
  let truncated = false;

  const collect = (rel, name, abs) => {
    if (truncated) return;
    const subject = matchOnBasename ? name : rel;
    if (!matcher.test(subject)) return;
    let info;
    try {
      info = fs.statSync(abs);
    } catch {
      return; // entry vanished or is unreadable: leave it out
    }
    files.push({ path: _toPosix(rel), size: info.size, mtime: info.mtime.toISOString() });
    truncated = files.length >= GLOB_MAX_FILES;
  };

  // glob does not apply .gitignore (only node_modules/.git/hidden are skipped).
  const completed = _walkTree(baseDir, { rules: [], signal, onFile: collect });
  if (!completed) return { aborted: true };

  files.sort((left, right) => {
    if (left.path < right.path) return -1;
    return left.path > right.path ? 1 : 0;
  });
  const result = { files, pattern, dir: baseDir, count: files.length };
  if (truncated) result.truncated = true;
  return result;
}
666
+
667
// Shared XML parser for the attribute-or-inline <grep>/<glob> tags.
// Accepts both `<tag attr="…"/>` and `<tag …>inline pattern</tag>`. The pattern
// comes from the `pattern` attribute, falling back to the trimmed tag body;
// tags with neither are skipped.
// Returns one tuple per tag:
//   ['grep', pattern, path|null, ignoreCase, output_mode|null, head_limit, offset]
//   ['glob', pattern, path|dir|'.', head_limit, offset]
// head_limit / offset are passed through as raw attribute strings (or null).
function _parseSearchTag(text, tag) {
  const out = [];
  // The attribute section is matched quote-aware first, so a '>' inside a quoted
  // value (e.g. pattern="=>" or pattern="a->b") does not end the tag early. The
  // second alternative is the legacy "anything up to the first '>'" form, kept
  // as a fallback for tags whose attribute text has unbalanced quotes.
  const quotedAttrs = `(?:[^>"']|"[^"]*"|'[^']*')*?`;
  const re = new RegExp(`<${tag}\\b(?:(${quotedAttrs})|([^>]*?))(?:\\/>|>([\\s\\S]*?)<\\/${tag}>)`, 'g');
  for (const m of text.matchAll(re)) {
    const attrStr = m[1] ?? m[2] ?? '';
    const body = m[3] != null ? m[3] : '';
    // Dual-quote attribute lookup: key="…" is tried first, then key='…'.
    const attr = (k) => {
      const mm = attrStr.match(new RegExp(`${k}="([^"]*)"`)) || attrStr.match(new RegExp(`${k}='([^']*)'`));
      return mm ? mm[1] : null;
    };
    let pattern = attr('pattern');
    if (pattern == null) { const b = body.trim(); pattern = b || null; }
    if (pattern == null) continue;
    if (tag === 'grep') {
      const ic = attr('ignore_case');
      out.push(['grep', pattern, attr('path') || null, ic === 'true' || ic === '1' || ic === 'yes',
        attr('output_mode') || null, attr('head_limit'), attr('offset')]);
    } else {
      out.push(['glob', pattern, attr('path') || attr('dir') || '.', attr('head_limit'), attr('offset')]);
    }
  }
  return out;
}
691
+
692
+ // ── Native git tools (Task 5.1) ────────────────────────────────────────────
693
+ //
694
+ // First-class git operations: structured results for the common verbs, the long
695
+ // tail left to the (sandboxed) generic shell. Every git tool shells out through
696
+ // ctx.agentExecShell — the SAME sandbox + deny-list chokepoint as <shell> — so it
697
+ // gets NO privileged path around confinement (constraint #5). Read-only tools
698
+ // (status/diff/log, and the list ops of branch/worktree) return a null permission
699
+ // descriptor; mutating tools (add/commit/branch-create/checkout/worktree-add+remove)
700
+ // require approval, honor --readonly via permissionManager.readonlyBlock, and are
701
+ // subject to the per-pattern rules + deny-list. Checkpoints (Task 4.3) snapshot
702
+ // FILE-TOOL mutations only — git operations are NOT reversible via /rewind, and
703
+ // git_checkout can discard uncommitted work that checkpoints never captured.
704
+
705
// Quote a single argument for the command string handed to agentExecShell, so
// metacharacters in branch names / paths / commit messages cannot split it
// into several words. null/undefined become the empty string.
// Windows: wrap in double quotes and double any embedded '"' (cmd.exe style).
// Elsewhere: wrap in single quotes and rewrite each embedded ' as '\''.
// The deny-list + sandbox remain the security boundary; this only guards
// against accidental word-splitting of the structured arguments.
function _shQuote(arg) {
  const text = arg == null ? '' : String(arg);
  const onWindows = process.platform === 'win32';
  if (onWindows) {
    return `"${text.split('"').join('""')}"`;
  }
  return `'${text.split("'").join("'\\''")}'`;
}
715
+
716
// Build the shell command line for a git invocation: the literal "git"
// followed by every argv element, each individually shell-quoted.
function _gitCommand(argv) {
  const quoted = argv.map((piece) => _shQuote(piece));
  return `git ${quoted.join(' ')}`;
}
719
+
720
// Execute a git invocation through ctx.agentExecShell (the shared shell
// chokepoint) and hand back whatever it returns — the raw shell result
// ({ exit_code, stdout, stderr, blocked, sandbox }). A missing `options`
// argument is replaced by an empty object.
function _runGit(ctx, argv, options) {
  const commandLine = _gitCommand(argv);
  const shellOptions = options || {};
  return ctx.agentExecShell(commandLine, shellOptions);
}
725
+
726
// Map a failed git invocation to a structured { error } — degrading gracefully
// for the "not a repo" and "git absent" cases rather than surfacing raw noise.
// `res` is the shell result ({ exit_code, stdout, stderr, blocked }); a
// null/undefined result yields a generic "exited with code ?" error.
function _gitFailure(res) {
  if (res && res.blocked) return { error: (res.stderr || 'git command blocked').trim() };
  const err = (res && res.stderr ? res.stderr : '').trim();
  const out = res && res.stdout ? res.stdout : '';
  const combined = `${err}\n${out}`.toLowerCase();
  if (/not a git repository/.test(combined)) {
    return { error: 'Not a git repository. Run this inside a git working tree.' };
  }
  // "No such file or directory" is ambiguous: the shell prints it when the git
  // binary is missing, but git prints it too for a missing path (e.g.
  // "fatal: could not create leading directories of …: No such file or
  // directory"). A line carrying git's own fatal:/error:/warning: prefix proves
  // git ran, so only un-prefixed occurrences count as "git absent".
  const shellSaysMissing = /command not found|not recognized as an internal/.test(combined)
    || /^(?!(?:fatal|error|warning): ).*no such file or directory/m.test(combined);
  // 127 is the exit status shells use for "command not found".
  if ((res && res.exit_code === 127) || shellSaysMissing) {
    return { error: 'git is not installed or not found on PATH.' };
  }
  return { error: err || out.trim() || `git exited with code ${res ? res.exit_code : '?'}` };
}
740
+
741
// Record a git tool call in the audit log via ctx.logToolCall. The third
// argument passed to the logger is false only for status 'error'. Any
// exception raised while logging is swallowed: auditing is best-effort and
// must never fail the tool call itself.
function _gitLog(ctx, action, args, status) {
  const notError = status !== 'error';
  try {
    ctx.logToolCall(action, { args }, notError, status);
  } catch {
    /* audit best-effort */
  }
}
744
+
745
// Extract the branch name from the `## …` header line printed by
// `git status --porcelain=v1 --branch`.
//   "## No commits yet on <name>"  -> <name>
//   "## HEAD (no branch)"          -> 'HEAD (detached)'
//   "## <name>...<upstream> [..]"  -> <name>
function _parseStatusBranch(header) {
  const UNBORN_PREFIX = 'No commits yet on ';
  const body = String(header).replace(/^##\s*/, '');
  if (body.startsWith(UNBORN_PREFIX)) return body.slice(UNBORN_PREFIX.length).trim();
  if (body.startsWith('HEAD ')) return 'HEAD (detached)';
  // Keep what precedes the upstream separator, then what precedes the first
  // space (tracking info such as "[ahead 1]").
  const [localPart] = body.split('...');
  const [branchName] = localPart.split(' ');
  return branchName.trim();
}
756
+
757
// Parse unified diff text (as printed by `git diff`) into an array of per-file
// records: [{ file, additions, deletions, hunks: [{ header, lines }] }].
// `file` is taken from the `diff --git` line and refined by the `+++ b/…`
// header when present; additions/deletions count the '+' / '-' lines inside
// hunks. Lines before the first `diff --git` are ignored.
function _parseDiff(raw) {
  const files = [];
  let cur = null;
  let hunk = null;
  for (const line of String(raw).split('\n')) {
    if (line.startsWith('diff --git ')) {
      cur = { file: null, additions: 0, deletions: 0, hunks: [] };
      hunk = null;
      files.push(cur);
      const m = line.match(/ b\/(.+)$/);
      if (m) cur.file = m[1];
      continue;
    }
    if (!cur) continue;
    if (line.startsWith('@@')) {
      hunk = { header: line, lines: [] };
      cur.hunks.push(hunk);
      continue;
    }
    if (!hunk) {
      // Still in the per-file header (index / --- / +++ / mode lines). Only
      // here is "+++ b/…" a file-name header. Inside a hunk the same text is
      // content: an added line "++ b/x" renders as "+++ b/x" and a removed
      // line "-- x" renders as "--- x", and both must be kept and counted.
      if (line.startsWith('+++ b/')) cur.file = line.slice(6);
      continue;
    }
    hunk.lines.push(line);
    if (line.startsWith('+')) cur.additions++;
    else if (line.startsWith('-')) cur.deletions++;
  }
  return files;
}
786
+
787
// Convert `git worktree list --porcelain` output into [{ path, head, branch }].
// Each "worktree <path>" line opens a record; the "HEAD <sha>", "branch <ref>"
// (with the refs/heads/ prefix removed) and "detached" lines that follow fill
// it in. Other lines, and anything before the first record, are ignored.
function _parseWorktrees(raw) {
  const entries = [];
  let current = null;
  String(raw).split('\n').forEach((row) => {
    if (row.startsWith('worktree ')) {
      current = { path: row.slice('worktree '.length), head: null, branch: null };
      entries.push(current);
      return;
    }
    if (!current) return;
    if (row.startsWith('HEAD ')) {
      current.head = row.slice('HEAD '.length);
    } else if (row.startsWith('branch ')) {
      current.branch = row.slice('branch '.length).replace(/^refs\/heads\//, '');
    } else if (row === 'detached') {
      current.branch = '(detached)';
    }
  });
  return entries;
}
805
+
806
// XML attribute extractor (dual-quote) used by the git tag parser, whose
// `spec` argument has the shape { str: [...], bool: [...], num: [...],
// inline: 'key'? }. Looks for key="…" first and key='…' second and returns the
// captured value, or null when the attribute is absent.
function _gitAttr(attrStr, key) {
  for (const quote of ['"', "'"]) {
    const found = attrStr.match(new RegExp(`${key}=${quote}([^${quote}]*)${quote}`));
    if (found) return found[1];
  }
  return null;
}
812
// Boolean reading of an XML attribute value: 'true', '1', 'yes' and the empty
// string (attr="") are true; every other value is false.
function _gitTruthy(v) {
  return ['true', '1', 'yes', ''].includes(v);
}
813
// Parse every `<tag …/>` or `<tag …>body</tag>` occurrence in `text` into
// [tag, opts] tuples. `spec` selects which attributes to read and how:
//   spec.str    — copied as strings
//   spec.bool   — converted with _gitTruthy
//   spec.num    — parsed as base-10 integers (dropped when not numeric)
//   spec.inline — option name that receives the trimmed tag body when the
//                 attribute of the same name was not supplied
// Attributes that are absent are simply left out of opts.
function _parseGitTag(text, tag, spec) {
  const out = [];
  // The attribute section is matched quote-aware first, so a '>' inside a quoted
  // value (e.g. message="rename a -> b") does not end the tag early. The second
  // alternative is the legacy "anything up to the first '>'" form, kept as a
  // fallback for tags whose attribute text has unbalanced quotes.
  const quotedAttrs = `(?:[^>"']|"[^"]*"|'[^']*')*?`;
  const re = new RegExp(`<${tag}\\b(?:(${quotedAttrs})|([^>]*?))(?:\\/>|>([\\s\\S]*?)<\\/${tag}>)`, 'g');
  for (const m of text.matchAll(re)) {
    const attrStr = m[1] ?? m[2] ?? '';
    const body = m[3] != null ? m[3] : '';
    const opts = {};
    for (const k of spec.str || []) { const v = _gitAttr(attrStr, k); if (v != null) opts[k] = v; }
    for (const k of spec.bool || []) { const v = _gitAttr(attrStr, k); if (v != null) opts[k] = _gitTruthy(v); }
    for (const k of spec.num || []) {
      const v = _gitAttr(attrStr, k);
      if (v != null && v !== '') { const n = parseInt(v, 10); if (!Number.isNaN(n)) opts[k] = n; }
    }
    // An explicit attribute wins over the inline body.
    if (spec.inline) { const b = body.trim(); if (b && opts[spec.inline] == null) opts[spec.inline] = b; }
    out.push([tag, opts]);
  }
  return out;
}
831
+
832
// Registry of the native git tools. Every entry has the same shape:
//   tool / specNames / tags — tool id, function-call names and XML tag names
//   parseXml(text)          — extract [tool, opts] calls from XML-style text
//   fromParams(p)           — build [tool, opts] from function-call params
//   permission(ctx, args)   — null for read-only operations, otherwise an
//                             approval descriptor { actionType, description, tag }
//   execute(ctx, args, options) — run the operation; resolves to
//                             { status: 'ok', …, summary } or { error }
// args[0] is the opts object. Mutating operations first consult
// ctx.permissionManager.readonlyBlock(tool) and return its result when it is
// truthy. Every outcome is recorded through _gitLog.
const GIT_TOOL_REGISTRY = [
  {
    tool: 'git_status',
    specNames: ['git_status'],
    tags: ['git_status'],
    parseXml: (text) => _parseGitTag(text, 'git_status', {}),
    fromParams: () => ['git_status', {}],
    permission: () => null,
    execute: async (ctx, args, options) => {
      const res = await _runGit(ctx, ['status', '--porcelain=v1', '--branch'], options);
      if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_status', args, 'error'); return _gitFailure(res); }
      const staged = [];
      const unstaged = [];
      const untracked = [];
      let branch = null;
      for (const line of res.stdout.split('\n')) {
        if (!line) continue;
        if (line.startsWith('## ')) { branch = _parseStatusBranch(line); continue; }
        // Porcelain v1 entry: "XY <path>" — X is the index (staged) state,
        // Y the working-tree (unstaged) state, the path starts at column 3.
        const x = line[0];
        const y = line[1];
        const p = line.slice(3);
        if (line.startsWith('??')) { untracked.push(p); continue; }
        if (x && x !== ' ' && x !== '?') staged.push({ path: p, status: x });
        if (y && y !== ' ' && y !== '?') unstaged.push({ path: p, status: y });
      }
      const clean = staged.length === 0 && unstaged.length === 0 && untracked.length === 0;
      const summary = `On branch ${branch || '(unknown)'} — ${clean ? 'clean' : `staged: ${staged.length}, unstaged: ${unstaged.length}, untracked: ${untracked.length}`}`;
      _gitLog(ctx, 'git_status', args, 'ok');
      return { status: 'ok', branch, staged, unstaged, untracked, clean, summary };
    },
  },
  {
    tool: 'git_diff',
    specNames: ['git_diff'],
    tags: ['git_diff'],
    parseXml: (text) => _parseGitTag(text, 'git_diff', { str: ['path'], bool: ['staged'] }),
    fromParams: (p) => ['git_diff', { ...((p.staged || p.cached) ? { staged: true } : {}), ...(p.path ? { path: String(p.path) } : {}) }],
    permission: () => null,
    execute: async (ctx, args, options) => {
      const o = args[0] || {};
      const argv = ['diff'];
      if (o.staged) argv.push('--cached');
      if (o.path) argv.push('--', String(o.path));
      const res = await _runGit(ctx, argv, options);
      if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_diff', args, 'error'); return _gitFailure(res); }
      const files = _parseDiff(res.stdout);
      const additions = files.reduce((s, f) => s + f.additions, 0);
      const deletions = files.reduce((s, f) => s + f.deletions, 0);
      const summary = files.length
        ? `${files.length} file(s) changed, +${additions} -${deletions}`
        : 'No changes';
      _gitLog(ctx, 'git_diff', args, 'ok');
      return { status: 'ok', staged: !!o.staged, files, additions, deletions, raw: res.stdout, summary };
    },
  },
  {
    tool: 'git_log',
    specNames: ['git_log'],
    tags: ['git_log'],
    parseXml: (text) => _parseGitTag(text, 'git_log', { str: ['path'], num: ['count'] }),
    fromParams: (p) => {
      const count = Number.isInteger(p.count) ? p.count : (p.count != null && p.count !== '' ? parseInt(p.count, 10) : undefined);
      return ['git_log', { ...(count ? { count } : {}), ...(p.path ? { path: String(p.path) } : {}) }];
    },
    permission: () => null,
    execute: async (ctx, args, options) => {
      const o = args[0] || {};
      // Default to the 20 most recent commits when no positive count is given.
      const count = Number.isInteger(o.count) && o.count > 0 ? o.count : 20;
      // ASCII unit separator between fields, so subjects can hold any text.
      const SEP = '\x1f';
      const argv = ['log', '-n', String(count), `--pretty=format:%H${SEP}%an${SEP}%ae${SEP}%ad${SEP}%s`, '--date=iso'];
      if (o.path) argv.push('--', String(o.path));
      const res = await _runGit(ctx, argv, options);
      if (res.blocked || res.exit_code !== 0) {
        const combined = `${res.stderr || ''}\n${res.stdout || ''}`.toLowerCase();
        // A fresh repo with no commits is a valid empty result, not an error.
        if (/does not have any commits yet|bad default revision|unknown revision|ambiguous argument 'head'/.test(combined)) {
          _gitLog(ctx, 'git_log', args, 'ok');
          return { status: 'ok', commits: [], count: 0, summary: 'No commits yet' };
        }
        _gitLog(ctx, 'git_log', args, 'error');
        return _gitFailure(res);
      }
      const commits = [];
      for (const line of res.stdout.split('\n')) {
        if (!line) continue;
        // Re-join the remainder in case the subject itself contains SEP.
        const [hash, author, email, date, ...rest] = line.split(SEP);
        commits.push({ hash, short: (hash || '').slice(0, 7), author, email, date, subject: rest.join(SEP) });
      }
      _gitLog(ctx, 'git_log', args, 'ok');
      return { status: 'ok', commits, count: commits.length, summary: `${commits.length} commit(s)` };
    },
  },
  {
    tool: 'git_add',
    specNames: ['git_add'],
    tags: ['git_add'],
    parseXml: (text) => _parseGitTag(text, 'git_add', { str: ['paths'], bool: ['all'] }),
    fromParams: (p) => ['git_add', { ...(p.paths != null ? { paths: p.paths } : {}), ...(p.all ? { all: true } : {}) }],
    permission: () => ({ actionType: 'git', description: 'git add (stage changes)', tag: 'git_add' }),
    execute: async (ctx, args, options) => {
      const o = args[0] || {};
      const blocked = ctx.permissionManager.readonlyBlock('git_add');
      if (blocked) { _gitLog(ctx, 'git_add', args, 'error'); return blocked; }
      // `paths` may be an array or a whitespace-separated string (the XML form).
      let paths = [];
      if (Array.isArray(o.paths)) paths = o.paths.map(String).filter(Boolean);
      else if (typeof o.paths === 'string' && o.paths.trim()) paths = o.paths.trim().split(/\s+/);
      if (!o.all && paths.length === 0) {
        _gitLog(ctx, 'git_add', args, 'error');
        return { error: 'git_add requires `paths` (one or more files) or `all: true`.' };
      }
      const argv = o.all ? ['add', '-A'] : ['add', '--', ...paths];
      const res = await _runGit(ctx, argv, options);
      if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_add', args, 'error'); return _gitFailure(res); }
      const added = o.all ? ['-A (all)'] : paths;
      _gitLog(ctx, 'git_add', args, 'ok');
      return { status: 'ok', added, summary: `Staged ${o.all ? 'all changes' : paths.join(', ')}` };
    },
  },
  {
    tool: 'git_commit',
    specNames: ['git_commit'],
    tags: ['git_commit'],
    parseXml: (text) => _parseGitTag(text, 'git_commit', { str: ['message'], bool: ['all'], inline: 'message' }),
    fromParams: (p) => ['git_commit', { message: p.message != null ? String(p.message) : '', ...(p.all ? { all: true } : {}) }],
    permission: () => ({ actionType: 'git', description: 'git commit', tag: 'git_commit' }),
    execute: async (ctx, args, options) => {
      const o = args[0] || {};
      const blocked = ctx.permissionManager.readonlyBlock('git_commit');
      if (blocked) { _gitLog(ctx, 'git_commit', args, 'error'); return blocked; }
      const message = (o.message == null ? '' : String(o.message)).trim();
      if (!message) {
        _gitLog(ctx, 'git_commit', args, 'error');
        return { error: 'git_commit requires a non-empty commit message.' };
      }
      const argv = ['commit', '-m', message];
      if (o.all) argv.push('-a');
      const res = await _runGit(ctx, argv, options);
      if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_commit', args, 'error'); return _gitFailure(res); }
      // Follow-up lookups only decorate the result; their failures are
      // tolerated and leave hash / branch as empty strings.
      const hashRes = await _runGit(ctx, ['rev-parse', 'HEAD'], options);
      const branchRes = await _runGit(ctx, ['rev-parse', '--abbrev-ref', 'HEAD'], options);
      const hash = (hashRes.stdout || '').trim();
      const branch = (branchRes.stdout || '').trim();
      _gitLog(ctx, 'git_commit', args, 'ok');
      return { status: 'ok', hash, short: hash.slice(0, 7), branch, summary: `Committed ${hash.slice(0, 7)} on ${branch}` };
    },
  },
  {
    tool: 'git_branch',
    specNames: ['git_branch'],
    tags: ['git_branch'],
    parseXml: (text) => _parseGitTag(text, 'git_branch', { str: ['name'], bool: ['delete', 'force'] }),
    fromParams: (p) => ['git_branch', { ...(p.name ? { name: String(p.name) } : {}), ...((p.delete || p.remove) ? { delete: true } : {}), ...(p.force ? { force: true } : {}) }],
    // op-dependent: listing branches is read-only (null); create/delete is mutating.
    permission: (ctx, args) => {
      const o = args[0] || {};
      if (!o.name) return null;
      return { actionType: 'git', description: `git branch ${o.delete ? 'delete' : 'create'} ${o.name}`, tag: 'git_branch' };
    },
    execute: async (ctx, args, options) => {
      const o = args[0] || {};
      if (!o.name) {
        const res = await _runGit(ctx, ['branch', '--no-color'], options);
        if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_branch', args, 'error'); return _gitFailure(res); }
        const branches = [];
        let current = null;
        for (const line of res.stdout.split('\n')) {
          if (!line.trim()) continue;
          const isCurrent = line.startsWith('*');
          // `git branch` marks the current branch with '*' and a branch checked
          // out in another (linked) worktree with '+'. Strip either marker so
          // the reported name is the bare branch name.
          const name = line.replace(/^[*+]?\s+/, '').trim();
          if (!name || name.startsWith('(')) continue; // skip "(HEAD detached …)"
          if (isCurrent) current = name;
          branches.push({ name, current: isCurrent });
        }
        _gitLog(ctx, 'git_branch', args, 'ok');
        return { status: 'ok', branches, current, summary: `${branches.length} branch(es), on ${current || '(detached)'}` };
      }
      const blocked = ctx.permissionManager.readonlyBlock('git_branch');
      if (blocked) { _gitLog(ctx, 'git_branch', args, 'error'); return blocked; }
      const argv = o.delete ? ['branch', o.force ? '-D' : '-d', String(o.name)] : ['branch', String(o.name)];
      const res = await _runGit(ctx, argv, options);
      if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_branch', args, 'error'); return _gitFailure(res); }
      _gitLog(ctx, 'git_branch', args, 'ok');
      return o.delete
        ? { status: 'ok', deleted: String(o.name), summary: `Deleted branch ${o.name}` }
        : { status: 'ok', created: String(o.name), summary: `Created branch ${o.name}` };
    },
  },
  {
    tool: 'git_checkout',
    specNames: ['git_checkout'],
    tags: ['git_checkout'],
    parseXml: (text) => _parseGitTag(text, 'git_checkout', { str: ['name'], bool: ['create', 'force'] }),
    fromParams: (p) => ['git_checkout', { name: p.name != null ? String(p.name) : '', ...(p.create ? { create: true } : {}), ...(p.force ? { force: true } : {}) }],
    permission: (ctx, args) => {
      const o = args[0] || {};
      // Destructive-git ↔ checkpoint honesty: a checkout can discard uncommitted
      // working-tree changes that checkpoints never snapshot (not rewindable).
      return { actionType: 'git', description: `git checkout ${o.create ? '-b ' : ''}${o.name || ''} (may discard uncommitted changes — NOT recoverable via /rewind)`, tag: 'git_checkout' };
    },
    execute: async (ctx, args, options) => {
      const o = args[0] || {};
      const blocked = ctx.permissionManager.readonlyBlock('git_checkout');
      if (blocked) { _gitLog(ctx, 'git_checkout', args, 'error'); return blocked; }
      const name = (o.name == null ? '' : String(o.name)).trim();
      if (!name) { _gitLog(ctx, 'git_checkout', args, 'error'); return { error: 'git_checkout requires a target `name` (branch or ref).' }; }
      const argv = ['checkout'];
      if (o.force) argv.push('-f');
      if (o.create) argv.push('-b');
      argv.push(name);
      const res = await _runGit(ctx, argv, options);
      if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_checkout', args, 'error'); return _gitFailure(res); }
      // Report the branch git actually ended up on; fall back to the requested
      // name if the lookup yields nothing.
      const branchRes = await _runGit(ctx, ['rev-parse', '--abbrev-ref', 'HEAD'], options);
      const branch = (branchRes.stdout || '').trim() || name;
      _gitLog(ctx, 'git_checkout', args, 'ok');
      return { status: 'ok', branch, created: !!o.create, summary: `Switched to ${branch}` };
    },
  },
  {
    tool: 'git_worktree',
    specNames: ['git_worktree'],
    tags: ['git_worktree'],
    parseXml: (text) => _parseGitTag(text, 'git_worktree', { str: ['op', 'path', 'branch'], bool: ['force'] }),
    fromParams: (p) => ['git_worktree', { op: p.op || 'list', ...(p.path ? { path: String(p.path) } : {}), ...(p.branch ? { branch: String(p.branch) } : {}), ...(p.force ? { force: true } : {}) }],
    // op-dependent: list is read-only (null); add/remove are mutating.
    permission: (ctx, args) => {
      const o = args[0] || {};
      const op = o.op || 'list';
      if (op === 'list') return null;
      return { actionType: 'git', description: `git worktree ${op} ${o.path || ''}`, tag: 'git_worktree' };
    },
    execute: async (ctx, args, options) => {
      const o = args[0] || {};
      const op = o.op || 'list';
      if (op === 'list') {
        const res = await _runGit(ctx, ['worktree', 'list', '--porcelain'], options);
        if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
        const worktrees = _parseWorktrees(res.stdout);
        _gitLog(ctx, 'git_worktree', args, 'ok');
        return { status: 'ok', op: 'list', worktrees, summary: `${worktrees.length} worktree(s)` };
      }
      const blocked = ctx.permissionManager.readonlyBlock('git_worktree');
      if (blocked) { _gitLog(ctx, 'git_worktree', args, 'error'); return blocked; }
      if (op === 'add') {
        if (!o.path) { _gitLog(ctx, 'git_worktree', args, 'error'); return { error: 'git_worktree add requires a `path`.' }; }
        const argv = ['worktree', 'add'];
        if (o.branch) argv.push('-b', String(o.branch));
        argv.push(String(o.path));
        const res = await _runGit(ctx, argv, options);
        if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
        _gitLog(ctx, 'git_worktree', args, 'ok');
        return { status: 'ok', op: 'add', path: String(o.path), branch: o.branch ? String(o.branch) : null, summary: `Added worktree at ${o.path}` };
      }
      if (op === 'remove') {
        if (!o.path) { _gitLog(ctx, 'git_worktree', args, 'error'); return { error: 'git_worktree remove requires a `path`.' }; }
        const argv = ['worktree', 'remove'];
        if (o.force) argv.push('--force');
        argv.push(String(o.path));
        const res = await _runGit(ctx, argv, options);
        if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
        _gitLog(ctx, 'git_worktree', args, 'ok');
        return { status: 'ok', op: 'remove', path: String(o.path), summary: `Removed worktree at ${o.path}` };
      }
      _gitLog(ctx, 'git_worktree', args, 'error');
      return { error: `git_worktree: unknown op "${op}" (expected list | add | remove).` };
    },
  },
];
1099
+
1100
+ const TOOL_REGISTRY = [
1101
+ {
1102
+ tool: 'shell',
1103
+ specNames: ['exec', 'shell'],
1104
+ tags: ['exec', 'shell', 'run_command', 'run'],
1105
+ parseXml: (text) => _inline(text, 'shell|exec|run_command|run', 'shell'),
1106
+ fromParams: (p) => (p.command ? ['shell', p.command] : null),
1107
+ // shell is executed through agentExecShell (deny-list chokepoint), not the
1108
+ // agentExecFile dispatch — this execute exists for registry completeness.
1109
+ execute: (ctx, args, options) => ctx.agentExecShell(args[0], options || {}),
1110
+ permission: (ctx, args) => ({ actionType: 'shell', description: args[0] || '', tag: 'exec' }),
1111
+ },
1112
+ {
1113
+ tool: 'read',
1114
+ specNames: ['read_file'],
1115
+ tags: ['read_file'],
1116
+ parseXml: (text) => _parseReadTag(text),
1117
+ fromParams: (p) => (p.path
1118
+ ? ['read', p.path, p.start_line ?? null, p.end_line ?? null, !!p.show_line_numbers]
1119
+ : null),
1120
+ permission: () => null,
1121
+ execute: async (ctx, args, options) => {
1122
+ const signal = (options && options.signal) || null;
1123
+ const [arg0 = null] = args;
1124
+ const { _log, logToolCall, isProtectedSecretPath, _secretReadError, getConfig, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1125
+ const filePath = arg0;
1126
+ if (isProtectedSecretPath(filePath)) {
1127
+ logToolCall('read_file', { path: filePath }, false, 'denied');
1128
+ return _secretReadError(filePath);
1129
+ }
1130
+ const startedAt = Date.now();
1131
+ const stat = await fsp.stat(filePath).catch(() => null);
1132
+ if (stat) {
1133
+ const cfg = getConfig ? getConfig() : {};
1134
+ // Byte BACKSTOP only (Task W.7). Pagination (formatReadResult) is now the
1135
+ // primary context bound — a large line-readable file paginates rather than
1136
+ // hard-refusing. This ceiling (default 50 MB) just rules out slurping a
1137
+ // multi-GB file whole into memory; an operator can lower max_file_size_kb
1138
+ // to hard-refuse smaller files.
1139
+ const defKb = require('./constants').DEFAULT_READ_MAX_FILE_KB;
1140
+ const maxKb = cfg.max_file_size_kb || defKb;
1141
+ const maxBytes = maxKb * 1024;
1142
+ if (stat.size > maxBytes) {
1143
+ const kb = (stat.size / 1024).toFixed(0);
1144
+ logToolCall('read_file', { path: filePath }, false, 'error');
1145
+ return { error: `File too large: ${kb} KB exceeds max_file_size_kb=${maxKb}` };
1146
+ }
1147
+ }
1148
+ if (signal && signal.aborted) {
1149
+ logToolCall('read_file', { path: filePath }, true, 'aborted');
1150
+ return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
1151
+ }
1152
+ try {
1153
+ const data = await fsp.readFile(filePath, { encoding: 'utf8', signal: signal || undefined });
1154
+ const lines = data.split('\n').length;
1155
+ if (lines > 10) {
1156
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath} (${lines} lines, ${data.length} chars)${RST}`);
1157
+ } else {
1158
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath}${RST}`);
1159
+ }
1160
+ logToolCall('read_file', { path: filePath }, true, 'ok');
1161
+ return { content: data, path: filePath, bytes: Buffer.byteLength(data, 'utf8') };
1162
+ } catch (error) {
1163
+ if (error && (error.name === 'AbortError' || error.code === 'ABORT_ERR')) {
1164
+ logToolCall('read_file', { path: filePath }, true, 'aborted');
1165
+ return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
1166
+ }
1167
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1168
+ logToolCall('read_file', { path: filePath }, true, 'error');
1169
+ return { error: error.message };
1170
+ }
1171
+ },
1172
+ },
1173
+ {
1174
+ tool: 'write',
1175
+ specNames: ['write_file', 'create_file'],
1176
+ tags: ['write_file', 'create_file'],
1177
+ parseXml: (text) => {
1178
+ const out = [];
1179
+ // QUIRK: attribute-form content (m[2]) is captured RAW — not trimmed —
1180
+ // unlike inline-tag bodies which go through _unwrapInnerTag().trim().
1181
+ // Preserved deliberately; pinned by test/extract-tool-calls.test.js
1182
+ // ("QUIRK: attribute-form content is NOT trimmed (unlike inline tags)").
1183
+ // Any change to this is out of scope for the tool-registry refactor.
1184
+ for (const m of _matchDual(text, '<write_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/write_file>')) out.push(['write', m[1], m[2]]);
1185
+ for (const m of _matchDual(text, '<create_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/create_file>')) out.push(['write', m[1], m[2]]);
1186
+ return out;
1187
+ },
1188
+ fromParams: (p) => (p.path ? ['write', p.path, p.content != null ? p.content : ''] : null),
1189
+ permission: (ctx, args) => _permWriteAppend(ctx, 'write', args),
1190
+ execute: (ctx, args, options) => _execWriteAppend(ctx, 'write', args, options),
1191
+ },
1192
+ {
1193
+ tool: 'append',
1194
+ specNames: ['append_file'],
1195
+ tags: ['append_file'],
1196
+ // QUIRK: as with write_file, append content is captured raw (not trimmed).
1197
+ parseXml: (text) => _matchDual(text, '<append_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/append_file>').map((m) => ['append', m[1], m[2]]),
1198
+ fromParams: (p) => (p.path ? ['append', p.path, p.content != null ? p.content : ''] : null),
1199
+ permission: (ctx, args) => _permWriteAppend(ctx, 'append', args),
1200
+ execute: (ctx, args, options) => _execWriteAppend(ctx, 'append', args, options),
1201
+ },
1202
+ {
1203
+ tool: 'list_dir',
1204
+ specNames: ['list_dir'],
1205
+ tags: ['list_dir'],
1206
+ parseXml: (text) => _inline(text, 'list_dir', 'list_dir'),
1207
+ fromParams: (p) => ['list_dir', p.path || '.'],
1208
+ permission: () => null,
1209
+ execute: async (ctx, args) => {
1210
+ const [arg0 = null] = args;
1211
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1212
+ const dirPath = arg0;
1213
+ try {
1214
+ const entries = await fsp.readdir(dirPath, { withFileTypes: true });
1215
+ const items = entries.map((e) => {
1216
+ if (e.isSymbolicLink()) return `[L] ${e.name}`;
1217
+ if (e.isDirectory()) return `[D] ${e.name}`;
1218
+ return `[F] ${e.name}`;
1219
+ });
1220
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${dirPath} (${items.length} items)${RST}`);
1221
+ logToolCall('list_dir', { path: dirPath }, true, 'ok');
1222
+ return { items, path: dirPath };
1223
+ } catch (error) {
1224
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1225
+ logToolCall('list_dir', { path: dirPath }, true, 'error');
1226
+ return { error: error.message };
1227
+ }
1228
+ },
1229
+ },
1230
+ {
1231
+ tool: 'search_files',
1232
+ specNames: ['search_files'],
1233
+ tags: ['search_files'],
1234
+ parseXml: (text) => {
1235
+ const out = _inline(text, 'search_files', 'search_files', ['.']);
1236
+ for (const m of _matchDual(text, '<search_files\\s+pattern=Q([^Q]+)Q(?:\\s+dir=Q([^Q]*)Q)?\\s*(?:><\\/search_files>|\\/>)')) {
1237
+ out.push(['search_files', m[1], m[2] || '.']);
1238
+ }
1239
+ return out;
1240
+ },
1241
+ fromParams: (p) => ['search_files', p.pattern || '*', p.dir || '.'],
1242
+ permission: () => null,
1243
+ execute: async (ctx, args, options) => {
1244
+ const signal = (options && options.signal) || null;
1245
+ const [arg0 = null, arg1 = null] = args;
1246
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1247
+ const pattern = arg0;
1248
+ const searchDir = arg1 || '.';
1249
+ const startedAt = Date.now();
1250
+ try {
1251
+ let regStr = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
1252
+ regStr = regStr.replace(/\*\*/g, '\x00');
1253
+ regStr = regStr.replace(/\*/g, '[^/]*');
1254
+ regStr = regStr.replace(/\x00\//g, '(?:.*/)?');
1255
+ regStr = regStr.replace(/\x00/g, '.*');
1256
+ const regex = new RegExp(`^${regStr}$`);
1257
+ const matchName = !pattern.includes('/');
1258
+ const files = [];
1259
+ async function walk(dir, rel) {
1260
+ if (signal && signal.aborted) return;
1261
+ let entries;
1262
+ try { entries = await fsp.readdir(dir, { withFileTypes: true }); } catch { return; }
1263
+ for (const entry of entries) {
1264
+ if (signal && signal.aborted) return;
1265
+ const relPath = rel ? `${rel}/${entry.name}` : entry.name;
1266
+ if (regex.test(matchName ? entry.name : relPath)) files.push(relPath);
1267
+ if (entry.isDirectory()) await walk(path.join(dir, entry.name), relPath);
1268
+ }
1269
+ }
1270
+ await walk(searchDir, '');
1271
+ if (signal && signal.aborted) {
1272
+ logToolCall('search_files', { pattern, dir: searchDir }, true, 'aborted');
1273
+ return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
1274
+ }
1275
+ files.sort();
1276
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${files.length} file(s) matching "${pattern}"${RST}`);
1277
+ logToolCall('search_files', { pattern, dir: searchDir }, true, 'ok');
1278
+ return { files, pattern, dir: searchDir };
1279
+ } catch (error) {
1280
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1281
+ logToolCall('search_files', { pattern, dir: searchDir }, true, 'error');
1282
+ return { error: error.message };
1283
+ }
1284
+ },
1285
+ },
1286
+ {
1287
+ tool: 'grep',
1288
+ specNames: ['grep'],
1289
+ tags: ['grep'],
1290
+ parseXml: (text) => _parseSearchTag(text, 'grep'),
1291
+ fromParams: (p) => (p.pattern
1292
+ ? ['grep', p.pattern, p.path || null, !!p.ignore_case, p.output_mode || null, p.head_limit ?? null, p.offset ?? null]
1293
+ : null),
1294
+ permission: () => null,
1295
+ execute: async (ctx, args, options) => {
1296
+ const signal = (options && options.signal) || null;
1297
+ const [pattern = null, pathGlob = null, ignoreCase = false, outputMode = null, headLimit, offset] = args;
1298
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1299
+ const res = _grepSearch({ pattern, pathGlob, ignoreCase, baseDir: '.', engine: 'auto', signal });
1300
+ if (res.aborted) { logToolCall('grep', { pattern }, true, 'aborted'); return res; }
1301
+ if (res.error) {
1302
+ _log(` ${FG_RED}✗ ${res.error}${RST}`);
1303
+ logToolCall('grep', { pattern }, true, 'error');
1304
+ return res;
1305
+ }
1306
+ // Shape the serialization controls onto the result (Task W.5). The engine
1307
+ // returns the full (engine-capped) match set; output_mode + head_limit +
1308
+ // offset bound what reaches the model in formatFileResult (lib/agent.js).
1309
+ res.output_mode = _normGrepMode(outputMode);
1310
+ res.head_limit = _normHeadLimit(headLimit, require('./constants').DEFAULT_GREP_HEAD_LIMIT);
1311
+ res.offset = _normOffset(offset);
1312
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}grep "${pattern}" — ${res.count} match(es)${RST}`);
1313
+ logToolCall('grep', { pattern, path: pathGlob }, true, 'ok');
1314
+ return res;
1315
+ },
1316
+ },
1317
+ {
1318
+ tool: 'glob',
1319
+ specNames: ['glob'],
1320
+ tags: ['glob'],
1321
+ parseXml: (text) => _parseSearchTag(text, 'glob'),
1322
+ fromParams: (p) => (p.pattern ? ['glob', p.pattern, p.path || p.dir || '.', p.head_limit ?? null, p.offset ?? null] : null),
1323
+ permission: () => null,
1324
+ execute: async (ctx, args, options) => {
1325
+ const signal = (options && options.signal) || null;
1326
+ const [pattern = null, base = '.', headLimit, offset] = args;
1327
+ const { _log, logToolCall, isPathSafe, _sandboxError, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1328
+ if (!isPathSafe(base)) {
1329
+ logToolCall('glob', { pattern, dir: base }, false, 'denied');
1330
+ return _sandboxError(base);
1331
+ }
1332
+ const res = _globSearch({ pattern, baseDir: base, signal });
1333
+ if (res.aborted) { logToolCall('glob', { pattern }, true, 'aborted'); return res; }
1334
+ if (res.error) {
1335
+ _log(` ${FG_RED}✗ ${res.error}${RST}`);
1336
+ logToolCall('glob', { pattern }, true, 'error');
1337
+ return res;
1338
+ }
1339
+ // head_limit + offset bound the file list that reaches the model (Task W.5);
1340
+ // the engine returns the full (engine-capped) list, serialized in formatFileResult.
1341
+ res.head_limit = _normHeadLimit(headLimit, require('./constants').DEFAULT_GLOB_HEAD_LIMIT);
1342
+ res.offset = _normOffset(offset);
1343
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}glob "${pattern}" — ${res.count} file(s)${RST}`);
1344
+ logToolCall('glob', { pattern, dir: base }, true, 'ok');
1345
+ return res;
1346
+ },
1347
+ },
1348
+ {
1349
+ tool: 'delete_file',
1350
+ specNames: ['delete_file'],
1351
+ tags: ['delete_file'],
1352
+ parseXml: (text) => _inline(text, 'delete_file', 'delete_file'),
1353
+ fromParams: (p) => (p.path ? ['delete_file', p.path] : null),
1354
+ permission: (ctx, args) => {
1355
+ const { _log, FG_YELLOW, BOLD, RST } = ctx;
1356
+ const filePath = args[0];
1357
+ _log(` ${FG_YELLOW}${BOLD}⚠ Deleting: ${filePath}${RST}`);
1358
+ return { actionType: 'file', description: `Delete ${filePath}`, tag: 'delete_file' };
1359
+ },
1360
+ execute: async (ctx, args) => {
1361
+ const [arg0 = null] = args;
1362
+ const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1363
+ const filePath = arg0;
1364
+ const blocked = permissionManager.readonlyBlock('delete_file');
1365
+ if (blocked) {
1366
+ logToolCall('delete_file', { path: filePath }, false, 'denied');
1367
+ return blocked;
1368
+ }
1369
+ if (!isPathSafe(filePath)) {
1370
+ logToolCall('delete_file', { path: filePath }, false, 'denied');
1371
+ return _sandboxError(filePath);
1372
+ }
1373
+ try {
1374
+ await fsp.unlink(filePath);
1375
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Deleted ${filePath}${RST}`);
1376
+ logToolCall('delete_file', { path: filePath }, true, 'ok');
1377
+ return { status: 'ok', path: filePath };
1378
+ } catch (error) {
1379
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1380
+ logToolCall('delete_file', { path: filePath }, true, 'error');
1381
+ return { error: error.message };
1382
+ }
1383
+ },
1384
+ },
1385
+ {
1386
+ tool: 'make_dir',
1387
+ specNames: ['make_dir'],
1388
+ tags: ['make_dir'],
1389
+ parseXml: (text) => _inline(text, 'make_dir', 'make_dir'),
1390
+ fromParams: (p) => (p.path ? ['make_dir', p.path] : null),
1391
+ permission: (ctx, args) => ({ actionType: 'file', description: `Create directory ${args[0]}`, tag: 'make_dir' }),
1392
+ execute: async (ctx, args) => {
1393
+ const [arg0 = null] = args;
1394
+ const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1395
+ const dirPath = arg0;
1396
+ const blocked = permissionManager.readonlyBlock('make_dir');
1397
+ if (blocked) {
1398
+ logToolCall('make_dir', { path: dirPath }, false, 'denied');
1399
+ return blocked;
1400
+ }
1401
+ if (!isPathSafe(dirPath)) {
1402
+ logToolCall('make_dir', { path: dirPath }, false, 'denied');
1403
+ return _sandboxError(dirPath);
1404
+ }
1405
+ try {
1406
+ await fsp.mkdir(dirPath, { recursive: true });
1407
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Created directory ${dirPath}${RST}`);
1408
+ logToolCall('make_dir', { path: dirPath }, true, 'ok');
1409
+ return { status: 'ok', path: dirPath };
1410
+ } catch (error) {
1411
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1412
+ logToolCall('make_dir', { path: dirPath }, true, 'error');
1413
+ return { error: error.message };
1414
+ }
1415
+ },
1416
+ },
1417
+ {
1418
+ tool: 'remove_dir',
1419
+ specNames: ['remove_dir'],
1420
+ tags: ['remove_dir'],
1421
+ parseXml: (text) => _inline(text, 'remove_dir', 'remove_dir'),
1422
+ fromParams: (p) => (p.path ? ['remove_dir', p.path] : null),
1423
+ permission: (ctx, args) => ({ actionType: 'file', description: `Remove directory ${args[0]}`, tag: 'remove_dir' }),
1424
+ execute: async (ctx, args) => {
1425
+ const [arg0 = null] = args;
1426
+ const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1427
+ const dirPath = arg0;
1428
+ const blocked = permissionManager.readonlyBlock('remove_dir');
1429
+ if (blocked) {
1430
+ logToolCall('remove_dir', { path: dirPath }, false, 'denied');
1431
+ return blocked;
1432
+ }
1433
+ if (!isPathSafe(dirPath)) {
1434
+ logToolCall('remove_dir', { path: dirPath }, false, 'denied');
1435
+ return _sandboxError(dirPath);
1436
+ }
1437
+ try {
1438
+ await fsp.rm(dirPath, { recursive: true, force: true });
1439
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Removed directory ${dirPath}${RST}`);
1440
+ logToolCall('remove_dir', { path: dirPath }, true, 'ok');
1441
+ return { status: 'ok', path: dirPath };
1442
+ } catch (error) {
1443
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1444
+ logToolCall('remove_dir', { path: dirPath }, true, 'error');
1445
+ return { error: error.message };
1446
+ }
1447
+ },
1448
+ },
1449
+ {
1450
+ tool: 'move_file',
1451
+ specNames: ['move_file'],
1452
+ tags: ['move_file'],
1453
+ parseXml: (text) => _matchDual(text, '<move_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/move_file>|\\/>)').map((m) => ['move_file', m[1], m[2]]),
1454
+ fromParams: (p) => (p.src && p.dst ? ['move_file', p.src, p.dst] : null),
1455
+ permission: (ctx, args) => {
1456
+ const { _log, FG_YELLOW, BOLD, RST } = ctx;
1457
+ const src = args[0];
1458
+ const dst = args[1];
1459
+ _log(` ${FG_YELLOW}${BOLD}⚠ Moving: ${src} → ${dst}${RST}`);
1460
+ return { actionType: 'file', description: `Move ${src} to ${dst}`, tag: 'move_file' };
1461
+ },
1462
+ execute: async (ctx, args) => {
1463
+ const [arg0 = null, arg1 = null] = args;
1464
+ const { _log, logToolCall, isPathSafe, isProtectedSecretPath, isProtectedConfigPath, _sandboxError, _secretReadError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1465
+ const src = arg0;
1466
+ const dst = arg1;
1467
+ const blocked = permissionManager.readonlyBlock('move_file');
1468
+ if (blocked) {
1469
+ logToolCall('move_file', { src, dst }, false, 'denied');
1470
+ return blocked;
1471
+ }
1472
+ if (isProtectedSecretPath(src)) {
1473
+ logToolCall('move_file', { src, dst }, false, 'denied');
1474
+ return _secretReadError(src);
1475
+ }
1476
+ if (isProtectedConfigPath(dst)) {
1477
+ logToolCall('move_file', { src, dst }, false, 'denied');
1478
+ return _protectedConfigWriteError(dst);
1479
+ }
1480
+ if (!isPathSafe(dst)) {
1481
+ logToolCall('move_file', { src, dst }, false, 'denied');
1482
+ return _sandboxError(dst);
1483
+ }
1484
+ try {
1485
+ const dstDir = path.dirname(dst);
1486
+ if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
1487
+ try {
1488
+ await fsp.rename(src, dst);
1489
+ } catch (renameErr) {
1490
+ if (renameErr.code !== 'EXDEV') throw renameErr;
1491
+ await fsp.cp(src, dst, { recursive: true });
1492
+ await fsp.rm(src, { recursive: true, force: true });
1493
+ }
1494
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Moved ${src} → ${dst}${RST}`);
1495
+ logToolCall('move_file', { src, dst }, true, 'ok');
1496
+ return { status: 'ok', src, dst };
1497
+ } catch (error) {
1498
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1499
+ logToolCall('move_file', { src, dst }, true, 'error');
1500
+ return { error: error.message };
1501
+ }
1502
+ },
1503
+ },
1504
+ {
1505
+ tool: 'copy_file',
1506
+ specNames: ['copy_file'],
1507
+ tags: ['copy_file'],
1508
+ parseXml: (text) => _matchDual(text, '<copy_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/copy_file>|\\/>)').map((m) => ['copy_file', m[1], m[2]]),
1509
+ fromParams: (p) => (p.src && p.dst ? ['copy_file', p.src, p.dst] : null),
1510
+ permission: (ctx, args) => ({ actionType: 'file', description: `Copy ${args[0]} to ${args[1]}`, tag: 'copy_file' }),
1511
+ execute: async (ctx, args) => {
1512
+ const [arg0 = null, arg1 = null] = args;
1513
+ const { _log, logToolCall, isPathSafe, isProtectedSecretPath, isProtectedConfigPath, _sandboxError, _secretReadError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1514
+ const src = arg0;
1515
+ const dst = arg1;
1516
+ const blocked = permissionManager.readonlyBlock('copy_file');
1517
+ if (blocked) {
1518
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1519
+ return blocked;
1520
+ }
1521
+ if (isProtectedSecretPath(src)) {
1522
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1523
+ return _secretReadError(src);
1524
+ }
1525
+ if (isProtectedConfigPath(dst)) {
1526
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1527
+ return _protectedConfigWriteError(dst);
1528
+ }
1529
+ if (!isPathSafe(dst)) {
1530
+ logToolCall('copy_file', { src, dst }, false, 'denied');
1531
+ return _sandboxError(dst);
1532
+ }
1533
+ try {
1534
+ const dstDir = path.dirname(dst);
1535
+ if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
1536
+ await fsp.cp(src, dst, { recursive: true });
1537
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Copied ${src} → ${dst}${RST}`);
1538
+ logToolCall('copy_file', { src, dst }, true, 'ok');
1539
+ return { status: 'ok', src, dst };
1540
+ } catch (error) {
1541
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1542
+ logToolCall('copy_file', { src, dst }, true, 'error');
1543
+ return { error: error.message };
1544
+ }
1545
+ },
1546
+ },
1547
+ {
1548
+ tool: 'edit_file',
1549
+ specNames: ['edit_file'],
1550
+ tags: ['edit_file'],
1551
+ parseXml: (text) => _matchDual(text, '<edit_file\\s+path=Q([^Q]+)Q\\s+line=Q(\\d+)Q>([\\s\\S]*?)<\\/edit_file>').map((m) => ['edit_file', m[1], parseInt(m[2], 10), m[3]]),
1552
+ fromParams: (p) => (p.path && p.line !== undefined ? ['edit_file', p.path, parseInt(p.line, 10), p.content != null ? p.content : ''] : null),
1553
+ permission: (ctx, args) => ({ actionType: 'file', description: `Edit line ${args[1]} in ${args[0]}`, tag: 'edit_file' }),
1554
+ execute: async (ctx, args) => {
1555
+ const [arg0 = null, arg1 = null, arg2 = null] = args;
1556
+ const { _log, logToolCall, isProtectedConfigPath, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1557
+ const filePath = arg0;
1558
+ const lineNum = arg1;
1559
+ const newContent = arg2;
1560
+ const blocked = permissionManager.readonlyBlock('edit_file');
1561
+ if (blocked) {
1562
+ logToolCall('edit_file', { path: filePath, line: lineNum }, false, 'denied');
1563
+ return blocked;
1564
+ }
1565
+ if (isProtectedConfigPath(filePath)) {
1566
+ logToolCall('edit_file', { path: filePath, line: lineNum }, false, 'denied');
1567
+ return _protectedConfigWriteError(filePath);
1568
+ }
1569
+ try {
1570
+ const data = await fsp.readFile(filePath, 'utf8');
1571
+ const lines = data.split('\n');
1572
+ if (lineNum < 1 || lineNum > lines.length) {
1573
+ logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
1574
+ return { error: `Line ${lineNum} out of range (file has ${lines.length} lines)` };
1575
+ }
1576
+ lines[lineNum - 1] = newContent;
1577
+ await fsp.writeFile(filePath, lines.join('\n'));
1578
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Edited line ${lineNum} in ${filePath}${RST}`);
1579
+ logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'ok');
1580
+ return { status: 'ok', path: filePath, line: lineNum };
1581
+ } catch (error) {
1582
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1583
+ logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
1584
+ return { error: error.message };
1585
+ }
1586
+ },
1587
+ },
1588
+ {
1589
+ tool: 'search_in_file',
1590
+ specNames: ['search_in_file'],
1591
+ tags: ['search_in_file'],
1592
+ parseXml: (text) => _matchDual(text, '<search_in_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/search_in_file>').map((m) => ['search_in_file', m[1], m[2].trim()]),
1593
+ fromParams: (p) => (p.path && p.pattern ? ['search_in_file', p.path, p.pattern] : null),
1594
+ permission: () => null,
1595
+ execute: async (ctx, args) => {
1596
+ const [arg0 = null, arg1 = null] = args;
1597
+ const { _log, logToolCall, isProtectedSecretPath, _secretReadError, _checkRegexSafety, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1598
+ const filePath = arg0;
1599
+ const pattern = arg1;
1600
+ if (isProtectedSecretPath(filePath)) {
1601
+ logToolCall('search_in_file', { path: filePath, pattern }, false, 'denied');
1602
+ return _secretReadError(filePath);
1603
+ }
1604
+ try {
1605
+ const data = await fsp.readFile(filePath, 'utf8');
1606
+ const guardErr = _checkRegexSafety(pattern, data);
1607
+ if (guardErr) {
1608
+ logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
1609
+ return guardErr;
1610
+ }
1611
+ const regex = new RegExp(pattern);
1612
+ const matches = data.split('\n')
1613
+ .map((content, idx) => regex.test(content) ? { line: idx + 1, content } : null)
1614
+ .filter(Boolean);
1615
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${matches.length} match(es) in ${filePath}${RST}`);
1616
+ logToolCall('search_in_file', { path: filePath, pattern }, true, 'ok');
1617
+ return { matches, path: filePath };
1618
+ } catch (error) {
1619
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1620
+ logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
1621
+ return { error: error.message };
1622
+ }
1623
+ },
1624
+ },
1625
+ {
1626
+ tool: 'replace_in_file',
1627
+ specNames: ['replace_in_file'],
1628
+ tags: ['replace_in_file'],
1629
+ parseXml: (text) => _matchDual(text, '<replace_in_file\\s+path=Q([^Q]+)Q\\s+search=Q([^Q]*)Q\\s+replace=Q([^Q]*)Q>([\\s\\S]*?)<\\/replace_in_file>').map((m) => ['replace_in_file', m[1], m[2], m[3], m[4].trim()]),
1630
+ fromParams: (p) => (p.path && p.search !== undefined ? ['replace_in_file', p.path, p.search, p.replace != null ? p.replace : '', p.flags || ''] : null),
1631
+ permission: (ctx, args) => ({ actionType: 'file', description: `Replace in ${args[0]}`, tag: 'replace_in_file' }),
1632
+ execute: async (ctx, args) => {
1633
+ const [arg0 = null, arg1 = null, arg2 = null, arg3 = null] = args;
1634
+ const { _log, logToolCall, isProtectedConfigPath, _protectedConfigWriteError, _checkRegexSafety, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1635
+ const filePath = arg0;
1636
+ const searchStr = arg1;
1637
+ const replaceStr = arg2;
1638
+ const flags = arg3 || '';
1639
+ const blocked = permissionManager.readonlyBlock('replace_in_file');
1640
+ if (blocked) {
1641
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, false, 'denied');
1642
+ return blocked;
1643
+ }
1644
+ if (isProtectedConfigPath(filePath)) {
1645
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, false, 'denied');
1646
+ return _protectedConfigWriteError(filePath);
1647
+ }
1648
+ try {
1649
+ const data = await fsp.readFile(filePath, 'utf8');
1650
+ const guardErr = _checkRegexSafety(searchStr, data);
1651
+ if (guardErr) {
1652
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1653
+ return guardErr;
1654
+ }
1655
+ const safeFlags = flags.replace(/[^gimsuy]/g, '');
1656
+ const regex = new RegExp(searchStr, safeFlags || undefined);
1657
+ // Semantics (intentional, unchanged): String.prototype.replace replaces
1658
+ // ALL matches only when the regex is global; without "g" it replaces just
1659
+ // the first match. The returned count must equal the replacements actually
1660
+ // performed — so count all matches when global, else 1 if there is a match
1661
+ // (else 0). (Task 1.4c: previously count was computed with an always-global
1662
+ // regex and overstated non-global replacements.)
1663
+ const isGlobal = safeFlags.includes('g');
1664
+ const count = isGlobal
1665
+ ? (data.match(new RegExp(searchStr, safeFlags)) || []).length
1666
+ : (new RegExp(searchStr, safeFlags).test(data) ? 1 : 0);
1667
+ const newData = data.replace(regex, replaceStr);
1668
+ await fsp.writeFile(filePath, newData);
1669
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath}${RST}`);
1670
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'ok');
1671
+ return { status: 'ok', path: filePath, count };
1672
+ } catch (error) {
1673
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1674
+ logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
1675
+ return { error: error.message };
1676
+ }
1677
+ },
1678
+ },
1679
+ {
1680
+ tool: 'download',
1681
+ specNames: ['download'],
1682
+ tags: ['download'],
1683
+ // Optional `path` destination (Pre-Task 4.0b). Both the attribute form
1684
+ // (`<download path="dest">URL</download>`) and the plain form
1685
+ // (`<download>URL</download>`, defaulting to the CWD) are accepted.
1686
+ parseXml: (text) => {
1687
+ const out = [];
1688
+ for (const m of _matchDual(text, '<download\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/download>')) {
1689
+ out.push(['download', _unwrapInnerTag(m[2]).trim(), m[1]]);
1690
+ }
1691
+ for (const m of text.matchAll(/<download>([\s\S]*?)<\/download>/g)) {
1692
+ out.push(['download', _unwrapInnerTag(m[1]).trim()]);
1693
+ }
1694
+ return out;
1695
+ },
1696
+ fromParams: (p) => {
1697
+ if (!p.url) return null;
1698
+ const dest = p.path || p.dest;
1699
+ return dest ? ['download', p.url, dest] : ['download', p.url];
1700
+ },
1701
+ permission: (ctx, args) => ({ actionType: 'net', description: `Download ${args[0]}`, tag: 'download' }),
1702
+ execute: async (ctx, args, options) => {
1703
+ const signal = (options && options.signal) || null;
1704
+ const [arg0 = null, arg1 = null] = args;
1705
+ const {
1706
+ _log, logToolCall, _dryRun, _skippedOps,
1707
+ isPathSafe, _sandboxError, isProtectedSecretPath, _secretReadError,
1708
+ isProtectedConfigPath, _protectedConfigWriteError,
1709
+ permissionManager, getConfig,
1710
+ FG_GREEN, FG_GRAY, FG_RED, RST,
1711
+ } = ctx;
1712
+ const url = arg0;
1713
+ const dest = arg1 || null;
1714
+ if (_dryRun) {
1715
+ _skippedOps.push({ category: 'net', symbol: '↓', desc: `download ${url}` });
1716
+ logToolCall('download', { url }, false, 'dry-run');
1717
+ return { status: 'dry-run', message: 'dry-run: network call skipped' };
1718
+ }
1719
+ // Validate/normalize the URL BEFORE building the request or resolving a
1720
+ // destination — a malformed URL (or non-http(s) scheme, empty, non-string)
1721
+ // is a clean tool error, never an uncaught throw out of the executor.
1722
+ const validatedDlUrl = _validateFetchUrl(url);
1723
+ if (validatedDlUrl.error) {
1724
+ _log(` ${FG_RED}✗ ${validatedDlUrl.error}${RST}`);
1725
+ logToolCall('download', { url }, true, 'error');
1726
+ return validatedDlUrl;
1727
+ }
1728
+ const normalizedDlUrl = validatedDlUrl.url;
1729
+ // Resolve the destination: an explicit path (relative → CWD, or absolute),
1730
+ // otherwise the URL basename into the CWD (historical default).
1731
+ let outPath;
1732
+ if (dest) {
1733
+ outPath = path.resolve(dest);
1734
+ } else {
1735
+ let fileName;
1736
+ try {
1737
+ fileName = path.basename(new URL(normalizedDlUrl).pathname) || 'download';
1738
+ } catch {
1739
+ fileName = 'download';
1740
+ }
1741
+ outPath = path.join(process.cwd(), fileName);
1742
+ }
1743
+ // Confinement (Pre-Task 4.0b): download is a write path and must honor the
1744
+ // same guards as every other mutating file tool — --readonly, the
1745
+ // secret-file guard, and isPathSafe (CWD confinement / --allow-anywhere).
1746
+ const blocked = permissionManager.readonlyBlock('download');
1747
+ if (blocked) {
1748
+ logToolCall('download', { url, path: outPath }, false, 'denied');
1749
+ return blocked;
1750
+ }
1751
+ if (isProtectedSecretPath(outPath)) {
1752
+ logToolCall('download', { url, path: outPath }, false, 'denied');
1753
+ return _secretReadError(outPath);
1754
+ }
1755
+ if (isProtectedConfigPath(outPath)) {
1756
+ logToolCall('download', { url, path: outPath }, false, 'denied');
1757
+ return _protectedConfigWriteError(outPath);
1758
+ }
1759
+ if (!isPathSafe(outPath)) {
1760
+ logToolCall('download', { url, path: outPath }, false, 'denied');
1761
+ return _sandboxError(outPath);
1762
+ }
1763
+ const cfg = getConfig ? getConfig() : {};
1764
+ const maxBytes = Math.max(1024, cfg.download_max_bytes || 104857600);
1765
+ const userAgent = _resolveUserAgent(cfg);
1766
+ const startedAt = Date.now();
1767
+ return new Promise((resolve) => {
1768
+ let abortedByUser = false;
1769
+ let cappedExceeded = false;
1770
+ let onAbort = null;
1771
+ let activeReq = null;
1772
+ let activeFile = null;
1773
+ const detachAbort = () => {
1774
+ if (onAbort && signal) {
1775
+ try { signal.removeEventListener('abort', onAbort); } catch {}
1776
+ onAbort = null;
1777
+ }
1778
+ };
1779
+ const finishAborted = () => {
1780
+ fs.unlink(outPath, () => {});
1781
+ logToolCall('download', { url }, true, 'aborted');
1782
+ resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
1783
+ };
1784
+ if (signal) {
1785
+ if (signal.aborted) {
1786
+ abortedByUser = true;
1787
+ finishAborted();
1788
+ return;
1789
+ }
1790
+ onAbort = () => {
1791
+ abortedByUser = true;
1792
+ try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
1793
+ try { if (activeFile) activeFile.destroy(); } catch {}
1794
+ };
1795
+ signal.addEventListener('abort', onAbort, { once: true });
1796
+ }
1797
+
1798
+ function doDownload(target, redirectsLeft) {
1799
+ const proto = target.startsWith('https') ? https : http;
1800
+ let req;
1801
+ try {
1802
+ req = proto.get(target, { headers: { 'User-Agent': userAgent } }, (res) => {
1803
+ if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
1804
+ res.resume();
1805
+ // A redirect Location may be relative or malformed — resolve +
1806
+ // validate it against the current target rather than throwing.
1807
+ const nextUrl = _validateFetchUrl(res.headers.location, target);
1808
+ if (nextUrl.error) {
1809
+ detachAbort();
1810
+ _log(` ${FG_RED}✗ ${nextUrl.error}${RST}`);
1811
+ logToolCall('download', { url: target }, true, 'error');
1812
+ return resolve(nextUrl);
1813
+ }
1814
+ return doDownload(nextUrl.url, redirectsLeft - 1);
1815
+ }
1816
+ if (res.statusCode >= 400) {
1817
+ res.resume();
1818
+ const msg = `HTTP ${res.statusCode}`;
1819
+ detachAbort();
1820
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
1821
+ logToolCall('download', { url }, true, 'error');
1822
+ return resolve({ error: msg });
1823
+ }
1824
+ const file = fs.createWriteStream(outPath);
1825
+ activeFile = file;
1826
+ let downloadedBytes = 0;
1827
+ // Manual stream (instead of res.pipe) so we can enforce the byte cap
1828
+ // mid-flight: on exceeding it, abort the request, destroy the file,
1829
+ // remove the partial artifact, and resolve once cleanup completes so
1830
+ // no truncated file is ever left behind.
1831
+ res.on('data', (chunk) => {
1832
+ if (cappedExceeded || abortedByUser) return;
1833
+ downloadedBytes += chunk.length;
1834
+ if (downloadedBytes > maxBytes) {
1835
+ cappedExceeded = true;
1836
+ try { if (activeReq) activeReq.destroy(); } catch {}
1837
+ try { res.destroy(); } catch {}
1838
+ detachAbort();
1839
+ const msg = `Download aborted: exceeded byte cap (${maxBytes} bytes)`;
1840
+ file.destroy();
1841
+ file.once('close', () => {
1842
+ fs.unlink(outPath, () => {
1843
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
1844
+ logToolCall('download', { url, path: outPath }, true, 'error');
1845
+ resolve({ error: msg, capped: true, bytes: downloadedBytes });
1846
+ });
1847
+ });
1848
+ return;
1849
+ }
1850
+ if (!file.write(chunk)) {
1851
+ res.pause();
1852
+ file.once('drain', () => { if (!cappedExceeded && !abortedByUser) res.resume(); });
1853
+ }
1854
+ });
1855
+ res.on('end', () => {
1856
+ if (cappedExceeded || abortedByUser) return;
1857
+ file.end();
1858
+ });
1859
+ res.on('error', (err) => {
1860
+ if (cappedExceeded) return;
1861
+ if (abortedByUser) { detachAbort(); finishAborted(); return; }
1862
+ file.destroy();
1863
+ fs.unlink(outPath, () => {});
1864
+ detachAbort();
1865
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
1866
+ logToolCall('download', { url }, true, 'error');
1867
+ resolve({ error: err.message });
1868
+ });
1869
+ file.on('finish', () => {
1870
+ if (cappedExceeded || abortedByUser) return;
1871
+ file.close();
1872
+ detachAbort();
1873
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Downloaded to ${outPath}${RST}`);
1874
+ logToolCall('download', { url }, true, 'ok');
1875
+ resolve({ status: 'ok', path: outPath, bytes: downloadedBytes });
1876
+ });
1877
+ file.on('error', (err) => {
1878
+ if (cappedExceeded) return;
1879
+ if (abortedByUser) {
1880
+ detachAbort();
1881
+ finishAborted();
1882
+ return;
1883
+ }
1884
+ fs.unlink(outPath, () => {});
1885
+ detachAbort();
1886
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
1887
+ logToolCall('download', { url }, true, 'error');
1888
+ resolve({ error: err.message });
1889
+ });
1890
+ });
1891
+ } catch (err) {
1892
+ // Defense-in-depth: the URL is validated before we get here, but any
1893
+ // synchronous throw from proto.get must still become a tool error.
1894
+ detachAbort();
1895
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
1896
+ logToolCall('download', { url: target }, true, 'error');
1897
+ resolve({ error: `Invalid URL: ${err.message}`, error_code: err.code || 'ERR_INVALID_URL' });
1898
+ return;
1899
+ }
1900
+ activeReq = req;
1901
+ req.on('error', (err) => {
1902
+ if (cappedExceeded) return;
1903
+ if (abortedByUser) {
1904
+ detachAbort();
1905
+ finishAborted();
1906
+ return;
1907
+ }
1908
+ fs.unlink(outPath, () => {});
1909
+ detachAbort();
1910
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
1911
+ logToolCall('download', { url }, true, 'error');
1912
+ resolve({ error: err.message });
1913
+ });
1914
+ req.setTimeout(120000, () => {
1915
+ req.destroy();
1916
+ fs.unlink(outPath, () => {});
1917
+ detachAbort();
1918
+ logToolCall('download', { url }, true, 'error');
1919
+ resolve({ error: 'Request timeout' });
1920
+ });
1921
+ }
1922
+ doDownload(normalizedDlUrl, 5);
1923
+ });
1924
+ },
1925
+ },
1926
+ {
1927
+ tool: 'upload',
1928
+ specNames: ['upload'],
1929
+ tags: ['upload'],
1930
+ // QUIRK: upload content (base64) is captured raw (not trimmed), like write.
1931
+ parseXml: (text) => _matchDual(text, '<upload\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/upload>').map((m) => ['upload', m[1], m[2]]),
1932
+ fromParams: (p) => (p.path ? ['upload', p.path, p.content != null ? p.content : ''] : null),
1933
+ permission: (ctx, args) => ({ actionType: 'file', description: `Upload to ${args[0]}`, tag: 'upload' }),
1934
+ execute: async (ctx, args) => {
1935
+ const [arg0 = null, arg1 = null] = args;
1936
+ const { _log, logToolCall, isPathSafe, isProtectedConfigPath, _sandboxError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1937
+ const filePath = arg0;
1938
+ const encodedContent = arg1 || '';
1939
+ const blocked = permissionManager.readonlyBlock('upload');
1940
+ if (blocked) {
1941
+ logToolCall('upload', { path: filePath }, false, 'denied');
1942
+ return blocked;
1943
+ }
1944
+ if (isProtectedConfigPath(filePath)) {
1945
+ logToolCall('upload', { path: filePath }, false, 'denied');
1946
+ return _protectedConfigWriteError(filePath);
1947
+ }
1948
+ if (!isPathSafe(filePath)) {
1949
+ logToolCall('upload', { path: filePath }, false, 'denied');
1950
+ return _sandboxError(filePath);
1951
+ }
1952
+ try {
1953
+ const dir = path.dirname(filePath);
1954
+ if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
1955
+ const buffer = Buffer.from(encodedContent.trim(), 'base64');
1956
+ await fsp.writeFile(filePath, buffer);
1957
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Uploaded ${buffer.length} bytes to ${filePath}${RST}`);
1958
+ logToolCall('upload', { path: filePath }, true, 'ok');
1959
+ return { status: 'ok', path: filePath, bytes: buffer.length };
1960
+ } catch (error) {
1961
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1962
+ logToolCall('upload', { path: filePath }, true, 'error');
1963
+ return { error: error.message };
1964
+ }
1965
+ },
1966
+ },
1967
+ {
1968
+ tool: 'file_stat',
1969
+ specNames: ['file_stat'],
1970
+ tags: ['file_stat'],
1971
+ parseXml: (text) => _inline(text, 'file_stat', 'file_stat'),
1972
+ fromParams: (p) => (p.path ? ['file_stat', p.path] : null),
1973
+ permission: () => null,
1974
+ execute: async (ctx, args) => {
1975
+ const [arg0 = null] = args;
1976
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
1977
+ const filePath = arg0;
1978
+ try {
1979
+ const stat = await fsp.stat(filePath);
1980
+ const type = stat.isDirectory() ? 'directory' : stat.isSymbolicLink() ? 'symlink' : 'file';
1981
+ const size_kb = (stat.size / 1024).toFixed(2);
1982
+ const mode = '0o' + stat.mode.toString(8);
1983
+ const mtime = stat.mtime.toISOString();
1984
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stat ${filePath}${RST}`);
1985
+ logToolCall('file_stat', { path: filePath }, true, 'ok');
1986
+ return { path: filePath, size_kb, mtime, type, mode };
1987
+ } catch (error) {
1988
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
1989
+ logToolCall('file_stat', { path: filePath }, true, 'error');
1990
+ return { error: error.message };
1991
+ }
1992
+ },
1993
+ },
1994
+ {
1995
+ tool: 'http_get',
1996
+ specNames: ['http_get'],
1997
+ tags: ['http_get'],
1998
+ parseXml: (text) => {
1999
+ const out = [];
2000
+ for (const m of text.matchAll(/<http_get\b([^>]*?)(?:><\/http_get>|\/>)/g)) {
2001
+ const attrStr = m[1];
2002
+ const urlMatch = attrStr.match(/url="([^"]+)"/) || attrStr.match(/url='([^']+)'/);
2003
+ if (urlMatch) out.push(['http_get', urlMatch[1], _httpGetOpts(attrStr)]);
2004
+ }
2005
+ for (const m of text.matchAll(/<http_get>([\s\S]*?)<\/http_get>/g)) {
2006
+ const inner = m[1].trim();
2007
+ if (!inner) continue;
2008
+ const urlAttr = inner.match(/url="([^"]+)"/) || inner.match(/url='([^']+)'/);
2009
+ out.push(['http_get', urlAttr ? urlAttr[1] : _unwrapInnerTag(inner).trim(), _httpGetOpts(inner)]);
2010
+ }
2011
+ return out;
2012
+ },
2013
+ fromParams: (p) => (p.url ? ['http_get', p.url, _httpGetOptsFromParams(p)] : null),
2014
+ permission: (ctx, args) => ({ actionType: 'net', description: `HTTP GET ${args[0]}`, tag: 'http_get' }),
2015
+ execute: async (ctx, args, options) => {
2016
+ const signal = (options && options.signal) || null;
2017
+ const [arg0 = null, callOpts = {}] = args;
2018
+ const { _log, logToolCall, _dryRun, _skippedOps, getConfig, webChat, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2019
+ const url = arg0;
2020
+ if (_dryRun) {
2021
+ _skippedOps.push({ category: 'net', symbol: '↓', desc: `GET ${url}` });
2022
+ logToolCall('http_get', { url }, false, 'dry-run');
2023
+ return { status: 'dry-run', message: 'dry-run: network call skipped' };
2024
+ }
2025
+ // Validate/normalize the URL BEFORE constructing any request. A malformed
2026
+ // URL (or a non-http(s) scheme, empty/whitespace, non-string) is a clean
2027
+ // tool error the agent can recover from — never an uncaught throw out of
2028
+ // the executor. Same shape as the request-error path below.
2029
+ const validatedUrl = _validateFetchUrl(url);
2030
+ if (validatedUrl.error) {
2031
+ _log(` ${FG_RED}✗ ${validatedUrl.error}${RST}`);
2032
+ logToolCall('http_get', { url }, true, 'error');
2033
+ return validatedUrl;
2034
+ }
2035
+ const normalizedUrl = validatedUrl.url;
2036
+ const httpCfg = getConfig ? getConfig() : {};
2037
+ const reqTimeoutMs = Math.max(15000, httpCfg.request_timeout_ms || 15000);
2038
+ // Byte cap is now ONLY a transfer/disk guard — the context-protection
2039
+ // mechanism is the post-extraction TOKEN budget (web.max_content_tokens).
2040
+ const maxBytes = Math.max(1024, httpCfg.http_fetch_max_bytes || 262144);
2041
+ const userAgent = _resolveUserAgent(httpCfg);
2042
+ const webCfg = (httpCfg.web && typeof httpCfg.web === 'object') ? httpCfg.web : {};
2043
+ const maxContentTokens = Number.isFinite(webCfg.max_content_tokens) && webCfg.max_content_tokens > 0
2044
+ ? webCfg.max_content_tokens : 6000;
2045
+ const summaryModel = typeof webCfg.summary_model === 'string' && webCfg.summary_model.trim()
2046
+ ? webCfg.summary_model.trim() : undefined;
2047
+ // Resolve the web-fetch mode (Task W.1b). Precedence: an explicit per-call
2048
+ // `mode` (the canonical enum the parser emits) beats the deprecated legacy
2049
+ // booleans (summarize/raw — which may still arrive directly on callOpts from
2050
+ // older callers), which beat the global config default (web.summarize mapped
2051
+ // to summarized/extracted). Summary needs an injected LLM call (webChat);
2052
+ // without one (headless/oneshot without an api client) the summarized branch
2053
+ // degrades to extracted Markdown, never the raw page.
2054
+ const mode = (callOpts && WEB_FETCH_MODES.includes(callOpts.mode) && callOpts.mode)
2055
+ || _legacyBoolsToMode(
2056
+ typeof (callOpts && callOpts.summarize) === 'boolean' ? callOpts.summarize : undefined,
2057
+ typeof (callOpts && callOpts.raw) === 'boolean' ? callOpts.raw : undefined,
2058
+ )
2059
+ || (webCfg.summarize !== false ? 'summarized' : 'extracted');
2060
+ const intent = callOpts && typeof callOpts.intent === 'string' ? callOpts.intent : '';
2061
+ const startedAt = Date.now();
2062
+ return new Promise((resolve) => {
2063
+ let abortedByUser = false;
2064
+ let onAbort = null;
2065
+ let activeReq = null;
2066
+ const detachAbort = () => {
2067
+ if (onAbort && signal) {
2068
+ try { signal.removeEventListener('abort', onAbort); } catch {}
2069
+ onAbort = null;
2070
+ }
2071
+ };
2072
+ const finishAborted = () => {
2073
+ logToolCall('http_get', { url }, true, 'aborted');
2074
+ resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
2075
+ };
2076
+ if (signal) {
2077
+ if (signal.aborted) {
2078
+ abortedByUser = true;
2079
+ finishAborted();
2080
+ return;
2081
+ }
2082
+ onAbort = () => {
2083
+ abortedByUser = true;
2084
+ try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
2085
+ };
2086
+ signal.addEventListener('abort', onAbort, { once: true });
2087
+ }
2088
+
2089
+ function doGet(target, redirectsLeft) {
2090
+ const proto = target.startsWith('https') ? https : http;
2091
+ let req;
2092
+ try {
2093
+ req = proto.get(target, { headers: { 'User-Agent': userAgent } }, (res) => {
2094
+ if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
2095
+ res.resume();
2096
+ // A redirect Location may be relative or malformed — resolve it
2097
+ // against the current target and validate, so a bad redirect is a
2098
+ // clean tool error rather than a synchronous throw in this callback.
2099
+ const nextUrl = _validateFetchUrl(res.headers.location, target);
2100
+ if (nextUrl.error) {
2101
+ detachAbort();
2102
+ _log(` ${FG_RED}✗ ${nextUrl.error}${RST}`);
2103
+ logToolCall('http_get', { url: target }, true, 'error');
2104
+ return resolve(nextUrl);
2105
+ }
2106
+ return doGet(nextUrl.url, redirectsLeft - 1);
2107
+ }
2108
+ const bufs = [];
2109
+ let totalBytes = 0;
2110
+ let capped = false;
2111
+ res.on('data', (chunk) => {
2112
+ totalBytes += chunk.length;
2113
+ if (!capped) {
2114
+ if (totalBytes <= maxBytes) {
2115
+ bufs.push(chunk);
2116
+ } else {
2117
+ const keep = maxBytes - (totalBytes - chunk.length);
2118
+ if (keep > 0) bufs.push(chunk.slice(0, keep));
2119
+ capped = true;
2120
+ }
2121
+ }
2122
+ });
2123
+ res.on('end', () => {
2124
+ if (abortedByUser) return;
2125
+ detachAbort();
2126
+ const kept = Buffer.concat(bufs);
2127
+ const keptBytes = kept.length;
2128
+ const rawBody = kept.toString('utf8');
2129
+ const contentType = res.headers && res.headers['content-type'];
2130
+ const statusCode = res.statusCode;
2131
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}HTTP GET ${target} (${statusCode}, ${totalBytes} bytes${capped ? `, transfer-capped to ${keptBytes}` : ''})${RST}`);
2132
+ logToolCall('http_get', { url: target }, true, statusCode < 400 ? 'ok' : 'error');
2133
+ // Stage 1+2+3: extract main content → Markdown → (optional) summary.
2134
+ // The RAW page never enters the main context — only the processed
2135
+ // result does. Fully contained: any pipeline error degrades to the
2136
+ // capped extracted Markdown (and as a last resort the crude-stripped
2137
+ // text), NEVER the raw HTML.
2138
+ (async () => {
2139
+ let result;
2140
+ try {
2141
+ result = await processWebContent({
2142
+ rawBody, contentType, url: target, statusCode,
2143
+ totalBytes, transferCapped: capped,
2144
+ mode, intent, summaryModel, maxContentTokens,
2145
+ webChat, signal,
2146
+ });
2147
+ } catch (err) {
2148
+ // Defensive: extraction itself should not throw, but if it does,
2149
+ // fall back to a crude tag-strip rather than dumping raw HTML.
2150
+ const { stripTagsCrude } = require('./web-extract');
2151
+ const safe = capToTokens(stripTagsCrude(rawBody), maxContentTokens, defaultEstimate);
2152
+ result = { status_code: statusCode, body: safe.text, bytes: totalBytes,
2153
+ kind: 'text', extracted: false, summarized: false, processing_error: err.message };
2154
+ }
2155
+ resolve(result);
2156
+ })();
2157
+ });
2158
+ });
2159
+ } catch (err) {
2160
+ // Defense-in-depth: the URL is validated before we get here, but any
2161
+ // synchronous throw from proto.get must still become a tool error,
2162
+ // never escape the executor.
2163
+ detachAbort();
2164
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
2165
+ logToolCall('http_get', { url: target }, true, 'error');
2166
+ resolve({ error: `Invalid URL: ${err.message}`, error_code: err.code || 'ERR_INVALID_URL' });
2167
+ return;
2168
+ }
2169
+ activeReq = req;
2170
+ req.on('error', (err) => {
2171
+ if (abortedByUser) {
2172
+ detachAbort();
2173
+ finishAborted();
2174
+ return;
2175
+ }
2176
+ detachAbort();
2177
+ _log(` ${FG_RED}✗ ${err.message}${RST}`);
2178
+ logToolCall('http_get', { url: target }, true, 'error');
2179
+ resolve({ error: err.message, error_code: err.code });
2180
+ });
2181
+ req.setTimeout(reqTimeoutMs, () => {
2182
+ req.destroy();
2183
+ detachAbort();
2184
+ logToolCall('http_get', { url: target }, true, 'error');
2185
+ resolve({ error: 'Request timeout', error_code: 'ETIMEDOUT' });
2186
+ });
2187
+ }
2188
+ doGet(normalizedUrl, 5);
2189
+ });
2190
+ },
2191
+ },
2192
+ {
2193
+ // Web search (Task W.2b). Calls the backend POST /api/search via the
2194
+ // injected ctx.webSearch (api client's dashboardSearch → SearXNG) and
2195
+ // returns a COMPACT { title, url, snippet } list — never page content
2196
+ // (that is http_get's job). The spec steers the model to read the snippets,
2197
+ // pick the relevant result(s), and fetch only those with http_get, instead
2198
+ // of blindly multi-fetching. The backend is on another machine and may be
2199
+ // down/unreachable/erroring — every failure mode is caught and surfaced as a
2200
+ // clean tool error; NOTHING throws out of the executor (the http_get-fix
2201
+ // lesson). Results are untrusted external content, fenced in lib/agent.js.
2202
+ tool: 'web_search',
2203
+ specNames: ['web_search'],
2204
+ tags: ['web_search'],
2205
+ parseXml: (text) => {
2206
+ const out = [];
2207
+ for (const m of text.matchAll(/<web_search\b([^>]*?)(?:><\/web_search>|\/>)/g)) {
2208
+ const attrStr = m[1];
2209
+ const qMatch = attrStr.match(/query="([^"]*)"/) || attrStr.match(/query='([^']*)'/);
2210
+ if (qMatch) out.push(['web_search', qMatch[1], _webSearchOpts(attrStr)]);
2211
+ }
2212
+ for (const m of text.matchAll(/<web_search>([\s\S]*?)<\/web_search>/g)) {
2213
+ const inner = m[1].trim();
2214
+ if (!inner) continue;
2215
+ const qAttr = inner.match(/query="([^"]*)"/) || inner.match(/query='([^']*)'/);
2216
+ out.push(['web_search', qAttr ? qAttr[1] : inner, _webSearchOpts(inner)]);
2217
+ }
2218
+ return out;
2219
+ },
2220
+ fromParams: (p) => (p.query ? ['web_search', String(p.query), _webSearchOptsFromParams(p)] : null),
2221
+ // A network read like http_get — same descriptor shape (net, gated; not a
2222
+ // privileged path). Performs no mutation.
2223
+ permission: (ctx, args) => ({ actionType: 'net', description: `Web search: ${args[0]}`, tag: 'web_search' }),
2224
+ execute: async (ctx, args, options) => {
2225
+ const signal = (options && options.signal) || null;
2226
+ const [arg0 = '', callOpts = {}] = args;
2227
+ const { _log, logToolCall, _dryRun, _skippedOps, webSearch, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2228
+ const query = typeof arg0 === 'string' ? arg0.trim() : '';
2229
+ if (!query) {
2230
+ logToolCall('web_search', { query: arg0 }, true, 'error');
2231
+ return { error: 'web search unavailable: empty query' };
2232
+ }
2233
+ if (_dryRun) {
2234
+ _skippedOps.push({ category: 'net', symbol: '⌕', desc: `search ${query}` });
2235
+ logToolCall('web_search', { query }, false, 'dry-run');
2236
+ return { status: 'dry-run', message: 'dry-run: web search skipped' };
2237
+ }
2238
+ if (typeof webSearch !== 'function') {
2239
+ logToolCall('web_search', { query }, true, 'error');
2240
+ return { error: 'web search unavailable: no backend client configured (available in interactive chat / the SDK with dashboard auth)' };
2241
+ }
2242
+ // Bound count BEFORE the backend call; the backend clamps further but a
2243
+ // huge value should never leave the client. An invalid/zero count is
2244
+ // dropped so the backend default applies.
2245
+ const count = _clampSearchCount(callOpts && callOpts.count);
2246
+ const limit = count || 10;
2247
+ try {
2248
+ const resp = await webSearch(query, count ? { count, signal } : { signal });
2249
+ const raw = resp && Array.isArray(resp.results) ? resp.results : [];
2250
+ const results = raw.slice(0, limit).map((r) => ({
2251
+ title: r && typeof r.title === 'string' ? r.title : '',
2252
+ url: r && typeof r.url === 'string' ? r.url : '',
2253
+ snippet: r && typeof r.snippet === 'string' ? r.snippet : '',
2254
+ }));
2255
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}web search "${query}" (${results.length} result${results.length === 1 ? '' : 's'})${RST}`);
2256
+ logToolCall('web_search', { query }, true, 'ok');
2257
+ return { query, count: results.length, results };
2258
+ } catch (err) {
2259
+ const reason = (err && err.message) ? err.message : String(err || 'unknown error');
2260
+ _log(` ${FG_RED}✗ web search unavailable: ${reason}${RST}`);
2261
+ logToolCall('web_search', { query }, true, 'error');
2262
+ return { error: `web search unavailable: ${reason}` };
2263
+ }
2264
+ },
2265
+ },
2266
+ {
2267
+ tool: 'ask_user',
2268
+ specNames: ['ask_user'],
2269
+ tags: ['ask_user'],
2270
+ parseXml: (text) => _matchDual(text, '<ask_user\\s+question=Q([^Q]+)Q\\s*(?:><\\/ask_user>|\\/>)').map((m) => ['ask_user', m[1]]),
2271
+ fromParams: (p) => (p.question ? ['ask_user', p.question] : null),
2272
+ permission: (ctx, args) => ({ actionType: 'user', description: `Ask user: ${args[0]}`, tag: 'ask_user' }),
2273
+ execute: async (ctx, args) => {
2274
+ const [arg0 = null] = args;
2275
+ const { _log, logToolCall, _parseNumberedOptions, permissionManager, writer, FG_YELLOW, FG_GRAY, RST, DIM } = ctx;
2276
+ const question = arg0;
2277
+ const options = _parseNumberedOptions(question);
2278
+ if (options.length >= 2) {
2279
+ const selected = await permissionManager.captureSelect({ options });
2280
+ logToolCall('ask_user', { question }, true, 'ok');
2281
+ return { question, answer: selected || options[0] };
2282
+ }
2283
+ if (!process.stdout.isTTY || process.stdin.isRaw) {
2284
+ writer.scrollback(`\n ${FG_YELLOW}?${RST} ${question}\n ${DIM}[auto-answering 'y']${RST}`);
2285
+ logToolCall('ask_user', { question }, true, 'ok');
2286
+ return { question, answer: 'y' };
2287
+ }
2288
+ process.stdout.write(`\n ${FG_YELLOW}?${RST} ${question}\n ${FG_GRAY}>${RST} `);
2289
+ const buf = Buffer.alloc(4096);
2290
+ let input = '';
2291
+ while (true) {
2292
+ const n = fs.readSync(0, buf, 0, 1);
2293
+ if (n === 0) break;
2294
+ const ch = buf[0];
2295
+ if (ch === 0x0a) break;
2296
+ if (ch === 0x0d) continue;
2297
+ input += String.fromCharCode(ch);
2298
+ }
2299
+ _log();
2300
+ logToolCall('ask_user', { question }, true, 'ok');
2301
+ return { question, answer: input };
2302
+ },
2303
+ },
2304
+ {
2305
+ tool: 'store_memory',
2306
+ specNames: ['store_memory'],
2307
+ tags: ['store_memory'],
2308
+ // QUIRK: store_memory value is captured raw (not trimmed), like write.
2309
+ parseXml: (text) => _matchDual(text, '<store_memory\\s+key=Q([^Q]+)Q>([\\s\\S]*?)<\\/store_memory>').map((m) => ['store_memory', m[1], m[2]]),
2310
+ fromParams: (p) => (p.key ? ['store_memory', p.key, p.value != null ? p.value : ''] : null),
2311
+ permission: (ctx, args) => ({ actionType: 'memory', description: `Store memory: ${args[0]}`, tag: 'store_memory' }),
2312
+ execute: async (ctx, args) => {
2313
+ const [arg0 = null, arg1 = null] = args;
2314
+ const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2315
+ const key = arg0;
2316
+ const value = arg1 || '';
2317
+ try {
2318
+ let mem = {};
2319
+ try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
2320
+ mem[key] = value;
2321
+ await fsp.mkdir(path.dirname(MEMORY_PATH), { recursive: true });
2322
+ await fsp.writeFile(MEMORY_PATH, JSON.stringify(mem, null, 2));
2323
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stored memory: ${key}${RST}`);
2324
+ logToolCall('store_memory', { key }, true, 'ok');
2325
+ return { status: 'ok', key };
2326
+ } catch (error) {
2327
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2328
+ logToolCall('store_memory', { key }, true, 'error');
2329
+ return { error: error.message };
2330
+ }
2331
+ },
2332
+ },
2333
+ {
2334
+ tool: 'recall_memory',
2335
+ specNames: ['recall_memory'],
2336
+ tags: ['recall_memory'],
2337
+ parseXml: (text) => _matchDual(text, '<recall_memory\\s+key=Q([^Q]+)Q\\s*(?:><\\/recall_memory>|\\/>)').map((m) => ['recall_memory', m[1]]),
2338
+ fromParams: (p) => (p.key ? ['recall_memory', p.key] : null),
2339
+ permission: () => null,
2340
+ execute: async (ctx, args) => {
2341
+ const [arg0 = null] = args;
2342
+ const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2343
+ const key = arg0;
2344
+ try {
2345
+ let mem = {};
2346
+ try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
2347
+ const found = key in mem;
2348
+ const value = found ? mem[key] : null;
2349
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Recalled memory: ${key}${RST}`);
2350
+ logToolCall('recall_memory', { key }, true, 'ok');
2351
+ return { key, value, found };
2352
+ } catch (error) {
2353
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2354
+ logToolCall('recall_memory', { key }, true, 'error');
2355
+ return { error: error.message };
2356
+ }
2357
+ },
2358
+ },
2359
+ {
2360
+ tool: 'list_memories',
2361
+ specNames: ['list_memories'],
2362
+ tags: ['list_memories'],
2363
+ parseXml: (text) => [...text.matchAll(/<list_memories\s*(?:><\/list_memories>|\/>)/g)].map(() => ['list_memories']),
2364
+ fromParams: () => ['list_memories'],
2365
+ permission: () => null,
2366
+ execute: async (ctx) => {
2367
+ const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
2368
+ try {
2369
+ let mem = {};
2370
+ try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
2371
+ const keys = Object.keys(mem);
2372
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${keys.length} memory key(s)${RST}`);
2373
+ logToolCall('list_memories', {}, true, 'ok');
2374
+ return { keys };
2375
+ } catch (error) {
2376
+ _log(` ${FG_RED}✗ ${error.message}${RST}`);
2377
+ logToolCall('list_memories', {}, true, 'error');
2378
+ return { error: error.message };
2379
+ }
2380
+ },
2381
+ },
2382
+ {
2383
+ tool: 'get_env',
2384
+ specNames: ['get_env'],
2385
+ tags: ['get_env'],
2386
+ parseXml: (text) => _inline(text, 'get_env', 'get_env'),
2387
+ fromParams: (p) => (p.name ? ['get_env', p.name] : null),
2388
+ permission: () => null,
2389
+ execute: async (ctx, args) => {
2390
+ const [arg0 = null] = args;
2391
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
2392
+ const varName = arg0;
2393
+ const value = process.env[varName];
2394
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Got env ${varName}${RST}`);
2395
+ logToolCall('get_env', { name: varName }, true, 'ok');
2396
+ return { name: varName, value: value !== undefined ? value : null };
2397
+ },
2398
+ },
2399
+ {
2400
+ tool: 'set_env',
2401
+ specNames: ['set_env'],
2402
+ tags: ['set_env'],
2403
+ parseXml: (text) => _matchDual(text, '<set_env\\s+name=Q([^Q]+)Q\\s+value=Q([^Q]*)Q\\s*(?:><\\/set_env>|\\/>)').map((m) => ['set_env', m[1], m[2]]),
2404
+ fromParams: (p) => (p.name ? ['set_env', p.name, p.value != null ? p.value : ''] : null),
2405
+ permission: (ctx, args) => ({ actionType: 'env', description: `Set env ${args[0]}=${args[1] || ''}`, tag: 'set_env' }),
2406
+ execute: async (ctx, args) => {
2407
+ const [arg0 = null, arg1 = null] = args;
2408
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
2409
+ const varName = arg0;
2410
+ const value = arg1 || '';
2411
+ process.env[varName] = value;
2412
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Set env ${varName}${RST}`);
2413
+ logToolCall('set_env', { name: varName }, true, 'ok');
2414
+ return { status: 'ok', name: varName };
2415
+ },
2416
+ },
2417
+ {
2418
+ tool: 'system_info',
2419
+ specNames: ['system_info'],
2420
+ tags: ['system_info'],
2421
+ parseXml: (text) => [...text.matchAll(/<system_info\s*(?:><\/system_info>|\/>)/g)].map(() => ['system_info']),
2422
+ fromParams: () => ['system_info'],
2423
+ permission: () => null,
2424
+ execute: async (ctx) => {
2425
+ const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
2426
+ const info = {
2427
+ platform: os.platform(),
2428
+ arch: os.arch(),
2429
+ hostname: os.hostname(),
2430
+ user: process.env.USER || process.env.USERNAME || '',
2431
+ total_mem_mb: Math.round(os.totalmem() / 1024 / 1024),
2432
+ free_mem_mb: Math.round(os.freemem() / 1024 / 1024),
2433
+ node_version: process.version,
2434
+ cwd: process.cwd(),
2435
+ };
2436
+ _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}System info: ${info.platform}/${info.arch}${RST}`);
2437
+ logToolCall('system_info', {}, true, 'ok');
2438
+ return info;
2439
+ },
2440
+ },
2441
+ ...GIT_TOOL_REGISTRY,
2442
+ ];
2443
+
2444
// Two indexes over the static registry, built once at module load:
//   _byName   — lower-cased spec name (TOOL_SPECS key / native function
//               name) → registry entry
//   _byAction — canonical action (tuple[0], i.e. entry.tool) → registry
//               entry, used for executor / permission dispatch
const _byName = new Map();
const _byAction = new Map();
TOOL_REGISTRY.forEach((entry) => {
  entry.specNames.forEach((specName) => {
    _byName.set(specName.toLowerCase(), entry);
  });
  _byAction.set(entry.tool, entry);
});
2452
+
2453
+ // ── Dynamic (runtime-registered) tools — MCP, Task 3.3 ─────────────────────
2454
+ //
2455
+ // Tools discovered at runtime (MCP servers) are registered here, SEPARATE from
2456
+ // the static TOOL_REGISTRY array above. This separation is deliberate: the
2457
+ // load-time parity check in lib/constants.js validates only the static set
2458
+ // (TAG_REGISTRY ↔ TOOL_SPECS ↔ TOOL_REGISTRY), and it runs once at module load,
2459
+ // before any MCP server has connected. Keeping dynamic tools out of that array
2460
+ // means MCP tools never break the parity invariant.
2461
+ //
2462
+ // Dispatch (entryForAction) and native mapping (fromInvoke) consult this map
2463
+ // AFTER the static one, so a dynamic tool can never shadow a built-in. Each
2464
+ // entry has the same shape as a static one — { tool, fromParams, execute,
2465
+ // permission, parseXml?, spec? } — so it dispatches through the agent loop
2466
+ // identically. `spec` (an OpenAI-format { description, parameters }) is surfaced
2467
+ // to the native function-calling `tools` array via dynamicToolSpecs().
2468
// canonical name (== entry.tool) → entry
const _dynamic = new Map();

/**
 * Find a runtime-registered tool by name.
 *
 * Tries the name exactly as given, then its lower-cased string form.
 *
 * @param {*} name  tool name; null/undefined never match
 * @returns {object|null} the registered entry, or null when there is none
 */
function _lookupDynamic(name) {
  if (name === null || name === undefined) return null;
  const exact = _dynamic.get(name);
  if (exact) return exact;
  const folded = _dynamic.get(String(name).toLowerCase());
  return folded || null;
}
2474
+
2475
/**
 * Register (or replace) a runtime tool under its canonical name.
 *
 * @param {object} entry  must carry a non-empty string `tool` plus the
 *   functions `execute`, `fromParams` and `permission`
 * @throws {Error} when the name or any of the three functions is missing;
 *   they are checked in the order listed above
 */
function registerDynamicTool(entry) {
  const hasName = Boolean(entry) && typeof entry.tool === 'string' && entry.tool !== '';
  if (!hasName) {
    throw new Error('registerDynamicTool: entry.tool (canonical name) is required');
  }
  for (const hook of ['execute', 'fromParams', 'permission']) {
    if (typeof entry[hook] !== 'function') {
      throw new Error(`registerDynamicTool(${entry.tool}): ${hook}() is required`);
    }
  }
  _dynamic.set(entry.tool, entry);
}
2490
+
2491
/**
 * Remove a runtime tool by the exact name it was registered under.
 *
 * @param {string} name  canonical tool name
 * @returns {boolean} true when an entry existed and was removed
 */
function unregisterDynamicTool(name) {
  const wasRegistered = _dynamic.delete(name);
  return wasRegistered;
}
2494
+
2495
/** Drop every runtime-registered tool; the static registry is untouched. */
function clearDynamicTools() {
  _dynamic.clear();
  return undefined;
}
2498
+
2499
/**
 * Snapshot of the runtime-registered tool entries, in registration order.
 *
 * @returns {object[]} a fresh array; mutating it does not affect the registry
 */
function dynamicToolEntries() {
  return Array.from(_dynamic.values());
}
2502
+
2503
/**
 * Collect the specs of all runtime tools that declare one.
 * Merged into the native function-calling tools array in lib/api.js.
 *
 * @returns {object} { name → { description, parameters } }; tools without a
 *   `spec` are left out
 */
function dynamicToolSpecs() {
  const specs = {};
  _dynamic.forEach((entry) => {
    if (entry.spec) specs[entry.tool] = entry.spec;
  });
  return specs;
}
2512
+
2513
/**
 * Turn a native function call into the canonical action tuple.
 *
 * Static tools are matched first (case-insensitively by spec name); runtime
 * tools are consulted only when no static tool matches.
 *
 * @param {string} toolName  function name from the model's tool call
 * @param {object} [params]  call arguments; defaults to {}
 * @returns {Array|null} whatever the entry's fromParams() returns, or null
 *   when no tool is known by that name
 */
function fromInvoke(toolName, params) {
  const lookupKey = (toolName || '').toLowerCase();
  let entry = _byName.get(lookupKey);
  if (!entry) entry = _lookupDynamic(toolName);
  return entry ? entry.fromParams(params || {}) : null;
}
2518
+
2519
/**
 * Resolve a canonical action to its registry entry for dispatch.
 *
 * @param {string} action  tuple[0] of a parsed tool call
 * @returns {object|null} the static entry if there is one, otherwise the
 *   runtime entry, otherwise null
 */
function entryForAction(action) {
  const builtin = _byAction.get(action);
  if (builtin) return builtin;
  return _lookupDynamic(action) || null;
}
2522
+
2523
/**
 * Names of the static tools only (the lower-cased spec names).
 *
 * Runtime tools MUST stay out of this list: the parity assertion in
 * lib/constants.js compares it against the static specs and would otherwise
 * see phantom entries.
 *
 * @returns {string[]} a fresh array of names
 */
function registryToolNames() {
  return Array.from(_byName.keys());
}
2528
+
2529
+ module.exports = {
2530
+ TOOL_REGISTRY,
2531
+ fromInvoke,
2532
+ entryForAction,
2533
+ registryToolNames,
2534
+ // Dynamic (runtime) tool registry — MCP (Task 3.3).
2535
+ registerDynamicTool,
2536
+ unregisterDynamicTool,
2537
+ clearDynamicTools,
2538
+ dynamicToolEntries,
2539
+ dynamicToolSpecs,
2540
+ // Exported for the grep/glob characterization + parity tests (Task 2.1).
2541
+ // The execute() bodies above use these same functions; tests drive both
2542
+ // engines explicitly to prove rg- and Node-path outputs are identical.
2543
+ _grepSearch,
2544
+ _globSearch,
2545
+ _detectRipgrep,
2546
+ // grep output modes + bound normalizers (Task W.5).
2547
+ GREP_OUTPUT_MODES,
2548
+ _normGrepMode,
2549
+ _normHeadLimit,
2550
+ _normOffset,
2551
+ // Exported for the web-fetch mode-resolution tests (Task W.1b).
2552
+ _httpGetOpts,
2553
+ _httpGetOptsFromParams,
2554
+ processWebContent,
2555
+ WEB_FETCH_MODES,
2556
+ // Exported for the URL-validation tests (fetch-url-validation.test.js).
2557
+ _validateFetchUrl,
2558
+ };