@semalt-ai/code 1.8.5 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +6 -1
- package/.github/workflows/ci.yml +69 -0
- package/CLAUDE.md +1584 -26
- package/README.md +147 -3
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +711 -104
- package/lib/api.js +213 -49
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +306 -0
- package/lib/commands/chat-slash.js +399 -0
- package/lib/commands/chat-turn.js +446 -0
- package/lib/commands/chat.js +403 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +333 -11
- package/lib/constants.js +372 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +167 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +264 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +100 -10
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +84 -5
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2558 -0
- package/lib/tool_specs.js +222 -2
- package/lib/tools.js +272 -1020
- package/lib/ui/format.js +22 -1
- package/lib/ui/input-field.js +16 -7
- package/lib/ui/status-bar.js +79 -11
- package/lib/ui/theme.js +1 -0
- package/lib/ui/web-activity.js +218 -0
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/background.test.js +414 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/executors.test.js +362 -0
- package/test/extract-tool-calls.test.js +315 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +142 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +203 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/max-iterations.test.js +216 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +356 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +163 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/result-cap.test.js +233 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/stream-parser.test.js +147 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/web-activity-ordering.test.js +194 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
|
@@ -0,0 +1,2558 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Runtime tool registry — one registration per tool.
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
//
|
|
7
|
+
// This is the single place that owns, per tool, EVERYTHING needed to recover,
|
|
8
|
+
// gate, and run a call — for BOTH transport paths:
|
|
9
|
+
//
|
|
10
|
+
// * parseXml(text) — XML/tag path (the ~25 regexes once inlined in
|
|
11
|
+
// extractToolCalls now live next to their tool).
|
|
12
|
+
// * fromParams(params) — native function-calling path (was mapInvokeToCall).
|
|
13
|
+
// * permission(ctx, args) — the gate descriptor (was the describePermission
|
|
14
|
+
// switch). Returns null for ungated read-only ops.
|
|
15
|
+
// * execute(ctx, args, opts) — the operation (was the agentExecFile branch).
|
|
16
|
+
//
|
|
17
|
+
// Both transports resolve to the SAME entry and produce the SAME [action, ...args]
|
|
18
|
+
// tuple, and dispatch (agentExecFile / describePermission in lib/tools.js) is a
|
|
19
|
+
// registry lookup keyed on the tool's canonical action.
|
|
20
|
+
//
|
|
21
|
+
// `ctx` is a dependency bag built once by createToolExecutor (lib/tools.js) and
|
|
22
|
+
// passed in at call time. It carries the factory-scoped collaborators (colors,
|
|
23
|
+
// permissionManager, getConfig) and the tools.js-internal helpers (isPathSafe,
|
|
24
|
+
// the sandbox/secret guards, _log, …). Passing them in — rather than requiring
|
|
25
|
+
// lib/tools.js here — is what keeps this module free of the tools.js ↔ registry
|
|
26
|
+
// require cycle. Executor/permission bodies were moved VERBATIM from agentExecFile
|
|
27
|
+
// / describePermission; the `const { … } = ctx` preamble re-binds the same names
|
|
28
|
+
// so the bodies below are unchanged.
|
|
29
|
+
//
|
|
30
|
+
// Adding a tool is now ONE registration object here + its TOOL_SPECS schema + its
|
|
31
|
+
// TAG_REGISTRY classification. The first two are asserted in lockstep by the
|
|
32
|
+
// load-time parity check in lib/constants.js (which also requires execute +
|
|
33
|
+
// permission on every non-wrapper entry).
|
|
34
|
+
//
|
|
35
|
+
// IMPORTANT — parse ORDER: extractToolCalls runs entries in array order; the
|
|
36
|
+
// per-format ordering is pinned by test/extract-tool-calls.test.js.
|
|
37
|
+
|
|
38
|
+
const fs = require('fs');
|
|
39
|
+
const fsp = require('fs/promises');
|
|
40
|
+
const path = require('path');
|
|
41
|
+
const os = require('os');
|
|
42
|
+
const http = require('http');
|
|
43
|
+
const https = require('https');
|
|
44
|
+
const { spawnSync } = require('child_process');
|
|
45
|
+
const { extractContent, capToTokens, defaultEstimate, markupEstimate, MARKUP_CHARS_PER_TOKEN, classifyContentType } = require('./web-extract');
|
|
46
|
+
const { summarizeWebContent } = require('./web-summarize');
|
|
47
|
+
|
|
48
|
+
// Resolve the User-Agent for the fetch tools (Task W.3 Part 2). A fixed,
|
|
49
|
+
// realistic browser UA defeats SIMPLE UA-based bot-blocking (sites that 403/406
|
|
50
|
+
// an empty/curl-like UA). Operator-overridable via config.web.user_agent;
|
|
51
|
+
// deliberately NOT model-selectable — the agent does not control how the tool
|
|
52
|
+
// presents itself to the outside, so there is no UA parameter in the tool spec.
|
|
53
|
+
// Reads the already-normalized config (getConfig() returns web.user_agent set to
|
|
54
|
+
// the override or the default) but falls back defensively to DEFAULT_USER_AGENT
|
|
55
|
+
// for any partially-built config. The constant is required LAZILY because
|
|
56
|
+
// constants.js requires this module at load time (circular dep) — a top-level
|
|
57
|
+
// destructure would capture `undefined`; by call time constants is fully loaded.
|
|
58
|
+
// Pick the User-Agent header value: a non-blank string at cfg.web.user_agent
// (trimmed) wins; anything else falls back to DEFAULT_USER_AGENT, which is
// required lazily at call time rather than at module load.
function _resolveUserAgent(cfg) {
  const hasWebSection = cfg && cfg.web && typeof cfg.web === 'object';
  const configured = hasWebSection ? cfg.web.user_agent : undefined;
  if (typeof configured === 'string') {
    const trimmed = configured.trim();
    if (trimmed) return trimmed;
  }
  return require('./constants').DEFAULT_USER_AGENT;
}
|
|
64
|
+
|
|
65
|
+
// http_get per-call options (Task W.1 / W.1b). The agent may override the global
|
|
66
|
+
// web-fetch behavior for a single fetch via a three-level `mode` enum:
|
|
67
|
+
// mode="summarized" (default) → extract → Markdown → secondary-LLM summary.
|
|
68
|
+
// mode="extracted" → extract → Markdown, NO summary (exact snippets).
|
|
69
|
+
// mode="raw" → bypass extraction entirely; return the ORIGINAL
|
|
70
|
+
// fetched HTML/content (token-capped, fenced) — for analyzing a page's
|
|
71
|
+
// markup/CSS/JS/structure, the one task extraction destroys.
|
|
72
|
+
// intent="…" → the reason for fetching, focusing the summary.
|
|
73
|
+
// Deprecated boolean aliases (kept for back-compat): summarize="false" and
|
|
74
|
+
// raw="true" both map to `extracted`. Precedence: an explicit `mode` always
|
|
75
|
+
// beats the legacy booleans; with neither, the global config default applies.
|
|
76
|
+
// The accepted `mode` values for http_get; _httpGetMode rejects anything else.
const WEB_FETCH_MODES = ['summarized', 'extracted', 'raw'];
|
|
77
|
+
|
|
78
|
+
// Lenient boolean parser for attribute values. The value is stringified,
// trimmed and lower-cased; true/1/yes/on → true, false/0/no/off → false.
// null/undefined and any unrecognized token yield undefined ("not specified").
function _httpGetBool(v) {
  if (v === null || v === undefined) return undefined;
  switch (String(v).trim().toLowerCase()) {
    case 'true':
    case '1':
    case 'yes':
    case 'on':
      return true;
    case 'false':
    case '0':
    case 'no':
    case 'off':
      return false;
    default:
      return undefined;
  }
}
|
|
85
|
+
|
|
86
|
+
// Normalize a `mode` value to one of WEB_FETCH_MODES, or undefined if unknown.
|
|
87
|
+
// Canonicalize a `mode` value: stringify, trim, lower-case, then accept it only
// when it is a member of WEB_FETCH_MODES. Returns undefined otherwise.
function _httpGetMode(v) {
  if (v === null || v === undefined) return undefined;
  const candidate = String(v).trim().toLowerCase();
  if (!WEB_FETCH_MODES.includes(candidate)) return undefined;
  return candidate;
}
|
|
92
|
+
|
|
93
|
+
// Map a legacy boolean pair to a mode (explicit `mode` is resolved by the caller
|
|
94
|
+
// first and takes precedence). summarize=false / raw=true → extracted.
|
|
95
|
+
// Translate the deprecated boolean pair into a mode. `summarize` is consulted
// first; `raw` only matters when `summarize` is undefined. Returns undefined
// when neither flag was supplied.
function _legacyBoolsToMode(summarize, raw) {
  if (summarize !== undefined) {
    if (summarize) return 'summarized';
    return 'extracted';
  }
  if (raw === undefined) return undefined;
  // Legacy raw=true maps to `extracted`, not to the `raw` mode.
  if (raw) return 'extracted';
  return 'summarized';
}
|
|
100
|
+
|
|
101
|
+
// Validate + normalize a URL for the fetch tools (http_get / download).
|
|
102
|
+
//
|
|
103
|
+
// `new URL(...)` — and `http.get`/`https.get`'s own internal parse — throws
|
|
104
|
+
// SYNCHRONOUSLY for a malformed URL, before any request starts. That throw
|
|
105
|
+
// happens OUTSIDE the request-level `.on('error')` handlers (which only catch
|
|
106
|
+
// async network failures: EHOSTUNREACH, DNS, timeout, …), so a bad URL would
|
|
107
|
+
// escape the executor as an uncaught exception and crash the whole session
|
|
108
|
+
// instead of becoming a recoverable tool error. The model routinely produces
|
|
109
|
+
// malformed/guessed URLs (invented domains, non-ASCII hosts, stray chars), so
|
|
110
|
+
// every fetch must validate up front and turn ANY bad input into a clean tool
|
|
111
|
+
// error in the SAME `{ error, error_code }` shape the network-failure path
|
|
112
|
+
// returns — so the agent handles it identically to EHOSTUNREACH/timeout.
|
|
113
|
+
//
|
|
114
|
+
// Returns `{ url }` (the normalized href) on success, or `{ error, error_code }`
|
|
115
|
+
// on failure. Only http/https schemes are allowed; everything else (file:, ftp:,
|
|
116
|
+
// javascript:, data:, …) is refused (these parse cleanly but must never be
|
|
117
|
+
// fetched). `base` (optional) resolves a relative URL — used for redirect
|
|
118
|
+
// `Location` headers, which are often relative.
|
|
119
|
+
// Validate a URL before any request is issued. Never throws: every rejection is
// returned as `{ error, error_code }`, success as `{ url }` (the parsed href).
// `base`, when truthy, is used to resolve a relative `raw`.
function _validateFetchUrl(raw, base) {
  const reject = (error, error_code = 'ERR_INVALID_URL') => ({ error, error_code });

  if (typeof raw !== 'string') {
    const got = raw === null ? 'null' : typeof raw;
    return reject(`Invalid URL: expected a string, got ${got}`);
  }

  const candidate = raw.trim();
  if (!candidate) return reject('Invalid URL: empty URL');

  let target;
  try {
    target = base ? new URL(candidate, base) : new URL(candidate);
  } catch (err) {
    // The URL constructor throws synchronously on malformed input.
    return reject(`Invalid URL: ${err.message}`);
  }

  const { protocol } = target;
  const isHttp = protocol === 'http:' || protocol === 'https:';
  if (!isHttp) {
    return reject(
      `Invalid URL: unsupported protocol "${protocol}" (only http and https are allowed)`,
      'ERR_INVALID_PROTOCOL',
    );
  }
  return { url: target.href };
}
|
|
144
|
+
|
|
145
|
+
// XML/tag path: pull the per-call http_get options out of a tag's attribute
// string. An explicit valid `mode` wins; otherwise the legacy `summarize` /
// `raw` booleans are consulted. A non-empty `intent` is copied through as-is.
function _httpGetOpts(attrStr) {
  const source = String(attrStr || '');
  // Double-quoted form is tried first, then the single-quoted form.
  const readAttr = (name) => {
    const hit = source.match(new RegExp(`${name}="([^"]*)"`))
      || source.match(new RegExp(`${name}='([^']*)'`));
    return hit ? hit[1] : undefined;
  };

  const opts = {};
  const resolvedMode = _httpGetMode(readAttr('mode'))
    || _legacyBoolsToMode(_httpGetBool(readAttr('summarize')), _httpGetBool(readAttr('raw')));
  if (resolvedMode) opts.mode = resolvedMode;

  const intent = readAttr('intent');
  if (intent != null && intent !== '') opts.intent = intent;
  return opts;
}
|
|
162
|
+
|
|
163
|
+
// Native function-calling path: build the per-call http_get options from the
// decoded params object.
//   p.mode      — normalized via _httpGetMode; an explicit valid mode wins.
//   p.summarize / p.raw — legacy aliases, honored only when they are real
//                 booleans and no valid mode was given.
//   p.intent    — kept (trimmed) when it is a non-blank string.
// A missing or non-object `p` yields `{}` instead of throwing, matching how
// _webSearchOptsFromParams tolerates an absent params bag.
function _httpGetOptsFromParams(p) {
  const params = p && typeof p === 'object' ? p : {};
  const opts = {};
  const mode = _httpGetMode(params.mode);
  if (mode) opts.mode = mode;
  else {
    const summarize = typeof params.summarize === 'boolean' ? params.summarize : undefined;
    const raw = typeof params.raw === 'boolean' ? params.raw : undefined;
    const legacy = _legacyBoolsToMode(summarize, raw);
    if (legacy) opts.mode = legacy;
  }
  if (typeof params.intent === 'string' && params.intent.trim()) opts.intent = params.intent.trim();
  return opts;
}
|
|
176
|
+
|
|
177
|
+
// web_search per-call options (Task W.2b). Only `count` today — bounded so a
|
|
178
|
+
// huge value never leaves the client; the backend clamps further. Returns
|
|
179
|
+
// `undefined` for a missing/invalid/zero count so the backend default applies.
|
|
180
|
+
const _WEB_SEARCH_MAX_COUNT = 10;
// Parse a requested result count. Missing, empty, non-numeric, zero and negative
// inputs give undefined; otherwise the base-10 integer is capped at
// _WEB_SEARCH_MAX_COUNT.
function _clampSearchCount(v) {
  if (v === null || v === undefined || v === '') return undefined;
  const requested = parseInt(v, 10);
  const usable = Number.isFinite(requested) && requested > 0;
  return usable ? Math.min(requested, _WEB_SEARCH_MAX_COUNT) : undefined;
}
|
|
187
|
+
|
|
188
|
+
// XML/tag path: read the optional `count` attribute (double-quoted form first,
// then single-quoted) and return `{ count }` when it clamps to a usable value,
// else `{}`.
function _webSearchOpts(attrStr) {
  const source = String(attrStr || '');
  const hit = source.match(/count="([^"]*)"/) || source.match(/count='([^']*)'/);
  const count = _clampSearchCount(hit ? hit[1] : undefined);
  const opts = {};
  if (count) opts.count = count;
  return opts;
}
|
|
194
|
+
|
|
195
|
+
// Native function-calling path: same contract as the tag form, reading
// `count` from the params object. A falsy `p` is passed to the clamp unchanged.
function _webSearchOptsFromParams(p) {
  const requested = p ? p.count : p;
  const count = _clampSearchCount(requested);
  const opts = {};
  if (count) opts.count = count;
  return opts;
}
|
|
199
|
+
|
|
200
|
+
// The web-fetch pipeline (Task W.1 / W.1b), shared by http_get's execute. Turns
|
|
201
|
+
// a fetched body into the content that enters the main context. The `mode` enum
|
|
202
|
+
// selects the depth of processing:
|
|
203
|
+
// raw → bypass extraction ENTIRELY; return the ORIGINAL fetched content
|
|
204
|
+
// (token-capped). For analyzing a page's HTML/CSS/JS/structure —
|
|
205
|
+
// the one task extraction destroys (Task W.1b).
|
|
206
|
+
// extracted → extract main content → Markdown (HTML only; json/text/markdown
|
|
207
|
+
// pass through untouched so they are never mangled), token-cap it,
|
|
208
|
+
// NO secondary summary.
|
|
209
|
+
// summarized → as `extracted`, then summarize via a secondary cheap LLM call —
|
|
210
|
+
// only the summary enters context; the extracted full text never
|
|
211
|
+
// does.
|
|
212
|
+
// Context protection (token-cap via web.max_content_tokens) applies in EVERY
|
|
213
|
+
// mode, including raw (raw HTML is token-heavier, so it matters more, not less).
|
|
214
|
+
// Containment: a summarizer failure falls back to the capped extracted Markdown,
|
|
215
|
+
// NEVER the raw page. Network-free here (the LLM call is the injected webChat).
|
|
216
|
+
// Turn a fetched body into the result object for http_get.
//   mode === 'raw'        → classify the body, token-cap the ORIGINAL content
//                           (markup-aware estimate for html), no extraction.
//   any other mode        → extractContent() → token-cap the Markdown.
//   mode === 'summarized' → additionally summarize, but only for non-empty html
//                           and only when `webChat` is a function; a summarizer
//                           failure degrades to the capped Markdown and records
//                           `summary_error`.
async function processWebContent({
  rawBody, contentType, url, statusCode, totalBytes, transferCapped,
  mode, intent, summaryModel, maxContentTokens, webChat, signal,
}) {
  if (mode === 'raw') {
    const kind = classifyContentType(contentType, url, rawBody);
    // Markup is capped with the markup estimate and its matching char budget;
    // everything else stays on the default (prose) estimate.
    let rawCap;
    if (kind === 'html') {
      rawCap = capToTokens(rawBody, maxContentTokens, markupEstimate, MARKUP_CHARS_PER_TOKEN);
    } else {
      rawCap = capToTokens(rawBody, maxContentTokens, defaultEstimate);
    }
    return {
      status_code: statusCode,
      bytes: totalBytes,
      kind,
      mode: 'raw',
      extracted: false,
      summarized: false,
      content_tokens: rawCap.tokens,
      content_truncated: rawCap.truncated,
      transfer_capped: !!transferCapped,
      body: rawCap.text,
    };
  }

  const extraction = extractContent({ body: rawBody, contentType, url });
  const { kind, markdown, title, extracted } = extraction;
  const capped = capToTokens(markdown, maxContentTokens, defaultEstimate);

  const base = {
    status_code: statusCode,
    bytes: totalBytes,
    kind,
    mode,
    title: title || undefined,
    extracted,
    content_tokens: capped.tokens,
    content_truncated: capped.truncated,
    transfer_capped: !!transferCapped,
  };

  // Shared result for every path that does not end in a successful summary.
  const unsummarized = (extra) => ({ ...base, body: capped.text, summarized: false, ...extra });

  const hasHtmlText = kind === 'html' && capped.text.trim().length > 0;
  const shouldSummarize = mode === 'summarized' && hasHtmlText && typeof webChat === 'function';
  if (!shouldSummarize) return unsummarized();

  try {
    const summary = await summarizeWebContent({
      markdown: capped.text, intent, chat: webChat, model: summaryModel, signal,
    });
    return { ...base, body: summary, summarized: true };
  } catch (err) {
    // Fall back to the capped extracted Markdown, never the raw page.
    return unsummarized({ summary_error: err.message });
  }
}
|
|
278
|
+
|
|
279
|
+
// ── XML parse helpers (moved from lib/tools.js) ────────────────────────────
|
|
280
|
+
|
|
281
|
+
// Run `template` twice as a global regex — once with every `Q` replaced by a
// double quote, once by a single quote — and return all match objects,
// double-quote matches first.
function _matchDual(text, template) {
  return ['"', "'"].flatMap((quote) => {
    const pattern = new RegExp(template.replace(/Q/g, quote), 'g');
    return [...text.matchAll(pattern)];
  });
}
|
|
289
|
+
|
|
290
|
+
// If the (trimmed) value is exactly one element `<tag …>…</tag>`, return its
// trimmed inner text. Otherwise return the input untouched (not trimmed).
// null/undefined pass straight through.
function _unwrapInnerTag(inner) {
  if (inner === null || inner === undefined) return inner;
  const wrapped = /^<(\w+)(?:\s[^>]*)?>([\s\S]*)<\/\1>$/.exec(String(inner).trim());
  return wrapped ? wrapped[2].trim() : inner;
}
|
|
297
|
+
|
|
298
|
+
// read_file pagination rail (Task W.7). Parses both forms in one pass and
|
|
299
|
+
// resolves the optional start_line/end_line/show_line_numbers attributes onto the
|
|
300
|
+
// tuple ['read', path, startLine|null, endLine|null, showLineNumbers]. Absent
|
|
301
|
+
// range → null (parity with fromParams), so the formatter's defaults apply. Path
|
|
302
|
+
// comes from the `path` attr or the inline body (the historical two forms).
|
|
303
|
+
// Collect every <read_file …/> or <read_file …>…</read_file> in `text` as a
// ['read', path, startLine|null, endLine|null, showLineNumbers] tuple.
// The path comes from the `path` attribute, or failing that from the tag body;
// a tag yielding no path is skipped.
function _parseReadTag(text) {
  // Attribute lookup: double-quoted form first, then single-quoted; null if absent.
  const readAttr = (attrs, key) => {
    const hit = attrs.match(new RegExp(`${key}="([^"]*)"`))
      || attrs.match(new RegExp(`${key}='([^']*)'`));
    return hit ? hit[1] : null;
  };
  // Base-10 integer or null (absent / unparsable).
  const toInt = (value) => {
    if (value == null) return null;
    const parsed = parseInt(value, 10);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const calls = [];
  const tagRe = /<read_file\b([^>]*?)(?:\/>|>([\s\S]*?)<\/read_file>)/g;
  for (const [, rawAttrs, rawBody] of text.matchAll(tagRe)) {
    const attrs = rawAttrs || '';
    const body = rawBody != null ? rawBody : '';

    let target = readAttr(attrs, 'path');
    if (target == null) {
      target = _unwrapInnerTag(body).trim() || null;
    }
    if (target == null) continue;

    const flag = readAttr(attrs, 'show_line_numbers');
    const showLineNumbers = flag === 'true' || flag === '1' || flag === 'yes';
    calls.push([
      'read',
      target,
      toInt(readAttr(attrs, 'start_line')),
      toInt(readAttr(attrs, 'end_line')),
      showLineNumbers,
    ]);
  }
  return calls;
}
|
|
323
|
+
|
|
324
|
+
// Match every `<tag>body</tag>` whose tag is in `tagAlternation` (a regex
// alternation such as "a|b") and emit [action, unwrapped-trimmed-body, ...extraArgs].
function _inline(text, tagAlternation, action, extraArgs = []) {
  const tagRe = new RegExp(`<(?:${tagAlternation})>([\\s\\S]*?)<\\/(?:${tagAlternation})>`, 'g');
  return Array.from(
    text.matchAll(tagRe),
    (hit) => [action, _unwrapInnerTag(hit[1]).trim(), ...extraArgs],
  );
}
|
|
330
|
+
|
|
331
|
+
// The full ctx destructure, reused at the top of every execute/permission so the
|
|
332
|
+
// moved bodies see the same free names they had inside the createToolExecutor
|
|
333
|
+
// closure. Unused names in any given body are harmless.
|
|
334
|
+
// const CTX = (ctx) => ... (we inline the destructure literally for clarity)
|
|
335
|
+
|
|
336
|
+
// ── write/append share one body in agentExecFile; keep that sharing here ────
|
|
337
|
+
// Shared executor for the write (`write_file`) and append (`append_file`) tools.
//
//   ctx     — dependency bag; the helpers used here are destructured below.
//   action  — 'write' replaces the file; any other value appends.
//   args    — [filePath, content]; both default to null when absent.
//   options — accepted for signature parity; only `signal` is read (unused).
//
// Checks run in this order, each returning early:
//   1. permissionManager.readonlyBlock(tag) — returned as-is when truthy.
//   2. isProtectedConfigPath(filePath)      → _protectedConfigWriteError(filePath)
//   3. !isPathSafe(filePath)                → _sandboxError(filePath)
//   4. _dryRun — records the skipped op and returns a `dry-run` status.
// Otherwise the parent directory is created as needed and the content written.
//
// Returns `{ status: 'ok', path, bytes }` on success or `{ error }` on an I/O
// failure (never throws for those). `bytes` is the UTF-8 byte length of the
// written content — not the string's UTF-16 length, which under-reports any
// non-ASCII text.
async function _execWriteAppend(ctx, action, args, options) {
  const signal = (options && options.signal) || null; // eslint-disable-line no-unused-vars
  const [filePath = null, content = null] = args;
  const {
    _log, logToolCall, isPathSafe, isProtectedConfigPath, _sandboxError,
    _protectedConfigWriteError, _dryRun, _skippedOps, permissionManager,
    FG_GREEN, FG_GRAY, FG_RED, RST,
  } = ctx;

  const isWrite = action === 'write';
  const tag = isWrite ? 'write_file' : 'append_file';

  const blocked = permissionManager.readonlyBlock(tag);
  if (blocked) {
    logToolCall(tag, { path: filePath, content }, false, 'denied');
    return blocked;
  }

  if (isProtectedConfigPath(filePath)) {
    logToolCall(tag, { path: filePath }, false, 'denied');
    return _protectedConfigWriteError(filePath);
  }

  if (!isPathSafe(filePath)) {
    logToolCall(tag, { path: filePath }, false, 'denied');
    return _sandboxError(filePath);
  }

  if (_dryRun) {
    const verb = isWrite ? 'write' : 'append';
    _skippedOps.push({ category: 'file', symbol: '✎', desc: `${verb} ${filePath}` });
    logToolCall(tag, { path: filePath }, false, 'dry-run');
    return { status: 'dry-run', message: 'dry-run: write skipped', path: filePath };
  }

  try {
    const dir = path.dirname(filePath);
    if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
    const payload = content || '';
    if (isWrite) await fsp.writeFile(filePath, payload);
    else await fsp.appendFile(filePath, payload);
    const verb = isWrite ? 'Wrote' : 'Appended to';
    _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}${verb} ${filePath}${RST}`);
    logToolCall(tag, { path: filePath, content }, true, 'ok');
    // Strings are measured in encoded bytes; any other accepted payload keeps
    // its own `.length`, as before.
    const bytes = typeof payload === 'string' ? Buffer.byteLength(payload, 'utf8') : payload.length;
    return { status: 'ok', path: filePath, bytes };
  } catch (error) {
    _log(`  ${FG_RED}✗ ${error.message}${RST}`);
    logToolCall(tag, { path: filePath, content }, true, 'error');
    return { error: error.message };
  }
}
|
|
384
|
+
|
|
385
|
+
// Permission descriptor for write_file / append_file. Renders a diff of the
// file's current text (empty when it cannot be read) against the would-be
// result. When the UI is not active the diff goes to writer.scrollback;
// otherwise it is inset and appended to the description. Returns null in
// dry-run mode, else `{ actionType: 'file', description, tag }`.
async function _permWriteAppend(ctx, action, args) {
  const { _dryRun, renderDiff, DIFF_BUBBLE_INSET, writer, _uiActive } = ctx;
  const [filePath, content] = [args[0], args[1]];
  const isWrite = action === 'write';
  const tag = isWrite ? 'write_file' : 'append_file';

  // Best effort: an unreadable or missing file diffs against an empty string.
  let current = '';
  try {
    current = await fsp.readFile(filePath, 'utf8');
  } catch {}

  const incoming = content || '';
  const proposed = isWrite ? incoming : current + incoming;

  let diffOutput;
  if (_uiActive) {
    diffOutput = renderDiff(current, proposed, filePath, { inset: DIFF_BUBBLE_INSET });
  } else {
    diffOutput = renderDiff(current, proposed, filePath);
    writer.scrollback(diffOutput);
  }

  if (_dryRun) return null;

  const parts = [`${isWrite ? 'Write' : 'Append to'} ${filePath}`];
  if (content) parts.push(` (${content.length} chars)`);
  if (_uiActive) parts.push(`\n${diffOutput}`);
  return { actionType: 'file', description: parts.join(''), tag };
}
|
|
407
|
+
|
|
408
|
+
// ── grep / glob (Task 2.1) ─────────────────────────────────────────────────
|
|
409
|
+
//
|
|
410
|
+
// Canonical search semantics. The pure-Node implementation is the REFERENCE;
|
|
411
|
+
// ripgrep is invoked with a flag set chosen to reproduce it byte-for-byte (the
|
|
412
|
+
// equivalence is pinned by test/grep-glob.test.js):
|
|
413
|
+
// * recurse from baseDir
|
|
414
|
+
// * always skip directories named node_modules or .git
|
|
415
|
+
// * skip hidden entries (names beginning with ".")
|
|
416
|
+
// * honor a .gitignore in baseDir if present (common-subset rules below)
|
|
417
|
+
// * skip binary files (a NUL byte in the first 8 KB)
|
|
418
|
+
// * emit one record per matching LINE: { file, line, text }, sorted by
|
|
419
|
+
// (file, line); file is baseDir-relative POSIX. Output never carries the
|
|
420
|
+
// engine identity, so rg and Node results are deep-equal.
|
|
421
|
+
|
|
422
|
+
// NOTE(review): presumably the upper bound on grep match records — confirm at the use site.
const GREP_MAX_MATCHES = 1000;
|
|
423
|
+
// NOTE(review): presumably the upper bound on files returned by glob — confirm at the use site.
const GLOB_MAX_FILES = 5000;
|
|
424
|
+
// Directory names the search always skips, per the canonical semantics above.
const GREP_SKIP_DIRS = new Set(['node_modules', '.git']);
|
|
425
|
+
// 8 KB: size of the leading window checked for a NUL byte to classify a file as binary.
const BINARY_SNIFF_BYTES = 8192;
|
|
426
|
+
|
|
427
|
+
// grep output modes (Task W.5), Claude-Code-style. The model selects one via the
|
|
428
|
+
// `output_mode` parameter; the mode is shaped at serialization time
|
|
429
|
+
// (lib/agent.js formatFileResult) from the same engine result:
|
|
430
|
+
// * content — file:line:text per match (default; "show me the lines")
|
|
431
|
+
// * files_with_matches — unique file paths only ("which files")
|
|
432
|
+
// * count — match counts per file + total ("how many")
|
|
433
|
+
const GREP_OUTPUT_MODES = ['content', 'files_with_matches', 'count'];
// Return `m` when it is one of GREP_OUTPUT_MODES, otherwise the default 'content'.
function _normGrepMode(m) {
  if (!GREP_OUTPUT_MODES.includes(m)) return 'content';
  return m;
}
|
|
437
|
+
// head_limit / offset normalization (Task W.5). A positive integer bounds /
|
|
438
|
+
// skips results; anything else falls back (limit → default, offset → 0).
|
|
439
|
+
// Resolve a head_limit: numbers are used directly, anything else goes through
// base-10 parseInt. A finite value greater than zero is floored and returned;
// everything else yields `dflt`.
function _normHeadLimit(v, dflt) {
  const parsed = typeof v === 'number' ? v : parseInt(v, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return dflt;
  return Math.floor(parsed);
}
|
|
443
|
+
// Resolve an offset with the same parsing as head_limit, except that any
// unusable value (non-finite, zero, negative) becomes 0.
function _normOffset(v) {
  const parsed = typeof v === 'number' ? v : parseInt(v, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return 0;
  return Math.floor(parsed);
}
|
|
447
|
+
|
|
448
|
+
// ripgrep detection, performed once and cached. SEMALT_NO_RG forces the Node
|
|
449
|
+
// fallback; SEMALT_RG_BIN points at an alternate binary (both used by tests).
|
|
450
|
+
let _rgChecked = false;
let _rgBin = null;
// Return the ripgrep binary name to use, or null when unavailable. The probe
// (`<bin> --version`, exit status 0) runs only on the first call; the outcome
// is kept in module state and returned by every later call.
function _detectRipgrep() {
  if (!_rgChecked) {
    _rgChecked = true;
    if (process.env.SEMALT_NO_RG) {
      // Explicit opt-out: never probe.
      _rgBin = null;
    } else {
      const candidate = process.env.SEMALT_RG_BIN || 'rg';
      try {
        const probe = spawnSync(candidate, ['--version'], { encoding: 'utf8' });
        if (probe && probe.status === 0) _rgBin = candidate;
      } catch { /* rg not on PATH */ }
    }
  }
  return _rgBin;
}
|
|
463
|
+
|
|
464
|
+
// Rewrite a platform path so its separators are forward slashes.
function _toPosix(p) {
  const segments = p.split(path.sep);
  return segments.join('/');
}
|
|
465
|
+
|
|
466
|
+
// Glob → anchored RegExp. Mirrors the search_files conversion so the two file
|
|
467
|
+
// matchers agree: * → one path segment, ** → any depth.
|
|
468
|
+
// Translate a glob into an anchored RegExp.
//   `*`   matches any run of characters within one path segment
//   `?`   matches exactly one character within a path segment
//   `**`  matches across any number of segments; a `**` followed by '/'
//         may also match nothing, so `src/**/x` also matches `src/x`
// Every other regex metacharacter is matched literally.
// Returns a RegExp anchored with ^ and $.
function _globToRegExp(glob) {
  // Escape regex metacharacters; `*` and `?` are left for the glob steps below.
  let s = glob.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  // Park `**` on a NUL placeholder so the single-star rule cannot touch it.
  s = s.replace(/\*\*/g, '\x00');
  s = s.replace(/\*/g, '[^/]*');
  // `?` must be translated before the `(?:.*/)?` group below introduces its own
  // `?`. Left untranslated it acts as a regex quantifier (`a?.js` would match
  // `.js`) and a leading `?` makes the RegExp constructor throw.
  s = s.replace(/\?/g, '[^/]');
  s = s.replace(/\x00\//g, '(?:.*/)?');
  s = s.replace(/\x00/g, '.*');
  return new RegExp(`^${s}$`);
}
|
|
476
|
+
|
|
477
|
+
// .gitignore, common subset: blank/comment lines; basename globs (no slash,
|
|
478
|
+
// matched at any depth); anchored path globs (a slash anywhere but trailing);
|
|
479
|
+
// dir-only (trailing slash); negation (!). Only the baseDir .gitignore is read
|
|
480
|
+
// (no nested files). Last matching rule wins.
|
|
481
|
+
// Read `<baseDir>/.gitignore` and compile it into an ordered rule list.
// Each rule is { negate, dirOnly, anchored, re }:
//   negate   - pattern was prefixed with '!'
//   dirOnly  - pattern ended with '/'
//   anchored - pattern started with '/' or contains a '/' after trimming
//   re       - RegExp produced by _globToRegExp for the remaining pattern
// Blank lines and lines starting with '#' are skipped. Returns [] when the
// file cannot be read.
function _loadGitignore(baseDir) {
  let contents;
  try {
    contents = fs.readFileSync(path.join(baseDir, '.gitignore'), 'utf8');
  } catch {
    return [];
  }
  const compiled = [];
  contents.split('\n').forEach((rawLine) => {
    let pat = rawLine.replace(/\r$/, '').replace(/^\s+|\s+$/g, '');
    if (!pat || pat.startsWith('#')) return;
    // Peel the markers off in a fixed order: '!' prefix, '/' suffix, '/' prefix.
    const negate = pat.startsWith('!');
    if (negate) pat = pat.slice(1);
    const dirOnly = pat.endsWith('/');
    if (dirOnly) pat = pat.slice(0, -1);
    const rooted = pat.startsWith('/');
    if (rooted) pat = pat.slice(1);
    if (!pat) return;
    compiled.push({
      negate,
      dirOnly,
      anchored: rooted || pat.includes('/'),
      re: _globToRegExp(pat),
    });
  });
  return compiled;
}
|
|
500
|
+
|
|
501
|
+
// rel: baseDir-relative POSIX path of the entry; name: its basename.
|
|
502
|
+
// Decide whether an entry is ignored by the compiled rule list.
//   rules: rule objects ({ negate, dirOnly, anchored, re }) in file order
//   rel:   path tested by anchored rules
//   name:  basename tested by non-anchored rules
//   isDir: whether the entry is a directory (dirOnly rules skip files)
// Rules are applied in order and the last matching rule decides; a negated
// rule un-ignores. Returns false when nothing matches.
function _gitignored(rules, rel, name, isDir) {
  return rules.reduce((verdict, rule) => {
    if (rule.dirOnly && !isDir) return verdict;
    const subject = rule.anchored ? rel : name;
    return rule.re.test(subject) ? !rule.negate : verdict;
  }, false);
}
|
|
510
|
+
|
|
511
|
+
// Heuristic binary check: report true when a NUL byte appears within the
// first BINARY_SNIFF_BYTES bytes of `buf` (or the whole buffer if shorter).
function _isBinaryBuf(buf) {
  const limit = Math.min(buf.length, BINARY_SNIFF_BYTES);
  let idx = 0;
  while (idx < limit) {
    if (buf[idx] === 0) {
      return true;
    }
    idx += 1;
  }
  return false;
}
|
|
516
|
+
|
|
517
|
+
// Iterative DFS over baseDir applying the canonical skip rules; calls
|
|
518
|
+
// onFile(rel, name, absPath) for each surviving file. Honors an abort signal
|
|
519
|
+
// between directories, not per entry. Returns false if aborted, true otherwise.
|
|
520
|
+
// Walk `baseDir` iteratively (explicit stack, depth-first) and report files.
//   rules:  compiled gitignore rules; consulted only when non-empty
//   signal: optional object with an `aborted` flag, checked once per
//           directory popped off the stack
//   onFile: callback(rel, name, absPath) invoked for each surviving file
// Skipped: names starting with '.', directories listed in GREP_SKIP_DIRS,
// entries matched by `rules`, unreadable directories, and anything that is
// neither a directory nor a regular file.
// Returns false when the walk stopped because of `signal`, true otherwise.
function _walkTree(baseDir, { rules = [], signal = null, onFile }) {
  const isIgnored = (rel, name, isDir) => rules.length > 0 && _gitignored(rules, rel, name, isDir);
  const pending = [{ dir: baseDir, rel: '' }];
  while (pending.length > 0) {
    if (signal && signal.aborted) return false;
    const frame = pending.pop();
    let listing;
    try {
      listing = fs.readdirSync(frame.dir, { withFileTypes: true });
    } catch {
      continue; // unreadable directory: skip it, keep walking
    }
    for (const entry of listing) {
      const { name } = entry;
      if (name.startsWith('.')) continue; // hidden
      const childRel = frame.rel ? `${frame.rel}/${name}` : name;
      const childAbs = path.join(frame.dir, name);
      if (entry.isDirectory()) {
        const pruned = GREP_SKIP_DIRS.has(name) || isIgnored(childRel, name, true);
        if (!pruned) pending.push({ dir: childAbs, rel: childRel });
      } else if (entry.isFile() && !isIgnored(childRel, name, false)) {
        // Symlinks, sockets, etc. fail isFile() and never reach here.
        onFile(childRel, name, childAbs);
      }
    }
  }
  return true;
}
|
|
546
|
+
|
|
547
|
+
// Pure-Node grep engine.
//   pattern:    regex source, compiled with the 'i' flag when ignoreCase is set
//   pathGlob:   optional glob; tested against the basename when it has no '/',
//               otherwise against the baseDir-relative path
//   baseDir:    directory to walk (its .gitignore is applied)
//   signal:     optional abort flag forwarded to the tree walk
// Returns { error } for an invalid regex, { aborted: true } when the walk was
// aborted, otherwise { matches: [{ file, line, text }] } with 1-based line
// numbers. Unreadable files and files flagged by _isBinaryBuf are skipped.
function _grepNode({ pattern, pathGlob, ignoreCase, baseDir, signal }) {
  let matcher;
  try {
    matcher = new RegExp(pattern, ignoreCase ? 'i' : '');
  } catch (err) {
    return { error: `Invalid regex pattern: ${err.message}` };
  }
  const rules = _loadGitignore(baseDir);
  const pathFilter = pathGlob ? _globToRegExp(pathGlob) : null;
  const filterOnBasename = pathGlob && !pathGlob.includes('/');
  const matches = [];

  const scanFile = (rel, name, abs) => {
    if (pathFilter && !pathFilter.test(filterOnBasename ? name : rel)) return;
    let buf;
    try {
      buf = fs.readFileSync(abs);
    } catch {
      return;
    }
    if (_isBinaryBuf(buf)) return;
    const data = buf.toString('utf8');
    const lines = data.split('\n');
    // A trailing newline terminates the last line; drop the phantom empty
    // element split() produces so line counting matches ripgrep.
    if (data.endsWith('\n')) lines.pop();
    const file = _toPosix(rel);
    lines.forEach((text, idx) => {
      if (matcher.test(text)) matches.push({ file, line: idx + 1, text });
    });
  };

  const completed = _walkTree(baseDir, { rules, signal, onFile: scanFile });
  return completed ? { matches } : { aborted: true };
}
|
|
577
|
+
|
|
578
|
+
// ripgrep-backed grep engine. Runs rg synchronously in `baseDir` with JSON
// output and converts each `match` event into { file, line, text }.
// Returns { aborted: true } when `signal` is already aborted, { error } when
// the process could not be run or exited with status 2, otherwise
// { matches }.
function _grepRg({ pattern, pathGlob, ignoreCase, baseDir, signal }) {
  if (signal && signal.aborted) return { aborted: true };
  const rgBinary = _detectRipgrep() || process.env.SEMALT_RG_BIN || 'rg';
  // These flags make rg honor the baseDir .gitignore without a git repo while
  // ignoring parent/global/.ignore files, and unconditionally drop
  // node_modules, matching the Node engine. Hidden entries and binary files
  // are skipped by rg's defaults.
  const rgArgs = [
    '--json', '--no-require-git', '--no-ignore-parent', '--no-ignore-global', '--no-ignore-dot',
    '-g', '!node_modules',
    ...(ignoreCase ? ['-i'] : []),
    ...(pathGlob ? ['-g', pathGlob] : []),
    '--regexp', pattern, '--', '.',
  ];
  const run = spawnSync(rgBinary, rgArgs, { cwd: baseDir, encoding: 'utf8', maxBuffer: 64 * 1024 * 1024 });
  if (run.error) return { error: run.error.message };
  if (run.status === 2) return { error: (run.stderr || 'ripgrep error').trim() };
  // status 0 = matches, 1 = no matches; both are success.
  const matches = [];
  const records = run.stdout ? run.stdout.split('\n') : [];
  for (const record of records) {
    if (!record) continue;
    let event;
    try {
      event = JSON.parse(record);
    } catch {
      continue;
    }
    if (event.type !== 'match') continue;
    const { data } = event;
    if (!data.lines || typeof data.lines.text !== 'string') continue; // non-UTF8 line
    const rawPath = data.path && data.path.text ? data.path.text : '';
    const file = rawPath.startsWith('./') ? rawPath.slice(2) : rawPath;
    const rawText = data.lines.text;
    const text = rawText.endsWith('\n') ? rawText.slice(0, -1) : rawText;
    matches.push({ file: _toPosix(file), line: data.line_number, text });
  }
  return { matches };
}
|
|
611
|
+
|
|
612
|
+
// Turn a raw engine result into the final grep result.
//   raw:     { matches } from an engine, or an { error } / { aborted } object
//   pattern: the search pattern, echoed back in the result
// Error/aborted results pass through untouched; a missing result becomes
// { error: 'grep failed' }. Otherwise matches are copied, sorted by file then
// line, capped at GREP_MAX_MATCHES, and returned as
// { matches, pattern, count } plus `truncated: true` when the cap applied.
function _finalizeGrep(raw, pattern) {
  if (!raw) return { error: 'grep failed' };
  if (raw.error || raw.aborted) return raw;

  const byFileThenLine = (a, b) => {
    if (a.file < b.file) return -1;
    if (a.file > b.file) return 1;
    return a.line - b.line;
  };
  // Sort a copy so the engine's array is left untouched.
  const sorted = raw.matches.slice().sort(byFileThenLine);
  const truncated = sorted.length > GREP_MAX_MATCHES;
  const matches = truncated ? sorted.slice(0, GREP_MAX_MATCHES) : sorted;

  const result = { matches, pattern, count: matches.length };
  if (truncated) result.truncated = true;
  return result;
}
|
|
622
|
+
|
|
623
|
+
// engine: 'auto' (rg if available, else Node), 'rg', or 'node'. Exported for
|
|
624
|
+
// the parity tests, which drive both engines and assert deep equality.
|
|
625
|
+
// Grep entry point: validate the pattern, pick an engine, finalize the result.
//   engine: 'auto' (rg when detected, else Node), 'rg', or 'node'
// In 'auto' mode an rg error triggers one retry with the Node engine.
// Returns { error } for a missing/empty pattern or an engine error,
// { aborted: true } when aborted, otherwise the finalized result.
function _grepSearch({ pattern, pathGlob = null, ignoreCase = false, baseDir = '.', engine = 'auto', signal = null }) {
  if (typeof pattern !== 'string' || pattern === '') {
    return { error: 'grep: pattern is required' };
  }
  const request = { pattern, pathGlob, ignoreCase, baseDir, signal };
  const viaRg = engine === 'rg' || (engine === 'auto' && !!_detectRipgrep());
  let raw = viaRg ? _grepRg(request) : _grepNode(request);
  if (viaRg && engine === 'auto' && raw && raw.error) {
    // rg was only chosen opportunistically; fall back to the Node engine.
    raw = _grepNode(request);
  }
  if (raw && (raw.error || raw.aborted)) return raw;
  return _finalizeGrep(raw, pattern);
}
|
|
640
|
+
|
|
641
|
+
// Find files under `baseDir` whose name or relative path matches a glob.
// A pattern without '/' is tested against the basename, otherwise against
// the baseDir-relative path. .gitignore is not applied (empty rule list).
// Returns { error } for a missing/empty pattern, { aborted: true } when the
// walk was aborted, otherwise { files, pattern, dir, count } with files as
// { path, size, mtime } sorted by path, plus `truncated: true` once
// GLOB_MAX_FILES entries were collected.
function _globSearch({ pattern, baseDir = '.', signal = null }) {
  if (typeof pattern !== 'string' || pattern === '') {
    return { error: 'glob: pattern is required' };
  }
  const matcher = _globToRegExp(pattern);
  const matchBasename = !pattern.includes('/');
  const files = [];
  let truncated = false;

  const collect = (rel, name, abs) => {
    if (truncated || !matcher.test(matchBasename ? name : rel)) return;
    let info;
    try {
      info = fs.statSync(abs);
    } catch {
      return;
    }
    files.push({ path: _toPosix(rel), size: info.size, mtime: info.mtime.toISOString() });
    truncated = files.length >= GLOB_MAX_FILES;
  };

  // glob does not apply .gitignore (only node_modules/.git/hidden are skipped).
  const completed = _walkTree(baseDir, { rules: [], signal, onFile: collect });
  if (!completed) return { aborted: true };

  files.sort((a, b) => {
    if (a.path < b.path) return -1;
    return a.path > b.path ? 1 : 0;
  });
  const result = { files, pattern, dir: baseDir, count: files.length };
  if (truncated) result.truncated = true;
  return result;
}
|
|
666
|
+
|
|
667
|
+
// Shared XML parser for the attribute-or-inline <grep>/<glob> tags.
|
|
668
|
+
// Extract <grep>/<glob> tool calls from model text.
//   text: text to scan
//   tag:  'grep' or any other tag name (treated as glob)
// The pattern comes from the `pattern` attribute, or from the trimmed tag
// body when the attribute is absent; tags with neither are skipped.
// Attributes may use double or single quotes.
// Returns one array per tag:
//   grep: ['grep', pattern, path|null, ignoreCase, output_mode|null, head_limit, offset]
//   else: ['glob', pattern, path|dir|'.', head_limit, offset]
function _parseSearchTag(text, tag) {
  const tagRe = new RegExp(`<${tag}\\b([^>]*?)(?:\\/>|>([\\s\\S]*?)<\\/${tag}>)`, 'g');
  const calls = [];
  for (const [, rawAttrs, rawBody] of text.matchAll(tagRe)) {
    const attrStr = rawAttrs || '';
    const body = rawBody != null ? rawBody : '';
    const readAttr = (key) => {
      const dq = attrStr.match(new RegExp(`${key}="([^"]*)"`));
      if (dq) return dq[1];
      const sq = attrStr.match(new RegExp(`${key}='([^']*)'`));
      return sq ? sq[1] : null;
    };

    let pattern = readAttr('pattern');
    if (pattern == null) {
      pattern = body.trim() || null;
    }
    if (pattern == null) continue;

    if (tag === 'grep') {
      const flag = readAttr('ignore_case');
      const ignoreCase = flag === 'true' || flag === '1' || flag === 'yes';
      calls.push([
        'grep',
        pattern,
        readAttr('path') || null,
        ignoreCase,
        readAttr('output_mode') || null,
        readAttr('head_limit'),
        readAttr('offset'),
      ]);
    } else {
      calls.push([
        'glob',
        pattern,
        readAttr('path') || readAttr('dir') || '.',
        readAttr('head_limit'),
        readAttr('offset'),
      ]);
    }
  }
  return calls;
}
|
|
691
|
+
|
|
692
|
+
// ── Native git tools (Task 5.1) ────────────────────────────────────────────
|
|
693
|
+
//
|
|
694
|
+
// First-class git operations: structured results for the common verbs, the long
|
|
695
|
+
// tail left to the (sandboxed) generic shell. Every git tool shells out through
|
|
696
|
+
// ctx.agentExecShell — the SAME sandbox + deny-list chokepoint as <shell> — so it
|
|
697
|
+
// gets NO privileged path around confinement (constraint #5). Read-only tools
|
|
698
|
+
// (status/diff/log, and the list ops of branch/worktree) return a null permission
|
|
699
|
+
// descriptor; mutating tools (add/commit/branch-create/checkout/worktree-add+remove)
|
|
700
|
+
// require approval, honor --readonly via permissionManager.readonlyBlock, and are
|
|
701
|
+
// subject to the per-pattern rules + deny-list. Checkpoints (Task 4.3) snapshot
|
|
702
|
+
// FILE-TOOL mutations only — git operations are NOT reversible via /rewind, and
|
|
703
|
+
// git_checkout can discard uncommitted work that checkpoints never captured.
|
|
704
|
+
|
|
705
|
+
// Shell-quote one argument so the command string we hand to agentExecShell is
|
|
706
|
+
// safe regardless of metacharacters in branch names / paths / commit messages.
|
|
707
|
+
// Platform-aware: cmd.exe double-quote convention on Windows, POSIX single-quote
|
|
708
|
+
// elsewhere. The deny-list + sandbox remain the security boundary; this only
|
|
709
|
+
// prevents accidental word-splitting of the structured arguments.
|
|
710
|
+
// Quote a single argument for the shell command string.
//   arg: value to quote; null/undefined become the empty string, everything
//        else is stringified.
// On win32 the value is wrapped in double quotes with embedded `"` doubled;
// elsewhere it is wrapped in single quotes with embedded `'` written as '\''.
function _shQuote(arg) {
  const text = arg == null ? '' : String(arg);
  if (process.platform === 'win32') {
    return `"${text.replace(/"/g, '""')}"`;
  }
  return `'${text.replace(/'/g, "'\\''")}'`;
}
|
|
715
|
+
|
|
716
|
+
// Build the `git …` command string: every argv element is shell-quoted via
// _shQuote and the results are joined with single spaces after 'git '.
function _gitCommand(argv) {
  const quoted = argv.map(_shQuote);
  return `git ${quoted.join(' ')}`;
}
|
|
719
|
+
|
|
720
|
+
// Run a git invocation through the shared shell chokepoint and return the raw
|
|
721
|
+
// shell result ({ exit_code, stdout, stderr, blocked, sandbox }).
|
|
722
|
+
// Execute a git invocation through ctx.agentExecShell.
//   ctx:     object exposing agentExecShell(command, options)
//   argv:    git arguments (without the leading 'git')
//   options: passed through as-is; a falsy value is replaced by {}
// Returns whatever ctx.agentExecShell returns.
function _runGit(ctx, argv, options) {
  const command = _gitCommand(argv);
  const shellOptions = options || {};
  return ctx.agentExecShell(command, shellOptions);
}
|
|
725
|
+
|
|
726
|
+
// Map a failed git invocation to a structured { error } — degrading gracefully
|
|
727
|
+
// for the "not a repo" and "git absent" cases rather than surfacing raw noise.
|
|
728
|
+
// Map a failed git invocation to a structured { error }.
//   res: shell result ({ exit_code, stdout, stderr, blocked }); may be null.
// Resolution order:
//   1. blocked          -> trimmed stderr, or 'git command blocked'
//   2. "not a git repository" anywhere in the output -> fixed hint
//   3. git itself missing (exit code 127, or a shell message that names git
//      as the missing command) -> fixed hint
//   4. otherwise        -> trimmed stderr, else trimmed stdout, else the
//      exit code
function _gitFailure(res) {
  if (res && res.blocked) return { error: (res.stderr || 'git command blocked').trim() };
  const err = (res && res.stderr ? res.stderr : '').trim();
  const out = res && res.stdout ? res.stdout : '';
  const combined = `${err}\n${out}`.toLowerCase();
  if (/not a git repository/.test(combined)) {
    return { error: 'Not a git repository. Run this inside a git working tree.' };
  }
  // Only treat "not found" text as "git is missing" when it names git itself.
  // Git's own output routinely contains "No such file or directory" (missing
  // paths) or "command not found" (a hook calling an absent tool); reporting
  // those as a missing git binary would hide the real error.
  const gitMissing = /\bgit(?:\.exe)?: (?:command )?not found|command not found: git\b|'git(?:\.exe)?' is not recognized as an internal|\bgit(?:\.exe)?['’]?: no such file or directory/;
  if ((res && res.exit_code === 127) || gitMissing.test(combined)) {
    return { error: 'git is not installed or not found on PATH.' };
  }
  return { error: err || out.trim() || `git exited with code ${res ? res.exit_code : '?'}` };
}
|
|
740
|
+
|
|
741
|
+
// Record a git tool call through ctx.logToolCall(action, { args }, ok, status),
// where `ok` is true unless status is 'error'. Any exception raised while
// logging is swallowed (audit is best-effort).
function _gitLog(ctx, action, args, status) {
  const succeeded = status !== 'error';
  try {
    ctx.logToolCall(action, { args }, succeeded, status);
  } catch { /* audit best-effort */ }
}
|
|
744
|
+
|
|
745
|
+
// Parse the `## …` branch header of `git status --porcelain=v1 --branch`.
|
|
746
|
+
// Extract the branch name from the `## …` header line of
// `git status --porcelain=v1 --branch`.
//   '## No commits yet on <name>' -> '<name>'
//   '## HEAD …'                   -> 'HEAD (detached)'
//   '## <name>...<upstream> […]'  -> '<name>'
// Anything after the first '...' or the first space is discarded.
function _parseStatusBranch(header) {
  const NO_COMMITS = 'No commits yet on ';
  const body = String(header).replace(/^##\s*/, '');
  if (body.startsWith(NO_COMMITS)) {
    return body.slice(NO_COMMITS.length).trim();
  }
  if (body.startsWith('HEAD ')) {
    return 'HEAD (detached)';
  }
  const [beforeUpstream] = body.split('...');
  const [branch] = beforeUpstream.split(' ');
  return branch.trim();
}
|
|
756
|
+
|
|
757
|
+
// Parse a unified diff into { files: [{ file, additions, deletions, hunks }] }.
|
|
758
|
+
// Parse unified `git diff` output.
//   raw: diff text (stringified before parsing).
// Returns an array of { file, additions, deletions, hunks }, one per
// `diff --git` section. `file` comes from the ` b/<path>` part of the section
// line and is overridden by a `+++ b/<path>` header when present. Each hunk
// is { header, lines }, with every line after the `@@` header kept verbatim.
// additions/deletions count hunk lines starting with '+' / '-'.
function _parseDiff(raw) {
  const files = [];
  let cur = null;
  let hunk = null;
  for (const line of String(raw).split('\n')) {
    if (line.startsWith('diff --git ')) {
      cur = { file: null, additions: 0, deletions: 0, hunks: [] };
      hunk = null;
      files.push(cur);
      const m = line.match(/ b\/(.+)$/);
      if (m) cur.file = m[1];
      continue;
    }
    if (!cur) continue;
    if (line.startsWith('@@')) {
      hunk = { header: line, lines: [] };
      cur.hunks.push(hunk);
      continue;
    }
    if (!hunk) {
      // Still in the per-file header (index / mode / ---, +++ lines).
      // The ---/+++ headers are only recognized here: inside a hunk, a removed
      // line starting with "-- " or an added line starting with "++ " looks
      // identical to a header and must be counted as content instead.
      if (line.startsWith('+++ b/')) cur.file = line.slice(6);
      continue;
    }
    hunk.lines.push(line);
    if (line.startsWith('+')) cur.additions++;
    else if (line.startsWith('-')) cur.deletions++;
  }
  return files;
}
|
|
786
|
+
|
|
787
|
+
// Parse `git worktree list --porcelain` into [{ path, head, branch }].
|
|
788
|
+
// Parse `git worktree list --porcelain` output.
//   raw: porcelain text (stringified before parsing).
// Returns [{ path, head, branch }]. A `worktree <path>` line opens a record;
// following `HEAD <sha>` and `branch <ref>` lines fill it in (a leading
// refs/heads/ is stripped), and a bare `detached` line sets branch to
// '(detached)'. Lines seen before the first record are ignored.
function _parseWorktrees(raw) {
  const entries = [];
  let current = null;
  String(raw).split('\n').forEach((line) => {
    if (line.startsWith('worktree ')) {
      current = { path: line.slice('worktree '.length), head: null, branch: null };
      entries.push(current);
      return;
    }
    if (!current) return;
    if (line.startsWith('HEAD ')) {
      current.head = line.slice('HEAD '.length);
    } else if (line.startsWith('branch ')) {
      current.branch = line.slice('branch '.length).replace(/^refs\/heads\//, '');
    } else if (line === 'detached') {
      current.branch = '(detached)';
    }
  });
  return entries;
}
|
|
805
|
+
|
|
806
|
+
// XML attribute extractor (dual-quote) + a small typed-attr parser shared by the
|
|
807
|
+
// git tags. `spec` = { str: [...], bool: [...], num: [...], inline: 'key'? }.
|
|
808
|
+
// Read one attribute value out of a tag's raw attribute string.
//   attrStr: text between the tag name and the closing '>' or '/>'
//   key:     attribute name (interpolated into a RegExp as-is)
// The double-quoted form is tried first, then the single-quoted form.
// Returns the value (possibly ''), or null when the attribute is absent.
function _gitAttr(attrStr, key) {
  const doubleQuoted = attrStr.match(new RegExp(`${key}="([^"]*)"`));
  if (doubleQuoted) {
    return doubleQuoted[1];
  }
  const singleQuoted = attrStr.match(new RegExp(`${key}='([^']*)'`));
  return singleQuoted ? singleQuoted[1] : null;
}
|
|
812
|
+
// Interpret a boolean attribute value: 'true', '1', 'yes' and the empty
// string count as true; everything else is false.
function _gitTruthy(v) {
  return ['true', '1', 'yes', ''].includes(v);
}
|
|
813
|
+
// Extract every `<tag …/>` or `<tag …>body</tag>` occurrence from `text` and
// decode its attributes according to `spec`.
//   spec.str    - attribute names copied as strings when present
//   spec.bool   - attribute names decoded with _gitTruthy when present
//   spec.num    - attribute names parsed with parseInt(v, 10); empty and
//                 non-numeric values are dropped
//   spec.inline - option that receives the trimmed tag body when the body is
//                 non-empty and that option was not already set
// Returns one [tag, opts] pair per occurrence, in document order.
function _parseGitTag(text, tag, spec) {
  const tagRe = new RegExp(`<${tag}\\b([^>]*?)(?:\\/>|>([\\s\\S]*?)<\\/${tag}>)`, 'g');
  return Array.from(text.matchAll(tagRe), (found) => {
    const attrStr = found[1] || '';
    const body = found[2] != null ? found[2] : '';
    const opts = {};

    (spec.str || []).forEach((key) => {
      const value = _gitAttr(attrStr, key);
      if (value != null) opts[key] = value;
    });
    (spec.bool || []).forEach((key) => {
      const value = _gitAttr(attrStr, key);
      if (value != null) opts[key] = _gitTruthy(value);
    });
    (spec.num || []).forEach((key) => {
      const value = _gitAttr(attrStr, key);
      if (value == null || value === '') return;
      const parsed = parseInt(value, 10);
      if (!Number.isNaN(parsed)) opts[key] = parsed;
    });

    if (spec.inline) {
      const inlineText = body.trim();
      if (inlineText && opts[spec.inline] == null) opts[spec.inline] = inlineText;
    }
    return [tag, opts];
  });
}
|
|
831
|
+
|
|
832
|
+
const GIT_TOOL_REGISTRY = [
|
|
833
|
+
{
|
|
834
|
+
tool: 'git_status',
|
|
835
|
+
specNames: ['git_status'],
|
|
836
|
+
tags: ['git_status'],
|
|
837
|
+
parseXml: (text) => _parseGitTag(text, 'git_status', {}),
|
|
838
|
+
fromParams: () => ['git_status', {}],
|
|
839
|
+
permission: () => null,
|
|
840
|
+
execute: async (ctx, args, options) => {
|
|
841
|
+
const res = await _runGit(ctx, ['status', '--porcelain=v1', '--branch'], options);
|
|
842
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_status', args, 'error'); return _gitFailure(res); }
|
|
843
|
+
const staged = [];
|
|
844
|
+
const unstaged = [];
|
|
845
|
+
const untracked = [];
|
|
846
|
+
let branch = null;
|
|
847
|
+
for (const line of res.stdout.split('\n')) {
|
|
848
|
+
if (!line) continue;
|
|
849
|
+
if (line.startsWith('## ')) { branch = _parseStatusBranch(line); continue; }
|
|
850
|
+
const x = line[0];
|
|
851
|
+
const y = line[1];
|
|
852
|
+
const p = line.slice(3);
|
|
853
|
+
if (line.startsWith('??')) { untracked.push(p); continue; }
|
|
854
|
+
if (x && x !== ' ' && x !== '?') staged.push({ path: p, status: x });
|
|
855
|
+
if (y && y !== ' ' && y !== '?') unstaged.push({ path: p, status: y });
|
|
856
|
+
}
|
|
857
|
+
const clean = staged.length === 0 && unstaged.length === 0 && untracked.length === 0;
|
|
858
|
+
const summary = `On branch ${branch || '(unknown)'} — ${clean ? 'clean' : `staged: ${staged.length}, unstaged: ${unstaged.length}, untracked: ${untracked.length}`}`;
|
|
859
|
+
_gitLog(ctx, 'git_status', args, 'ok');
|
|
860
|
+
return { status: 'ok', branch, staged, unstaged, untracked, clean, summary };
|
|
861
|
+
},
|
|
862
|
+
},
|
|
863
|
+
{
|
|
864
|
+
tool: 'git_diff',
|
|
865
|
+
specNames: ['git_diff'],
|
|
866
|
+
tags: ['git_diff'],
|
|
867
|
+
parseXml: (text) => _parseGitTag(text, 'git_diff', { str: ['path'], bool: ['staged'] }),
|
|
868
|
+
fromParams: (p) => ['git_diff', { ...((p.staged || p.cached) ? { staged: true } : {}), ...(p.path ? { path: String(p.path) } : {}) }],
|
|
869
|
+
permission: () => null,
|
|
870
|
+
execute: async (ctx, args, options) => {
|
|
871
|
+
const o = args[0] || {};
|
|
872
|
+
const argv = ['diff'];
|
|
873
|
+
if (o.staged) argv.push('--cached');
|
|
874
|
+
if (o.path) argv.push('--', String(o.path));
|
|
875
|
+
const res = await _runGit(ctx, argv, options);
|
|
876
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_diff', args, 'error'); return _gitFailure(res); }
|
|
877
|
+
const files = _parseDiff(res.stdout);
|
|
878
|
+
const additions = files.reduce((s, f) => s + f.additions, 0);
|
|
879
|
+
const deletions = files.reduce((s, f) => s + f.deletions, 0);
|
|
880
|
+
const summary = files.length
|
|
881
|
+
? `${files.length} file(s) changed, +${additions} -${deletions}`
|
|
882
|
+
: 'No changes';
|
|
883
|
+
_gitLog(ctx, 'git_diff', args, 'ok');
|
|
884
|
+
return { status: 'ok', staged: !!o.staged, files, additions, deletions, raw: res.stdout, summary };
|
|
885
|
+
},
|
|
886
|
+
},
|
|
887
|
+
{
|
|
888
|
+
tool: 'git_log',
|
|
889
|
+
specNames: ['git_log'],
|
|
890
|
+
tags: ['git_log'],
|
|
891
|
+
parseXml: (text) => _parseGitTag(text, 'git_log', { str: ['path'], num: ['count'] }),
|
|
892
|
+
fromParams: (p) => {
|
|
893
|
+
const count = Number.isInteger(p.count) ? p.count : (p.count != null && p.count !== '' ? parseInt(p.count, 10) : undefined);
|
|
894
|
+
return ['git_log', { ...(count ? { count } : {}), ...(p.path ? { path: String(p.path) } : {}) }];
|
|
895
|
+
},
|
|
896
|
+
permission: () => null,
|
|
897
|
+
execute: async (ctx, args, options) => {
|
|
898
|
+
const o = args[0] || {};
|
|
899
|
+
const count = Number.isInteger(o.count) && o.count > 0 ? o.count : 20;
|
|
900
|
+
const SEP = '\x1f';
|
|
901
|
+
const argv = ['log', '-n', String(count), `--pretty=format:%H${SEP}%an${SEP}%ae${SEP}%ad${SEP}%s`, '--date=iso'];
|
|
902
|
+
if (o.path) argv.push('--', String(o.path));
|
|
903
|
+
const res = await _runGit(ctx, argv, options);
|
|
904
|
+
if (res.blocked || res.exit_code !== 0) {
|
|
905
|
+
const combined = `${res.stderr || ''}\n${res.stdout || ''}`.toLowerCase();
|
|
906
|
+
// A fresh repo with no commits is a valid empty result, not an error.
|
|
907
|
+
if (/does not have any commits yet|bad default revision|unknown revision|ambiguous argument 'head'/.test(combined)) {
|
|
908
|
+
_gitLog(ctx, 'git_log', args, 'ok');
|
|
909
|
+
return { status: 'ok', commits: [], count: 0, summary: 'No commits yet' };
|
|
910
|
+
}
|
|
911
|
+
_gitLog(ctx, 'git_log', args, 'error');
|
|
912
|
+
return _gitFailure(res);
|
|
913
|
+
}
|
|
914
|
+
const commits = [];
|
|
915
|
+
for (const line of res.stdout.split('\n')) {
|
|
916
|
+
if (!line) continue;
|
|
917
|
+
const [hash, author, email, date, ...rest] = line.split(SEP);
|
|
918
|
+
commits.push({ hash, short: (hash || '').slice(0, 7), author, email, date, subject: rest.join(SEP) });
|
|
919
|
+
}
|
|
920
|
+
_gitLog(ctx, 'git_log', args, 'ok');
|
|
921
|
+
return { status: 'ok', commits, count: commits.length, summary: `${commits.length} commit(s)` };
|
|
922
|
+
},
|
|
923
|
+
},
|
|
924
|
+
{
|
|
925
|
+
tool: 'git_add',
|
|
926
|
+
specNames: ['git_add'],
|
|
927
|
+
tags: ['git_add'],
|
|
928
|
+
parseXml: (text) => _parseGitTag(text, 'git_add', { str: ['paths'], bool: ['all'] }),
|
|
929
|
+
fromParams: (p) => ['git_add', { ...(p.paths != null ? { paths: p.paths } : {}), ...(p.all ? { all: true } : {}) }],
|
|
930
|
+
permission: () => ({ actionType: 'git', description: 'git add (stage changes)', tag: 'git_add' }),
|
|
931
|
+
execute: async (ctx, args, options) => {
|
|
932
|
+
const o = args[0] || {};
|
|
933
|
+
const blocked = ctx.permissionManager.readonlyBlock('git_add');
|
|
934
|
+
if (blocked) { _gitLog(ctx, 'git_add', args, 'error'); return blocked; }
|
|
935
|
+
let paths = [];
|
|
936
|
+
if (Array.isArray(o.paths)) paths = o.paths.map(String).filter(Boolean);
|
|
937
|
+
else if (typeof o.paths === 'string' && o.paths.trim()) paths = o.paths.trim().split(/\s+/);
|
|
938
|
+
if (!o.all && paths.length === 0) {
|
|
939
|
+
_gitLog(ctx, 'git_add', args, 'error');
|
|
940
|
+
return { error: 'git_add requires `paths` (one or more files) or `all: true`.' };
|
|
941
|
+
}
|
|
942
|
+
const argv = o.all ? ['add', '-A'] : ['add', '--', ...paths];
|
|
943
|
+
const res = await _runGit(ctx, argv, options);
|
|
944
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_add', args, 'error'); return _gitFailure(res); }
|
|
945
|
+
const added = o.all ? ['-A (all)'] : paths;
|
|
946
|
+
_gitLog(ctx, 'git_add', args, 'ok');
|
|
947
|
+
return { status: 'ok', added, summary: `Staged ${o.all ? 'all changes' : paths.join(', ')}` };
|
|
948
|
+
},
|
|
949
|
+
},
|
|
950
|
+
{
|
|
951
|
+
tool: 'git_commit',
|
|
952
|
+
specNames: ['git_commit'],
|
|
953
|
+
tags: ['git_commit'],
|
|
954
|
+
parseXml: (text) => _parseGitTag(text, 'git_commit', { str: ['message'], bool: ['all'], inline: 'message' }),
|
|
955
|
+
fromParams: (p) => ['git_commit', { message: p.message != null ? String(p.message) : '', ...(p.all ? { all: true } : {}) }],
|
|
956
|
+
permission: () => ({ actionType: 'git', description: 'git commit', tag: 'git_commit' }),
|
|
957
|
+
execute: async (ctx, args, options) => {
|
|
958
|
+
const o = args[0] || {};
|
|
959
|
+
const blocked = ctx.permissionManager.readonlyBlock('git_commit');
|
|
960
|
+
if (blocked) { _gitLog(ctx, 'git_commit', args, 'error'); return blocked; }
|
|
961
|
+
const message = (o.message == null ? '' : String(o.message)).trim();
|
|
962
|
+
if (!message) {
|
|
963
|
+
_gitLog(ctx, 'git_commit', args, 'error');
|
|
964
|
+
return { error: 'git_commit requires a non-empty commit message.' };
|
|
965
|
+
}
|
|
966
|
+
const argv = ['commit', '-m', message];
|
|
967
|
+
if (o.all) argv.push('-a');
|
|
968
|
+
const res = await _runGit(ctx, argv, options);
|
|
969
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_commit', args, 'error'); return _gitFailure(res); }
|
|
970
|
+
const hashRes = await _runGit(ctx, ['rev-parse', 'HEAD'], options);
|
|
971
|
+
const branchRes = await _runGit(ctx, ['rev-parse', '--abbrev-ref', 'HEAD'], options);
|
|
972
|
+
const hash = (hashRes.stdout || '').trim();
|
|
973
|
+
const branch = (branchRes.stdout || '').trim();
|
|
974
|
+
_gitLog(ctx, 'git_commit', args, 'ok');
|
|
975
|
+
return { status: 'ok', hash, short: hash.slice(0, 7), branch, summary: `Committed ${hash.slice(0, 7)} on ${branch}` };
|
|
976
|
+
},
|
|
977
|
+
},
|
|
978
|
+
{
|
|
979
|
+
tool: 'git_branch',
|
|
980
|
+
specNames: ['git_branch'],
|
|
981
|
+
tags: ['git_branch'],
|
|
982
|
+
parseXml: (text) => _parseGitTag(text, 'git_branch', { str: ['name'], bool: ['delete', 'force'] }),
|
|
983
|
+
fromParams: (p) => ['git_branch', { ...(p.name ? { name: String(p.name) } : {}), ...((p.delete || p.remove) ? { delete: true } : {}), ...(p.force ? { force: true } : {}) }],
|
|
984
|
+
// op-dependent: listing branches is read-only (null); create/delete is mutating.
|
|
985
|
+
permission: (ctx, args) => {
|
|
986
|
+
const o = args[0] || {};
|
|
987
|
+
if (!o.name) return null;
|
|
988
|
+
return { actionType: 'git', description: `git branch ${o.delete ? 'delete' : 'create'} ${o.name}`, tag: 'git_branch' };
|
|
989
|
+
},
|
|
990
|
+
execute: async (ctx, args, options) => {
|
|
991
|
+
const o = args[0] || {};
|
|
992
|
+
if (!o.name) {
|
|
993
|
+
const res = await _runGit(ctx, ['branch', '--no-color'], options);
|
|
994
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_branch', args, 'error'); return _gitFailure(res); }
|
|
995
|
+
const branches = [];
|
|
996
|
+
let current = null;
|
|
997
|
+
for (const line of res.stdout.split('\n')) {
|
|
998
|
+
if (!line.trim()) continue;
|
|
999
|
+
const isCurrent = line.startsWith('*');
|
|
1000
|
+
const name = line.replace(/^\*?\s+/, '').trim();
|
|
1001
|
+
if (!name || name.startsWith('(')) continue; // skip "(HEAD detached …)"
|
|
1002
|
+
if (isCurrent) current = name;
|
|
1003
|
+
branches.push({ name, current: isCurrent });
|
|
1004
|
+
}
|
|
1005
|
+
_gitLog(ctx, 'git_branch', args, 'ok');
|
|
1006
|
+
return { status: 'ok', branches, current, summary: `${branches.length} branch(es), on ${current || '(detached)'}` };
|
|
1007
|
+
}
|
|
1008
|
+
const blocked = ctx.permissionManager.readonlyBlock('git_branch');
|
|
1009
|
+
if (blocked) { _gitLog(ctx, 'git_branch', args, 'error'); return blocked; }
|
|
1010
|
+
const argv = o.delete ? ['branch', o.force ? '-D' : '-d', String(o.name)] : ['branch', String(o.name)];
|
|
1011
|
+
const res = await _runGit(ctx, argv, options);
|
|
1012
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_branch', args, 'error'); return _gitFailure(res); }
|
|
1013
|
+
_gitLog(ctx, 'git_branch', args, 'ok');
|
|
1014
|
+
return o.delete
|
|
1015
|
+
? { status: 'ok', deleted: String(o.name), summary: `Deleted branch ${o.name}` }
|
|
1016
|
+
: { status: 'ok', created: String(o.name), summary: `Created branch ${o.name}` };
|
|
1017
|
+
},
|
|
1018
|
+
},
|
|
1019
|
+
{
|
|
1020
|
+
tool: 'git_checkout',
|
|
1021
|
+
specNames: ['git_checkout'],
|
|
1022
|
+
tags: ['git_checkout'],
|
|
1023
|
+
parseXml: (text) => _parseGitTag(text, 'git_checkout', { str: ['name'], bool: ['create', 'force'] }),
|
|
1024
|
+
fromParams: (p) => ['git_checkout', { name: p.name != null ? String(p.name) : '', ...(p.create ? { create: true } : {}), ...(p.force ? { force: true } : {}) }],
|
|
1025
|
+
permission: (ctx, args) => {
|
|
1026
|
+
const o = args[0] || {};
|
|
1027
|
+
// Destructive-git ↔ checkpoint honesty: a checkout can discard uncommitted
|
|
1028
|
+
// working-tree changes that checkpoints never snapshot (not rewindable).
|
|
1029
|
+
return { actionType: 'git', description: `git checkout ${o.create ? '-b ' : ''}${o.name || ''} (may discard uncommitted changes — NOT recoverable via /rewind)`, tag: 'git_checkout' };
|
|
1030
|
+
},
|
|
1031
|
+
execute: async (ctx, args, options) => {
|
|
1032
|
+
const o = args[0] || {};
|
|
1033
|
+
const blocked = ctx.permissionManager.readonlyBlock('git_checkout');
|
|
1034
|
+
if (blocked) { _gitLog(ctx, 'git_checkout', args, 'error'); return blocked; }
|
|
1035
|
+
const name = (o.name == null ? '' : String(o.name)).trim();
|
|
1036
|
+
if (!name) { _gitLog(ctx, 'git_checkout', args, 'error'); return { error: 'git_checkout requires a target `name` (branch or ref).' }; }
|
|
1037
|
+
const argv = ['checkout'];
|
|
1038
|
+
if (o.force) argv.push('-f');
|
|
1039
|
+
if (o.create) argv.push('-b');
|
|
1040
|
+
argv.push(name);
|
|
1041
|
+
const res = await _runGit(ctx, argv, options);
|
|
1042
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_checkout', args, 'error'); return _gitFailure(res); }
|
|
1043
|
+
const branchRes = await _runGit(ctx, ['rev-parse', '--abbrev-ref', 'HEAD'], options);
|
|
1044
|
+
const branch = (branchRes.stdout || '').trim() || name;
|
|
1045
|
+
_gitLog(ctx, 'git_checkout', args, 'ok');
|
|
1046
|
+
return { status: 'ok', branch, created: !!o.create, summary: `Switched to ${branch}` };
|
|
1047
|
+
},
|
|
1048
|
+
},
|
|
1049
|
+
{
|
|
1050
|
+
tool: 'git_worktree',
|
|
1051
|
+
specNames: ['git_worktree'],
|
|
1052
|
+
tags: ['git_worktree'],
|
|
1053
|
+
parseXml: (text) => _parseGitTag(text, 'git_worktree', { str: ['op', 'path', 'branch'], bool: ['force'] }),
|
|
1054
|
+
fromParams: (p) => ['git_worktree', { op: p.op || 'list', ...(p.path ? { path: String(p.path) } : {}), ...(p.branch ? { branch: String(p.branch) } : {}), ...(p.force ? { force: true } : {}) }],
|
|
1055
|
+
// op-dependent: list is read-only (null); add/remove are mutating.
|
|
1056
|
+
permission: (ctx, args) => {
|
|
1057
|
+
const o = args[0] || {};
|
|
1058
|
+
const op = o.op || 'list';
|
|
1059
|
+
if (op === 'list') return null;
|
|
1060
|
+
return { actionType: 'git', description: `git worktree ${op} ${o.path || ''}`, tag: 'git_worktree' };
|
|
1061
|
+
},
|
|
1062
|
+
execute: async (ctx, args, options) => {
|
|
1063
|
+
const o = args[0] || {};
|
|
1064
|
+
const op = o.op || 'list';
|
|
1065
|
+
if (op === 'list') {
|
|
1066
|
+
const res = await _runGit(ctx, ['worktree', 'list', '--porcelain'], options);
|
|
1067
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
|
|
1068
|
+
const worktrees = _parseWorktrees(res.stdout);
|
|
1069
|
+
_gitLog(ctx, 'git_worktree', args, 'ok');
|
|
1070
|
+
return { status: 'ok', op: 'list', worktrees, summary: `${worktrees.length} worktree(s)` };
|
|
1071
|
+
}
|
|
1072
|
+
const blocked = ctx.permissionManager.readonlyBlock('git_worktree');
|
|
1073
|
+
if (blocked) { _gitLog(ctx, 'git_worktree', args, 'error'); return blocked; }
|
|
1074
|
+
if (op === 'add') {
|
|
1075
|
+
if (!o.path) { _gitLog(ctx, 'git_worktree', args, 'error'); return { error: 'git_worktree add requires a `path`.' }; }
|
|
1076
|
+
const argv = ['worktree', 'add'];
|
|
1077
|
+
if (o.branch) argv.push('-b', String(o.branch));
|
|
1078
|
+
argv.push(String(o.path));
|
|
1079
|
+
const res = await _runGit(ctx, argv, options);
|
|
1080
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
|
|
1081
|
+
_gitLog(ctx, 'git_worktree', args, 'ok');
|
|
1082
|
+
return { status: 'ok', op: 'add', path: String(o.path), branch: o.branch ? String(o.branch) : null, summary: `Added worktree at ${o.path}` };
|
|
1083
|
+
}
|
|
1084
|
+
if (op === 'remove') {
|
|
1085
|
+
if (!o.path) { _gitLog(ctx, 'git_worktree', args, 'error'); return { error: 'git_worktree remove requires a `path`.' }; }
|
|
1086
|
+
const argv = ['worktree', 'remove'];
|
|
1087
|
+
if (o.force) argv.push('--force');
|
|
1088
|
+
argv.push(String(o.path));
|
|
1089
|
+
const res = await _runGit(ctx, argv, options);
|
|
1090
|
+
if (res.blocked || res.exit_code !== 0) { _gitLog(ctx, 'git_worktree', args, 'error'); return _gitFailure(res); }
|
|
1091
|
+
_gitLog(ctx, 'git_worktree', args, 'ok');
|
|
1092
|
+
return { status: 'ok', op: 'remove', path: String(o.path), summary: `Removed worktree at ${o.path}` };
|
|
1093
|
+
}
|
|
1094
|
+
_gitLog(ctx, 'git_worktree', args, 'error');
|
|
1095
|
+
return { error: `git_worktree: unknown op "${op}" (expected list | add | remove).` };
|
|
1096
|
+
},
|
|
1097
|
+
},
|
|
1098
|
+
];
|
|
1099
|
+
|
|
1100
|
+
const TOOL_REGISTRY = [
|
|
1101
|
+
{
|
|
1102
|
+
tool: 'shell',
|
|
1103
|
+
specNames: ['exec', 'shell'],
|
|
1104
|
+
tags: ['exec', 'shell', 'run_command', 'run'],
|
|
1105
|
+
parseXml: (text) => _inline(text, 'shell|exec|run_command|run', 'shell'),
|
|
1106
|
+
fromParams: (p) => (p.command ? ['shell', p.command] : null),
|
|
1107
|
+
// shell is executed through agentExecShell (deny-list chokepoint), not the
|
|
1108
|
+
// agentExecFile dispatch — this execute exists for registry completeness.
|
|
1109
|
+
execute: (ctx, args, options) => ctx.agentExecShell(args[0], options || {}),
|
|
1110
|
+
permission: (ctx, args) => ({ actionType: 'shell', description: args[0] || '', tag: 'exec' }),
|
|
1111
|
+
},
|
|
1112
|
+
{
|
|
1113
|
+
tool: 'read',
|
|
1114
|
+
specNames: ['read_file'],
|
|
1115
|
+
tags: ['read_file'],
|
|
1116
|
+
parseXml: (text) => _parseReadTag(text),
|
|
1117
|
+
fromParams: (p) => (p.path
|
|
1118
|
+
? ['read', p.path, p.start_line ?? null, p.end_line ?? null, !!p.show_line_numbers]
|
|
1119
|
+
: null),
|
|
1120
|
+
permission: () => null,
|
|
1121
|
+
execute: async (ctx, args, options) => {
|
|
1122
|
+
const signal = (options && options.signal) || null;
|
|
1123
|
+
const [arg0 = null] = args;
|
|
1124
|
+
const { _log, logToolCall, isProtectedSecretPath, _secretReadError, getConfig, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1125
|
+
const filePath = arg0;
|
|
1126
|
+
if (isProtectedSecretPath(filePath)) {
|
|
1127
|
+
logToolCall('read_file', { path: filePath }, false, 'denied');
|
|
1128
|
+
return _secretReadError(filePath);
|
|
1129
|
+
}
|
|
1130
|
+
const startedAt = Date.now();
|
|
1131
|
+
const stat = await fsp.stat(filePath).catch(() => null);
|
|
1132
|
+
if (stat) {
|
|
1133
|
+
const cfg = getConfig ? getConfig() : {};
|
|
1134
|
+
// Byte BACKSTOP only (Task W.7). Pagination (formatReadResult) is now the
|
|
1135
|
+
// primary context bound — a large line-readable file paginates rather than
|
|
1136
|
+
// hard-refusing. This ceiling (default 50 MB) just rules out slurping a
|
|
1137
|
+
// multi-GB file whole into memory; an operator can lower max_file_size_kb
|
|
1138
|
+
// to hard-refuse smaller files.
|
|
1139
|
+
const defKb = require('./constants').DEFAULT_READ_MAX_FILE_KB;
|
|
1140
|
+
const maxKb = cfg.max_file_size_kb || defKb;
|
|
1141
|
+
const maxBytes = maxKb * 1024;
|
|
1142
|
+
if (stat.size > maxBytes) {
|
|
1143
|
+
const kb = (stat.size / 1024).toFixed(0);
|
|
1144
|
+
logToolCall('read_file', { path: filePath }, false, 'error');
|
|
1145
|
+
return { error: `File too large: ${kb} KB exceeds max_file_size_kb=${maxKb}` };
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
if (signal && signal.aborted) {
|
|
1149
|
+
logToolCall('read_file', { path: filePath }, true, 'aborted');
|
|
1150
|
+
return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
|
|
1151
|
+
}
|
|
1152
|
+
try {
|
|
1153
|
+
const data = await fsp.readFile(filePath, { encoding: 'utf8', signal: signal || undefined });
|
|
1154
|
+
const lines = data.split('\n').length;
|
|
1155
|
+
if (lines > 10) {
|
|
1156
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath} (${lines} lines, ${data.length} chars)${RST}`);
|
|
1157
|
+
} else {
|
|
1158
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath}${RST}`);
|
|
1159
|
+
}
|
|
1160
|
+
logToolCall('read_file', { path: filePath }, true, 'ok');
|
|
1161
|
+
return { content: data, path: filePath, bytes: Buffer.byteLength(data, 'utf8') };
|
|
1162
|
+
} catch (error) {
|
|
1163
|
+
if (error && (error.name === 'AbortError' || error.code === 'ABORT_ERR')) {
|
|
1164
|
+
logToolCall('read_file', { path: filePath }, true, 'aborted');
|
|
1165
|
+
return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
|
|
1166
|
+
}
|
|
1167
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1168
|
+
logToolCall('read_file', { path: filePath }, true, 'error');
|
|
1169
|
+
return { error: error.message };
|
|
1170
|
+
}
|
|
1171
|
+
},
|
|
1172
|
+
},
|
|
1173
|
+
{
|
|
1174
|
+
tool: 'write',
|
|
1175
|
+
specNames: ['write_file', 'create_file'],
|
|
1176
|
+
tags: ['write_file', 'create_file'],
|
|
1177
|
+
parseXml: (text) => {
|
|
1178
|
+
const out = [];
|
|
1179
|
+
// QUIRK: attribute-form content (m[2]) is captured RAW — not trimmed —
|
|
1180
|
+
// unlike inline-tag bodies which go through _unwrapInnerTag().trim().
|
|
1181
|
+
// Preserved deliberately; pinned by test/extract-tool-calls.test.js
|
|
1182
|
+
// ("QUIRK: attribute-form content is NOT trimmed (unlike inline tags)").
|
|
1183
|
+
// Any change to this is out of scope for the tool-registry refactor.
|
|
1184
|
+
for (const m of _matchDual(text, '<write_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/write_file>')) out.push(['write', m[1], m[2]]);
|
|
1185
|
+
for (const m of _matchDual(text, '<create_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/create_file>')) out.push(['write', m[1], m[2]]);
|
|
1186
|
+
return out;
|
|
1187
|
+
},
|
|
1188
|
+
fromParams: (p) => (p.path ? ['write', p.path, p.content != null ? p.content : ''] : null),
|
|
1189
|
+
permission: (ctx, args) => _permWriteAppend(ctx, 'write', args),
|
|
1190
|
+
execute: (ctx, args, options) => _execWriteAppend(ctx, 'write', args, options),
|
|
1191
|
+
},
|
|
1192
|
+
{
|
|
1193
|
+
tool: 'append',
|
|
1194
|
+
specNames: ['append_file'],
|
|
1195
|
+
tags: ['append_file'],
|
|
1196
|
+
// QUIRK: as with write_file, append content is captured raw (not trimmed).
|
|
1197
|
+
parseXml: (text) => _matchDual(text, '<append_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/append_file>').map((m) => ['append', m[1], m[2]]),
|
|
1198
|
+
fromParams: (p) => (p.path ? ['append', p.path, p.content != null ? p.content : ''] : null),
|
|
1199
|
+
permission: (ctx, args) => _permWriteAppend(ctx, 'append', args),
|
|
1200
|
+
execute: (ctx, args, options) => _execWriteAppend(ctx, 'append', args, options),
|
|
1201
|
+
},
|
|
1202
|
+
{
|
|
1203
|
+
tool: 'list_dir',
|
|
1204
|
+
specNames: ['list_dir'],
|
|
1205
|
+
tags: ['list_dir'],
|
|
1206
|
+
parseXml: (text) => _inline(text, 'list_dir', 'list_dir'),
|
|
1207
|
+
fromParams: (p) => ['list_dir', p.path || '.'],
|
|
1208
|
+
permission: () => null,
|
|
1209
|
+
execute: async (ctx, args) => {
|
|
1210
|
+
const [arg0 = null] = args;
|
|
1211
|
+
const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1212
|
+
const dirPath = arg0;
|
|
1213
|
+
try {
|
|
1214
|
+
const entries = await fsp.readdir(dirPath, { withFileTypes: true });
|
|
1215
|
+
const items = entries.map((e) => {
|
|
1216
|
+
if (e.isSymbolicLink()) return `[L] ${e.name}`;
|
|
1217
|
+
if (e.isDirectory()) return `[D] ${e.name}`;
|
|
1218
|
+
return `[F] ${e.name}`;
|
|
1219
|
+
});
|
|
1220
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${dirPath} (${items.length} items)${RST}`);
|
|
1221
|
+
logToolCall('list_dir', { path: dirPath }, true, 'ok');
|
|
1222
|
+
return { items, path: dirPath };
|
|
1223
|
+
} catch (error) {
|
|
1224
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1225
|
+
logToolCall('list_dir', { path: dirPath }, true, 'error');
|
|
1226
|
+
return { error: error.message };
|
|
1227
|
+
}
|
|
1228
|
+
},
|
|
1229
|
+
},
|
|
1230
|
+
{
|
|
1231
|
+
tool: 'search_files',
|
|
1232
|
+
specNames: ['search_files'],
|
|
1233
|
+
tags: ['search_files'],
|
|
1234
|
+
parseXml: (text) => {
|
|
1235
|
+
const out = _inline(text, 'search_files', 'search_files', ['.']);
|
|
1236
|
+
for (const m of _matchDual(text, '<search_files\\s+pattern=Q([^Q]+)Q(?:\\s+dir=Q([^Q]*)Q)?\\s*(?:><\\/search_files>|\\/>)')) {
|
|
1237
|
+
out.push(['search_files', m[1], m[2] || '.']);
|
|
1238
|
+
}
|
|
1239
|
+
return out;
|
|
1240
|
+
},
|
|
1241
|
+
fromParams: (p) => ['search_files', p.pattern || '*', p.dir || '.'],
|
|
1242
|
+
permission: () => null,
|
|
1243
|
+
execute: async (ctx, args, options) => {
|
|
1244
|
+
const signal = (options && options.signal) || null;
|
|
1245
|
+
const [arg0 = null, arg1 = null] = args;
|
|
1246
|
+
const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1247
|
+
const pattern = arg0;
|
|
1248
|
+
const searchDir = arg1 || '.';
|
|
1249
|
+
const startedAt = Date.now();
|
|
1250
|
+
try {
|
|
1251
|
+
let regStr = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
|
|
1252
|
+
regStr = regStr.replace(/\*\*/g, '\x00');
|
|
1253
|
+
regStr = regStr.replace(/\*/g, '[^/]*');
|
|
1254
|
+
regStr = regStr.replace(/\x00\//g, '(?:.*/)?');
|
|
1255
|
+
regStr = regStr.replace(/\x00/g, '.*');
|
|
1256
|
+
const regex = new RegExp(`^${regStr}$`);
|
|
1257
|
+
const matchName = !pattern.includes('/');
|
|
1258
|
+
const files = [];
|
|
1259
|
+
async function walk(dir, rel) {
|
|
1260
|
+
if (signal && signal.aborted) return;
|
|
1261
|
+
let entries;
|
|
1262
|
+
try { entries = await fsp.readdir(dir, { withFileTypes: true }); } catch { return; }
|
|
1263
|
+
for (const entry of entries) {
|
|
1264
|
+
if (signal && signal.aborted) return;
|
|
1265
|
+
const relPath = rel ? `${rel}/${entry.name}` : entry.name;
|
|
1266
|
+
if (regex.test(matchName ? entry.name : relPath)) files.push(relPath);
|
|
1267
|
+
if (entry.isDirectory()) await walk(path.join(dir, entry.name), relPath);
|
|
1268
|
+
}
|
|
1269
|
+
}
|
|
1270
|
+
await walk(searchDir, '');
|
|
1271
|
+
if (signal && signal.aborted) {
|
|
1272
|
+
logToolCall('search_files', { pattern, dir: searchDir }, true, 'aborted');
|
|
1273
|
+
return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
|
|
1274
|
+
}
|
|
1275
|
+
files.sort();
|
|
1276
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${files.length} file(s) matching "${pattern}"${RST}`);
|
|
1277
|
+
logToolCall('search_files', { pattern, dir: searchDir }, true, 'ok');
|
|
1278
|
+
return { files, pattern, dir: searchDir };
|
|
1279
|
+
} catch (error) {
|
|
1280
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1281
|
+
logToolCall('search_files', { pattern, dir: searchDir }, true, 'error');
|
|
1282
|
+
return { error: error.message };
|
|
1283
|
+
}
|
|
1284
|
+
},
|
|
1285
|
+
},
|
|
1286
|
+
{
|
|
1287
|
+
tool: 'grep',
|
|
1288
|
+
specNames: ['grep'],
|
|
1289
|
+
tags: ['grep'],
|
|
1290
|
+
parseXml: (text) => _parseSearchTag(text, 'grep'),
|
|
1291
|
+
fromParams: (p) => (p.pattern
|
|
1292
|
+
? ['grep', p.pattern, p.path || null, !!p.ignore_case, p.output_mode || null, p.head_limit ?? null, p.offset ?? null]
|
|
1293
|
+
: null),
|
|
1294
|
+
permission: () => null,
|
|
1295
|
+
execute: async (ctx, args, options) => {
|
|
1296
|
+
const signal = (options && options.signal) || null;
|
|
1297
|
+
const [pattern = null, pathGlob = null, ignoreCase = false, outputMode = null, headLimit, offset] = args;
|
|
1298
|
+
const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1299
|
+
const res = _grepSearch({ pattern, pathGlob, ignoreCase, baseDir: '.', engine: 'auto', signal });
|
|
1300
|
+
if (res.aborted) { logToolCall('grep', { pattern }, true, 'aborted'); return res; }
|
|
1301
|
+
if (res.error) {
|
|
1302
|
+
_log(` ${FG_RED}✗ ${res.error}${RST}`);
|
|
1303
|
+
logToolCall('grep', { pattern }, true, 'error');
|
|
1304
|
+
return res;
|
|
1305
|
+
}
|
|
1306
|
+
// Shape the serialization controls onto the result (Task W.5). The engine
|
|
1307
|
+
// returns the full (engine-capped) match set; output_mode + head_limit +
|
|
1308
|
+
// offset bound what reaches the model in formatFileResult (lib/agent.js).
|
|
1309
|
+
res.output_mode = _normGrepMode(outputMode);
|
|
1310
|
+
res.head_limit = _normHeadLimit(headLimit, require('./constants').DEFAULT_GREP_HEAD_LIMIT);
|
|
1311
|
+
res.offset = _normOffset(offset);
|
|
1312
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}grep "${pattern}" — ${res.count} match(es)${RST}`);
|
|
1313
|
+
logToolCall('grep', { pattern, path: pathGlob }, true, 'ok');
|
|
1314
|
+
return res;
|
|
1315
|
+
},
|
|
1316
|
+
},
|
|
1317
|
+
{
|
|
1318
|
+
tool: 'glob',
|
|
1319
|
+
specNames: ['glob'],
|
|
1320
|
+
tags: ['glob'],
|
|
1321
|
+
parseXml: (text) => _parseSearchTag(text, 'glob'),
|
|
1322
|
+
fromParams: (p) => (p.pattern ? ['glob', p.pattern, p.path || p.dir || '.', p.head_limit ?? null, p.offset ?? null] : null),
|
|
1323
|
+
permission: () => null,
|
|
1324
|
+
execute: async (ctx, args, options) => {
|
|
1325
|
+
const signal = (options && options.signal) || null;
|
|
1326
|
+
const [pattern = null, base = '.', headLimit, offset] = args;
|
|
1327
|
+
const { _log, logToolCall, isPathSafe, _sandboxError, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1328
|
+
if (!isPathSafe(base)) {
|
|
1329
|
+
logToolCall('glob', { pattern, dir: base }, false, 'denied');
|
|
1330
|
+
return _sandboxError(base);
|
|
1331
|
+
}
|
|
1332
|
+
const res = _globSearch({ pattern, baseDir: base, signal });
|
|
1333
|
+
if (res.aborted) { logToolCall('glob', { pattern }, true, 'aborted'); return res; }
|
|
1334
|
+
if (res.error) {
|
|
1335
|
+
_log(` ${FG_RED}✗ ${res.error}${RST}`);
|
|
1336
|
+
logToolCall('glob', { pattern }, true, 'error');
|
|
1337
|
+
return res;
|
|
1338
|
+
}
|
|
1339
|
+
// head_limit + offset bound the file list that reaches the model (Task W.5);
|
|
1340
|
+
// the engine returns the full (engine-capped) list, serialized in formatFileResult.
|
|
1341
|
+
res.head_limit = _normHeadLimit(headLimit, require('./constants').DEFAULT_GLOB_HEAD_LIMIT);
|
|
1342
|
+
res.offset = _normOffset(offset);
|
|
1343
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}glob "${pattern}" — ${res.count} file(s)${RST}`);
|
|
1344
|
+
logToolCall('glob', { pattern, dir: base }, true, 'ok');
|
|
1345
|
+
return res;
|
|
1346
|
+
},
|
|
1347
|
+
},
|
|
1348
|
+
{
|
|
1349
|
+
tool: 'delete_file',
|
|
1350
|
+
specNames: ['delete_file'],
|
|
1351
|
+
tags: ['delete_file'],
|
|
1352
|
+
parseXml: (text) => _inline(text, 'delete_file', 'delete_file'),
|
|
1353
|
+
fromParams: (p) => (p.path ? ['delete_file', p.path] : null),
|
|
1354
|
+
permission: (ctx, args) => {
|
|
1355
|
+
const { _log, FG_YELLOW, BOLD, RST } = ctx;
|
|
1356
|
+
const filePath = args[0];
|
|
1357
|
+
_log(` ${FG_YELLOW}${BOLD}⚠ Deleting: ${filePath}${RST}`);
|
|
1358
|
+
return { actionType: 'file', description: `Delete ${filePath}`, tag: 'delete_file' };
|
|
1359
|
+
},
|
|
1360
|
+
execute: async (ctx, args) => {
|
|
1361
|
+
const [arg0 = null] = args;
|
|
1362
|
+
const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1363
|
+
const filePath = arg0;
|
|
1364
|
+
const blocked = permissionManager.readonlyBlock('delete_file');
|
|
1365
|
+
if (blocked) {
|
|
1366
|
+
logToolCall('delete_file', { path: filePath }, false, 'denied');
|
|
1367
|
+
return blocked;
|
|
1368
|
+
}
|
|
1369
|
+
if (!isPathSafe(filePath)) {
|
|
1370
|
+
logToolCall('delete_file', { path: filePath }, false, 'denied');
|
|
1371
|
+
return _sandboxError(filePath);
|
|
1372
|
+
}
|
|
1373
|
+
try {
|
|
1374
|
+
await fsp.unlink(filePath);
|
|
1375
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Deleted ${filePath}${RST}`);
|
|
1376
|
+
logToolCall('delete_file', { path: filePath }, true, 'ok');
|
|
1377
|
+
return { status: 'ok', path: filePath };
|
|
1378
|
+
} catch (error) {
|
|
1379
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1380
|
+
logToolCall('delete_file', { path: filePath }, true, 'error');
|
|
1381
|
+
return { error: error.message };
|
|
1382
|
+
}
|
|
1383
|
+
},
|
|
1384
|
+
},
|
|
1385
|
+
{
|
|
1386
|
+
tool: 'make_dir',
|
|
1387
|
+
specNames: ['make_dir'],
|
|
1388
|
+
tags: ['make_dir'],
|
|
1389
|
+
parseXml: (text) => _inline(text, 'make_dir', 'make_dir'),
|
|
1390
|
+
fromParams: (p) => (p.path ? ['make_dir', p.path] : null),
|
|
1391
|
+
permission: (ctx, args) => ({ actionType: 'file', description: `Create directory ${args[0]}`, tag: 'make_dir' }),
|
|
1392
|
+
execute: async (ctx, args) => {
|
|
1393
|
+
const [arg0 = null] = args;
|
|
1394
|
+
const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1395
|
+
const dirPath = arg0;
|
|
1396
|
+
const blocked = permissionManager.readonlyBlock('make_dir');
|
|
1397
|
+
if (blocked) {
|
|
1398
|
+
logToolCall('make_dir', { path: dirPath }, false, 'denied');
|
|
1399
|
+
return blocked;
|
|
1400
|
+
}
|
|
1401
|
+
if (!isPathSafe(dirPath)) {
|
|
1402
|
+
logToolCall('make_dir', { path: dirPath }, false, 'denied');
|
|
1403
|
+
return _sandboxError(dirPath);
|
|
1404
|
+
}
|
|
1405
|
+
try {
|
|
1406
|
+
await fsp.mkdir(dirPath, { recursive: true });
|
|
1407
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Created directory ${dirPath}${RST}`);
|
|
1408
|
+
logToolCall('make_dir', { path: dirPath }, true, 'ok');
|
|
1409
|
+
return { status: 'ok', path: dirPath };
|
|
1410
|
+
} catch (error) {
|
|
1411
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1412
|
+
logToolCall('make_dir', { path: dirPath }, true, 'error');
|
|
1413
|
+
return { error: error.message };
|
|
1414
|
+
}
|
|
1415
|
+
},
|
|
1416
|
+
},
|
|
1417
|
+
{
|
|
1418
|
+
tool: 'remove_dir',
|
|
1419
|
+
specNames: ['remove_dir'],
|
|
1420
|
+
tags: ['remove_dir'],
|
|
1421
|
+
parseXml: (text) => _inline(text, 'remove_dir', 'remove_dir'),
|
|
1422
|
+
fromParams: (p) => (p.path ? ['remove_dir', p.path] : null),
|
|
1423
|
+
permission: (ctx, args) => ({ actionType: 'file', description: `Remove directory ${args[0]}`, tag: 'remove_dir' }),
|
|
1424
|
+
execute: async (ctx, args) => {
|
|
1425
|
+
const [arg0 = null] = args;
|
|
1426
|
+
const { _log, logToolCall, isPathSafe, _sandboxError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1427
|
+
const dirPath = arg0;
|
|
1428
|
+
const blocked = permissionManager.readonlyBlock('remove_dir');
|
|
1429
|
+
if (blocked) {
|
|
1430
|
+
logToolCall('remove_dir', { path: dirPath }, false, 'denied');
|
|
1431
|
+
return blocked;
|
|
1432
|
+
}
|
|
1433
|
+
if (!isPathSafe(dirPath)) {
|
|
1434
|
+
logToolCall('remove_dir', { path: dirPath }, false, 'denied');
|
|
1435
|
+
return _sandboxError(dirPath);
|
|
1436
|
+
}
|
|
1437
|
+
try {
|
|
1438
|
+
await fsp.rm(dirPath, { recursive: true, force: true });
|
|
1439
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Removed directory ${dirPath}${RST}`);
|
|
1440
|
+
logToolCall('remove_dir', { path: dirPath }, true, 'ok');
|
|
1441
|
+
return { status: 'ok', path: dirPath };
|
|
1442
|
+
} catch (error) {
|
|
1443
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1444
|
+
logToolCall('remove_dir', { path: dirPath }, true, 'error');
|
|
1445
|
+
return { error: error.message };
|
|
1446
|
+
}
|
|
1447
|
+
},
|
|
1448
|
+
},
|
|
1449
|
+
{
|
|
1450
|
+
tool: 'move_file',
|
|
1451
|
+
specNames: ['move_file'],
|
|
1452
|
+
tags: ['move_file'],
|
|
1453
|
+
parseXml: (text) => _matchDual(text, '<move_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/move_file>|\\/>)').map((m) => ['move_file', m[1], m[2]]),
|
|
1454
|
+
fromParams: (p) => (p.src && p.dst ? ['move_file', p.src, p.dst] : null),
|
|
1455
|
+
permission: (ctx, args) => {
|
|
1456
|
+
const { _log, FG_YELLOW, BOLD, RST } = ctx;
|
|
1457
|
+
const src = args[0];
|
|
1458
|
+
const dst = args[1];
|
|
1459
|
+
_log(` ${FG_YELLOW}${BOLD}⚠ Moving: ${src} → ${dst}${RST}`);
|
|
1460
|
+
return { actionType: 'file', description: `Move ${src} to ${dst}`, tag: 'move_file' };
|
|
1461
|
+
},
|
|
1462
|
+
execute: async (ctx, args) => {
|
|
1463
|
+
const [arg0 = null, arg1 = null] = args;
|
|
1464
|
+
const { _log, logToolCall, isPathSafe, isProtectedSecretPath, isProtectedConfigPath, _sandboxError, _secretReadError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1465
|
+
const src = arg0;
|
|
1466
|
+
const dst = arg1;
|
|
1467
|
+
const blocked = permissionManager.readonlyBlock('move_file');
|
|
1468
|
+
if (blocked) {
|
|
1469
|
+
logToolCall('move_file', { src, dst }, false, 'denied');
|
|
1470
|
+
return blocked;
|
|
1471
|
+
}
|
|
1472
|
+
if (isProtectedSecretPath(src)) {
|
|
1473
|
+
logToolCall('move_file', { src, dst }, false, 'denied');
|
|
1474
|
+
return _secretReadError(src);
|
|
1475
|
+
}
|
|
1476
|
+
if (isProtectedConfigPath(dst)) {
|
|
1477
|
+
logToolCall('move_file', { src, dst }, false, 'denied');
|
|
1478
|
+
return _protectedConfigWriteError(dst);
|
|
1479
|
+
}
|
|
1480
|
+
if (!isPathSafe(dst)) {
|
|
1481
|
+
logToolCall('move_file', { src, dst }, false, 'denied');
|
|
1482
|
+
return _sandboxError(dst);
|
|
1483
|
+
}
|
|
1484
|
+
try {
|
|
1485
|
+
const dstDir = path.dirname(dst);
|
|
1486
|
+
if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
|
|
1487
|
+
try {
|
|
1488
|
+
await fsp.rename(src, dst);
|
|
1489
|
+
} catch (renameErr) {
|
|
1490
|
+
if (renameErr.code !== 'EXDEV') throw renameErr;
|
|
1491
|
+
await fsp.cp(src, dst, { recursive: true });
|
|
1492
|
+
await fsp.rm(src, { recursive: true, force: true });
|
|
1493
|
+
}
|
|
1494
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Moved ${src} → ${dst}${RST}`);
|
|
1495
|
+
logToolCall('move_file', { src, dst }, true, 'ok');
|
|
1496
|
+
return { status: 'ok', src, dst };
|
|
1497
|
+
} catch (error) {
|
|
1498
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1499
|
+
logToolCall('move_file', { src, dst }, true, 'error');
|
|
1500
|
+
return { error: error.message };
|
|
1501
|
+
}
|
|
1502
|
+
},
|
|
1503
|
+
},
|
|
1504
|
+
{
|
|
1505
|
+
tool: 'copy_file',
|
|
1506
|
+
specNames: ['copy_file'],
|
|
1507
|
+
tags: ['copy_file'],
|
|
1508
|
+
parseXml: (text) => _matchDual(text, '<copy_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/copy_file>|\\/>)').map((m) => ['copy_file', m[1], m[2]]),
|
|
1509
|
+
fromParams: (p) => (p.src && p.dst ? ['copy_file', p.src, p.dst] : null),
|
|
1510
|
+
permission: (ctx, args) => ({ actionType: 'file', description: `Copy ${args[0]} to ${args[1]}`, tag: 'copy_file' }),
|
|
1511
|
+
execute: async (ctx, args) => {
|
|
1512
|
+
const [arg0 = null, arg1 = null] = args;
|
|
1513
|
+
const { _log, logToolCall, isPathSafe, isProtectedSecretPath, isProtectedConfigPath, _sandboxError, _secretReadError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1514
|
+
const src = arg0;
|
|
1515
|
+
const dst = arg1;
|
|
1516
|
+
const blocked = permissionManager.readonlyBlock('copy_file');
|
|
1517
|
+
if (blocked) {
|
|
1518
|
+
logToolCall('copy_file', { src, dst }, false, 'denied');
|
|
1519
|
+
return blocked;
|
|
1520
|
+
}
|
|
1521
|
+
if (isProtectedSecretPath(src)) {
|
|
1522
|
+
logToolCall('copy_file', { src, dst }, false, 'denied');
|
|
1523
|
+
return _secretReadError(src);
|
|
1524
|
+
}
|
|
1525
|
+
if (isProtectedConfigPath(dst)) {
|
|
1526
|
+
logToolCall('copy_file', { src, dst }, false, 'denied');
|
|
1527
|
+
return _protectedConfigWriteError(dst);
|
|
1528
|
+
}
|
|
1529
|
+
if (!isPathSafe(dst)) {
|
|
1530
|
+
logToolCall('copy_file', { src, dst }, false, 'denied');
|
|
1531
|
+
return _sandboxError(dst);
|
|
1532
|
+
}
|
|
1533
|
+
try {
|
|
1534
|
+
const dstDir = path.dirname(dst);
|
|
1535
|
+
if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
|
|
1536
|
+
await fsp.cp(src, dst, { recursive: true });
|
|
1537
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Copied ${src} → ${dst}${RST}`);
|
|
1538
|
+
logToolCall('copy_file', { src, dst }, true, 'ok');
|
|
1539
|
+
return { status: 'ok', src, dst };
|
|
1540
|
+
} catch (error) {
|
|
1541
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1542
|
+
logToolCall('copy_file', { src, dst }, true, 'error');
|
|
1543
|
+
return { error: error.message };
|
|
1544
|
+
}
|
|
1545
|
+
},
|
|
1546
|
+
},
|
|
1547
|
+
{
|
|
1548
|
+
tool: 'edit_file',
|
|
1549
|
+
specNames: ['edit_file'],
|
|
1550
|
+
tags: ['edit_file'],
|
|
1551
|
+
parseXml: (text) => _matchDual(text, '<edit_file\\s+path=Q([^Q]+)Q\\s+line=Q(\\d+)Q>([\\s\\S]*?)<\\/edit_file>').map((m) => ['edit_file', m[1], parseInt(m[2], 10), m[3]]),
|
|
1552
|
+
fromParams: (p) => (p.path && p.line !== undefined ? ['edit_file', p.path, parseInt(p.line, 10), p.content != null ? p.content : ''] : null),
|
|
1553
|
+
permission: (ctx, args) => ({ actionType: 'file', description: `Edit line ${args[1]} in ${args[0]}`, tag: 'edit_file' }),
|
|
1554
|
+
execute: async (ctx, args) => {
|
|
1555
|
+
const [arg0 = null, arg1 = null, arg2 = null] = args;
|
|
1556
|
+
const { _log, logToolCall, isProtectedConfigPath, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1557
|
+
const filePath = arg0;
|
|
1558
|
+
const lineNum = arg1;
|
|
1559
|
+
const newContent = arg2;
|
|
1560
|
+
const blocked = permissionManager.readonlyBlock('edit_file');
|
|
1561
|
+
if (blocked) {
|
|
1562
|
+
logToolCall('edit_file', { path: filePath, line: lineNum }, false, 'denied');
|
|
1563
|
+
return blocked;
|
|
1564
|
+
}
|
|
1565
|
+
if (isProtectedConfigPath(filePath)) {
|
|
1566
|
+
logToolCall('edit_file', { path: filePath, line: lineNum }, false, 'denied');
|
|
1567
|
+
return _protectedConfigWriteError(filePath);
|
|
1568
|
+
}
|
|
1569
|
+
try {
|
|
1570
|
+
const data = await fsp.readFile(filePath, 'utf8');
|
|
1571
|
+
const lines = data.split('\n');
|
|
1572
|
+
if (lineNum < 1 || lineNum > lines.length) {
|
|
1573
|
+
logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
|
|
1574
|
+
return { error: `Line ${lineNum} out of range (file has ${lines.length} lines)` };
|
|
1575
|
+
}
|
|
1576
|
+
lines[lineNum - 1] = newContent;
|
|
1577
|
+
await fsp.writeFile(filePath, lines.join('\n'));
|
|
1578
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Edited line ${lineNum} in ${filePath}${RST}`);
|
|
1579
|
+
logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'ok');
|
|
1580
|
+
return { status: 'ok', path: filePath, line: lineNum };
|
|
1581
|
+
} catch (error) {
|
|
1582
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1583
|
+
logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
|
|
1584
|
+
return { error: error.message };
|
|
1585
|
+
}
|
|
1586
|
+
},
|
|
1587
|
+
},
|
|
1588
|
+
{
|
|
1589
|
+
tool: 'search_in_file',
|
|
1590
|
+
specNames: ['search_in_file'],
|
|
1591
|
+
tags: ['search_in_file'],
|
|
1592
|
+
parseXml: (text) => _matchDual(text, '<search_in_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/search_in_file>').map((m) => ['search_in_file', m[1], m[2].trim()]),
|
|
1593
|
+
fromParams: (p) => (p.path && p.pattern ? ['search_in_file', p.path, p.pattern] : null),
|
|
1594
|
+
permission: () => null,
|
|
1595
|
+
execute: async (ctx, args) => {
|
|
1596
|
+
const [arg0 = null, arg1 = null] = args;
|
|
1597
|
+
const { _log, logToolCall, isProtectedSecretPath, _secretReadError, _checkRegexSafety, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1598
|
+
const filePath = arg0;
|
|
1599
|
+
const pattern = arg1;
|
|
1600
|
+
if (isProtectedSecretPath(filePath)) {
|
|
1601
|
+
logToolCall('search_in_file', { path: filePath, pattern }, false, 'denied');
|
|
1602
|
+
return _secretReadError(filePath);
|
|
1603
|
+
}
|
|
1604
|
+
try {
|
|
1605
|
+
const data = await fsp.readFile(filePath, 'utf8');
|
|
1606
|
+
const guardErr = _checkRegexSafety(pattern, data);
|
|
1607
|
+
if (guardErr) {
|
|
1608
|
+
logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
|
|
1609
|
+
return guardErr;
|
|
1610
|
+
}
|
|
1611
|
+
const regex = new RegExp(pattern);
|
|
1612
|
+
const matches = data.split('\n')
|
|
1613
|
+
.map((content, idx) => regex.test(content) ? { line: idx + 1, content } : null)
|
|
1614
|
+
.filter(Boolean);
|
|
1615
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${matches.length} match(es) in ${filePath}${RST}`);
|
|
1616
|
+
logToolCall('search_in_file', { path: filePath, pattern }, true, 'ok');
|
|
1617
|
+
return { matches, path: filePath };
|
|
1618
|
+
} catch (error) {
|
|
1619
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1620
|
+
logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
|
|
1621
|
+
return { error: error.message };
|
|
1622
|
+
}
|
|
1623
|
+
},
|
|
1624
|
+
},
|
|
1625
|
+
{
|
|
1626
|
+
tool: 'replace_in_file',
|
|
1627
|
+
specNames: ['replace_in_file'],
|
|
1628
|
+
tags: ['replace_in_file'],
|
|
1629
|
+
parseXml: (text) => _matchDual(text, '<replace_in_file\\s+path=Q([^Q]+)Q\\s+search=Q([^Q]*)Q\\s+replace=Q([^Q]*)Q>([\\s\\S]*?)<\\/replace_in_file>').map((m) => ['replace_in_file', m[1], m[2], m[3], m[4].trim()]),
|
|
1630
|
+
fromParams: (p) => (p.path && p.search !== undefined ? ['replace_in_file', p.path, p.search, p.replace != null ? p.replace : '', p.flags || ''] : null),
|
|
1631
|
+
permission: (ctx, args) => ({ actionType: 'file', description: `Replace in ${args[0]}`, tag: 'replace_in_file' }),
|
|
1632
|
+
execute: async (ctx, args) => {
|
|
1633
|
+
const [arg0 = null, arg1 = null, arg2 = null, arg3 = null] = args;
|
|
1634
|
+
const { _log, logToolCall, isProtectedConfigPath, _protectedConfigWriteError, _checkRegexSafety, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1635
|
+
const filePath = arg0;
|
|
1636
|
+
const searchStr = arg1;
|
|
1637
|
+
const replaceStr = arg2;
|
|
1638
|
+
const flags = arg3 || '';
|
|
1639
|
+
const blocked = permissionManager.readonlyBlock('replace_in_file');
|
|
1640
|
+
if (blocked) {
|
|
1641
|
+
logToolCall('replace_in_file', { path: filePath, search: searchStr }, false, 'denied');
|
|
1642
|
+
return blocked;
|
|
1643
|
+
}
|
|
1644
|
+
if (isProtectedConfigPath(filePath)) {
|
|
1645
|
+
logToolCall('replace_in_file', { path: filePath, search: searchStr }, false, 'denied');
|
|
1646
|
+
return _protectedConfigWriteError(filePath);
|
|
1647
|
+
}
|
|
1648
|
+
try {
|
|
1649
|
+
const data = await fsp.readFile(filePath, 'utf8');
|
|
1650
|
+
const guardErr = _checkRegexSafety(searchStr, data);
|
|
1651
|
+
if (guardErr) {
|
|
1652
|
+
logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
|
|
1653
|
+
return guardErr;
|
|
1654
|
+
}
|
|
1655
|
+
const safeFlags = flags.replace(/[^gimsuy]/g, '');
|
|
1656
|
+
const regex = new RegExp(searchStr, safeFlags || undefined);
|
|
1657
|
+
// Semantics (intentional, unchanged): String.prototype.replace replaces
|
|
1658
|
+
// ALL matches only when the regex is global; without "g" it replaces just
|
|
1659
|
+
// the first match. The returned count must equal the replacements actually
|
|
1660
|
+
// performed — so count all matches when global, else 1 if there is a match
|
|
1661
|
+
// (else 0). (Task 1.4c: previously count was computed with an always-global
|
|
1662
|
+
// regex and overstated non-global replacements.)
|
|
1663
|
+
const isGlobal = safeFlags.includes('g');
|
|
1664
|
+
const count = isGlobal
|
|
1665
|
+
? (data.match(new RegExp(searchStr, safeFlags)) || []).length
|
|
1666
|
+
: (new RegExp(searchStr, safeFlags).test(data) ? 1 : 0);
|
|
1667
|
+
const newData = data.replace(regex, replaceStr);
|
|
1668
|
+
await fsp.writeFile(filePath, newData);
|
|
1669
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath}${RST}`);
|
|
1670
|
+
logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'ok');
|
|
1671
|
+
return { status: 'ok', path: filePath, count };
|
|
1672
|
+
} catch (error) {
|
|
1673
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1674
|
+
logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
|
|
1675
|
+
return { error: error.message };
|
|
1676
|
+
}
|
|
1677
|
+
},
|
|
1678
|
+
},
|
|
1679
|
+
{
|
|
1680
|
+
tool: 'download',
|
|
1681
|
+
specNames: ['download'],
|
|
1682
|
+
tags: ['download'],
|
|
1683
|
+
// Optional `path` destination (Pre-Task 4.0b). Both the attribute form
|
|
1684
|
+
// (`<download path="dest">URL</download>`) and the plain form
|
|
1685
|
+
// (`<download>URL</download>`, defaulting to the CWD) are accepted.
|
|
1686
|
+
parseXml: (text) => {
|
|
1687
|
+
const out = [];
|
|
1688
|
+
for (const m of _matchDual(text, '<download\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/download>')) {
|
|
1689
|
+
out.push(['download', _unwrapInnerTag(m[2]).trim(), m[1]]);
|
|
1690
|
+
}
|
|
1691
|
+
for (const m of text.matchAll(/<download>([\s\S]*?)<\/download>/g)) {
|
|
1692
|
+
out.push(['download', _unwrapInnerTag(m[1]).trim()]);
|
|
1693
|
+
}
|
|
1694
|
+
return out;
|
|
1695
|
+
},
|
|
1696
|
+
fromParams: (p) => {
|
|
1697
|
+
if (!p.url) return null;
|
|
1698
|
+
const dest = p.path || p.dest;
|
|
1699
|
+
return dest ? ['download', p.url, dest] : ['download', p.url];
|
|
1700
|
+
},
|
|
1701
|
+
permission: (ctx, args) => ({ actionType: 'net', description: `Download ${args[0]}`, tag: 'download' }),
|
|
1702
|
+
execute: async (ctx, args, options) => {
|
|
1703
|
+
const signal = (options && options.signal) || null;
|
|
1704
|
+
const [arg0 = null, arg1 = null] = args;
|
|
1705
|
+
const {
|
|
1706
|
+
_log, logToolCall, _dryRun, _skippedOps,
|
|
1707
|
+
isPathSafe, _sandboxError, isProtectedSecretPath, _secretReadError,
|
|
1708
|
+
isProtectedConfigPath, _protectedConfigWriteError,
|
|
1709
|
+
permissionManager, getConfig,
|
|
1710
|
+
FG_GREEN, FG_GRAY, FG_RED, RST,
|
|
1711
|
+
} = ctx;
|
|
1712
|
+
const url = arg0;
|
|
1713
|
+
const dest = arg1 || null;
|
|
1714
|
+
if (_dryRun) {
|
|
1715
|
+
_skippedOps.push({ category: 'net', symbol: '↓', desc: `download ${url}` });
|
|
1716
|
+
logToolCall('download', { url }, false, 'dry-run');
|
|
1717
|
+
return { status: 'dry-run', message: 'dry-run: network call skipped' };
|
|
1718
|
+
}
|
|
1719
|
+
// Validate/normalize the URL BEFORE building the request or resolving a
|
|
1720
|
+
// destination — a malformed URL (or non-http(s) scheme, empty, non-string)
|
|
1721
|
+
// is a clean tool error, never an uncaught throw out of the executor.
|
|
1722
|
+
const validatedDlUrl = _validateFetchUrl(url);
|
|
1723
|
+
if (validatedDlUrl.error) {
|
|
1724
|
+
_log(` ${FG_RED}✗ ${validatedDlUrl.error}${RST}`);
|
|
1725
|
+
logToolCall('download', { url }, true, 'error');
|
|
1726
|
+
return validatedDlUrl;
|
|
1727
|
+
}
|
|
1728
|
+
const normalizedDlUrl = validatedDlUrl.url;
|
|
1729
|
+
// Resolve the destination: an explicit path (relative → CWD, or absolute),
|
|
1730
|
+
// otherwise the URL basename into the CWD (historical default).
|
|
1731
|
+
let outPath;
|
|
1732
|
+
if (dest) {
|
|
1733
|
+
outPath = path.resolve(dest);
|
|
1734
|
+
} else {
|
|
1735
|
+
let fileName;
|
|
1736
|
+
try {
|
|
1737
|
+
fileName = path.basename(new URL(normalizedDlUrl).pathname) || 'download';
|
|
1738
|
+
} catch {
|
|
1739
|
+
fileName = 'download';
|
|
1740
|
+
}
|
|
1741
|
+
outPath = path.join(process.cwd(), fileName);
|
|
1742
|
+
}
|
|
1743
|
+
// Confinement (Pre-Task 4.0b): download is a write path and must honor the
|
|
1744
|
+
// same guards as every other mutating file tool — --readonly, the
|
|
1745
|
+
// secret-file guard, and isPathSafe (CWD confinement / --allow-anywhere).
|
|
1746
|
+
const blocked = permissionManager.readonlyBlock('download');
|
|
1747
|
+
if (blocked) {
|
|
1748
|
+
logToolCall('download', { url, path: outPath }, false, 'denied');
|
|
1749
|
+
return blocked;
|
|
1750
|
+
}
|
|
1751
|
+
if (isProtectedSecretPath(outPath)) {
|
|
1752
|
+
logToolCall('download', { url, path: outPath }, false, 'denied');
|
|
1753
|
+
return _secretReadError(outPath);
|
|
1754
|
+
}
|
|
1755
|
+
if (isProtectedConfigPath(outPath)) {
|
|
1756
|
+
logToolCall('download', { url, path: outPath }, false, 'denied');
|
|
1757
|
+
return _protectedConfigWriteError(outPath);
|
|
1758
|
+
}
|
|
1759
|
+
if (!isPathSafe(outPath)) {
|
|
1760
|
+
logToolCall('download', { url, path: outPath }, false, 'denied');
|
|
1761
|
+
return _sandboxError(outPath);
|
|
1762
|
+
}
|
|
1763
|
+
const cfg = getConfig ? getConfig() : {};
|
|
1764
|
+
const maxBytes = Math.max(1024, cfg.download_max_bytes || 104857600);
|
|
1765
|
+
const userAgent = _resolveUserAgent(cfg);
|
|
1766
|
+
const startedAt = Date.now();
|
|
1767
|
+
return new Promise((resolve) => {
|
|
1768
|
+
let abortedByUser = false;
|
|
1769
|
+
let cappedExceeded = false;
|
|
1770
|
+
let onAbort = null;
|
|
1771
|
+
let activeReq = null;
|
|
1772
|
+
let activeFile = null;
|
|
1773
|
+
const detachAbort = () => {
|
|
1774
|
+
if (onAbort && signal) {
|
|
1775
|
+
try { signal.removeEventListener('abort', onAbort); } catch {}
|
|
1776
|
+
onAbort = null;
|
|
1777
|
+
}
|
|
1778
|
+
};
|
|
1779
|
+
const finishAborted = () => {
|
|
1780
|
+
fs.unlink(outPath, () => {});
|
|
1781
|
+
logToolCall('download', { url }, true, 'aborted');
|
|
1782
|
+
resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
|
|
1783
|
+
};
|
|
1784
|
+
if (signal) {
|
|
1785
|
+
if (signal.aborted) {
|
|
1786
|
+
abortedByUser = true;
|
|
1787
|
+
finishAborted();
|
|
1788
|
+
return;
|
|
1789
|
+
}
|
|
1790
|
+
onAbort = () => {
|
|
1791
|
+
abortedByUser = true;
|
|
1792
|
+
try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
|
|
1793
|
+
try { if (activeFile) activeFile.destroy(); } catch {}
|
|
1794
|
+
};
|
|
1795
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
1796
|
+
}
|
|
1797
|
+
|
|
1798
|
+
function doDownload(target, redirectsLeft) {
|
|
1799
|
+
const proto = target.startsWith('https') ? https : http;
|
|
1800
|
+
let req;
|
|
1801
|
+
try {
|
|
1802
|
+
req = proto.get(target, { headers: { 'User-Agent': userAgent } }, (res) => {
|
|
1803
|
+
if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
|
|
1804
|
+
res.resume();
|
|
1805
|
+
// A redirect Location may be relative or malformed — resolve +
|
|
1806
|
+
// validate it against the current target rather than throwing.
|
|
1807
|
+
const nextUrl = _validateFetchUrl(res.headers.location, target);
|
|
1808
|
+
if (nextUrl.error) {
|
|
1809
|
+
detachAbort();
|
|
1810
|
+
_log(` ${FG_RED}✗ ${nextUrl.error}${RST}`);
|
|
1811
|
+
logToolCall('download', { url: target }, true, 'error');
|
|
1812
|
+
return resolve(nextUrl);
|
|
1813
|
+
}
|
|
1814
|
+
return doDownload(nextUrl.url, redirectsLeft - 1);
|
|
1815
|
+
}
|
|
1816
|
+
if (res.statusCode >= 400) {
|
|
1817
|
+
res.resume();
|
|
1818
|
+
const msg = `HTTP ${res.statusCode}`;
|
|
1819
|
+
detachAbort();
|
|
1820
|
+
_log(` ${FG_RED}✗ ${msg}${RST}`);
|
|
1821
|
+
logToolCall('download', { url }, true, 'error');
|
|
1822
|
+
return resolve({ error: msg });
|
|
1823
|
+
}
|
|
1824
|
+
const file = fs.createWriteStream(outPath);
|
|
1825
|
+
activeFile = file;
|
|
1826
|
+
let downloadedBytes = 0;
|
|
1827
|
+
// Manual stream (instead of res.pipe) so we can enforce the byte cap
|
|
1828
|
+
// mid-flight: on exceeding it, abort the request, destroy the file,
|
|
1829
|
+
// remove the partial artifact, and resolve once cleanup completes so
|
|
1830
|
+
// no truncated file is ever left behind.
|
|
1831
|
+
res.on('data', (chunk) => {
|
|
1832
|
+
if (cappedExceeded || abortedByUser) return;
|
|
1833
|
+
downloadedBytes += chunk.length;
|
|
1834
|
+
if (downloadedBytes > maxBytes) {
|
|
1835
|
+
cappedExceeded = true;
|
|
1836
|
+
try { if (activeReq) activeReq.destroy(); } catch {}
|
|
1837
|
+
try { res.destroy(); } catch {}
|
|
1838
|
+
detachAbort();
|
|
1839
|
+
const msg = `Download aborted: exceeded byte cap (${maxBytes} bytes)`;
|
|
1840
|
+
file.destroy();
|
|
1841
|
+
file.once('close', () => {
|
|
1842
|
+
fs.unlink(outPath, () => {
|
|
1843
|
+
_log(` ${FG_RED}✗ ${msg}${RST}`);
|
|
1844
|
+
logToolCall('download', { url, path: outPath }, true, 'error');
|
|
1845
|
+
resolve({ error: msg, capped: true, bytes: downloadedBytes });
|
|
1846
|
+
});
|
|
1847
|
+
});
|
|
1848
|
+
return;
|
|
1849
|
+
}
|
|
1850
|
+
if (!file.write(chunk)) {
|
|
1851
|
+
res.pause();
|
|
1852
|
+
file.once('drain', () => { if (!cappedExceeded && !abortedByUser) res.resume(); });
|
|
1853
|
+
}
|
|
1854
|
+
});
|
|
1855
|
+
res.on('end', () => {
|
|
1856
|
+
if (cappedExceeded || abortedByUser) return;
|
|
1857
|
+
file.end();
|
|
1858
|
+
});
|
|
1859
|
+
res.on('error', (err) => {
|
|
1860
|
+
if (cappedExceeded) return;
|
|
1861
|
+
if (abortedByUser) { detachAbort(); finishAborted(); return; }
|
|
1862
|
+
file.destroy();
|
|
1863
|
+
fs.unlink(outPath, () => {});
|
|
1864
|
+
detachAbort();
|
|
1865
|
+
_log(` ${FG_RED}✗ ${err.message}${RST}`);
|
|
1866
|
+
logToolCall('download', { url }, true, 'error');
|
|
1867
|
+
resolve({ error: err.message });
|
|
1868
|
+
});
|
|
1869
|
+
file.on('finish', () => {
|
|
1870
|
+
if (cappedExceeded || abortedByUser) return;
|
|
1871
|
+
file.close();
|
|
1872
|
+
detachAbort();
|
|
1873
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Downloaded to ${outPath}${RST}`);
|
|
1874
|
+
logToolCall('download', { url }, true, 'ok');
|
|
1875
|
+
resolve({ status: 'ok', path: outPath, bytes: downloadedBytes });
|
|
1876
|
+
});
|
|
1877
|
+
file.on('error', (err) => {
|
|
1878
|
+
if (cappedExceeded) return;
|
|
1879
|
+
if (abortedByUser) {
|
|
1880
|
+
detachAbort();
|
|
1881
|
+
finishAborted();
|
|
1882
|
+
return;
|
|
1883
|
+
}
|
|
1884
|
+
fs.unlink(outPath, () => {});
|
|
1885
|
+
detachAbort();
|
|
1886
|
+
_log(` ${FG_RED}✗ ${err.message}${RST}`);
|
|
1887
|
+
logToolCall('download', { url }, true, 'error');
|
|
1888
|
+
resolve({ error: err.message });
|
|
1889
|
+
});
|
|
1890
|
+
});
|
|
1891
|
+
} catch (err) {
|
|
1892
|
+
// Defense-in-depth: the URL is validated before we get here, but any
|
|
1893
|
+
// synchronous throw from proto.get must still become a tool error.
|
|
1894
|
+
detachAbort();
|
|
1895
|
+
_log(` ${FG_RED}✗ ${err.message}${RST}`);
|
|
1896
|
+
logToolCall('download', { url: target }, true, 'error');
|
|
1897
|
+
resolve({ error: `Invalid URL: ${err.message}`, error_code: err.code || 'ERR_INVALID_URL' });
|
|
1898
|
+
return;
|
|
1899
|
+
}
|
|
1900
|
+
activeReq = req;
|
|
1901
|
+
req.on('error', (err) => {
|
|
1902
|
+
if (cappedExceeded) return;
|
|
1903
|
+
if (abortedByUser) {
|
|
1904
|
+
detachAbort();
|
|
1905
|
+
finishAborted();
|
|
1906
|
+
return;
|
|
1907
|
+
}
|
|
1908
|
+
fs.unlink(outPath, () => {});
|
|
1909
|
+
detachAbort();
|
|
1910
|
+
_log(` ${FG_RED}✗ ${err.message}${RST}`);
|
|
1911
|
+
logToolCall('download', { url }, true, 'error');
|
|
1912
|
+
resolve({ error: err.message });
|
|
1913
|
+
});
|
|
1914
|
+
req.setTimeout(120000, () => {
|
|
1915
|
+
req.destroy();
|
|
1916
|
+
fs.unlink(outPath, () => {});
|
|
1917
|
+
detachAbort();
|
|
1918
|
+
logToolCall('download', { url }, true, 'error');
|
|
1919
|
+
resolve({ error: 'Request timeout' });
|
|
1920
|
+
});
|
|
1921
|
+
}
|
|
1922
|
+
doDownload(normalizedDlUrl, 5);
|
|
1923
|
+
});
|
|
1924
|
+
},
|
|
1925
|
+
},
|
|
1926
|
+
{
|
|
1927
|
+
tool: 'upload',
|
|
1928
|
+
specNames: ['upload'],
|
|
1929
|
+
tags: ['upload'],
|
|
1930
|
+
// QUIRK: upload content (base64) is captured raw (not trimmed), like write.
|
|
1931
|
+
parseXml: (text) => _matchDual(text, '<upload\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/upload>').map((m) => ['upload', m[1], m[2]]),
|
|
1932
|
+
fromParams: (p) => (p.path ? ['upload', p.path, p.content != null ? p.content : ''] : null),
|
|
1933
|
+
permission: (ctx, args) => ({ actionType: 'file', description: `Upload to ${args[0]}`, tag: 'upload' }),
|
|
1934
|
+
execute: async (ctx, args) => {
|
|
1935
|
+
const [arg0 = null, arg1 = null] = args;
|
|
1936
|
+
const { _log, logToolCall, isPathSafe, isProtectedConfigPath, _sandboxError, _protectedConfigWriteError, permissionManager, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1937
|
+
const filePath = arg0;
|
|
1938
|
+
const encodedContent = arg1 || '';
|
|
1939
|
+
const blocked = permissionManager.readonlyBlock('upload');
|
|
1940
|
+
if (blocked) {
|
|
1941
|
+
logToolCall('upload', { path: filePath }, false, 'denied');
|
|
1942
|
+
return blocked;
|
|
1943
|
+
}
|
|
1944
|
+
if (isProtectedConfigPath(filePath)) {
|
|
1945
|
+
logToolCall('upload', { path: filePath }, false, 'denied');
|
|
1946
|
+
return _protectedConfigWriteError(filePath);
|
|
1947
|
+
}
|
|
1948
|
+
if (!isPathSafe(filePath)) {
|
|
1949
|
+
logToolCall('upload', { path: filePath }, false, 'denied');
|
|
1950
|
+
return _sandboxError(filePath);
|
|
1951
|
+
}
|
|
1952
|
+
try {
|
|
1953
|
+
const dir = path.dirname(filePath);
|
|
1954
|
+
if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
|
|
1955
|
+
const buffer = Buffer.from(encodedContent.trim(), 'base64');
|
|
1956
|
+
await fsp.writeFile(filePath, buffer);
|
|
1957
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Uploaded ${buffer.length} bytes to ${filePath}${RST}`);
|
|
1958
|
+
logToolCall('upload', { path: filePath }, true, 'ok');
|
|
1959
|
+
return { status: 'ok', path: filePath, bytes: buffer.length };
|
|
1960
|
+
} catch (error) {
|
|
1961
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1962
|
+
logToolCall('upload', { path: filePath }, true, 'error');
|
|
1963
|
+
return { error: error.message };
|
|
1964
|
+
}
|
|
1965
|
+
},
|
|
1966
|
+
},
|
|
1967
|
+
{
|
|
1968
|
+
tool: 'file_stat',
|
|
1969
|
+
specNames: ['file_stat'],
|
|
1970
|
+
tags: ['file_stat'],
|
|
1971
|
+
parseXml: (text) => _inline(text, 'file_stat', 'file_stat'),
|
|
1972
|
+
fromParams: (p) => (p.path ? ['file_stat', p.path] : null),
|
|
1973
|
+
permission: () => null,
|
|
1974
|
+
execute: async (ctx, args) => {
|
|
1975
|
+
const [arg0 = null] = args;
|
|
1976
|
+
const { _log, logToolCall, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
1977
|
+
const filePath = arg0;
|
|
1978
|
+
try {
|
|
1979
|
+
const stat = await fsp.stat(filePath);
|
|
1980
|
+
const type = stat.isDirectory() ? 'directory' : stat.isSymbolicLink() ? 'symlink' : 'file';
|
|
1981
|
+
const size_kb = (stat.size / 1024).toFixed(2);
|
|
1982
|
+
const mode = '0o' + stat.mode.toString(8);
|
|
1983
|
+
const mtime = stat.mtime.toISOString();
|
|
1984
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stat ${filePath}${RST}`);
|
|
1985
|
+
logToolCall('file_stat', { path: filePath }, true, 'ok');
|
|
1986
|
+
return { path: filePath, size_kb, mtime, type, mode };
|
|
1987
|
+
} catch (error) {
|
|
1988
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
1989
|
+
logToolCall('file_stat', { path: filePath }, true, 'error');
|
|
1990
|
+
return { error: error.message };
|
|
1991
|
+
}
|
|
1992
|
+
},
|
|
1993
|
+
},
|
|
1994
|
+
{
|
|
1995
|
+
tool: 'http_get',
|
|
1996
|
+
specNames: ['http_get'],
|
|
1997
|
+
tags: ['http_get'],
|
|
1998
|
+
parseXml: (text) => {
|
|
1999
|
+
const out = [];
|
|
2000
|
+
for (const m of text.matchAll(/<http_get\b([^>]*?)(?:><\/http_get>|\/>)/g)) {
|
|
2001
|
+
const attrStr = m[1];
|
|
2002
|
+
const urlMatch = attrStr.match(/url="([^"]+)"/) || attrStr.match(/url='([^']+)'/);
|
|
2003
|
+
if (urlMatch) out.push(['http_get', urlMatch[1], _httpGetOpts(attrStr)]);
|
|
2004
|
+
}
|
|
2005
|
+
for (const m of text.matchAll(/<http_get>([\s\S]*?)<\/http_get>/g)) {
|
|
2006
|
+
const inner = m[1].trim();
|
|
2007
|
+
if (!inner) continue;
|
|
2008
|
+
const urlAttr = inner.match(/url="([^"]+)"/) || inner.match(/url='([^']+)'/);
|
|
2009
|
+
out.push(['http_get', urlAttr ? urlAttr[1] : _unwrapInnerTag(inner).trim(), _httpGetOpts(inner)]);
|
|
2010
|
+
}
|
|
2011
|
+
return out;
|
|
2012
|
+
},
|
|
2013
|
+
fromParams: (p) => (p.url ? ['http_get', p.url, _httpGetOptsFromParams(p)] : null),
|
|
2014
|
+
permission: (ctx, args) => ({ actionType: 'net', description: `HTTP GET ${args[0]}`, tag: 'http_get' }),
|
|
2015
|
+
execute: async (ctx, args, options) => {
|
|
2016
|
+
const signal = (options && options.signal) || null;
|
|
2017
|
+
const [arg0 = null, callOpts = {}] = args;
|
|
2018
|
+
const { _log, logToolCall, _dryRun, _skippedOps, getConfig, webChat, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
2019
|
+
const url = arg0;
|
|
2020
|
+
if (_dryRun) {
|
|
2021
|
+
_skippedOps.push({ category: 'net', symbol: '↓', desc: `GET ${url}` });
|
|
2022
|
+
logToolCall('http_get', { url }, false, 'dry-run');
|
|
2023
|
+
return { status: 'dry-run', message: 'dry-run: network call skipped' };
|
|
2024
|
+
}
|
|
2025
|
+
// Validate/normalize the URL BEFORE constructing any request. A malformed
|
|
2026
|
+
// URL (or a non-http(s) scheme, empty/whitespace, non-string) is a clean
|
|
2027
|
+
// tool error the agent can recover from — never an uncaught throw out of
|
|
2028
|
+
// the executor. Same shape as the request-error path below.
|
|
2029
|
+
const validatedUrl = _validateFetchUrl(url);
|
|
2030
|
+
if (validatedUrl.error) {
|
|
2031
|
+
_log(` ${FG_RED}✗ ${validatedUrl.error}${RST}`);
|
|
2032
|
+
logToolCall('http_get', { url }, true, 'error');
|
|
2033
|
+
return validatedUrl;
|
|
2034
|
+
}
|
|
2035
|
+
const normalizedUrl = validatedUrl.url;
|
|
2036
|
+
const httpCfg = getConfig ? getConfig() : {};
|
|
2037
|
+
const reqTimeoutMs = Math.max(15000, httpCfg.request_timeout_ms || 15000);
|
|
2038
|
+
// Byte cap is now ONLY a transfer/disk guard — the context-protection
|
|
2039
|
+
// mechanism is the post-extraction TOKEN budget (web.max_content_tokens).
|
|
2040
|
+
const maxBytes = Math.max(1024, httpCfg.http_fetch_max_bytes || 262144);
|
|
2041
|
+
const userAgent = _resolveUserAgent(httpCfg);
|
|
2042
|
+
const webCfg = (httpCfg.web && typeof httpCfg.web === 'object') ? httpCfg.web : {};
|
|
2043
|
+
const maxContentTokens = Number.isFinite(webCfg.max_content_tokens) && webCfg.max_content_tokens > 0
|
|
2044
|
+
? webCfg.max_content_tokens : 6000;
|
|
2045
|
+
const summaryModel = typeof webCfg.summary_model === 'string' && webCfg.summary_model.trim()
|
|
2046
|
+
? webCfg.summary_model.trim() : undefined;
|
|
2047
|
+
// Resolve the web-fetch mode (Task W.1b). Precedence: an explicit per-call
|
|
2048
|
+
// `mode` (the canonical enum the parser emits) beats the deprecated legacy
|
|
2049
|
+
// booleans (summarize/raw — which may still arrive directly on callOpts from
|
|
2050
|
+
// older callers), which beat the global config default (web.summarize mapped
|
|
2051
|
+
// to summarized/extracted). Summary needs an injected LLM call (webChat);
|
|
2052
|
+
// without one (headless/oneshot without an api client) the summarized branch
|
|
2053
|
+
// degrades to extracted Markdown, never the raw page.
|
|
2054
|
+
const mode = (callOpts && WEB_FETCH_MODES.includes(callOpts.mode) && callOpts.mode)
|
|
2055
|
+
|| _legacyBoolsToMode(
|
|
2056
|
+
typeof (callOpts && callOpts.summarize) === 'boolean' ? callOpts.summarize : undefined,
|
|
2057
|
+
typeof (callOpts && callOpts.raw) === 'boolean' ? callOpts.raw : undefined,
|
|
2058
|
+
)
|
|
2059
|
+
|| (webCfg.summarize !== false ? 'summarized' : 'extracted');
|
|
2060
|
+
const intent = callOpts && typeof callOpts.intent === 'string' ? callOpts.intent : '';
|
|
2061
|
+
const startedAt = Date.now();
|
|
2062
|
+
return new Promise((resolve) => {
|
|
2063
|
+
let abortedByUser = false;
|
|
2064
|
+
let onAbort = null;
|
|
2065
|
+
let activeReq = null;
|
|
2066
|
+
const detachAbort = () => {
|
|
2067
|
+
if (onAbort && signal) {
|
|
2068
|
+
try { signal.removeEventListener('abort', onAbort); } catch {}
|
|
2069
|
+
onAbort = null;
|
|
2070
|
+
}
|
|
2071
|
+
};
|
|
2072
|
+
const finishAborted = () => {
|
|
2073
|
+
logToolCall('http_get', { url }, true, 'aborted');
|
|
2074
|
+
resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
|
|
2075
|
+
};
|
|
2076
|
+
if (signal) {
|
|
2077
|
+
if (signal.aborted) {
|
|
2078
|
+
abortedByUser = true;
|
|
2079
|
+
finishAborted();
|
|
2080
|
+
return;
|
|
2081
|
+
}
|
|
2082
|
+
onAbort = () => {
|
|
2083
|
+
abortedByUser = true;
|
|
2084
|
+
try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
|
|
2085
|
+
};
|
|
2086
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
2087
|
+
}
|
|
2088
|
+
|
|
2089
|
+
function doGet(target, redirectsLeft) {
|
|
2090
|
+
const proto = target.startsWith('https') ? https : http;
|
|
2091
|
+
let req;
|
|
2092
|
+
try {
|
|
2093
|
+
req = proto.get(target, { headers: { 'User-Agent': userAgent } }, (res) => {
|
|
2094
|
+
if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
|
|
2095
|
+
res.resume();
|
|
2096
|
+
// A redirect Location may be relative or malformed — resolve it
|
|
2097
|
+
// against the current target and validate, so a bad redirect is a
|
|
2098
|
+
// clean tool error rather than a synchronous throw in this callback.
|
|
2099
|
+
const nextUrl = _validateFetchUrl(res.headers.location, target);
|
|
2100
|
+
if (nextUrl.error) {
|
|
2101
|
+
detachAbort();
|
|
2102
|
+
_log(` ${FG_RED}✗ ${nextUrl.error}${RST}`);
|
|
2103
|
+
logToolCall('http_get', { url: target }, true, 'error');
|
|
2104
|
+
return resolve(nextUrl);
|
|
2105
|
+
}
|
|
2106
|
+
return doGet(nextUrl.url, redirectsLeft - 1);
|
|
2107
|
+
}
|
|
2108
|
+
const bufs = [];
|
|
2109
|
+
let totalBytes = 0;
|
|
2110
|
+
let capped = false;
|
|
2111
|
+
res.on('data', (chunk) => {
|
|
2112
|
+
totalBytes += chunk.length;
|
|
2113
|
+
if (!capped) {
|
|
2114
|
+
if (totalBytes <= maxBytes) {
|
|
2115
|
+
bufs.push(chunk);
|
|
2116
|
+
} else {
|
|
2117
|
+
const keep = maxBytes - (totalBytes - chunk.length);
|
|
2118
|
+
if (keep > 0) bufs.push(chunk.slice(0, keep));
|
|
2119
|
+
capped = true;
|
|
2120
|
+
}
|
|
2121
|
+
}
|
|
2122
|
+
});
|
|
2123
|
+
res.on('end', () => {
|
|
2124
|
+
if (abortedByUser) return;
|
|
2125
|
+
detachAbort();
|
|
2126
|
+
const kept = Buffer.concat(bufs);
|
|
2127
|
+
const keptBytes = kept.length;
|
|
2128
|
+
const rawBody = kept.toString('utf8');
|
|
2129
|
+
const contentType = res.headers && res.headers['content-type'];
|
|
2130
|
+
const statusCode = res.statusCode;
|
|
2131
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}HTTP GET ${target} (${statusCode}, ${totalBytes} bytes${capped ? `, transfer-capped to ${keptBytes}` : ''})${RST}`);
|
|
2132
|
+
logToolCall('http_get', { url: target }, true, statusCode < 400 ? 'ok' : 'error');
|
|
2133
|
+
// Stage 1+2+3: extract main content → Markdown → (optional) summary.
|
|
2134
|
+
// The RAW page never enters the main context — only the processed
|
|
2135
|
+
// result does. Fully contained: any pipeline error degrades to the
|
|
2136
|
+
// capped extracted Markdown (and as a last resort the crude-stripped
|
|
2137
|
+
// text), NEVER the raw HTML.
|
|
2138
|
+
(async () => {
|
|
2139
|
+
let result;
|
|
2140
|
+
try {
|
|
2141
|
+
result = await processWebContent({
|
|
2142
|
+
rawBody, contentType, url: target, statusCode,
|
|
2143
|
+
totalBytes, transferCapped: capped,
|
|
2144
|
+
mode, intent, summaryModel, maxContentTokens,
|
|
2145
|
+
webChat, signal,
|
|
2146
|
+
});
|
|
2147
|
+
} catch (err) {
|
|
2148
|
+
// Defensive: extraction itself should not throw, but if it does,
|
|
2149
|
+
// fall back to a crude tag-strip rather than dumping raw HTML.
|
|
2150
|
+
const { stripTagsCrude } = require('./web-extract');
|
|
2151
|
+
const safe = capToTokens(stripTagsCrude(rawBody), maxContentTokens, defaultEstimate);
|
|
2152
|
+
result = { status_code: statusCode, body: safe.text, bytes: totalBytes,
|
|
2153
|
+
kind: 'text', extracted: false, summarized: false, processing_error: err.message };
|
|
2154
|
+
}
|
|
2155
|
+
resolve(result);
|
|
2156
|
+
})();
|
|
2157
|
+
});
|
|
2158
|
+
});
|
|
2159
|
+
} catch (err) {
|
|
2160
|
+
// Defense-in-depth: the URL is validated before we get here, but any
|
|
2161
|
+
// synchronous throw from proto.get must still become a tool error,
|
|
2162
|
+
// never escape the executor.
|
|
2163
|
+
detachAbort();
|
|
2164
|
+
_log(` ${FG_RED}✗ ${err.message}${RST}`);
|
|
2165
|
+
logToolCall('http_get', { url: target }, true, 'error');
|
|
2166
|
+
resolve({ error: `Invalid URL: ${err.message}`, error_code: err.code || 'ERR_INVALID_URL' });
|
|
2167
|
+
return;
|
|
2168
|
+
}
|
|
2169
|
+
activeReq = req;
|
|
2170
|
+
req.on('error', (err) => {
|
|
2171
|
+
if (abortedByUser) {
|
|
2172
|
+
detachAbort();
|
|
2173
|
+
finishAborted();
|
|
2174
|
+
return;
|
|
2175
|
+
}
|
|
2176
|
+
detachAbort();
|
|
2177
|
+
_log(` ${FG_RED}✗ ${err.message}${RST}`);
|
|
2178
|
+
logToolCall('http_get', { url: target }, true, 'error');
|
|
2179
|
+
resolve({ error: err.message, error_code: err.code });
|
|
2180
|
+
});
|
|
2181
|
+
req.setTimeout(reqTimeoutMs, () => {
|
|
2182
|
+
req.destroy();
|
|
2183
|
+
detachAbort();
|
|
2184
|
+
logToolCall('http_get', { url: target }, true, 'error');
|
|
2185
|
+
resolve({ error: 'Request timeout', error_code: 'ETIMEDOUT' });
|
|
2186
|
+
});
|
|
2187
|
+
}
|
|
2188
|
+
doGet(normalizedUrl, 5);
|
|
2189
|
+
});
|
|
2190
|
+
},
|
|
2191
|
+
},
|
|
2192
|
+
{
|
|
2193
|
+
// Web search (Task W.2b). Calls the backend POST /api/search via the
|
|
2194
|
+
// injected ctx.webSearch (api client's dashboardSearch → SearXNG) and
|
|
2195
|
+
// returns a COMPACT { title, url, snippet } list — never page content
|
|
2196
|
+
// (that is http_get's job). The spec steers the model to read the snippets,
|
|
2197
|
+
// pick the relevant result(s), and fetch only those with http_get, instead
|
|
2198
|
+
// of blindly multi-fetching. The backend is on another machine and may be
|
|
2199
|
+
// down/unreachable/erroring — every failure mode is caught and surfaced as a
|
|
2200
|
+
// clean tool error; NOTHING throws out of the executor (the http_get-fix
|
|
2201
|
+
// lesson). Results are untrusted external content, fenced in lib/agent.js.
|
|
2202
|
+
tool: 'web_search',
|
|
2203
|
+
specNames: ['web_search'],
|
|
2204
|
+
tags: ['web_search'],
|
|
2205
|
+
parseXml: (text) => {
|
|
2206
|
+
const out = [];
|
|
2207
|
+
for (const m of text.matchAll(/<web_search\b([^>]*?)(?:><\/web_search>|\/>)/g)) {
|
|
2208
|
+
const attrStr = m[1];
|
|
2209
|
+
const qMatch = attrStr.match(/query="([^"]*)"/) || attrStr.match(/query='([^']*)'/);
|
|
2210
|
+
if (qMatch) out.push(['web_search', qMatch[1], _webSearchOpts(attrStr)]);
|
|
2211
|
+
}
|
|
2212
|
+
for (const m of text.matchAll(/<web_search>([\s\S]*?)<\/web_search>/g)) {
|
|
2213
|
+
const inner = m[1].trim();
|
|
2214
|
+
if (!inner) continue;
|
|
2215
|
+
const qAttr = inner.match(/query="([^"]*)"/) || inner.match(/query='([^']*)'/);
|
|
2216
|
+
out.push(['web_search', qAttr ? qAttr[1] : inner, _webSearchOpts(inner)]);
|
|
2217
|
+
}
|
|
2218
|
+
return out;
|
|
2219
|
+
},
|
|
2220
|
+
fromParams: (p) => (p.query ? ['web_search', String(p.query), _webSearchOptsFromParams(p)] : null),
|
|
2221
|
+
// A network read like http_get — same descriptor shape (net, gated; not a
|
|
2222
|
+
// privileged path). Performs no mutation.
|
|
2223
|
+
permission: (ctx, args) => ({ actionType: 'net', description: `Web search: ${args[0]}`, tag: 'web_search' }),
|
|
2224
|
+
execute: async (ctx, args, options) => {
|
|
2225
|
+
const signal = (options && options.signal) || null;
|
|
2226
|
+
const [arg0 = '', callOpts = {}] = args;
|
|
2227
|
+
const { _log, logToolCall, _dryRun, _skippedOps, webSearch, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
2228
|
+
const query = typeof arg0 === 'string' ? arg0.trim() : '';
|
|
2229
|
+
if (!query) {
|
|
2230
|
+
logToolCall('web_search', { query: arg0 }, true, 'error');
|
|
2231
|
+
return { error: 'web search unavailable: empty query' };
|
|
2232
|
+
}
|
|
2233
|
+
if (_dryRun) {
|
|
2234
|
+
_skippedOps.push({ category: 'net', symbol: '⌕', desc: `search ${query}` });
|
|
2235
|
+
logToolCall('web_search', { query }, false, 'dry-run');
|
|
2236
|
+
return { status: 'dry-run', message: 'dry-run: web search skipped' };
|
|
2237
|
+
}
|
|
2238
|
+
if (typeof webSearch !== 'function') {
|
|
2239
|
+
logToolCall('web_search', { query }, true, 'error');
|
|
2240
|
+
return { error: 'web search unavailable: no backend client configured (available in interactive chat / the SDK with dashboard auth)' };
|
|
2241
|
+
}
|
|
2242
|
+
// Bound count BEFORE the backend call; the backend clamps further but a
|
|
2243
|
+
// huge value should never leave the client. An invalid/zero count is
|
|
2244
|
+
// dropped so the backend default applies.
|
|
2245
|
+
const count = _clampSearchCount(callOpts && callOpts.count);
|
|
2246
|
+
const limit = count || 10;
|
|
2247
|
+
try {
|
|
2248
|
+
const resp = await webSearch(query, count ? { count, signal } : { signal });
|
|
2249
|
+
const raw = resp && Array.isArray(resp.results) ? resp.results : [];
|
|
2250
|
+
const results = raw.slice(0, limit).map((r) => ({
|
|
2251
|
+
title: r && typeof r.title === 'string' ? r.title : '',
|
|
2252
|
+
url: r && typeof r.url === 'string' ? r.url : '',
|
|
2253
|
+
snippet: r && typeof r.snippet === 'string' ? r.snippet : '',
|
|
2254
|
+
}));
|
|
2255
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}web search "${query}" (${results.length} result${results.length === 1 ? '' : 's'})${RST}`);
|
|
2256
|
+
logToolCall('web_search', { query }, true, 'ok');
|
|
2257
|
+
return { query, count: results.length, results };
|
|
2258
|
+
} catch (err) {
|
|
2259
|
+
const reason = (err && err.message) ? err.message : String(err || 'unknown error');
|
|
2260
|
+
_log(` ${FG_RED}✗ web search unavailable: ${reason}${RST}`);
|
|
2261
|
+
logToolCall('web_search', { query }, true, 'error');
|
|
2262
|
+
return { error: `web search unavailable: ${reason}` };
|
|
2263
|
+
}
|
|
2264
|
+
},
|
|
2265
|
+
},
|
|
2266
|
+
{
|
|
2267
|
+
tool: 'ask_user',
|
|
2268
|
+
specNames: ['ask_user'],
|
|
2269
|
+
tags: ['ask_user'],
|
|
2270
|
+
parseXml: (text) => _matchDual(text, '<ask_user\\s+question=Q([^Q]+)Q\\s*(?:><\\/ask_user>|\\/>)').map((m) => ['ask_user', m[1]]),
|
|
2271
|
+
fromParams: (p) => (p.question ? ['ask_user', p.question] : null),
|
|
2272
|
+
permission: (ctx, args) => ({ actionType: 'user', description: `Ask user: ${args[0]}`, tag: 'ask_user' }),
|
|
2273
|
+
execute: async (ctx, args) => {
|
|
2274
|
+
const [arg0 = null] = args;
|
|
2275
|
+
const { _log, logToolCall, _parseNumberedOptions, permissionManager, writer, FG_YELLOW, FG_GRAY, RST, DIM } = ctx;
|
|
2276
|
+
const question = arg0;
|
|
2277
|
+
const options = _parseNumberedOptions(question);
|
|
2278
|
+
if (options.length >= 2) {
|
|
2279
|
+
const selected = await permissionManager.captureSelect({ options });
|
|
2280
|
+
logToolCall('ask_user', { question }, true, 'ok');
|
|
2281
|
+
return { question, answer: selected || options[0] };
|
|
2282
|
+
}
|
|
2283
|
+
if (!process.stdout.isTTY || process.stdin.isRaw) {
|
|
2284
|
+
writer.scrollback(`\n ${FG_YELLOW}?${RST} ${question}\n ${DIM}[auto-answering 'y']${RST}`);
|
|
2285
|
+
logToolCall('ask_user', { question }, true, 'ok');
|
|
2286
|
+
return { question, answer: 'y' };
|
|
2287
|
+
}
|
|
2288
|
+
process.stdout.write(`\n ${FG_YELLOW}?${RST} ${question}\n ${FG_GRAY}>${RST} `);
|
|
2289
|
+
const buf = Buffer.alloc(4096);
|
|
2290
|
+
let input = '';
|
|
2291
|
+
while (true) {
|
|
2292
|
+
const n = fs.readSync(0, buf, 0, 1);
|
|
2293
|
+
if (n === 0) break;
|
|
2294
|
+
const ch = buf[0];
|
|
2295
|
+
if (ch === 0x0a) break;
|
|
2296
|
+
if (ch === 0x0d) continue;
|
|
2297
|
+
input += String.fromCharCode(ch);
|
|
2298
|
+
}
|
|
2299
|
+
_log();
|
|
2300
|
+
logToolCall('ask_user', { question }, true, 'ok');
|
|
2301
|
+
return { question, answer: input };
|
|
2302
|
+
},
|
|
2303
|
+
},
|
|
2304
|
+
{
|
|
2305
|
+
tool: 'store_memory',
|
|
2306
|
+
specNames: ['store_memory'],
|
|
2307
|
+
tags: ['store_memory'],
|
|
2308
|
+
// QUIRK: store_memory value is captured raw (not trimmed), like write.
|
|
2309
|
+
parseXml: (text) => _matchDual(text, '<store_memory\\s+key=Q([^Q]+)Q>([\\s\\S]*?)<\\/store_memory>').map((m) => ['store_memory', m[1], m[2]]),
|
|
2310
|
+
fromParams: (p) => (p.key ? ['store_memory', p.key, p.value != null ? p.value : ''] : null),
|
|
2311
|
+
permission: (ctx, args) => ({ actionType: 'memory', description: `Store memory: ${args[0]}`, tag: 'store_memory' }),
|
|
2312
|
+
execute: async (ctx, args) => {
|
|
2313
|
+
const [arg0 = null, arg1 = null] = args;
|
|
2314
|
+
const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
2315
|
+
const key = arg0;
|
|
2316
|
+
const value = arg1 || '';
|
|
2317
|
+
try {
|
|
2318
|
+
let mem = {};
|
|
2319
|
+
try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
|
|
2320
|
+
mem[key] = value;
|
|
2321
|
+
await fsp.mkdir(path.dirname(MEMORY_PATH), { recursive: true });
|
|
2322
|
+
await fsp.writeFile(MEMORY_PATH, JSON.stringify(mem, null, 2));
|
|
2323
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stored memory: ${key}${RST}`);
|
|
2324
|
+
logToolCall('store_memory', { key }, true, 'ok');
|
|
2325
|
+
return { status: 'ok', key };
|
|
2326
|
+
} catch (error) {
|
|
2327
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
2328
|
+
logToolCall('store_memory', { key }, true, 'error');
|
|
2329
|
+
return { error: error.message };
|
|
2330
|
+
}
|
|
2331
|
+
},
|
|
2332
|
+
},
|
|
2333
|
+
{
|
|
2334
|
+
tool: 'recall_memory',
|
|
2335
|
+
specNames: ['recall_memory'],
|
|
2336
|
+
tags: ['recall_memory'],
|
|
2337
|
+
parseXml: (text) => _matchDual(text, '<recall_memory\\s+key=Q([^Q]+)Q\\s*(?:><\\/recall_memory>|\\/>)').map((m) => ['recall_memory', m[1]]),
|
|
2338
|
+
fromParams: (p) => (p.key ? ['recall_memory', p.key] : null),
|
|
2339
|
+
permission: () => null,
|
|
2340
|
+
execute: async (ctx, args) => {
|
|
2341
|
+
const [arg0 = null] = args;
|
|
2342
|
+
const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
2343
|
+
const key = arg0;
|
|
2344
|
+
try {
|
|
2345
|
+
let mem = {};
|
|
2346
|
+
try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
|
|
2347
|
+
const found = key in mem;
|
|
2348
|
+
const value = found ? mem[key] : null;
|
|
2349
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Recalled memory: ${key}${RST}`);
|
|
2350
|
+
logToolCall('recall_memory', { key }, true, 'ok');
|
|
2351
|
+
return { key, value, found };
|
|
2352
|
+
} catch (error) {
|
|
2353
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
2354
|
+
logToolCall('recall_memory', { key }, true, 'error');
|
|
2355
|
+
return { error: error.message };
|
|
2356
|
+
}
|
|
2357
|
+
},
|
|
2358
|
+
},
|
|
2359
|
+
{
|
|
2360
|
+
tool: 'list_memories',
|
|
2361
|
+
specNames: ['list_memories'],
|
|
2362
|
+
tags: ['list_memories'],
|
|
2363
|
+
parseXml: (text) => [...text.matchAll(/<list_memories\s*(?:><\/list_memories>|\/>)/g)].map(() => ['list_memories']),
|
|
2364
|
+
fromParams: () => ['list_memories'],
|
|
2365
|
+
permission: () => null,
|
|
2366
|
+
execute: async (ctx) => {
|
|
2367
|
+
const { _log, logToolCall, MEMORY_PATH, FG_GREEN, FG_GRAY, FG_RED, RST } = ctx;
|
|
2368
|
+
try {
|
|
2369
|
+
let mem = {};
|
|
2370
|
+
try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
|
|
2371
|
+
const keys = Object.keys(mem);
|
|
2372
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${keys.length} memory key(s)${RST}`);
|
|
2373
|
+
logToolCall('list_memories', {}, true, 'ok');
|
|
2374
|
+
return { keys };
|
|
2375
|
+
} catch (error) {
|
|
2376
|
+
_log(` ${FG_RED}✗ ${error.message}${RST}`);
|
|
2377
|
+
logToolCall('list_memories', {}, true, 'error');
|
|
2378
|
+
return { error: error.message };
|
|
2379
|
+
}
|
|
2380
|
+
},
|
|
2381
|
+
},
|
|
2382
|
+
{
|
|
2383
|
+
tool: 'get_env',
|
|
2384
|
+
specNames: ['get_env'],
|
|
2385
|
+
tags: ['get_env'],
|
|
2386
|
+
parseXml: (text) => _inline(text, 'get_env', 'get_env'),
|
|
2387
|
+
fromParams: (p) => (p.name ? ['get_env', p.name] : null),
|
|
2388
|
+
permission: () => null,
|
|
2389
|
+
execute: async (ctx, args) => {
|
|
2390
|
+
const [arg0 = null] = args;
|
|
2391
|
+
const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
|
|
2392
|
+
const varName = arg0;
|
|
2393
|
+
const value = process.env[varName];
|
|
2394
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Got env ${varName}${RST}`);
|
|
2395
|
+
logToolCall('get_env', { name: varName }, true, 'ok');
|
|
2396
|
+
return { name: varName, value: value !== undefined ? value : null };
|
|
2397
|
+
},
|
|
2398
|
+
},
|
|
2399
|
+
{
|
|
2400
|
+
tool: 'set_env',
|
|
2401
|
+
specNames: ['set_env'],
|
|
2402
|
+
tags: ['set_env'],
|
|
2403
|
+
parseXml: (text) => _matchDual(text, '<set_env\\s+name=Q([^Q]+)Q\\s+value=Q([^Q]*)Q\\s*(?:><\\/set_env>|\\/>)').map((m) => ['set_env', m[1], m[2]]),
|
|
2404
|
+
fromParams: (p) => (p.name ? ['set_env', p.name, p.value != null ? p.value : ''] : null),
|
|
2405
|
+
permission: (ctx, args) => ({ actionType: 'env', description: `Set env ${args[0]}=${args[1] || ''}`, tag: 'set_env' }),
|
|
2406
|
+
execute: async (ctx, args) => {
|
|
2407
|
+
const [arg0 = null, arg1 = null] = args;
|
|
2408
|
+
const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
|
|
2409
|
+
const varName = arg0;
|
|
2410
|
+
const value = arg1 || '';
|
|
2411
|
+
process.env[varName] = value;
|
|
2412
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Set env ${varName}${RST}`);
|
|
2413
|
+
logToolCall('set_env', { name: varName }, true, 'ok');
|
|
2414
|
+
return { status: 'ok', name: varName };
|
|
2415
|
+
},
|
|
2416
|
+
},
|
|
2417
|
+
{
|
|
2418
|
+
tool: 'system_info',
|
|
2419
|
+
specNames: ['system_info'],
|
|
2420
|
+
tags: ['system_info'],
|
|
2421
|
+
parseXml: (text) => [...text.matchAll(/<system_info\s*(?:><\/system_info>|\/>)/g)].map(() => ['system_info']),
|
|
2422
|
+
fromParams: () => ['system_info'],
|
|
2423
|
+
permission: () => null,
|
|
2424
|
+
execute: async (ctx) => {
|
|
2425
|
+
const { _log, logToolCall, FG_GREEN, FG_GRAY, RST } = ctx;
|
|
2426
|
+
const info = {
|
|
2427
|
+
platform: os.platform(),
|
|
2428
|
+
arch: os.arch(),
|
|
2429
|
+
hostname: os.hostname(),
|
|
2430
|
+
user: process.env.USER || process.env.USERNAME || '',
|
|
2431
|
+
total_mem_mb: Math.round(os.totalmem() / 1024 / 1024),
|
|
2432
|
+
free_mem_mb: Math.round(os.freemem() / 1024 / 1024),
|
|
2433
|
+
node_version: process.version,
|
|
2434
|
+
cwd: process.cwd(),
|
|
2435
|
+
};
|
|
2436
|
+
_log(` ${FG_GREEN}✓${RST} ${FG_GRAY}System info: ${info.platform}/${info.arch}${RST}`);
|
|
2437
|
+
logToolCall('system_info', {}, true, 'ok');
|
|
2438
|
+
return info;
|
|
2439
|
+
},
|
|
2440
|
+
},
|
|
2441
|
+
...GIT_TOOL_REGISTRY,
|
|
2442
|
+
];
|
|
2443
|
+
|
|
2444
|
+
// Lookup tables over the static registry, built once at module load:
//   _byName   — lower-cased spec name (TOOL_SPECS key / native function name) → entry
//   _byAction — canonical action (tuple[0]) → entry, for executor / permission dispatch
const _byName = new Map();
const _byAction = new Map();
TOOL_REGISTRY.forEach((entry) => {
  entry.specNames.forEach((specName) => {
    _byName.set(specName.toLowerCase(), entry);
  });
  _byAction.set(entry.tool, entry);
});
|
|
2452
|
+
|
|
2453
|
+
// ── Dynamic (runtime-registered) tools — MCP, Task 3.3 ─────────────────────
|
|
2454
|
+
//
|
|
2455
|
+
// Tools discovered at runtime (MCP servers) are registered here, SEPARATE from
|
|
2456
|
+
// the static TOOL_REGISTRY array above. This separation is deliberate: the
|
|
2457
|
+
// load-time parity check in lib/constants.js validates only the static set
|
|
2458
|
+
// (TAG_REGISTRY ↔ TOOL_SPECS ↔ TOOL_REGISTRY), and it runs once at module load,
|
|
2459
|
+
// before any MCP server has connected. Keeping dynamic tools out of that array
|
|
2460
|
+
// means MCP tools never break the parity invariant.
|
|
2461
|
+
//
|
|
2462
|
+
// Dispatch (entryForAction) and native mapping (fromInvoke) consult this map
|
|
2463
|
+
// AFTER the static one, so a dynamic tool can never shadow a built-in. Each
|
|
2464
|
+
// entry has the same shape as a static one — { tool, fromParams, execute,
|
|
2465
|
+
// permission, parseXml?, spec? } — so it dispatches through the agent loop
|
|
2466
|
+
// identically. `spec` (an OpenAI-format { description, parameters }) is surfaced
|
|
2467
|
+
// to the native function-calling `tools` array via dynamicToolSpecs().
|
|
2468
|
+
const _dynamic = new Map(); // canonical name (== entry.tool) → entry

// Resolve a dynamic tool by name: exact key first, then the lower-cased name.
// Returns null for a null/undefined name or when nothing matches.
function _lookupDynamic(name) {
  if (name === null || name === undefined) return null;
  const exact = _dynamic.get(name);
  if (exact) return exact;
  const folded = _dynamic.get(String(name).toLowerCase());
  return folded || null;
}
|
|
2474
|
+
|
|
2475
|
+
// Register (or replace) a runtime tool under entry.tool.
// Throws when the entry lacks a non-empty string `tool`, or when any of
// execute / fromParams / permission is not a function (checked in that order).
function registerDynamicTool(entry) {
  const hasName = Boolean(entry) && typeof entry.tool === 'string' && entry.tool !== '';
  if (!hasName) {
    throw new Error('registerDynamicTool: entry.tool (canonical name) is required');
  }
  for (const hook of ['execute', 'fromParams', 'permission']) {
    if (typeof entry[hook] !== 'function') {
      throw new Error(`registerDynamicTool(${entry.tool}): ${hook}() is required`);
    }
  }
  _dynamic.set(entry.tool, entry);
}
|
|
2490
|
+
|
|
2491
|
+
// Remove the dynamic tool stored under exactly `name`.
// Returns true when an entry was removed, false when none existed.
function unregisterDynamicTool(name) {
  const removed = _dynamic.delete(name);
  return removed;
}
|
|
2494
|
+
|
|
2495
|
+
// Drop every runtime-registered tool; the static registry is untouched.
function clearDynamicTools() {
  _dynamic.clear();
  return undefined;
}
|
|
2498
|
+
|
|
2499
|
+
// Snapshot of all dynamic tool entries as a fresh array, in registration order.
function dynamicToolEntries() {
  return Array.from(_dynamic.values());
}
|
|
2502
|
+
|
|
2503
|
+
// Build { name → { description, parameters } } covering each dynamic tool that
// carries a spec; entries without one are left out. The result is merged into
// the native function-calling tools array in lib/api.js.
function dynamicToolSpecs() {
  const specsByName = {};
  _dynamic.forEach((entry) => {
    if (!entry.spec) return;
    specsByName[entry.tool] = entry.spec;
  });
  return specsByName;
}
|
|
2512
|
+
|
|
2513
|
+
// Map a native function call (tool name + params object) to an action tuple.
// Static tools are matched by lower-cased name first, dynamic tools second.
// Returns null for an unknown tool; otherwise whatever the entry's
// fromParams() returns (params default to {}).
function fromInvoke(toolName, params) {
  const lowered = (toolName || '').toLowerCase();
  let entry = _byName.get(lowered);
  if (!entry) {
    entry = _lookupDynamic(toolName);
  }
  return entry ? entry.fromParams(params || {}) : null;
}
|
|
2518
|
+
|
|
2519
|
+
// Resolve the registry entry for a canonical action name: built-in tools
// win, dynamic tools are the fallback, null when neither knows the action.
function entryForAction(action) {
  const builtin = _byAction.get(action);
  if (builtin) return builtin;
  return _lookupDynamic(action) || null;
}
|
|
2522
|
+
|
|
2523
|
+
// Names of the static (load-time-parity-checked) tools only. Dynamic tools
// MUST stay out of this list, or the lib/constants.js parity assertion would
// see phantom entries.
function registryToolNames() {
  return Array.from(_byName.keys());
}
|
|
2528
|
+
|
|
2529
|
+
module.exports = {
|
|
2530
|
+
TOOL_REGISTRY,
|
|
2531
|
+
fromInvoke,
|
|
2532
|
+
entryForAction,
|
|
2533
|
+
registryToolNames,
|
|
2534
|
+
// Dynamic (runtime) tool registry — MCP (Task 3.3).
|
|
2535
|
+
registerDynamicTool,
|
|
2536
|
+
unregisterDynamicTool,
|
|
2537
|
+
clearDynamicTools,
|
|
2538
|
+
dynamicToolEntries,
|
|
2539
|
+
dynamicToolSpecs,
|
|
2540
|
+
// Exported for the grep/glob characterization + parity tests (Task 2.1).
|
|
2541
|
+
// The execute() bodies above use these same functions; tests drive both
|
|
2542
|
+
// engines explicitly to prove rg- and Node-path outputs are identical.
|
|
2543
|
+
_grepSearch,
|
|
2544
|
+
_globSearch,
|
|
2545
|
+
_detectRipgrep,
|
|
2546
|
+
// grep output modes + bound normalizers (Task W.5).
|
|
2547
|
+
GREP_OUTPUT_MODES,
|
|
2548
|
+
_normGrepMode,
|
|
2549
|
+
_normHeadLimit,
|
|
2550
|
+
_normOffset,
|
|
2551
|
+
// Exported for the web-fetch mode-resolution tests (Task W.1b).
|
|
2552
|
+
_httpGetOpts,
|
|
2553
|
+
_httpGetOptsFromParams,
|
|
2554
|
+
processWebContent,
|
|
2555
|
+
WEB_FETCH_MODES,
|
|
2556
|
+
// Exported for the URL-validation tests (fetch-url-validation.test.js).
|
|
2557
|
+
_validateFetchUrl,
|
|
2558
|
+
};
|