@semalt-ai/code 1.8.4 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -1
- package/.github/workflows/ci.yml +69 -0
- package/CLAUDE.md +1588 -27
- package/README.md +147 -3
- package/TECHNICAL_DEBT.md +66 -0
- package/examples/embed.js +74 -0
- package/index.js +259 -11
- package/lib/agent.js +935 -181
- package/lib/api.js +308 -55
- package/lib/args.js +96 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +306 -0
- package/lib/commands/chat-slash.js +399 -0
- package/lib/commands/chat-turn.js +446 -0
- package/lib/commands/chat.js +403 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +346 -11
- package/lib/constants.js +372 -3
- package/lib/debug.js +106 -0
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +167 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +264 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +100 -10
- package/lib/pricing.js +67 -0
- package/lib/proc.js +158 -0
- package/lib/prompts.js +88 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2558 -0
- package/lib/tool_specs.js +236 -9
- package/lib/tools.js +370 -944
- package/lib/ui/chat-history.js +19 -1
- package/lib/ui/format.js +101 -6
- package/lib/ui/input-field.js +16 -7
- package/lib/ui/status-bar.js +79 -11
- package/lib/ui/terminal.js +10 -4
- package/lib/ui/theme.js +1 -0
- package/lib/ui/web-activity.js +218 -0
- package/lib/ui/writer.js +7 -9
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/background.test.js +414 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/executors.test.js +362 -0
- package/test/extract-tool-calls.test.js +315 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +142 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +203 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/max-iterations.test.js +216 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +356 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +163 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/result-cap.test.js +233 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/stream-parser.test.js +147 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/web-activity-ordering.test.js +194 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1288
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Web-activity process summary (Task W.3, Part 1).
|
|
4
|
+
//
|
|
5
|
+
// A web task runs `web_search` (find candidate pages) then targeted `http_get`
|
|
6
|
+
// (read the relevant ones). By default each operation printed its own tool line
|
|
7
|
+
// (one "tool · web_search" / "net · GET …" row per call), which reads as a noisy
|
|
8
|
+
// list rather than one coherent process. This module collapses a run of
|
|
9
|
+
// consecutive web operations into a SINGLE compact process-summary line —
|
|
10
|
+
//
|
|
11
|
+
// ✓ web · search "коррупционные скандалы…" · 2 queries · 3 sources read · 1 blocked
|
|
12
|
+
//
|
|
13
|
+
// — while `--debug` keeps the full per-operation lines (in debug mode chat-turn.js
|
|
14
|
+
// bypasses this collapser and renders each op the normal way).
|
|
15
|
+
//
|
|
16
|
+
// Display only: the audit log still records every individual operation (that
|
|
17
|
+
// happens in the executors, untouched here), and NON-web tools render exactly as
|
|
18
|
+
// before. Scope: `web_search` + `http_get`. `download` is a file-save, not a page
|
|
19
|
+
// read for the search→fetch flow, so it keeps its own line.
|
|
20
|
+
|
|
21
|
+
const { UI_THEME, UI_ICONS } = require('./theme');
|
|
22
|
+
const { RST, DIM } = require('./ansi');
|
|
23
|
+
const { formatDuration } = require('./format');
|
|
24
|
+
|
|
25
|
+
// Tool tags whose calls are folded into the single web-activity summary line.
const WEB_TOOLS = new Set(['web_search', 'http_get']);

// True when `tag` names one of the collapsed web tools (a member of WEB_TOOLS).
function isWebTool(tag) {
  const collapsed = WEB_TOOLS.has(tag);
  return collapsed;
}
|
|
29
|
+
|
|
30
|
+
// Normalize `text` for single-line display and cap its length.
//   text: any value; null/undefined become '' and everything else is stringified.
//   max:  maximum length in UTF-16 code units, including the trailing ellipsis.
// Runs of whitespace collapse to one space and the ends are trimmed. When the
// result is longer than `max` it is cut to `max - 1` units and '…' is appended.
function _truncate(text, max) {
  const s = String(text == null ? '' : text).replace(/\s+/g, ' ').trim();
  if (s.length <= max) return s;
  let head = s.slice(0, Math.max(0, max - 1));
  // The cut is made on UTF-16 units, so it can land between the two halves of
  // a surrogate pair (e.g. an emoji). Drop a dangling high surrogate rather
  // than emit a broken character in front of the ellipsis.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) head = head.slice(0, -1);
  return head + '…';
}
|
|
35
|
+
|
|
36
|
+
// Decide whether a finished web op counts as a success.
//   - A missing op, or one carrying an `error` (the executor flagged a failure
//     such as a backend-down search or a transport error), is a failure.
//   - An `http_get` whose numeric `status` is >= 400 is also a failure: a
//     403/406 is a real "blocked" even though the fetch itself completed.
//   - Everything else is a success.
function opSucceeded(op) {
  const flaggedFailure = !op || Boolean(op.error);
  if (flaggedFailure) return false;

  const isFetch = op.tag === 'http_get';
  const hasNumericStatus = typeof op.status === 'number';
  const rejectedByServer = isFetch && hasNumericStatus && op.status >= 400;
  return !rejectedByServer;
}
|
|
47
|
+
|
|
48
|
+
// Pure: reduce the recorded op list to the counters the summary line needs.
//   ops: iterable of op records ({ tag, query?, ... }); null/undefined is
//        treated as empty and falsy entries are skipped.
// Returns { searchCount, searchFailed, queries, fetchCount, fetchOk, fetchFailed }.
// Ops whose tag is neither 'web_search' nor 'http_get' are not counted.
function aggregateWebOps(ops) {
  const queries = [];
  let searchCount = 0;
  let searchFailed = 0;
  let fetchCount = 0;
  let fetchOk = 0;
  let fetchFailed = 0;

  for (const entry of (ops || [])) {
    if (!entry) continue;
    const succeeded = opSucceeded(entry);
    switch (entry.tag) {
      case 'web_search':
        searchCount += 1;
        if (!succeeded) searchFailed += 1;
        // Only non-empty queries are remembered for the summary's lead text.
        if (entry.query) queries.push(entry.query);
        break;
      case 'http_get':
        fetchCount += 1;
        if (succeeded) {
          fetchOk += 1;
        } else {
          fetchFailed += 1;
        }
        break;
      default:
        break;
    }
  }

  return { searchCount, searchFailed, queries, fetchCount, fetchOk, fetchFailed };
}
|
|
68
|
+
|
|
69
|
+
// Pure: build the plain-text (no ANSI) segments of the summary from the
// aggregated counters. Every segment is { kind, text } where kind is 'lead',
// 'count' or 'fail', so the styled renderer can colour failures distinctly.
// Failed searches and blocked fetches always get their own 'fail' segment, so
// the compact view never silently drops a source that didn't load. When there
// is nothing to report, a single 'web' lead segment is returned.
function webSummarySegments(state) {
  const segments = [];
  const add = (kind, text) => { segments.push({ kind, text }); };
  const hasSearch = state.searchCount > 0;

  if (hasSearch) {
    // Only the first query is shown, quoted and capped at 48 characters.
    const firstQuery = state.queries[0];
    const quoted = firstQuery ? `"${_truncate(firstQuery, 48)}"` : '';
    add('lead', `search ${quoted}`.trim());
    if (state.searchCount > 1) add('count', `${state.searchCount} queries`);
    if (state.searchFailed > 0) {
      const noun = state.searchFailed === 1 ? 'search' : 'searches';
      add('fail', `${state.searchFailed} ${noun} failed`);
    }
  }

  if (state.fetchCount > 0) {
    const noun = state.fetchOk === 1 ? 'source' : 'sources';
    // Without a search in front, the fetch count becomes the lead segment.
    add(hasSearch ? 'count' : 'lead', `${state.fetchOk} ${noun} read`);
    if (state.fetchFailed > 0) add('fail', `${state.fetchFailed} blocked`);
  }

  if (segments.length === 0) add('lead', 'web');
  return segments;
}
|
|
91
|
+
|
|
92
|
+
// Plain-text one-liner (no ANSI): the segment texts joined with ' · '.
// This is the string the tests assert on.
function webSummaryText(state) {
  const parts = [];
  for (const segment of webSummarySegments(state)) {
    parts.push(segment.text);
  }
  return parts.join(' · ');
}
|
|
96
|
+
|
|
97
|
+
// Styled, chrome-consistent summary line for the writer's activity region /
// scrollback. Uses the "<glyph> <category> · <segments…>" layout so the
// summary reads as a peer of the other tool lines.
//   state: aggregated counters (see aggregateWebOps).
//   opts:  { pending = false, durationMs = 0 }. While pending, the pending
//          glyph is shown and a trailing elapsed-time segment is appended.
// Returns the ANSI-coloured line as a string.
function formatWebSummaryLine(state, opts) {
  const options = opts || {};
  const pending = options.pending === undefined ? false : options.pending;
  const durationMs = options.durationMs === undefined ? 0 : options.durationMs;

  const theme = UI_THEME;
  const glyph = pending ? UI_ICONS.pending : UI_ICONS.success;
  const glyphColor = pending ? theme.muted : theme.success;
  const category = 'web'.padEnd(5);
  // Fall back to the accent colour when the theme has no dedicated web category colour.
  const categoryColor = (theme.categories && theme.categories.web) || theme.accent;

  // Lead segments use the default colour, failures the warning colour,
  // everything else the subtle colour.
  const colorFor = (kind) => {
    if (kind === 'lead') return theme.default;
    if (kind === 'fail') return theme.warning;
    return theme.subtle;
  };

  const pieces = webSummarySegments(state).map(
    (segment) => `${colorFor(segment.kind)}${segment.text}${RST}`
  );
  pieces.unshift(` ${glyphColor}${glyph}${RST} ${categoryColor}${category}${RST}`);
  if (pending) {
    pieces.push(`${theme.muted}${formatDuration(durationMs)}…${RST}`);
  }
  return pieces.join(` ${DIM}·${RST} `);
}
|
|
118
|
+
|
|
119
|
+
// Batch renderer / policy seam (used by tests, and documents the runtime split):
//   debug   → one full per-operation tool line per op (nothing hidden), built
//             with the caller-supplied formatToolLine (the same formatter the
//             runtime uses).
//   default → a single collapsed summary line.
//   ops:  array of op records; null/undefined is treated as empty.
//   opts: { debug = false, formatToolLine }. formatToolLine is required when
//         debug is true and there is at least one op to render.
// Returns an array of rendered lines.
function renderWebActivity(ops, opts) {
  const { debug = false, formatToolLine } = opts || {};
  if (!debug) {
    return [formatWebSummaryLine(aggregateWebOps(ops), { pending: false })];
  }
  return (ops || [])
    // Skip empty slots, exactly as the collapsed path (aggregateWebOps) does,
    // instead of crashing on `op.tag` for a null/undefined entry.
    .filter((op) => Boolean(op))
    .map((op) => formatToolLine({
      status: opSucceeded(op) ? 'success' : 'failure',
      tag: op.tag,
      arg: op.query || op.url || '',
      attrs: op.tag === 'web_search' ? { query: op.query } : { url: op.url },
      durationMs: op.durationMs,
      // Only fetches carry HTTP metadata.
      meta: op.tag === 'http_get' ? { status_code: op.status, bytes: op.bytes } : null,
      error: op.error ? { message: String(op.error) } : null,
    }));
}
|
|
138
|
+
|
|
139
|
+
// Stateful runtime collapser. Owns one writer "activity" entry per group of
// consecutive web ops: the entry is updated in place as ops start and finish,
// and flush() commits a single final summary line for the group. Tools run
// sequentially in the agent loop, so at most one group is open at a time.
//   deps: { writerModule } — must provide startActivity(id, renderFn),
//         updateActivity(id, renderFn) and endActivity(id, line).
// Returns { isWeb, isOpen, start, end, flush }.
function createWebActivityTracker(deps) {
  const writerModule = (deps || {}).writerModule;
  const tracker = {
    groupId: null, // id of the open activity group, or null when none is open
    nextSeq: 0, // counter used to mint group ids
    finished: [], // ops that have ended within the open group
    inFlight: null, // the op currently running, shown in the pending line
  };

  // Pending-state line: the finished ops plus the in-flight one, if any.
  const renderPending = (elapsedMs) => {
    const ops = tracker.inFlight
      ? tracker.finished.concat([tracker.inFlight])
      : tracker.finished;
    return formatWebSummaryLine(aggregateWebOps(ops), { pending: true, durationMs: elapsedMs });
  };

  // Re-render the open group's activity entry; does nothing when no group is open.
  const refresh = () => {
    if (tracker.groupId !== null) {
      writerModule.updateActivity(tracker.groupId, (elapsedMs) => renderPending(elapsedMs));
    }
  };

  const numberOrUndefined = (value) => (typeof value === 'number' ? value : undefined);

  // Record the op that is about to run, opening a new group if none is open.
  function start(tag, input) {
    tracker.inFlight = {
      tag,
      query: tag === 'web_search' ? input : undefined,
      url: tag === 'http_get' ? input : undefined,
    };
    if (tracker.groupId !== null) {
      refresh();
      return;
    }
    tracker.groupId = `web-${tracker.nextSeq++}`;
    writerModule.startActivity(tracker.groupId, (elapsedMs) => renderPending(elapsedMs));
  }

  // Record a finished op. Query/url come from toolCtx.attrs when present,
  // otherwise from the in-flight record captured by start().
  function end(tag, result, durationMs, toolCtx) {
    const meta = toolCtx && toolCtx.meta;
    const attrs = toolCtx && toolCtx.attrs;
    const pendingOp = tracker.inFlight;
    let error;
    if (toolCtx && toolCtx.error) {
      error = toolCtx.error.message || String(toolCtx.error);
    }
    tracker.finished.push({
      tag,
      durationMs,
      query: (attrs && attrs.query) || (pendingOp && pendingOp.query),
      url: (attrs && attrs.url) || (pendingOp && pendingOp.url),
      status: meta ? numberOrUndefined(meta.status_code) : undefined,
      bytes: meta ? numberOrUndefined(meta.bytes) : undefined,
      error,
    });
    tracker.inFlight = null;
    refresh();
  }

  // Commit the collapsed summary for the current group and reset the tracker.
  // A no-op when no group is open. State is reset before the writer is called.
  function flush() {
    const id = tracker.groupId;
    if (id === null) return;
    const line = formatWebSummaryLine(aggregateWebOps(tracker.finished), { pending: false });
    tracker.groupId = null;
    tracker.finished = [];
    tracker.inFlight = null;
    writerModule.endActivity(id, line);
  }

  return {
    isWeb: isWebTool,
    isOpen() { return tracker.groupId !== null; },
    start,
    end,
    flush,
  };
}
|
|
207
|
+
|
|
208
|
+
module.exports = {
|
|
209
|
+
WEB_TOOLS,
|
|
210
|
+
isWebTool,
|
|
211
|
+
opSucceeded,
|
|
212
|
+
aggregateWebOps,
|
|
213
|
+
webSummarySegments,
|
|
214
|
+
webSummaryText,
|
|
215
|
+
formatWebSummaryLine,
|
|
216
|
+
renderWebActivity,
|
|
217
|
+
createWebActivityTracker,
|
|
218
|
+
};
|
package/lib/ui/writer.js
CHANGED
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
// can't interleave with another task's writes.
|
|
30
30
|
|
|
31
31
|
const { stripAnsi, termWidth, truncateVisible } = require('./utils');
|
|
32
|
+
const dbg = require('../debug');
|
|
32
33
|
|
|
33
34
|
let _queue = Promise.resolve();
|
|
34
35
|
let _liveLines = [];
|
|
@@ -228,16 +229,13 @@ function setLive(lines, caret) {
|
|
|
228
229
|
// The input row's visible position is relative to the bottom of the
|
|
229
230
|
// viewport; it stays still only while the live region keeps a stable
|
|
230
231
|
// height. If a code path sneaks in a different-height setLive call the
|
|
231
|
-
// input jumps one row. Log
|
|
232
|
-
// without leaking into the TUI itself.
|
|
233
|
-
if (
|
|
234
|
-
_previousLiveLineCount !== undefined &&
|
|
232
|
+
// input jumps one row. Log to the debug file (extended trace) so future
|
|
233
|
+
// drift is visible without leaking into the TUI itself.
|
|
234
|
+
if (_previousLiveLineCount !== undefined &&
|
|
235
235
|
_previousLiveLineCount !== _liveLines.length) {
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
);
|
|
240
|
-
} catch {}
|
|
236
|
+
dbg.logExtended(
|
|
237
|
+
`[writer] live region height changed: ${_previousLiveLineCount} → ${_liveLines.length}`
|
|
238
|
+
);
|
|
241
239
|
}
|
|
242
240
|
_previousLiveLineCount = _liveLines.length;
|
|
243
241
|
_writeSync(_HIDE + eraseSeq + _drawLiveSeq() + _positionCaretSeq() + _caretShowSeq());
|
package/lib/verify.js
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Self-verification (Task 4.2)
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
//
|
|
7
|
+
// When the agent declares a task done, optionally run a configured verification
|
|
8
|
+
// command (e.g. `npm test`, `cargo check`) and feed the result back into the
|
|
9
|
+
// loop. Configured under `config.verify`:
|
|
10
|
+
//
|
|
11
|
+
// "verify": {
|
|
12
|
+
// "mode": "advisory" | "enforcing", // default advisory
|
|
13
|
+
// "command": "npm test", // empty → feature is a no-op
|
|
14
|
+
// "timeout_ms": 120000,
|
|
15
|
+
// "expected_exit_code": 0,
|
|
16
|
+
// "max_attempts": 3
|
|
17
|
+
// }
|
|
18
|
+
//
|
|
19
|
+
// Two modes (orchestration lives in lib/agent.js — this module only RUNS the
|
|
20
|
+
// command and reports the outcome):
|
|
21
|
+
// * advisory (default): run once when the agent finishes; feed the result into
|
|
22
|
+
// context as information. The turn ends regardless of pass/fail — advisory
|
|
23
|
+
// NEVER blocks.
|
|
24
|
+
// * enforcing: verify must pass before "done" is accepted. A failing verify
|
|
25
|
+
// returns the agent to the loop with the fenced result; after `max_attempts`
|
|
26
|
+
// failures the loop terminates with stopReason `verify_failed` — a precise
|
|
27
|
+
// bound distinct from (and far below) the coarse iteration cap.
|
|
28
|
+
//
|
|
29
|
+
// Load-bearing properties (mirror lib/hooks.js — verify is shell, treat it like
|
|
30
|
+
// a hook):
|
|
31
|
+
// * Success is EXIT-CODE based — exit == expected_exit_code is a pass. stdout
|
|
32
|
+
// is never parsed for success patterns (avoids brittleness).
|
|
33
|
+
// * Deny-list FIRST — the verify command passes through the Phase 0 deny-list
|
|
34
|
+
// (lib/deny.js) before running; a hit is refused (never run) and reported as
|
|
35
|
+
// a non-passing verify.
|
|
36
|
+
// * OS sandbox — after the deny-list, the verify command is wrapped by the SAME
|
|
37
|
+
// OS sandbox as every other shell call (Pre-Task 5.0a, resolveSandboxedSpawn),
|
|
38
|
+
// with the identical fail-safe fallback (failIfUnavailable hard error / human
|
|
39
|
+
// approval / refuse). A refusal is reported as a non-passing verify — never a
|
|
40
|
+
// silent unsandboxed run.
|
|
41
|
+
// * Project-layer (.semalt/config.json) verify.command is QUARANTINED before it
|
|
42
|
+
// reaches the runner (loadVerifyLayers, consumed by lib/config.js): a cloned
|
|
43
|
+
// repo cannot introduce an executable verify command. User verify is trusted.
|
|
44
|
+
// * Timeout — a hung verify must not hang the agent. On timeout the command is
|
|
45
|
+
// killed and the result is a (non-passing) verify, never an exception.
|
|
46
|
+
// * Untrusted output — the command output (a failing test name could carry an
|
|
47
|
+
// injection) is fenced in the same <<<UNTRUSTED_EXTERNAL_CONTENT>>> delimiter
|
|
48
|
+
// as hook/MCP/http_get output before it ever enters the model's context.
|
|
49
|
+
// * Contained — a spawn failure is reported as a non-passing verify, never a
|
|
50
|
+
// crash.
|
|
51
|
+
|
|
52
|
+
const { spawnSync } = require('child_process');
|
|
53
|
+
const { checkShellDenylist } = require('./deny');
|
|
54
|
+
const { wrapUntrusted } = require('./hooks');
|
|
55
|
+
const { resolveSandboxedSpawn } = require('./sandbox');
|
|
56
|
+
const { DEFAULT_VERIFY_TIMEOUT_MS, DEFAULT_VERIFY_MAX_ATTEMPTS } = require('./constants');
|
|
57
|
+
|
|
58
|
+
const VERIFY_MODES = ['advisory', 'enforcing'];
|
|
59
|
+
const MAX_VERIFY_OUTPUT_BYTES = 1024 * 1024;
|
|
60
|
+
|
|
61
|
+
// Validate + canonicalize the `config.verify` section. Pure; consumed by
// lib/config.js normalizeConfig. Any missing or invalid field falls back to its
// default, so a malformed config can never produce an unbounded or
// mode-confused verify.
//   raw: the untrusted `verify` value; anything that is not a plain
//        (non-array) object yields the defaults.
// Returns { mode, command, timeout_ms, expected_exit_code, max_attempts }.
function normalizeVerify(raw) {
  const usable = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
  const src = usable ? raw : {};
  const isIntAtLeast = (value, min) => Number.isInteger(value) && value >= min;

  return {
    // Only the exact string 'enforcing' opts in; everything else is advisory.
    mode: src.mode === 'enforcing' ? 'enforcing' : 'advisory',
    // Whitespace-only commands collapse to '' (feature disabled).
    command: typeof src.command === 'string' ? src.command.trim() : '',
    timeout_ms: isIntAtLeast(src.timeout_ms, 1) ? src.timeout_ms : DEFAULT_VERIFY_TIMEOUT_MS,
    expected_exit_code: isIntAtLeast(src.expected_exit_code, 0) ? src.expected_exit_code : 0,
    max_attempts: isIntAtLeast(src.max_attempts, 1) ? src.max_attempts : DEFAULT_VERIFY_MAX_ATTEMPTS,
  };
}
|
|
82
|
+
|
|
83
|
+
// Build the verify runner.
//   getConfig:     supplies the live config; read on every run so a config
//                  change takes effect immediately.
//   spawn:         synchronous spawner (defaults to child_process.spawnSync);
//                  injectable for tests.
//   log:           optional warning sink; ignored unless it is a function.
//   onUnsandboxed: optional human-approval callback, passed through to
//                  resolveSandboxedSpawn.
//   sandbox:       optional replacement for the sandbox resolver (tests).
// Returns:
//   {
//     config()  → the normalized verify config (for the orchestrator),
//     run(opts) → { skipped, ran, passed, mode, command, exitCode,
//                   expectedExitCode, timedOut, denied, maxAttempts,
//                   output, fenced }
//   }
// run() NEVER throws for an ordinary failure — a nonzero exit, timeout,
// deny-list hit, sandbox refusal, or spawn failure are all reported as a
// non-passing result. `opts.noVerify` (the --no-verify flag) short-circuits to a
// skipped result, as does an empty command.
function createVerifyRunner({ getConfig, spawn = spawnSync, log, onUnsandboxed = null, sandbox } = {}) {
  const warn = typeof log === 'function' ? log : () => {};
  // Sandbox resolver: the injected one if given, otherwise the shared
  // resolveSandboxedSpawn fed with the live config and the approval callback.
  const sandboxResolve = typeof sandbox === 'function'
    ? sandbox
    : (command) => resolveSandboxedSpawn({ command, getConfig, onUnsandboxed });

  // Not every thrown value is an Error; never let message extraction throw.
  const describeError = (err) => (err && err.message ? err.message : String(err));

  function config() {
    let cfg = {};
    // A throwing or empty getConfig degrades to the defaults.
    try { cfg = (getConfig ? getConfig() : {}) || {}; } catch { cfg = {}; }
    return normalizeVerify(cfg.verify);
  }

  async function run({ noVerify = false } = {}) {
    const v = config();
    const base = {
      skipped: false,
      ran: false,
      passed: false,
      mode: v.mode,
      command: v.command,
      exitCode: null,
      expectedExitCode: v.expected_exit_code,
      timedOut: false,
      denied: null,
      maxAttempts: v.max_attempts,
      output: '',
      fenced: '',
    };

    // Shared shape for every "refused / could not start" outcome.
    const notRun = (output, extra) => ({
      ...base,
      ...extra,
      ran: false,
      passed: false,
      output,
      fenced: wrapUntrusted(output, '[verify]'),
    });

    // --no-verify (one-off skip) or no command configured → feature is a no-op.
    if (noVerify || !v.command) return { ...base, skipped: true };

    // Deny-list FIRST — a hit is refused (never run) and reported as a
    // non-passing verify.
    const denied = checkShellDenylist(v.command);
    if (denied) {
      warn(`Verify command blocked by deny-list (${denied.label}); not run: ${v.command}`);
      return notRun(
        `Verify command was refused by the deny-list (${denied.label}) and did not run — treated as a failed verification.`,
        { denied: denied.label }
      );
    }

    // OS sandbox. A resolver failure or refusal is reported as a non-passing
    // verify — never a silent unsandboxed run.
    let resolution;
    try {
      resolution = await sandboxResolve(v.command);
    } catch (err) {
      const reason = describeError(err);
      warn(`Verify command sandbox resolution failed: ${reason}`);
      return notRun(`Verify command sandbox resolution failed: ${reason} — treated as a failed verification.`);
    }
    // A resolver that returns nothing is treated as a refusal (fail closed)
    // rather than dereferenced.
    if (!resolution || !resolution.run) {
      const message = resolution && resolution.message
        ? String(resolution.message)
        : 'Verify command was not run (the sandbox resolver gave no reason) — treated as a failed verification.';
      warn(`Verify command not run — ${message}`);
      return notRun(message);
    }

    const spawnOpts = {
      timeout: v.timeout_ms,
      encoding: 'utf8',
      env: { ...process.env, SEMALT_VERIFY: '1' },
      maxBuffer: MAX_VERIFY_OUTPUT_BYTES,
    };
    let proc;
    try {
      proc = resolution.useShell
        ? spawn(resolution.file, { shell: true, ...spawnOpts })
        : spawn(resolution.file, resolution.args, spawnOpts);
    } catch (err) {
      // A spawn that throws (rare) must never crash the loop.
      const reason = describeError(err);
      warn(`Verify command failed to spawn: ${reason}`);
      return notRun(`Verify command failed to spawn: ${reason} — treated as a failed verification.`);
    }
    if (!proc) {
      // An injected spawner that returns nothing is a spawn failure, not a crash.
      warn('Verify command failed to spawn: spawn returned no result');
      return notRun('Verify command failed to spawn: spawn returned no result — treated as a failed verification.');
    }

    const errorCode = proc.error ? proc.error.code : undefined;
    // spawnSync also kills the child (SIGTERM by default) when maxBuffer is
    // exceeded, reporting ENOBUFS. That is an output overflow, not a timeout,
    // so it must not satisfy the SIGTERM clause below.
    const overflowed = errorCode === 'ENOBUFS';
    const timedOut = Boolean(proc.error) && !overflowed
      && (errorCode === 'ETIMEDOUT' || proc.signal === 'SIGTERM');
    const exitCode = (typeof proc.status === 'number') ? proc.status : -1;
    const stdout = (proc.stdout != null ? String(proc.stdout) : '').trim();
    const stderr = (proc.stderr != null ? String(proc.stderr) : '').trim();
    const combined = [stdout, stderr].filter(Boolean).join('\n');

    // Timeout: a hung verify is killed and treated as a failed verification —
    // it never blocks indefinitely.
    if (timedOut) {
      warn(`Verify command timed out after ${v.timeout_ms}ms: ${v.command}`);
      const output = `Verification timed out after ${v.timeout_ms}ms running \`${v.command}\` — treated as a failed verification.`
        + (combined ? `\n${combined}` : '');
      return { ...base, ran: true, passed: false, timedOut: true, exitCode: null, output, fenced: wrapUntrusted(output, '[verify output]') };
    }

    // Success is exit-code based — never parse stdout for success patterns.
    const passed = exitCode === v.expected_exit_code;
    let header = passed
      ? `Verification PASSED — \`${v.command}\` exited ${exitCode} (expected ${v.expected_exit_code}).`
      : `Verification FAILED — \`${v.command}\` exited ${exitCode} (expected ${v.expected_exit_code}).`;
    if (!passed && proc.error) {
      // Explain a failure that has no real exit status (reported as -1).
      const reason = overflowed
        ? `output exceeded the ${MAX_VERIFY_OUTPUT_BYTES}-byte cap and the command was terminated`
        : `process error: ${describeError(proc.error)}`;
      warn(`Verify command did not complete normally (${reason}): ${v.command}`);
      header += `\n(${reason})`;
    }
    const output = combined ? `${header}\n${combined}` : header;
    return { ...base, ran: true, passed, exitCode, output, fenced: wrapUntrusted(output, '[verify output]') };
  }

  return { run, config };
}
|
|
204
|
+
|
|
205
|
+
// Resolve the effective verify config from the user and project layers,
// QUARANTINING a project-introduced verify.command. The effective verify is
// always the normalized USER layer; the project layer (.semalt/config.json,
// attacker-controllable in a cloned repo) is only inspected to report a command
// it tried to introduce. The caller passes the two layers separately (raw
// config objects, not a merged view) — that separation is the security boundary.
//   userVerify / projectVerify: raw `verify` sections; each is normalized here.
// Returns { verify, quarantinedCommand }. quarantinedCommand is the project's
// command when it is non-empty and differs from the user's, otherwise null
// (an identical command is the user's own).
function loadVerifyLayers(userVerify, projectVerify) {
  const verify = normalizeVerify(userVerify);
  const projectCommand = normalizeVerify(projectVerify).command;

  let quarantinedCommand = null;
  if (projectCommand && projectCommand !== verify.command) {
    quarantinedCommand = projectCommand;
  }
  return { verify, quarantinedCommand };
}
|
|
223
|
+
|
|
224
|
+
module.exports = {
|
|
225
|
+
VERIFY_MODES,
|
|
226
|
+
normalizeVerify,
|
|
227
|
+
loadVerifyLayers,
|
|
228
|
+
createVerifyRunner,
|
|
229
|
+
};
|