@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. package/.claude/settings.local.json +8 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1588 -27
  4. package/README.md +147 -3
  5. package/TECHNICAL_DEBT.md +66 -0
  6. package/examples/embed.js +74 -0
  7. package/index.js +259 -11
  8. package/lib/agent.js +935 -181
  9. package/lib/api.js +308 -55
  10. package/lib/args.js +96 -2
  11. package/lib/audit.js +23 -1
  12. package/lib/background.js +584 -0
  13. package/lib/checkpoints.js +757 -0
  14. package/lib/commands/auth.js +94 -0
  15. package/lib/commands/chat-session.js +306 -0
  16. package/lib/commands/chat-slash.js +399 -0
  17. package/lib/commands/chat-turn.js +446 -0
  18. package/lib/commands/chat.js +403 -0
  19. package/lib/commands/custom.js +157 -0
  20. package/lib/commands/history-utils.js +66 -0
  21. package/lib/commands/index.js +268 -0
  22. package/lib/commands/mcp.js +113 -0
  23. package/lib/commands/oneshot.js +193 -0
  24. package/lib/commands/registry.js +269 -0
  25. package/lib/commands/tasks.js +89 -0
  26. package/lib/compact.js +87 -0
  27. package/lib/config.js +346 -11
  28. package/lib/constants.js +372 -3
  29. package/lib/debug.js +106 -0
  30. package/lib/deny.js +199 -0
  31. package/lib/doctor.js +160 -0
  32. package/lib/headless.js +167 -0
  33. package/lib/hooks.js +286 -0
  34. package/lib/images.js +264 -0
  35. package/lib/internals.js +49 -0
  36. package/lib/mcp/boundary.js +131 -0
  37. package/lib/mcp/client.js +270 -0
  38. package/lib/mcp/oauth.js +134 -0
  39. package/lib/memory.js +209 -0
  40. package/lib/metrics.js +37 -2
  41. package/lib/payload.js +54 -0
  42. package/lib/permission-rules.js +401 -0
  43. package/lib/permissions.js +100 -10
  44. package/lib/pricing.js +67 -0
  45. package/lib/proc.js +158 -0
  46. package/lib/prompts.js +88 -8
  47. package/lib/sandbox.js +568 -0
  48. package/lib/sdk.js +328 -0
  49. package/lib/secrets.js +211 -0
  50. package/lib/skills.js +223 -0
  51. package/lib/subagents.js +516 -0
  52. package/lib/tool_registry.js +2558 -0
  53. package/lib/tool_specs.js +236 -9
  54. package/lib/tools.js +370 -944
  55. package/lib/ui/chat-history.js +19 -1
  56. package/lib/ui/format.js +101 -6
  57. package/lib/ui/input-field.js +16 -7
  58. package/lib/ui/status-bar.js +79 -11
  59. package/lib/ui/terminal.js +10 -4
  60. package/lib/ui/theme.js +1 -0
  61. package/lib/ui/web-activity.js +218 -0
  62. package/lib/ui/writer.js +7 -9
  63. package/lib/verify.js +229 -0
  64. package/lib/web-extract.js +213 -0
  65. package/lib/web-summarize.js +68 -0
  66. package/package.json +19 -4
  67. package/scripts/lint.js +57 -0
  68. package/test/agent-loop.test.js +389 -0
  69. package/test/background.test.js +414 -0
  70. package/test/chat.test.js +114 -0
  71. package/test/checkpoints-agent.test.js +181 -0
  72. package/test/checkpoints.test.js +650 -0
  73. package/test/command-registry.test.js +160 -0
  74. package/test/compact.test.js +116 -0
  75. package/test/completion-lazy.test.js +52 -0
  76. package/test/config-merge.test.js +324 -0
  77. package/test/config-quarantine.test.js +128 -0
  78. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  79. package/test/config-write-guard-skip.test.js +46 -0
  80. package/test/config-write-guard.test.js +153 -0
  81. package/test/context-split.test.js +215 -0
  82. package/test/cost-doctor.test.js +142 -0
  83. package/test/custom-commands-chat.test.js +106 -0
  84. package/test/custom-commands.test.js +230 -0
  85. package/test/deny-windows.test.js +120 -0
  86. package/test/deny.test.js +83 -0
  87. package/test/download-allow-anywhere.test.js +66 -0
  88. package/test/download-confine.test.js +153 -0
  89. package/test/executors.test.js +362 -0
  90. package/test/extract-tool-calls.test.js +315 -0
  91. package/test/fetch-url-validation.test.js +219 -0
  92. package/test/fixtures/tool-calls.js +57 -0
  93. package/test/fixtures/web-page.js +91 -0
  94. package/test/git-tools.test.js +384 -0
  95. package/test/grep-glob-serialize.test.js +242 -0
  96. package/test/grep-glob.test.js +268 -0
  97. package/test/harness/README.md +57 -0
  98. package/test/harness/chat-harness.js +142 -0
  99. package/test/harness/memwarn-headless-child.js +65 -0
  100. package/test/harness/mock-llm.js +120 -0
  101. package/test/harness/mock-mcp-server.js +142 -0
  102. package/test/harness/sse-server.js +69 -0
  103. package/test/headless.test.js +203 -0
  104. package/test/history-utils.test.js +88 -0
  105. package/test/hooks-agent.test.js +238 -0
  106. package/test/hooks-verify-sandbox.test.js +232 -0
  107. package/test/hooks.test.js +216 -0
  108. package/test/http-get-user-agent.test.js +142 -0
  109. package/test/images-api.test.js +208 -0
  110. package/test/images.test.js +238 -0
  111. package/test/max-iterations.test.js +216 -0
  112. package/test/mcp-boundary.test.js +57 -0
  113. package/test/mcp-client.test.js +267 -0
  114. package/test/mcp-oauth.test.js +86 -0
  115. package/test/memory-truncation-warning.test.js +222 -0
  116. package/test/memory.test.js +198 -0
  117. package/test/native-dispatch.test.js +356 -0
  118. package/test/output-chokepoint.test.js +188 -0
  119. package/test/path-guards.test.js +134 -0
  120. package/test/payload.test.js +99 -0
  121. package/test/permission-rules-agent.test.js +210 -0
  122. package/test/permission-rules.test.js +297 -0
  123. package/test/permissions.test.js +163 -0
  124. package/test/plan-mode.test.js +167 -0
  125. package/test/read-paginate.test.js +275 -0
  126. package/test/readonly-tools.test.js +177 -0
  127. package/test/result-cap.test.js +233 -0
  128. package/test/sandbox-agent.test.js +147 -0
  129. package/test/sandbox-integration.test.js +216 -0
  130. package/test/sandbox.test.js +408 -0
  131. package/test/sdk.test.js +234 -0
  132. package/test/shell-output-cap.test.js +181 -0
  133. package/test/skills-chat.test.js +110 -0
  134. package/test/skills.test.js +295 -0
  135. package/test/smoke.test.js +68 -0
  136. package/test/status-bar-pause.test.js +164 -0
  137. package/test/stream-parser.test.js +147 -0
  138. package/test/subagents-agent.test.js +178 -0
  139. package/test/subagents.test.js +222 -0
  140. package/test/tool-registry.test.js +85 -0
  141. package/test/trim-budget.test.js +101 -0
  142. package/test/verify-agent.test.js +317 -0
  143. package/test/verify.test.js +141 -0
  144. package/test/web-activity-ordering.test.js +194 -0
  145. package/test/web-activity.test.js +207 -0
  146. package/test/web-data-extraction-guidance.test.js +71 -0
  147. package/test/web-extract.test.js +185 -0
  148. package/test/web-fetch-agent.test.js +291 -0
  149. package/test/web-fetch-mode.test.js +193 -0
  150. package/test/web-search.test.js +380 -0
  151. package/lib/commands.js +0 -1288
@@ -0,0 +1,218 @@
1
+ 'use strict';
2
+
3
+ // Web-activity process summary (Task W.3, Part 1).
4
+ //
5
+ // A web task runs `web_search` (find candidate pages) then targeted `http_get`
6
+ // (read the relevant ones). By default each operation printed its own tool line
7
+ // (one "tool · web_search" / "net · GET …" row per call), which reads as a noisy
8
+ // list rather than one coherent process. This module collapses a run of
9
+ // consecutive web operations into a SINGLE compact process-summary line —
10
+ //
11
+ // ✓ web · search "коррупционные скандалы…" · 2 queries · 3 sources read · 1 blocked
12
+ //
13
+ // — while `--debug` keeps the full per-operation lines (in debug mode chat-turn.js
14
+ // bypasses this collapser and renders each op the normal way).
15
+ //
16
+ // Display only: the audit log still records every individual operation (that
17
+ // happens in the executors, untouched here), and NON-web tools render exactly as
18
+ // before. Scope: `web_search` + `http_get`. `download` is a file-save, not a page
19
+ // read for the search→fetch flow, so it keeps its own line.
20
+
21
+ const { UI_THEME, UI_ICONS } = require('./theme');
22
+ const { RST, DIM } = require('./ansi');
23
+ const { formatDuration } = require('./format');
24
+
25
+ // The tools collapsed into the web-activity summary.
26
// Tool tags whose operations are folded into the web-activity summary line.
const WEB_TOOLS = new Set(['web_search', 'http_get']);

/**
 * Report whether a tool tag belongs to the collapsed web-activity group.
 *
 * @param {string} tag - Tool tag to test.
 * @returns {boolean} True when the tag is a member of WEB_TOOLS.
 */
function isWebTool(tag) {
  const collapsed = WEB_TOOLS.has(tag);
  return collapsed;
}
29
+
30
/**
 * Collapse whitespace in `text` and shorten it to at most `max` UTF-16 units,
 * ending with an ellipsis when anything was cut.
 *
 * The cut never lands between the two halves of a surrogate pair: slicing by
 * UTF-16 unit could otherwise leave a lone high surrogate (rendered as a
 * replacement glyph) right before the ellipsis.
 *
 * @param {*} text - Value to display; null/undefined become the empty string.
 * @param {number} max - Maximum length of the result, ellipsis included.
 * @returns {string} The normalized, possibly truncated text.
 */
function _truncate(text, max) {
  const s = String(text == null ? '' : text).replace(/\s+/g, ' ').trim();
  if (s.length <= max) return s;

  // Reserve one unit for the ellipsis.
  let cut = Math.max(0, max - 1);
  if (cut > 0) {
    const lastKept = s.charCodeAt(cut - 1);
    // A high surrogate (U+D800..U+DBFF) as the last kept unit means its low
    // half is about to be dropped, so drop the whole character instead.
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
  }
  return s.slice(0, cut) + '…';
}
35
+
36
+ // Whether a finished web op counts as a success. A `web_search` is ok unless the
37
+ // executor flagged an error (backend down). An `http_get` is ok only when it both
38
+ // avoided a transport error (timeout/DNS — surfaced as `op.error`) AND the server
39
+ // answered < 400: a 403/406 is a real "blocked" even though the fetch itself
40
+ // completed and returned a status code.
41
/**
 * Decide whether a finished web operation counts as a success.
 *
 * Any op carrying an `error` is a failure. An `http_get` additionally fails
 * when it reports a numeric `status` of 400 or above.
 *
 * @param {object|null|undefined} op - Recorded operation (`tag`, `error`, `status`).
 * @returns {boolean} True when the op succeeded; false for a missing op.
 */
function opSucceeded(op) {
  if (!op || op.error) return false;

  const rejectedByServer = op.tag === 'http_get'
    && typeof op.status === 'number'
    && op.status >= 400;
  return !rejectedByServer;
}
47
+
48
+ // Pure: fold the recorded op list into the counts the summary needs.
49
/**
 * Fold a list of recorded web operations into the counters the summary needs.
 *
 * Falsy entries are skipped, and ops whose tag is neither `web_search` nor
 * `http_get` contribute nothing.
 *
 * @param {Iterable<object>|null|undefined} ops - Recorded operations.
 * @returns {{searchCount: number, searchFailed: number, queries: Array,
 *            fetchCount: number, fetchOk: number, fetchFailed: number}}
 */
function aggregateWebOps(ops) {
  const totals = {
    searchCount: 0, searchFailed: 0, queries: [],
    fetchCount: 0, fetchOk: 0, fetchFailed: 0,
  };

  for (const entry of (ops || [])) {
    if (!entry) continue;
    const succeeded = opSucceeded(entry);

    switch (entry.tag) {
      case 'web_search':
        totals.searchCount += 1;
        if (!succeeded) totals.searchFailed += 1;
        // Only non-empty queries are remembered for the summary's lead text.
        if (entry.query) totals.queries.push(entry.query);
        break;
      case 'http_get':
        totals.fetchCount += 1;
        if (succeeded) {
          totals.fetchOk += 1;
        } else {
          totals.fetchFailed += 1;
        }
        break;
      default:
        break;
    }
  }
  return totals;
}
68
+
69
+ // Pure: the plain-text segments of the summary (no ANSI). Each segment is tagged
70
+ // with a `kind` so the styled renderer can colour failures distinctly. Exposed
71
+ // for tests and reused by the styled renderer. Failures (a blocked source / a
72
+ // failed search) are ALWAYS represented so the compact view never silently drops
73
+ // a source that didn't load.
74
/**
 * Build the plain-text (no ANSI) segments of the web-activity summary.
 *
 * Each segment carries a `kind` (`lead`, `count` or `fail`) alongside its
 * `text`. Failed searches and failed fetches always get their own `fail`
 * segment. When there is neither a search nor a fetch the result is a single
 * `lead` segment reading "web".
 *
 * @param {object} state - Counters in the shape produced by aggregateWebOps.
 * @returns {Array<{kind: string, text: string}>} Ordered summary segments.
 */
function webSummarySegments(state) {
  const segments = [];
  const hasSearch = state.searchCount > 0;

  if (hasSearch) {
    // Only the first query is shown, quoted and shortened to 48 characters.
    const firstQuery = state.queries[0];
    const quoted = firstQuery ? `"${_truncate(firstQuery, 48)}"` : '';
    segments.push({ kind: 'lead', text: `search ${quoted}`.trim() });

    if (state.searchCount > 1) {
      segments.push({ kind: 'count', text: `${state.searchCount} queries` });
    }
    if (state.searchFailed > 0) {
      const plural = state.searchFailed === 1 ? '' : 'es';
      segments.push({ kind: 'fail', text: `${state.searchFailed} search${plural} failed` });
    }
  }

  if (state.fetchCount > 0) {
    const noun = state.fetchOk === 1 ? 'source' : 'sources';
    // Without a preceding search segment the fetch count leads the line.
    const kind = hasSearch ? 'count' : 'lead';
    segments.push({ kind, text: `${state.fetchOk} ${noun} read` });

    if (state.fetchFailed > 0) {
      segments.push({ kind: 'fail', text: `${state.fetchFailed} blocked` });
    }
  }

  if (segments.length === 0) {
    segments.push({ kind: 'lead', text: 'web' });
  }
  return segments;
}
91
+
92
+ // Plain-text one-liner (no ANSI). The text the tests assert on.
93
/**
 * Render the summary as a single plain-text line (no ANSI).
 *
 * @param {object} state - Counters in the shape produced by aggregateWebOps.
 * @returns {string} Segment texts joined with " · ".
 */
function webSummaryText(state) {
  const texts = [];
  for (const segment of webSummarySegments(state)) {
    texts.push(segment.text);
  }
  return texts.join(' · ');
}
96
+
97
+ // Styled, chrome-consistent line for the writer's activity region / scrollback.
98
+ // Mirrors formatToolLine's "<glyph> <category> · <segments…>" layout so the
99
+ // summary reads as a peer of the other tool lines, not a foreign widget.
100
/**
 * Build the styled (ANSI) one-line web-activity summary.
 *
 * Layout: "<glyph> web · <segment> · <segment> …". A pending line uses the
 * pending glyph in the muted colour and appends the elapsed duration followed
 * by an ellipsis; a final line uses the success glyph and colour.
 *
 * @param {object} state - Counters in the shape produced by aggregateWebOps.
 * @param {{pending?: boolean, durationMs?: number}} [opts] - Render options.
 * @returns {string} The styled summary line.
 */
function formatWebSummaryLine(state, opts) {
  const { pending = false, durationMs = 0 } = opts || {};

  // Prefer a dedicated "web" category colour, falling back to the accent.
  const categories = UI_THEME.categories;
  const categoryColor = (categories && categories.web) || UI_THEME.accent;
  const glyph = pending ? UI_ICONS.pending : UI_ICONS.success;
  const glyphColor = pending ? UI_THEME.muted : UI_THEME.success;
  const head = ` ${glyphColor}${glyph}${RST} ${categoryColor}${'web'.padEnd(5)}${RST}`;

  // Leads use the default colour, failures the warning colour, the rest subtle.
  const colorFor = (kind) => {
    if (kind === 'lead') return UI_THEME.default;
    if (kind === 'fail') return UI_THEME.warning;
    return UI_THEME.subtle;
  };

  const parts = webSummarySegments(state).map(
    (segment) => `${colorFor(segment.kind)}${segment.text}${RST}`
  );
  parts.unshift(head);
  if (pending) {
    parts.push(`${UI_THEME.muted}${formatDuration(durationMs)}…${RST}`);
  }
  return parts.join(` ${DIM}·${RST} `);
}
118
+
119
+ // Batch renderer / policy seam (used by tests, and documents the runtime split):
120
+ // debug → one full per-operation tool line per op (nothing hidden), built
121
+ // with the SAME formatToolLine the runtime uses.
122
+ // default → a single collapsed summary line.
123
/**
 * Render a batch of recorded web operations.
 *
 * In debug mode every op becomes its own line, produced by the caller-supplied
 * `formatToolLine`; otherwise the whole batch collapses into one summary line.
 *
 * Falsy entries in `ops` are skipped in debug mode. The collapsed path
 * (aggregateWebOps) already ignores them, so both modes now accept the same
 * op lists instead of debug mode throwing on `op.tag`.
 *
 * @param {Array<object>|null|undefined} ops - Recorded operations.
 * @param {{debug?: boolean, formatToolLine?: Function}} [opts] - `debug`
 *   selects per-op rendering; `formatToolLine` is required for that mode.
 * @returns {Array} One formatted entry per op (debug) or a one-element array
 *   holding the collapsed summary line.
 */
function renderWebActivity(ops, opts) {
  const { debug = false, formatToolLine } = opts || {};
  if (debug) {
    return (ops || [])
      .filter(Boolean)
      .map((op) => formatToolLine({
        status: opSucceeded(op) ? 'success' : 'failure',
        tag: op.tag,
        arg: op.query || op.url || '',
        attrs: op.tag === 'web_search' ? { query: op.query } : { url: op.url },
        durationMs: op.durationMs,
        // Only fetches carry response metadata.
        meta: op.tag === 'http_get' ? { status_code: op.status, bytes: op.bytes } : null,
        error: op.error ? { message: String(op.error) } : null,
      }));
  }
  return [formatWebSummaryLine(aggregateWebOps(ops), { pending: false })];
}
138
+
139
+ // Stateful runtime collapser. Owns one writer "activity" entry per group of
140
+ // consecutive web ops, updating it in place as ops complete and committing a
141
+ // single final summary line to scrollback on flush(). Tools run sequentially in
142
+ // the agent loop, so at most one group is ever open and there is no concurrency.
143
/**
 * Create the stateful collapser that shows a run of web operations as one
 * writer "activity" entry.
 *
 * The first start() opens a group via `writerModule.startActivity`; every later
 * start()/end() repaints it via `writerModule.updateActivity`; flush() hands the
 * final summary line to `writerModule.endActivity` and resets the tracker.
 *
 * @param {{writerModule: object}} deps - Provides the writer with
 *   startActivity / updateActivity / endActivity.
 * @returns {{isWeb: Function, isOpen: Function, start: Function,
 *            end: Function, flush: Function}} The tracker API.
 */
function createWebActivityTracker(deps) {
  const { writerModule } = deps || {};
  let activeGroupId = null; // id of the open activity group, null when closed
  let nextGroupSeq = 0;     // monotonic counter used to mint group ids
  let finishedOps = [];     // ops of the open group that have already ended
  let inFlightOp = null;    // op started but not yet ended

  // Pending summary over the finished ops plus the in-flight one, if any.
  const renderPending = (elapsedMs) => {
    const visibleOps = inFlightOp ? finishedOps.concat([inFlightOp]) : finishedOps;
    return formatWebSummaryLine(aggregateWebOps(visibleOps), { pending: true, durationMs: elapsedMs });
  };

  // Repaint the open group's line; does nothing while no group is open.
  const repaint = () => {
    if (activeGroupId === null) return;
    writerModule.updateActivity(activeGroupId, (elapsedMs) => renderPending(elapsedMs));
  };

  // Return obj[key] only when it is a number, otherwise undefined.
  const numberField = (obj, key) => (
    obj && typeof obj[key] === 'number' ? obj[key] : undefined
  );

  function isOpen() {
    return activeGroupId !== null;
  }

  // Record the op that is about to run, opening a group if none is open.
  function start(tag, input) {
    inFlightOp = {
      tag,
      query: tag === 'web_search' ? input : undefined,
      url: tag === 'http_get' ? input : undefined,
    };
    if (activeGroupId !== null) {
      repaint();
      return;
    }
    activeGroupId = `web-${nextGroupSeq++}`;
    writerModule.startActivity(activeGroupId, (elapsedMs) => renderPending(elapsedMs));
  }

  // Record the outcome of the in-flight op and repaint the pending line.
  function end(tag, result, durationMs, toolCtx) {
    const meta = toolCtx && toolCtx.meta;
    const attrs = toolCtx && toolCtx.attrs;

    let errorText;
    if (toolCtx && toolCtx.error) {
      errorText = toolCtx.error.message || String(toolCtx.error);
    }

    finishedOps.push({
      tag,
      durationMs,
      // Executor-reported attrs win over what start() captured.
      query: (attrs && attrs.query) || (inFlightOp && inFlightOp.query),
      url: (attrs && attrs.url) || (inFlightOp && inFlightOp.url),
      status: numberField(meta, 'status_code'),
      bytes: numberField(meta, 'bytes'),
      error: errorText,
    });
    inFlightOp = null;
    repaint();
  }

  // Commit the collapsed summary for the open group and reset; no-op when closed.
  function flush() {
    if (activeGroupId === null) return;
    const closingId = activeGroupId;
    const finalLine = formatWebSummaryLine(aggregateWebOps(finishedOps), { pending: false });
    // State is reset before the writer call.
    activeGroupId = null;
    finishedOps = [];
    inFlightOp = null;
    writerModule.endActivity(closingId, finalLine);
  }

  return { isWeb: isWebTool, isOpen, start, end, flush };
}
207
+
208
+ module.exports = {
209
+ WEB_TOOLS,
210
+ isWebTool,
211
+ opSucceeded,
212
+ aggregateWebOps,
213
+ webSummarySegments,
214
+ webSummaryText,
215
+ formatWebSummaryLine,
216
+ renderWebActivity,
217
+ createWebActivityTracker,
218
+ };
package/lib/ui/writer.js CHANGED
@@ -29,6 +29,7 @@
29
29
  // can't interleave with another task's writes.
30
30
 
31
31
  const { stripAnsi, termWidth, truncateVisible } = require('./utils');
32
+ const dbg = require('../debug');
32
33
 
33
34
  let _queue = Promise.resolve();
34
35
  let _liveLines = [];
@@ -228,16 +229,13 @@ function setLive(lines, caret) {
228
229
  // The input row's visible position is relative to the bottom of the
229
230
  // viewport; it stays still only while the live region keeps a stable
230
231
  // height. If a code path sneaks in a different-height setLive call the
231
- // input jumps one row. Log under DEBUG_TUI so future drift is visible
232
- // without leaking into the TUI itself.
233
- if (process.env.DEBUG_TUI &&
234
- _previousLiveLineCount !== undefined &&
232
+ // input jumps one row. Log to the debug file (extended trace) so future
233
+ // drift is visible without leaking into the TUI itself.
234
+ if (_previousLiveLineCount !== undefined &&
235
235
  _previousLiveLineCount !== _liveLines.length) {
236
- try {
237
- process.stderr.write(
238
- `live region height changed: ${_previousLiveLineCount} → ${_liveLines.length}\n`
239
- );
240
- } catch {}
236
+ dbg.logExtended(
237
+ `[writer] live region height changed: ${_previousLiveLineCount} → ${_liveLines.length}`
238
+ );
241
239
  }
242
240
  _previousLiveLineCount = _liveLines.length;
243
241
  _writeSync(_HIDE + eraseSeq + _drawLiveSeq() + _positionCaretSeq() + _caretShowSeq());
package/lib/verify.js ADDED
@@ -0,0 +1,229 @@
1
+ 'use strict';
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Self-verification (Task 4.2)
5
+ // ---------------------------------------------------------------------------
6
+ //
7
+ // When the agent declares a task done, optionally run a configured verification
8
+ // command (e.g. `npm test`, `cargo check`) and feed the result back into the
9
+ // loop. Configured under `config.verify`:
10
+ //
11
+ // "verify": {
12
+ // "mode": "advisory" | "enforcing", // default advisory
13
+ // "command": "npm test", // empty → feature is a no-op
14
+ // "timeout_ms": 120000,
15
+ // "expected_exit_code": 0,
16
+ // "max_attempts": 3
17
+ // }
18
+ //
19
+ // Two modes (orchestration lives in lib/agent.js — this module only RUNS the
20
+ // command and reports the outcome):
21
+ // * advisory (default): run once when the agent finishes; feed the result into
22
+ // context as information. The turn ends regardless of pass/fail — advisory
23
+ // NEVER blocks.
24
+ // * enforcing: verify must pass before "done" is accepted. A failing verify
25
+ // returns the agent to the loop with the fenced result; after `max_attempts`
26
+ // failures the loop terminates with stopReason `verify_failed` — a precise
27
+ // bound distinct from (and far below) the coarse iteration cap.
28
+ //
29
+ // Load-bearing properties (mirror lib/hooks.js — verify is shell, treat it like
30
+ // a hook):
31
+ // * Success is EXIT-CODE based — exit == expected_exit_code is a pass. stdout
32
+ // is never parsed for success patterns (avoids brittleness).
33
+ // * Deny-list FIRST — the verify command passes through the Phase 0 deny-list
34
+ // (lib/deny.js) before running; a hit is refused (never run) and reported as
35
+ // a non-passing verify.
36
+ // * OS sandbox — after the deny-list, the verify command is wrapped by the SAME
37
+ // OS sandbox as every other shell call (Pre-Task 5.0a, resolveSandboxedSpawn),
38
+ // with the identical fail-safe fallback (failIfUnavailable hard error / human
39
+ // approval / refuse). A refusal is reported as a non-passing verify — never a
40
+ // silent unsandboxed run.
41
+ // * Project-layer (.semalt/config.json) verify.command is QUARANTINED before it
42
+ // reaches the runner (loadVerifyLayers, consumed by lib/config.js): a cloned
43
+ // repo cannot introduce an executable verify command. User verify is trusted.
44
+ // * Timeout — a hung verify must not hang the agent. On timeout the command is
45
+ // killed and the result is a (non-passing) verify, never an exception.
46
+ // * Untrusted output — the command output (a failing test name could carry an
47
+ // injection) is fenced in the same <<<UNTRUSTED_EXTERNAL_CONTENT>>> delimiter
48
+ // as hook/MCP/http_get output before it ever enters the model's context.
49
+ // * Contained — a spawn failure is reported as a non-passing verify, never a
50
+ // crash.
51
+
52
+ const { spawnSync } = require('child_process');
53
+ const { checkShellDenylist } = require('./deny');
54
+ const { wrapUntrusted } = require('./hooks');
55
+ const { resolveSandboxedSpawn } = require('./sandbox');
56
+ const { DEFAULT_VERIFY_TIMEOUT_MS, DEFAULT_VERIFY_MAX_ATTEMPTS } = require('./constants');
57
+
58
+ const VERIFY_MODES = ['advisory', 'enforcing'];
59
+ const MAX_VERIFY_OUTPUT_BYTES = 1024 * 1024;
60
+
61
+ // Validate + canonicalize the `config.verify` section. Pure; consumed by
62
+ // lib/config.js normalizeConfig. Unknown/invalid fields fall back to defaults so
63
+ // a malformed config can never produce an unbounded or mode-confused verify.
64
/**
 * Validate and canonicalize a raw `verify` config section.
 *
 * Anything that is not a plain (non-array) object yields the defaults. Each
 * field is accepted only when well-formed; otherwise its default is kept:
 *   - mode: 'enforcing' only when given exactly, else 'advisory'
 *   - command: trimmed non-empty string, else ''
 *   - timeout_ms / max_attempts: positive integers
 *   - expected_exit_code: non-negative integer, else 0
 *
 * @param {*} raw - The untrusted `verify` section.
 * @returns {{mode: string, command: string, timeout_ms: number,
 *            expected_exit_code: number, max_attempts: number}}
 */
function normalizeVerify(raw) {
  const usable = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
  const section = usable ? raw : {};

  const positiveIntOr = (value, fallback) => (
    Number.isInteger(value) && value > 0 ? value : fallback
  );
  const exitCode = section.expected_exit_code;

  return {
    mode: section.mode === 'enforcing' ? 'enforcing' : 'advisory',
    // A whitespace-only command trims to '', which is also the default.
    command: typeof section.command === 'string' ? section.command.trim() : '',
    timeout_ms: positiveIntOr(section.timeout_ms, DEFAULT_VERIFY_TIMEOUT_MS),
    expected_exit_code: Number.isInteger(exitCode) && exitCode >= 0 ? exitCode : 0,
    max_attempts: positiveIntOr(section.max_attempts, DEFAULT_VERIFY_MAX_ATTEMPTS),
  };
}
82
+
83
+ // Build the verify runner. `getConfig` supplies the live config (read per-run so
84
+ // a config change takes effect immediately). `spawn` and `log` are injectable for
85
+ // tests. Returns:
86
+ // {
87
+ // config() → the normalized verify config (for the orchestrator),
88
+ // run(opts) → { skipped, ran, passed, mode, command, exitCode,
89
+ // expectedExitCode, timedOut, denied, maxAttempts,
90
+ // output, fenced }
91
+ // }
92
+ // run() NEVER throws for an ordinary failure — a nonzero exit, timeout, deny-list
93
+ // hit, or spawn failure are all reported as a non-passing result. `opts.noVerify`
94
+ // (the --no-verify flag) short-circuits to a skipped result, as does an empty
95
+ // command.
96
/**
 * Build the verify runner.
 *
 * @param {object} [deps]
 * @param {Function} [deps.getConfig] - Returns the live config; read on every
 *   call so a config change takes effect immediately.
 * @param {Function} [deps.spawn=spawnSync] - spawnSync-compatible function.
 * @param {Function} [deps.log] - Receives warning strings; ignored when absent.
 * @param {Function|null} [deps.onUnsandboxed=null] - Forwarded to
 *   resolveSandboxedSpawn when no `sandbox` override is given.
 * @param {Function} [deps.sandbox] - Optional replacement for the sandbox
 *   resolver; called with the command and awaited.
 * @returns {{run: Function, config: Function}} `config()` returns the
 *   normalized verify config; `run(opts)` resolves to
 *   `{ skipped, ran, passed, mode, command, exitCode, expectedExitCode,
 *      timedOut, denied, maxAttempts, output, fenced }`.
 *
 * run() reports every ordinary failure as a non-passing result instead of
 * throwing: a deny-list hit, a sandbox refusal, a spawn failure (thrown OR
 * returned in `proc.error`), a timeout, an output-cap overflow, or a
 * non-matching exit code.
 */
function createVerifyRunner({ getConfig, spawn = spawnSync, log, onUnsandboxed = null, sandbox } = {}) {
  const warn = typeof log === 'function' ? log : () => {};
  const sandboxResolve = typeof sandbox === 'function'
    ? sandbox
    : (command) => resolveSandboxedSpawn({ command, getConfig, onUnsandboxed });

  // Human-readable text for anything that may have been thrown or returned.
  const describeError = (err) => (err && err.message ? err.message : String(err));

  function config() {
    let cfg = {};
    // Deliberately best-effort: a throwing getConfig degrades to the defaults.
    try { cfg = (getConfig ? getConfig() : {}) || {}; } catch { cfg = {}; }
    return normalizeVerify(cfg.verify);
  }

  async function run({ noVerify = false } = {}) {
    const v = config();
    const base = {
      skipped: false,
      ran: false,
      passed: false,
      mode: v.mode,
      command: v.command,
      exitCode: null,
      expectedExitCode: v.expected_exit_code,
      timedOut: false,
      denied: null,
      maxAttempts: v.max_attempts,
      output: '',
      fenced: '',
    };

    // Result for a command that never started.
    const notRun = (output, extra = {}) => ({
      ...base, ...extra, ran: false, passed: false, output, fenced: wrapUntrusted(output, '[verify]'),
    });
    // Result for a command that started but cannot count as a pass.
    const ranAndFailed = (output, extra = {}) => ({
      ...base, ...extra, ran: true, passed: false, output, fenced: wrapUntrusted(output, '[verify output]'),
    });

    // --no-verify (one-off skip) or no command configured → feature is a no-op.
    if (noVerify || !v.command) return { ...base, skipped: true };

    // Deny-list first: a hit is refused (never run) and cannot pass.
    const denied = checkShellDenylist(v.command);
    if (denied) {
      warn(`Verify command blocked by deny-list (${denied.label}); not run: ${v.command}`);
      return notRun(
        `Verify command was refused by the deny-list (${denied.label}) and did not run — treated as a failed verification.`,
        { denied: denied.label }
      );
    }

    // Sandbox resolution: a thrown error or a refusal is a non-passing verify.
    let resolution;
    try {
      resolution = await sandboxResolve(v.command);
    } catch (err) {
      const reason = describeError(err);
      warn(`Verify command sandbox resolution failed: ${reason}`);
      return notRun(`Verify command sandbox resolution failed: ${reason} — treated as a failed verification.`);
    }
    if (!resolution || !resolution.run) {
      // A resolver that returns nothing is treated as a refusal, not a crash.
      const reason = (resolution && resolution.message)
        || 'sandbox resolution did not permit the command to run';
      warn(`Verify command not run — ${reason}`);
      return notRun(reason);
    }

    const spawnOpts = {
      timeout: v.timeout_ms,
      encoding: 'utf8',
      env: { ...process.env, SEMALT_VERIFY: '1' },
      maxBuffer: MAX_VERIFY_OUTPUT_BYTES,
    };
    let proc;
    try {
      proc = resolution.useShell
        ? spawn(resolution.file, { shell: true, ...spawnOpts })
        : spawn(resolution.file, resolution.args, spawnOpts);
    } catch (err) {
      // A spawn that throws (rare) must never crash the loop.
      const reason = describeError(err);
      warn(`Verify command failed to spawn: ${reason}`);
      return notRun(`Verify command failed to spawn: ${reason} — treated as a failed verification.`);
    }
    if (!proc) {
      warn('Verify command failed to spawn: no result returned');
      return notRun('Verify command failed to spawn: no result returned — treated as a failed verification.');
    }

    const procError = proc.error || null;
    const errorCode = procError ? procError.code : undefined;
    // ENOBUFS means maxBuffer was exceeded. It must be excluded from the
    // timeout test or an output overflow is misreported as a timeout whenever
    // the killed child also reports SIGTERM.
    const overflowed = errorCode === 'ENOBUFS';
    const timedOut = !!procError && !overflowed
      && (errorCode === 'ETIMEDOUT' || proc.signal === 'SIGTERM');
    const exitCode = (typeof proc.status === 'number') ? proc.status : -1;
    const stdout = (proc.stdout != null ? String(proc.stdout) : '').trim();
    const stderr = (proc.stderr != null ? String(proc.stderr) : '').trim();
    const combined = [stdout, stderr].filter(Boolean).join('\n');
    const tail = combined ? `\n${combined}` : '';

    // Timeout: the command was killed; report a failed verification.
    if (timedOut) {
      warn(`Verify command timed out after ${v.timeout_ms}ms: ${v.command}`);
      return ranAndFailed(
        `Verification timed out after ${v.timeout_ms}ms running \`${v.command}\` — treated as a failed verification.${tail}`,
        { timedOut: true, exitCode: null }
      );
    }

    // Output cap exceeded: report it as what it is rather than as a timeout.
    if (overflowed) {
      warn(`Verify command output exceeded ${MAX_VERIFY_OUTPUT_BYTES} bytes: ${v.command}`);
      return ranAndFailed(
        `Verification output exceeded the ${MAX_VERIFY_OUTPUT_BYTES}-byte cap running \`${v.command}\` — treated as a failed verification.${tail}`,
        { exitCode: null }
      );
    }

    // An error returned (not thrown) with no exit status means the process
    // never produced a result — surface the real reason instead of "exited -1".
    if (procError && typeof proc.status !== 'number') {
      const reason = describeError(procError);
      warn(`Verify command failed to spawn: ${reason}`);
      return notRun(`Verify command failed to spawn: ${reason} — treated as a failed verification.`);
    }

    // Success is exit-code based — stdout is never parsed for success patterns.
    const passed = exitCode === v.expected_exit_code;
    const header = passed
      ? `Verification PASSED — \`${v.command}\` exited ${exitCode} (expected ${v.expected_exit_code}).`
      : `Verification FAILED — \`${v.command}\` exited ${exitCode} (expected ${v.expected_exit_code}).`;
    const output = combined ? `${header}\n${combined}` : header;
    return { ...base, ran: true, passed, exitCode, output, fenced: wrapUntrusted(output, '[verify output]') };
  }

  return { run, config };
}
204
+
205
+ // Resolve the effective verify config from the user and project layers,
206
+ // QUARANTINING a project-introduced verify.command (executable, host-privileged).
207
+ // Mirrors loadRuleLayers / loadHookLayers: a project (.semalt/config.json,
208
+ // attacker-controllable in a cloned repo) can NEVER introduce or change the
209
+ // command that the verify step runs. The effective verify is the USER layer's,
210
+ // full stop — project verify settings are ignored. The two layers are read
211
+ // SEPARATELY (raw config objects, NOT the shallow-merged view); that separation
212
+ // is the security boundary. Returns { verify, quarantinedCommand }.
213
/**
 * Resolve the effective verify config from the user and project layers.
 *
 * The returned `verify` is always the normalized USER layer; the project layer
 * never contributes to it. A non-empty project command that differs from the
 * user's is reported back as `quarantinedCommand`.
 *
 * @param {*} userVerify - Raw `verify` section from the user config.
 * @param {*} projectVerify - Raw `verify` section from the project config.
 * @returns {{verify: object, quarantinedCommand: (string|null)}}
 */
function loadVerifyLayers(userVerify, projectVerify) {
  const verify = normalizeVerify(userVerify);
  const { command: projectCommand } = normalizeVerify(projectVerify);

  // An identical command is the user's own declaration, so nothing is flagged.
  const introducesCommand = Boolean(projectCommand) && projectCommand !== verify.command;
  return {
    verify,
    quarantinedCommand: introducesCommand ? projectCommand : null,
  };
}
223
+
224
+ module.exports = {
225
+ VERIFY_MODES,
226
+ normalizeVerify,
227
+ loadVerifyLayers,
228
+ createVerifyRunner,
229
+ };