@semalt-ai/code 1.8.5 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.github/workflows/ci.yml +69 -0
- package/ARCHITECTURE.md +6 -95
- package/CLAUDE.md +196 -316
- package/README.md +148 -4
- package/docs/ARCHITECTURE.md +1321 -0
- package/docs/CONFIG.md +340 -0
- package/docs/HISTORY.md +245 -0
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +856 -120
- package/lib/api.js +239 -50
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +489 -0
- package/lib/commands/chat-slash.js +415 -0
- package/lib/commands/chat-turn.js +669 -0
- package/lib/commands/chat.js +407 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +360 -11
- package/lib/constants.js +401 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +202 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +270 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +123 -26
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +99 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2862 -0
- package/lib/tool_specs.js +263 -9
- package/lib/tools.js +352 -1039
- package/lib/ui/anim.js +86 -0
- package/lib/ui/ansi.js +17 -27
- package/lib/ui/chat-history.js +253 -71
- package/lib/ui/create-ui.js +67 -24
- package/lib/ui/diff.js +90 -25
- package/lib/ui/file-activity.js +236 -0
- package/lib/ui/format.js +195 -29
- package/lib/ui/input-field.js +21 -11
- package/lib/ui/md-stream.js +234 -0
- package/lib/ui/render-operation.js +113 -0
- package/lib/ui/select.js +1 -4
- package/lib/ui/status-bar.js +146 -36
- package/lib/ui/stream.js +20 -13
- package/lib/ui/theme.js +190 -44
- package/lib/ui/tool-operation.js +190 -0
- package/lib/ui/utils.js +9 -5
- package/lib/ui/web-activity.js +270 -0
- package/lib/ui/writer.js +159 -45
- package/lib/ui.js +1 -1
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/anim-driver.test.js +153 -0
- package/test/ask-user-display.test.js +226 -0
- package/test/ask-user-gate.test.js +231 -0
- package/test/background.test.js +414 -0
- package/test/chat-history-nocolor.test.js +155 -0
- package/test/chat-relogin.test.js +207 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/defer-detail-band.test.js +403 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/detail-band-tab-flatten.test.js +242 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/exec-diff.test.js +268 -0
- package/test/executors.test.js +599 -0
- package/test/extract-tool-calls.test.js +349 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/file-activity.test.js +522 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/grep-path-target.test.js +227 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +143 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +348 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/input-field-ctrl-o.test.js +37 -0
- package/test/live-height-physical.test.js +281 -0
- package/test/max-iterations.test.js +218 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/md-stream.test.js +183 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +409 -0
- package/test/native-live-narration.test.js +254 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/output-heredoc-leak.test.js +195 -0
- package/test/output-preview.test.js +245 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +362 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/render-operation.test.js +317 -0
- package/test/replay-descriptor-xml.test.js +216 -0
- package/test/replay-descriptor.test.js +189 -0
- package/test/replay-web-aggregate.test.js +291 -0
- package/test/replay-web-persist.test.js +241 -0
- package/test/result-cap.test.js +233 -0
- package/test/running-glyph-anim.test.js +111 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-driver.test.js +93 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/status-bar-resync.test.js +188 -0
- package/test/stream-parser.test.js +171 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/theme-palette.test.js +166 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/truncate-visible.test.js +78 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/view-image.test.js +199 -0
- package/test/web-activity-ordering.test.js +203 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
- package/path +0 -1
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Integration tests for lifecycle hooks (Task 3.4) driving the REAL runAgentLoop
|
|
4
|
+
// against the mock-LLM harness, with the REAL createHookRunner reading
|
|
5
|
+
// config.hooks (so spawnSync actually runs the hook commands). Hook commands use
|
|
6
|
+
// `node -e …` so they are portable across the CI matrix (Linux/macOS/Windows).
|
|
7
|
+
// Sentinel paths are passed via env vars (merged into the hook's environment) to
|
|
8
|
+
// avoid embedding OS-specific path separators in a `node -e` string literal.
|
|
9
|
+
|
|
10
|
+
const { test, before, after } = require('node:test');
|
|
11
|
+
const assert = require('node:assert');
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const os = require('os');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
|
|
16
|
+
const ui = require('../lib/ui');
|
|
17
|
+
const { createApiClient } = require('../lib/api');
|
|
18
|
+
const { createToolExecutor, extractToolCalls } = require('../lib/tools');
|
|
19
|
+
const { createPermissionManager } = require('../lib/permissions');
|
|
20
|
+
const { createAgentRunner } = require('../lib/agent');
|
|
21
|
+
const { startMockLLM } = require('./harness/mock-llm');
|
|
22
|
+
|
|
23
|
+
// Suite-wide API key fixture: pin SEMALT_API_KEY to a dummy value for the
// duration of the file and put the caller's original value back afterwards.
let prevKey;

before(() => {
  prevKey = process.env.SEMALT_API_KEY;
  process.env.SEMALT_API_KEY = 'test-key';
});

after(() => {
  // The variable was set before the suite started: restore it verbatim.
  if (prevKey !== undefined) {
    process.env.SEMALT_API_KEY = prevKey;
    return;
  }
  // It was unset: remove it again (assigning undefined would store the string "undefined").
  delete process.env.SEMALT_API_KEY;
});
|
|
29
|
+
|
|
30
|
+
const NODE = JSON.stringify(process.execPath);
|
|
31
|
+
|
|
32
|
+
// buildRunner mirrors agent-loop.test.js, but threads `hooks` into the config so
// the real hook runner (built inside createAgentRunner from getConfig) sees them.
//
// Parameters:
//   base  - base URL of the mock LLM server, stored as `api_base`.
//   hooks - hook configuration object; a falsy value becomes `{}`.
// Returns `{ runner, config }`; `config` is the same live object getConfig() returns.
function buildRunner(base, hooks) {
  const config = {
    api_base: base,
    api_key: 'test-key',
    default_model: 'test-model',
    temperature: 0.5,
    request_timeout_ms: 5000,
    stream: true,
    models: [],
    hooks: hooks || {},
    // This suite tests hook ORCHESTRATION, not the OS sandbox (covered by
    // hooks-verify-sandbox.test.js). Disable the sandbox so the command hooks
    // run deterministically across the CI matrix regardless of bwrap/Seatbelt.
    sandbox: { mode: 'off' },
  };

  // Every collaborator reads the one shared config object; saves merge into it in place.
  const getConfig = () => config;
  const saveConfig = (patch) => Object.assign(config, patch);

  const apiClient = createApiClient({ getConfig, saveConfig, ui });

  // Permissions are skipped, and the UI callbacks are inert stubs.
  const permissionManager = createPermissionManager(ui, { skipPermissions: true });
  permissionManager.setUICallbacks({
    onAddMessage: () => {},
    onShowModal: () => {},
    onCloseModal: () => {},
    onCaptureNavigation: () => () => {},
  });

  const executor = createToolExecutor(permissionManager, ui, getConfig);
  const { agentExecShell, agentExecFile, describePermission } = executor;

  const runner = createAgentRunner({
    chatStream: apiClient.chatStream,
    extractToolCalls,
    agentExecShell,
    agentExecFile,
    describePermission,
    permissionManager,
    ui,
    getConfig,
  });

  return { runner, config };
}
|
|
56
|
+
|
|
57
|
+
// Build an event recorder for one agent-loop run.
// Returns `{ ev, cb }`: `cb` holds the callbacks to pass to the loop, and `ev`
// holds the arrays those callbacks append to (tools, errors, assistants).
// Each callback returns the result of Array.prototype.push (the new length).
function collector() {
  const ev = { tools: [], errors: [], assistants: [] };

  // Single-argument recorder that appends to the named bucket of `ev`.
  const appendTo = (bucket) => (entry) => ev[bucket].push(entry);

  const cb = {
    onToolEnd: (tag, result) => {
      const record = { tag, result };
      return ev.tools.push(record);
    },
    onError: appendTo('errors'),
    onAssistantMessage: appendTo('assistants'),
  };

  return { ev, cb };
}
|
|
66
|
+
|
|
67
|
+
// Create a new scratch directory under the OS temp dir whose name starts with
// 'semalt-hooks-' and return its path. Callers are responsible for removing it.
function tmpdir() {
  const prefix = path.join(os.tmpdir(), 'semalt-hooks-');
  return fs.mkdtempSync(prefix);
}
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// 1. PreToolUse hook blocks a tool (non-zero exit) — the tool never runs
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
// Scenario: a catch-all PreToolUse command hook prints a reason and exits 1.
// Expected: the write_file call never executes, no onToolEnd event is emitted,
// and the block reason is fed back to the model, which then answers in plain text.
test('PreToolUse hook with a non-zero exit BLOCKS the tool; it does not run and the agent gets the reason', async () => {
  const workDir = tmpdir();
  const target = path.join(workDir, 'written.txt');
  const blockingHook = {
    type: 'command',
    matcher: '*',
    command: `${NODE} -e "process.stdout.write('blocked: writes are frozen'); process.exit(1)"`,
  };
  const llm = await startMockLLM();
  // The model tries to write a file; the hook must stop it.
  llm.replyWith(`<write_file path="${target}">DATA</write_file>`);
  llm.replyWith('Understood, I will not write.');

  try {
    const { runner } = buildRunner(llm.base, { PreToolUse: [blockingHook] });
    const { ev, cb } = collector();
    const messages = [{ role: 'user', content: 'write the file' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });

    assert.ok(!fs.existsSync(target), 'the write tool was blocked — no file on disk');
    assert.strictEqual(ev.tools.length, 0, 'a blocked tool never reaches onToolEnd');

    // The tool-results message is the user-role entry the loop appended.
    const isToolFeedback = (m) => m.role === 'user' && /Tool execution results/.test(m.content);
    const fed = messages.find(isToolFeedback);
    assert.ok(fed, 'the blocked-tool result is fed back to the model');
    assert.match(fed.content, /BLOCKED by a PreToolUse hook/);
    assert.match(fed.content, /writes are frozen/, 'hook stdout is the block reason');

    const sawFinalReply = messages.some(
      (m) => m.role === 'assistant' && m.content === 'Understood, I will not write.',
    );
    assert.ok(sawFinalReply);
  } finally {
    await llm.close();
    fs.rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
102
|
+
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
// 2. PostToolUse hook observes a result and injects feedback
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
// Scenario: a PostToolUse command hook echoes the tool name it received via the
// SEMALT_TOOL_NAME environment variable.
// Expected: the read_file result AND the hook's stdout both appear in the
// tool-results message, with the untrusted-content fence marker present.
test('PostToolUse hook observes the tool result and its stdout is appended (untrusted-fenced)', async () => {
  const workDir = tmpdir();
  const fixture = path.join(workDir, 'fixture.txt');
  fs.writeFileSync(fixture, 'FILE_BODY_7', 'utf8');

  const echoHook = {
    type: 'command',
    // Echo back the tool name the hook saw via env — proof it observed the call.
    command: `${NODE} -e "process.stdout.write('POSTHOOK_SAW=' + process.env.SEMALT_TOOL_NAME)"`,
  };
  const llm = await startMockLLM();
  llm.replyWith(`<read_file>${fixture}</read_file>`);
  llm.replyWith('Read it.');

  try {
    const { runner } = buildRunner(llm.base, { PostToolUse: [echoHook] });
    const { ev, cb } = collector();
    const messages = [{ role: 'user', content: 'read the file' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });

    assert.strictEqual(ev.tools.length, 1);
    const [onlyTool] = ev.tools;
    assert.strictEqual(onlyTool.tag, 'read');

    const isToolFeedback = (m) => m.role === 'user' && /Tool execution results/.test(m.content);
    const fed = messages.find(isToolFeedback);
    assert.ok(fed);
    assert.match(fed.content, /FILE_BODY_7/, 'the real tool result is present');
    assert.match(fed.content, /POSTHOOK_SAW=read/, 'PostToolUse stdout was appended');
    assert.match(fed.content, /UNTRUSTED_EXTERNAL_CONTENT/, 'hook feedback is fenced as untrusted');
  } finally {
    await llm.close();
    fs.rmSync(workDir, { recursive: true, force: true });
  }
});
|
|
137
|
+
|
|
138
|
+
// ---------------------------------------------------------------------------
|
|
139
|
+
// 3. UserPromptSubmit hook injects context before the loop
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
// Scenario: a UserPromptSubmit command hook prints a marker string.
// Expected: the marker shows up in a user-role message of the conversation,
// and that message carries the untrusted-content fence marker.
test('UserPromptSubmit hook stdout is injected into the conversation as context', async () => {
  const contextHook = {
    type: 'command',
    command: `${NODE} -e "process.stdout.write('INJECTED_CONTEXT_42')"`,
  };
  const llm = await startMockLLM();
  llm.replyWith('Acknowledged.');

  try {
    const { runner } = buildRunner(llm.base, { UserPromptSubmit: [contextHook] });
    const { cb } = collector();
    const messages = [{ role: 'user', content: 'hello' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });

    const carriesMarker = (m) => m.role === 'user' && /INJECTED_CONTEXT_42/.test(m.content);
    const injected = messages.find(carriesMarker);
    assert.ok(injected, 'hook output was injected as a user message');
    assert.match(injected.content, /UNTRUSTED_EXTERNAL_CONTENT/, 'injected context is fenced as untrusted');
  } finally {
    await llm.close();
  }
});
|
|
162
|
+
|
|
163
|
+
// ---------------------------------------------------------------------------
|
|
164
|
+
// 4. A failing hook does not crash the loop
|
|
165
|
+
// ---------------------------------------------------------------------------
|
|
166
|
+
|
|
167
|
+
// Scenario: a PostToolUse command hook exits 3 and prints nothing.
// Expected: the exec tool still runs, the loop records two turns, and the
// model's final reply is present in the conversation.
test('a failing (non-zero, no-output) PostToolUse hook is contained — the loop completes normally', async () => {
  const failingHook = { type: 'command', command: `${NODE} -e "process.exit(3)"` };
  const llm = await startMockLLM();
  llm.replyWith('<exec>echo hi</exec>');
  llm.replyWith('All good.');

  try {
    const { runner } = buildRunner(llm.base, { PostToolUse: [failingHook] });
    const { ev, cb } = collector();
    const messages = [{ role: 'user', content: 'run it' }];
    const outcome = await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
    const { metrics } = outcome;

    assert.strictEqual(ev.tools.length, 1, 'the tool still executed');
    assert.strictEqual(metrics.turns.length, 2, 'tool turn + final turn — the loop did not crash');

    const sawFinalReply = messages.some((m) => m.role === 'assistant' && m.content === 'All good.');
    assert.ok(sawFinalReply);
  } finally {
    await llm.close();
  }
});
|
|
185
|
+
|
|
186
|
+
// ---------------------------------------------------------------------------
|
|
187
|
+
// 5. A deny-listed hook command is never executed
|
|
188
|
+
// ---------------------------------------------------------------------------
|
|
189
|
+
|
|
190
|
+
// Scenario: the only PreToolUse hook is the command `rm -rf /`.
// Expected: the exec tool still runs exactly once and its output reaches the
// model without any "BLOCKED by a PreToolUse hook" text.
test('a deny-listed PreToolUse hook command is skipped (never run) and does not block the tool', async () => {
  const hooks = { PreToolUse: [{ type: 'command', matcher: '*', command: 'rm -rf /' }] };
  const mock = await startMockLLM();
  mock.replyWith('<exec>echo ALLOWED</exec>');
  mock.replyWith('Done.');
  try {
    const { runner } = buildRunner(mock.base, hooks);
    const { ev, cb } = collector();
    const messages = [{ role: 'user', content: 'go' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });

    // The deny-listed hook is skipped, so the tool is NOT blocked and runs.
    assert.strictEqual(ev.tools.length, 1, 'tool ran — a denied hook does not block');
    const fed = messages.find((m) => m.role === 'user' && /Tool execution results/.test(m.content));
    // Guard before dereferencing: a missing feedback message should fail as an
    // assertion, not as a TypeError on `fed.content`.
    assert.ok(fed, 'the tool result is fed back to the model');
    assert.match(fed.content, /ALLOWED/);
    assert.ok(!fed.content.includes('BLOCKED by a PreToolUse hook'), 'no spurious block');
  } finally {
    await mock.close();
  }
});
|
|
210
|
+
|
|
211
|
+
// ---------------------------------------------------------------------------
|
|
212
|
+
// 6. Stop hook fires once when the turn ends
|
|
213
|
+
// ---------------------------------------------------------------------------
|
|
214
|
+
|
|
215
|
+
// Scenario: a Stop command hook writes a sentinel file whose path it reads from
// the SEMALT_TEST_STOP_FILE environment variable (the path is passed via env
// rather than embedded in the `node -e` string).
// Expected: after a tool-less turn completes, the sentinel file exists.
//
// All environment mutation and server startup happen inside the try block so
// the env var and the temp dir are always restored/removed, even when the mock
// server fails to start or to close.
test('Stop hook fires when the agent loop finishes the turn', async () => {
  const dir = tmpdir();
  const sentinel = path.join(dir, 'stopped.txt');
  const prev = process.env.SEMALT_TEST_STOP_FILE;
  let mock;
  try {
    process.env.SEMALT_TEST_STOP_FILE = sentinel;
    const hooks = { Stop: [{
      type: 'command',
      command: `${NODE} -e "require('fs').writeFileSync(process.env.SEMALT_TEST_STOP_FILE,'x')"`,
    }] };
    mock = await startMockLLM();
    mock.replyWith('Final answer, no tools.');

    const { runner } = buildRunner(mock.base, hooks);
    const { cb } = collector();
    const messages = [{ role: 'user', content: 'just answer' }];
    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
    assert.ok(fs.existsSync(sentinel), 'the Stop hook ran at end of turn');
  } finally {
    try {
      // `mock` is still undefined when startMockLLM() itself rejected.
      if (mock) await mock.close();
    } finally {
      if (prev === undefined) delete process.env.SEMALT_TEST_STOP_FILE;
      else process.env.SEMALT_TEST_STOP_FILE = prev;
      fs.rmSync(dir, { recursive: true, force: true });
    }
  }
});
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Pre-Task 5.0a — verify + command-type hooks must run through the SAME OS
|
|
4
|
+
// sandbox as agentExecShell. Two layers of coverage:
|
|
5
|
+
//
|
|
6
|
+
// 1. Fallback rules (deterministic, no real bwrap needed): the shared detection
|
|
7
|
+
// cache is primed to "unavailable" so we assert the fail-safe path —
|
|
8
|
+
// failIfUnavailable hard error / no-approver refuse / approver-yes run —
|
|
9
|
+
// identically to test/sandbox-agent.test.js but for verify/hooks.
|
|
10
|
+
// 2. Kernel-level enforcement (REAL bwrap/sandbox-exec): a verify command and a
|
|
11
|
+
// command hook that write OUTSIDE the working dir are blocked by the OS.
|
|
12
|
+
// These SKIP gracefully when the primitive is absent on the runner.
|
|
13
|
+
|
|
14
|
+
const { test } = require('node:test');
|
|
15
|
+
const assert = require('node:assert');
|
|
16
|
+
const fs = require('fs');
|
|
17
|
+
const path = require('path');
|
|
18
|
+
|
|
19
|
+
const { createVerifyRunner } = require('../lib/verify');
|
|
20
|
+
const { createHookRunner } = require('../lib/hooks');
|
|
21
|
+
const { detectSandbox, _resetSandboxDetection } = require('../lib/sandbox');
|
|
22
|
+
|
|
23
|
+
const NODE = JSON.stringify(process.execPath);
|
|
24
|
+
|
|
25
|
+
// Force the shared detection cache to "unavailable" so the fallback tests are
// deterministic on ANY runner (mirrors test/sandbox-agent.test.js).
// Resets the cache, then runs a forced detection with stubbed probes: a linux
// platform, a `which` that finds nothing, and a fixed kernel-version string.
function primeUnavailable() {
  _resetSandboxDetection();
  const stubbedProbes = {
    platform: 'linux',
    which: () => null,
    readFile: () => 'Linux version 6.0',
    force: true,
  };
  detectSandbox(stubbedProbes);
}
|
|
31
|
+
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
// 1. Fallback rules — verify
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
// Fallback rule: sandbox mode 'auto', detection primed to unavailable, and no
// onUnsandboxed approver supplied. The verify command must not execute; the
// result is non-passing and explains the refusal.
// The detection cache is reset in `finally` so a failed assertion cannot leave
// it primed (same cleanup shape as the REAL-jail tests in this file).
test('verify: sandbox unavailable + auto + NO approver → REFUSED (never a silent unsandboxed run)', async () => {
  primeUnavailable();
  try {
    const runner = createVerifyRunner({
      getConfig: () => ({ verify: { command: `${NODE} -e "process.exit(0)"` }, sandbox: { mode: 'auto' } }),
    });
    const res = await runner.run();
    assert.strictEqual(res.ran, false, 'the command was never executed');
    assert.strictEqual(res.passed, false, 'a refused verify cannot pass');
    assert.match(res.output, /refused to run unsandboxed/i);
    assert.match(res.fenced, /UNTRUSTED_EXTERNAL_CONTENT/);
  } finally {
    _resetSandboxDetection();
  }
});
|
|
48
|
+
|
|
49
|
+
// Fallback rule: with sandbox.failIfUnavailable set and detection primed to
// unavailable, verify does not run, does not pass, and the output names the
// `failIfUnavailable` setting.
// The detection cache is reset in `finally` so a failed assertion cannot leave
// it primed.
test('verify: sandbox unavailable + failIfUnavailable → hard error (non-passing, names the gate)', async () => {
  primeUnavailable();
  try {
    const runner = createVerifyRunner({
      getConfig: () => ({ verify: { command: `${NODE} -e "process.exit(0)"` }, sandbox: { mode: 'auto', failIfUnavailable: true } }),
    });
    const res = await runner.run();
    assert.strictEqual(res.ran, false);
    assert.strictEqual(res.passed, false);
    assert.match(res.output, /failIfUnavailable/);
  } finally {
    _resetSandboxDetection();
  }
});
|
|
60
|
+
|
|
61
|
+
// Fallback rule: detection primed to unavailable, but an onUnsandboxed approver
// is supplied and resolves to true. The approver must be called with an info
// object whose `reason` mentions the missing tool, and the exit-0 command then
// runs and passes.
// The detection cache is reset in `finally` so a failed assertion cannot leave
// it primed.
test('verify: sandbox unavailable + human approver says YES → runs unsandboxed and can pass', async () => {
  primeUnavailable();
  try {
    // Captures the argument the runner hands to the approver.
    let asked = null;
    const runner = createVerifyRunner({
      getConfig: () => ({ verify: { command: `${NODE} -e "process.exit(0)"` }, sandbox: { mode: 'auto' } }),
      onUnsandboxed: async (info) => { asked = info; return true; },
    });
    const res = await runner.run();
    assert.ok(asked && /bwrap|bubblewrap|not found/i.test(asked.reason), 'approver receives the reason');
    assert.strictEqual(res.ran, true);
    assert.strictEqual(res.passed, true, 'exit 0 passes when the human approved an unsandboxed run');
  } finally {
    _resetSandboxDetection();
  }
});
|
|
74
|
+
|
|
75
|
+
// Ordering check: a verify command of `rm -rf /` must be rejected by the
// deny-list before the sandbox resolver is consulted. The injected `sandbox`
// stub counts its invocations; the count must stay at zero.
// The detection cache is reset in `finally` so a failed assertion cannot leave
// it primed.
test('verify: deny-list still fires BEFORE the sandbox layer (defense in depth)', async () => {
  primeUnavailable(); // would refuse anyway — but the deny-list must win first
  try {
    let sandboxCalls = 0;
    const runner = createVerifyRunner({
      getConfig: () => ({ verify: { command: 'rm -rf /' }, sandbox: { mode: 'auto' } }),
      sandbox: () => { sandboxCalls++; return { run: true, useShell: true, file: 'x', args: [], sandbox: 'off' }; },
    });
    const res = await runner.run();
    assert.ok(res.denied, 'deny-list label recorded');
    assert.strictEqual(res.ran, false);
    assert.strictEqual(sandboxCalls, 0, 'a deny-listed command never reaches the sandbox resolver');
  } finally {
    _resetSandboxDetection();
  }
});
|
|
88
|
+
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// 1. Fallback rules — command hooks
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
// Fallback rule for command hooks: sandbox mode 'auto', detection primed to
// unavailable, no approver. The hook is recorded as not-ok with a refusal
// error, a "not run" message is logged, and the result does NOT block the tool.
// The detection cache is reset in `finally` so a failed assertion cannot leave
// it primed.
test('hook (command): sandbox unavailable + auto + NO approver → NOT run, contained (does not block)', async () => {
  primeUnavailable();
  try {
    // Collects every message the runner passes to its `log` callback.
    const logs = [];
    const runner = createHookRunner({
      getConfig: () => ({ hooks: { PreToolUse: [{ type: 'command', command: `${NODE} -e "process.exit(1)"` }] }, sandbox: { mode: 'auto' } }),
      log: (m) => logs.push(m),
    });
    const r = await runner.run('PreToolUse', { tool: 'shell' });
    assert.strictEqual(r.blocked, false, 'a refused hook does not block the tool (contained like a timeout)');
    assert.strictEqual(r.ran[0].ok, false);
    assert.match(r.ran[0].error, /refused to run unsandboxed/i);
    assert.ok(logs.some((l) => /not run/i.test(l)));
  } finally {
    _resetSandboxDetection();
  }
});
|
|
107
|
+
|
|
108
|
+
// Ordering check for command hooks: a hook command of `rm -rf /` must be
// rejected by the deny-list before the injected `sandbox` resolver stub is
// called (its call counter stays at zero) and is recorded as denied.
// The detection cache is reset in `finally` so a failed assertion cannot leave
// it primed.
test('hook (command): deny-listed command never reaches the sandbox resolver', async () => {
  primeUnavailable();
  try {
    let sandboxCalls = 0;
    const runner = createHookRunner({
      getConfig: () => ({ hooks: { PreToolUse: [{ type: 'command', command: 'rm -rf /' }] }, sandbox: { mode: 'auto' } }),
      sandbox: () => { sandboxCalls++; return { run: true, useShell: true, file: 'x', args: [], sandbox: 'off' }; },
    });
    const r = await runner.run('PreToolUse', { tool: 'shell' });
    assert.strictEqual(sandboxCalls, 0, 'deny-list short-circuits before the sandbox');
    assert.ok(r.ran[0].denied);
  } finally {
    _resetSandboxDetection();
  }
});
|
|
120
|
+
|
|
121
|
+
// A `prompt`-type hook supplies text rather than a command, so it must still
// produce exactly one feedback entry even when detection is primed to
// unavailable.
// The detection cache is reset in `finally` so a failed assertion cannot leave
// it primed.
test('hook (prompt): unaffected by the sandbox (no shell, just injects text)', async () => {
  primeUnavailable(); // even with an unavailable sandbox, a prompt hook still injects
  try {
    const runner = createHookRunner({
      getConfig: () => ({ hooks: { UserPromptSubmit: [{ type: 'prompt', prompt: 'Mind the style guide.' }] }, sandbox: { mode: 'auto' } }),
    });
    const r = await runner.run('UserPromptSubmit', { prompt: 'go' });
    assert.strictEqual(r.feedback.length, 1, 'prompt hook injects regardless of sandbox availability');
    assert.match(r.feedback[0], /Mind the style guide/);
  } finally {
    _resetSandboxDetection();
  }
});
|
|
131
|
+
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
// 2. Kernel-level enforcement (REAL bwrap / sandbox-exec) — skip gracefully
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
// Discard any cached (possibly stubbed) detection result and run a forced
// detection with no probe overrides. Returns whatever detectSandbox() reports.
function realDetect() {
  _resetSandboxDetection();
  const detection = detectSandbox({ force: true });
  return detection;
}
|
|
140
|
+
// Detect once at load time. SKIP is `false` when a sandbox tool is available,
// otherwise a human-readable reason string passed to node:test's `skip` option.
const det = realDetect();
const SKIP = !det.available
  && `OS sandbox tool unavailable on this runner (${det.reason || det.platform})`;
|
|
142
|
+
|
|
143
|
+
// A target OUTSIDE the working dir AND outside the OS temp dir (both are writable
// roots in the real wrap). The repo parent satisfies both — the sandbox must
// block a write there. Cleaned up regardless of outcome.
//
// Returns a path in the parent of the current working directory, named
// `semalt-sbx-<tag>-<pid>.txt`. The file itself is not created here.
function outsideTarget(tag) {
  const parentOfCwd = path.dirname(process.cwd());
  const fileName = `semalt-sbx-${tag}-${process.pid}.txt`;
  return path.join(parentOfCwd, fileName);
}
|
|
149
|
+
|
|
150
|
+
// Quote-free shell redirects keep the command robust under the sh -c wrapper, so
// a non-zero exit is unambiguously the SANDBOX denying the write — not a parsing
// artifact. The target paths contain no spaces/quotes.
test('verify (REAL jail): a command writing OUTSIDE the working dir is blocked by the kernel', { skip: SKIP }, async () => {
  realDetect();
  const escape = outsideTarget('verify-escape');
  // Best-effort removal: the file normally does not exist, so errors are ignored.
  const removeEscape = () => {
    try { fs.unlinkSync(escape); } catch {}
  };
  removeEscape();

  const getConfig = () => ({
    verify: { command: `echo pwned > ${escape}` },
    sandbox: { mode: 'auto' },
  });
  const runner = createVerifyRunner({ getConfig });

  try {
    const res = await runner.run();
    assert.strictEqual(res.passed, false, 'the out-of-CWD write must fail under the jail');
    assert.ok(!fs.existsSync(escape), 'the out-of-jail file must NOT have been created');
  } finally {
    removeEscape();
    _resetSandboxDetection();
  }
});
|
|
167
|
+
|
|
168
|
+
// Same escape attempt as the verify case, driven through a PostToolUse command
// hook: the hook must be recorded as not-ok and the outside file must not exist.
test('hook (REAL jail): a command hook writing OUTSIDE the working dir is blocked by the kernel', { skip: SKIP }, async () => {
  realDetect();
  const escape = outsideTarget('hook-escape');
  // Best-effort removal: the file normally does not exist, so errors are ignored.
  const removeEscape = () => {
    try { fs.unlinkSync(escape); } catch {}
  };
  removeEscape();

  const getConfig = () => ({
    hooks: { PostToolUse: [{ type: 'command', command: `echo pwned > ${escape}` }] },
    sandbox: { mode: 'auto' },
  });
  const runner = createHookRunner({ getConfig });

  try {
    const r = await runner.run('PostToolUse', { tool: 'shell', result: 'x' });
    const [firstHook] = r.ran;
    assert.strictEqual(firstHook.ok, false, 'the hook command must fail under the jail (out-of-CWD write blocked)');
    assert.ok(!fs.existsSync(escape), 'the out-of-jail file must NOT have been created');
  } finally {
    removeEscape();
    _resetSandboxDetection();
  }
});
|
|
182
|
+
|
|
183
|
+
// Positive control for the jail: a redirect into the OS temp dir is expected to
// succeed, so verify passes and the file exists afterwards.
test('verify (REAL jail): a command writing to the OS temp dir (a writable root) still passes', { skip: SKIP }, async () => {
  realDetect();
  // `os` is not among this file's top-level requires, so it is loaded locally.
  const os = require('os');
  const inside = path.join(os.tmpdir(), `semalt-sbx-verify-ok-${process.pid}.txt`);
  // Best-effort removal: errors (e.g. the file not existing) are ignored.
  const removeInside = () => {
    try { fs.unlinkSync(inside); } catch {}
  };
  removeInside();

  const getConfig = () => ({
    verify: { command: `echo ok > ${inside}` },
    sandbox: { mode: 'auto' },
  });
  const runner = createVerifyRunner({ getConfig });

  try {
    const res = await runner.run();
    assert.strictEqual(res.passed, true, 'a write to a writable root succeeds under the jail');
    assert.ok(fs.existsSync(inside), 'the in-jail file was written');
  } finally {
    removeInside();
    _resetSandboxDetection();
  }
});
|
|
198
|
+
|
|
199
|
+
// ---------------------------------------------------------------------------
|
|
200
|
+
// 3. Binary network isolation reaches verify + hooks too (Task 4.4b).
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
//
|
|
203
|
+
// sandbox.network: 'off' must apply through the SAME shared shim to verify and
|
|
204
|
+
// command hooks — not just the agent's shell tool. Gated to bwrap, where the
|
|
205
|
+
// no-network jail is observable by counting non-loopback interfaces. A node
|
|
206
|
+
// one-liner exits 0 when it sees NO network (only loopback), non-zero otherwise.
|
|
207
|
+
// NET_SKIP is `false` only when the detected sandbox tool is bwrap; otherwise it
// is the skip-reason string for the network-isolation tests.
const bwrapDetected = det.available && det.tool === 'bwrap';
const NET_SKIP = bwrapDetected
  ? false
  : `network-isolation kernel test needs bwrap (got ${det.tool || det.platform})`;

// Probe script: lists network interfaces, drops "lo"/"lo0", and exits 5 when any
// other interface remains, 0 when none does.
const NONET_PROBE_SCRIPT = 'const i=require("os").networkInterfaces();const n=Object.keys(i).filter(x=>x!=="lo"&&x!=="lo0");process.exit(n.length?5:0)';
const NONET_PROBE = `${NODE} -e '${NONET_PROBE_SCRIPT}'`;
|
|
210
|
+
|
|
211
|
+
// Runs the interface-counting probe as the verify command with
// sandbox.network set to 'off'; the probe's exit code 0 is the pass condition.
test('verify (REAL jail): sandbox.network off runs the verify command with NO network', { skip: NET_SKIP }, async () => {
  realDetect();
  // expected_exit_code 0 == "the probe saw no network" ⇒ passing proves the verify
  // shell ran kernel-isolated from the network.
  const getConfig = () => ({
    verify: { command: NONET_PROBE },
    sandbox: { mode: 'auto', network: 'off' },
  });
  const runner = createVerifyRunner({ getConfig });

  try {
    const res = await runner.run();
    assert.strictEqual(res.ran, true);
    assert.strictEqual(res.passed, true, 'the verify command observed no network (exit 0) under the no-network jail');
    assert.strictEqual(res.exitCode, 0);
  } finally {
    _resetSandboxDetection();
  }
});
|
|
223
|
+
|
|
224
|
+
// Runs the interface-counting probe as a PostToolUse command hook with
// sandbox.network set to 'off'; the hook must be ok with exit code 0.
test('hook (REAL jail): sandbox.network off runs the command hook with NO network', { skip: NET_SKIP }, async () => {
  realDetect();
  const getConfig = () => ({
    hooks: { PostToolUse: [{ type: 'command', command: NONET_PROBE }] },
    sandbox: { mode: 'auto', network: 'off' },
  });
  const runner = createHookRunner({ getConfig });

  try {
    const r = await runner.run('PostToolUse', { tool: 'shell', result: 'x' });
    const [firstHook] = r.ran;
    assert.strictEqual(firstHook.ok, true, 'the hook command observed no network (exit 0) under the no-network jail');
    assert.strictEqual(firstHook.exitCode, 0, 'no non-loopback interface inside the no-network jail');
  } finally {
    _resetSandboxDetection();
  }
});
|