@semalt-ai/code 1.8.5 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +6 -1
- package/.github/workflows/ci.yml +69 -0
- package/CLAUDE.md +1584 -26
- package/README.md +147 -3
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +711 -104
- package/lib/api.js +213 -49
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +306 -0
- package/lib/commands/chat-slash.js +399 -0
- package/lib/commands/chat-turn.js +446 -0
- package/lib/commands/chat.js +403 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +333 -11
- package/lib/constants.js +372 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +167 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +264 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +100 -10
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +84 -5
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2558 -0
- package/lib/tool_specs.js +222 -2
- package/lib/tools.js +272 -1020
- package/lib/ui/format.js +22 -1
- package/lib/ui/input-field.js +16 -7
- package/lib/ui/status-bar.js +79 -11
- package/lib/ui/theme.js +1 -0
- package/lib/ui/web-activity.js +218 -0
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/background.test.js +414 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/executors.test.js +362 -0
- package/test/extract-tool-calls.test.js +315 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +142 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +203 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/max-iterations.test.js +216 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +356 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +163 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/result-cap.test.js +233 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/stream-parser.test.js +147 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/web-activity-ordering.test.js +194 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Tests for background tasks (Task 5.3) — detached agent processes + the task
|
|
4
|
+
// registry. Coverage:
|
|
5
|
+
// * task store CRUD: create / patch / events / result / list ordering;
|
|
6
|
+
// * validation BEFORE detach surfaces config/policy/sandbox errors, and a
|
|
7
|
+
// validation failure spawns NO process (no orphan);
|
|
8
|
+
// * launchBackground writes the spec + registry record and detaches via an
|
|
9
|
+
// injected spawn; sandbox defaults ON in the spec, opt-out is explicit;
|
|
10
|
+
// * runBackgroundChild (REAL createAgent ↔ mock-LLM) runs to completion and
|
|
11
|
+
// writes the result envelope;
|
|
12
|
+
// * SAFE POSTURE: no policy → a mutating write is refused (paired with: an
|
|
13
|
+
// allow rule lets it proceed);
|
|
14
|
+
// * deny-list stays active in the background process;
|
|
15
|
+
// * lifecycle reconciliation: a dead "running" task is detected as stale and
|
|
16
|
+
// prunable; killTask tree-kills + marks terminated;
|
|
17
|
+
// * a REAL detached process is tree-killable by PID;
|
|
18
|
+
// * TOOL-EXPOSURE DECISION: background-launch is NOT an agent tool.
|
|
19
|
+
|
|
20
|
+
const { test, before, after } = require('node:test');
|
|
21
|
+
const assert = require('node:assert');
|
|
22
|
+
const fs = require('fs');
|
|
23
|
+
const os = require('os');
|
|
24
|
+
const path = require('path');
|
|
25
|
+
|
|
26
|
+
const bg = require('../lib/background');
|
|
27
|
+
const proc = require('../lib/proc');
|
|
28
|
+
const { startMockLLM } = require('./harness/mock-llm');
|
|
29
|
+
|
|
30
|
+
let prevKey;
|
|
31
|
+
let prevCwd;
|
|
32
|
+
let tmpCwd;
|
|
33
|
+
let roots = [];
|
|
34
|
+
|
|
35
|
+
/**
 * Create a unique temporary directory (prefix `bg-tasks-`) under the OS temp
 * dir and record it in `roots` so the `after` hook can remove it.
 * @returns {string} Path of the newly created directory.
 */
function freshRoot() {
  const prefix = path.join(os.tmpdir(), 'bg-tasks-');
  const rootDir = fs.mkdtempSync(prefix);
  roots.push(rootDir);
  return rootDir;
}
|
|
40
|
+
|
|
41
|
+
// Promise-based delay: settles (with no value) once `ms` milliseconds elapse.
const sleep = (ms) => new Promise((resolve) => { setTimeout(resolve, ms); });
|
|
42
|
+
|
|
43
|
+
before(() => {
  // Remember the ambient API key and working directory so `after` can put
  // them back once the suite has finished.
  prevKey = process.env.SEMALT_API_KEY;
  prevCwd = process.cwd();

  // Run the whole suite with a dummy key, inside a throwaway directory.
  process.env.SEMALT_API_KEY = 'test-key';
  tmpCwd = fs.mkdtempSync(path.join(os.tmpdir(), 'bg-cwd-'));
  process.chdir(tmpCwd);
});
|
|
50
|
+
|
|
51
|
+
after(() => {
  // Restore the API key exactly as it was (including "not set at all").
  if (prevKey !== undefined) {
    process.env.SEMALT_API_KEY = prevKey;
  } else {
    delete process.env.SEMALT_API_KEY;
  }

  // Leave the temp cwd before deleting it.
  process.chdir(prevCwd);

  for (const dir of [...roots, tmpCwd]) {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a directory that cannot be removed must not fail the suite.
    }
  }
});
|
|
59
|
+
|
|
60
|
+
// --------------------------------------------------------------------------
|
|
61
|
+
// Task store
|
|
62
|
+
// --------------------------------------------------------------------------
|
|
63
|
+
test('task store: create writes spec + meta, patch/events/result round-trip, list newest-first', () => {
  // Controllable clock handed to the store through its `now` option.
  let clock = 1000;
  const now = () => clock;
  const store = bg.createTaskStore({ rootDir: freshRoot(), now });

  // create: the spec round-trips and the initial meta mirrors the arguments.
  const olderId = store.genId();
  store.create({ id: olderId, spec: { prompt: 'first', model: 'm' }, prompt: 'first task', model: 'm' });
  assert.deepStrictEqual(store.readSpec(olderId), { prompt: 'first', model: 'm' });
  const initialMeta = store.readMeta(olderId);
  assert.strictEqual(initialMeta.status, 'starting');
  assert.strictEqual(initialMeta.model, 'm');
  assert.strictEqual(initialMeta.prompt_summary, 'first task');

  // patchMeta: patched fields are visible on the next read.
  store.patchMeta(olderId, { pid: 4242, status: 'running' });
  const patchedMeta = store.readMeta(olderId);
  assert.strictEqual(patchedMeta.pid, 4242);
  assert.strictEqual(patchedMeta.status, 'running');

  // appendEvent / readEvents: events come back in append order, timestamped.
  store.appendEvent(olderId, { type: 'status', status: 'running' });
  store.appendEvent(olderId, { type: 'tool', tag: 'read', ms: 3 });
  const events = store.readEvents(olderId);
  assert.strictEqual(events.length, 2);
  assert.strictEqual(events[1].tag, 'read');
  assert.ok(events[0].ts, 'events carry a timestamp');

  // writeResult / readResult: the envelope round-trips unchanged.
  const envelope = {
    result: 'ok',
    toolCalls: [],
    usage: {},
    cost: null,
    stopReason: 'end_turn',
    verifyStatus: 'skipped',
  };
  store.writeResult(olderId, envelope);
  assert.deepStrictEqual(store.readResult(olderId), envelope);

  // list: a task created at a later clock value is listed first.
  clock = 2000;
  const newerId = store.genId();
  store.create({ id: newerId, spec: { prompt: 'second' }, prompt: 'second task' });
  const listed = store.list();
  assert.strictEqual(listed.length, 2);
  assert.strictEqual(listed[0].id, newerId, 'newest task first');
  assert.strictEqual(listed[1].id, olderId);

  // remove: reports success and the meta is gone afterwards.
  assert.strictEqual(store.remove(olderId), true);
  assert.strictEqual(store.readMeta(olderId), null);
});
|
|
102
|
+
|
|
103
|
+
// --------------------------------------------------------------------------
|
|
104
|
+
// Validation (before detach)
|
|
105
|
+
// --------------------------------------------------------------------------
|
|
106
|
+
test('validateLaunch flags empty prompt, missing model, malformed policy, and unavailable strict sandbox', async () => {
  const okConfig = { api_base: 'http://x', default_model: 'm' };
  // True when at least one returned error message matches `pattern`.
  const mentions = (errors, pattern) => errors.some((message) => pattern.test(message));

  // Baseline: a well-formed launch produces no errors.
  const clean = await bg.validateLaunch({ prompt: 'hi', config: okConfig, policy: {} });
  assert.deepStrictEqual(clean, [], 'valid launch → no errors');

  // Whitespace-only prompt.
  const blankPrompt = await bg.validateLaunch({ prompt: ' ', config: okConfig });
  assert.ok(mentions(blankPrompt, /prompt is empty/));

  // Config without a default model.
  const noModel = await bg.validateLaunch({ prompt: 'hi', config: { api_base: 'http://x' } });
  assert.ok(mentions(noModel, /no model/), 'missing model surfaced');

  // Policy rule with an unknown action.
  const badAction = await bg.validateLaunch({
    prompt: 'hi',
    config: okConfig,
    policy: { rules: [{ tool: 'shell', action: 'banana' }] },
  });
  assert.ok(mentions(badAction, /action must be one of/), 'bad rule action surfaced');

  // Policy rule carrying both `pattern` and `path`.
  const twoMatchers = await bg.validateLaunch({
    prompt: 'hi',
    config: okConfig,
    policy: { rules: [{ tool: 'shell', pattern: 'a', path: 'b', action: 'allow' }] },
  });
  assert.ok(mentions(twoMatchers, /more than one matcher/), 'multi-matcher rule surfaced');

  // Sandbox reported unavailable while failIfUnavailable is set.
  const strictSandbox = await bg.validateLaunch({
    prompt: 'hi',
    config: okConfig,
    sandboxConfig: { mode: 'auto', failIfUnavailable: true },
    detection: { available: false, reason: 'no bwrap' },
  });
  assert.ok(mentions(strictSandbox, /sandbox unavailable/), 'strict sandbox unavailable surfaced');

  // Same sandbox unavailable but NOT strict → not a launch error (fail-safe at runtime).
  const lenientSandbox = await bg.validateLaunch({
    prompt: 'hi',
    config: okConfig,
    sandboxConfig: { mode: 'auto', failIfUnavailable: false },
    detection: { available: false, reason: 'no bwrap' },
  });
  assert.deepStrictEqual(lenientSandbox, [], 'non-strict unavailable sandbox is not a launch error');
});
|
|
138
|
+
|
|
139
|
+
test('launchBackground: validation failure throws BEFORE detach — no process spawned', async () => {
  // Counting stand-in for child_process.spawn: any call would mean a child was started.
  let spawned = 0;
  const spawn = () => {
    spawned += 1;
    return { pid: 1, unref() {} };
  };
  const store = bg.createTaskStore({ rootDir: freshRoot() });
  const config = { api_base: 'http://x', default_model: 'm' };

  // An empty prompt must be rejected by the launcher.
  await assert.rejects(
    () => bg.launchBackground({ prompt: '', config, store, spawn }),
    /Cannot launch background task/,
  );

  assert.strictEqual(spawned, 0, 'no child spawned on validation failure (no orphan)');
  assert.strictEqual(store.list().length, 0, 'no registry record created on validation failure');
});
|
|
150
|
+
|
|
151
|
+
test('launchBackground: persists spec + record, detaches, records pid; sandbox defaults ON, opt-out explicit', async () => {
  // Recording stand-in for child_process.spawn; every call is captured for inspection.
  const calls = [];
  const spawn = (cmd, args, opts) => {
    calls.push({ cmd, args, opts });
    return { pid: 9999, unref() {} };
  };
  const isBgExec = (call) => call.args.includes('__bg-exec');

  // Default sandbox (auto) — ON in the spec.
  const defaultStore = bg.createTaskStore({ rootDir: freshRoot() });
  const defaultRun = await bg.launchBackground({
    prompt: 'do a thing',
    config: { api_base: 'http://x', default_model: 'm', sandbox: { mode: 'auto' } },
    store: defaultStore,
    spawn,
    resolveKey: () => 'secret-key',
  });
  assert.ok(defaultRun.id && defaultRun.pid === 9999);

  const defaultSpec = defaultStore.readSpec(defaultRun.id);
  assert.strictEqual(defaultSpec.sandbox.mode, 'auto', 'sandbox ON by default in the spec');
  assert.strictEqual(defaultSpec.apiBase, 'http://x');
  const keyLeaked = 'apiKey' in defaultSpec || JSON.stringify(defaultSpec).includes('secret-key');
  assert.ok(!keyLeaked, 'API key never written to the spec');

  const defaultMeta = defaultStore.readMeta(defaultRun.id);
  assert.strictEqual(defaultMeta.pid, 9999);
  assert.strictEqual(defaultMeta.status, 'running');

  // Child argv targets the internal __bg-exec entry with the task dir.
  const firstLaunch = calls.find(isBgExec);
  assert.ok(firstLaunch, '__bg-exec child launched');
  assert.ok(firstLaunch.args.includes(defaultStore.dir(defaultRun.id)), 'task dir passed to child');
  assert.strictEqual(firstLaunch.opts.detached, true, 'child detached');
  assert.strictEqual(firstLaunch.opts.env.SEMALT_API_KEY, 'secret-key', 'key passed via env, not disk');
  assert.ok(!firstLaunch.args.includes('--dangerously-skip-permissions'), 'no skip flag by default');

  // Explicit opt-out: sandbox off + skip-permissions propagated.
  const optOutStore = bg.createTaskStore({ rootDir: freshRoot() });
  const optOutRun = await bg.launchBackground({
    prompt: 'danger',
    config: { api_base: 'http://x', default_model: 'm', sandbox: { mode: 'off' } },
    policy: bg.buildPolicy({ dangerouslySkipPermissions: true }),
    store: optOutStore,
    spawn,
  });
  assert.strictEqual(optOutStore.readSpec(optOutRun.id).sandbox.mode, 'off', 'explicit sandbox off honored');

  // The most recent __bg-exec call belongs to the opt-out launch.
  const lastLaunch = calls.filter(isBgExec).pop();
  assert.ok(lastLaunch.args.includes('--dangerously-skip-permissions'), 'skip flag propagated to child argv');
});
|
|
188
|
+
|
|
189
|
+
// --------------------------------------------------------------------------
|
|
190
|
+
// Child execution (real createAgent ↔ mock-LLM)
|
|
191
|
+
// --------------------------------------------------------------------------
|
|
192
|
+
/**
 * Register a task in `store` whose spec points the agent at the mock LLM.
 * The sandbox is switched off in the spec and the working directory is the
 * suite's temp cwd.
 * @param {object} store - Task store to create the record in.
 * @param {string} id - Task id to register.
 * @param {object} mock - Mock LLM handle; its `base` becomes the spec's `apiBase`.
 * @param {object} [extra] - Optional overrides: `prompt` (falls back to
 *   'do it') and `policy` (falls back to `bg.buildPolicy({})`).
 */
function specFor(store, id, mock, extra = {}) {
  const prompt = extra.prompt || 'do it';
  const model = 'test-model';

  const spec = {
    version: 1,
    prompt,
    apiBase: mock.base,
    model,
    contextLength: null,
    maxIterations: 50,
    cwd: tmpCwd,
    policy: extra.policy || bg.buildPolicy({}),
    sandbox: { mode: 'off', failIfUnavailable: false },
  };

  store.create({ id, prompt, model, spec });
}
|
|
210
|
+
|
|
211
|
+
test('runBackgroundChild: completes and writes the result envelope', async () => {
  const mock = await startMockLLM();
  mock.replyWith('All done — 42.');
  const store = bg.createTaskStore({ rootDir: freshRoot() });
  const taskId = store.genId();
  specFor(store, taskId, mock, { prompt: 'answer please' });

  try {
    const outcome = await bg.runBackgroundChild({ taskDir: store.dir(taskId), store });
    assert.strictEqual(outcome.status, 'completed');

    // Result envelope written for the task.
    const envelope = store.readResult(taskId);
    assert.ok(envelope, 'result.json written');
    assert.match(envelope.result, /42/);
    assert.strictEqual(envelope.stopReason, 'end_turn');
    assert.strictEqual(envelope.verifyStatus, 'skipped');
    assert.ok(Array.isArray(envelope.toolCalls));

    // Registry record finalized.
    const finalMeta = store.readMeta(taskId);
    assert.strictEqual(finalMeta.status, 'completed');
    assert.ok(finalMeta.finished_at, 'finished_at recorded');

    // Event log contains a `result` entry.
    const loggedResult = store.readEvents(taskId).some((event) => event.type === 'result');
    assert.ok(loggedResult, 'result event appended');
  } finally {
    await mock.close();
  }
});
|
|
235
|
+
|
|
236
|
+
test('SAFE POSTURE: no policy → a mutating write is REFUSED (paired with: an allow rule permits it)', async () => {
  // Refuse case — empty policy.
  const refusingMock = await startMockLLM();
  refusingMock.replyWith('<write_file path="bg-should-not-exist.txt">secret</write_file>');
  const refusingStore = bg.createTaskStore({ rootDir: freshRoot() });
  const refusedId = refusingStore.genId();
  specFor(refusingStore, refusedId, refusingMock, { policy: bg.buildPolicy({}) });
  try {
    await bg.runBackgroundChild({ taskDir: refusingStore.dir(refusedId), store: refusingStore });
    const forbiddenFile = path.join(tmpCwd, 'bg-should-not-exist.txt');
    assert.strictEqual(fs.existsSync(forbiddenFile), false, 'write refused with no policy');
  } finally {
    await refusingMock.close();
  }

  // Paired positive — an allow rule lets the same write proceed.
  const allowingMock = await startMockLLM();
  allowingMock.replyWith('<write_file path="bg-allowed.txt">hello bg</write_file>');
  allowingMock.replyWith('Wrote it.');
  const allowingStore = bg.createTaskStore({ rootDir: freshRoot() });
  const allowedId = allowingStore.genId();
  const allowWrites = bg.buildPolicy({ rules: [{ tool: 'write_file', path: '**', action: 'allow' }] });
  specFor(allowingStore, allowedId, allowingMock, { policy: allowWrites });
  try {
    const outcome = await bg.runBackgroundChild({ taskDir: allowingStore.dir(allowedId), store: allowingStore });
    assert.strictEqual(outcome.status, 'completed');
    const writtenFile = path.join(tmpCwd, 'bg-allowed.txt');
    assert.strictEqual(fs.existsSync(writtenFile), true, 'allow rule permitted the write');
    assert.strictEqual(fs.readFileSync(writtenFile, 'utf8'), 'hello bg');
  } finally {
    await allowingMock.close();
  }
});
|
|
267
|
+
|
|
268
|
+
test('deny-list stays active in the background process', async () => {
  const mock = await startMockLLM();
  mock.replyWith('<exec>rm -rf /</exec>');
  mock.replyWith('Stopped.');
  const store = bg.createTaskStore({ rootDir: freshRoot() });
  const taskId = store.genId();

  // Allow the exec tier so the GATE passes — the deny-list (which a tier can't
  // bypass) must still block the destructive command.
  const execAllowed = bg.buildPolicy({ allowedTiers: ['exec'] });
  specFor(store, taskId, mock, { policy: execAllowed });

  // A failed tool event whose detail mentions the deny-list.
  const isDenyListBlock = (event) =>
    event.type === 'tool' && event.ok === false && /deny-list/i.test(event.detail || '');

  try {
    await bg.runBackgroundChild({ taskDir: store.dir(taskId), store });
    const blocked = store.readEvents(taskId).some(isDenyListBlock);
    assert.ok(blocked, 'rm -rf / blocked by the deny-list inside the background process');
  } finally {
    await mock.close();
  }
});
|
|
286
|
+
|
|
287
|
+
// --------------------------------------------------------------------------
|
|
288
|
+
// Lifecycle reconciliation + kill
|
|
289
|
+
// --------------------------------------------------------------------------
|
|
290
|
+
test('stale detection + prune: a dead "running" task is stale and prunable; a live one is kept', () => {
  const store = bg.createTaskStore({ rootDir: freshRoot() });

  // Three records: "running" with a pid the probe reports dead, "running" with
  // our own pid, and one already completed.
  const dead = store.genId();
  store.create({ id: dead, spec: {}, prompt: 'dead' });
  store.patchMeta(dead, { pid: 424242, status: 'running' });
  const live = store.genId();
  store.create({ id: live, spec: {}, prompt: 'live' });
  store.patchMeta(live, { pid: process.pid, status: 'running' }); // our own pid = alive
  const done = store.genId();
  store.create({ id: done, spec: {}, prompt: 'done' });
  store.patchMeta(done, { status: 'completed' });

  // Injected liveness probe: only this test process counts as alive.
  const alive = (pid) => pid === process.pid;
  assert.strictEqual(bg.effectiveStatus(store.readMeta(dead), alive), 'stale');
  assert.strictEqual(bg.effectiveStatus(store.readMeta(live), alive), 'running');
  assert.strictEqual(bg.effectiveStatus(store.readMeta(done), alive), 'completed');

  // Sort both sides once so the comparison is order-independent. (The list
  // was previously sorted in place a second time inside the assertion.)
  const prunable = bg.prunableIds(store.list(), alive).sort();
  assert.deepStrictEqual(prunable, [dead, done].sort(), 'stale + completed are prunable; live is kept');

  const list = bg.formatTaskList(store.list(), { alive });
  assert.match(list, /stale task\(s\)/, 'list warns about stale tasks');
});
|
|
313
|
+
|
|
314
|
+
test('killTask: tree-kills the recorded pid then marks terminated', async () => {
  const store = bg.createTaskStore({ rootDir: freshRoot() });
  const taskId = store.genId();
  store.create({ id: taskId, spec: {}, prompt: 'runaway' });
  store.patchMeta(taskId, { pid: 555, status: 'running' });

  // Injected collaborators: record every signal, skip real waiting.
  const signals = [];
  const kill = (pid, sig) => {
    signals.push([pid, sig]);
    return true;
  };
  const delay = () => Promise.resolve();

  // Alive for the first check (so a SIGTERM is sent), dead after the grace wait.
  let probes = 0;
  const alive = () => {
    probes += 1;
    return probes < 2;
  };

  const outcome = await bg.killTask(store, taskId, { alive, kill, delay });

  assert.ok(outcome.ok);
  assert.deepStrictEqual(signals[0], [555, 'SIGTERM'], 'SIGTERM sent to the pid');
  assert.strictEqual(store.readMeta(taskId).status, 'terminated');
  assert.ok(store.readMeta(taskId).finished_at, 'finished_at recorded on kill');
});
|
|
334
|
+
|
|
335
|
+
test('killTask: a task whose process already died is finalized, not re-killed', async () => {
  const store = bg.createTaskStore({ rootDir: freshRoot() });
  const taskId = store.genId();
  store.create({ id: taskId, spec: {}, prompt: 'gone' });
  store.patchMeta(taskId, { pid: 4242, status: 'running' });

  // The probe always reports "dead"; count any signal that is sent anyway.
  let signalsSent = 0;
  const outcome = await bg.killTask(store, taskId, {
    alive: () => false,
    kill: () => { signalsSent += 1; },
    delay: () => Promise.resolve(),
  });

  assert.ok(outcome.ok);
  assert.strictEqual(signalsSent, 0, 'no signal sent to an already-dead process');
  assert.strictEqual(store.readMeta(taskId).status, 'terminated');
});
|
|
346
|
+
|
|
347
|
+
test('a REAL detached process is alive then tree-killable by PID', async () => {
  const { spawn } = require('child_process');
  // The child idles on a ~1e9 ms interval, so it never exits on its own.
  const child = proc.spawnDetached(spawn, process.execPath, ['-e', 'setInterval(() => {}, 1e9)'], { cwd: tmpCwd });
  child.unref();
  const pid = child.pid;
  try {
    assert.ok(proc.isProcessAlive(pid), 'detached child is alive');
    proc.killTreeByPid(pid, 'SIGKILL');
    // Poll for death (tree-kill is async at the OS level).
    let aliveAfter = true;
    for (let i = 0; i < 50; i++) {
      if (!proc.isProcessAlive(pid)) { aliveAfter = false; break; }
      await sleep(20);
    }
    assert.strictEqual(aliveAfter, false, 'real detached process is dead after tree-kill');
  } finally {
    // Safety net: if the tree-kill threw or the poll timed out, the idle child
    // would outlive the test run. Signal it directly as a last resort.
    try {
      process.kill(pid, 'SIGKILL');
    } catch {
      // Expected on the happy path: the process is already gone (or pid was never assigned).
    }
  }
});
|
|
362
|
+
|
|
363
|
+
// --------------------------------------------------------------------------
|
|
364
|
+
// End-to-end: a REAL detached `__bg-exec` process writes the result envelope
|
|
365
|
+
// --------------------------------------------------------------------------
|
|
366
|
+
test('E2E: a real detached child runs the agent and writes the result envelope (survives parent return)', async () => {
  const mock = await startMockLLM();
  // Filled in once the launch succeeds; consulted by the cleanup below.
  let launched = null;
  let reachedTerminal = false;
  try {
    mock.replyWith('Background says hello.');
    const root = freshRoot();
    const store = bg.createTaskStore({ rootDir: root });
    const { spawn } = require('child_process');
    // Launch inside the try so a rejected launch still closes the mock server.
    launched = await bg.launchBackground({
      prompt: 'say hello',
      config: { api_base: mock.base, default_model: 'test-model', sandbox: { mode: 'off' }, max_iterations: 50 },
      sandboxConfig: { mode: 'off' },
      model: 'test-model',
      cwd: tmpCwd,
      store,
      spawn,
      resolveKey: () => 'test-key',
    });
    const { id } = launched;

    // The launcher has returned; the detached child runs independently.
    // Poll the registry (up to 200 × 50 ms) until the task reaches a terminal status.
    let meta;
    for (let i = 0; i < 200; i++) {
      meta = store.readMeta(id);
      if (meta && bg.TERMINAL_STATUSES.has(meta.status)) { reachedTerminal = true; break; }
      await sleep(50);
    }
    assert.ok(meta && meta.status === 'completed', `child completed (status=${meta && meta.status})`);
    const result = store.readResult(id);
    assert.ok(result, 'result envelope written by the detached child');
    assert.match(result.result, /hello/i);
    assert.strictEqual(result.stopReason, 'end_turn');
  } finally {
    // A child that never reached a terminal status would otherwise keep
    // running after the test has failed; tree-kill it before tearing down.
    try {
      if (launched && launched.pid && !reachedTerminal && proc.isProcessAlive(launched.pid)) {
        proc.killTreeByPid(launched.pid, 'SIGKILL');
      }
    } catch {
      // Best-effort: a failed cleanup kill must not mask the test's own error.
    }
    await mock.close();
  }
});
|
|
399
|
+
|
|
400
|
+
// --------------------------------------------------------------------------
|
|
401
|
+
// Tool-exposure decision (constraint 5)
|
|
402
|
+
// --------------------------------------------------------------------------
|
|
403
|
+
test('background-launch is NOT exposed as an agent tool', () => {
  const { TOOL_SPECS } = require('../lib/tool_specs');
  const { TAG_REGISTRY } = require('../lib/constants');
  const reg = require('../lib/tool_registry');

  // Plausible names a background-launch tool could have been registered under.
  const forbidden = ['run_background', 'spawn_background', 'background', 'launch_background', 'bg_run'];
  for (const candidate of forbidden) {
    const isSpec = candidate in TOOL_SPECS;
    assert.ok(!isSpec, `${candidate} must not be a tool spec`);
    const isTag = candidate in TAG_REGISTRY;
    assert.ok(!isTag, `${candidate} must not be a registered tag`);
  }

  // Dynamically registered tools must not mention "background" either.
  const dynamicNames = reg.dynamicToolEntries().map((entry) => entry.tool);
  const hasBackgroundTool = dynamicNames.some((toolName) => /background/i.test(toolName));
  assert.ok(!hasBackgroundTool, 'no dynamic background-launch tool registered');
});
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Characterization tests for cmdChat (Task 1.5, tests-first). These lock in the
|
|
4
|
+
// chat loop's observable behavior — slash dispatch, the agent path, session
|
|
5
|
+
// reset, and teardown — BEFORE cmdChat is decomposed into lib/commands/ modules,
|
|
6
|
+
// so the split can be proven behavior-preserving.
|
|
7
|
+
|
|
8
|
+
const { test } = require('node:test');
|
|
9
|
+
const assert = require('node:assert');
|
|
10
|
+
|
|
11
|
+
const { helpText } = require('../lib/commands/registry');
|
|
12
|
+
const { startChat } = require('./harness/chat-harness');
|
|
13
|
+
|
|
14
|
+
test('startup shows the welcome banner message', async () => {
  const chat = await startChat();
  try {
    // Both lines must be in the history before any input is submitted.
    const banner = chat.chatHistory.find(/Semalt\.AI/);
    assert.ok(banner, 'welcome message present');
    const helpHint = chat.chatHistory.find(/Type \/help for commands/);
    assert.ok(helpHint, 'help hint present');
  } finally {
    // End the session, wait for the chat promise, then release the harness.
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
23
|
+
|
|
24
|
+
test('/help renders the registry help text', async () => {
  const chat = await startChat();
  try {
    await chat.submit('/help');
    // The emitted text must equal the registry's help text exactly.
    const emitted = chat.chatHistory.texts();
    const expected = helpText();
    assert.ok(emitted.includes(expected), 'help text emitted verbatim');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
33
|
+
|
|
34
|
+
test('a normal message is blocked when not logged in', async () => {
  // Empty auth token = unauthenticated session.
  const chat = await startChat({ config: { auth_token: '' } });
  try {
    await chat.submit('hello there');
    const refusal = chat.chatHistory.find(/Not logged in/);
    assert.ok(refusal, 'unauthenticated message refused');
    const agentRuns = chat.calls.runAgentLoop.length;
    assert.strictEqual(agentRuns, 0, 'agent not invoked');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
44
|
+
|
|
45
|
+
test('a normal message runs the agent loop when authenticated', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  // Matches the user turn carrying the submitted text.
  const isSubmittedUserMessage = (message) => message.role === 'user' && message.content === 'do the thing';
  try {
    await chat.submit('do the thing');
    assert.strictEqual(chat.calls.runAgentLoop.length, 1, 'agent invoked once');

    const turn = chat.calls.runAgentLoop[0];
    const threaded = turn.messages.some(isSubmittedUserMessage);
    assert.ok(threaded, 'user message threaded into the loop');

    const displayed = chat.chatHistory.messages.some(isSubmittedUserMessage);
    assert.ok(displayed, 'user message shown in history');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
57
|
+
|
|
58
|
+
test('/new resets the conversation so the next turn starts fresh', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  try {
    await chat.submit('first message');
    await chat.submit('/new');
    assert.ok(chat.chatHistory.find(/Started new conversation/));

    await chat.submit('second message');

    // Inspect only the most recent agent turn.
    const turns = chat.calls.runAgentLoop;
    const lastTurn = turns[turns.length - 1];
    const userMsgs = [];
    for (const message of lastTurn.messages) {
      if (message.role === 'user') userMsgs.push(message.content);
    }
    assert.deepStrictEqual(userMsgs, ['second message'], 'history reset — only the post-/new message remains');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
72
|
+
|
|
73
|
+
test('/clear resets conversation and clears approvals', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  try {
    await chat.submit('a message');
    // Snapshot the counter so the assertion checks the delta caused by /clear.
    const clearsBefore = chat.calls.permissionClear;
    await chat.submit('/clear');
    const notice = chat.chatHistory.find(/cleared/i);
    assert.ok(notice);
    assert.strictEqual(chat.calls.permissionClear, clearsBefore + 1, 'permission approvals cleared');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
85
|
+
|
|
86
|
+
test('/model with no arg shows the current model; with an arg switches it', async () => {
  const chat = await startChat({ config: { auth_token: 'tok', default_model: 'm-one' } });
  try {
    // No argument: report the configured model.
    await chat.submit('/model');
    const shown = chat.chatHistory.find(/Current model: m-one/);
    assert.ok(shown);

    // With an argument: announce the switch.
    await chat.submit('/model m-two');
    const switched = chat.chatHistory.find(/Model → m-two/);
    assert.ok(switched);
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
97
|
+
|
|
98
|
+
test('an unknown slash-looking message falls through to the agent (authed)', async () => {
  const chat = await startChat({ config: { auth_token: 'tok' } });
  try {
    // Starts with "/" but is not a registered command.
    await chat.submit('/notacommand please');
    const agentRuns = chat.calls.runAgentLoop.length;
    assert.strictEqual(agentRuns, 1, 'non-command text reaches the agent');
  } finally {
    await chat.submit('exit');
    await chat.done;
    chat.cleanup();
  }
});
|
|
107
|
+
|
|
108
|
+
test('exit ends the chat session (the cmdChat promise resolves)', async () => {
  const c = await startChat();
  try {
    await c.submit('exit');
    // Reaching the line after this await IS the assertion: `c.done`
    // resolves only if the exit handler ran resolveExit.
    await c.done;
  } finally {
    // Release the harness even when submit/done rejects, matching the other
    // tests in this file, which all call cleanup() from a finally block.
    c.cleanup();
  }
});
|