@semalt-ai/code 1.8.5 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/.github/workflows/ci.yml +69 -0
- package/ARCHITECTURE.md +6 -95
- package/CLAUDE.md +196 -316
- package/README.md +148 -4
- package/docs/ARCHITECTURE.md +1321 -0
- package/docs/CONFIG.md +340 -0
- package/docs/HISTORY.md +245 -0
- package/examples/embed.js +74 -0
- package/index.js +251 -10
- package/lib/agent.js +856 -120
- package/lib/api.js +239 -50
- package/lib/args.js +74 -2
- package/lib/audit.js +23 -1
- package/lib/background.js +584 -0
- package/lib/checkpoints.js +757 -0
- package/lib/commands/auth.js +94 -0
- package/lib/commands/chat-session.js +489 -0
- package/lib/commands/chat-slash.js +415 -0
- package/lib/commands/chat-turn.js +669 -0
- package/lib/commands/chat.js +407 -0
- package/lib/commands/custom.js +157 -0
- package/lib/commands/history-utils.js +66 -0
- package/lib/commands/index.js +268 -0
- package/lib/commands/mcp.js +113 -0
- package/lib/commands/oneshot.js +193 -0
- package/lib/commands/registry.js +269 -0
- package/lib/commands/tasks.js +89 -0
- package/lib/compact.js +87 -0
- package/lib/config.js +360 -11
- package/lib/constants.js +401 -3
- package/lib/deny.js +199 -0
- package/lib/doctor.js +160 -0
- package/lib/headless.js +202 -0
- package/lib/hooks.js +286 -0
- package/lib/images.js +270 -0
- package/lib/internals.js +49 -0
- package/lib/mcp/boundary.js +131 -0
- package/lib/mcp/client.js +270 -0
- package/lib/mcp/oauth.js +134 -0
- package/lib/memory.js +209 -0
- package/lib/metrics.js +37 -2
- package/lib/payload.js +54 -0
- package/lib/permission-rules.js +401 -0
- package/lib/permissions.js +123 -26
- package/lib/pricing.js +67 -0
- package/lib/proc.js +62 -0
- package/lib/prompts.js +99 -8
- package/lib/sandbox.js +568 -0
- package/lib/sdk.js +328 -0
- package/lib/secrets.js +211 -0
- package/lib/skills.js +223 -0
- package/lib/subagents.js +516 -0
- package/lib/tool_registry.js +2862 -0
- package/lib/tool_specs.js +263 -9
- package/lib/tools.js +352 -1039
- package/lib/ui/anim.js +86 -0
- package/lib/ui/ansi.js +17 -27
- package/lib/ui/chat-history.js +253 -71
- package/lib/ui/create-ui.js +67 -24
- package/lib/ui/diff.js +90 -25
- package/lib/ui/file-activity.js +236 -0
- package/lib/ui/format.js +195 -29
- package/lib/ui/input-field.js +21 -11
- package/lib/ui/md-stream.js +234 -0
- package/lib/ui/render-operation.js +113 -0
- package/lib/ui/select.js +1 -4
- package/lib/ui/status-bar.js +146 -36
- package/lib/ui/stream.js +20 -13
- package/lib/ui/theme.js +190 -44
- package/lib/ui/tool-operation.js +190 -0
- package/lib/ui/utils.js +9 -5
- package/lib/ui/web-activity.js +270 -0
- package/lib/ui/writer.js +159 -45
- package/lib/ui.js +1 -1
- package/lib/verify.js +229 -0
- package/lib/web-extract.js +213 -0
- package/lib/web-summarize.js +68 -0
- package/package.json +19 -4
- package/scripts/lint.js +57 -0
- package/test/agent-loop.test.js +389 -0
- package/test/anim-driver.test.js +153 -0
- package/test/ask-user-display.test.js +226 -0
- package/test/ask-user-gate.test.js +231 -0
- package/test/background.test.js +414 -0
- package/test/chat-history-nocolor.test.js +155 -0
- package/test/chat-relogin.test.js +207 -0
- package/test/chat.test.js +114 -0
- package/test/checkpoints-agent.test.js +181 -0
- package/test/checkpoints.test.js +650 -0
- package/test/command-registry.test.js +160 -0
- package/test/compact.test.js +116 -0
- package/test/completion-lazy.test.js +52 -0
- package/test/config-merge.test.js +324 -0
- package/test/config-quarantine.test.js +128 -0
- package/test/config-write-guard-allow-anywhere.test.js +56 -0
- package/test/config-write-guard-skip.test.js +46 -0
- package/test/config-write-guard.test.js +153 -0
- package/test/context-split.test.js +215 -0
- package/test/cost-doctor.test.js +142 -0
- package/test/custom-commands-chat.test.js +106 -0
- package/test/custom-commands.test.js +230 -0
- package/test/defer-detail-band.test.js +403 -0
- package/test/deny-windows.test.js +120 -0
- package/test/deny.test.js +83 -0
- package/test/detail-band-tab-flatten.test.js +242 -0
- package/test/download-allow-anywhere.test.js +66 -0
- package/test/download-confine.test.js +153 -0
- package/test/exec-diff.test.js +268 -0
- package/test/executors.test.js +599 -0
- package/test/extract-tool-calls.test.js +349 -0
- package/test/fetch-url-validation.test.js +219 -0
- package/test/file-activity.test.js +522 -0
- package/test/fixtures/tool-calls.js +57 -0
- package/test/fixtures/web-page.js +91 -0
- package/test/git-tools.test.js +384 -0
- package/test/grep-glob-serialize.test.js +242 -0
- package/test/grep-glob.test.js +268 -0
- package/test/grep-path-target.test.js +227 -0
- package/test/harness/README.md +57 -0
- package/test/harness/chat-harness.js +143 -0
- package/test/harness/memwarn-headless-child.js +65 -0
- package/test/harness/mock-llm.js +120 -0
- package/test/harness/mock-mcp-server.js +142 -0
- package/test/harness/sse-server.js +69 -0
- package/test/headless.test.js +348 -0
- package/test/history-utils.test.js +88 -0
- package/test/hooks-agent.test.js +238 -0
- package/test/hooks-verify-sandbox.test.js +232 -0
- package/test/hooks.test.js +216 -0
- package/test/http-get-user-agent.test.js +142 -0
- package/test/images-api.test.js +208 -0
- package/test/images.test.js +238 -0
- package/test/input-field-ctrl-o.test.js +37 -0
- package/test/live-height-physical.test.js +281 -0
- package/test/max-iterations.test.js +218 -0
- package/test/mcp-boundary.test.js +57 -0
- package/test/mcp-client.test.js +267 -0
- package/test/mcp-oauth.test.js +86 -0
- package/test/md-stream.test.js +183 -0
- package/test/memory-truncation-warning.test.js +222 -0
- package/test/memory.test.js +198 -0
- package/test/native-dispatch.test.js +409 -0
- package/test/native-live-narration.test.js +254 -0
- package/test/output-chokepoint.test.js +188 -0
- package/test/output-heredoc-leak.test.js +195 -0
- package/test/output-preview.test.js +245 -0
- package/test/path-guards.test.js +134 -0
- package/test/payload.test.js +99 -0
- package/test/permission-rules-agent.test.js +210 -0
- package/test/permission-rules.test.js +297 -0
- package/test/permissions.test.js +362 -0
- package/test/plan-mode.test.js +167 -0
- package/test/read-paginate.test.js +275 -0
- package/test/readonly-tools.test.js +177 -0
- package/test/render-operation.test.js +317 -0
- package/test/replay-descriptor-xml.test.js +216 -0
- package/test/replay-descriptor.test.js +189 -0
- package/test/replay-web-aggregate.test.js +291 -0
- package/test/replay-web-persist.test.js +241 -0
- package/test/result-cap.test.js +233 -0
- package/test/running-glyph-anim.test.js +111 -0
- package/test/sandbox-agent.test.js +147 -0
- package/test/sandbox-integration.test.js +216 -0
- package/test/sandbox.test.js +408 -0
- package/test/sdk.test.js +234 -0
- package/test/shell-output-cap.test.js +181 -0
- package/test/skills-chat.test.js +110 -0
- package/test/skills.test.js +295 -0
- package/test/smoke.test.js +68 -0
- package/test/status-bar-driver.test.js +93 -0
- package/test/status-bar-pause.test.js +164 -0
- package/test/status-bar-resync.test.js +188 -0
- package/test/stream-parser.test.js +171 -0
- package/test/subagents-agent.test.js +178 -0
- package/test/subagents.test.js +222 -0
- package/test/theme-palette.test.js +166 -0
- package/test/tool-registry.test.js +85 -0
- package/test/trim-budget.test.js +101 -0
- package/test/truncate-visible.test.js +78 -0
- package/test/verify-agent.test.js +317 -0
- package/test/verify.test.js +141 -0
- package/test/view-image.test.js +199 -0
- package/test/web-activity-ordering.test.js +203 -0
- package/test/web-activity.test.js +207 -0
- package/test/web-data-extraction-guidance.test.js +71 -0
- package/test/web-extract.test.js +185 -0
- package/test/web-fetch-agent.test.js +291 -0
- package/test/web-fetch-mode.test.js +193 -0
- package/test/web-search.test.js +380 -0
- package/lib/commands.js +0 -1438
- package/path +0 -1
|
@@ -0,0 +1,584 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Background tasks — detached agent processes (Task 5.3)
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
//
|
|
7
|
+
// Run an agent task as a DETACHED background process that survives the terminal
|
|
8
|
+
// closing, plus a task registry to list / inspect / collect / terminate it.
|
|
9
|
+
// Each background task is its OWN process (its own `process.cwd()`, its own
|
|
10
|
+
// dynamic tool registry, its own everything) — which sidesteps the documented
|
|
11
|
+
// in-process multi-instance global-state limitations of the embedding SDK
|
|
12
|
+
// (Task 5.2): isolation comes for free from the process boundary.
|
|
13
|
+
//
|
|
14
|
+
// LIFECYCLE
|
|
15
|
+
// launchBackground() (parent / terminal-attached)
|
|
16
|
+
// 1. VALIDATE everything checkable SYNCHRONOUSLY, surfacing errors to the
|
|
17
|
+
// terminal — config validity, a resolvable model, permission-policy
|
|
18
|
+
// shape, sandbox availability. After detach there is no terminal to
|
|
19
|
+
// reach, so a misconfiguration must fail HERE, before any process forks.
|
|
20
|
+
// 2. Write the launch spec + an initial registry record under the task dir.
|
|
21
|
+
// 3. spawnDetached() the child (`semalt-code __bg-exec <taskDir>`), record
|
|
22
|
+
// its PID, and `unref()` so the parent can exit.
|
|
23
|
+
// runBackgroundChild() (detached)
|
|
24
|
+
// Reads the spec, builds an agent via the STABLE SDK facade (createAgent)
|
|
25
|
+
// with the LAUNCH-FIXED permission policy, runs the prompt to completion,
|
|
26
|
+
// and writes progress/result/status to the task dir. After detach: pure
|
|
27
|
+
// execution — no path back to the terminal.
|
|
28
|
+
//
|
|
29
|
+
// SECURITY (the 5.2 embedded perimeter, applied to the background context)
|
|
30
|
+
// * Permission posture is FIXED AT LAUNCH and refuse-by-default. A background
|
|
31
|
+
// task has no TTY, so its policy (preset allow/deny rules + coarse tiers, or
|
|
32
|
+
// the refuse-on-mutation default) is set when it starts and can NEVER fall
|
|
33
|
+
// through to an interactive prompt — createAgent installs a refuse approver
|
|
34
|
+
// when none is supplied (see lib/sdk.js).
|
|
35
|
+
// * The OS sandbox + destructive-command deny-list stay ON in the child unless
|
|
36
|
+
// an opt-out (`sandbox.mode: 'off'`, `dangerouslySkipPermissions`) is passed
|
|
37
|
+
// EXPLICITLY at launch. There is no human to ask, so an unavailable sandbox
|
|
38
|
+
// in `auto` mode REFUSES the command (createAgent's onUnsandboxed default).
|
|
39
|
+
// * Background-launch is NOT exposed as an agent tool — it is a CLI/SDK,
|
|
40
|
+
// human-initiated surface only. See TOOL-EXPOSURE DECISION at the bottom.
|
|
41
|
+
//
|
|
42
|
+
// IPC IS VIA FILES, not a live channel — the detached child writes NDJSON
|
|
43
|
+
// progress + a result envelope to its task dir; the parent reads them on
|
|
44
|
+
// `collect`. This survives the terminal closing and needs no live IPC.
|
|
45
|
+
//
|
|
46
|
+
// TASK STORE LAYOUT — ~/.semalt-ai/tasks/<id>/
|
|
47
|
+
// spec.json launch spec the child reads (prompt, model, policy, sandbox,
|
|
48
|
+
// cwd). NOT secrets — the API key is passed via the child's env
|
|
49
|
+
// (SEMALT_API_KEY), never written to disk here.
|
|
50
|
+
// meta.json the registry record / current status snapshot
|
|
51
|
+
// { id, pid, status, started_at, finished_at, prompt_summary,
|
|
52
|
+
// model, policy_summary, stopReason?, error? }
|
|
53
|
+
// events.ndjson append-only progress log (one JSON object per line, like the
|
|
54
|
+
// audit log): status / tool / warning / error / result events.
|
|
55
|
+
// result.json the final headless envelope on completion
|
|
56
|
+
// { result, toolCalls, usage, cost, stopReason, verifyStatus }
|
|
57
|
+
//
|
|
58
|
+
// Everything is injectable (fs / now / spawn / createAgent / detection) so the
|
|
59
|
+
// whole module is deterministic and unit-testable offline.
|
|
60
|
+
|
|
61
|
+
const fsReal = require('fs');
|
|
62
|
+
const os = require('os');
|
|
63
|
+
const path = require('path');
|
|
64
|
+
const crypto = require('crypto');
|
|
65
|
+
|
|
66
|
+
const { normalizeSandbox, detectSandbox } = require('./sandbox');
|
|
67
|
+
const { spawnDetached, killTreeByPid, isProcessAlive } = require('./proc');
|
|
68
|
+
|
|
69
|
+
// Default task-store root under the user's home directory; createTaskStore
// places each task in its own subdirectory of this path.
const DEFAULT_TASKS_DIR = path.join(os.homedir(), '.semalt-ai', 'tasks');
|
|
70
|
+
|
|
71
|
+
// Lifecycle states. `stale` is NOT persisted — it is COMPUTED for a task marked
|
|
72
|
+
// running/starting whose PID is no longer alive (the process died without
|
|
73
|
+
// writing a terminal status, e.g. a SIGKILL or a crash). It surfaces in `tasks`
|
|
74
|
+
// so zombies never accumulate invisibly and can be pruned.
|
|
75
|
+
const TERMINAL_STATUSES = new Set(['completed', 'failed', 'terminated']);
|
|
76
|
+
const ACTIVE_STATUSES = new Set(['starting', 'running']);
|
|
77
|
+
|
|
78
|
+
// The only values validatePolicy accepts for a rule's `action`.
const VALID_ACTIONS = new Set(['allow', 'deny', 'ask']);
|
|
79
|
+
// Rule keys treated as matchers; validatePolicy reports a rule that sets more
// than one of them.
const MATCHER_KEYS = ['pattern', 'path', 'url', 'match'];
|
|
80
|
+
|
|
81
|
+
// Flatten `text` into one trimmed line (every whitespace run becomes a single
// space) and cap it at `max` UTF-16 code units. A value that is too long is cut
// to `max - 1` units and given a trailing ellipsis. null/undefined yield ''.
function summarize(text, max = 80) {
  const raw = text == null ? '' : String(text);
  const flat = raw.replace(/\s+/g, ' ').trim();
  if (flat.length <= max) return flat;
  return `${flat.slice(0, max - 1)}…`;
}
|
|
85
|
+
|
|
86
|
+
// --------------------------------------------------------------------------
|
|
87
|
+
// Task store — registry CRUD over the on-disk layout above.
|
|
88
|
+
// --------------------------------------------------------------------------
|
|
89
|
+
// Build a task store over `rootDir`: registry CRUD for the per-task files
// <rootDir>/<id>/{spec.json, meta.json, events.ndjson, result.json}.
//
//   rootDir  directory holding one subdirectory per task (default DEFAULT_TASKS_DIR)
//   fs       fs-like object; only synchronous calls are used (default: Node's fs)
//   now      clock returning a millisecond timestamp (default Date.now)
//
// Returns the store API. Nothing touches the disk until a method is called.
function createTaskStore({ rootDir = DEFAULT_TASKS_DIR, fs = fsReal, now = () => Date.now() } = {}) {
  function dir(id) { return path.join(rootDir, id); }

  function paths(id) {
    const d = dir(id);
    return {
      dir: d,
      spec: path.join(d, 'spec.json'),
      meta: path.join(d, 'meta.json'),
      events: path.join(d, 'events.ndjson'),
      result: path.join(d, 'result.json'),
    };
  }

  // True only for an id that names a direct child of rootDir. An empty id, '.',
  // '..' or anything carrying a path separator would make dir(id) resolve to
  // rootDir itself or to somewhere outside it.
  function isPlainId(id) {
    return typeof id === 'string'
      && id !== ''
      && id !== '.'
      && id !== '..'
      && path.basename(id) === id;
  }

  // crypto.randomBytes is fine to call directly — this is a process, not a
  // resumable workflow script.
  function genId() { return crypto.randomBytes(5).toString('hex'); }

  // Parse a JSON file; a missing, unreadable or malformed file yields null.
  function readJson(p) {
    try { return JSON.parse(fs.readFileSync(p, 'utf8')); }
    catch { return null; }
  }

  // Atomic write: write a temp sibling then rename, so a concurrent reader never
  // sees a half-written meta.json. If either step fails the temp sibling is
  // removed (best-effort) and the original error is rethrown.
  function writeJsonAtomic(p, obj) {
    const tmp = `${p}.tmp-${process.pid}-${Math.floor(now())}`;
    try {
      fs.writeFileSync(tmp, JSON.stringify(obj, null, 2));
      fs.renameSync(tmp, p);
    } catch (err) {
      try { fs.rmSync(tmp, { force: true }); } catch { /* best-effort cleanup */ }
      throw err;
    }
  }

  // Create the task directory, persist the launch spec, and write the initial
  // registry record (status 'starting', pid null). Returns that record.
  function create({ id, spec, prompt, model, policySummary }) {
    const d = dir(id);
    fs.mkdirSync(d, { recursive: true });
    const p = paths(id);
    fs.writeFileSync(p.spec, JSON.stringify(spec, null, 2));
    const meta = {
      id,
      pid: null,
      status: 'starting',
      started_at: now(),
      finished_at: null,
      prompt_summary: summarize(prompt),
      model: model || null,
      policy_summary: policySummary || 'refuse-by-default',
    };
    writeJsonAtomic(p.meta, meta);
    // Touch the events log so `tasks status` on a just-launched task doesn't error.
    fs.writeFileSync(p.events, '');
    return meta;
  }

  function readMeta(id) { return readJson(paths(id).meta); }
  function readSpec(id) { return readJson(paths(id).spec); }
  function readResult(id) { return readJson(paths(id).result); }

  // Shallow-merge `patch` over the current record (or over { id } when there is
  // none) and persist it atomically. Returns the merged record.
  function patchMeta(id, patch) {
    const cur = readMeta(id) || { id };
    const next = { ...cur, ...patch };
    writeJsonAtomic(paths(id).meta, next);
    return next;
  }

  // Append one NDJSON line; `ts` is written first, so an event carrying its own
  // `ts` overrides the generated one.
  function appendEvent(id, event) {
    const line = JSON.stringify({ ts: new Date(now()).toISOString(), ...event }) + '\n';
    fs.appendFileSync(paths(id).events, line);
  }

  // Read the events log back as an array. An unreadable log yields []; blank and
  // unparseable lines are skipped.
  function readEvents(id) {
    let raw;
    try { raw = fs.readFileSync(paths(id).events, 'utf8'); }
    catch { return []; }
    const out = [];
    for (const line of raw.split('\n')) {
      const t = line.trim();
      if (!t) continue;
      try { out.push(JSON.parse(t)); } catch { /* skip corrupt line */ }
    }
    return out;
  }

  function writeResult(id, envelope) {
    writeJsonAtomic(paths(id).result, envelope);
  }

  // Every readable registry record under rootDir, newest first. Entries without
  // a parseable meta.json are ignored; an unreadable rootDir yields [].
  function list() {
    let ids;
    try { ids = fs.readdirSync(rootDir); }
    catch { return []; }
    const metas = [];
    for (const id of ids) {
      const meta = readMeta(id);
      if (meta) metas.push(meta);
    }
    // Newest first.
    return metas.sort((a, b) => (b.started_at || 0) - (a.started_at || 0));
  }

  // Recursively delete one task directory. Returns false (and deletes nothing)
  // for an id that is not a plain child name, because the delete is recursive
  // and forced: '' or '.' would target the whole task root, '..' its parent.
  function remove(id) {
    if (!isPlainId(id)) return false;
    try { fs.rmSync(dir(id), { recursive: true, force: true }); return true; }
    catch { return false; }
  }

  return {
    rootDir, dir, paths, genId,
    create, readMeta, readSpec, readResult, patchMeta,
    appendEvent, readEvents, writeResult,
    list, remove,
  };
}
|
|
198
|
+
|
|
199
|
+
// --------------------------------------------------------------------------
|
|
200
|
+
// Status reconciliation — surface dead "running" tasks as stale.
|
|
201
|
+
// --------------------------------------------------------------------------
|
|
202
|
+
|
|
203
|
+
// The effective status for display: a task marked active whose PID is no longer
|
|
204
|
+
// alive is reported as 'stale' (it died without writing a terminal status).
|
|
205
|
+
// Resolve the status to display for a registry record.
//   meta   registry record, or a falsy value when there is none
//   alive  predicate called with the recorded pid (default isProcessAlive)
// Returns 'unknown' for a missing record, 'stale' for an active record whose
// pid fails the `alive` check, otherwise the recorded status unchanged.
function effectiveStatus(meta, alive = isProcessAlive) {
  if (!meta) return 'unknown';
  const { status, pid } = meta;
  const looksActive = ACTIVE_STATUSES.has(status);
  return looksActive && !alive(pid) ? 'stale' : status;
}
|
|
210
|
+
|
|
211
|
+
// True when the record's effective status is 'stale'; `alive` is forwarded to
// effectiveStatus unchanged.
function isStale(meta, alive = isProcessAlive) {
  const status = effectiveStatus(meta, alive);
  return status === 'stale';
}
|
|
214
|
+
|
|
215
|
+
// Which tasks are safe to prune: terminal (completed/failed/terminated) or stale
|
|
216
|
+
// (dead but never finalized). Genuinely-running tasks are kept.
|
|
217
|
+
// Select the ids of the records in `metas` whose effective status is terminal
// or 'stale', preserving input order. `alive` is the liveness predicate handed
// to effectiveStatus.
function prunableIds(metas, alive = isProcessAlive) {
  return Array.from(metas)
    .filter((meta) => {
      const status = effectiveStatus(meta, alive);
      return status === 'stale' || TERMINAL_STATUSES.has(status);
    })
    .map((meta) => meta.id);
}
|
|
225
|
+
|
|
226
|
+
// Mark a stale task's meta as terminated so a later read is honest even without
|
|
227
|
+
// a prune (idempotent: only rewrites genuinely-stale active tasks).
|
|
228
|
+
// Walk every record in `store` and rewrite each stale one as 'terminated'.
// An existing finished_at / error on the record is kept; otherwise the current
// time and a generic "died without reporting" message are filled in.
//   alive  liveness predicate (default isProcessAlive)
//   now    clock used for a missing finished_at
// Returns the ids that were rewritten.
function reconcile(store, { alive = isProcessAlive, now = () => Date.now() } = {}) {
  const finalized = [];
  store.list().forEach((meta) => {
    if (!isStale(meta, alive)) return;
    const patch = {
      status: 'terminated',
      finished_at: meta.finished_at || now(),
      error: meta.error || 'process died without reporting (stale)',
    };
    store.patchMeta(meta.id, patch);
    finalized.push(meta.id);
  });
  return finalized;
}
|
|
238
|
+
|
|
239
|
+
// --------------------------------------------------------------------------
|
|
240
|
+
// Launch-time validation (constraint 4) — runs in the parent, before detach.
|
|
241
|
+
// --------------------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
// Shape-check a permission policy and return human-readable error strings
// ([] = OK). A policy that is missing or not an object yields no errors.
//
// Checked:
//   rules         must be an array; each entry must be an object with a string
//                 `tool`, an `action` from VALID_ACTIONS, and at most one
//                 matcher key from MATCHER_KEYS
//   allow         must be an array when present
//   allowedTiers  must be an array when present. This is the key buildPolicy
//                 reads from launch input, and buildPolicy silently replaces a
//                 non-array with [], so a malformed value has to be reported
//                 here or it is never reported at all.
function validatePolicy(policy) {
  const errors = [];
  if (!policy || typeof policy !== 'object') return errors;

  const { rules } = policy;
  if (rules != null) {
    // Per-rule checks make no sense on a non-array, so stop at the first error.
    if (!Array.isArray(rules)) {
      errors.push('policy.rules must be an array');
      return errors;
    }
    rules.forEach((r, i) => {
      if (!r || typeof r !== 'object') {
        errors.push(`policy.rules[${i}] is not an object`);
        return;
      }
      if (!r.tool || typeof r.tool !== 'string') errors.push(`policy.rules[${i}] missing a string "tool"`);
      if (!VALID_ACTIONS.has(r.action)) errors.push(`policy.rules[${i}] action must be one of allow|deny|ask`);
      const present = MATCHER_KEYS.filter((k) => r[k] != null);
      if (present.length > 1) errors.push(`policy.rules[${i}] has more than one matcher key (${present.join(', ')})`);
    });
  }

  if (policy.allow != null && !Array.isArray(policy.allow)) {
    errors.push('policy.allow must be an array of tiers');
  }
  if (policy.allowedTiers != null && !Array.isArray(policy.allowedTiers)) {
    errors.push('policy.allowedTiers must be an array of tiers');
  }
  return errors;
}
|
|
260
|
+
|
|
261
|
+
// Validate everything checkable BEFORE detaching. Synchronous as far as the JS
|
|
262
|
+
// goes except the optional model probe; the launcher awaits it before forking.
|
|
263
|
+
// Returns an array of human-readable error strings ([] = OK).
|
|
264
|
+
// Collect every launch problem that can be detected before detaching.
//   prompt         task prompt; must be non-blank
//   config         needs `api_base`; `default_model` backs up `model`
//   policy         checked with validatePolicy
//   sandboxConfig  normalized with normalizeSandbox; availability is only
//                  enforced when mode is not 'off' and failIfUnavailable is set
//   detection      precomputed sandbox detection (else detectSandbox() is called)
//   probeModel     optional async reachability check; a thrown error or a
//                  resolved `false` is reported, any other outcome passes
// Resolves to an array of error strings in check order ([] = OK).
async function validateLaunch({
  prompt,
  config = {},
  policy = {},
  sandboxConfig,
  model,
  detection,
  probeModel,
} = {}) {
  const problems = [];
  const fail = (message) => { problems.push(message); };

  const promptText = prompt ? String(prompt).trim() : '';
  if (!promptText) fail('prompt is empty');

  if (!config.api_base) fail('no api_base configured (run `semalt-code init` or pass --api-base)');

  const resolvedModel = model || config.default_model;
  if (!resolvedModel) fail('no model configured (pass -m <model> or set a default_model)');

  for (const policyError of validatePolicy(policy)) fail(policyError);

  // An unavailable sandbox is only a launch error when the launch demands one;
  // otherwise commands are refused at runtime instead.
  const sb = normalizeSandbox(sandboxConfig);
  const sandboxRequired = sb.mode !== 'off' && sb.failIfUnavailable;
  if (sandboxRequired) {
    const det = detection || detectSandbox();
    if (!det.available) {
      fail(`sandbox unavailable and failIfUnavailable is set: ${det.reason || 'no OS sandbox primitive'}`);
    }
  }

  // The probe is opt-in so the default path stays offline.
  if (typeof probeModel !== 'function') return problems;

  try {
    const reachable = await probeModel(resolvedModel);
    if (reachable === false) fail('model endpoint did not respond');
  } catch (e) {
    fail(`model endpoint unreachable: ${e && e.message ? e.message : e}`);
  }
  return problems;
}
|
|
307
|
+
|
|
308
|
+
// Build the launch-fixed policy object from coarse flags + rules. Pure.
|
|
309
|
+
// Normalize launch flags into the policy object stored in the spec.
//   allowedTiers                -> `allow` (copied; a non-array becomes [])
//   rules                       -> `rules` (copied; a non-array becomes [])
//   readonly                    -> coerced to boolean
//   dangerouslySkipPermissions  -> coerced to boolean
// The copies are shallow: the arrays are new, their elements are shared.
function buildPolicy({ allowedTiers = [], readonly = false, rules = [], dangerouslySkipPermissions = false } = {}) {
  const copyIfArray = (value) => (Array.isArray(value) ? value.slice() : []);
  return {
    allow: copyIfArray(allowedTiers),
    readonly: Boolean(readonly),
    rules: copyIfArray(rules),
    dangerouslySkipPermissions: Boolean(dangerouslySkipPermissions),
  };
}
|
|
317
|
+
|
|
318
|
+
// One-line description of a policy for the registry record.
// The skip-permissions opt-out overrides everything else; otherwise the
// non-empty parts (allowed tiers, readonly, rule count) are joined with ', ',
// falling back to 'refuse-by-default' when there are none.
function policySummary(policy) {
  if (policy.dangerouslySkipPermissions) return 'DANGER: skip-permissions';

  const { allow, readonly, rules } = policy;
  const parts = [
    allow && allow.length ? 'allow:' + allow.join('+') : null,
    readonly ? 'readonly' : null,
    rules && rules.length ? `${rules.length} rule(s)` : null,
  ].filter((part) => part !== null);

  return parts.length > 0 ? parts.join(', ') : 'refuse-by-default';
}
|
|
326
|
+
|
|
327
|
+
// --------------------------------------------------------------------------
|
|
328
|
+
// Launcher (parent) — validate, persist, detach.
|
|
329
|
+
// --------------------------------------------------------------------------
|
|
330
|
+
// Validate, persist and detach one background task.
//
//   prompt, config, policy, sandboxConfig, model  launch inputs; `model` falls
//       back to config.default_model and `sandboxConfig` to config.sandbox
//   cwd            working directory for the child (default process.cwd())
//   maxIterations  overrides config.max_iterations when not null/undefined
//   store          task store to use (default: createTaskStore({ now }))
//   spawn, execPath, indexJs, resolveKey, env, detection, probeModel, now
//                  injectables for tests / wiring
//
// Resolves to { id, pid, dir } once the child has been spawned.
// Throws an Error carrying `validationErrors` when validation fails (nothing is
// written in that case). Throws as well when the child cannot be spawned; the
// registry record is then finalized as 'failed' first, so a failed launch never
// lingers as 'starting' or is reported as 'running'.
async function launchBackground({
  prompt,
  config = {},
  policy = {},
  sandboxConfig,
  model,
  cwd = process.cwd(),
  maxIterations,
  store,
  // Injectables for tests / wiring.
  spawn = require('child_process').spawn,
  execPath = process.execPath,
  indexJs = path.resolve(__dirname, '..', 'index.js'),
  resolveKey,
  env = process.env,
  detection,
  probeModel,
  now = () => Date.now(),
} = {}) {
  const resolvedModel = model || config.default_model;
  const effectiveSandboxConfig = sandboxConfig != null ? sandboxConfig : config.sandbox;

  // 1. VALIDATE before any side effect (constraint 4).
  const errors = await validateLaunch({ prompt, config, policy, sandboxConfig: effectiveSandboxConfig, model: resolvedModel, detection, probeModel });
  if (errors.length) {
    const err = new Error('Cannot launch background task:\n - ' + errors.join('\n - '));
    err.validationErrors = errors;
    throw err;
  }

  const taskStore = store || createTaskStore({ now });
  const id = taskStore.genId();
  const sandbox = normalizeSandbox(effectiveSandboxConfig);

  // The spec the child reads. NO secrets on disk — the API key goes via env.
  const spec = {
    version: 1,
    prompt: String(prompt),
    apiBase: config.api_base,
    model: resolvedModel,
    contextLength: config.context_length || null,
    maxIterations: maxIterations != null ? maxIterations : config.max_iterations,
    cwd,
    policy: buildPolicy(policy),
    sandbox,
  };
  taskStore.create({ id, spec, prompt, model: resolvedModel, policySummary: policySummary(spec.policy) });

  // 2. DETACH. Pass --dangerously-skip-permissions through to the child's argv
  // so lib/tools.js (which reads argv at module load) honors the launch-fixed
  // opt-out for the deny-list / secret-read / config-write guards — those are
  // NOT reachable through createAgent options.
  const childArgs = [indexJs, '__bg-exec', taskStore.dir(id)];
  if (spec.policy.dangerouslySkipPermissions) childArgs.push('--dangerously-skip-permissions');

  // Carry the resolved API key via env so it is never persisted to the task dir.
  const childEnv = { ...env };
  if (typeof resolveKey === 'function') {
    try { const k = resolveKey(config); if (k) childEnv.SEMALT_API_KEY = k; } catch { /* fall through */ }
  }

  // Finalize the record when the launch cannot complete. Best-effort: a failure
  // to write the record must not hide the spawn failure being reported.
  const recordLaunchFailure = (message) => {
    try {
      taskStore.patchMeta(id, { status: 'failed', finished_at: now(), error: message });
    } catch { /* best-effort */ }
  };

  let child;
  try {
    child = spawnDetached(spawn, execPath, childArgs, { cwd, env: childEnv });
  } catch (err) {
    recordLaunchFailure(`failed to spawn child process: ${err && err.message ? err.message : err}`);
    throw err;
  }

  // Node leaves `pid` undefined when the child could not be spawned, so a child
  // without a pid is a failed launch, not a running task.
  if (!child || child.pid == null) {
    const message = 'failed to spawn child process (no pid)';
    recordLaunchFailure(message);
    const err = new Error(`Cannot launch background task: ${message}`);
    err.taskId = id;
    throw err;
  }

  taskStore.patchMeta(id, { pid: child.pid, status: 'running' });
  // Let the parent exit without waiting for the detached child.
  if (typeof child.unref === 'function') child.unref();

  return { id, pid: child.pid, dir: taskStore.dir(id) };
}
|
|
397
|
+
|
|
398
|
+
// --------------------------------------------------------------------------
|
|
399
|
+
// Child runner (detached) — read spec, run agent, write result/status.
|
|
400
|
+
// --------------------------------------------------------------------------
|
|
401
|
+
// Run one background task to completion inside the detached child.
//
//   taskDir      the task's directory; its basename is the task id and its
//                parent is used as the store root when no store is injected
//   store        task store (default: createTaskStore over dirname(taskDir))
//   createAgent  agent factory (default: the SDK facade's createAgent)
//   now          clock used for finished_at
//
// Resolves to { status: 'completed', envelope } or { status: 'failed', error? }.
// Agent construction, listener wiring and the run all happen inside one
// try/catch, so a failure in any of them is recorded as a terminal 'failed'
// status instead of leaving the record 'running' for a dead process.
async function runBackgroundChild({
  taskDir,
  store,
  createAgent = require('./sdk').createAgent,
  now = () => Date.now(),
} = {}) {
  const id = path.basename(taskDir);
  const taskStore = store || createTaskStore({ rootDir: path.dirname(taskDir), now });
  const spec = taskStore.readSpec(id);
  if (!spec) {
    taskStore.patchMeta(id, { status: 'failed', finished_at: now(), error: 'spec.json missing or unreadable' });
    return { status: 'failed' };
  }

  taskStore.appendEvent(id, { type: 'status', status: 'running' });

  const policy = spec.policy || {};
  let agent = null;

  try {
    agent = createAgent({
      apiBase: spec.apiBase,
      model: spec.model,
      contextLength: spec.contextLength,
      cwd: spec.cwd,
      // LAUNCH-FIXED permission posture. NO `approve` is ever wired (no TTY); with
      // no rules/tiers createAgent refuses every mutation — the safe default.
      rules: policy.rules || [],
      allow: policy.allow || [],
      readonly: !!policy.readonly,
      dangerouslySkipPermissions: !!policy.dangerouslySkipPermissions,
      // Sandbox + deny-list stay on unless the launch opted out (sandbox.mode off).
      sandbox: spec.sandbox,
      maxIterations: spec.maxIterations,
    });

    // Stream advisory progress to the events log (bounded — tool/warning/error,
    // never raw tokens, so the file stays small).
    agent.on('tool', (e) => {
      try {
        const ok = !(e && e.meta && e.meta.error);
        const ev = { type: 'tool', tag: e && e.tag, ms: e && e.ms, ok };
        // On a failed/blocked tool, record a short excerpt (e.g. a deny-list
        // refusal) so `tasks status` is honest about what the agent couldn't do.
        if (!ok && e && e.result) ev.detail = String(e.result).slice(0, 200);
        taskStore.appendEvent(id, ev);
      } catch { /* best-effort */ }
    });
    agent.on('warning', (m) => {
      try { taskStore.appendEvent(id, { type: 'warning', message: typeof m === 'string' ? m : (m && m.message) }); } catch { /* best-effort */ }
    });
    agent.on('error', (e) => {
      // Errors flagged isWarning are skipped here; the rest are logged as warnings.
      if (e && e.isWarning) return;
      try { taskStore.appendEvent(id, { type: 'warning', message: e && e.message }); } catch { /* best-effort */ }
    });

    const res = await agent.run(spec.prompt);
    const envelope = {
      result: res.result,
      toolCalls: res.toolCalls,
      usage: res.usage,
      cost: res.cost,
      stopReason: res.stopReason,
      verifyStatus: res.verifyStatus,
    };
    taskStore.writeResult(id, envelope);
    taskStore.appendEvent(id, { type: 'result', stopReason: envelope.stopReason, verifyStatus: envelope.verifyStatus });
    taskStore.patchMeta(id, { status: 'completed', finished_at: now(), stopReason: envelope.stopReason, verifyStatus: envelope.verifyStatus });
    return { status: 'completed', envelope };
  } catch (err) {
    const message = err && err.message ? err.message : String(err);
    // The log entry is advisory; the terminal status below is what matters, so
    // a failing append must not prevent it from being written.
    try { taskStore.appendEvent(id, { type: 'error', message }); } catch { /* best-effort */ }
    taskStore.patchMeta(id, { status: 'failed', finished_at: now(), error: message });
    return { status: 'failed', error: message };
  } finally {
    // No agent exists when createAgent itself threw.
    if (agent) {
      try { await agent.close(); } catch { /* best-effort */ }
    }
  }
}
|
|
477
|
+
|
|
478
|
+
// --------------------------------------------------------------------------
|
|
479
|
+
// Kill (terminate) — tree-kill the recorded PID + mark terminated.
|
|
480
|
+
// --------------------------------------------------------------------------
|
|
481
|
+
/**
 * Terminate a background task: signal the recorded PID, escalate if it
 * survives the grace period, and mark the task record as terminated.
 *
 * @param {object} store - Task store; `readMeta(id)` and `patchMeta(id, patch)` are used.
 * @param {string} id - Task identifier.
 * @param {object} [options] - Injectable collaborators (useful for tests).
 * @param {Function} [options.alive] - `(pid) => boolean` liveness probe.
 * @param {Function} [options.kill] - `(pid, signal) => void` process-tree killer.
 * @param {Function} [options.delay] - `(ms) => Promise` used to wait out the grace period.
 * @param {number} [options.graceMs] - Wait between SIGTERM and the SIGKILL check.
 * @param {Function} [options.now] - Clock used for `finished_at`.
 * @returns {Promise<{ok: boolean, reason: string}>}
 */
async function killTask(store, id, options = {}) {
  const {
    alive = isProcessAlive,
    kill = killTreeByPid,
    delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)),
    graceMs = 2000,
    now = () => Date.now(),
  } = options;

  const record = store.readMeta(id);
  if (!record) {
    return { ok: false, reason: 'no such task' };
  }
  if (TERMINAL_STATUSES.has(record.status)) {
    return { ok: false, reason: `already ${record.status}` };
  }

  const pid = record.pid;
  const running = pid ? alive(pid) : false;
  if (!running) {
    // Nothing left to signal — only the record needs finalizing.
    store.patchMeta(id, {
      status: 'terminated',
      finished_at: now(),
      error: record.error || 'process was not running',
    });
    return { ok: true, reason: 'process was not running; marked terminated' };
  }

  // Polite request first; escalate only if the process outlives the grace period.
  kill(pid, 'SIGTERM');
  await delay(graceMs);
  if (alive(pid)) {
    kill(pid, 'SIGKILL');
  }

  store.patchMeta(id, { status: 'terminated', finished_at: now() });
  return { ok: true, reason: 'terminated' };
}
|
|
502
|
+
|
|
503
|
+
// --------------------------------------------------------------------------
|
|
504
|
+
// Formatters (pure) — for the `tasks` CLI surface.
|
|
505
|
+
// --------------------------------------------------------------------------
|
|
506
|
+
/**
 * Render the one-line-per-task listing for the `tasks` CLI surface.
 *
 * @param {Array<object>|null|undefined} metas - Task metadata records.
 * @param {object} [options]
 * @param {Function} [options.alive] - `(pid) => boolean` liveness probe, forwarded
 *   to `effectiveStatus` and `isStale`.
 * @returns {string} The listing, or 'No background tasks.' when there is nothing to show.
 */
function formatTaskList(metas, { alive = isProcessAlive } = {}) {
  if (!metas || !metas.length) return 'No background tasks.';

  // Date#toISOString throws a RangeError on an invalid date; a single record
  // with an unparsable timestamp must not take down the whole listing.
  const startedLabel = (value) => {
    if (!value) return '?';
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? '?' : date.toISOString();
  };

  const lines = ['Background tasks:'];
  for (const m of metas) {
    const eff = effectiveStatus(m, alive);
    const when = startedLabel(m.started_at);
    lines.push(` ${m.id} [${eff}] ${when} ${m.model || '?'} ${m.prompt_summary || ''}`);
  }

  const stale = metas.filter((m) => isStale(m, alive)).length;
  if (stale) lines.push(`\n ${stale} stale task(s) — run \`semalt-code tasks prune\` to clean up.`);
  return lines.join('\n');
}
|
|
518
|
+
|
|
519
|
+
/**
 * Render a multi-line status report for one background task.
 *
 * @param {object|null|undefined} meta - Task metadata record.
 * @param {Array<object>|null|undefined} events - Event-log entries, oldest first;
 *   only the last 8 are shown.
 * @param {object} [options]
 * @param {Function} [options.alive] - `(pid) => boolean` liveness probe, forwarded
 *   to `effectiveStatus`.
 * @returns {string} The report, or 'No such task.' when `meta` is missing.
 */
function formatTaskStatus(meta, events, { alive = isProcessAlive } = {}) {
  if (!meta) return 'No such task.';

  // Date#toISOString throws a RangeError on an invalid date; an unparsable
  // timestamp in the record is shown as '?' instead of crashing the report.
  const toIso = (value) => {
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? '?' : date.toISOString();
  };

  const eff = effectiveStatus(meta, alive);
  const lines = [
    `Task ${meta.id}`,
    ` status: ${eff}${eff !== meta.status ? ` (recorded: ${meta.status})` : ''}`,
    ` pid: ${meta.pid == null ? '?' : meta.pid}`,
    ` model: ${meta.model || '?'}`,
    ` policy: ${meta.policy_summary || '?'}`,
    ` started: ${meta.started_at ? toIso(meta.started_at) : '?'}`,
    ` finished: ${meta.finished_at ? toIso(meta.finished_at) : '—'}`,
    ` prompt: ${meta.prompt_summary || ''}`,
  ];
  if (meta.stopReason) lines.push(` stopReason: ${meta.stopReason}`);
  if (meta.error) lines.push(` error: ${meta.error}`);

  const recent = (events || []).slice(-8);
  if (recent.length) {
    lines.push(' recent events:');
    for (const e of recent) {
      if (!e) continue; // tolerate a blank/unparsed log entry
      const tag = e.tag ? ' ' + e.tag : '';
      // Tool events carry `ok` and, on failure, a short `detail` excerpt.
      // Surface both so a blocked or failed tool call is visible here.
      const failed = e.ok === false ? ' (failed)' : '';
      const message = e.message ? ' ' + e.message : '';
      // Collapse whitespace so each event stays on a single line.
      const detail = e.detail ? ' — ' + String(e.detail).replace(/\s+/g, ' ').trim() : '';
      lines.push(` ${e.ts || ''} ${e.type}${tag}${failed}${message}${detail}`);
    }
  }
  return lines.join('\n');
}
|
|
541
|
+
|
|
542
|
+
module.exports = {
|
|
543
|
+
DEFAULT_TASKS_DIR,
|
|
544
|
+
TERMINAL_STATUSES,
|
|
545
|
+
ACTIVE_STATUSES,
|
|
546
|
+
createTaskStore,
|
|
547
|
+
effectiveStatus,
|
|
548
|
+
isStale,
|
|
549
|
+
prunableIds,
|
|
550
|
+
reconcile,
|
|
551
|
+
validatePolicy,
|
|
552
|
+
validateLaunch,
|
|
553
|
+
buildPolicy,
|
|
554
|
+
policySummary,
|
|
555
|
+
launchBackground,
|
|
556
|
+
runBackgroundChild,
|
|
557
|
+
killTask,
|
|
558
|
+
formatTaskList,
|
|
559
|
+
formatTaskStatus,
|
|
560
|
+
};
|
|
561
|
+
|
|
562
|
+
// ---------------------------------------------------------------------------
|
|
563
|
+
// TOOL-EXPOSURE DECISION (constraint 5) — documented, deliberate.
|
|
564
|
+
// ---------------------------------------------------------------------------
|
|
565
|
+
//
|
|
566
|
+
// Background-launch is NOT exposed as an agent tool. There is no
|
|
567
|
+
// `run_background` / `spawn_background` tag, no TOOL_SPECS entry, and nothing
|
|
568
|
+
// registered into the (static OR dynamic) tool registry. It is reachable ONLY
|
|
569
|
+
// from the human-initiated CLI/SDK surface (`semalt-code run --background`,
|
|
570
|
+
// `launchBackground()`).
|
|
571
|
+
//
|
|
572
|
+
// WHY: a model-reachable background launcher is a privilege-escalation surface —
|
|
573
|
+
// the agent could fork a fresh process to escape its own permission perimeter
|
|
574
|
+
// (the very thing the subagent no-escalation rule, 4.5, forbids). Subagents
|
|
575
|
+
// already give the model in-process parallelism while SHARING the parent
|
|
576
|
+
// permission manager (no escalation possible). Background tasks exist for a
|
|
577
|
+
// different need — a human-owned, terminal-surviving job — so keeping the
|
|
578
|
+
// launcher off the tool surface removes the escalation question entirely rather
|
|
579
|
+
// than trying to police it.
|
|
580
|
+
//
|
|
581
|
+
// If a future task DOES expose a background-launch tool, it MUST inherit (and be
|
|
582
|
+
// unable to exceed) the launching agent's posture: pass the parent's resolved
|
|
583
|
+
// policy as the child's launch-fixed policy and reject any widening, exactly as
|
|
584
|
+
// subagents reuse the parent permissionManager.
|