@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/.claude/settings.local.json +6 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1584 -26
  4. package/README.md +147 -3
  5. package/examples/embed.js +74 -0
  6. package/index.js +251 -10
  7. package/lib/agent.js +711 -104
  8. package/lib/api.js +213 -49
  9. package/lib/args.js +74 -2
  10. package/lib/audit.js +23 -1
  11. package/lib/background.js +584 -0
  12. package/lib/checkpoints.js +757 -0
  13. package/lib/commands/auth.js +94 -0
  14. package/lib/commands/chat-session.js +306 -0
  15. package/lib/commands/chat-slash.js +399 -0
  16. package/lib/commands/chat-turn.js +446 -0
  17. package/lib/commands/chat.js +403 -0
  18. package/lib/commands/custom.js +157 -0
  19. package/lib/commands/history-utils.js +66 -0
  20. package/lib/commands/index.js +268 -0
  21. package/lib/commands/mcp.js +113 -0
  22. package/lib/commands/oneshot.js +193 -0
  23. package/lib/commands/registry.js +269 -0
  24. package/lib/commands/tasks.js +89 -0
  25. package/lib/compact.js +87 -0
  26. package/lib/config.js +333 -11
  27. package/lib/constants.js +372 -3
  28. package/lib/deny.js +199 -0
  29. package/lib/doctor.js +160 -0
  30. package/lib/headless.js +167 -0
  31. package/lib/hooks.js +286 -0
  32. package/lib/images.js +264 -0
  33. package/lib/internals.js +49 -0
  34. package/lib/mcp/boundary.js +131 -0
  35. package/lib/mcp/client.js +270 -0
  36. package/lib/mcp/oauth.js +134 -0
  37. package/lib/memory.js +209 -0
  38. package/lib/metrics.js +37 -2
  39. package/lib/payload.js +54 -0
  40. package/lib/permission-rules.js +401 -0
  41. package/lib/permissions.js +100 -10
  42. package/lib/pricing.js +67 -0
  43. package/lib/proc.js +62 -0
  44. package/lib/prompts.js +84 -5
  45. package/lib/sandbox.js +568 -0
  46. package/lib/sdk.js +328 -0
  47. package/lib/secrets.js +211 -0
  48. package/lib/skills.js +223 -0
  49. package/lib/subagents.js +516 -0
  50. package/lib/tool_registry.js +2558 -0
  51. package/lib/tool_specs.js +222 -2
  52. package/lib/tools.js +272 -1020
  53. package/lib/ui/format.js +22 -1
  54. package/lib/ui/input-field.js +16 -7
  55. package/lib/ui/status-bar.js +79 -11
  56. package/lib/ui/theme.js +1 -0
  57. package/lib/ui/web-activity.js +218 -0
  58. package/lib/verify.js +229 -0
  59. package/lib/web-extract.js +213 -0
  60. package/lib/web-summarize.js +68 -0
  61. package/package.json +19 -4
  62. package/scripts/lint.js +57 -0
  63. package/test/agent-loop.test.js +389 -0
  64. package/test/background.test.js +414 -0
  65. package/test/chat.test.js +114 -0
  66. package/test/checkpoints-agent.test.js +181 -0
  67. package/test/checkpoints.test.js +650 -0
  68. package/test/command-registry.test.js +160 -0
  69. package/test/compact.test.js +116 -0
  70. package/test/completion-lazy.test.js +52 -0
  71. package/test/config-merge.test.js +324 -0
  72. package/test/config-quarantine.test.js +128 -0
  73. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  74. package/test/config-write-guard-skip.test.js +46 -0
  75. package/test/config-write-guard.test.js +153 -0
  76. package/test/context-split.test.js +215 -0
  77. package/test/cost-doctor.test.js +142 -0
  78. package/test/custom-commands-chat.test.js +106 -0
  79. package/test/custom-commands.test.js +230 -0
  80. package/test/deny-windows.test.js +120 -0
  81. package/test/deny.test.js +83 -0
  82. package/test/download-allow-anywhere.test.js +66 -0
  83. package/test/download-confine.test.js +153 -0
  84. package/test/executors.test.js +362 -0
  85. package/test/extract-tool-calls.test.js +315 -0
  86. package/test/fetch-url-validation.test.js +219 -0
  87. package/test/fixtures/tool-calls.js +57 -0
  88. package/test/fixtures/web-page.js +91 -0
  89. package/test/git-tools.test.js +384 -0
  90. package/test/grep-glob-serialize.test.js +242 -0
  91. package/test/grep-glob.test.js +268 -0
  92. package/test/harness/README.md +57 -0
  93. package/test/harness/chat-harness.js +142 -0
  94. package/test/harness/memwarn-headless-child.js +65 -0
  95. package/test/harness/mock-llm.js +120 -0
  96. package/test/harness/mock-mcp-server.js +142 -0
  97. package/test/harness/sse-server.js +69 -0
  98. package/test/headless.test.js +203 -0
  99. package/test/history-utils.test.js +88 -0
  100. package/test/hooks-agent.test.js +238 -0
  101. package/test/hooks-verify-sandbox.test.js +232 -0
  102. package/test/hooks.test.js +216 -0
  103. package/test/http-get-user-agent.test.js +142 -0
  104. package/test/images-api.test.js +208 -0
  105. package/test/images.test.js +238 -0
  106. package/test/max-iterations.test.js +216 -0
  107. package/test/mcp-boundary.test.js +57 -0
  108. package/test/mcp-client.test.js +267 -0
  109. package/test/mcp-oauth.test.js +86 -0
  110. package/test/memory-truncation-warning.test.js +222 -0
  111. package/test/memory.test.js +198 -0
  112. package/test/native-dispatch.test.js +356 -0
  113. package/test/output-chokepoint.test.js +188 -0
  114. package/test/path-guards.test.js +134 -0
  115. package/test/payload.test.js +99 -0
  116. package/test/permission-rules-agent.test.js +210 -0
  117. package/test/permission-rules.test.js +297 -0
  118. package/test/permissions.test.js +163 -0
  119. package/test/plan-mode.test.js +167 -0
  120. package/test/read-paginate.test.js +275 -0
  121. package/test/readonly-tools.test.js +177 -0
  122. package/test/result-cap.test.js +233 -0
  123. package/test/sandbox-agent.test.js +147 -0
  124. package/test/sandbox-integration.test.js +216 -0
  125. package/test/sandbox.test.js +408 -0
  126. package/test/sdk.test.js +234 -0
  127. package/test/shell-output-cap.test.js +181 -0
  128. package/test/skills-chat.test.js +110 -0
  129. package/test/skills.test.js +295 -0
  130. package/test/smoke.test.js +68 -0
  131. package/test/status-bar-pause.test.js +164 -0
  132. package/test/stream-parser.test.js +147 -0
  133. package/test/subagents-agent.test.js +178 -0
  134. package/test/subagents.test.js +222 -0
  135. package/test/tool-registry.test.js +85 -0
  136. package/test/trim-budget.test.js +101 -0
  137. package/test/verify-agent.test.js +317 -0
  138. package/test/verify.test.js +141 -0
  139. package/test/web-activity-ordering.test.js +194 -0
  140. package/test/web-activity.test.js +207 -0
  141. package/test/web-data-extraction-guidance.test.js +71 -0
  142. package/test/web-extract.test.js +185 -0
  143. package/test/web-fetch-agent.test.js +291 -0
  144. package/test/web-fetch-mode.test.js +193 -0
  145. package/test/web-search.test.js +380 -0
  146. package/lib/commands.js +0 -1438
package/README.md CHANGED
@@ -18,7 +18,7 @@ It provides an interactive chat interface, one-shot code generation, AI-assisted
18
18
 
19
19
  ## Requirements
20
20
 
21
- - Node.js `>=16`
21
+ - Node.js `>=18` (Node 16 is end-of-life; `node --test` is unreliable on it)
22
22
  - An OpenAI-compatible API endpoint
23
23
 
24
24
  The default configuration expects a local API server at `http://127.0.0.1:8800`.
@@ -114,6 +114,14 @@ semalt-code [command] [options]
114
114
  - `semalt-code init`
115
115
  Creates or updates the local config file.
116
116
 
117
+ - `semalt-code mcp <list|status|add|remove|auth>`
118
+ Manage MCP servers. `add` registers a server (stdio `command`/`args` or remote `--url`),
119
+ `status` connects and reports each server's tools, `auth` runs the OAuth flow for a remote
120
+ server (tokens are stored in the OS keychain). Discovered tools register as
121
+ `mcp__<server>__<tool>` and run through the same approval-gated agent loop as built-ins —
122
+ MCP results are treated as untrusted external content, and MCP tools require approval by
123
+ default (opt in per server with `allow`/`allowAll`). In chat, `/mcp` shows the same status.
124
+
117
125
  ### Options
118
126
 
119
127
  - `-m, --model <name>`
@@ -246,6 +254,99 @@ Saved profiles can then be selected inside chat mode with `/model` or `/models`.
246
254
  semalt-code --version
247
255
  ```
248
256
 
257
+ ## Embedding SDK
258
+
259
+ `@semalt-ai/code` can be embedded in another program as a library, not just run as a
260
+ CLI. There are **two tiers**, physically separated by the package `exports` map:
261
+
262
+ | Import | Surface | Stability |
263
+ |--------|---------|-----------|
264
+ | `require('@semalt-ai/code')` | `createAgent` — the high-level facade | **Stable** (semver) |
265
+ | `require('@semalt-ai/code/internals')` | `createAgentRunner`, `createApiClient`, the registries, … | **Unstable** — no guarantee, may change in any release |
266
+
267
+ Both subpaths work for `require` and `import` (the package is CommonJS; ESM consumers
268
+ get the named exports via interop).
269
+
270
+ ### The facade
271
+
272
+ ```js
273
+ const { createAgent } = require('@semalt-ai/code');
274
+
275
+ const agent = createAgent({
276
+ apiBase: 'http://127.0.0.1:8800',
277
+ apiKey: process.env.SEMALT_API_KEY,
278
+ model: 'my-model',
279
+ // permission policy — see below
280
+ });
281
+
282
+ const res = await agent.run('Summarise README.md in three bullets');
283
+ // res = { result, toolCalls, usage, cost, stopReason, verifyStatus, messages }
284
+ console.log(res.result);
285
+
286
+ await agent.close(); // REQUIRED — releases MCP connections / processes
287
+ ```
288
+
289
+ `run(prompt, opts?)` executes a prompt to completion and returns the same structured
290
+ envelope headless mode produces, plus `messages` so you can continue the conversation
291
+ (`agent.run(next, { messages: res.messages })`). Stream events with
292
+ `agent.on('token' | 'assistant' | 'tool' | 'tool-start' | 'error' | 'warning' | 'done', cb)`.
293
+
294
+ ### Permission policy — safe by default
295
+
296
+ There is no terminal in embedded use, so the facade takes a **programmatic** permission
297
+ policy. With **no** policy provided, the default is to **refuse every mutating /
298
+ effectful tool** (read-only tools still run) — it never auto-approves:
299
+
300
+ ```js
301
+ // 1) an async approver callback
302
+ createAgent({ /* … */, approve: async (call) => {
303
+ // call = { actionType, description, tag, rule }
304
+ return call.tag !== 'delete_file'; // your decision
305
+ }});
306
+
307
+ // 2) preset allow/deny/ask rules (the same engine as the CLI's per-pattern rules)
308
+ createAgent({ /* … */, rules: [
309
+ { tool: 'write_file', path: 'src/**', action: 'allow' },
310
+ { tool: 'shell', pattern: 'git *', action: 'allow' },
311
+ { tool: 'shell', pattern: '/curl.*\\| *sh/', action: 'deny' },
312
+ ]});
313
+
314
+ // 3) coarse tiers (like --allow-fs/exec/net) and read-only
315
+ createAgent({ /* … */, allow: ['fs', 'net'], readonly: true });
316
+ ```
317
+
318
+ The **OS sandbox** and the **destructive-command deny-list** stay **on** regardless of
319
+ there being no TTY. The sandbox is opt-out **only** via explicit config
320
+ (`sandbox: { mode: 'off' }`); a host can permit running a command unsandboxed when the
321
+ kernel primitive is missing by supplying `onUnsandboxed`. A `deny` rule and the
322
+ deny-list are honored even under the deliberate `dangerouslySkipPermissions: true`
323
+ gate opt-out.
324
+
325
+ By default the SDK does **not** read your `~/.semalt-ai/config.json` (a server wants
326
+ isolation, not the operator's personal defaults) — pass `loadUserConfig: true` to layer
327
+ it in. Pass arbitrary config under `config: { … }`.
328
+
329
+ ### Lifecycle & multi-instance
330
+
331
+ - **Always call `await agent.close()`** when done. It disconnects MCP servers and frees
332
+ handles. `run()` after `close()` throws.
333
+ - Each `createAgent` instance keeps its **own config** — two instances never share
334
+ config state.
335
+ - **Process-global state (documented limitation).** A few things are process-wide, not
336
+ per-instance, because they were built for the single-process CLI:
337
+ - the **dynamic tool registry** (MCP + `spawn_agent`) is global — two instances with
338
+ *different* MCP servers would see each other's tools;
339
+ - file-path confinement (`isPathSafe`) and the deny-list/secret/config guards read
340
+ `process.cwd()` and `process.argv` **once at module load**, so they're shared by all
341
+ instances and the deny-list opt-out requires launching the host process with
342
+ `--dangerously-skip-permissions`;
343
+ - the stdout-chrome-suppression flag is process-wide.
344
+
345
+ For most embeddings (one agent per process, or instances sharing a CWD and MCP set)
346
+ none of this matters; run fully-isolated agents in separate processes if it does.
347
+
348
+ A runnable example lives in [`examples/embed.js`](examples/embed.js).
349
+
249
350
  ## How Responses Are Rendered
250
351
 
251
352
  The CLI formats streamed output for terminal readability:
@@ -259,13 +360,56 @@ The CLI formats streamed output for terminal readability:
259
360
 
260
361
  If the backend returns `reasoning_content`, the CLI also shows a lightweight `thinking` section during streaming.
261
362
 
363
+ ## Dependency Policy
364
+
365
+ This project keeps its runtime dependency surface **minimal, vetted, and pinned**. It
366
+ ran with **zero runtime dependencies** through its first phases; as of v1.9.0 it has a
367
+ single one — the official Model Context Protocol SDK,
368
+ [`@modelcontextprotocol/sdk`](https://www.npmjs.com/package/@modelcontextprotocol/sdk) —
369
+ adopted to implement MCP against its reference (rather than hand-rolling the protocol).
370
+
371
+ The policy for any runtime dependency:
372
+
373
+ - **Minimal & justified** — added only when a Node.js built-in genuinely cannot do the
374
+ job, with a recorded rationale.
375
+ - **Pinned to an exact version** — no `^`/`~` ranges in `package.json`. Upgrades are
376
+ deliberate, reviewed commits.
377
+ - **Reviewed with the lockfile** — `package-lock.json` is committed; adding or bumping a
378
+ dependency is a reviewed change.
379
+
380
+ **Supply-chain checks.** CI runs `npm ci` (lockfile integrity) and
381
+ `npm audit --omit=dev --audit-level=high` (fails on HIGH/CRITICAL advisories in the
382
+ runtime tree). The full audit-findings policy is documented in `CLAUDE.md`.
383
+
384
+ The SDK is ESM-only while this project is CommonJS, so it is loaded in exactly one
385
+ place — `lib/mcp/boundary.js`, via dynamic `import()` — and the rest of the codebase
386
+ stays CommonJS.
387
+
262
388
  ## Notes and Limitations
263
389
 
264
- - This project is currently a single-file CLI implementation centered in `index.js`.
265
- - It uses Node's built-in `http` and `https` modules and does not require extra runtime dependencies.
390
+ - It uses Node's built-in `http` and `https` modules for all networking; the only
391
+ runtime dependency is the MCP SDK (see **Dependency Policy** above).
266
392
  - The `edit` command writes the model output directly back to the target file, so review prompts and backend behavior carefully.
267
393
  - Shell and file operations are approval-based, but they still execute on the local system after approval.
268
394
 
395
+ ### Not yet implemented
396
+
397
+ A few capabilities are intentionally absent today — documented here so you don't build
398
+ on something that isn't wired up. See **Deferred / Not Yet Implemented** in `CLAUDE.md`
399
+ for the full list and roadmap status.
400
+
401
+ - **MCP tools are interactive-chat only** — they are not connected in the `code`/`edit`/`shell`
402
+ one-shot commands or headless `-p/--print` mode.
403
+ - **No session auto-resume** — there's no "resume your last session?" prompt at startup;
404
+ use `/history` (local sessions) or `--resume <chat-id>` (dashboard chats).
405
+ - **Proxy env vars are not consumed** — `HTTPS_PROXY`/`HTTP_PROXY` are read into config but
406
+ outbound HTTP does not yet route through a proxy agent (matters on corporate networks).
407
+ - Planned for Phase 4+: per-pattern permissions, self-verification, checkpoints/rewind, and an OS sandbox.
408
+
409
+ ## Contributing
410
+
411
+ PRs must pass the CI pipeline (`npm ci` + `npm audit` + lint + tests on Linux/macOS/Windows, Node 18 & 20) before they can be merged. Run `npm ci && npm run lint && npm test` locally first. Any dependency change must follow the **Dependency Policy** above (exact pin, committed lockfile, justification).
412
+
269
413
  ## License
270
414
 
271
415
  MIT
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Embedding SDK example (Task 5.2)
6
+ // ---------------------------------------------------------------------------
7
+ //
8
+ // Shows the supported, stable way to embed the agent in another program via the
9
+ // `createAgent` facade: a permission policy that defaults safe, streaming
10
+ // events, the structured run result, and the required close() teardown.
11
+ //
12
+ // Run it against any OpenAI-compatible endpoint:
13
+ //
14
+ // SEMALT_API_BASE=http://127.0.0.1:8800 \
15
+ // SEMALT_API_KEY=sk-… \
16
+ // SEMALT_MODEL=my-model \
17
+ // node examples/embed.js "List the files in this directory"
18
+ //
19
+ // (From outside this repo, `require('@semalt-ai/code')` instead of the relative
20
+ // path below.)
21
+
22
+ const { createAgent } = require('../lib/sdk'); // → require('@semalt-ai/code')
23
+
24
+ async function main() {
25
+ const prompt = process.argv.slice(2).join(' ') || 'Say hello and tell me what tools you have.';
26
+
27
+ const agent = createAgent({
28
+ apiBase: process.env.SEMALT_API_BASE || 'http://127.0.0.1:8800',
29
+ apiKey: process.env.SEMALT_API_KEY || 'any',
30
+ model: process.env.SEMALT_MODEL || 'default',
31
+
32
+ // Permission policy. With NONE of these, the SDK refuses every mutating
33
+ // tool (the safe default). Here we approve most tool calls but veto
34
+ // anything destructive — your host decides.
35
+ approve: async ({ tag, description }) => {
36
+ const denied = new Set(['delete_file', 'remove_dir', 'move_file']);
37
+ const ok = !denied.has(tag);
38
+ console.error(`[approve] ${ok ? 'ALLOW' : 'DENY '} ${tag} — ${description}`);
39
+ return ok;
40
+ },
41
+
42
+ // The OS sandbox + deny-list stay ON by default. To run unsandboxed when the
43
+ // kernel primitive is missing you'd opt in explicitly, e.g.:
44
+ // sandbox: { mode: 'off' },
45
+ // onUnsandboxed: async () => true,
46
+ });
47
+
48
+ // Stream activity (advisory — the run result is authoritative).
49
+ agent.on('token', (t) => process.stdout.write(t));
50
+ agent.on('tool', (e) => console.error(`\n[tool] ${e.tag} (${e.ms}ms)`));
51
+ agent.on('warning', (m) => console.error(`[warn] ${m}`));
52
+
53
+ try {
54
+ const res = await agent.run(prompt);
55
+ console.log('\n\n--- result ---');
56
+ console.log(res.result);
57
+ console.log('--- meta ---');
58
+ console.log(JSON.stringify({
59
+ toolCalls: res.toolCalls.length,
60
+ usage: res.usage,
61
+ cost: res.cost,
62
+ stopReason: res.stopReason,
63
+ verifyStatus: res.verifyStatus,
64
+ }, null, 2));
65
+ } finally {
66
+ // ALWAYS close — releases MCP connections / spawned processes.
67
+ await agent.close();
68
+ }
69
+ }
70
+
71
+ main().catch((err) => {
72
+ console.error('embed example failed:', err.message);
73
+ process.exit(1);
74
+ });
package/index.js CHANGED
@@ -6,7 +6,8 @@ const os = require('os');
6
6
  const path = require('path');
7
7
 
8
8
  const { PACKAGE_JSON } = require('./lib/constants');
9
- const { loadConfig, saveConfig, configSet, configShow } = require('./lib/config');
9
+ const { loadConfig, loadUserConfig, saveConfig, configSet, configShow, userLayerForPersist, readUserConfig, loadProjectConfig } = require('./lib/config');
10
+ const { loadRuleLayers } = require('./lib/permission-rules');
10
11
  const ui = require('./lib/ui');
11
12
  const { registerTerminalCleanup } = require('./lib/ui/terminal');
12
13
  const { createPermissionManager } = require('./lib/permissions');
@@ -14,6 +15,7 @@ const { createToolExecutor, extractToolCalls } = require('./lib/tools');
14
15
  const { readFileContext } = require('./lib/context');
15
16
  const { createApiClient } = require('./lib/api');
16
17
  const { createAgentRunner } = require('./lib/agent');
18
+ const { createCheckpointStore, latestSession } = require('./lib/checkpoints');
17
19
  const { createCommands } = require('./lib/commands');
18
20
  const { parseArgs } = require('./lib/args');
19
21
  const { CONFIG_PATH } = require('./lib/constants');
@@ -31,9 +33,19 @@ function getConfig() {
31
33
  return config;
32
34
  }
33
35
 
36
+ // Persist a caller's config object to the USER file, then re-merge the layered
37
+ // view. Only the keys the caller actually changed (vs the current merged view)
38
+ // are layered onto config.json, so env/project/flag overrides the caller merely
39
+ // carried along are never baked in (Task 2.2). All getConfig()/setConfig() call
40
+ // sites are unchanged — they still see the merged view and pass full objects.
41
+ function persistConfig(nextConfig) {
42
+ const layer = userLayerForPersist(nextConfig, config, loadUserConfig());
43
+ saveConfig(layer);
44
+ config = loadConfig();
45
+ }
46
+
34
47
  function setConfig(nextConfig) {
35
- config = nextConfig;
36
- saveConfig(config);
48
+ persistConfig(nextConfig);
37
49
  }
38
50
 
39
51
  // Pre-scan argv for permission tier flags before creating PermissionManager
@@ -47,17 +59,99 @@ if (_argv.includes('--allow-all')) {
47
59
  if (_argv.includes('--allow-net')) _allowedTiers.push('net');
48
60
  }
49
61
  const _readonly = _argv.includes('--readonly');
62
+ // The single explicit opt-out of all safety. Pre-scanned here (like the tier
63
+ // flags) so the PermissionManager is constructed with the right mode before any
64
+ // command runs. tools.js reads the same flag from argv for the deny-list bypass.
65
+ const _skipPermissions = _argv.includes('--dangerously-skip-permissions');
50
66
 
51
- const permissionManager = createPermissionManager(ui, { allowedTiers: _allowedTiers, readonly: _readonly });
52
- const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(permissionManager, ui, getConfig);
67
+ // Per-pattern permission rules (Task 4.1). The user and project layers are read
68
+ // INDEPENDENTLY (not through the shallow-merged config) so the project layer
69
+ // stays separate and can be structurally prevented from widening the user
70
+ // posture. Malformed rules are dropped with a startup warning.
71
+ const _ruleLayers = loadRuleLayers(
72
+ readUserConfig(),
73
+ loadProjectConfig(process.cwd()),
74
+ // audit: allowed — pre-UI startup warning, fires before the TUI initialises.
75
+ (msg) => process.stderr.write(`⚠ ${msg}\n`),
76
+ );
77
+
78
+ const permissionManager = createPermissionManager(ui, {
79
+ allowedTiers: _allowedTiers,
80
+ readonly: _readonly,
81
+ skipPermissions: _skipPermissions,
82
+ rules: _ruleLayers,
83
+ cwd: process.cwd(),
84
+ });
85
+ // Checkpoints & rewind (Task 4.3). One store per process, shared by the executor
86
+ // (capture point) and the agent runner (per-turn linkage). It auto-generates a
87
+ // session id; cmdChat realigns it with the chat session.id before any turn.
88
+ //
89
+ // Restore-path re-validation (Task 4.3b, Part 1): a rewind that would write/delete
90
+ // a path is re-checked against the CURRENT guards — isPathSafe (CWD confinement /
91
+ // --allow-anywhere), the secret-file guard, the protected-config write guard, and
92
+ // any active `deny` permission rule — so a restore can never re-write a path the
93
+ // guards now forbid. A failing target is refused (skipped), never aborting the
94
+ // whole rewind; `force` does NOT bypass this (it overrides only the external-mod
95
+ // check). This guard lives in the executor owner, not anywhere the model reaches.
96
+ const { isPathSafe: _isPathSafe, isProtectedSecretPath: _isProtectedSecretPath, isProtectedConfigPath: _isProtectedConfigPath } = require('./lib/tools');
97
+ function restoreGuard(targetPath, { willDelete } = {}) {
98
+ if (!_isPathSafe(targetPath)) return { ok: false, reason: 'path is now outside the allowed area (isPathSafe / --allow-anywhere)' };
99
+ if (_isProtectedSecretPath(targetPath)) return { ok: false, reason: 'path is a protected secret file' };
100
+ if (_isProtectedConfigPath(targetPath)) return { ok: false, reason: 'path is a protected config path' };
101
+ const verdict = permissionManager.resolveRule([willDelete ? 'delete_file' : 'write_file', targetPath]);
102
+ if (verdict && verdict.decision === 'deny') {
103
+ return { ok: false, reason: `blocked by a deny permission rule${verdict.reason ? ` (${verdict.reason})` : ''}` };
104
+ }
105
+ return { ok: true };
106
+ }
107
+ const checkpointStore = createCheckpointStore({ getConfig, restoreGuard });
108
+ // OS sandbox fallback approver (Task 4.4). When the kernel sandbox is unavailable
109
+ // in `auto` mode, agentExecShell asks a HUMAN here before running a command
110
+ // unsandboxed. Non-TTY → refuse (no way to ask → never a silent unsandboxed run).
111
+ // This lives in the executor owner, NOT anywhere the model can reach, so the
112
+ // agent can never approve its own escape.
113
+ async function onUnsandboxed({ command, reason, installHint } = {}) {
114
+ if (!process.stdin.isTTY || !process.stdout.isTTY) return false;
115
+ // audit: allowed — interactive confirm outside the agent's stream.
116
+ process.stderr.write(`\n⚠ OS sandbox unavailable (${reason}). The command will run WITHOUT kernel-level confinement:\n ${command}\n`);
117
+ if (installHint) process.stderr.write(` ${installHint}\n`);
118
+ try {
119
+ const idx = await ui.interactiveSelect(
120
+ ['No, do not run it', 'Yes, run it unsandboxed'],
121
+ (item, isSelected) => {
122
+ const cursor = isSelected ? `${ui.FG_YELLOW}❯${ui.RST}` : ' ';
123
+ const style = isSelected ? ui.FG_CYAN : ui.FG_GRAY;
124
+ return ` ${cursor} ${style}${item}${ui.RST}`;
125
+ },
126
+ { initialIndex: 0 },
127
+ );
128
+ return idx === 1;
129
+ } catch {
130
+ return false;
131
+ }
132
+ }
53
133
  const apiClient = createApiClient({
54
134
  getConfig,
135
+ // Route the api.js learned-context-length persistence through the same
136
+ // user-layer rebase so a learned value lands in config.json without baking
137
+ // in any active env/project/flag override (Task 2.2).
55
138
  saveConfig: (nextConfig) => {
56
- saveConfig(nextConfig);
57
- config = nextConfig;
139
+ persistConfig(nextConfig);
58
140
  },
59
141
  ui,
60
142
  });
143
+ const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(permissionManager, ui, getConfig, {
144
+ checkpointStore,
145
+ onUnsandboxed,
146
+ // Web-fetch secondary summarizer (Task W.1): http_get runs a separate cheap
147
+ // LLM call to summarize extracted page content; only the summary enters the
148
+ // main context.
149
+ webChat: (messages, opts) => apiClient.chatComplete(messages, opts),
150
+ // Web search (Task W.2b): the web_search tool calls the backend /api/search
151
+ // via dashboardSearch and returns compact snippets so the agent can pick
152
+ // targeted URLs to fetch with http_get instead of guessing.
153
+ webSearch: (query, opts) => apiClient.dashboardSearch(query, opts),
154
+ });
61
155
  const { runAgentLoop } = createAgentRunner({
62
156
  chatStream: apiClient.chatStream,
63
157
  extractToolCalls: (reply, options = {}) => extractToolCalls(reply, {
@@ -70,7 +164,38 @@ const { runAgentLoop } = createAgentRunner({
70
164
  permissionManager,
71
165
  ui,
72
166
  getConfig,
167
+ checkpoints: checkpointStore,
168
+ // Command hooks + self-verification run through the same OS sandbox as
169
+ // agentExecShell (Pre-Task 5.0a); share the human-approval fallback so an
170
+ // unavailable sandbox can be approved interactively (never a silent run).
171
+ onUnsandboxed,
73
172
  });
173
+ // Subagents (Task 3.6). Register the `spawn_agent` tool once at startup so it is
174
+ // available in both interactive chat and headless one-shot runs. The manager
175
+ // builds CONSTRAINED child runners that share this process's permission manager
176
+ // (no privilege escalation) and reuse the same executors. Custom agent
177
+ // definitions are discovered from .semalt/agents (project) + ~/.semalt-ai/agents.
178
+ const { createSubagentManager, discoverAgentDefs, buildSpawnAgentEntry } = require('./lib/subagents');
179
+ const { registerDynamicTool } = require('./lib/tool_registry');
180
+ try {
181
+ const subagentManager = createSubagentManager({
182
+ chatStream: apiClient.chatStream,
183
+ extractToolCalls: (reply, options = {}) => extractToolCalls(reply, {
184
+ repairMalformedXml: !!getConfig().repair_malformed_tool_xml,
185
+ ...options,
186
+ }),
187
+ agentExecShell,
188
+ agentExecFile,
189
+ describePermission,
190
+ permissionManager,
191
+ ui,
192
+ getConfig,
193
+ agentDefs: discoverAgentDefs({ cwd: process.cwd() }),
194
+ maxConcurrency: getConfig().subagents && getConfig().subagents.max_concurrency,
195
+ });
196
+ registerDynamicTool(buildSpawnAgentEntry(subagentManager));
197
+ } catch { /* subagents are best-effort; never block startup */ }
198
+
74
199
  const commands = createCommands({
75
200
  getConfig,
76
201
  setConfig,
@@ -80,6 +205,7 @@ const commands = createCommands({
80
205
  runAgentLoop,
81
206
  readFileContext,
82
207
  agentExecShell,
208
+ checkpointStore,
83
209
  });
84
210
 
85
211
  async function main() {
@@ -92,6 +218,24 @@ async function main() {
92
218
 
93
219
  const command = rawArgs[0];
94
220
 
221
+ // Internal entry (Task 5.3): the detached child of a background task. Not a
222
+ // user-facing command — `semalt-code run --background` spawns it. It reads its
223
+ // spec from <taskDir> and runs the agent via the SDK facade with the
224
+ // launch-fixed policy, writing progress/result/status into the task dir. No
225
+ // terminal to reach after this point — pure execution.
226
+ if (command === '__bg-exec') {
227
+ const taskDir = rawArgs[1];
228
+ if (!taskDir) { process.exit(1); }
229
+ const { runBackgroundChild } = require('./lib/background');
230
+ try {
231
+ const r = await runBackgroundChild({ taskDir });
232
+ process.exit(r && r.status === 'completed' ? 0 : 1);
233
+ } catch {
234
+ process.exit(1);
235
+ }
236
+ return;
237
+ }
238
+
95
239
  if (command === '--help' || command === '-h') {
96
240
  writer.scrollback(`
97
241
  Semalt.AI — Self-hosted AI Coding Assistant
@@ -104,17 +248,25 @@ Commands:
104
248
  code <prompt> Generate code from a prompt
105
249
  edit <file> <instruction> Edit a file with AI
106
250
  shell <command> Run and optionally analyze a shell command
251
+ run --background <prompt> Launch a detached background agent task
252
+ tasks <subcmd> Manage background tasks: list | status | result | kill | prune
107
253
  login Authorize CLI via browser
108
254
  whoami Show current authorized user
109
255
  logout Clear current CLI login
256
+ auth set-key [k] Store the API key in the OS keychain (not plaintext config)
257
+ mcp <subcmd> Manage MCP servers: list | status | add | remove | auth
110
258
  models Choose a model
111
259
  init Initialize config
260
+ rewind [seq] [code|conversation|both] List checkpoints or restore files and/or conversation (default both)
261
+ sandbox Show OS sandbox status (mode, tool, availability, network)
112
262
 
113
263
  Options:
114
264
  -m, --model <name> Model name
115
265
  -r, --resume <chat-id> Resume a saved chat (chat command)
116
266
  -f, --file <path> Load file into context (code command)
267
+ --image <path> Attach an image (PNG/JPEG/WebP/GIF); repeatable
117
268
  -a, --analyze Analyze output with AI (shell command)
269
+ -b, --background Launch as a detached background task (run command)
118
270
  --dry-run Don't save changes (edit command)
119
271
  --api-base <url> API base URL (init)
120
272
  --api-key <key> API key (init)
@@ -132,8 +284,20 @@ Options:
132
284
  --allow-net Auto-approve network operations
133
285
  --allow-all Auto-approve everything (use carefully)
134
286
  --allow-anywhere Allow writes outside the project CWD and in sensitive dirs
287
+ --no-network Kernel-level no-network for sandboxed shell commands
288
+ (bwrap --unshare-net / Seatbelt deny). Binary on/off —
289
+ no host proxy, no domain allowlist, no TLS interception.
290
+ Same effect as sandbox.network "off" in config.
135
291
  --readonly Block all write operations
136
- --new Skip session resume prompt
292
+ --max-iterations <n> Cap agent-loop iterations per turn (default 50);
293
+ 0 or "unlimited" removes the cap (power-user choice)
294
+ --no-verify Skip self-verification (config.verify) for this run
295
+ --dangerously-skip-permissions
296
+ DANGER: fully auto-approve every tool call AND disable
297
+ the destructive-command deny-list and config-file read
298
+ guard. The only way to auto-approve in non-TTY mode;
299
+ without it, headless runs refuse calls that would need
300
+ interactive confirmation. Use only in trusted sandboxes.
137
301
  -v, --version Show CLI version
138
302
 
139
303
  Config: ${CONFIG_PATH}
@@ -166,11 +330,45 @@ Config: ${CONFIG_PATH}
166
330
  await commands.cmdWhoAmI();
167
331
  } else if (command === 'logout') {
168
332
  await commands.cmdLogout();
333
+ } else if (command === 'auth') {
334
+ const sub = rawArgs[1];
335
+ if (sub === 'set-key') {
336
+ await commands.cmdAuthSetKey(rawArgs[2]);
337
+ } else {
338
+ process.stderr.write(`Usage: semalt-code auth set-key [key]\n`);
339
+ process.exit(1);
340
+ }
341
+ } else if (command === 'run') {
342
+ const { opts, positional } = parseArgs(rawArgs.slice(1));
343
+ if (opts.background) {
344
+ await commands.cmdRun(opts, positional);
345
+ } else {
346
+ // `run` without --background is a foreground one-shot, like `code`.
347
+ await commands.cmdCode(opts, positional);
348
+ }
349
+ } else if (command === 'tasks') {
350
+ await commands.cmdTasks(rawArgs[1], rawArgs.slice(2));
351
+ } else if (command === 'mcp') {
352
+ await commands.cmdMcp(rawArgs[1], rawArgs.slice(2));
169
353
  } else if (command === 'models') {
170
354
  await commands.cmdModels();
171
355
  } else if (command === 'init') {
172
356
  const { opts } = parseArgs(rawArgs.slice(1));
173
357
  commands.cmdInit(opts);
358
+ } else if (command === 'doctor') {
359
+ const { diagnose, formatDoctorReport } = require('./lib/doctor');
360
+ const ping = async () => {
361
+ const cfg = getConfig();
362
+ if (!cfg.auth_token) return null;
363
+ try { const r = await apiClient.dashboardWhoAmI(); return !!(r && r.user); } catch { return false; }
364
+ };
365
+ const result = await diagnose({ getConfig, pingDashboard: ping });
366
+ writer.scrollback(formatDoctorReport(result));
367
+ await writer.flush();
368
+ } else if (command === 'sandbox') {
369
+ const { sandboxStatusReport } = require('./lib/sandbox');
370
+ writer.scrollback(sandboxStatusReport({ getConfig }));
371
+ await writer.flush();
174
372
  } else if (command === 'audit') {
175
373
  try {
176
374
  const content = fs.readFileSync(AUDIT_LOG, 'utf8');
@@ -189,6 +387,43 @@ Config: ${CONFIG_PATH}
189
387
  writer.scrollback('No audit log found.');
190
388
  }
191
389
  await writer.flush();
390
+ } else if (command === 'rewind') {
391
+ // Standalone rewind: a fresh process with no in-memory session, so target
392
+ // the most-recently-active session's checkpoints. Conversation rewind here
393
+ // operates on the saved session file (SessionStorage) of the same id.
394
+ const { formatCheckpointList, formatRewindResult, normalizeRewindMode, REWIND_MODES } = require('./lib/checkpoints');
395
+ const { SessionStorage } = require('./lib/storage');
396
+ const force = rawArgs.includes('--force') || rawArgs.includes('force');
397
+ const tokens = rawArgs.slice(1).filter((a) => a !== '--force' && a !== 'force');
398
+ const modeToken = tokens.find((t) => REWIND_MODES.includes(String(t).toLowerCase()));
399
+ const mode = normalizeRewindMode(modeToken);
400
+ const target = tokens.find((t) => t !== modeToken);
401
+ const session = latestSession();
402
+ if (!session) {
403
+ writer.scrollback('No checkpoints found.');
404
+ await writer.flush();
405
+ return;
406
+ }
407
+ const store = createCheckpointStore({ getConfig, sessionId: session, restoreGuard });
408
+ if (!target || target === 'list') {
409
+ writer.scrollback(formatCheckpointList(store.list(session), { session }));
410
+ } else if (mode === null) {
411
+ writer.scrollback(`Unknown rewind mode "${modeToken}". Use one of: ${REWIND_MODES.join(', ')}.`);
412
+ } else {
413
+ // Load the saved session's messages for a conversation/both rewind. The
414
+ // checkpoint session id matches the SessionStorage id (chat aligns them).
415
+ let saved = null;
416
+ const wantConversation = mode === 'conversation' || mode === 'both';
417
+ if (wantConversation) { try { saved = new SessionStorage().load(session); } catch { saved = null; } }
418
+ const messages = saved && Array.isArray(saved.messages) ? saved.messages : null;
419
+ const res = store.rewind(target === 'last' ? 'last' : target, { force, session, mode, messages });
420
+ if (res.conversation && res.conversation.ok && saved) {
421
+ saved.messages = res.conversation.messages;
422
+ try { new SessionStorage().save(saved); } catch { /* best effort */ }
423
+ }
424
+ writer.scrollback(formatRewindResult(res));
425
+ }
426
+ await writer.flush();
192
427
  } else if (command === 'config') {
193
428
  const sub = rawArgs[1];
194
429
  if (sub === 'set') {
@@ -213,8 +448,14 @@ Config: ${CONFIG_PATH}
213
448
  }
214
449
  await writer.flush();
215
450
  } else {
216
- const { opts } = parseArgs(rawArgs);
217
- await commands.cmdChat(opts);
451
+ const { opts, positional } = parseArgs(rawArgs);
452
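+ // `-p/--print` turns a bare prompt into a headless one-shot run instead of
453
+ // opening interactive chat (Task 2.4). NOTE(review): only opts.print is checked here, so any --output-format flag presumably has to be mapped onto opts.print by parseArgs — confirm.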
454
+ if (opts.print) {
455
+ await commands.cmdCode(opts, positional);
456
+ } else {
457
+ await commands.cmdChat(opts);
458
+ }
218
459
  }
219
460
  }
220
461