@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,245 @@
1
+ # semalt-code — History, Decisions & Rationale
2
+
3
+ > Dependency-policy rationale, the full "Key Patterns & Invariants" reference,
4
+ > and the "Deferred / Not Yet Implemented" roadmap. **Not auto-loaded** as project
5
+ > memory. The lean `CLAUDE.md` carries the compressed, verified invariant set;
6
+ > this file preserves the long-form rationale and the per-task history.
7
+
8
+ > Per-task (Task X.Y) rationale and the "Tested by …" enumerations live inline in
9
+ > `docs/ARCHITECTURE.md` alongside each subsystem they describe.
10
+
11
+ ---
12
+
13
+ ## Dependency & Supply-Chain Policy (Task 3.2)
14
+
15
+ The project ran **zero runtime dependencies** through Phase 2. Adopting the official
16
+ MCP SDK (`@modelcontextprotocol/sdk`) in v1.9.0 ended that era. The invariant is now
17
+ **minimal, vetted, pinned dependencies** — not "no dependencies."
18
+
19
+ **When a runtime dependency is allowed.** Every new runtime dependency must be:
20
+
21
+ 1. **Minimal** — added only when a Node.js built-in genuinely cannot do the job.
22
+ The bar for the *first* dependency was high on purpose; the bar for the next one
23
+ is the same. Dev-only tooling is still avoided (we lint with `node --check` and
24
+ test with `node:test`).
25
+ 2. **Justified** — a one-line rationale recorded here (see below) and in the PR.
26
+ 3. **Pinned to an exact version** — no `^`/`~`/ranges in `package.json`. Upgrades are
27
+ deliberate, reviewed commits, never silent on `npm install`.
28
+ 4. **Reviewed** — adding/bumping a dependency is a reviewed change, and the
29
+ regenerated `package-lock.json` is committed in the same PR.
30
+
31
+ **Rationale for the web-extraction deps (Task W.1, all pinned exact).** The
32
+ web-fetch pipeline (see **Web Fetch Pipeline** in `docs/ARCHITECTURE.md`) turns raw HTML into
33
+ main-content Markdown — reliably parsing real-world malformed HTML, scoring the
34
+ main article over chrome, and emitting clean Markdown are each large, bug-prone
35
+ surfaces where a hand-rolled regex approach is exactly the wrong call (quality is
36
+ the whole point). The chosen libraries are the reference implementations:
37
+ - **`@mozilla/readability` (`0.6.0`)** — Firefox Reader View's extractor; the
38
+ de-facto standard for "main content of a page." MIT. **Zero transitive deps.**
39
+ - **`turndown` (`7.2.4`)** — the reference HTML→Markdown converter. MIT. One
40
+ transitive dep (`@mixmark-io/domino`, a DOM impl).
41
+ - **`linkedom` (`0.18.12`)** — a light DOM for Readability to operate on
42
+ (`jsdom` is far heavier and unnecessary here). MIT. Transitive footprint:
43
+ `css-select`, `css-what`, `boolbase`, `nth-check`, `domhandler`,
44
+ `domelementtype`, `domutils`, `dom-serializer`, `entities`, `cssom`,
45
+ `htmlparser2`, `html-escaper`, `uhyphen` (`canvas` is an *optional* dep, left
46
+ uninstalled). **Total added: ~18 packages, `npm audit` clean (0 advisories).**
47
+ All three are loaded directly (CommonJS-compatible) from `lib/web-extract.js` —
48
+ no ESM boundary needed (unlike the MCP SDK).
49
+
50
+ **Rationale for `@modelcontextprotocol/sdk` (pinned `1.29.0`).** MCP is an open
51
+ protocol with a non-trivial wire contract (JSON-RPC framing, capability negotiation,
52
+ transport lifecycle, schema validation). Reimplementing it by hand would be a large,
53
+ bug-prone surface to own and keep in spec. The **official** SDK is the reference
54
+ implementation, MIT-licensed, and tracks the spec — exactly the case where a vetted
55
+ dependency beats a built-in reimplementation. It is the foundation Task 3.3 builds the
56
+ MCP client on.
57
+
58
+ **ESM/CJS boundary.** The SDK is **ESM-only** (`"type": "module"`); this project is
59
+ CommonJS. A CJS module cannot `require()` an ESM-only package. The entire codebase
60
+ stays CommonJS — the SDK is loaded in exactly one place, `lib/mcp/boundary.js`, via
61
+ dynamic `import()`, which re-exposes a CJS-friendly async surface (`loadSdk`,
62
+ `createClient`, `createStdioTransport`). No other module imports the SDK directly.
63
+ See **MCP Boundary** in `docs/ARCHITECTURE.md`.
64
+
65
+ **Lockfile + CI guardrails.** `package-lock.json` is committed. CI (`.github/workflows/ci.yml`) runs:
66
+ - `npm ci` — installs strictly from the lockfile; fails on package.json↔lockfile drift (integrity).
67
+ - `npm audit --omit=dev --audit-level=high` — fails the build on a **HIGH or CRITICAL**
68
+ advisory in the **runtime** (production) dependency tree. Dev deps are excluded
69
+ (there are none today).
70
+
71
+ **Audit-findings policy.** When `npm audit` flags an advisory:
72
+
73
+ - **Critical / High** → **blocking.** CI fails. Resolve before merge by bumping to a
74
+ patched pinned version (regenerate + commit the lockfile), or — if no fix exists —
75
+ removing/replacing the dependency. A temporary, time-boxed exception requires an
76
+ explicit `npm audit` allow-list entry **with a written justification and a tracking
77
+ issue**; it is not the default.
78
+ - **Moderate / Low** → **non-blocking** (the `--audit-level=high` gate lets them pass)
79
+ but **tracked**: open an issue and address on the next dependency-maintenance pass.
80
+ Do not raise the gate to fail on these without agreement — noisy gates get ignored.
81
+ - **Routine maintenance** → periodically run `npm audit` and `npm outdated`; dependency
82
+ bumps follow the pinning + review rules above.
83
+
84
+ ---
85
+
86
+
87
+ ## Key Patterns & Invariants
88
+
89
+ - **Minimal, pinned dependencies**: prefer Node.js built-ins; a runtime dependency must be minimal, justified, pinned to an exact version, and reviewed (see **Dependency & Supply-Chain Policy**). Today: `@modelcontextprotocol/sdk` (MCP) and the web-extraction set `@mozilla/readability` + `linkedom` + `turndown` (Task W.1).
90
+ - **CommonJS**: all files use `require()`/`module.exports`. Do not use ES `import`/`export`. The one exception is the **dynamic** `import()` inside `lib/mcp/boundary.js`, which is the sole bridge to the ESM-only MCP SDK — the project itself stays CommonJS.
91
+ - **Streaming**: `api.js` manually parses `text/event-stream`. The parser in `chatStream()` handles partial JSON lines — be careful editing it.
92
+ - **Permissions are per-session**: `PermissionManager` resets on each CLI invocation. Approvals never persist to disk. In non-TTY mode tool calls that would normally need interactive confirmation are **refused** (not auto-approved) unless `--dangerously-skip-permissions` is set, or the tag is pre-approved by an `--allow-*` tier flag.
93
+ - **Destructive-command deny-list** (`lib/deny.js`): every shell call (`exec`/`shell`) passes through `classifyShellCommand()` at the single chokepoint in `agentExecShell`, in *all* modes and regardless of `--allow-*` flags. Handling depends on the **initiator**:
94
+ - **Agent-initiated** (the model asked, the default): any deny-list hit is a **hard block** — `rm -rf`, `curl … | sh`, disk-wipe/fork-bomb patterns, recursive chmod/chown on a system root, and writes to system paths.
95
+ - **User-initiated** (a human typed `!cmd` or `semalt-code shell`): the user owns their machine, so a deny-list hit is **not** hard-blocked. The exception is the **catastrophic subset** (`catastrophic: true` — disk-wipe / block-device write, fork bomb), which interposes a single y/N confirmation as a typo guard; all other deny-listed user commands run with a `bypassed` note.
96
+ - The only full bypass (skips classification entirely) is `--dangerously-skip-permissions`.
97
+ - **Cross-platform + canonicalized (Task 4.4):** the list now covers the
98
+ **Windows** destructive set (`del /s`, `rd`/`rmdir /s`, `Remove-Item -Recurse
99
+ -Force`, `format`, `Format-Volume`, `Clear-Disk`, `cipher /w`, `diskpart …
100
+ clean`) in addition to POSIX — relevant because native Windows has no OS
101
+ sandbox. Matching also runs against a **procfs-root-canonicalized** variant
102
+ (`/proc/self/root` and `/proc/<pid>/root` rewritten to `/`) so a
103
+ `/proc/self/root/etc/…` bypass is caught by the same system-path matchers
104
+ (the resolved-path principle, shared with the OS sandbox).
105
+ - **Untrusted web content**: `http_get` runs the **web-fetch pipeline** (Task W.1 / W.1b, `mode` = summarized→extract→Markdown→secondary-LLM summary / extracted→Markdown / raw→original token-capped content) so by default only a compact result enters context (`raw` mode deliberately returns the original markup, still **token-capped**, for page analysis); the result in **every** mode is wrapped in the explicit `<<<UNTRUSTED_EXTERNAL_CONTENT>>>` block (`lib/agent.js`), and the secondary summarizer treats the page as data-only (a page injection could have steered it). The system prompt (`lib/prompts.js`) instructs the model never to act on instructions inside such a block. MCP tool results and **lifecycle-hook output** reuse the same fence. See **Web Fetch Pipeline**.
106
+ - **Lifecycle hooks are deny-listed + sandboxed shell + untrusted output** (`lib/hooks.js`): a `PreToolUse` non-zero exit blocks the tool; every hook command passes through `checkShellDenylist` AND the **OS sandbox** (`resolveSandboxedSpawn`, Pre-Task 5.0a) before running; hook stdout is fenced as untrusted before it reaches the model; timeouts/sandbox-refusals/failures are contained and never crash the loop. **Project-layer command hooks and `verify.command` are quarantined** (`loadHookLayers`/`loadVerifyLayers`): a cloned-repo `.semalt/config.json` can never introduce host-privileged execution, only inert prompt text.
107
+ - **`--readonly` blocks every file-mutating tool** (`READONLY_BLOCKED`, `lib/permissions.js`, completed in Pre-Task 5.0c): `write_file`, `append_file`, `edit_file`, `replace_in_file`, `delete_file`, `make_dir`, `remove_dir`, `move_file`, `copy_file`, `upload`, `download`. The block is enforced at the executor (`permissionManager.readonlyBlock(tag)`), so it holds for both the XML and native paths; `describePermission` also short-circuits the gate (no approval prompt precedes the deterministic block). **Scope decision (load-bearing): `--readonly` governs FILE TOOLS only.** Shell (`exec`/`shell`) is **not** in the set — a read-only session must still run read-only commands (`ls`, `git status`), and a shell command's arbitrary write side effects are the **OS sandbox + deny-list's** job to confine (the right layer post-Pre-Task 5.0a), not `--readonly`. So `--readonly` is an honest "no file-tool writes," not a false "no writes at all." Read-only file tools (`read_file`, `grep`, `glob`, `search_in_file`, `file_stat`, `list_dir`) work unchanged. Tested by `test/readonly-tools.test.js`.
108
+ - **Secret-file read guard**: `isProtectedSecretPath()` in `tools.js` refuses reads/copies/moves of `config.json`, `memory.json`, and `audit.log` via file tools — **not** overridable by `--allow-anywhere` (only by `--dangerously-skip-permissions`).
109
+ - **Config-write guard** (`isProtectedConfigPath()` in `tools.js`, Pre-Task 5.0b): the write-side companion to the read guard. Every write executor (`write_file`, `append_file`, `edit_file`, `replace_in_file`, `move_file`/`copy_file` **dst**, `upload`, `download`) refuses to write into the **protected-config set** — the whole `~/.semalt-ai` dir **and** every project `.semalt` dir from the CWD up to the repo root, **including files that do not yet exist** (directory-prefix matched on the resolved path, so a missing `.semalt/config.json`/`agents/*.md`/hook is covered). The set is defined once as `protectedConfigDirs` (`lib/constants.js`) and shared with the OS sandbox's `protectedPaths`. Same bypass policy as the read guard: **not** overridable by `--allow-anywhere`, only by `--dangerously-skip-permissions` (human-only). This guards the **agent's** file tools and the sandboxed shell — a human editing their own config in an editor is unaffected. Tested by `test/config-write-guard*.test.js`, `test/path-guards.test.js`, and the kernel case in `test/sandbox-integration.test.js`.
110
+ - **Per-pattern permission rules** (`lib/permission-rules.js`, Task 4.1): allow/deny/ask rules matching tool + argument (glob/regex), layered user→project. **Project rules can only NARROW** — every project `allow` is structurally dropped before resolution, so a cloned-repo `.semalt/config.json` can never widen the user posture. Precedence is total/deterministic (deny>ask>allow, most-specific then most-restrictive). Arguments are canonicalized (`..`/symlink/abs-rel) before matching; pathological/malformed rules fail closed; an `allow` never bypasses the deny-list, secret guard, `--readonly`, or `isPathSafe` (those stay in the executors). A `deny` rule holds even under `--dangerously-skip-permissions`. See **Per-Pattern Permissions** in `docs/ARCHITECTURE.md`.
111
+ - **Checkpoints & rewind** (`lib/checkpoints.js`, Task 4.3 / 4.3b): before each file-tool mutation the file's prior state is snapshotted (post-gate, pre-mutation, in `agentExecFile`) so `/rewind` can restore it — **file-tool changes only; shell side effects are not reversible.** Capture is fail-safe (a snapshot failure never blocks the mutation); a denied/withheld call produces no checkpoint; subagent mutations are checkpointed into the parent session. Delete/move are reversed explicitly; an external-modification check warns/asks before clobbering out-of-band edits. A per-file size cap and per-session retention are enforced. **Rewind is human-only (no rewind tool in the registry).** Task 4.3b: the restore path **re-validates the current guards** (`isPathSafe`/secret/protected-config/`deny` rule) per target — a now-forbidden path is refused/skipped, and `force` overrides only the external-mod check, not the guards; **three restore modes** `code`/`conversation`/`both` (default both) restore files, history, or the linked state, with conversation truncation cutting on **turn boundaries** (no orphaned `tool_call`; discard policy) — all on the **unchanged** on-disk schema. See **Checkpoints & Rewind** in `docs/ARCHITECTURE.md`.
112
+ - **Native git tools** (`lib/tool_registry.js`, Task 5.1): eight first-class git tools shelling out through the **same** `agentExecShell` sandbox + deny-list chokepoint as `<shell>` (no privileged path around confinement), parsing output into structured results. Read-only (`git_status`/`git_diff`/`git_log`, plus the *list* ops of `git_branch`/`git_worktree`) return a null permission descriptor; mutating (`git_add`/`git_commit`/`git_branch`/`git_checkout`/`git_worktree` add/remove) require approval, honor `--readonly`, and pass the per-pattern rules. `git_commit` requires a real non-empty message (empty → error, never a placeholder). **Destructive-git ↔ checkpoint honesty:** git operations are NOT reversible via `/rewind` (checkpoints snapshot file-tool mutations only) — stated in the descriptions and prompt text. Not-a-repo / git-absent degrade gracefully. See **Native Git Tools** in `docs/ARCHITECTURE.md`.
113
+ - **API-key sourcing** (`lib/secrets.js`): precedence is `SEMALT_API_KEY` env → OS keychain (macOS `security` / Linux `secret-tool` / Windows PasswordVault) → `config.json`. Keys from env/keychain are never written back to config; `configShow` reports only `api_key_source`. Store a key with `semalt-code auth set-key`.
114
+ - **Token counting is approximate**: `estimateTokens()` divides char count by 4. It is used only for the `/compact` display — do not rely on it for hard limits.
115
+ - **Context trimming is proactive when a limit is known**: `chatStream()` uses the in-process `_sessionInputLimits` learned from a prior 400 overflow first, then falls back to `config.context_length * 0.9`. When neither is set, no pre-flight trim runs and the client relies on the reactive 400/413 handler (which then persists the discovered window). `Metrics.tokenLimitStatus()` returns `{ used, limit: null }` until a limit is learned, so the status bar shows "N tok · limit unknown" instead of hiding the line.
116
+ - **Shell/exec output entering context is bounded** (Task W.6, `capShellOutput` in `lib/agent.js`): the model-facing shell result is double-bounded — a **head+tail line cap** (`max_output_lines`, default 50, split first ~60% + last ~40% via `OUTPUT_HEAD_RATIO`) eliding the middle, **then** a **token safety net** (`max_output_tokens`, default 10000, reusing the web pipeline's `capToTokens`) so a few enormous lines (minified JS, a binary `cat`) can't blow context. The elision notice teaches the W.5-enabled redirect-to-file→grep pattern. **The exit code stays on its own line, so truncating output VOLUME never hides the command's OUTCOME** (a non-zero exit / failure is always surfaced). Applied at the context boundary in the agent loop — distinct from the **UI** cap (`lib/ui/diff.js`, display only), which stays. Before W.6 the cap was UI-only and the model received the **entire** unbounded stdout+stderr (the #1 context risk). Pure helper, unit-tested on the model-facing text + a real-loop assertion (`test/shell-output-cap.test.js`). MCP/subagent output bounding is Task W.8 (below); W.9 unifies all the paths into a shared chokepoint.
117
+ - **MCP & subagent results entering context are bounded** (Task W.8, `formatMcpResult`/`formatSubagentResult` in `lib/agent.js`): the last two unbounded paths. Both apply `capToTokens` (the W.5–W.7 standard) to the result text **before** wrapping it in the `<<<UNTRUSTED_EXTERNAL_CONTENT>>>` fence, with **distinct budgets reflecting their nature**: **MCP is stricter** (`mcp.max_result_tokens`, default **10000**) because the payload size is third-party/server-controlled and untrusted — the riskiest path; **subagent is generous** (`subagents.max_result_tokens`, default **20000**) because the child's final text is our own deliberate, synthesized answer (a safety net against a verbose child). For MCP the truncation notice sits **inside** the fence with the capped content — capping never weakens the untrusted perimeter; subagent isolation / no-escalation (3.6/4.5) are unchanged (this bounds returned-text size only). A small result passes through fully, no notice. Pure helpers, unit-tested on the model-facing/parent-facing text incl. the fence-still-present and budgets-differ cases + real-loop assertions (`test/result-cap.test.js`).
118
+ - **`read_file` is paginated** (Task W.7, `formatReadResult` in `lib/agent.js`): `read_file` used to dump the **whole file verbatim** into context (`File <path>:\n` + the entire content); the only guard was a hard byte refusal at `max_file_size_kb`. Worst case ~128k tokens for a 500 KB file. Now the **model-facing** result is paginated, mirroring the Claude Code standard: under a **line cap** (`read_line_cap`, default **2000**) the file reads **byte-for-byte as before** (no regression for the common small-file case); over the cap it returns the first page + a **`[PARTIAL]` notice** — `Showing lines 1–2000 of 5234. Read more with start_line=2001.` **`start_line`/`end_line`** (on both XML + native rails; absent → null, tuple parity) read an explicit slice, **also line-capped** so a huge explicit range can't dump everything. A **token safety net** (`read_max_tokens`, default **25000**, reusing the web pipeline's `capToTokens`) bounds the pathological few-but-enormous-lines case (one 100 KB minified line) the line cap misses — consistent with W.6's double-bound. The bound is applied at the **context boundary** in the formatter (the executor still returns the full content, like W.5/W.6); pagination — not the byte cap — is the primary bound, so `max_file_size_kb` is now a **backstop** (raised default **50 MB**) ruling out a multi-GB whole-file slurp (lower it to hard-refuse smaller files). **Line numbers are OPTIONAL, default OFF** (`show_line_numbers`): the **Step 0 finding** is that `edit_file` is **line-number-based** (`lines[N-1]=content`) while `replace_in_file` is **match-based** (regex on a search string) — a mix — so always-on numbers would corrupt copyable snippets for the match path **and** cost ~1.7× per read; the param turns absolute 1-based numbers on (aligned with `edit_file`'s addressing) for when the agent wants line refs to drive an edit. Line indexing matches `edit_file`'s `split('\n')` exactly, so the read→edit loop stays aligned. 
Pure helper, unit-tested on the model-facing text incl. the no-regression small-file case + the PARTIAL large-file case + rail parity + read→edit alignment (`test/read-paginate.test.js`).
119
+ - **grep/glob results are serialized + bounded** (Task W.5, `formatGrepResult`/`formatGlobResult` in `lib/agent.js`): `formatFileResult` now has `case 'grep'`/`case 'glob'` that turn the structured engine result into model-facing text — closing a correctness bug where both fell through the default and the model received `"grep: done"`/`"glob: done"` (the data was computed and even shown in the UI, but never entered context, making grep-first navigation impossible). grep `output_mode` (`content`/`files_with_matches`/`count`) is model-selectable via the spec; `head_limit` (default `DEFAULT_GREP_HEAD_LIMIT`/`DEFAULT_GLOB_HEAD_LIMIT` = 100) + optional `offset` bound what reaches the model — the engine's 1000/5000 internal caps were never a context bound (the result was dropped before it reached context). Over-limit serialization carries a truncation notice telling the agent how to narrow (refine the pattern, switch to `count`/`files_with_matches`, or raise `head_limit`); under-limit results show fully with no notice. The executors (`lib/tool_registry.js`) normalize and attach `output_mode`/`head_limit`/`offset` onto the result; the serializers are pure and tested on the **model-facing** text (`test/grep-glob-serialize.test.js`, incl. the real-loop regression).
120
+ - **Tool output enters context ONLY via the `boundToolOutput` chokepoint** (Task W.9, `lib/agent.js`): the size analogue of the `resolveSandboxedSpawn` sandbox chokepoint. W.5–W.8 each bounded a previously-unbounded path, but the `capToTokens`-+-fence step was duplicated ad-hoc in five places — the original bugs (grep/glob `"done"`, shell/MCP/subagent unbounded) were all the **same class**: a path that put output into context without bounding it. `boundToolOutput(text, { budget, notice, fenced })` is the **single application point**: it applies `capToTokens` with the path's **budget** and **notice** function and (when `fenced`) wraps in the `<<<UNTRUSTED_EXTERNAL_CONTENT>>>` fence. **grep/glob, shell, read_file, MCP, subagent — and http_get/web_search — all route through it.** The per-path policy is **deliberately distinct and NOT flattened**: budgets (MCP 10k < subagent 20k < read 25k; shell 10k; grep/glob `DEFAULT_GREP_GLOB_MAX_TOKENS` 10k — a new token net so a few huge minified match lines can't blow context, the W.6 lesson applied to grep's count-bound), notice wording (shell teaches redirect→grep, read teaches narrow-the-range, …), and the fence flag (MCP/subagent/web fenced; file/shell not). **Refactor-safe:** model-facing outputs are byte-identical to W.5–W.8 (the W.5–W.8 test suites pass unchanged); http_get/web_search bodies are already token-capped upstream so they pass **no budget** (fence only). **Structural regression prevention:** a new tool gets bounding by *routing* its output through the chokepoint, not by *remembering* to cap. Pure helper, unit-tested on the chokepoint behavior, per-path policy, the bound-by-construction invariant, and equivalence (`test/output-chokepoint.test.js`). 
The system prompt's `LOCAL_NAVIGATION_NOTICE` (`lib/prompts.js`, both templates) — now actionable post-W.5 — steers the grep-first / read-slice pattern: locate with `grep`/`glob` (`count`/`files_with_matches` modes), then `read_file` only the relevant `start_line`/`end_line` slice; redirect large command output to a file and grep it.
121
+ - **Bounded agent iterations**: the primary loop caps at `config.max_iterations` (default 125, via `DEFAULT_MAX_ITERATIONS` in `constants.js`), overridable with `--max-iterations <n>`; `--max-iterations 0`/`"unlimited"` removes the cap deliberately. Reaching the cap stops gracefully (clear message + `stopReason: "max_iterations"`), never silently. Subagents have their own cap of 12.
122
+ - **Malformed tags are skipped**: each tool dispatch in the agent loop is wrapped in try/catch; errors emit a warning line and continue to the next tool call.
123
+
124
+ ---
125
+
126
+ ## Deferred / Not Yet Implemented
127
+
128
+ This section exists because false documentation has burned this project before (a
129
+ "max 10 iterations" invariant that never existed; coverage assumed but absent). The
130
+ items below are things a reader might reasonably expect from the docs or from peer
131
+ tools but that the code **does not do today**. They are listed honestly so nobody
132
+ builds on a feature that isn't there. Each is marked **Planned (Phase 4+)** —
133
+ on the roadmap — **Out of scope** — no current plan — or ✅ **Done** once it has shipped.
134
+
135
+ **Gaps the re-audit found in existing behavior:**
136
+
137
+ - **MCP in headless / one-shot** — *Planned (Phase 4+).* `connectAll()` runs only in
138
+ interactive `cmdChat` (and the `mcp` management commands); `code`/`edit`/`shell`/`-p`
139
+ never connect a manager, so MCP tools are unavailable there. See **MCP Client → Scope**.
140
+ - **Session auto-resume** — *Planned (Phase 4+).* Sessions are saved, but there is no
141
+ startup prompt offering to resume the most recent (< 24 h) session. Resume is always
142
+ explicit: `/history` (local) or `--resume <id>` (dashboard). See **Session Storage**.
143
+ - **Corporate-proxy consumption** — *Planned (Phase 4+).* `HTTPS_PROXY`/`HTTP_PROXY`
144
+ are parsed into config but `api.js` does not route requests through a proxy agent,
145
+ so they have no effect on outbound HTTP. See **Config hierarchy → Environment**.
146
+
147
+ **Phase 4 roadmap (originally planned in the stated order; every item below is now done):**
148
+
149
+ - **Per-pattern permissions** — ✅ **Done (Task 4.1).** Rich allow/deny/ask rules
150
+ matching tool + argument (glob/regex), layered user→project. See **Per-Pattern
151
+ Permissions** above.
152
+ - **Self-verification** — ✅ **Done (Task 4.2).** When the agent declares done,
153
+ optionally run a configured verify command (advisory feeds the result back;
154
+ enforcing returns the agent to the loop until verify passes, bounded by
155
+ `max_attempts` → `verify_failed`). See **Self-Verification** above.
156
+ - **Checkpoints / rewind** — ✅ **Done (Task 4.3 file half + Task 4.3b
157
+ conversation + restore re-validation).** Per-write file snapshots before each
158
+ file-tool mutation; `/rewind` restores prior content (last or to a chosen
159
+ sequence), with delete/move handled and an external-modification check that never
160
+ silently clobbers out-of-band edits. **File-tool changes only — shell side
161
+ effects are not reversible.** Task 4.3b closed the last deferred 4.3 security
162
+ finding (the restore path now **re-validates the current
163
+ isPathSafe/secret/protected-config/`deny`-rule guards** per target — `force`
164
+ overrides only the external-mod check) and added **three restore modes**
165
+ (`code`/`conversation`/`both`, default both) using the existing turn-linkage,
166
+ with conversation truncation cutting on **turn boundaries** (no orphaned
167
+ `tool_call`; discard policy) on the **unchanged** on-disk schema. Rewind stays
168
+ **human-only** (no rewind tool registered). See **Checkpoints & Rewind** above.
169
+ - **OS sandbox** — ✅ **Done (Task 4.4 filesystem + Task 4.4b network).** Real
170
+ OS-level confinement for shell commands: Seatbelt (macOS) / bubblewrap
171
+ (Linux/WSL2) jail every command and its children, confining writes to the working
172
+ dir and keeping `~/.semalt-ai`/secrets/`/etc` read-only (incl. not-yet-existing
173
+ files), with a fail-safe ask-or-block fallback when the primitive is absent and no
174
+ model-reachable way to disable it. **Network isolation is now done as well —
175
+ binary on/off** (bwrap `--unshare-net` / Seatbelt `(deny network*)`), no host
176
+ proxy / no domain allowlist / no TLS interception, anti-fail-open default. See
177
+ **OS Sandbox** above.
178
+
179
+ **Done since:**
180
+
181
+ - **Native git tooling** — ✅ **Done (Task 5.1).** Eight first-class git tools
182
+ (`git_status`/`git_diff`/`git_log` read-only; `git_add`/`git_commit`/`git_branch`/
183
+ `git_checkout` mutating; `git_worktree` infrastructure) shelling out through the
184
+ sandbox + deny-list chokepoint with structured results. The long tail stays in the
185
+ generic shell. See **Native Git Tools** above.
186
+ - **Embedding SDK** — ✅ **Done (Task 5.2).** Two-tier library surface separated by
187
+ `package.json` `exports`: the stable `createAgent` facade (main entry) and the
188
+ unstable building blocks (`/internals`). Programmatic permission policy that
189
+ defaults to refusing mutations; sandbox/deny-list stay on with explicit opt-out;
190
+ `close()` teardown; per-instance config (process-global limits documented). See
191
+ **Embedding SDK** above.
192
+ - **Background tasks** — ✅ **Done (Task 5.3).** `run --background` launches a
193
+ detached agent process (own process = own global state, reusing the
194
+ `createAgent` facade) with a launch-fixed, refuse-by-default policy and
195
+ sandbox/deny-list on; a file-based task registry (`~/.semalt-ai/tasks/`) drives
196
+ `tasks list|status|result|kill|prune`. Validation runs before detach (no
197
+ orphans); stale/dead tasks are detectable and prunable; kill tree-kills by PID.
198
+ Background-launch is intentionally NOT an agent tool. See **Background Tasks**
199
+ above.
200
+ - **Multimodal image input** — ✅ **Done (Task 5.4).** PNG/JPEG/WebP/GIF attach via
201
+ `--image` (repeatable), in-chat `/image`, and the SDK `images` option; read
202
+ through `isPathSafe`, size-capped (`image_max_bytes`), base64-encoded, media
203
+ type detected from magic bytes. The provider content-part shape (Anthropic-style
204
+ vs OpenAI-style) is selected per profile/heuristic; a text-only model fails loud
205
+ (the image is never silently dropped). PDF input deferred; generation out of
206
+ scope. See **Multimodal Image Input** above.
207
+
208
+ **Planned, not yet scheduled:**
209
+
210
+ - **Cost caps** — hard spend limits per session/turn (today cost is *displayed* via
211
+ `lib/pricing.js`, never enforced).
212
+ - **Auto-update** — self-updating the CLI (today: `npm install -g` manually).
213
+ - **XDG / `%APPDATA%` config dirs** — honoring platform config-dir conventions instead
214
+ of the fixed `~/.semalt-ai/`.
215
+ - **Domain-allowlist network policy** — *deliberately deferred, may stay out of
216
+ scope.* Task 4.4b ships **binary** network isolation (on / kernel-level none); a
217
+ per-domain allowlist ("allow github.com, block the rest") is **not** implemented
218
+ and is **not** a planned increment by default. **Rationale:** domain-granularity
219
+ requires a host-side egress proxy with full network privileges, which is the
220
+ exact design the reference implementation shipped and that was **bypassed
221
+ completely, twice, over 5.5 months** (allowedDomains fail-open CVE-2025-66479, a
222
+ hostname-parser differential, and TLS-MITM breaking Go binaries). We will only
223
+ revisit this if it can be done **without** a host proxy / TLS interception (e.g.
224
+ a kernel/eBPF egress filter on resolved IPs) — until then, binary isolation is
225
+ the robust posture. See **OS Sandbox → Why binary**.
226
+ - **Native-Windows / WSL1 sandbox** — no OS primitive today (bwrap needs the
227
+ user/mount namespaces WSL1 lacks; native Windows has none). On those platforms
228
+ the sandbox degrades to the fail-safe fallback (ask-or-block); the Windows
229
+ deny-list (now covered, Task 4.4) is the remaining shell guard there.
230
+
231
+ **Out of scope (no current plan):**
232
+
233
+ - **Multimodal beyond image input** — still out of scope: **PDF input** (deferred),
234
+ **audio input**, and **image/audio *generation* / output**. Image *input* itself is
235
+ ✅ **Done (Task 5.4)** — PNG/JPEG/WebP/GIF attached via `--image` / `/image` / the SDK
236
+ `images` option, sent provider-specifically to vision models (text-only models fail
237
+ loud). See **Multimodal Image Input** above.
238
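+ - **Background / cloud / scheduling** — long-running background agents beyond the local
239
+ detached `run --background` tasks (✅ Done, Task 5.3 — see **Background Tasks** above), cloud execution, or cron-style scheduling.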
240
+ - **OpenTelemetry** — OTel traces/metrics export.
241
+ - **Managed policy** — centrally-administered org policy enforcement.
242
+ - **Native notifications** — OS-level desktop notifications.
243
+
244
+ ---
245
+
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Embedding SDK example (Task 5.2)
6
+ // ---------------------------------------------------------------------------
7
+ //
8
+ // Shows the supported, stable way to embed the agent in another program via the
9
+ // `createAgent` facade: a permission policy that defaults safe, streaming
10
+ // events, the structured run result, and the required close() teardown.
11
+ //
12
+ // Run it against any OpenAI-compatible endpoint:
13
+ //
14
+ // SEMALT_API_BASE=http://127.0.0.1:8800 \
15
+ // SEMALT_API_KEY=sk-… \
16
+ // SEMALT_MODEL=my-model \
17
+ // node examples/embed.js "List the files in this directory"
18
+ //
19
+ // (From outside this repo, `require('@semalt-ai/code')` instead of the relative
20
+ // path below.)
21
+
22
+ const { createAgent } = require('../lib/sdk'); // → require('@semalt-ai/code')
23
+
24
+ async function main() {
25
+ const prompt = process.argv.slice(2).join(' ') || 'Say hello and tell me what tools you have.';
26
+
27
+ const agent = createAgent({
28
+ apiBase: process.env.SEMALT_API_BASE || 'http://127.0.0.1:8800',
29
+ apiKey: process.env.SEMALT_API_KEY || 'any',
30
+ model: process.env.SEMALT_MODEL || 'default',
31
+
32
+ // Permission policy. With NONE of these, the SDK refuses every mutating
33
+ // tool (the safe default). Here we approve read-only-ish work but veto
34
+ // anything destructive — your host decides.
35
+ approve: async ({ tag, description }) => {
36
+ const denied = new Set(['delete_file', 'remove_dir', 'move_file']);
37
+ const ok = !denied.has(tag);
38
+ console.error(`[approve] ${ok ? 'ALLOW' : 'DENY '} ${tag} — ${description}`);
39
+ return ok;
40
+ },
41
+
42
+ // The OS sandbox + deny-list stay ON by default. To run unsandboxed when the
43
+ // kernel primitive is missing you'd opt in explicitly, e.g.:
44
+ // sandbox: { mode: 'off' },
45
+ // onUnsandboxed: async () => true,
46
+ });
47
+
48
+ // Stream activity (advisory — the run result is authoritative).
49
+ agent.on('token', (t) => process.stdout.write(t));
50
+ agent.on('tool', (e) => console.error(`\n[tool] ${e.tag} (${e.ms}ms)`));
51
+ agent.on('warning', (m) => console.error(`[warn] ${m}`));
52
+
53
+ try {
54
+ const res = await agent.run(prompt);
55
+ console.log('\n\n--- result ---');
56
+ console.log(res.result);
57
+ console.log('--- meta ---');
58
+ console.log(JSON.stringify({
59
+ toolCalls: res.toolCalls.length,
60
+ usage: res.usage,
61
+ cost: res.cost,
62
+ stopReason: res.stopReason,
63
+ verifyStatus: res.verifyStatus,
64
+ }, null, 2));
65
+ } finally {
66
+ // ALWAYS close — releases MCP connections / spawned processes.
67
+ await agent.close();
68
+ }
69
+ }
70
+
71
// Run the example. Any rejection from main() is reported on stderr and turned
// into a non-zero exit code.
main().catch((err) => {
  // A rejection value is not guaranteed to be an Error: fall back to its string
  // form so this handler never throws on null/undefined and never prints
  // "undefined" for a thrown non-Error value.
  const reason = (err && err.message) || String(err);
  console.error('embed example failed:', reason);
  process.exit(1);
});