npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.20.0 - Mend

@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192)

package/.claude/settings.local.json +7 -1
package/.github/workflows/ci.yml +69 -0
package/ARCHITECTURE.md +6 -95
package/CLAUDE.md +196 -316
package/README.md +148 -4
package/docs/ARCHITECTURE.md +1321 -0
package/docs/CONFIG.md +340 -0
package/docs/HISTORY.md +245 -0
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +856 -120
package/lib/api.js +239 -50
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +489 -0
package/lib/commands/chat-slash.js +415 -0
package/lib/commands/chat-turn.js +669 -0
package/lib/commands/chat.js +407 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +360 -11
package/lib/constants.js +401 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +202 -0
package/lib/hooks.js +286 -0
package/lib/images.js +270 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +123 -26
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +99 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2862 -0
package/lib/tool_specs.js +263 -9
package/lib/tools.js +352 -1039
package/lib/ui/anim.js +86 -0
package/lib/ui/ansi.js +17 -27
package/lib/ui/chat-history.js +253 -71
package/lib/ui/create-ui.js +67 -24
package/lib/ui/diff.js +90 -25
package/lib/ui/file-activity.js +236 -0
package/lib/ui/format.js +195 -29
package/lib/ui/input-field.js +21 -11
package/lib/ui/md-stream.js +234 -0
package/lib/ui/render-operation.js +113 -0
package/lib/ui/select.js +1 -4
package/lib/ui/status-bar.js +146 -36
package/lib/ui/stream.js +20 -13
package/lib/ui/theme.js +190 -44
package/lib/ui/tool-operation.js +190 -0
package/lib/ui/utils.js +9 -5
package/lib/ui/web-activity.js +270 -0
package/lib/ui/writer.js +159 -45
package/lib/ui.js +1 -1
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/anim-driver.test.js +153 -0
package/test/ask-user-display.test.js +226 -0
package/test/ask-user-gate.test.js +231 -0
package/test/background.test.js +414 -0
package/test/chat-history-nocolor.test.js +155 -0
package/test/chat-relogin.test.js +207 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/defer-detail-band.test.js +403 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/detail-band-tab-flatten.test.js +242 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/exec-diff.test.js +268 -0
package/test/executors.test.js +599 -0
package/test/extract-tool-calls.test.js +349 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/file-activity.test.js +522 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/grep-path-target.test.js +227 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +143 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +348 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/input-field-ctrl-o.test.js +37 -0
package/test/live-height-physical.test.js +281 -0
package/test/max-iterations.test.js +218 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/md-stream.test.js +183 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +409 -0
package/test/native-live-narration.test.js +254 -0
package/test/output-chokepoint.test.js +188 -0
package/test/output-heredoc-leak.test.js +195 -0
package/test/output-preview.test.js +245 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +362 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/render-operation.test.js +317 -0
package/test/replay-descriptor-xml.test.js +216 -0
package/test/replay-descriptor.test.js +189 -0
package/test/replay-web-aggregate.test.js +291 -0
package/test/replay-web-persist.test.js +241 -0
package/test/result-cap.test.js +233 -0
package/test/running-glyph-anim.test.js +111 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-driver.test.js +93 -0
package/test/status-bar-pause.test.js +164 -0
package/test/status-bar-resync.test.js +188 -0
package/test/stream-parser.test.js +171 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/theme-palette.test.js +166 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/truncate-visible.test.js +78 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/view-image.test.js +199 -0
package/test/web-activity-ordering.test.js +203 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438
package/path +0 -1

package/CLAUDE.md CHANGED

@@ -1,45 +1,23 @@
 # semalt-code — CLI Agent
-Node.js CLI tool that lets AI agents interact with code via an iterative tool-use loop. Zero external dependencies; uses only Node.js built-ins.
-Published as `@semalt-ai/code`. Invokable as `semalt-code` or `semalt`.
----
-## Directory Layout
-```
-semalt-code/
-├── index.js           # Entry point: arg parsing, module wiring, command dispatch
-├── lib/
-│   ├── api.js         # HTTP client for dashboard auth + OpenAI-compatible inference
-│   ├── agent.js       # Agent loop: stream → extract tools → execute → repeat
-│   ├── commands.js    # All CLI command handlers (chat, code, edit, shell, login, …)
-│   ├── tools.js       # File and shell operation implementations
-│   ├── prompts.js     # System prompt for the LLM (tells it to use exec/read/write tags)
-│   ├── ui.js          # Barrel: re-exports everything from lib/ui/
-│   ├── ui/
-│   │   ├── ansi.js        # ANSI escape constants, THEME, color codes, SPINNER_DEFS
-│   │   ├── utils.js       # getCols, getRows, stripAnsi, hr, boxLine, insertCharAt, …
-│   │   ├── diff.js        # renderDiff (LCS diff), renderMarkdown, _mdInline
-│   │   ├── stream.js      # StreamRenderer — live token-by-token terminal output
-│   │   ├── legacy.js      # StatusBar (cmdCode/cmdEdit), interactiveSelect, SelectMenu
-│   │   ├── layout.js      # LayoutManager — terminal geometry, resize events
-│   │   ├── chat-history.js# ChatHistory — bubble rendering, scroll, streaming slots
-│   │   ├── status-bar.js  # FullStatusBar — animated TUI status line
-│   │   ├── input-field.js # InputField, parseKeySequence, SLASH_CMDS
-│   │   └── create-ui.js   # createUI factory + non-TTY no-op fallback
-│   ├── context.js     # Loads file/directory content into the prompt
-│   ├── config.js      # Read/write ~/.semalt-ai/config.json
-│   ├── permissions.js # Per-session approval tracking for tool calls
-│   ├── args.js        # CLI argument parser
-│   ├── constants.js   # CONFIG_PATH, DEFAULT_CONFIG, DEFAULT_API_TIMEOUT_MS
-│   ├── audit.js       # Append-only audit log for all tool executions
-│   ├── storage.js     # Local session persistence and resume
-│   └── metrics.js     # Token counting, cost estimation, latency tracking
-├── package.json       # name: @semalt-ai/code, version: 1.8.0, bin: semalt / semalt-code
-└── README.md
-```
+Node.js CLI tool that lets AI agents interact with code via an iterative tool-use
+loop (stream → extract tool calls → execute → repeat). **Minimal, vetted, pinned**
+runtime dependencies — everything else uses Node.js built-ins. Published as
+`@semalt-ai/code`; invokable as `semalt-code` or `semalt`. Also consumable as a
+library via the `createAgent` facade (`lib/sdk.js`).
+> **This file is auto-loaded as project memory — keep it lean.** Deep detail lives
+> in `docs/` (not auto-loaded):
+> - **`docs/ARCHITECTURE.md`** — per-subsystem internals (MCP, checkpoints, sandbox,
+>   web-fetch pipeline, SDK, subagents, hooks, git tools, …).
+> - **`docs/HISTORY.md`** — dependency-policy rationale, the long-form invariant
+>   reference, and the "Deferred / Not Yet Implemented" roadmap.
+> - **`docs/CONFIG.md`** — full per-key config reference + CLI flags/commands +
+>   slash commands + tool tags/operations + dashboard API endpoints.
+The authoritative *runtime* sources for tool tags and CLI surface are
+`lib/tool_specs.js` / `lib/prompts.js` (tool tags) and `semalt-code --help` (CLI
+flags). `docs/CONFIG.md` mirrors them for humans.
 ---
@@ -47,7 +25,8 @@ semalt-code/
 | Component | Technology |
 |-----------|-----------|
-| Runtime | Node.js ≥ 16, CommonJS (`require`) |
+| Runtime | Node.js ≥ 18, CommonJS (`require`) |
+| Runtime deps | `@modelcontextprotocol/sdk` (ESM, via `lib/mcp/boundary.js`); `@mozilla/readability` + `linkedom` + `turndown` (web-fetch extraction) — all exact-pinned |
 | HTTP | Built-in `http`/`https` modules |
 | Shell exec | `child_process.spawnSync` |
 | File I/O | `fs` module |
@@ -57,297 +36,198 @@ semalt-code/
 ---
-## CLI Commands
-```
-semalt-code                            # interactive chat (default)
-semalt-code chat                       # interactive chat (explicit)
-semalt-code code <prompt>              # one-shot task with optional file context
-semalt-code edit <file> <instruction>  # targeted file edit
-semalt-code shell <command>            # run shell, optionally ask LLM to analyze output
-semalt-code login                      # browser-based device auth against dashboard
-semalt-code logout                     # clear stored auth_token
-semalt-code whoami                     # show authenticated user
-semalt-code models                     # interactive model selector (fetches from dashboard)
-semalt-code init [options]             # create/update ~/.semalt-ai/config.json
-semalt-code audit                      # print last 50 audit log entries
-semalt-code config [set <key> <val>]   # show or update config keys
-```
-### Common Flags
-```
--m, --model <name>        override model for this invocation
--r, --resume <chat-id>    resume a dashboard chat by ID
--f, --file <path>         load file or directory as context
--a, --analyze             have LLM analyze shell output (used with `shell`)
---dry-run                 preview file edits without writing
---api-base <url>          LLM API base URL (overrides config)
---api-key <key>           API key (overrides config)
---dashboard-url <url>     dashboard base URL (overrides config)
---default-model <name>    set default model in config
---show-think              display model reasoning (thinking) content
---debug                   inline debug: per-iteration debug block in chat history (TUI-safe)
---debug-file <path>       extended debug: per-iteration block + raw SSE chunks
-                          + request body dumps written to <path>, nothing to stdout.
-                          Mutually exclusive with --debug.
---allow-fs                auto-approve all filesystem operations
---allow-exec              auto-approve shell command execution
---allow-net               auto-approve network operations
---allow-all               auto-approve everything (use carefully)
---readonly                block all write operations
---new                     skip session resume prompt
--v, --version             print version
--h, --help                print help
-```
-### In-Chat Slash Commands
-| Command | Effect |
-|---------|--------|
-| `/help` | List slash commands |
-| `/file <path>` | Attach file or directory to context |
-| `/history` | Browse and load a local saved session |
-| `/chats` | Browse and resume a saved chat from the dashboard |
-| `/new` | Start a fresh conversation (detach from current saved chat) |
-| `/model [name]` | Show or switch model |
-| `/models` | Interactive model picker from dashboard |
-| `/shell <cmd>` or `!<cmd>` | Execute shell command |
-| `/compact` | Show token usage estimate and session metrics |
-| `/clear` | Reset conversation history |
-| `/approve` | Toggle auto-approval of tool calls |
-| `/config` | Print current config |
-| `/login` | Start device auth flow |
-| `/whoami` | Show current user |
-| `/logout` | Clear auth token |
-| `exit` / `quit` | Exit |
----
-## Agent Loop (`lib/agent.js`)
-Maximum 10 iterations per user turn.
-```
-1. Send messages[] to LLM via chatStream()
-2. Stream response tokens to terminal (StreamRenderer)
-3. After full response: extract tool-call tags from text
-4. If no tool tags → done
-5. For each tag: request user permission (once / always / no)
-6. Execute approved operations via ToolExecutor (wrapped in try/catch)
-7. Append tool results to messages[]
-8. Goto 1
-```
+## Directory Layout
-Each tool dispatch is wrapped in try/catch; errors print a warning and continue to the next tag rather than aborting the loop.
-### Tool Tags (parsed from LLM text)
-```xml
-<exec>shell command here</exec>
-<shell>shell command here</shell>
-<read_file>/absolute/or/relative/path</read_file>
-<read_file path="/path/to/file"/>
-<write_file path="/path/to/file">file content here</write_file>
-<create_file path="/path/to/file">file content here</create_file>
-<append_file path="/path/to/file">content to append</append_file>
-<list_dir>/path/to/dir</list_dir>
-<search_files pattern="*.ts" dir="src"/>
-<delete_file>/path/to/file</delete_file>
-<make_dir>/path/to/dir</make_dir>
-<remove_dir>/path/to/dir</remove_dir>
-<get_env>ENV_VAR_NAME</get_env>
-<set_env name="VAR" value="value"/>
-<move_file src="/old/path" dst="/new/path"/>
-<copy_file src="/src/path" dst="/dst/path"/>
-<edit_file path="/file" line="42">replacement line content</edit_file>
-<search_in_file path="/file">regex pattern</search_in_file>
-<replace_in_file path="/file" search="old" replace="new"></replace_in_file>
-<download>https://example.com/file.zip</download>
-<upload path="/local/path">base64encodedcontent</upload>
-<file_stat>/path/to/file</file_stat>
-<http_get url="https://example.com/api"/>
-<ask_user question="What is your preferred language?"/>
-<store_memory key="project_lang">TypeScript</store_memory>
-<recall_memory key="project_lang"/>
-<list_memories/>
-<system_info/>
 ```
-The system prompt (`lib/prompts.js`) instructs the LLM to use exactly these tags. Do not change tag names without updating both `prompts.js` and the parser in `agent.js`.
----
-## Tool Operations (`lib/tools.js`)
-All operations request permission before execution unless auto-approved.
-Output truncated to `config.max_output_lines` (default 20) to avoid filling context.
-| Action | Description |
-|--------|-------------|
-| `read` | Read file content |
-| `write` | Write file (creates parent dirs) |
-| `append` | Append to file |
-| `list_dir` | List directory contents |
-| `delete_file` | Delete file |
-| `make_dir` | Create directory (recursive) |
-| `remove_dir` | Remove directory (recursive) |
-| `move_file` | Move/rename file |
-| `copy_file` | Copy file |
-| `search_files` | Find files matching glob pattern |
-| `search_in_file` | Regex search within file |
-| `replace_in_file` | Replace text in file (regex, optional flags) |
-| `edit_file` | Replace a specific line number in a file |
-| `get_env` / `set_env` | Read/write environment variables |
-| `download` | HTTP GET → save to file |
-| `upload` | Write base64-encoded content to file |
-| `file_stat` | Stat a file (size, mtime, type, mode) |
-| `http_get` | HTTP GET → return body (truncated to max_output_lines) |
-| `ask_user` | Prompt user for input; auto-answers 'y' in non-TTY mode |
-| `store_memory` | Persist a key/value pair to `~/.semalt-ai/memory.json` |
-| `recall_memory` | Read a key from `~/.semalt-ai/memory.json` |
-| `list_memories` | List all stored memory keys |
-| `system_info` | Return platform, arch, hostname, memory, Node version, cwd |
----
-## Audit Log (`lib/audit.js`)
-Every tool execution is appended to `~/.semalt-ai/audit.log` as NDJSON:
-```json
-{"ts":"2026-01-01T00:00:00.000Z","tag":"exec","input":"{\"command\":\"ls\"}","approved":true,"result":"ok"}
+semalt-code/
+├── index.js                # Entry point: arg parsing, module wiring, command dispatch
+├── lib/
+│   ├── sdk.js              # Embedding SDK: createAgent() STABLE facade (assembles loop/registries/permissions/sandbox per-instance)
+│   ├── internals.js        # UNSTABLE building-blocks barrel (@semalt-ai/code/internals subpath; no semver guarantee)
+│   ├── api.js              # HTTP client: dashboard auth + OpenAI-compatible inference (chatStream/chatComplete/dashboard*)
+│   ├── agent.js            # Agent loop; boundToolOutput chokepoint; untrusted fencing; XML+native tuple convergence
+│   ├── commands/           # CLI + in-chat command handlers: registry (dispatch/help/completion), custom commands,
+│   │   #                     auth, mcp mgmt, oneshot (code/edit/shell/models/init), tasks, chat session/slash/turn
+│   ├── tools.js            # File + shell operation impls; agentExecShell chokepoint; secret/config path guards
+│   ├── tool_registry.js    # Per-tool registration: XML parseAttrs + native fromParams + execute + permission; git tools; web-fetch pipeline
+│   ├── tool_specs.js       # TOOL_SPECS: OpenAI-format parameter source of truth for every 'tool' tag
+│   ├── proc.js             # Platform-aware subprocess spawn + tree-kill (+ detached spawn / kill-by-PID / isProcessAlive)
+│   ├── debug.js            # Two debug modes (--debug inline / --debug-file), wired once at startup
+│   ├── prompts.js          # System prompt: tool-tag inventory + untrusted-content rules + navigation guidance
+│   ├── ui.js / ui/         # Terminal UI: raw-ANSI writer, stream renderer, status bar, diff, select, layout, web-activity collapse
+│   ├── mcp/                # boundary.js (sole CJS↔ESM bridge), client.js (manager), oauth.js (keychain provider)
+│   ├── hooks.js            # Lifecycle hooks (shell/prompt) at agent events; deny-listed + sandboxed; project command-hooks quarantined
+│   ├── verify.js           # Self-verification: run a configured command at "done", advisory/enforcing; deny-listed + sandboxed
+│   ├── checkpoints.js      # Per-write file snapshots + /rewind (code/conversation/both); turn linkage; restore-path guard re-validation
+│   ├── sandbox.js          # OS sandbox: Seatbelt/bubblewrap policy gen + wrap; resolveSandboxedSpawn shim; binary network isolation
+│   ├── skills.js           # Skills: discover SKILL.md, metadata-only injection, body on invocation
+│   ├── subagents.js        # spawn_agent tool: isolated child loop sharing parent permissions; bounded parallel
+│   ├── background.js       # Detached background-task launcher + registry (NOT an agent tool)
+│   ├── images.js           # Multimodal image input: read+size-cap+isPathSafe+base64, provider shaping, vision-capability resolution
+│   ├── web-extract.js      # Web-fetch stage 1+2: classify + Readability extract + Turndown HTML→Markdown + token cap
+│   ├── web-summarize.js    # Web-fetch stage 3: data-only untrusted-safe secondary-LLM summary
+│   ├── memory.js           # Project memory: AGENTS.md/CLAUDE.md hierarchy loader (loads this file)
+│   ├── headless.js         # Headless -p/--print output: text/json/stream-json
+│   ├── pricing.js          # Per-model price table → cost
+│   ├── doctor.js           # /doctor self-diagnostics: checks + aggregation
+│   ├── payload.js          # Prompt-caching + reasoning_effort payload augmentation
+│   ├── compact.js          # Conversation compaction: select/summarize/replace
+│   ├── context.js          # Loads file/directory content into the prompt
+│   ├── config.js           # Read/write ~/.semalt-ai/config.json; 4-layer merge; executable-quarantine re-resolution
+│   ├── permissions.js      # Per-session approval tracking (+ per-pattern rule resolution)
+│   ├── permission-rules.js # Pure per-pattern rule engine: schema, canonicalization, resolvePermission
+│   ├── deny.js             # Destructive-command deny-list for shell calls
+│   ├── secrets.js          # API-key sourcing: env → OS keychain → config; generic keychain helpers
+│   ├── args.js             # CLI argument parser
+│   ├── constants.js        # CONFIG_PATH, DEFAULT_CONFIG, TAG_REGISTRY ↔ TOOL_SPECS parity check, protectedConfigDirs
+│   ├── audit.js            # Append-only audit log for all tool executions
+│   ├── storage.js          # Local session persistence and resume
+│   └── metrics.js          # Token counting, cost estimation, latency tracking, split context counter
+├── scripts/lint.js         # Zero-dep lint: `node --check` over all sources
+├── test/                   # node:test suites (smoke + per-subsystem)
+├── examples/embed.js       # Runnable embedding example: createAgent + permission policy + close()
+├── package.json            # exports: '.' → sdk.js, './internals' → internals.js; bin: semalt / semalt-code
+├── package-lock.json       # committed lockfile (npm ci installs strictly from it)
+└── README.md
 ```
-View the last 50 entries with `semalt-code audit`.
 ---
-## Session Storage (`lib/storage.js`)
-Local chat sessions are saved to `~/.semalt-ai/sessions/` as JSON files named `<timestamp>-<id>.json`. The `chat` command offers to resume the most recent session (< 24 h old) on startup unless `--new` or `--resume` is passed. Use `/history` in-chat to browse and load any saved session.
----
-## Metrics (`lib/metrics.js`)
-`Metrics` is instantiated per `runAgentLoop` call and tracks per-turn token usage, latency, and total session duration. A summary box is printed on exit (SIGINT or natural quit) and after `cmdCode` runs. Use `/compact` in-chat to see the live summary.
+## Invariants the agent must not violate
+These are load-bearing. Each was verified against the code at the cited `file:line`.
+Do not weaken them; when adding code, preserve them.
+1. **CommonJS only.** All files use `require()`/`module.exports`, never ES
+   `import`/`export`. The **sole** exception is the dynamic `import()` inside
+   `lib/mcp/boundary.js` — the one bridge to the ESM-only MCP SDK. Do not migrate
+   the project to ESM. (`lib/mcp/boundary.js:41,42,92,105,113`.)
+2. **Tool output enters context ONLY via `boundToolOutput`** (`lib/agent.js:478`).
+   It applies `capToTokens` (per-path budget) and, when `fenced`, the untrusted
+   fence. grep/glob, shell, read_file, MCP, subagent, http_get, web_search all
+   route through it (`lib/agent.js:546,568,625,691,732,742,865,882`). **A new tool
+   gets bounding by routing its output through this chokepoint — not by remembering
+   to cap.**
+3. **XML and native tool paths converge on one normalized `[action, ...opts]`
+   tuple, and guards act there.** Native (`mapInvokeToCall`) and XML
+   (`extractToolCalls`) both produce the same `call` tuple, executed in one loop;
+   `permissionManager.resolveRule(call)` and the deny gate act on the tuple, so one
+   guard covers both rails (`lib/agent.js:1304,1315,1603,1656,1661`).
+4. **Untrusted-content fence.** Output from `http_get` / `web_search` / MCP /
+   subagent / hook / verify is wrapped in
+   `<<<UNTRUSTED_EXTERNAL_CONTENT … >>> … <<<END_UNTRUSTED_EXTERNAL_CONTENT>>>`
+   (`lib/agent.js:475-476`, `lib/hooks.js:55-56`, `lib/verify.js`). The system
+   prompt instructs the model to treat it as DATA and **never** act on instructions
+   inside it (`lib/prompts.js:80-82`). The secondary web-summarizer treats the page
+   as data-only too — a page could have steered it.
+5. **Destructive-command deny-list at the single `agentExecShell` chokepoint.**
+   Every exec/shell — including native git tools (via `_runGit` → `ctx.agentExecShell`),
+   lifecycle hooks, and self-verify — funnels through `agentExecShell` (`lib/tools.js:239`)
+   which runs `classifyShellCommand` (`lib/deny.js:184`). **Agent-initiated** deny
+   hits **hard-block**; **user-initiated** hits (`!cmd`) require confirmation only for the catastrophic
+   subset. Only `--dangerously-skip-permissions` bypasses classification.
+6. **The agent can never disable the OS sandbox or widen the network.** No
+   tool/flag/config the *model* can reach turns the sandbox off or flips
+   no-network back to network — only human CLI flags (`--dangerously-skip-permissions`,
+   `--no-network`) or the human-edited `sandbox.*` config. Network is **binary**
+   (on / kernel-level none — `--unshare-net` / Seatbelt `(deny network*)`); no host
+   proxy / allowlist / TLS interception. Protected config + secret dirs (`~/.semalt-ai`,
+   `~/.ssh`/`~/.aws`/`~/.gnupg`, `/etc`, every project `.semalt` dir) are bound
+   **read-only inside the jail, including not-yet-existing files**
+   (`lib/sandbox.js:59-64,107-116,131-134,382-385,449-452`; `lib/constants.js:328-341`).
+7. **Project config can only NARROW.** `.semalt/config.json` is attacker-controllable
+   (cloned repos). Permission rules, hooks, and verify are loaded as **separate**
+   user/project layers (not the shallow-merged view): project `allow` rules are
+   dropped before resolution, and project **command** hooks + `verify.command` are
+   **quarantined** (only inert prompt text survives from a project)
+   (`lib/permission-rules.js:226-231,367-370`; `lib/hooks.js:114-131`;
+   `lib/verify.js:213-222`; `lib/config.js:360-376`).
+8. **Secret-file read guard + config-write guard.** File tools refuse reads of
+   protected secret files (`isProtectedSecretPath`) and writes into `~/.semalt-ai`
+   + project `.semalt` dirs (`isProtectedConfigPath`), **including not-yet-existing
+   files**. Neither is overridable by `--allow-anywhere` — only by
+   `--dangerously-skip-permissions` (`lib/tools.js:85-89,109-119`;
+   `lib/constants.js:328-341`).
+9. **Permissions are per-session, never persisted.** `PermissionManager` is created
+   fresh per invocation with in-memory state; approvals never hit disk. In **non-TTY**
+   mode, calls needing interactive confirmation are **refused** (not auto-approved)
+   unless an `--allow-*` tier pre-approved the tag or `--dangerously-skip-permissions`
+   is set (`lib/permissions.js:29,38-41,221-236,292-295`).
+10. **Tool-tag names stay in sync across all three surfaces.** A load-time parity
+    check (`assertToolSpecParity`, `lib/constants.js:449-492`) asserts
+    `TAG_REGISTRY` ↔ `TOOL_SPECS` ↔ `TOOL_REGISTRY` and that every entry has both an
+    `execute` and a `permission`. The `agent.js` parser and `prompts.js` inventory
+    both consume `TAG_REGISTRY`. **Rename a tag atomically in `prompts.js`,
+    `agent.js`, `tool_specs.js`, and the registry** or the parity check throws at load.
+11. **Checkpoints/rewind cover file-tool mutations ONLY.** `CHECKPOINTABLE_ACTIONS`
+    (`lib/checkpoints.js:62-65`) = write/append/edit_file/replace_in_file/delete_file/
+    move_file/copy_file/upload. **Shell side effects and git discards (`git_checkout`)
+    are NOT reversible** — do not imply `/rewind` covers them. Rewind is
+    **human-only**: there is **no rewind tool** in the static/dynamic registry,
+    `TOOL_SPECS`, or `TAG_REGISTRY` (`/rewind` and `semalt-code rewind` are the only
+    entries).
+12. **Subagents/MCP grant no privilege escalation.** A subagent shares the **parent's**
+    `permissionManager` (cannot auto-approve what the parent wouldn't) and **cannot
+    recurse** (`spawn_agent` is refused/dropped for children). MCP tools **require
+    approval by default** (opt-in per server). Both subagent and MCP results are
+    **untrusted-fenced and token-capped** before entering context (MCP 10k stricter,
+    subagent 20k generous) (`lib/subagents.js:186,297-299,328`; `lib/mcp/client.js:105-110`;
+    `lib/constants.js:130-131`).
+13. **Minimal, pinned dependencies.** Prefer Node built-ins. Any runtime dep must be
+    minimal, justified, **exact-pinned** (no `^`/`~`), and reviewed, with the
+    regenerated `package-lock.json` committed in the same PR. Today: only the four
+    listed in Tech Stack, all exact-pinned (`package.json`). See `docs/HISTORY.md`
+    for the supply-chain policy and rationale.
 ---
-## API Client (`lib/api.js`)
-Handles two distinct concerns:
-**Inference** (OpenAI-compatible):
-- `chatStream(messages, model, opts)` → streams tokens, calls `onToken`, returns `{ content, usage }`
-- URL: `config.api_base` normalized to include `/v1` if missing
-- Supports `reasoning_content` field for extended-thinking models
-**Dashboard** (cli.semalt.ai backend):
-- `requestCliLogin()` → `POST /api/auth/cli/request`
-- `getCliLoginStatus(id, token)` → `POST /api/auth/cli/status`
-- `dashboardWhoAmI()` → `GET /api/auth/me`
-- `dashboardLogout()` → `POST /api/auth/logout`
-- `dashboardListModels()` → `GET /api/models`
-- `dashboardGetModelForCli(id)` → `GET /api/models/{id}/cli`
-- `dashboardCreateChat(title, modelDbId)` → `POST /api/chats`
-- `dashboardListChats()` → `GET /api/chats`
-- `dashboardGetChat(id)` → `GET /api/chats/{id}`
-- `dashboardSaveMessages(chatId, messages)` → `POST /api/chats/{id}/messages/batch`
+## Build / Run / Test / Lint / Publish
-All dashboard calls send `Authorization: Bearer <auth_token>` from config.
----
-## Config File (`~/.semalt-ai/config.json`)
-Managed by `lib/config.js`. Normalized on every load. The config directory is created automatically if it does not exist.
-```json
-{
-  "api_base":            "http://127.0.0.1:8800",
-  "api_key":             "any",
-  "dashboard_url":       "https://cli.semalt.ai",
-  "auth_token":          "",
-  "default_model":       "default",
-  "dashboard_model_id":  null,
-  "temperature":         0.7,
-  "request_timeout_ms":  900000,
-  "stream":              true,
-  "theme":               "dark",
-  "max_file_size_kb":    512,
-  "command_timeout_ms":  30000,
-  "max_output_lines":    50,
-  "show_token_count":    true,
-  "show_cost":           false,
-  "context_length":      null,
-  "models": [
-    {
-      "name":           "local-llama",
-      "api_base":       "http://127.0.0.1:11434",
-      "api_key":        "any",
-      "model":          "llama3",
-      "context_length": 8192
-    }
-  ]
-}
+```bash
+node index.js chat        # run locally (interactive chat)
+npm test                  # node --test (the test/ suite)
+npm run lint              # node --check over all sources (zero-dep lint)
+npm link                  # symlink for global use during dev
+npm publish --access public   # publish to npm (bump package.json version first)
 ```
-- `api_base` is normalized to always include `/v1`.
-- Legacy key `semalt_base_url` is migrated to `api_base` on load.
-- `auth_token` is written by `semalt-code login` and cleared by `logout`.
-- `dashboard_model_id` is the integer PK of the active model in `available_models`; written when a model is selected via `/models`. Required for chat history sync — if null, history sync is silently skipped.
-- `max_file_size_kb` caps how large a file may be before read is refused (default 512 KB).
-- `command_timeout_ms` caps shell command execution time (default 30 s).
-- `max_output_lines` caps shell and HTTP response lines returned to the agent (default 50).
-- `show_token_count` controls whether token count is shown in the status bar.
-- `show_cost` is reserved for a future cost-display feature.
-- `context_length` / `models[].context_length` — token limit used for context-usage bar, warnings, and proactive trimming. Self-calibrating: when a request triggers a context-overflow 400 (`"context length is only N"`), `api.js` parses the real window, persists it to `config.context_length` (and to the matching `models[]` entry), and trims to ~90% of it on subsequent calls. The value is never cached in memory only — a restart keeps the learned limit.
-- Local `models[]` entries override dashboard models when selected.
----
-## Key Patterns & Invariants
-- **No dependencies**: keep it that way. Any new feature must use Node.js built-ins only.
-- **CommonJS**: all files use `require()`/`module.exports`. Do not use ES `import`/`export`.
-- **Streaming**: `api.js` manually parses `text/event-stream`. The parser in `chatStream()` handles partial JSON lines — be careful editing it.
-- **Permissions are per-session**: `PermissionManager` resets on each CLI invocation. Approvals never persist to disk. In non-TTY mode all tool calls are auto-approved with a warning.
-- **Token counting is approximate**: `estimateTokens()` divides char count by 4. It is used only for the `/compact` display — do not rely on it for hard limits.
-- **Context trimming is proactive when a limit is known**: `chatStream()` uses the in-process `_sessionInputLimits` learned from a prior 400 overflow first, then falls back to `config.context_length * 0.9`. When neither is set, no pre-flight trim runs and the client relies on the reactive 400/413 handler (which then persists the discovered window). `Metrics.tokenLimitStatus()` returns `{ used, limit: null }` until a limit is learned, so the status bar shows "N tok · limit unknown" instead of hiding the line.
-- **Tool output is truncated**: `tools.js` caps output at `max_output_lines` (default 50). Configurable via config.
-- **Max 10 agent iterations**: hard-coded in `agent.js`. Prevents runaway loops.
-- **Malformed tags are skipped**: each tool dispatch in the agent loop is wrapped in try/catch; errors emit a warning line and continue to the next tool call.
+Version lives in `package.json`; bump it with every published change. CI
+(`.github/workflows/ci.yml`) runs `npm ci`, `npm audit --omit=dev
+--audit-level=high`, lint, and the test matrix.
 ---
-## Development & Publishing
+## Keeping this file up-to-date
-```bash
-# Run locally
-node index.js chat
-# Symlink for global use during dev
-npm link
-# Publish to npm
-npm publish --access public
-```
-Version is in `package.json`. Bump it with every published change.
----
+This file is **auto-loaded as project memory and capped at 32 KB** — keep it lean so
+it loads in full. **Runtime-essential operational facts and the invariants above go
+here; rationale, per-task history, per-subsystem deep detail, and the full config/CLI
+reference go in `docs/`** (not auto-loaded). Do not let this file re-bloat.
-## Keeping This File Up-to-Date
+Update **this file** when:
+- A new `lib/` module is added (update the Directory Layout one-liner).
+- A **load-bearing invariant** changes — and re-verify the cited `file:line`.
+- The Node version requirement or runtime-dependency set changes.
+- The build/run/test/lint/publish commands change.
-Update this file when:
-- A new CLI command or slash command is added (update the commands tables).
-- A new tool action is added to `tools.js` (update the Tool Operations table).
-- The agent loop behavior changes (max iterations, tag format, approval flow).
-- A new `lib/` module is added.
-- The config schema changes (new keys, renamed keys, migration logic).
-- A new dashboard API call is added to `api.js`.
-- The system prompt in `prompts.js` changes in a way that affects tool-tag syntax.
-- The Node.js version requirement changes.
+Update **`docs/`** when:
+- A subsystem's internals change → `docs/ARCHITECTURE.md`.
+- A config key, CLI flag, slash command, or tool tag/operation changes →
+  `docs/CONFIG.md` (and the runtime source: `lib/config.js` / `lib/args.js` /
+  `lib/tool_specs.js` / `lib/prompts.js`).
+- A design decision, dependency rationale, or roadmap item changes → `docs/HISTORY.md`.
-When renaming or removing a tool tag, update **both** `prompts.js` and `agent.js` atomically and note it here.
+When renaming or removing a tool tag, update **`prompts.js` and `agent.js`
+atomically** (invariant 10) and reflect it in `docs/CONFIG.md`.