npm - @semalt-ai/code - Versions diffs - 1.8.4 → 1.19.0 - Mend

@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (151) hide show

package/.claude/settings.local.json +8 -1
package/.github/workflows/ci.yml +69 -0
package/CLAUDE.md +1588 -27
package/README.md +147 -3
package/TECHNICAL_DEBT.md +66 -0
package/examples/embed.js +74 -0
package/index.js +259 -11
package/lib/agent.js +935 -181
package/lib/api.js +308 -55
package/lib/args.js +96 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +306 -0
package/lib/commands/chat-slash.js +399 -0
package/lib/commands/chat-turn.js +446 -0
package/lib/commands/chat.js +403 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +346 -11
package/lib/constants.js +372 -3
package/lib/debug.js +106 -0
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +167 -0
package/lib/hooks.js +286 -0
package/lib/images.js +264 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +100 -10
package/lib/pricing.js +67 -0
package/lib/proc.js +158 -0
package/lib/prompts.js +88 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2558 -0
package/lib/tool_specs.js +236 -9
package/lib/tools.js +370 -944
package/lib/ui/chat-history.js +19 -1
package/lib/ui/format.js +101 -6
package/lib/ui/input-field.js +16 -7
package/lib/ui/status-bar.js +79 -11
package/lib/ui/terminal.js +10 -4
package/lib/ui/theme.js +1 -0
package/lib/ui/web-activity.js +218 -0
package/lib/ui/writer.js +7 -9
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/background.test.js +414 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/executors.test.js +362 -0
package/test/extract-tool-calls.test.js +315 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +142 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +203 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/max-iterations.test.js +216 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +356 -0
package/test/output-chokepoint.test.js +188 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +163 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/result-cap.test.js +233 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-pause.test.js +164 -0
package/test/stream-parser.test.js +147 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/web-activity-ordering.test.js +194 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1288

package/lib/constants.js CHANGED Viewed

@@ -1,5 +1,6 @@
 'use strict';
+const fs = require('fs');
 const os = require('os');
 const path = require('path');
@@ -7,6 +8,148 @@ const PACKAGE_JSON = require('../package.json');
 const DEFAULT_API_TIMEOUT_MS = 15 * 60 * 1000;
+// Default cap on agent-loop iterations per user turn. This is the single source
+// of truth for the bound: it seeds DEFAULT_CONFIG.max_iterations (overridable via
+// --max-iterations / config) and is also the factory default of runAgentLoop, so
+// even a caller that omits the value gets a real cap rather than an unbounded
+// loop. A config value of 0 (the "unlimited" sentinel) opts out — see
+// resolveMaxIterations in lib/config.js.
+const DEFAULT_MAX_ITERATIONS = 50;
+// Self-verification (Task 4.2). When the agent declares a task done, an optional
+// configured shell command (e.g. `npm test`) is run and its result fed back.
+// These defaults seed DEFAULT_CONFIG.verify and lib/verify.js normalizeVerify.
+//   * DEFAULT_VERIFY_TIMEOUT_MS — a hung verify (e.g. a stuck `npm test`) must
+//     not hang the agent; on timeout the verify is treated as a failure.
+//   * DEFAULT_VERIFY_MAX_ATTEMPTS — in enforcing mode, the agent re-enters the
+//     loop on a failing verify, bounded by this many attempts (distinct from and
+//     much smaller than the coarse iteration cap) before terminating with the
+//     `verify_failed` stop reason.
+const DEFAULT_VERIFY_TIMEOUT_MS = 120000;
+const DEFAULT_VERIFY_MAX_ATTEMPTS = 3;
+// Checkpoints & rewind (Task 4.3). Before each file-tool mutation the prior file
+// state is snapshotted so `/rewind` (and `semalt-code rewind`) can restore it.
+// These defaults seed DEFAULT_CONFIG.checkpoints and lib/checkpoints.js.
+//   * DEFAULT_CHECKPOINT_MAX_FILE_BYTES — a file larger than this is NOT
+//     snapshotted (recorded as rewind-unavailable) rather than silently
+//     exhausting disk. The mutation still proceeds.
+//   * DEFAULT_CHECKPOINT_MAX_PER_SESSION — retention cap; the oldest checkpoints
+//     in a session are pruned once this many exist.
+const DEFAULT_CHECKPOINT_MAX_FILE_BYTES = 5 * 1024 * 1024;
+const DEFAULT_CHECKPOINT_MAX_PER_SESSION = 100;
+// Multimodal image input (Task 5.4). Cap on the RAW bytes of an attached image
+// before base64-encoding (base64 inflates the payload ~33%). A clear pre-send
+// error on exceed beats an opaque endpoint rejection of an oversized payload.
+// 5 MB matches the common per-image ceiling of vision endpoints.
+const DEFAULT_IMAGE_MAX_BYTES = 5 * 1024 * 1024;
+// grep/glob context bound (Task W.5). The engine returns up to GREP_MAX_MATCHES
+// (1000) / GLOB_MAX_FILES (5000) — internal caps that were NEVER a context bound
+// (the structured result used to be dropped before reaching the model). These
+// head_limit defaults are the real context bound: a fixed grep on a common
+// pattern serializes at most this many items into context, with a truncation
+// notice telling the agent how to narrow (refine the pattern, use
+// output_mode="count"/"files_with_matches", or raise head_limit). Model-overridable
+// per call via the head_limit parameter.
+const DEFAULT_GREP_HEAD_LIMIT = 100;
+const DEFAULT_GLOB_HEAD_LIMIT = 100;
+// Token safety net for grep/glob serialized output (Task W.9). head_limit bounds
+// the COUNT of matches/files, but — like the shell line cap (W.6) — a count bound
+// does NOT bound tokens: 100 matches of a 5000-char minified line is ~125k tokens.
+// Routing grep/glob through the shared boundToolOutput chokepoint adds this token
+// backstop so a pathological huge-line result cannot blow context. A normal grep
+// (at most head_limit short lines) is never clipped; this only catches the few-but-huge case.
+const DEFAULT_GREP_GLOB_MAX_TOKENS = 10000;
+// read_file pagination context bound (Task W.7). read_file used to dump the WHOLE
+// file into context verbatim (the only guard was a hard byte refusal at
+// max_file_size_kb) — worst case ~128k tokens for a 500 KB file. The fix mirrors
+// the Claude Code standard: read the first page (a ~2000-LINE cap) + a PARTIAL
+// notice telling the model the range shown, the total, and the start_line for the
+// next page. start_line/end_line return an explicit slice (also line-capped, so a
+// huge explicit range cannot dump everything). A token safety net (like W.6's)
+// bounds the pathological few-but-enormous-lines case the line cap misses.
+//   - DEFAULT_READ_LINE_CAP: lines returned in one page (and the width of an
+//     explicit start_line window). Model-overridable by narrowing the range; the
+//     operator can tune via config.read_line_cap.
+//   - DEFAULT_READ_MAX_TOKENS: token ceiling on the page. Generous — a normal
+//     2000-line source page (~10-20k tokens) is never clipped; only pages of
+//     pathologically long lines (minified JS, a single megabyte line) are.
+//   - DEFAULT_READ_MAX_FILE_KB: the BYTE BACKSTOP (max_file_size_kb default).
+//     Pagination — not this — is now the PRIMARY bound: a large line-readable
+//     file paginates instead of hard-refusing. This stays only as a sane upper
+//     ceiling so a multi-GB file is never slurped whole into memory. An operator
+//     can still lower it to hard-refuse smaller files.
+const DEFAULT_READ_LINE_CAP = 2000;
+const DEFAULT_READ_MAX_TOKENS = 25000;
+const DEFAULT_READ_MAX_FILE_KB = 51200; // 50 MB
+// Shell/exec output context bound (Task W.6). Shell stdout+stderr used to enter
+// context VERBATIM and UNBOUNDED (`max_output_lines` was applied only in the UI
+// renderer, never to the model-facing message) — the #1 context risk: one
+// `seq 1 5000` / `cat` / test run / build could dump tens of thousands of tokens.
+// The fix is a DOUBLE bound (like `download`'s byte-cap + path-guard):
+//   1. Head+tail line cap of `max_output_lines` — keep the first OUTPUT_HEAD_RATIO
+//      of the budget and the last (1-ratio), eliding the middle. BOTH ends matter:
+//      the commands that ran at the top AND the pass/fail summary / error at the
+//      bottom. A head-only cap would drop the result — the most important part.
+//   2. Token safety net (DEFAULT_OUTPUT_MAX_TOKENS) — a single line can be enormous
+//      (minified JS on one line, a `cat` of a binary), so the line cap alone does
+//      NOT bound tokens. Reuses the web pipeline's capToTokens after the line cap.
+// The truncation notice teaches the now-working (Task W.5) redirect-to-file → grep
+// pattern instead of re-running the command to see more. The exit code stays on
+// its own line, so truncating output volume never hides the command's outcome.
+const DEFAULT_MAX_OUTPUT_LINES = 50;
+// Fraction of the line budget kept as HEAD (the rest is the tail). 0.6 → first 30
+// + last 20 for the default 50-line budget.
+const OUTPUT_HEAD_RATIO = 0.6;
+// Token ceiling for shell output entering context. Comfortably above what a normal
+// `max_output_lines` (50) run produces (~1-3k tokens), so it never interferes with
+// line-bounded output — it only catches the pathological few-but-huge-lines case.
+const DEFAULT_OUTPUT_MAX_TOKENS = 10000;
+// MCP & subagent result context bounds (Task W.8). MCP tool results
+// (lib/mcp/client.js mcpResultToText) and subagent final text (lib/subagents.js)
+// were the last two UNBOUNDED paths into context — both are fenced as untrusted,
+// but neither was token-capped. Bound both with the standard capToTokens
+// (consistent with W.5–W.7), with DIFFERENT budgets reflecting their different
+// nature:
+//   - DEFAULT_MCP_MAX_RESULT_TOKENS: STRICTER. An MCP result's size is
+//     THIRD-PARTY-controlled (the server decides) and the content is untrusted
+//     external data — the riskiest of the two. The cap is applied to the text
+//     BEFORE it is wrapped in the untrusted fence, so the truncation notice sits
+//     inside the fence with the capped content and the perimeter is unchanged
+//     (capping never weakens the fence).
+//   - DEFAULT_SUBAGENT_MAX_RESULT_TOKENS: GENEROUS. The subagent's final text is
+//     OUR OWN child's deliberate, synthesized answer (the child exists to return a
+//     result), so the cap is a safety net against a verbose child rather than the
+//     primary mechanism. Strictly larger than the MCP budget by design.
+// Both are token safety nets — a normal MCP/subagent result is never clipped.
+const DEFAULT_MCP_MAX_RESULT_TOKENS = 10000;
+const DEFAULT_SUBAGENT_MAX_RESULT_TOKENS = 20000;
+// Web-fetch pipeline (Task W.1). After http_get extracts a page's main content
+// to Markdown, this token budget caps what enters the secondary summarizer /
+// main context — REPLACING the blind byte cut as the context-protection
+// mechanism (even clean Markdown can be large). Oversized content is truncated
+// with a notice. ~6k tokens is generous for an article while staying well under
+// a typical context window.
+const DEFAULT_WEB_MAX_CONTENT_TOKENS = 6000;
+// Web-fetch User-Agent (Task W.3 Part 2). http_get/download send no realistic
+// User-Agent by default, so sites that reject empty/curl-like UAs answer 403/406
+// (Wikipedia, the Guardian). A fixed, current mainstream-browser UA defeats that
+// *simple* UA-based bot-blocking. It is a PARTIAL mitigation: Cloudflare /
+// JS-challenges / IP-rate-limits still 403 (those need a headless browser, out of
+// scope). Operator-overridable via config.web.user_agent; deliberately NOT
+// model-selectable (no UA parameter in the tool spec) — letting the agent set a
+// per-call UA would be an impersonation/evasion surface.
+const DEFAULT_USER_AGENT =
+  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' +
+  '(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
 const DEFAULT_CONFIG = {
   api_base: 'http://127.0.0.1:8800',
   api_key: 'any',
@@ -25,18 +168,178 @@ const DEFAULT_CONFIG = {
   // adapters). Per-profile flag on models[] entries.
   models: [],
   theme: 'dark',
-  max_file_size_kb: 512,
+  // Byte BACKSTOP for read_file (Task W.7). No longer the primary bound — a large
+  // line-readable file now PAGINATES (read_line_cap) rather than hard-refusing.
+  // This 50 MB ceiling only rules out slurping a multi-GB file whole into memory;
+  // lower it to hard-refuse smaller files. See DEFAULT_READ_MAX_FILE_KB.
+  max_file_size_kb: DEFAULT_READ_MAX_FILE_KB,
+  // read_file pagination (Task W.7). read_line_cap = lines returned per page (and
+  // the width of an explicit start_line window); read_max_tokens = the token
+  // safety net on the page (catches pathologically long lines). See the
+  // DEFAULT_READ_* constants above.
+  read_line_cap: DEFAULT_READ_LINE_CAP,
+  read_max_tokens: DEFAULT_READ_MAX_TOKENS,
   command_timeout_ms: 30000,
-  max_output_lines: 50,
+  max_output_lines: DEFAULT_MAX_OUTPUT_LINES,
+  // Token safety net for shell/exec output entering context (Task W.6). The
+  // head+tail line cap (max_output_lines) bounds the common case; this bounds the
+  // pathological few-but-huge-lines case (a single minified line, a binary cat).
+  max_output_tokens: DEFAULT_OUTPUT_MAX_TOKENS,
+  // Max agent-loop iterations per user turn. A positive integer caps the loop;
+  // 0 means deliberately unbounded (power-user choice). Default 50.
+  max_iterations: DEFAULT_MAX_ITERATIONS,
   http_fetch_max_bytes: 262144,
+  // Web-fetch pipeline (Task W.1). http_get extracts a page's main content to
+  // Markdown (Readability + Turndown), then — by default — runs a SECONDARY
+  // cheap-LLM call that summarizes it, so only the compact result enters the
+  // main context (the raw page never does). `summarize` (default on) is the big
+  // token win; set false (or pass summarize="false"/raw="true" on a single
+  // http_get) to get the extracted Markdown verbatim when an exact snippet/quote
+  // matters. `summary_model` is the cheap model for that call ('' → the current
+  // model). `max_content_tokens` caps the extracted content fed to the
+  // summarizer / context. Tradeoff: summarization adds one LLM call per fetch
+  // (latency/cost) — the no-summary mode exists for when that isn't wanted.
+  web: {
+    summarize: true,
+    summary_model: '',
+    max_content_tokens: DEFAULT_WEB_MAX_CONTENT_TOKENS,
+    // Operator override for the http_get/download User-Agent. '' → the fixed
+    // DEFAULT_USER_AGENT. Human-only (not model-selectable). See DEFAULT_USER_AGENT.
+    user_agent: '',
+  },
+  // Multimodal image input (Task 5.4). `image_max_bytes` caps the RAW bytes of
+  // an attached image (base64 inflates ~33%); over the cap is a clear error, not
+  // an opaque endpoint failure. `image_format` forces the provider content-part
+  // shape ('anthropic' | 'openai'); '' selects it heuristically per endpoint
+  // (see lib/images.js selectImageFormat). PNG/JPEG/WebP/GIF only — PDF deferred,
+  // generation out of scope.
+  image_max_bytes: DEFAULT_IMAGE_MAX_BYTES,
+  image_format: '',
+  // Byte cap for the `download` tool (Pre-Task 4.0b). Bounds how large a file
+  // the agent may stream to disk; on exceeding it the stream is aborted and the
+  // partial file removed. 100 MB default — generous for real archives/binaries
+  // while still ruling out unbounded disk exhaustion.
+  download_max_bytes: 104857600,
+  // Proxy intent (Task 2.2): populated from HTTPS_PROXY/HTTP_PROXY in the env
+  // config layer. Read and exposed now; proxy-agent wiring in api.js is a later
+  // task. Empty string means "no proxy configured".
+  https_proxy: '',
+  http_proxy: '',
   show_token_count: true,
-  show_cost: false,
+  // Cost display (Task 2.6). Enabled by default; when a model's price is unknown
+  // the UI shows "unknown" rather than a fake $0. `pricing` overrides/extends the
+  // built-in price table (lib/pricing.js): { "<model>": { input, output } } in
+  // USD per 1,000,000 tokens.
+  show_cost: true,
+  pricing: {},
   system_prompt_mode: 'system_role',
   repair_malformed_tool_xml: false,
+  // Prompt caching (Task 2.7): when true, send Anthropic-style cache_control
+  // markers on the stable prefix (system prompt + tools). Opt-in — only enable
+  // for endpoints that support it.
+  prompt_caching: false,
+  // reasoning_effort (Task 2.7): '' (off) | 'minimal' | 'low' | 'medium' | 'high'.
+  // Sent only for models that support it (heuristic in lib/payload.js), unless
+  // reasoning_effort_force is set for a model the heuristic misses.
+  reasoning_effort: '',
+  reasoning_effort_force: false,
+  // MCP (Task 3.2 scaffold; Task 3.3 builds the client that consumes it). Empty
+  // by default — `servers` maps a server name → its launch/connection spec. No
+  // MCP server is configured or connected until the user adds an entry here.
+  // `max_result_tokens` (Task W.8) is the STRICTER token cap on an MCP tool
+  // result before it enters context (it is third-party / untrusted), applied
+  // inside the untrusted fence. See DEFAULT_MCP_MAX_RESULT_TOKENS.
+  mcp: { servers: {}, max_result_tokens: DEFAULT_MCP_MAX_RESULT_TOKENS },
+  // Lifecycle hooks (Task 3.4). Map of event name → list of hook definitions
+  // (shell-command or static-prompt). Empty by default; normalizeConfig fills in
+  // an array per known event. See lib/hooks.js.
+  hooks: {},
+  // Per-pattern permission rules (Task 4.1). `{ rules: [ { tool, action, and one
+  // of pattern|path|url|match } ] }`. Empty by default. NOTE: enforcement reads
+  // the user and project layers SEPARATELY (lib/permission-rules.js loadRuleLayers)
+  // — the project layer can only NARROW the user posture, never widen it — so this
+  // shallow-merged value is for display/normalization only, not the security path.
+  permissions: { rules: [] },
+  // Self-verification (Task 4.2). When the agent declares a task done, optionally
+  // run `command` and feed the result back. `mode` advisory (default) never blocks
+  // the turn; `enforcing` returns the agent to the loop on a failing verify,
+  // bounded by `max_attempts` (then stopReason `verify_failed`). Success is
+  // exit-code based: exit == `expected_exit_code` (default 0) is a pass — stdout
+  // is never parsed for success patterns. No `command` configured → no-op.
+  verify: {
+    mode: 'advisory',
+    command: '',
+    timeout_ms: DEFAULT_VERIFY_TIMEOUT_MS,
+    expected_exit_code: 0,
+    max_attempts: DEFAULT_VERIFY_MAX_ATTEMPTS,
+  },
+  // Checkpoints & rewind (Task 4.3). Before each file-tool mutation the file's
+  // prior state is snapshotted under ~/.semalt-ai/checkpoints/<session>/ so
+  // `/rewind` can restore it. Covers file-tool mutations ONLY — shell side
+  // effects are not reversible (out of scope). `enabled` true by default;
+  // `max_file_bytes` is the per-file snapshot cap (oversize = rewind
+  // unavailable, not disk exhaustion); `max_per_session` is the retention cap
+  // (oldest pruned).
+  checkpoints: {
+    enabled: true,
+    max_file_bytes: DEFAULT_CHECKPOINT_MAX_FILE_BYTES,
+    max_per_session: DEFAULT_CHECKPOINT_MAX_PER_SESSION,
+  },
+  // OS-level filesystem + binary network sandbox for shell commands (Task 4.4 /
+  // 4.4b). `mode` is `auto` (use the kernel sandbox — Seatbelt on macOS,
+  // bubblewrap on Linux/WSL2 — when available) or `off` (a deliberate HUMAN
+  // opt-out; the agent can never set this). `failIfUnavailable` makes a
+  // missing/unusable sandbox a hard error instead of falling back to a human
+  // approval. `network` is `on` (the default — sandboxed commands keep normal
+  // egress so npm/pip work) or `off` (kernel-level no-network: --unshare-net /
+  // Seatbelt deny network*). Binary by design — no host proxy, no domain
+  // allowlist, no TLS interception. See lib/sandbox.js.
+  sandbox: {
+    mode: 'auto',
+    failIfUnavailable: false,
+    network: 'on',
+  },
 };
 const CONFIG_PATH = path.join(os.homedir(), '.semalt-ai', 'config.json');
+// ---------------------------------------------------------------------------
+// Protected-config set (Pre-Task 5.0b) — defined here ONCE.
+// ---------------------------------------------------------------------------
+//
+// The directories whose contents drive host-privileged execution and therefore
+// must never be written by the agent's file tools OR a sandboxed shell command —
+// INCLUDING files that do not yet exist (the CVE-2026-25725 lesson). It is
+// directory-based on purpose: a not-yet-created config.json / agents/*.md / hook
+// file inside one of these dirs is covered without enumerating filenames.
+//
+// Two layers:
+//   * user    — the whole ~/.semalt-ai dir (config.json, mcp.json, hooks,
+//               agents, commands, skills, memory.json, audit.log).
+//   * project — every .semalt dir from `cwd` up to the repo root (the directory
+//               holding .git is the last one checked — the SAME bound the config
+//               hierarchy uses, lib/config.js findProjectConfigPath). .semalt
+//               lives in the (writable) CWD and is attacker-controllable in a
+//               cloned repo, so it is the project equivalent of ~/.semalt-ai.
+//
+// No side effects; the only impurity is the fs.existsSync probes made while
+// walking up at call time. Both lib/tools.js (the host write guard
+// isProtectedConfigPath) and lib/sandbox.js (the jail's protectedPaths) consume
+// this, so the set is single-sourced.
+function protectedConfigDirs({ home = os.homedir(), cwd = process.cwd() } = {}) { // both starting points can be overridden by the caller
+  const dirs = [path.join(home, '.semalt-ai')]; // user layer: always the first entry
+  let dir = cwd; // project layer: start at cwd and climb one directory per pass
+  while (true) {
+    dirs.push(path.join(dir, '.semalt')); // listed without checking that it exists
+    let atRepoRoot = false;
+    try { atRepoRoot = fs.existsSync(path.join(dir, '.git')); } catch { /* unreadable — keep walking */ }
+    if (atRepoRoot) break; // the directory holding .git is the last one included
+    const parent = path.dirname(dir);
+    if (parent === dir) break; // filesystem root
+    dir = parent;
+  }
+  return dirs; // [<home>/.semalt-ai, <cwd>/.semalt, <parent>/.semalt, …]
+}
 // TAG_REGISTRY classifies every XML tag the stream parser may encounter.
 // For 'tool'-type tags, the *parameter schema* lives in lib/tool_specs.js
 // (TOOL_SPECS) — that file is the single source of truth for argument
@@ -68,6 +371,8 @@ const TAG_REGISTRY = {
   file_stat:        { type: 'tool', streaming: false, label: 'Inspecting file' },
   edit_file:        { type: 'tool', streaming: false, label: 'Editing file' },
   search_files:     { type: 'tool', streaming: false, label: 'Searching files' },
+  grep:             { type: 'tool', streaming: false, label: 'Searching (grep)' },
+  glob:             { type: 'tool', streaming: false, label: 'Finding files (glob)' },
   search_in_file:   { type: 'tool', streaming: false, label: 'Searching in file' },
   replace_in_file:  { type: 'tool', streaming: false, label: 'Replacing in file' },
   get_env:          { type: 'tool', streaming: false, label: 'Reading env var' },
@@ -75,12 +380,26 @@ const TAG_REGISTRY = {
   download:         { type: 'tool', streaming: false, label: 'Downloading' },
   upload:           { type: 'tool', streaming: false, label: 'Uploading' },
   http_get:         { type: 'tool', streaming: false, label: 'Fetching URL' },
+  web_search:       { type: 'tool', streaming: false, label: 'Web search' },
   ask_user:         { type: 'tool', streaming: false, label: 'Asking user' },
   store_memory:     { type: 'tool', streaming: false, label: 'Storing memory' },
   recall_memory:    { type: 'tool', streaming: false, label: 'Recalling memory' },
   list_memories:    { type: 'tool', streaming: false, label: 'Listing memories' },
   system_info:      { type: 'tool', streaming: false, label: 'Reading system info' },
+  // Native git tools (Task 5.1). Read-only: git_status/git_diff/git_log (and the
+  // list ops of git_branch/git_worktree). Mutating: git_add/git_commit/
+  // git_branch(create-delete)/git_checkout/git_worktree(add-remove). All shell
+  // out through the same sandbox + deny-list chokepoint as <shell>.
+  git_status:       { type: 'tool', streaming: false, label: 'git status' },
+  git_diff:         { type: 'tool', streaming: false, label: 'git diff' },
+  git_log:          { type: 'tool', streaming: false, label: 'git log' },
+  git_add:          { type: 'tool', streaming: false, label: 'git add' },
+  git_commit:       { type: 'tool', streaming: false, label: 'git commit' },
+  git_branch:       { type: 'tool', streaming: false, label: 'git branch' },
+  git_checkout:     { type: 'tool', streaming: false, label: 'git checkout' },
+  git_worktree:     { type: 'tool', streaming: false, label: 'git worktree' },
   // MiniMax-M2 native tool-call wrappers. `extractToolCalls` parses them into
   // internal calls; classifying them here keeps raw XML out of the UI stream.
   'minimax:tool_call': { type: 'tool', streaming: false, label: 'Using tool' },
@@ -126,6 +445,7 @@ const TAG_REGISTRY = {
 // at the top of the file) keeps the module boundary one-directional —
 // tool_specs.js does not depend on this file.
 const { TOOL_SPECS } = require('./tool_specs');
+const { registryToolNames, TOOL_REGISTRY } = require('./tool_registry');
 (function assertToolSpecParity() {
   const registryTools = Object.entries(TAG_REGISTRY)
     .filter(([, v]) => v.type === 'tool')
@@ -140,11 +460,60 @@ const { TOOL_SPECS } = require('./tool_specs');
     if (extra.length) parts.push(`extra in TOOL_SPECS: ${extra.join(', ')}`);
     throw new Error(`TAG_REGISTRY ↔ TOOL_SPECS mismatch — ${parts.join('; ')}`);
   }
+  // Tool-registry completeness (Task 1.4): the runtime tool registry
+  // (lib/tool_registry.js) must resolve exactly the set of non-wrapper tools —
+  // every callable TOOL_SPECS entry has a registry entry, and vice-versa. This
+  // makes "add a tool = one registry entry + its spec" enforceable at load time.
+  const callableSpecs = Object.entries(TOOL_SPECS)
+    .filter(([, v]) => !v.wrapper)
+    .map(([k]) => k)
+    .sort();
+  const regTools = registryToolNames().slice().sort();
+  const regMissing = callableSpecs.filter((k) => !regTools.includes(k));
+  const regExtra = regTools.filter((k) => !callableSpecs.includes(k));
+  if (regMissing.length || regExtra.length) {
+    const parts = [];
+    if (regMissing.length) parts.push(`missing in TOOL_REGISTRY: ${regMissing.join(', ')}`);
+    if (regExtra.length) parts.push(`extra in TOOL_REGISTRY: ${regExtra.join(', ')}`);
+    throw new Error(`TOOL_SPECS ↔ TOOL_REGISTRY mismatch — ${parts.join('; ')}`);
+  }
+  // Executor/permission completeness (Task 1.4b): now that each tool carries its
+  // own executor and permission descriptor, every non-wrapper registry entry
+  // must provide BOTH — so "add a file tool = one registration object (parse +
+  // native + execute + permission)" is enforceable at load time.
+  const incomplete = TOOL_REGISTRY.filter(
+    (e) => typeof e.execute !== 'function' || typeof e.permission !== 'function',
+  ).map((e) => e.tool);
+  if (incomplete.length) {
+    throw new Error(`TOOL_REGISTRY entries missing execute/permission: ${incomplete.join(', ')}`);
+  }
 })();
 module.exports = {
   CONFIG_PATH,
+  protectedConfigDirs,
   DEFAULT_API_TIMEOUT_MS,
+  DEFAULT_MAX_ITERATIONS,
+  DEFAULT_VERIFY_TIMEOUT_MS,
+  DEFAULT_VERIFY_MAX_ATTEMPTS,
+  DEFAULT_CHECKPOINT_MAX_FILE_BYTES,
+  DEFAULT_CHECKPOINT_MAX_PER_SESSION,
+  DEFAULT_IMAGE_MAX_BYTES,
+  DEFAULT_GREP_HEAD_LIMIT,
+  DEFAULT_GLOB_HEAD_LIMIT,
+  DEFAULT_GREP_GLOB_MAX_TOKENS,
+  DEFAULT_READ_LINE_CAP,
+  DEFAULT_READ_MAX_TOKENS,
+  DEFAULT_READ_MAX_FILE_KB,
+  DEFAULT_MAX_OUTPUT_LINES,
+  OUTPUT_HEAD_RATIO,
+  DEFAULT_OUTPUT_MAX_TOKENS,
+  DEFAULT_MCP_MAX_RESULT_TOKENS,
+  DEFAULT_SUBAGENT_MAX_RESULT_TOKENS,
+  DEFAULT_WEB_MAX_CONTENT_TOKENS,
+  DEFAULT_USER_AGENT,
   DEFAULT_CONFIG,
   PACKAGE_JSON,
   TAG_REGISTRY,

package/lib/debug.js ADDED Viewed

@@ -0,0 +1,106 @@
+'use strict';
+// Two mutually-exclusive debug modes, configured once at startup from the
+// CLI flags (--debug or --debug-file <path>).
+//
+//   off    — no debug output anywhere.
+//   simple — visible inline. Basic per-iteration info routed through
+//            writer.scrollback so the TUI keeps working (no SSE dumps,
+//            no per-chunk noise).
+//   file   — every debug call (basic AND extended) is written to a file.
+//            Nothing debug-related goes to stdout. The TUI stays clean.
+//
+// Two log functions with a clear semantic split:
+//
+//   log(line)         — "always-on" debug. Visible in simple mode (scrollback)
+//                       and file mode (file). Silent in off mode.
+//   logExtended(line) — extended traces (raw SSE, request bodies, delta
+//                       accumulators). Visible only in file mode.
+//
+// File-mode lines are formatted as `[ISO-timestamp] <line>\n` so they're
+// greppable and tail-friendly.
+const fs = require('fs');
+let mode = 'off';
+let fileStream = null;
+function init({ debug, debugFile } = {}) {
+  if (debug && debugFile) {
+    // Belt-and-braces: cli.js (args parser) errors out before this is ever
+    // reached. Throw rather than silently coerce so any internal misuse is
+    // surfaced loudly.
+    throw new Error('debug and debugFile are mutually exclusive');
+  }
+  if (debugFile) {
+    mode = 'file';
+    fileStream = fs.createWriteStream(debugFile, { flags: 'a' });
+    const ts = new Date().toISOString();
+    try {
+      fileStream.write(`\n[${ts}] [session] semalt-code debug session start pid=${process.pid}\n`);
+    } catch {}
+  } else if (debug) {
+    mode = 'simple';
+  } else {
+    mode = 'off';
+  }
+}
+function isActive()  { return mode !== 'off'; } // true whenever debug output is enabled (mode is not 'off')
+function isSimple()  { return mode === 'simple'; } // true only in 'simple' mode
+function isFile()    { return mode === 'file'; } // true only in 'file' mode
+function getMode()   { return mode; } // the current mode string as last set by init()/close()
+function _writeFile(line) {
+  if (!fileStream) return;
+  const ts = new Date().toISOString();
+  try { fileStream.write(`[${ts}] ${line}\n`); } catch {}
+}
+// "Always-on" debug — visible in simple mode (scrollback) and file mode (file).
+// Silent in off mode. Multi-line input gets one timestamp per line in file mode
+// so each line stays greppable.
+function log(line) {
+  if (mode === 'off') return;
+  const s = String(line);
+  if (mode === 'simple') {
+    // Lazy-require to avoid a require cycle: writer pulls in this module
+    // for its own drift diagnostic.
+    const writer = require('./ui/writer');
+    writer.scrollback(s);
+  } else {
+    for (const l of s.split('\n')) _writeFile(l);
+  }
+}
+// Extended-only debug — visible in file mode only. Used for high-volume
+// per-chunk traces (raw SSE, request body dumps, accumulator state) that
+// would shred the TUI if printed inline.
+function logExtended(line) {
+  if (mode !== 'file') return;
+  const s = String(line);
+  for (const l of s.split('\n')) _writeFile(l);
+}
+function close() {
+  if (fileStream) {
+    try {
+      const ts = new Date().toISOString();
+      fileStream.write(`[${ts}] [session] end pid=${process.pid}\n`);
+      fileStream.end();
+    } catch {}
+    fileStream = null;
+  }
+  mode = 'off';
+}
+module.exports = {
+  init,
+  isActive,
+  isSimple,
+  isFile,
+  getMode,
+  log,
+  logExtended,
+  close,
+};