npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.20.0 - Mend

@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

package/.claude/settings.local.json +7 -1
package/.github/workflows/ci.yml +69 -0
package/ARCHITECTURE.md +6 -95
package/CLAUDE.md +196 -316
package/README.md +148 -4
package/docs/ARCHITECTURE.md +1321 -0
package/docs/CONFIG.md +340 -0
package/docs/HISTORY.md +245 -0
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +856 -120
package/lib/api.js +239 -50
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +489 -0
package/lib/commands/chat-slash.js +415 -0
package/lib/commands/chat-turn.js +669 -0
package/lib/commands/chat.js +407 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +360 -11
package/lib/constants.js +401 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +202 -0
package/lib/hooks.js +286 -0
package/lib/images.js +270 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +123 -26
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +99 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2862 -0
package/lib/tool_specs.js +263 -9
package/lib/tools.js +352 -1039
package/lib/ui/anim.js +86 -0
package/lib/ui/ansi.js +17 -27
package/lib/ui/chat-history.js +253 -71
package/lib/ui/create-ui.js +67 -24
package/lib/ui/diff.js +90 -25
package/lib/ui/file-activity.js +236 -0
package/lib/ui/format.js +195 -29
package/lib/ui/input-field.js +21 -11
package/lib/ui/md-stream.js +234 -0
package/lib/ui/render-operation.js +113 -0
package/lib/ui/select.js +1 -4
package/lib/ui/status-bar.js +146 -36
package/lib/ui/stream.js +20 -13
package/lib/ui/theme.js +190 -44
package/lib/ui/tool-operation.js +190 -0
package/lib/ui/utils.js +9 -5
package/lib/ui/web-activity.js +270 -0
package/lib/ui/writer.js +159 -45
package/lib/ui.js +1 -1
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/anim-driver.test.js +153 -0
package/test/ask-user-display.test.js +226 -0
package/test/ask-user-gate.test.js +231 -0
package/test/background.test.js +414 -0
package/test/chat-history-nocolor.test.js +155 -0
package/test/chat-relogin.test.js +207 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/defer-detail-band.test.js +403 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/detail-band-tab-flatten.test.js +242 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/exec-diff.test.js +268 -0
package/test/executors.test.js +599 -0
package/test/extract-tool-calls.test.js +349 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/file-activity.test.js +522 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/grep-path-target.test.js +227 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +143 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +348 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/input-field-ctrl-o.test.js +37 -0
package/test/live-height-physical.test.js +281 -0
package/test/max-iterations.test.js +218 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/md-stream.test.js +183 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +409 -0
package/test/native-live-narration.test.js +254 -0
package/test/output-chokepoint.test.js +188 -0
package/test/output-heredoc-leak.test.js +195 -0
package/test/output-preview.test.js +245 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +362 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/render-operation.test.js +317 -0
package/test/replay-descriptor-xml.test.js +216 -0
package/test/replay-descriptor.test.js +189 -0
package/test/replay-web-aggregate.test.js +291 -0
package/test/replay-web-persist.test.js +241 -0
package/test/result-cap.test.js +233 -0
package/test/running-glyph-anim.test.js +111 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-driver.test.js +93 -0
package/test/status-bar-pause.test.js +164 -0
package/test/status-bar-resync.test.js +188 -0
package/test/stream-parser.test.js +171 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/theme-palette.test.js +166 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/truncate-visible.test.js +78 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/view-image.test.js +199 -0
package/test/web-activity-ordering.test.js +203 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438
package/path +0 -1

package/lib/deny.js ADDED Viewed

@@ -0,0 +1,199 @@
+'use strict';
+// ---------------------------------------------------------------------------
+// Destructive-command deny-list
+// ---------------------------------------------------------------------------
+//
+// These patterns are checked for EVERY shell tool call (exec/shell), in BOTH
+// interactive and non-TTY mode, regardless of any --allow-* approval flags.
+// The ONLY way to bypass this list is the explicit
+// `--dangerously-skip-permissions` flag, which opts out of all safety.
+//
+// This is a defense-in-depth net, not a complete sandbox: shell is infinitely
+// expressive and a determined model can evade pattern matching. The goal is to
+// stop the obvious, high-blast-radius mistakes (wiping a disk, rm -rf /,
+// piping the internet into a root shell) from slipping through auto-approve.
+//
+// To extend: add a `{ label, test }` entry. `test(command)` receives the raw
+// command string (already whitespace-normalised for matching convenience) and
+// returns true when the command should be blocked. Keep the `label` short and
+// user-facing — it is shown in the refusal message and written to the audit log.
+//
+// Each entry is deliberately narrow so legitimate commands are not caught:
+// e.g. `rm -r build/` is allowed, but `rm -rf` (recursive AND force) is not.
+// ---------------------------------------------------------------------------
// True when `token` is a (possibly abbreviated) spelling of the GNU long option
// `full`, e.g. `--rec` for `--recursive`. GNU getopt accepts any unambiguous
// prefix, and for `rm` every `--r…` / `--f…` prefix is unambiguous.
function _isLongOptionPrefix(token, full) {
  const lowered = token.toLowerCase();
  return lowered.length > 2 && lowered.startsWith('--') && full.startsWith(lowered);
}

// True when any `rm` invocation in `command` carries BOTH a recursive flag and a
// force flag. The command is split on `;`, `&` and `|` so that (a) `rm` is found
// directly after a separator (`ls;rm -rf x`), and (b) flags belonging to a
// different command in the same line are not attributed to `rm`.
// Flags are recognised as whole argument tokens (optionally quoted), so a dash
// inside a file name (`my-files`) is not mistaken for `-f`.
function _isRecursiveForceRm(command) {
  return command.split(/[;&|]/).some((segment) => {
    const invocation = /(?:^|\s)rm\s+([\s\S]*)$/.exec(segment);
    if (!invocation) return false;
    const tokens = invocation[1]
      .split(/\s+/)
      .map((token) => token.replace(/^["']+|["']+$/g, ''));
    // Short flag clusters: a single dash followed only by letters (-r, -Rf, -fr).
    const recursive = tokens.some(
      (token) => /^-[a-z]*r[a-z]*$/i.test(token) || _isLongOptionPrefix(token, '--recursive'),
    );
    const force = tokens.some(
      (token) => /^-[a-z]*f[a-z]*$/i.test(token) || _isLongOptionPrefix(token, '--force'),
    );
    return recursive && force;
  });
}

// Ordered list of `{ label, test, catastrophic? }` rules. `test(command)` returns
// true when the command must be refused; `label` is the user-facing reason;
// `catastrophic` marks the unrecoverable subset (disk wipe, fork bomb).
const DENYLIST = [
  {
    label: 'recursive force delete (rm -rf and variants)',
    // `rm` invoked with BOTH a recursive flag (-r/-R/--recursive) AND a force
    // flag (-f/--force), in either order, whether bundled (-rf, -fr) or split
    // (-r -f). Plain `rm -r dir` or `rm -f file` are intentionally allowed.
    test: (c) => _isRecursiveForceRm(c),
  },
  {
    label: 'piping remote content into a shell or interpreter',
    // curl/wget/fetch whose output is piped straight into a shell or scripting
    // interpreter — the classic `curl … | sh` / `wget … | bash` RCE pattern.
    test: (c) =>
      /\b(curl|wget|fetch)\b[^\n|]*\|\s*(sudo\s+)?(sh|bash|zsh|dash|ksh|fish|python[0-9.]*|perl|ruby|node|php)\b/i.test(c),
  },
  {
    label: 'disk-wipe / block-device write',
    // dd writing to a raw disk, mkfs (format), or any redirect that overwrites
    // a block device under /dev. These destroy data with no undo.
    catastrophic: true,
    test: (c) =>
      /\bdd\b[^\n]*\bof=\/dev\/(sd|nvme|hd|mmcblk|vd|disk|rdisk)/i.test(c) ||
      /\bmkfs(\.[a-z0-9]+)?\b/i.test(c) ||
      /\b(shred|wipefs)\b[^\n]*\/dev\//i.test(c) ||
      />\s*\/dev\/(sd|nvme|hd|mmcblk|vd|disk|rdisk)/i.test(c),
  },
  {
    label: 'fork bomb',
    // The canonical `:(){ :|:& };:` and named-function variants that recursively
    // spawn until the process table is exhausted.
    catastrophic: true,
    test: (c) => /(^|\s|;)([a-zA-Z_:][\w:]*)\s*\(\)\s*\{[^}]*\|\s*\2[^}]*&[^}]*\}\s*;\s*\2/.test(c),
  },
  {
    label: 'recursive chmod/chown on a system root',
    // Recursively changing ownership/permissions from a filesystem root or a
    // top-level system directory bricks the machine.
    test: (c) =>
      /\b(chmod|chown)\b[^\n]*\s-{0,2}[a-z]*R[a-z]*\s[^\n]*\s(\/|\/etc|\/usr|\/bin|\/boot|\/lib|\/sys|\/var)(\s|\/|$)/i.test(c),
  },
  {
    label: 'destructive write to a system / out-of-tree path',
    // Redirects, tee, or truncation targeting sensitive absolute paths outside
    // the working tree (system config, init, secrets). Best-effort: shell is
    // too expressive to catch every case, but the obvious ones are covered.
    test: (c) =>
      /(>>?|\btee\b[^\n]*)\s*\/(etc|boot|sys|proc|usr|bin|sbin|lib|lib64|root)\b/i.test(c) ||
      /\brm\s+[^\n]*\s\/(etc|boot|sys|proc|usr|bin|sbin|lib|lib64|root)(\s|\/|$)/i.test(c) ||
      // rm targeting the filesystem root itself (`rm -rf /`, `rm -rf /*`).
      /\brm\s+[^\n]*\s\/(\s|\*|$)/.test(c),
  },
  // -------------------------------------------------------------------------
  // Windows (cmd.exe / PowerShell) destructive set (Task 4.4).
  // -------------------------------------------------------------------------
  // Native Windows has no OS sandbox primitive, so the deny-list is the only
  // shell guard there. The POSIX patterns above never match `del /s /q`,
  // `format C:`, `rd /s`, or `Remove-Item -Recurse -Force`, so cover them
  // explicitly. These are checked on every platform (a deny-list miss on the
  // wrong OS is harmless; a hit on the wrong OS just fails to run a command
  // that wouldn't have worked anyway).
  {
    label: 'Windows recursive delete (del /s, rd /s, rmdir /s)',
    // `del /s` recurses into subdirectories; `rd /s` / `rmdir /s` delete a
    // directory tree. Flags are case-insensitive and may carry /q (quiet) /f
    // (force) in any order. Plain `del file.txt` / `rd emptydir` are allowed.
    test: (c) =>
      /\bdel\b[^\n|&]*\s\/s\b/i.test(c) ||
      /\b(rd|rmdir)\b[^\n|&]*\s\/s\b/i.test(c),
  },
  {
    label: 'PowerShell recursive force delete (Remove-Item -Recurse -Force)',
    // Remove-Item, or one of its PowerShell aliases (ri/del/erase/rd/rmdir),
    // with BOTH -Recurse AND -Force — the Windows analogue of `rm -rf`.
    // PowerShell accepts any prefix of a parameter name, so every abbreviation
    // from `-r` to `-Recurse` and from `-f` to `-Force` is matched; the very
    // common `-fo` spelling is therefore covered. (`rm` is handled by the POSIX
    // rule at the top of the list.)
    test: (c) => {
      if (!/\b(Remove-Item|ri|del|erase|rd|rmdir)\b/i.test(c)) return false;
      const hasRecurse = /-r(?:e(?:c(?:u(?:r(?:se?)?)?)?)?)?\b/i.test(c);
      const hasForce = /-f(?:o(?:r(?:ce?)?)?)?\b/i.test(c);
      return hasRecurse && hasForce;
    },
  },
  {
    label: 'Windows format / disk wipe (format, Format-Volume, Clear-Disk, cipher /w, diskpart clean)',
    // Formatting a volume, clearing a disk, or securely wiping free space — the
    // Windows equivalents of mkfs / dd-to-disk. Catastrophic: unrecoverable.
    catastrophic: true,
    test: (c) =>
      /\bformat\s+[^\n|&]*?\b[a-z]:/i.test(c) ||      // format C: /fs:ntfs
      /\bFormat-Volume\b/i.test(c) ||
      /\bClear-Disk\b/i.test(c) ||
      /\bcipher\s+\/w/i.test(c) ||                     // cipher /w:C  (wipe free space)
      (/\bdiskpart\b/i.test(c) && /\bclean\b/i.test(c)),
  },
];
// Path-rewriting canonicalization (constraint #3). A denylist that matches the
// textual `/etc` is bypassed by `/proc/self/root/etc` or `/proc/1234/root/etc`,
// which resolve to the SAME real path but dodge the pattern. Rewrite those
// procfs-root prefixes back to `/` so the existing system-path matchers see the
// resolved path.
//
// Two cases:
//   - prefix followed by `/…`   → drop the prefix (the following `/` is the root),
//     which also collapses chained prefixes (`/proc/self/root/proc/1/root/etc`);
//   - prefix that ends the path (end of string, whitespace, or a shell
//     delimiter) → replace it with `/`, so `rm -rf /proc/self/root` is seen as
//     `rm -rf /` instead of losing its target entirely.
// A longer name such as `/proc/self/rootfs` is left untouched.
function _canonicalizeProcRoot(command) {
  return command.replace(
    /\/proc\/(?:self|\d+)\/root(?=(\/)|[\s;&|<>()'"`]|$)/gi,
    (_prefix, followingSlash) => (followingSlash === undefined ? '/' : ''),
  );
}
// Match a raw shell command against DENYLIST.
//
// Returns `{ label, catastrophic }` for the first rule (in list order) that
// matches, or null when nothing matches or `command` is not a non-empty string.
// `catastrophic` is always a boolean.
//
// Only the text handed to the matchers is altered — the caller still executes
// the original command:
//   1. runs of tab / CR / LF are replaced by a single space;
//   2. a second candidate with procfs-root prefixes canonicalized is added when
//      it differs from the first, so a `/proc/self/root/etc` rewrite is caught by
//      the same `/etc` matchers (constraint #3).
function checkShellDenylist(command) {
  if (typeof command !== 'string' || command.length === 0) return null;

  const flattened = command.replace(/[\t\r\n]+/g, ' ');
  const candidates = [flattened];
  const derooted = _canonicalizeProcRoot(flattened);
  if (derooted !== flattened) candidates.push(derooted);

  // A malformed matcher must never crash the agent loop — treat a throw as
  // "no match" for that candidate and move on.
  const ruleMatches = (rule, text) => {
    try {
      return Boolean(rule.test(text));
    } catch {
      return false;
    }
  };

  const hit = DENYLIST.find((rule) => candidates.some((text) => ruleMatches(rule, text)));
  return hit ? { label: hit.label, catastrophic: !!hit.catastrophic } : null;
}
// Turn a deny-list verdict into an action, depending on who asked for the command.
//
//   initiator 'agent' (default, and any value other than 'user'): the model
//     requested it. Every deny-list hit is a hard block; only
//     --dangerously-skip-permissions (checked by the caller, not here) bypasses it.
//
//   initiator 'user': a human typed it (`!cmd` / `semalt-code shell`). Humans own
//     their machine, so hits are let through and merely flagged — except the
//     catastrophic subset (disk wipe / fork bomb), for which a single y/N
//     confirmation is requested as a typo guard.
//
// Result shapes:
//   { action: 'allow' }                        — not deny-listed; run it
//   { action: 'allow', label, bypassed: true } — deny-listed but user-exempt; run it
//   { action: 'block', label }                 — refuse (non-user deny-list hit)
//   { action: 'confirm', label }               — user-initiated catastrophic; ask first
function classifyShellCommand(command, initiator = 'agent') {
  const hit = checkShellDenylist(command);
  if (!hit) return { action: 'allow' };

  const { label } = hit;
  if (initiator !== 'user') return { action: 'block', label };
  if (hit.catastrophic) return { action: 'confirm', label };
  return { action: 'allow', label, bypassed: true };
}
+module.exports = {
+  DENYLIST,
+  checkShellDenylist,
+  classifyShellCommand,
+};

package/lib/doctor.js ADDED Viewed

@@ -0,0 +1,160 @@
+'use strict';
+// ---------------------------------------------------------------------------
+// Self-diagnostics (Task 2.6) — `/doctor` and `semalt-code doctor`.
+// ---------------------------------------------------------------------------
+//
+// Aggregates a set of pass/warn/fail checks across the install: config validity
+// and the resolved layers (Task 2.2), dashboard reachability, the selected
+// model and whether its context limit is known, audit-log writability, the API
+// key source (Phase 0), and the loaded project-memory files (Task 2.3).
+//
+// The aggregation and formatting are pure; gathering is injected via `deps` so
+// the network/fs checks are testable with mocks.
// Glyph printed in front of each check line, keyed by the check's status.
const STATUS_ICON = {
  pass: '✓',
  warn: '⚠',
  fail: '✗',
};
// Collapse a list of { name, status, detail } checks into one verdict.
//
// Returns { overall, counts, checks } where:
//   counts  tallies entries whose status is exactly 'pass', 'warn' or 'fail'
//           (anything else — including falsy entries — is not counted);
//   overall is 'fail' if any check failed, else 'warn' if any warned, else 'pass';
//   checks  is the input array itself, or [] when the input is not an array.
function aggregateChecks(checks) {
  const list = Array.isArray(checks) ? checks : [];
  const counts = { pass: 0, warn: 0, fail: 0 };

  list.forEach((entry) => {
    const status = entry && entry.status;
    if (status === 'pass' || status === 'warn' || status === 'fail') {
      counts[status] += 1;
    }
  });

  let overall = 'pass';
  if (counts.fail) {
    overall = 'fail';
  } else if (counts.warn) {
    overall = 'warn';
  }
  return { overall, counts, checks: list };
}
// Render an aggregated doctor result as a plain-text report.
//
// `result` is the { overall, counts, checks } object produced by
// aggregateChecks. One line is emitted per check (`<icon> <name>: <detail>`),
// followed by a blank line and an overall summary. A check whose status has no
// icon is shown with `?`.
//
// aggregateChecks tolerates falsy entries in the list and returns the list
// unchanged, so falsy entries are skipped here too instead of throwing on
// `c.status`.
function formatDoctorReport(result) {
  const lines = ['semalt-code doctor'];
  for (const c of result.checks) {
    if (!c) continue;
    lines.push(`  ${STATUS_ICON[c.status] || '?'} ${c.name}: ${c.detail}`);
  }
  lines.push('');
  lines.push(`  Overall: ${result.overall.toUpperCase()} — ${result.counts.pass} pass, ${result.counts.warn} warn, ${result.counts.fail} fail`);
  return lines.join('\n');
}
// Gather every diagnostic into a check list, then aggregate. All external
// access goes through injected deps:
//   config            resolved (merged) config object
//   layers            { userPresent, projectPath, envKeys[], flagKeys[] }
//   apiKeySource      'env' | 'keychain' | 'config' | 'none'
//   memoryFiles       array of loaded memory file metas (from loadProjectMemory)
//   auditWritable     () => boolean
//   pingDashboard     async () => boolean | null  (null = skipped/not-logged-in)
//
// Every dep is optional. `config` and `layers` may also be passed as null — a
// destructuring default only covers `undefined`, so they are normalised
// explicitly. Resolves to the aggregateChecks() result for the six checks, in
// order: config, api key, model, dashboard, audit log, memory.
async function runDoctor(deps) {
  const {
    config: rawConfig,
    layers: rawLayers,
    apiKeySource = 'none',
    memoryFiles = [],
    auditWritable = () => true,
    pingDashboard = async () => null,
  } = deps || {};
  const config = rawConfig || {};
  const layers = rawLayers || {};
  const checks = [];
  // 1. Config + resolved layers.
  {
    const parts = [];
    parts.push(layers.userPresent ? 'user' : 'user(default)');
    if (layers.projectPath) parts.push(`project(${layers.projectPath})`);
    if (Array.isArray(layers.envKeys) && layers.envKeys.length) parts.push(`env(${layers.envKeys.join(',')})`);
    if (Array.isArray(layers.flagKeys) && layers.flagKeys.length) parts.push(`flags(${layers.flagKeys.join(',')})`);
    checks.push({ name: 'config', status: 'pass', detail: `loaded; layers: ${parts.join(' → ')}` });
  }
  // 2. API key source (Phase 0).
  checks.push(apiKeySource === 'none'
    ? { name: 'api key', status: 'warn', detail: "no key (env/keychain/config all empty); requests may 401" }
    : { name: 'api key', status: 'pass', detail: `source: ${apiKeySource}` });
  // 3. Selected model + context limit.
  {
    const model = config.default_model;
    if (!model) {
      checks.push({ name: 'model', status: 'warn', detail: 'no default_model selected (run /models)' });
    } else {
      const known = Number.isInteger(config.context_length) && config.context_length > 0;
      checks.push({
        name: 'model',
        status: known ? 'pass' : 'warn',
        detail: known ? `${model} (context limit ${config.context_length})` : `${model} (context limit unknown — learned on first overflow)`,
      });
    }
  }
  // 4. Dashboard reachability. A throwing probe counts as unreachable; only an
  //    explicit null means "skipped". The same label is used in all three
  //    branches so an unset URL never renders as the literal "undefined".
  {
    const dashboardLabel = config.dashboard_url || '(unset)';
    let reachable = null;
    try { reachable = await pingDashboard(); } catch { reachable = false; }
    if (reachable === null) {
      checks.push({ name: 'dashboard', status: 'warn', detail: `${dashboardLabel} — not logged in (skipped)` });
    } else if (reachable) {
      checks.push({ name: 'dashboard', status: 'pass', detail: `${dashboardLabel} reachable` });
    } else {
      checks.push({ name: 'dashboard', status: 'fail', detail: `${dashboardLabel} unreachable` });
    }
  }
  // 5. Audit-log writability. A throwing probe counts as not writable.
  {
    let ok = false;
    try { ok = !!auditWritable(); } catch { ok = false; }
    checks.push(ok
      ? { name: 'audit log', status: 'pass', detail: 'writable' }
      : { name: 'audit log', status: 'fail', detail: 'not writable' });
  }
  // 6. Project memory (Task 2.3). Absence is not a problem — memory is optional.
  {
    const n = Array.isArray(memoryFiles) ? memoryFiles.length : 0;
    checks.push(n
      ? { name: 'memory', status: 'pass', detail: `${n} file(s): ${memoryFiles.map((f) => f.path).join(', ')}` }
      : { name: 'memory', status: 'pass', detail: 'no AGENTS.md/CLAUDE.md found (optional)' });
  }
  return aggregateChecks(checks);
}
// Production entry point: collect the real inputs for runDoctor and run it.
//
//   getConfig      optional () => resolved config; a missing function or a
//                  falsy return value yields {}
//   pingDashboard  optional async probe supplied by the caller (built from the
//                  api client), so this module performs no network access
//                  itself; defaults to a probe that resolves null ("skipped")
//
// Project modules are required lazily, inside the call. Resolves to the
// runDoctor() result.
async function diagnose({ getConfig, pingDashboard } = {}) {
  const fs = require('fs');
  const path = require('path');
  const { readUserConfig, findProjectConfigPath, envConfigLayer, flagsConfigLayer } = require('./config');
  const { apiKeySource } = require('./secrets');
  const { loadProjectMemory } = require('./memory');
  const { AUDIT_LOG } = require('./audit');

  let config = {};
  if (typeof getConfig === 'function') {
    config = getConfig() || {};
  }

  // Which config layers contributed, for the "config" check line.
  const userPresent = !!readUserConfig();
  const projectPath = findProjectConfigPath(process.cwd());
  const envKeys = Object.keys(envConfigLayer(process.env));
  const flagKeys = Object.keys(flagsConfigLayer(process.argv.slice(2)));
  const layers = { userPresent, projectPath, envKeys, flagKeys };

  // Probe: make sure the log directory exists, then append zero bytes to the log.
  function auditWritable() {
    try {
      fs.mkdirSync(path.dirname(AUDIT_LOG), { recursive: true });
      fs.appendFileSync(AUDIT_LOG, '');
    } catch {
      return false;
    }
    return true;
  }

  // A failure to load project memory is reported as "no memory files".
  let memoryFiles;
  try {
    memoryFiles = loadProjectMemory().files;
  } catch {
    memoryFiles = [];
  }

  const skippedPing = async () => null;
  return runDoctor({
    config,
    layers,
    apiKeySource: apiKeySource(config),
    memoryFiles,
    auditWritable,
    pingDashboard: pingDashboard || skippedPing,
  });
}
+module.exports = { aggregateChecks, formatDoctorReport, runDoctor, diagnose, STATUS_ICON };

package/lib/headless.js ADDED Viewed

@@ -0,0 +1,202 @@
+'use strict';
+// ---------------------------------------------------------------------------
+// Headless output surface (Task 2.4) — `-p/--print` + --output-format
+// ---------------------------------------------------------------------------
+//
+// Three formats:
+//   text         human output (default) — handled by the caller, not here.
+//   json         a single JSON object { result, toolCalls, usage, cost } to
+//                stdout, nothing else.
+//   stream-json  newline-delimited JSON events (assistant / tool / result),
+//                one per line, for piping.
+//
+// Machine modes must keep stdout byte-pure: no spinners, no status bar, no
+// ANSI. The two chrome sinks in a headless run both honor the tools.js
+// "UI active" flag: tools' _log (the ✓/✗ lines) and the write/append permission
+// diff (writer.scrollback). Flipping setUIActive(true) for the duration of the
+// run suppresses both, so nothing but the structured JSON is produced. The JSON
+// itself is written through an injectable sink (default process.stdout) so the
+// formatter is unit-testable without touching the global stream.
+//
+// Phase 0 safety is unchanged: headless still refuses deny-listed / interactive
+// approvals unless --dangerously-skip-permissions, because that gate lives in
+// the permission layer the agent loop already runs through.
+const { setUIActive, isUIActive } = require('./tools');
+const { priceForModel, computeCost } = require('./pricing');
+const { DEFAULT_MAX_ITERATIONS } = require('./constants');
+const { buildToolOperation } = require('./ui/tool-operation');
+const { renderOperation } = require('./ui/render-operation');
// Output formats whose stdout must contain nothing but structured JSON.
const MACHINE_MODES = new Set([
  'json',
  'stream-json',
]);

// True when `mode` is one of the machine-readable output formats.
function isMachineMode(mode) {
  return MACHINE_MODES.has(mode);
}
// Summarise token usage from `metrics.turns`.
//
// prompt_tokens / completion_tokens are summed over every turn (total
// processed) and total_tokens is their sum. The context_* fields describe the
// LAST turn only, i.e. the current context:
//   context_tokens        the last turn's promptTokens
//   context_base_est      the last turn's baseEst    } additive ESTIMATED split
//   context_working_est   the last turn's workingEst } (Variant B, display-only)
// The *_est names mark those two as estimates so they never read as measured.
// Missing or falsy values count as 0; a missing or non-array `turns` is treated
// as no turns.
function usageFromMetrics(metrics) {
  const turns = metrics && Array.isArray(metrics.turns) ? metrics.turns : [];
  const read = (turn, field) => (turn && turn[field]) || 0;

  const promptTotal = turns.reduce((sum, turn) => sum + read(turn, 'promptTokens'), 0);
  const completionTotal = turns.reduce((sum, turn) => sum + read(turn, 'completionTokens'), 0);
  const latest = turns[turns.length - 1];

  return {
    prompt_tokens: promptTotal,
    completion_tokens: completionTotal,
    total_tokens: promptTotal + completionTotal,
    context_tokens: read(latest, 'promptTokens'),
    context_base_est: read(latest, 'baseEst'),
    context_working_est: read(latest, 'workingEst'),
    turns: turns.length,
  };
}
// Pick the text reported as the run's final result.
//
// Preferred source: the content of the last entry in `messages` whose role is
// 'assistant' ('' when that content is falsy). When `messages` is not an array
// or holds no assistant entry, fall back to the last item of `assistantMsgs`,
// and to '' when that list is missing or empty.
function finalResult(messages, assistantMsgs) {
  if (Array.isArray(messages)) {
    const lastAssistant = messages
      .slice()
      .reverse()
      .find((message) => message && message.role === 'assistant');
    if (lastAssistant) {
      return lastAssistant.content || '';
    }
  }
  if (assistantMsgs && assistantMsgs.length) {
    return assistantMsgs[assistantMsgs.length - 1];
  }
  return '';
}
// Build the agent-loop callbacks plus the `finalize` step for one headless run.
//
//   mode       output format; only the machine modes install callbacks or emit
//   emitLine   (obj) => void — writes one JSON object as a line
//   model / priceOverrides   used once, up front, to look up the price that
//              `finalize` passes to computeCost
//
// Returns { callbacks, finalize, toolCalls, assistantMsgs }. In a non-machine
// mode `callbacks` is empty and `finalize` does nothing. `toolCalls` and
// `assistantMsgs` are the live arrays the callbacks append to.
function createHeadlessSink(mode, emitLine, { model = null, priceOverrides = null } = {}) {
  const toolCalls = [];
  const assistantMsgs = [];
  let lastError = null;
  const machine = isMachineMode(mode);
  const streaming = mode === 'stream-json';
  const price = priceForModel(model, priceOverrides);

  // Phase 6d-ii — sink-local descriptor build (option A): build the same
  // ToolOperation the interactive sink builds (chat-turn.js) from the `meta`
  // already passed and render its json-mode core. Web ops are ordinary tools
  // here (NO web-activity collapse — N per-op events is the contract).
  // No-descriptor safety: any failure yields null, never a crash.
  function describeTool(tag, resultStr, ms, meta, ok) {
    try {
      const attrs = meta ? meta.attrs : null;
      const operation = buildToolOperation({
        id: meta ? meta.id : null,
        tag,
        arg: attrs ? (attrs.command || attrs.path || attrs.url || attrs.src || attrs.key || attrs.name || attrs.pattern) : '',
        attrs,
        status: ok ? 'ok' : 'error',
        durationMs: ms,
        meta: meta ? meta.meta : null,
        error: meta ? meta.error : null,
        diff: meta ? meta.diff : null,
        // Model-facing result → lets the descriptor derive an output-preview
        // detail (shell/MCP/subagent). Chrome-only; context is untouched.
        output: typeof resultStr === 'string' ? resultStr : null,
        noDuration: tag === 'ask_user',
      });
      return renderOperation(operation, { mode: 'json' });
    } catch (_e) {
      return null;
    }
  }

  function onAssistantMessage(m) {
    assistantMsgs.push(m);
    if (streaming) emitLine({ type: 'assistant', content: m });
  }

  function onToolEnd(tag, resultStr, ms, meta) {
    const call = meta && Array.isArray(meta.call) ? meta.call : null;
    // Legacy per-tool fields — computed EXACTLY as before so their names,
    // types, and values can never drift (the contract pin).
    const legacy = {
      tool: tag,
      args: call ? call.slice(1) : [],
      ok: !(meta && meta.error),
      ms,
    };
    const core = describeTool(tag, resultStr, ms, meta, legacy.ok);
    // The descriptor core goes in ADDITIVELY BENEATH the legacy fields: `legacy`
    // spreads last so tool/args/ok/ms win on any name clash.
    const rec = core ? { ...core, ...legacy } : { ...legacy };
    toolCalls.push(rec);
    if (streaming) emitLine({ type: 'tool', ...rec });
  }

  // Remember the most recent non-warning error message for the final object.
  function onError(e) {
    if (e && !e.isWarning && e.message) lastError = e.message;
  }

  const callbacks = machine ? { onAssistantMessage, onToolEnd, onError } : {};

  function finalize({ messages, metrics, stopReason, verifyStatus } = {}) {
    if (!machine) return;
    const result = finalResult(messages, assistantMsgs);
    const usage = usageFromMetrics(metrics);
    const summary = {
      usage,
      // cost is null (rendered "unknown" downstream) when the model has no price.
      cost: computeCost(usage, price),
      // stopReason (Pre-Task 4.0a): why the loop ended — 'end_turn' normally,
      // 'max_iterations' when the cap was hit, 'verify_failed' when enforcing
      // self-verification exhausted its attempts. Always reported so consumers
      // can distinguish a finished task from a truncated one.
      stopReason: stopReason || 'end_turn',
      // verifyStatus (Task 4.2): 'skipped' (no verify ran / --no-verify / no
      // command), 'passed', or 'failed'. Surfaced alongside stopReason.
      verifyStatus: verifyStatus || 'skipped',
    };
    if (lastError) summary.error = lastError;
    // json mode prints the whole run as one object; stream-json closes the
    // stream with a `result` event (tool calls were already streamed).
    emitLine(mode === 'json'
      ? { result, toolCalls, ...summary }
      : { type: 'result', result, ...summary });
  }

  return { callbacks, finalize, toolCalls, assistantMsgs };
}
// Drive one agent-loop run in headless mode and return whatever the loop
// resolves to.
//
// In a machine mode the "UI active" flag is switched on for the duration of
// the run — which, per the header of this file, silences the human-facing
// chrome — and restored to its previous value afterwards, even when the loop
// throws. Structured JSON lines go through `write` (default: process.stdout).
//
//   runAgentLoop   (messages, model, maxIterations, tokenLimit, opts) => Promise
//   maxIterations  undefined → DEFAULT_MAX_ITERATIONS; any other value is
//                  passed through untouched
//   agentOpts      forwarded to the loop; its callbacks are merged with the
//                  sink's, and the sink's win on a name clash
async function runHeadless({
  runAgentLoop,
  messages,
  model,
  tokenLimit = null,
  maxIterations,
  agentOpts = {},
  mode = 'text',
  write,
  priceOverrides = null,
}) {
  const machine = isMachineMode(mode);
  const sinkWrite = write || ((chunk) => process.stdout.write(chunk));
  const sink = createHeadlessSink(
    mode,
    (obj) => sinkWrite(`${JSON.stringify(obj)}\n`),
    { model, priceOverrides },
  );

  const prevUIActive = machine ? isUIActive() : null;
  if (machine) setUIActive(true);

  try {
    const mergedCallbacks = { ...(agentOpts.callbacks || {}), ...sink.callbacks };
    const iterationCap = maxIterations === undefined ? DEFAULT_MAX_ITERATIONS : maxIterations;
    const outcome = await runAgentLoop(
      messages,
      model,
      iterationCap,
      tokenLimit,
      { ...agentOpts, callbacks: mergedCallbacks },
    );
    sink.finalize(outcome);
    return outcome;
  } finally {
    if (machine) setUIActive(prevUIActive);
  }
}
+module.exports = {
+  isMachineMode,
+  usageFromMetrics,
+  finalResult,
+  createHeadlessSink,
+  runHeadless,
+};