npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.19.0 - Mend

@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146) hide show

package/.claude/settings.local.json +6 -1
package/.github/workflows/ci.yml +69 -0
package/CLAUDE.md +1584 -26
package/README.md +147 -3
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +711 -104
package/lib/api.js +213 -49
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +306 -0
package/lib/commands/chat-slash.js +399 -0
package/lib/commands/chat-turn.js +446 -0
package/lib/commands/chat.js +403 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +333 -11
package/lib/constants.js +372 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +167 -0
package/lib/hooks.js +286 -0
package/lib/images.js +264 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +100 -10
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +84 -5
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2558 -0
package/lib/tool_specs.js +222 -2
package/lib/tools.js +272 -1020
package/lib/ui/format.js +22 -1
package/lib/ui/input-field.js +16 -7
package/lib/ui/status-bar.js +79 -11
package/lib/ui/theme.js +1 -0
package/lib/ui/web-activity.js +218 -0
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/background.test.js +414 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/executors.test.js +362 -0
package/test/extract-tool-calls.test.js +315 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +142 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +203 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/max-iterations.test.js +216 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +356 -0
package/test/output-chokepoint.test.js +188 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +163 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/result-cap.test.js +233 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-pause.test.js +164 -0
package/test/stream-parser.test.js +147 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/web-activity-ordering.test.js +194 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438

package/lib/payload.js ADDED Viewed

@@ -0,0 +1,54 @@
+'use strict';
+// ---------------------------------------------------------------------------
+// Optional chat/completions payload augmentations (Task 2.7).
+// ---------------------------------------------------------------------------
+//
+// Pure functions, gated by config + model support, applied to the request body
+// just before it is serialized. Kept separate from api.js so the exact field
+// presence/absence is unit-testable without a live endpoint.
+// Prompt caching: mark the STABLE PREFIX — the last system message and the end
+// of the tools array — with Anthropic-style cache_control:{type:'ephemeral'} so
+// providers that honor it reuse the prefix across turns. Only mutates when
+// enabled; absent otherwise. Gated upstream by config.prompt_caching, so it is
+// never sent to endpoints the user hasn't opted in for.
+function applyPromptCaching(payload, enabled) {
+  if (!enabled || !payload) return payload;
+  if (Array.isArray(payload.messages)) {
+    for (let i = payload.messages.length - 1; i >= 0; i--) {
+      const m = payload.messages[i];
+      if (m && m.role === 'system') {
+        payload.messages[i] = { ...m, cache_control: { type: 'ephemeral' } };
+        break;
+      }
+    }
+  }
+  if (Array.isArray(payload.tools) && payload.tools.length) {
+    const last = payload.tools.length - 1;
+    payload.tools[last] = { ...payload.tools[last], cache_control: { type: 'ephemeral' } };
+  }
+  return payload;
+}
+// Heuristic for OpenAI-style `reasoning_effort` support: reasoning model
+// families (o1–o4, gpt-5, *-reasoning/-thinking, deepseek-r1, qwq).
+function supportsReasoningEffort(model) {
+  if (typeof model !== 'string' || !model) return false;
+  return /(^|[/\-])o[1-4]([-/]|$|mini|preview)|gpt-5|reason|deepseek-r1|(^|[/\-])r1([-/]|$)|thinking|qwq/i.test(model);
+}
+const VALID_EFFORTS = new Set(['minimal', 'low', 'medium', 'high']);
+// Add reasoning_effort when configured and the model supports it (or support is
+// forced for a model the heuristic misses). No-op otherwise.
+function applyReasoningEffort(payload, effort, model, { force = false } = {}) {
+  if (!payload || !effort) return payload;
+  const e = String(effort).toLowerCase();
+  if (!VALID_EFFORTS.has(e)) return payload;
+  if (!force && !supportsReasoningEffort(model)) return payload;
+  payload.reasoning_effort = e;
+  return payload;
+}
+module.exports = { applyPromptCaching, supportsReasoningEffort, applyReasoningEffort, VALID_EFFORTS };

package/lib/permission-rules.js ADDED Viewed

@@ -0,0 +1,401 @@
+'use strict';
+// ---------------------------------------------------------------------------
+// Per-pattern permission rules (Task 4.1) — the pure rule engine.
+// ---------------------------------------------------------------------------
+//
+// Extends the coarse per-tier permission model (--allow-fs/exec/net, --readonly,
+// per-session "always") with rich rules that match on a TOOL plus its ARGUMENTS
+// (glob OR regex) and resolve to one of `allow` / `deny` / `ask`. Rules are
+// layered across user scope (~/.semalt-ai/config.json) and project scope
+// (.semalt/config.json — attacker-controllable in a cloned repo).
+//
+// EVERYTHING in this module is a pure function: no I/O beyond fs.realpathSync for
+// path canonicalization (constraint 3), which is unavoidable to resolve symlinks.
+// The manager (lib/permissions.js) and the agent gate (lib/agent.js) consume the
+// decisions; composition with the unbypassable Phase 0 controls (deny-list,
+// secret-file guard, --readonly, isPathSafe) happens THERE and downstream in the
+// executors — an `allow` rule can never re-enable something those forbid.
+//
+// The six security constraints (see Task 4.1 brief), and where each lives:
+//   1. Project can only NARROW — resolvePermission filters every project `allow`
+//      rule out of the project layer's matches before resolution, so a project
+//      rule can only ever contribute `deny`/`ask`. Enforced here, not by convention.
+//   2. Precedence is total + deterministic — deny > ask > allow; more-specific
+//      beats less-specific; equal specificity resolves by deny>ask>allow (so it
+//      is order-independent). Across layers: most-restrictive wins.
+//   3. Canonicalize before matching — normalizeCall resolves `..`, symlinks, and
+//      absolute/relative forms; matching is on the canonical form.
+//   4. Regex safety — normalizeRule rejects pathological patterns (ReDoS guard)
+//      and bounds subject length; a regex that errors/over-runs fails closed.
+//   5. Fail closed — a malformed rule is dropped at load; a matcher error never
+//      GRANTS (an erroring `allow` is treated as no-match) and still RESTRICTS
+//      (an erroring `deny`/`ask` is treated as a match).
+//   6. Compose, don't bypass — the resolver only ever returns allow/deny/ask/null
+//      for the RULE layer; the manager keeps the deny-list/secret/readonly checks.
+const fs = require('fs');
+const path = require('path');
+// Per canonical action (call[0]): its public tag (for matching by either name)
+// and the argument shape used to derive matchable subjects.
+//   category 'shell' → args[0] is the command string
+//   category 'file'  → `paths` indices are filesystem paths (canonicalized)
+//   category 'net'   → `urls` indices are URLs; `paths` indices are dest files
+//   category 'other' → no matchable argument subject (only tool-only rules match)
+// Indices are positions in the argument list AFTER the action name (args[0] is
+// call[1]). An entry without `paths`/`urls` (e.g. grep, glob) yields no
+// subjects, so only tool-only rules can match it.
+const ACTION_META = {
+  shell:           { tag: 'exec',            category: 'shell' },
+  read:            { tag: 'read_file',       category: 'file', paths: [0] },
+  write:           { tag: 'write_file',      category: 'file', paths: [0] },
+  append:          { tag: 'append_file',     category: 'file', paths: [0] },
+  list_dir:        { tag: 'list_dir',        category: 'file', paths: [0] },
+  delete_file:     { tag: 'delete_file',     category: 'file', paths: [0] },
+  make_dir:        { tag: 'make_dir',        category: 'file', paths: [0] },
+  remove_dir:      { tag: 'remove_dir',      category: 'file', paths: [0] },
+  move_file:       { tag: 'move_file',       category: 'file', paths: [0, 1] },
+  copy_file:       { tag: 'copy_file',       category: 'file', paths: [0, 1] },
+  edit_file:       { tag: 'edit_file',       category: 'file', paths: [0] },
+  search_in_file:  { tag: 'search_in_file',  category: 'file', paths: [0] },
+  replace_in_file: { tag: 'replace_in_file', category: 'file', paths: [0] },
+  search_files:    { tag: 'search_files',    category: 'file', paths: [1] },
+  file_stat:       { tag: 'file_stat',       category: 'file', paths: [0] },
+  upload:          { tag: 'upload',          category: 'file', paths: [0] },
+  grep:            { tag: 'grep',            category: 'file' },
+  glob:            { tag: 'glob',            category: 'file' },
+  download:        { tag: 'download',        category: 'net', urls: [0], paths: [1] },
+  http_get:        { tag: 'http_get',        category: 'net', urls: [0] },
+  ask_user:        { tag: 'ask_user',        category: 'other' },
+  store_memory:    { tag: 'store_memory',    category: 'other' },
+  recall_memory:   { tag: 'recall_memory',   category: 'other' },
+  list_memories:   { tag: 'list_memories',   category: 'other' },
+  get_env:         { tag: 'get_env',         category: 'other' },
+  set_env:         { tag: 'set_env',         category: 'other' },
+  system_info:     { tag: 'system_info',     category: 'other' },
+};
+const VALID_ACTIONS = new Set(['allow', 'deny', 'ask']);
+// Restrictiveness rank — used to pick the most-restrictive decision across layers.
+const RANK = { deny: 3, ask: 2, allow: 1 };
+// ── ReDoS guard (constraint 4) ─────────────────────────────────────────────
+// Mirror of the cheap heuristic in lib/tools.js: reject pathologically long
+// patterns and the common catastrophic-backtracking anti-patterns. A pattern
+// that trips this is dropped at load time (fail closed). Subject length is
+// additionally bounded at match time.
+const MAX_PATTERN_LEN = 1000;
+const MAX_SUBJECT_LEN = 8192;
+function isPatternUnsafe(source) {
+  if (typeof source !== 'string') return true;
+  if (source.length > MAX_PATTERN_LEN) return true;
+  if (/(\(.*[+*].*\).*[+*])|(\[.*\].*[+*].*[+*])/.test(source)) return true;
+  return false;
+}
+// ── matcher compilation ────────────────────────────────────────────────────
+// A glob → anchored RegExp. `crossSep` controls whether `*`/`?` cross a path
+// separator: false for path-style globs (segment-aware), true for command/URL
+// globs (greedy). `**` always crosses separators; a trailing `/**` (or leading
+// `**/`) collapses the separator so `src/**` matches `src/a/b` and `**/*.env`
+// matches both `x.env` and `a/b/x.env`.
+function globToRegExp(glob, { crossSep = false } = {}) {
+  let re = '';
+  for (let i = 0; i < glob.length; i++) {
+    const c = glob[i];
+    if (c === '*') {
+      if (glob[i + 1] === '*') {
+        i++;
+        if (glob[i + 1] === '/') { i++; re += '(?:.*/)?'; }
+        else re += '.*';
+      } else {
+        re += crossSep ? '.*' : '[^/]*';
+      }
+    } else if (c === '?') {
+      re += crossSep ? '.' : '[^/]';
+    } else if ('\\^$+.()|{}[]'.includes(c)) {
+      re += '\\' + c;
+    } else {
+      re += c;
+    }
+  }
+  return new RegExp('^' + re + '$');
+}
+// Count of "literal" (non-wildcard / non-metacharacter) characters — the
+// specificity weight of a pattern. More literal chars ⇒ more specific.
+function literalCount(source, kind) {
+  if (typeof source !== 'string') return 0;
+  const meta = kind === 'regex' ? new Set('.*+?()[]{}|^$\\') : new Set('*?');
+  let n = 0;
+  for (const ch of source) if (!meta.has(ch)) n++;
+  return n;
+}
+// Compile a rule's argument matcher from its source string. Returns null when no
+// matcher is given (a tool-only rule), or throws on an unsafe/invalid pattern so
+// normalizeRule can drop the rule (fail closed). `crossSep` comes from which key
+// the user used (`path:` ⇒ false; `pattern:`/`url:`/`match:` ⇒ true).
+function compileMatcher(source, crossSep) {
+  if (source == null) return { kind: 'any', specificity: 0, test: () => true };
+  const s = String(source);
+  if (s === '*' || s === '**') return { kind: 'any', specificity: 0, test: () => true };
+  const rx = s.match(/^\/(.*)\/([gimsuy]*)$/);
+  if (rx) {
+    const body = rx[1];
+    if (isPatternUnsafe(body)) throw new Error(`unsafe regex pattern: ${s}`);
+    // Strip the stateful `g` flag (it makes .test() position-dependent).
+    const flags = (rx[2] || '').replace(/g/g, '');
+    const re = new RegExp(body, flags);
+    return {
+      kind: 'regex',
+      specificity: literalCount(body, 'regex'),
+      test: (str) => re.test(str.length > MAX_SUBJECT_LEN ? str.slice(0, MAX_SUBJECT_LEN) : str),
+    };
+  }
+  if (isPatternUnsafe(s)) throw new Error(`unsafe glob pattern: ${s}`);
+  const re = globToRegExp(s, { crossSep });
+  return {
+    kind: 'glob',
+    specificity: literalCount(s, 'glob'),
+    test: (str) => re.test(str.length > MAX_SUBJECT_LEN ? str.slice(0, MAX_SUBJECT_LEN) : str),
+  };
+}
+const TOOL_WEIGHT = 1000; // a literal tool dominates an argument-pattern's weight
+// Normalize one raw rule object into an internal rule, or null if malformed
+// (logged via `log`). `scope` is 'user' | 'project'. The matcher source is taken
+// from exactly one of `pattern` | `path` | `url` | `match`; supplying more than
+// one is ambiguous and the rule is dropped (fail closed).
+function normalizeRule(raw, scope, log) {
+  const warn = (msg) => { if (typeof log === 'function') log(`permission rule dropped (${scope}): ${msg}`); };
+  if (!raw || typeof raw !== 'object' || Array.isArray(raw)) { warn('not an object'); return null; }
+  const action = typeof raw.action === 'string' ? raw.action.trim().toLowerCase() : '';
+  if (!VALID_ACTIONS.has(action)) { warn(`bad action ${JSON.stringify(raw.action)}`); return null; }
+  const tool = typeof raw.tool === 'string' ? raw.tool.trim() : '';
+  if (!tool) { warn('missing tool'); return null; }
+  const keys = ['pattern', 'path', 'url', 'match'].filter((k) => raw[k] != null && raw[k] !== '');
+  if (keys.length > 1) { warn(`multiple matcher keys (${keys.join(', ')})`); return null; }
+  const key = keys[0] || null;
+  const source = key ? String(raw[key]) : null;
+  const crossSep = key !== 'path'; // path globs are segment-aware; everything else is greedy
+  let toolMatcher, matcher;
+  try {
+    toolMatcher = globToRegExp(tool, { crossSep: true });
+    matcher = compileMatcher(source, crossSep);
+  } catch (err) {
+    warn(err.message);
+    return null;
+  }
+  const toolSpecificity = (tool === '*' || tool === '**') ? 0 : TOOL_WEIGHT;
+  return {
+    scope,
+    tool,
+    toolMatcher,
+    matcher,
+    matcherKey: key,
+    source,
+    action,
+    specificity: toolSpecificity + matcher.specificity,
+  };
+}
+// Normalize an array of raw rules for one layer; malformed entries are dropped.
+function normalizeRuleLayer(rawRules, scope, log) {
+  if (!Array.isArray(rawRules)) return [];
+  const out = [];
+  for (const raw of rawRules) {
+    const r = normalizeRule(raw, scope, log);
+    if (r) out.push(r);
+  }
+  return out;
+}
+// Build the layered rule set from the two RAW config objects (already parsed
+// JSON, NOT the shallow-merged view — the layers MUST stay separate so the
+// project layer can be structurally prevented from widening). Reads
+// `<cfg>.permissions.rules`.
+function loadRuleLayers(userCfg, projectCfg, log) {
+  const pick = (cfg) => (cfg && cfg.permissions && Array.isArray(cfg.permissions.rules)) ? cfg.permissions.rules : [];
+  return {
+    user: normalizeRuleLayer(pick(userCfg), 'user', log),
+    project: normalizeRuleLayer(pick(projectCfg), 'project', log),
+  };
+}
+// ── call canonicalization (constraint 3) ───────────────────────────────────
+// Resolve a path to its canonical absolute form (symlinks + `..` collapsed) and
+// a cwd-relative form, both in posix separators so globs match identically on
+// every platform. For a not-yet-existent path (writes), the existing ancestor is
+// realpath'd and the basename re-appended.
+function canonicalizePath(p, cwd) {
+  const base = cwd || process.cwd();
+  let abs = path.resolve(base, p);
+  try {
+    abs = fs.realpathSync(abs);
+  } catch {
+    try {
+      const dir = fs.realpathSync(path.dirname(abs));
+      abs = path.join(dir, path.basename(abs));
+    } catch { /* keep the path.resolve form */ }
+  }
+  const absPosix = abs.split(path.sep).join('/');
+  const rel = path.relative(base, abs).split(path.sep).join('/');
+  return { abs: absPosix, rel };
+}
+function normalizeCommand(cmd) {
+  return String(cmd == null ? '' : cmd).replace(/\s+/g, ' ').trim();
+}
+// Turn a [action, ...args] call tuple into the canonical, matchable shape.
+function normalizeCall(call, opts = {}) {
+  const arr = Array.isArray(call) ? call : [];
+  const action = arr[0];
+  const args = arr.slice(1);
+  const meta = ACTION_META[action] || { tag: action, category: 'other' };
+  const cwd = opts.cwd || process.cwd();
+  const out = { action, tag: meta.tag, category: meta.category, command: null, url: null, paths: [] };
+  if (meta.category === 'shell') {
+    out.command = normalizeCommand(args[0]);
+  }
+  if (meta.urls) {
+    for (const i of meta.urls) {
+      if (args[i] != null && args[i] !== '') { out.url = String(args[i]); break; }
+    }
+  }
+  if (meta.paths) {
+    for (const i of meta.paths) {
+      const v = args[i];
+      if (v == null || v === '') continue;
+      const { abs, rel } = canonicalizePath(String(v), cwd);
+      out.paths.push(abs);
+      if (rel && rel !== abs) out.paths.push(rel);
+    }
+  }
+  return out;
+}
+// ── matching + resolution ──────────────────────────────────────────────────
+function toolMatches(rule, call) {
+  try {
+    return rule.toolMatcher.test(String(call.action)) || rule.toolMatcher.test(String(call.tag));
+  } catch {
+    return false;
+  }
+}
+// Does a rule match a normalized call? Returns true | false | 'error'. 'error'
+// (a matcher threw at runtime, e.g. a pathological regex that slipped the load
+// guard) is propagated so the caller can fail closed.
+function ruleMatchesCall(rule, call) {
+  if (!toolMatches(rule, call)) return false;
+  if (rule.matcher.kind === 'any') return true;
+  let subjects;
+  if (call.category === 'shell') subjects = [call.command];
+  else if (call.category === 'net') subjects = [call.url, ...call.paths];
+  else if (call.category === 'file') subjects = call.paths;
+  else subjects = []; // 'other' — only tool-only rules match
+  for (const s of subjects) {
+    if (s == null) continue;
+    try {
+      if (rule.matcher.test(String(s))) return true;
+    } catch {
+      return 'error';
+    }
+  }
+  return false;
+}
+// Collect the rules in one layer that match the call. Fail-closed handling of a
+// matcher error: it NEVER grants (an erroring `allow` is treated as no-match)
+// and still RESTRICTS (an erroring `deny`/`ask` is treated as a match).
+function collectMatches(rules, call) {
+  const matches = [];
+  for (const rule of rules || []) {
+    let m;
+    try { m = ruleMatchesCall(rule, call); } catch { m = 'error'; }
+    if (m === true) matches.push(rule);
+    else if (m === 'error' && rule.action !== 'allow') matches.push(rule);
+  }
+  return matches;
+}
+// Resolve one layer's matches to a single { decision, rule } or null. Precedence:
+// most specific wins; among equal specificity, deny > ask > allow (so the result
+// is independent of rule order — no ambiguity).
+function layerDecision(matches) {
+  if (!matches || !matches.length) return null;
+  let maxSpec = -1;
+  for (const r of matches) if (r.specificity > maxSpec) maxSpec = r.specificity;
+  const top = matches.filter((r) => r.specificity === maxSpec);
+  const deny = top.find((r) => r.action === 'deny');
+  if (deny) return { decision: 'deny', rule: deny };
+  const ask = top.find((r) => r.action === 'ask');
+  if (ask) return { decision: 'ask', rule: ask };
+  return { decision: 'allow', rule: top[0] };
+}
+function ruleReason(rule) {
+  if (!rule) return null;
+  const src = rule.source ? ` ${rule.matcherKey || 'pattern'}=${rule.source}` : '';
+  return `${rule.scope} ${rule.action} ${rule.tool}${src}`;
+}
+// THE resolver. Takes a NORMALIZED call (already canonicalized — constraint 3),
+// the layered rules, and a context bag (reserved for tier/readonly composition,
+// which the manager performs). Returns the deterministic rule-layer decision:
+//   { decision: 'allow'|'deny'|'ask'|null, rule, reason, scope }
+// `null` means no rule matched — the caller falls back to the tier/descriptor
+// default. Project rules can only NARROW: every project `allow` is dropped before
+// resolution, so the project layer can contribute only `deny`/`ask`. Across
+// layers the MOST RESTRICTIVE decision wins.
+function resolvePermission(call, layers, context = {}) { // eslint-disable-line no-unused-vars
+  const userMatches = collectMatches(layers && layers.user, call);
+  // Structural project-cannot-widen: drop project `allow` rules entirely.
+  const projectMatches = collectMatches(layers && layers.project, call).filter((r) => r.action !== 'allow');
+  const u = layerDecision(userMatches);
+  const p = layerDecision(projectMatches);
+  let winner;
+  if (u && p) winner = RANK[p.decision] > RANK[u.decision] ? p : u;
+  else winner = u || p;
+  if (!winner) return { decision: null, rule: null, reason: null, scope: null };
+  return { decision: winner.decision, rule: winner.rule, reason: ruleReason(winner.rule), scope: winner.rule.scope };
+}
+module.exports = {
+  ACTION_META,
+  resolvePermission,
+  normalizeCall,
+  canonicalizePath,
+  normalizeCommand,
+  normalizeRule,
+  normalizeRuleLayer,
+  loadRuleLayers,
+  globToRegExp,
+  compileMatcher,
+  ruleMatchesCall,
+  collectMatches,
+  layerDecision,
+  ruleReason,
+  // test seams
+  literalCount,
+  isPatternUnsafe,
+};

package/lib/permissions.js CHANGED Viewed

@@ -2,6 +2,7 @@
 const writer = require('./ui/writer');
 const messages = require('./ui/messages');
+const { resolvePermission, normalizeCall } = require('./permission-rules');
 const TIER_FS = ['read_file', 'write_file', 'append_file', 'delete_file', 'list_dir', 'make_dir', 'move_file', 'copy_file', 'file_stat', 'search_files', 'store_memory', 'recall_memory'];
 const TIER_EXEC = ['exec'];
@@ -9,11 +10,23 @@ const TIER_NET = ['http_get', 'download'];
 const TIER_SYS = ['system_info', 'get_env', 'set_env'];
 const TIER_MAP = { fs: TIER_FS, exec: TIER_EXEC, net: TIER_NET, sys: TIER_SYS };
-const READONLY_BLOCKED = new Set(['write_file', 'append_file', 'delete_file', 'move_file', 'copy_file']);
+// Every FILE-mutating tool. --readonly governs file tools only; shell side
+// effects are NOT constrained here (a read-only session must still run `ls` /
+// `git status`) — shell writes are confined by the OS sandbox + deny-list,
+// the right layer for that (Pre-Task 5.0c).
+const READONLY_BLOCKED = new Set([
+  'write_file', 'append_file', 'delete_file', 'move_file', 'copy_file', 'download',
+  'edit_file', 'replace_in_file', 'make_dir', 'remove_dir', 'upload',
+  // Native git tools (Task 5.1). The mutating git tools (the create/delete paths
+  // of branch/worktree are gated inside their executors) honor --readonly too — a
+  // read-only session must not stage/commit/switch/create. Read-only git tools
+  // (git_status/git_diff/git_log, and the LIST ops) are NOT here, so they still run.
+  'git_add', 'git_commit', 'git_branch', 'git_checkout', 'git_worktree',
+]);
 let _permissionQueueTail = Promise.resolve();
-function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {}) {
+function createPermissionManager(ui, { allowedTiers = [], readonly = false, skipPermissions = false, rules = null, cwd = null, approver = null, quiet = false } = {}) {
   const { BOLD, FG_CYAN, FG_DARK, FG_GRAY, FG_GREEN, FG_RED, FG_YELLOW, RST, interactiveSelect } = ui;
   const autoApprovedTags = new Set();
@@ -27,6 +40,29 @@ function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {
     sessionApprovedTags: new Set(),
   };
+  // Per-pattern rule layers (Task 4.1). { user: [...], project: [...] } of
+  // already-normalized rules, kept SEPARATE so the project layer can be
+  // structurally prevented from widening (see lib/permission-rules.js).
+  const ruleLayers = (rules && typeof rules === 'object')
+    ? { user: rules.user || [], project: rules.project || [] }
+    : { user: [], project: [] };
+  const hasRules = ruleLayers.user.length > 0 || ruleLayers.project.length > 0;
+  // Resolve the per-pattern rule decision for a [action, ...args] call tuple.
+  // Returns { decision: 'allow'|'deny'|'ask'|null, rule, reason }. `null` when no
+  // rule matches → the caller falls back to the tier/descriptor default. Thin
+  // wrapper around normalizeCall + resolvePermission; with no rules configured
+  // it short-circuits without normalizing the call. Any failure yields a null
+  // decision (the normal gate then still asks for mutating tools).
+  // NOTE(review): a null decision on failure is indistinguishable from "no rule
+  // matched", so a `deny` rule is not enforced if normalization/resolution
+  // throws — confirm the downstream gate is an acceptable fallback for that case.
+  function resolveRule(call) {
+    if (!hasRules) return { decision: null, rule: null, reason: null };
+    try {
+      const normalized = normalizeCall(call, { cwd: cwd || process.cwd() });
+      return resolvePermission(normalized, ruleLayers, { readonly, tiers: allowedTiers });
+    } catch {
+      return { decision: null, rule: null, reason: null };
+    }
+  }
   let uiCallbacks = null;
   function setUICallbacks(callbacks) {
@@ -124,20 +160,73 @@ function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {
     }
   }
-  async function askPermission(actionType, description, tag) {
-    if (state.autoApproveAll) {
-      _emitAutoApproved(description);
+  async function askPermission(actionType, description, tag, ruleVerdict = null) {
+    // --dangerously-skip-permissions is the ONLY way to fully auto-approve any
+    // tool call. It does not bypass the destructive-command deny-list (enforced
+    // unbypassably in tools.js) — it only skips the interactive/refusal gate.
+    // A per-pattern `deny` rule is handled in the agent gate BEFORE this point
+    // (it blocks even under skip-permissions); here we see only allow/ask/null.
+    if (skipPermissions) {
+      _emitAutoApproved(`[--dangerously-skip-permissions] ${description}`);
       return true;
     }
-    if (tag && (autoApprovedTags.has(tag) || state.sessionApprovedTags.has(tag))) {
-      _emitAutoApproved(description);
-      return true;
+    // Per-pattern rules (Task 4.1). An `ask` rule FORCES the interactive prompt:
+    // it bypasses the auto-approve shortcuts below (tier flags, /approve, and the
+    // per-session "always") so a user policy of "ask for this" always holds. An
+    // `allow` rule auto-approves even what a tier wouldn't — but still composes
+    // with the deny-list / secret-guard / --readonly enforced downstream.
+    const ruleDecision = ruleVerdict && ruleVerdict.decision;
+    const forceAsk = ruleDecision === 'ask';
+    if (!forceAsk) {
+      if (ruleDecision === 'allow') {
+        _emitAutoApproved(`[rule${ruleVerdict.reason ? `: ${ruleVerdict.reason}` : ''}] ${description}`);
+        return true;
+      }
+      if (state.autoApproveAll) {
+        _emitAutoApproved(description);
+        return true;
+      }
+      if (tag && (autoApprovedTags.has(tag) || state.sessionApprovedTags.has(tag))) {
+        _emitAutoApproved(description);
+        return true;
+      }
+    }
+    // Programmatic approver (Task 5.2, SDK). When the process is embedded (no
+    // TTY) a host may supply an async approver — the programmatic equivalent of
+    // the interactive prompt. It is consulted ONLY when we would otherwise have
+    // to refuse for lack of a way to ask (no tier/rule/skip auto-approved above),
+    // so it never widens what a tier already granted, and an approver that throws
+    // or returns falsy means "no" (fail closed). With NO approver the safe
+    // default holds — refuse — exactly as headless does.
+    if (typeof approver === 'function') {
+      try {
+        const ok = await approver({ actionType, description, tag, rule: ruleVerdict || null });
+        return !!ok;
+      } catch {
+        return false;
+      }
     }
     if (!process.stdout.isTTY || !process.stdin.isTTY) {
-      writer.scrollback(`  [non-TTY] Auto-approving: ${description}`);
-      return true;
+      // Non-TTY / headless mode. WITHOUT --dangerously-skip-permissions we no
+      // longer silently auto-approve — that was the security hole. A tier flag
+      // (--allow-fs/exec/net/all) pre-approves its tag above; anything reaching
+      // here would otherwise require interactive confirmation we cannot show,
+      // so we refuse it instead of approving it. `quiet` (set by the embedding
+      // SDK) suppresses the scrollback line — the denial is already surfaced to
+      // the host in the structured run result.
+      if (!quiet) {
+        writer.scrollback(
+          `  [non-TTY] Refused (interactive confirmation required, and ` +
+          `--dangerously-skip-permissions not set): ${description}`
+        );
+      }
+      return false;
     }
     if (uiCallbacks) {
@@ -209,6 +298,7 @@ function createPermissionManager(ui, { allowedTiers = [], readonly = false } = {
     captureSelect,
     clear,
     readonlyBlock,
+    resolveRule,
     setUICallbacks,
     state,
     toggleAll,

package/lib/pricing.js ADDED Viewed

@@ -0,0 +1,67 @@
+'use strict';
+// ---------------------------------------------------------------------------
+// Cost estimation (Task 2.6) — a per-model price table × token usage.
+// ---------------------------------------------------------------------------
+//
+// Prices are USD per 1,000,000 tokens. Self-hosted / local models have no
+// published price; an unknown price yields cost === null, which the UI renders
+// as "unknown" — NEVER a fake $0. Users extend/override the built-in table via
+// `config.pricing` (same shape: { "<model>": { input, output } }).
+// Built-in price rows, written as [model, input, output] and expanded into
+// the { "<model>": { input, output } } lookup shape. Row order is preserved.
+const DEFAULT_PRICE_TABLE = Object.fromEntries(
+  [
+    ['gpt-4o', 2.5, 10],
+    ['gpt-4o-mini', 0.15, 0.6],
+    ['gpt-4.1', 2, 8],
+    ['gpt-4.1-mini', 0.4, 1.6],
+    ['o3-mini', 1.1, 4.4],
+    ['claude-3-5-sonnet', 3, 15],
+    ['claude-3-5-haiku', 0.8, 4],
+  ].map(([model, input, output]) => [model, { input, output }])
+);
+// Validate a raw price entry and return a clean { input, output } pair, or
+// null when the entry is missing or unusable.
+//
+// Each field must be a finite, non-negative number, or a non-blank string
+// that parses to one. Anything else (null, '', whitespace, booleans, arrays)
+// is rejected rather than coerced: Number(null) and Number('') are both 0,
+// which would turn an unset price into a "free" model and a fake $0 cost.
+function _normalize(entry) {
+  if (!entry || typeof entry !== 'object') return null;
+  const toPrice = (value) => {
+    if (typeof value === 'number') return value;
+    if (typeof value === 'string' && value.trim() !== '') return Number(value);
+    return NaN;
+  };
+  const input = toPrice(entry.input);
+  const output = toPrice(entry.output);
+  if (!Number.isFinite(input) || !Number.isFinite(output) || input < 0 || output < 0) return null;
+  return { input, output };
+}
+// Resolve the price entry for a model.
+//
+//   model     - model name as reported by the API; non-strings and '' → null.
+//   overrides - optional { "<model>": { input, output } } map from config.
+//
+// Overrides win over the built-in table. Names are compared case-insensitively
+// throughout, so the two tables are merged under lower-cased keys: an override
+// spelled "GPT-4o" replaces the built-in "gpt-4o" instead of sitting beside it
+// and losing the exact-match lookup to the built-in entry.
+// Matching: exact first, then substring with the longest (most specific) key
+// winning — so "gpt-4o-mini" beats "gpt-4o". Entries that fail validation are
+// skipped and the search continues.
+// Returns { input, output } per-Mtok, or null when unknown.
+function priceForModel(model, overrides) {
+  if (typeof model !== 'string' || !model) return null;
+  const wanted = model.toLowerCase();
+  // Later sources overwrite earlier ones; a Map keeps first-insertion order,
+  // so equal-length keys stay in table order when sorted below.
+  const table = new Map();
+  for (const source of [DEFAULT_PRICE_TABLE, overrides || {}]) {
+    for (const [name, entry] of Object.entries(source)) {
+      table.set(name.toLowerCase(), entry);
+    }
+  }
+  const exact = _normalize(table.get(wanted));
+  if (exact) return exact;
+  const bySpecificity = [...table.keys()].sort((a, b) => b.length - a.length);
+  for (const key of bySpecificity) {
+    if (!wanted.includes(key)) continue;
+    const price = _normalize(table.get(key));
+    if (price) return price;
+  }
+  return null;
+}
+// USD cost of one usage record at the given per-Mtok price. Returns null
+// (unknown) — never 0 — when the price is null or fails validation.
+// Missing or non-numeric token counts are counted as 0.
+function computeCost(usage, price) {
+  const rate = _normalize(price);
+  if (!rate) return null;
+  const tokens = (field) => (usage && Number(usage[field])) || 0;
+  const promptCost = (tokens('prompt_tokens') / 1e6) * rate.input;
+  const completionCost = (tokens('completion_tokens') / 1e6) * rate.output;
+  return promptCost + completionCost;
+}
+// Turn a cost into display text. null / undefined / NaN → "unknown"; exactly
+// zero → "$0.00"; anything below one cent gets six decimals, the rest four.
+function formatCost(cost) {
+  const isUnknown = cost == null || Number.isNaN(cost);
+  if (isUnknown) return 'unknown';
+  if (cost === 0) return '$0.00';
+  const decimals = cost < 0.01 ? 6 : 4;
+  return `$${cost.toFixed(decimals)}`;
+}
+module.exports = { DEFAULT_PRICE_TABLE, priceForModel, computeCost, formatCost }; // _normalize is not exported