npm - @semalt-ai/code - Versions diffs - 1.8.4 → 1.19.0 - Mend

@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (151)

package/.claude/settings.local.json +8 -1
package/.github/workflows/ci.yml +69 -0
package/CLAUDE.md +1588 -27
package/README.md +147 -3
package/TECHNICAL_DEBT.md +66 -0
package/examples/embed.js +74 -0
package/index.js +259 -11
package/lib/agent.js +935 -181
package/lib/api.js +308 -55
package/lib/args.js +96 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +306 -0
package/lib/commands/chat-slash.js +399 -0
package/lib/commands/chat-turn.js +446 -0
package/lib/commands/chat.js +403 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +346 -11
package/lib/constants.js +372 -3
package/lib/debug.js +106 -0
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +167 -0
package/lib/hooks.js +286 -0
package/lib/images.js +264 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +100 -10
package/lib/pricing.js +67 -0
package/lib/proc.js +158 -0
package/lib/prompts.js +88 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2558 -0
package/lib/tool_specs.js +236 -9
package/lib/tools.js +370 -944
package/lib/ui/chat-history.js +19 -1
package/lib/ui/format.js +101 -6
package/lib/ui/input-field.js +16 -7
package/lib/ui/status-bar.js +79 -11
package/lib/ui/terminal.js +10 -4
package/lib/ui/theme.js +1 -0
package/lib/ui/web-activity.js +218 -0
package/lib/ui/writer.js +7 -9
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/background.test.js +414 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/executors.test.js +362 -0
package/test/extract-tool-calls.test.js +315 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +142 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +203 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/max-iterations.test.js +216 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +356 -0
package/test/output-chokepoint.test.js +188 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +163 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/result-cap.test.js +233 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-pause.test.js +164 -0
package/test/stream-parser.test.js +147 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/web-activity-ordering.test.js +194 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1288

package/test/output-chokepoint.test.js ADDED Viewed

@@ -0,0 +1,188 @@
+'use strict';
+// Task W.9 — Shared output-capping chokepoint + navigation guidance.
+//
+// W.5–W.8 each bounded a previously-unbounded path into context, but the capping
+// was ad-hoc per path: scattered capToTokens calls + hand-built untrusted fences
+// across formatGrepResult / formatGlobResult / capShellOutput / formatReadResult /
+// formatMcpResult / formatSubagentResult. The original bugs (grep/glob returning
+// "done", shell unbounded, MCP/subagent unbounded) were all the SAME class — a
+// path that put tool output into context without bounding it. This task
+// consolidates the capToTokens-+-fence step into ONE chokepoint, boundToolOutput,
+// so bounding is uniform and STRUCTURAL: a new tool gets bounding by routing its
+// output through the chokepoint rather than remembering to cap.
+//
+// These tests pin: (1) the chokepoint's behavior + per-path policy (budgets,
+// notices, fence flags are NOT flattened into one); (2) the structural
+// bound-by-construction invariant; (3) MODEL-FACING equivalence with W.5–W.8
+// (the refactor changed nothing observable); and (4) the now-actionable
+// grep-first / read-slice navigation guidance in the system prompt.
+const { test } = require('node:test');
+const assert = require('node:assert');
+const {
+  boundToolOutput,
+  formatGrepResult, formatGlobResult, capShellOutput,
+  formatReadResult, formatMcpResult, formatSubagentResult,
+} = require('../lib/agent');
+const {
+  DEFAULT_MCP_MAX_RESULT_TOKENS, DEFAULT_SUBAGENT_MAX_RESULT_TOKENS,
+} = require('../lib/constants');
+const FENCE_OPEN = '<<<UNTRUSTED_EXTERNAL_CONTENT';
+const FENCE_CLOSE = '<<<END_UNTRUSTED_EXTERNAL_CONTENT>>>';
+// ---------------------------------------------------------------------------
+// Part 1 — the chokepoint helper itself
+// ---------------------------------------------------------------------------
+test('boundToolOutput: text under budget passes through unchanged, no truncation', () => {
+  const out = boundToolOutput('hello world', { budget: 10000, fenced: false });
+  assert.strictEqual(out.text, 'hello world');
+  assert.strictEqual(out.truncated, false);
+});
+test('boundToolOutput: over-budget text is token-capped with the SUPPLIED notice', () => {
+  const big = 'x'.repeat(4000); // ~1000 tokens
+  const out = boundToolOutput(big, {
+    budget: 50,
+    notice: ({ tokens, limit }) => `\n\n[NET ${tokens}->${limit}]`,
+    fenced: false,
+  });
+  assert.ok(out.truncated, 'flagged truncated');
+  assert.match(out.text, /\[NET \d+->50\]/, 'the caller-supplied notice is used');
+  assert.ok(out.text.length < big.length, 'full payload did not pass through');
+});
+test('boundToolOutput: fenced=true wraps in the untrusted fence; fenced=false does not', () => {
+  const fenced = boundToolOutput('data', { budget: 10000, fenced: true });
+  assert.ok(fenced.text.startsWith(FENCE_OPEN), 'opens with the fence');
+  assert.ok(fenced.text.trimEnd().endsWith(FENCE_CLOSE), 'closes with the fence');
+  assert.ok(fenced.text.includes('data'), 'content inside the fence');
+  const plain = boundToolOutput('data', { budget: 10000, fenced: false });
+  assert.strictEqual(plain.text, 'data', 'no fence when not requested');
+  assert.ok(!plain.text.includes(FENCE_OPEN));
+});
+test('STRUCTURAL invariant: output routed through the chokepoint is bounded by construction', () => {
+  // A "new tool" that surfaces its output via boundToolOutput cannot dump
+  // unbounded into context — a huge payload is capped no matter the path. This
+  // is the regression-prevention guarantee: bound-by-routing, not bound-by-remembering.
+  const huge = 'Z'.repeat(200000);
+  const out = boundToolOutput(huge, { budget: 100, fenced: true });
+  assert.ok(out.truncated, 'huge payload is bounded by construction');
+  assert.ok(out.text.length < huge.length);
+  assert.ok(out.text.includes(FENCE_OPEN), 'and still fenced when requested');
+});
+// ---------------------------------------------------------------------------
+// Part 2 — per-path policy preserved (budgets / notices / fence NOT flattened)
+// ---------------------------------------------------------------------------
+test('fence flag is PER PATH: MCP+subagent fenced; read/shell/grep/glob NOT fenced', () => {
+  assert.match(formatMcpResult({ action: 'mcp__s__t', content: 'a', maxTokens: 10000 }), /UNTRUSTED_EXTERNAL_CONTENT/);
+  assert.match(formatSubagentResult({ count: 1, content: 'a', maxTokens: 20000 }), /UNTRUSTED_EXTERNAL_CONTENT/);
+  assert.doesNotMatch(formatReadResult({ content: 'a\nb', path: '/f' }), /UNTRUSTED_EXTERNAL_CONTENT/);
+  assert.doesNotMatch(capShellOutput('a\nb', {}).text, /UNTRUSTED_EXTERNAL_CONTENT/);
+  assert.doesNotMatch(formatGrepResult({ matches: [{ file: 'a', line: 1, text: 't' }], pattern: 'p' }), /UNTRUSTED_EXTERNAL_CONTENT/);
+  assert.doesNotMatch(formatGlobResult({ files: ['a.ts'], pattern: '*' }), /UNTRUSTED_EXTERNAL_CONTENT/);
+});
+test('notice text is PER PATH (not flattened): each path emits its own wording', () => {
+  const big = 'x'.repeat(200000); // ~50k tokens — over every net at maxTokens=50
+  assert.match(formatMcpResult({ action: 'mcp__s__t', content: big, maxTokens: 50 }), /MCP result capped at/);
+  assert.match(formatSubagentResult({ count: 1, content: big, maxTokens: 50 }), /subagent result capped at/);
+  assert.match(formatReadResult({ content: 'q'.repeat(200000), path: '/f', maxTokens: 50 }), /read token-capped/);
+  assert.match(capShellOutput('q'.repeat(200000), { maxTokens: 50 }).text, /output token-capped/);
+});
+test('budgets are PER PATH: MCP (10k) is strictly stricter than subagent (20k)', () => {
+  assert.ok(DEFAULT_MCP_MAX_RESULT_TOKENS < DEFAULT_SUBAGENT_MAX_RESULT_TOKENS);
+  // Content sized between the two budgets: capped under MCP, passes under subagent.
+  const midTokens = Math.floor((DEFAULT_MCP_MAX_RESULT_TOKENS + DEFAULT_SUBAGENT_MAX_RESULT_TOKENS) / 2);
+  const content = 'z'.repeat(midTokens * 4);
+  assert.match(formatMcpResult({ action: 'mcp__s__t', content }), /capped at/, 'MCP caps above its stricter budget');
+  assert.doesNotMatch(formatSubagentResult({ count: 1, content }), /capped at/, 'subagent passes under its generous budget');
+});
+// ---------------------------------------------------------------------------
+// Part 3 — MODEL-FACING equivalence with W.5–W.8 (refactor changed nothing)
+// ---------------------------------------------------------------------------
+//
+// The fenced paths must compose as `<prefix>` + boundToolOutput(content, …): the
+// prefix sits OUTSIDE the fence, the capped+fenced body is exactly the chokepoint
+// output. This proves the path genuinely routes through the chokepoint.
+test('equivalence: formatMcpResult == prefix + boundToolOutput(content, {fenced:true})', () => {
+  const content = 'payload from server';
+  const out = formatMcpResult({ action: 'mcp__s__t', content, maxTokens: 10000 });
+  const bounded = boundToolOutput(content, { budget: 10000, fenced: true });
+  assert.ok(out.startsWith('MCP tool mcp__s__t result:'), 'prefix outside the fence');
+  assert.ok(out.endsWith(bounded.text), 'body is exactly the chokepoint output');
+});
+test('equivalence: formatSubagentResult == prefix + boundToolOutput(content, {fenced:true})', () => {
+  const content = 'CHILD FINDINGS: the project is a CLI';
+  const out = formatSubagentResult({ count: 1, content, maxTokens: 20000 });
+  const bounded = boundToolOutput(content, { budget: 20000, fenced: true });
+  assert.ok(out.includes('Result from 1 subagent'), 'prefix outside the fence');
+  assert.ok(out.endsWith(bounded.text), 'body is exactly the chokepoint output');
+});
+test('equivalence: small grep/glob/read/shell outputs are byte-identical to W.5–W.7 (no token notice)', () => {
+  // grep content mode — file:line:text, no token cap notice for small results.
+  const grep = formatGrepResult({
+    matches: [{ file: 'a.js', line: 3, text: '// TODO' }],
+    pattern: 'TODO', output_mode: 'content',
+  });
+  assert.match(grep, /a\.js:3:\/\/ TODO/);
+  assert.doesNotMatch(grep, /token-capped/);
+  // glob — relative path list, no token cap notice.
+  const glob = formatGlobResult({ files: ['a.ts', 'src/b.ts'], pattern: '*.ts' });
+  assert.match(glob, /^a\.ts$/m);
+  assert.doesNotMatch(glob, /token-capped/);
+  // read — under the line cap the body is byte-for-byte the file content.
+  const read = formatReadResult({ content: 'one\ntwo\nthree', path: '/x' });
+  assert.strictEqual(read, 'File /x:\none\ntwo\nthree');
+  // shell — under the line + token caps, output passes through unchanged.
+  const shell = capShellOutput('line a\nline b', {});
+  assert.strictEqual(shell.text, 'line a\nline b');
+  assert.strictEqual(shell.truncated, false);
+});
+test('grep/glob now gain a TOKEN safety net via the chokepoint (huge matches bounded)', () => {
+  // Pathological: head_limit lets 100 matches through, but each is a 5000-char
+  // minified line — the count bound alone does NOT bound tokens (the W.6 lesson).
+  // The chokepoint's token net catches it. This is NOT a regression on small
+  // results (asserted above) — it's the structural backstop the refactor adds.
+  const many = [];
+  for (let i = 0; i < 100; i++) many.push({ file: 'min.js', line: i, text: 'q'.repeat(5000) });
+  const out = formatGrepResult({ matches: many, pattern: 'q', output_mode: 'content', head_limit: 100 });
+  assert.match(out, /grep output token-capped/, 'huge grep result is token-bounded');
+  const files = [];
+  for (let i = 0; i < 100; i++) files.push('d/'.repeat(2000) + `f${i}.ts`);
+  const gout = formatGlobResult({ files, pattern: '**/*.ts', head_limit: 100 });
+  assert.match(gout, /glob output token-capped/, 'huge glob result is token-bounded');
+});
+// ---------------------------------------------------------------------------
+// Part 4 — navigation guidance (now actionable post-W.5)
+// ---------------------------------------------------------------------------
+test('system prompt carries grep-first / read-slice navigation guidance (BOTH templates)', () => {
+  const prompts = require('../lib/prompts');
+  const xml = prompts.getSystemPrompt(false, '', '');     // XML template
+  const native = prompts.getSystemPrompt(true, '', '');    // native function-calling template
+  for (const [label, p] of [['xml', xml], ['native', native]]) {
+    assert.match(p, /locate first with .*grep/i, `${label}: grep-first locate guidance`);
+    assert.match(p, /count|files_with_matches/, `${label}: count/files_with_matches modes mentioned`);
+    assert.match(p, /start_line|end_line/, `${label}: read-slice (start_line/end_line) guidance`);
+    assert.match(p, /redirect/i, `${label}: redirect-large-output-to-file guidance`);
+  }
+});

package/test/path-guards.test.js ADDED Viewed

@@ -0,0 +1,134 @@
+'use strict';
+// Characterization tests for the file-path guards isPathSafe and
+// isProtectedSecretPath (Task 1.1). These read the --allow-anywhere /
+// --dangerously-skip-permissions flags from process.argv once at module load;
+// the test runner's argv contains neither, so both default to the safe path.
+const { test } = require('node:test');
+const assert = require('node:assert');
+const os = require('node:os');
+const path = require('node:path');
+const { isPathSafe, isProtectedSecretPath, isProtectedConfigPath } = require('../lib/tools');
+const { CONFIG_PATH, protectedConfigDirs } = require('../lib/constants');
+// ---------------------------------------------------------------------------
+// isPathSafe — writes are confined to CWD; system/home-secret dirs are banned.
+// ---------------------------------------------------------------------------
+test('isPathSafe allows paths inside the current working directory', () => {
+  assert.strictEqual(isPathSafe('a.txt'), true);
+  assert.strictEqual(isPathSafe('./nested/dir/file.js'), true);
+  assert.strictEqual(isPathSafe(path.join(process.cwd(), 'x', 'y.md')), true);
+});
+test('isPathSafe allows the CWD itself', () => {
+  assert.strictEqual(isPathSafe(process.cwd()), true);
+});
+test('isPathSafe rejects paths outside the working tree', () => {
+  assert.strictEqual(isPathSafe('/tmp/outside.txt'), false);
+  assert.strictEqual(isPathSafe(path.join(os.homedir(), 'elsewhere.txt')), false);
+});
+test('isPathSafe rejects banned system directories', () => {
+  for (const p of ['/etc/passwd', '/boot/grub/x', '/sys/x', '/proc/1/mem']) {
+    assert.strictEqual(isPathSafe(p), false, `${p} must be unsafe`);
+  }
+});
+test('isPathSafe rejects sensitive home subdirectories', () => {
+  for (const sub of ['.ssh/id_rsa', '.aws/credentials', '.gnupg/secring']) {
+    assert.strictEqual(isPathSafe(path.join(os.homedir(), sub)), false, `~/${sub} must be unsafe`);
+  }
+});
+test('isPathSafe rejects non-string / empty input', () => {
+  assert.strictEqual(isPathSafe(''), false);
+  assert.strictEqual(isPathSafe(null), false);
+  assert.strictEqual(isPathSafe(undefined), false);
+});
+test('QUIRK: a sibling dir sharing the CWD name prefix is rejected', () => {
+  // The guard appends a path separator before prefix-matching, so `${cwd}-evil`
+  // does not slip through as "starts with cwd".
+  assert.strictEqual(isPathSafe(process.cwd() + '-evil/file'), false);
+});
+// ---------------------------------------------------------------------------
+// isProtectedSecretPath — the protected config.json / memory.json / audit.log
+// are never readable by the agent, however the path is spelled; same-named files elsewhere are not flagged.
+// ---------------------------------------------------------------------------
+test('isProtectedSecretPath flags the config file', () => {
+  assert.strictEqual(isProtectedSecretPath(CONFIG_PATH), true);
+});
+test('isProtectedSecretPath flags memory.json and audit.log', () => {
+  const dir = path.join(os.homedir(), '.semalt-ai');
+  assert.strictEqual(isProtectedSecretPath(path.join(dir, 'memory.json')), true);
+  assert.strictEqual(isProtectedSecretPath(path.join(dir, 'audit.log')), true);
+});
+test('isProtectedSecretPath resolves relative/.. forms to the same target', () => {
+  const messy = path.join(CONFIG_PATH, '..', path.basename(CONFIG_PATH));
+  assert.strictEqual(isProtectedSecretPath(messy), true);
+});
+test('isProtectedSecretPath does not flag ordinary files', () => {
+  assert.strictEqual(isProtectedSecretPath('a.txt'), false);
+  assert.strictEqual(isProtectedSecretPath(path.join(process.cwd(), 'config.json')), false);
+});
+test('isProtectedSecretPath rejects non-string / empty input', () => {
+  assert.strictEqual(isProtectedSecretPath(''), false);
+  assert.strictEqual(isProtectedSecretPath(null), false);
+});
+// ---------------------------------------------------------------------------
+// isProtectedConfigPath — the WRITE-side guard (Pre-Task 5.0b). The whole
+// ~/.semalt-ai dir AND every project .semalt dir are non-writable by the agent,
+// including not-yet-existing files. The test runner's CWD is the repo root
+// (it has .git), so the project layer is <repo>/.semalt.
+// ---------------------------------------------------------------------------
+test('protectedConfigDirs covers ~/.semalt-ai and the project .semalt dir', () => {
+  const dirs = protectedConfigDirs();
+  assert.ok(dirs.includes(path.join(os.homedir(), '.semalt-ai')), 'must include the user config dir');
+  assert.ok(dirs.includes(path.join(process.cwd(), '.semalt')), 'must include the project .semalt dir');
+});
+test('isProtectedConfigPath flags files anywhere under ~/.semalt-ai', () => {
+  const dir = path.join(os.homedir(), '.semalt-ai');
+  assert.strictEqual(isProtectedConfigPath(path.join(dir, 'config.json')), true);
+  assert.strictEqual(isProtectedConfigPath(path.join(dir, 'agents', 'r.md')), true);
+  assert.strictEqual(isProtectedConfigPath(dir), true, 'the dir itself is protected');
+});
+test('isProtectedConfigPath flags files under the project .semalt dir (incl. not-yet-existing)', () => {
+  const dot = path.join(process.cwd(), '.semalt');
+  assert.strictEqual(isProtectedConfigPath(path.join(dot, 'config.json')), true);
+  assert.strictEqual(isProtectedConfigPath(path.join(dot, 'agents', 'reviewer.md')), true);
+  assert.strictEqual(isProtectedConfigPath('.semalt/config.json'), true, 'relative form resolves to CWD/.semalt');
+  assert.strictEqual(isProtectedConfigPath(path.join(dot, 'hooks', 'does-not-exist.sh')), true);
+});
+test('isProtectedConfigPath resolves .. traversal into a protected dir', () => {
+  const messy = path.join(process.cwd(), 'src', '..', '.semalt', 'config.json');
+  assert.strictEqual(isProtectedConfigPath(messy), true);
+});
+test('isProtectedConfigPath does not flag ordinary files', () => {
+  assert.strictEqual(isProtectedConfigPath('src/app.js'), false);
+  assert.strictEqual(isProtectedConfigPath(path.join(process.cwd(), 'config.json')), false);
+  assert.strictEqual(isProtectedConfigPath(path.join(process.cwd(), 'app', 'config.json')), false);
+  // A sibling dir whose name merely starts with .semalt is not the config dir.
+  assert.strictEqual(isProtectedConfigPath(path.join(process.cwd(), '.semalt-extra', 'x')), false);
+});
+test('isProtectedConfigPath rejects non-string / empty input', () => {
+  assert.strictEqual(isProtectedConfigPath(''), false);
+  assert.strictEqual(isProtectedConfigPath(null), false);
+  assert.strictEqual(isProtectedConfigPath(undefined), false);
+});

package/test/payload.test.js ADDED Viewed

@@ -0,0 +1,99 @@
+'use strict';
+// Payload-augmentation tests (Task 2.7): prompt caching markers and
+// reasoning_effort. Pure functions are unit-tested; the wiring is verified by
+// inspecting the actual request body the api client sends to the mock LLM.
+const { test, before, after } = require('node:test');
+const assert = require('node:assert');
+const ui = require('../lib/ui');
+const { createApiClient } = require('../lib/api');
+const { startMockLLM } = require('./harness/mock-llm');
+const {
+  applyPromptCaching, applyReasoningEffort, supportsReasoningEffort,
+} = require('../lib/payload');
+let prevKey;
+before(() => { prevKey = process.env.SEMALT_API_KEY; process.env.SEMALT_API_KEY = 'test-key'; });
+after(() => { if (prevKey === undefined) delete process.env.SEMALT_API_KEY; else process.env.SEMALT_API_KEY = prevKey; });
+// ---------------------------------------------------------------------------
+// Pure
+// ---------------------------------------------------------------------------
+test('applyPromptCaching marks the last system message and last tool when enabled', () => {
+  const p = {
+    messages: [{ role: 'system', content: 'S' }, { role: 'user', content: 'u' }],
+    tools: [{ type: 'function', function: { name: 'a' } }, { type: 'function', function: { name: 'b' } }],
+  };
+  applyPromptCaching(p, true);
+  assert.deepStrictEqual(p.messages[0].cache_control, { type: 'ephemeral' });
+  assert.strictEqual(p.messages[1].cache_control, undefined);
+  assert.deepStrictEqual(p.tools[1].cache_control, { type: 'ephemeral' });
+  assert.strictEqual(p.tools[0].cache_control, undefined);
+});
+test('applyPromptCaching is a no-op when disabled', () => {
+  const p = { messages: [{ role: 'system', content: 'S' }], tools: [{ type: 'function', function: { name: 'a' } }] };
+  applyPromptCaching(p, false);
+  assert.strictEqual(p.messages[0].cache_control, undefined);
+  assert.strictEqual(p.tools[0].cache_control, undefined);
+});
+test('supportsReasoningEffort matches reasoning model families only', () => {
+  for (const m of ['o3-mini', 'o1-preview', 'gpt-5', 'deepseek-r1', 'qwq-32b', 'some-reasoning-model']) {
+    assert.strictEqual(supportsReasoningEffort(m), true, m);
+  }
+  for (const m of ['gpt-4o', 'llama-3', 'claude-3-5-sonnet', '']) {
+    assert.strictEqual(supportsReasoningEffort(m), false, m);
+  }
+});
+test('applyReasoningEffort sets the field only for supported models / valid effort', () => {
+  const a = {}; applyReasoningEffort(a, 'high', 'o3-mini'); assert.strictEqual(a.reasoning_effort, 'high');
+  const b = {}; applyReasoningEffort(b, 'high', 'gpt-4o'); assert.strictEqual(b.reasoning_effort, undefined);
+  const c = {}; applyReasoningEffort(c, 'bogus', 'o3-mini'); assert.strictEqual(c.reasoning_effort, undefined);
+  const d = {}; applyReasoningEffort(d, 'low', 'gpt-4o', { force: true }); assert.strictEqual(d.reasoning_effort, 'low');
+  const e = {}; applyReasoningEffort(e, '', 'o3-mini'); assert.strictEqual(e.reasoning_effort, undefined);
+});
+// ---------------------------------------------------------------------------
+// Integration: the real request body the api client emits
+// ---------------------------------------------------------------------------
+async function captureBody(configExtra, model) {
+  const mock = await startMockLLM();
+  mock.replyWith('ok');
+  const config = {
+    api_base: mock.base, api_key: 'test-key', default_model: model,
+    temperature: 0.5, request_timeout_ms: 5000, stream: true, models: [],
+    ...configExtra,
+  };
+  const api = createApiClient({ getConfig: () => config, saveConfig: () => {}, ui });
+  try {
+    await api.chatStream([{ role: 'system', content: 'S' }, { role: 'user', content: 'u' }], { model, nativeTools: true });
+    return JSON.parse(mock.requests[0].body);
+  } finally {
+    await mock.close();
+  }
+}
+test('reasoning_effort is sent for a supporting model and omitted otherwise', async () => {
+  const withEffort = await captureBody({ reasoning_effort: 'high' }, 'o3-mini');
+  assert.strictEqual(withEffort.reasoning_effort, 'high');
+  const without = await captureBody({ reasoning_effort: 'high' }, 'local-llama-7b');
+  assert.strictEqual(without.reasoning_effort, undefined);
+});
+test('prompt caching markers appear only when config.prompt_caching is true', async () => {
+  const cached = await captureBody({ prompt_caching: true }, 'gpt-4o');
+  const sys = cached.messages.find((m) => m.role === 'system');
+  assert.deepStrictEqual(sys.cache_control, { type: 'ephemeral' });
+  assert.ok(Array.isArray(cached.tools) && cached.tools.length);
+  assert.deepStrictEqual(cached.tools[cached.tools.length - 1].cache_control, { type: 'ephemeral' });
+  const plain = await captureBody({ prompt_caching: false }, 'gpt-4o');
+  assert.strictEqual(plain.messages.find((m) => m.role === 'system').cache_control, undefined);
+  assert.ok(!plain.tools.some((t) => t.cache_control), 'no tool carries cache_control when disabled');
+});

package/test/permission-rules-agent.test.js ADDED Viewed

@@ -0,0 +1,210 @@
+'use strict';
+// Per-pattern permission rules (Task 4.1) driving the REAL runAgentLoop against
+// the mock-LLM harness. Proves the gate integration end-to-end on the XML rail
+// (the native rail converges on the SAME [action, ...args] call tuple and the
+// SAME gate, so one path exercises both). Covers: deny blocks (even under
+// --dangerously-skip-permissions), allow auto-approves what a tier wouldn't, and
+// ask forces a prompt a tier would otherwise skip (→ refused in non-TTY).
+// Also proves composition: an allow rule never re-enables the deny-list, the
+// secret-file guard, or --readonly (all enforced downstream in the executors).
+const os = require('node:os');
+const fs = require('node:fs');
+const path = require('node:path');
+// Temp $HOME before lib modules load (audit log / config / memory resolve here).
+const TMP_HOME = fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-permhome-'));
+const PREV_HOME = process.env.HOME;
+const PREV_USERPROFILE = process.env.USERPROFILE;
+process.env.HOME = TMP_HOME;
+process.env.USERPROFILE = TMP_HOME;
+const { test, before, after } = require('node:test');
+const assert = require('node:assert');
+const ui = require('../lib/ui');
+const { createApiClient } = require('../lib/api');
+const { createToolExecutor, extractToolCalls } = require('../lib/tools');
+const { createPermissionManager } = require('../lib/permissions');
+const { createAgentRunner } = require('../lib/agent');
+const { loadRuleLayers } = require('../lib/permission-rules');
+const { startMockLLM } = require('./harness/mock-llm');
+let prevKey, PREV_CWD, CWD;
+before(() => {
+  prevKey = process.env.SEMALT_API_KEY;
+  process.env.SEMALT_API_KEY = 'test-key';
+  PREV_CWD = process.cwd();
+  CWD = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'semalt-permcwd-')));
+  process.chdir(CWD);
+});
+after(() => {
+  // Step out of the scratch cwd first, then restore the environment variables
+  // this file overrode (deleting those that were originally unset).
+  process.chdir(PREV_CWD);
+  if (prevKey === undefined) delete process.env.SEMALT_API_KEY; else process.env.SEMALT_API_KEY = prevKey;
+  if (PREV_HOME === undefined) delete process.env.HOME; else process.env.HOME = PREV_HOME;
+  if (PREV_USERPROFILE === undefined) delete process.env.USERPROFILE; else process.env.USERPROFILE = PREV_USERPROFILE;
+  // Remove the temp directories created for this run (scratch cwd + fake HOME)
+  // so repeated runs do not pile up under os.tmpdir().
+  for (const dir of [CWD, TMP_HOME]) {
+    if (!dir) continue; // before() may have failed before CWD was assigned
+    try {
+      fs.rmSync(dir, { recursive: true, force: true });
+    } catch {
+      // Deliberately best-effort: a leftover temp dir must not fail the suite.
+    }
+  }
+});
+// Build a runner whose permission manager carries per-pattern rules + the given
+// manager options (tiers / skip / readonly).
+function buildRunner(base, { user = [], project = [], pmOpts = {} } = {}) {
+  const config = {
+    api_base: base, api_key: 'test-key', default_model: 'test-model',
+    temperature: 0.5, request_timeout_ms: 5000, stream: true, models: [],
+  };
+  const getConfig = () => config;
+  const saveConfig = (c) => Object.assign(config, c);
+  const api = createApiClient({ getConfig, saveConfig, ui });
+  const rules = loadRuleLayers({ permissions: { rules: user } }, { permissions: { rules: project } });
+  const pm = createPermissionManager(ui, { ...pmOpts, rules, cwd: CWD });
+  pm.setUICallbacks({ onAddMessage: () => {}, onShowModal: () => {}, onCloseModal: () => {}, onCaptureNavigation: () => () => {} });
+  const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(pm, ui, getConfig);
+  const runner = createAgentRunner({
+    chatStream: api.chatStream, extractToolCalls, agentExecShell, agentExecFile,
+    describePermission, permissionManager: pm, ui, getConfig,
+  });
+  return { runner };
+}
+function collector() {
+  const ev = { tools: [], errors: [] };
+  return { ev, cb: { onToolEnd: (tag, result) => ev.tools.push({ tag, result }), onError: (e) => ev.errors.push(e) } };
+}
+function fedBack(messages) {
+  const m = messages.find((x) => x.role === 'user' && /Tool execution results/.test(x.content));
+  return m ? m.content : '';
+}
+// ---------------------------------------------------------------------------
+// deny rule
+// ---------------------------------------------------------------------------
+test('a deny rule blocks the tool end-to-end — even under --dangerously-skip-permissions', async () => {
+  const sentinel = path.join(CWD, 'should-not-exist.txt');
+  const mock = await startMockLLM();
+  mock.replyWith(`<shell>touch ${sentinel}</shell>`);
+  mock.replyWith('understood');
+  try {
+    const { runner } = buildRunner(mock.base, {
+      user: [{ tool: 'shell', action: 'deny', pattern: 'touch *' }],
+      pmOpts: { skipPermissions: true }, // deny must win even here
+    });
+    const { ev, cb } = collector();
+    const messages = [{ role: 'user', content: 'make a file' }];
+    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
+    assert.ok(!fs.existsSync(sentinel), 'the denied command never ran');
+    assert.strictEqual(ev.tools.length, 0, 'a rule-denied tool never reaches onToolEnd');
+    assert.match(fedBack(messages), /DENIED by a permission rule/);
+  } finally { mock.close(); }
+});
+// ---------------------------------------------------------------------------
+// allow rule
+// ---------------------------------------------------------------------------
+test('an allow rule auto-approves a write that would otherwise be refused (non-TTY, no tier)', async () => {
+  const target = path.join(CWD, 'allowed.txt');
+  const mock = await startMockLLM();
+  mock.replyWith(`<write_file path="${target}">DATA</write_file>`);
+  mock.replyWith('done');
+  try {
+    const { runner } = buildRunner(mock.base, {
+      user: [{ tool: 'write_file', action: 'allow', path: '**' }],
+      // no skip, no tier — without the rule this write would be refused in non-TTY
+    });
+    const { cb } = collector();
+    const messages = [{ role: 'user', content: 'write it' }];
+    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
+    assert.ok(fs.existsSync(target), 'the allow rule auto-approved the write');
+    assert.strictEqual(fs.readFileSync(target, 'utf8'), 'DATA');
+  } finally { mock.close(); }
+});
+test('COMPOSE: an allow shell rule cannot re-enable a deny-listed command', async () => {
+  const mock = await startMockLLM();
+  mock.replyWith('<shell>rm -rf /</shell>');
+  mock.replyWith('ok');
+  try {
+    const { runner } = buildRunner(mock.base, {
+      user: [{ tool: 'shell', action: 'allow', pattern: '*' }], // allow ALL shell
+    });
+    const { ev, cb } = collector();
+    const messages = [{ role: 'user', content: 'wipe' }];
+    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
+    // The deny-list runs inside agentExecShell, downstream of the gate the rule
+    // satisfied — so the catastrophic command is still blocked.
+    assert.match(fedBack(messages), /Blocked by safety deny-list/);
+  } finally { mock.close(); }
+});
+// ---------------------------------------------------------------------------
+// ask rule
+// ---------------------------------------------------------------------------
+test('an ask rule forces a prompt a tier flag would skip (→ refused in non-TTY)', async () => {
+  const sentinel = path.join(CWD, 'ask-not-run.txt');
+  const mock = await startMockLLM();
+  mock.replyWith(`<shell>touch ${sentinel}</shell>`);
+  mock.replyWith('understood');
+  try {
+    const { runner } = buildRunner(mock.base, {
+      user: [{ tool: 'shell', action: 'ask', pattern: 'touch *' }],
+      pmOpts: { allowedTiers: ['exec'] }, // tier would normally auto-approve shell
+    });
+    const { ev, cb } = collector();
+    const messages = [{ role: 'user', content: 'touch it' }];
+    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
+    assert.ok(!fs.existsSync(sentinel), 'ask forced a prompt; non-TTY refused it, so it never ran');
+    assert.match(fedBack(messages), /Permission denied/);
+  } finally { mock.close(); }
+});
+// ---------------------------------------------------------------------------
+// compose with --readonly and the secret-file guard
+// ---------------------------------------------------------------------------
+test('COMPOSE: --readonly still wins over an allow write rule', async () => {
+  const target = path.join(CWD, 'ro.txt');
+  const mock = await startMockLLM();
+  mock.replyWith(`<write_file path="${target}">X</write_file>`);
+  mock.replyWith('done');
+  try {
+    const { runner } = buildRunner(mock.base, {
+      user: [{ tool: 'write_file', action: 'allow', path: '**' }],
+      pmOpts: { readonly: true },
+    });
+    const { cb } = collector();
+    const messages = [{ role: 'user', content: 'write' }];
+    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
+    assert.ok(!fs.existsSync(target), 'the write was blocked by --readonly despite the allow rule');
+    assert.match(fedBack(messages), /readonly/);
+  } finally { mock.close(); }
+});
+test('COMPOSE: an allow read rule cannot re-enable a secret-file read', async () => {
+  const { CONFIG_PATH } = require('../lib/constants');
+  fs.mkdirSync(path.dirname(CONFIG_PATH), { recursive: true });
+  fs.writeFileSync(CONFIG_PATH, JSON.stringify({ api_key: 'sekret' }));
+  const mock = await startMockLLM();
+  mock.replyWith(`<read_file path="${CONFIG_PATH}"/>`);
+  mock.replyWith('done');
+  try {
+    const { runner } = buildRunner(mock.base, {
+      user: [{ tool: 'read_file', action: 'allow', path: '**' }],
+    });
+    const { cb } = collector();
+    const messages = [{ role: 'user', content: 'read config' }];
+    await runner.runAgentLoop(messages, 'test-model', 5, null, { callbacks: cb });
+    const fed = fedBack(messages);
+    assert.match(fed, /holds secrets\/credentials/);
+    assert.ok(!/sekret/.test(fed), 'the secret value never reached the model');
+  } finally { mock.close(); }
+});