npm - @principles/pd-cli - Versions diffs - 1.119.0 → 1.120.0 - Mend

@principles/pd-cli 1.119.0 → 1.120.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/dist/commands/__tests__/legacy-cleanup.test.d.ts +18 -0
package/dist/commands/__tests__/legacy-cleanup.test.d.ts.map +1 -0
package/dist/commands/__tests__/legacy-cleanup.test.js +459 -0
package/dist/commands/__tests__/legacy-cleanup.test.js.map +1 -0
package/dist/commands/__tests__/rulecode-flag-wiring.test.d.ts +21 -0
package/dist/commands/__tests__/rulecode-flag-wiring.test.d.ts.map +1 -0
package/dist/commands/__tests__/rulecode-flag-wiring.test.js +179 -0
package/dist/commands/__tests__/rulecode-flag-wiring.test.js.map +1 -0
package/dist/commands/__tests__/rulecode-handler.test.d.ts +16 -0
package/dist/commands/__tests__/rulecode-handler.test.d.ts.map +1 -0
package/dist/commands/__tests__/rulecode-handler.test.js +285 -0
package/dist/commands/__tests__/rulecode-handler.test.js.map +1 -0
package/dist/commands/legacy-cleanup.d.ts +72 -6
package/dist/commands/legacy-cleanup.d.ts.map +1 -1
package/dist/commands/legacy-cleanup.js +243 -23
package/dist/commands/legacy-cleanup.js.map +1 -1
package/dist/commands/rulecode.d.ts +85 -0
package/dist/commands/rulecode.d.ts.map +1 -0
package/dist/commands/rulecode.js +356 -0
package/dist/commands/rulecode.js.map +1 -0
package/dist/commands/runtime-internalization-run-rulehost.d.ts.map +1 -1
package/dist/commands/runtime-internalization-run-rulehost.js +4 -7
package/dist/commands/runtime-internalization-run-rulehost.js.map +1 -1
package/dist/index.js +30 -9
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/scripts/llm-dogfood.ts +8 -12
package/src/commands/__tests__/legacy-cleanup.test.ts +596 -0
package/src/commands/__tests__/rulecode-flag-wiring.test.ts +230 -0
package/src/commands/__tests__/rulecode-handler.test.ts +369 -0
package/src/commands/legacy-cleanup.ts +335 -27
package/src/commands/rulecode.ts +434 -0
package/src/commands/runtime-internalization-run-rulehost.ts +3 -8
package/src/index.ts +31 -9
package/tests/commands/cli-command-tree.test.ts +40 -0

package/src/commands/__tests__/rulecode-flag-wiring.test.ts ADDED Viewed

@@ -0,0 +1,230 @@
+/**
+ * Parser-level tests for `pd rulecode spec|validate|replay` flags (PRI-439 Phase 5).
+ *
+ * CLI gate rule 7: "Test the real command wiring — when behavior depends on
+ * Commander options, add a command-registration or parser test that exercises
+ * the actual flags."
+ *
+ * Tests the real `registerRulecodeCommand` helper (single source of truth
+ * shared with `index.ts`). Flag typos in production surface here at
+ * parseAsync time, not at handler dispatch.
+ *
+ * Covers:
+ *   - `spec` subcommand: --json, --workspace/-w registered; no --code
+ *   - `validate` subcommand: --code (optional at parser level — the handler
+ *     rejects a missing --code/--code-file), --code-file, --json, --workspace/-w
+ *   - `replay` subcommand: --code required, --code-file, --golden-trace required,
+ *     --json, --workspace/-w
+ *   - --no-* negations are NOT registered (no accidental negation)
+ *   - parseAsync actually dispatches the right opts to the handler
+ */
+import { describe, it, expect } from 'vitest';
+import { Command } from 'commander';
+import { registerRulecodeCommand } from '../rulecode.js';
+/** Loosely-typed bag of parsed option values, keyed by option name. */
+type ActionOptions = Record<string, unknown>;
+/** Mutable holder that a capturing action writes the options it received into. */
+interface CapturedAction {
+  /** `null` until the capturing action has run; afterwards the captured options (or `{}`). */
+  opts: ActionOptions | null;
+}
+/**
+ * Replaces `cmd`'s action with one that records the options it is invoked with.
+ *
+ * The action's arguments are scanned from last to first; the first value that is
+ * a non-null object and not a `Command` instance is stored in `state.opts`.
+ * When no such argument exists, `state.opts` is set to an empty object.
+ */
+function attachCapture(cmd: Command, state: CapturedAction): void {
+  const looksLikeOptions = (candidate: unknown): boolean =>
+    typeof candidate === 'object' && candidate !== null && !(candidate instanceof Command);
+  cmd.action((...received: unknown[]): void => {
+    // Walk a reversed copy so the right-most matching argument wins.
+    const found = [...received].reverse().find(looksLikeOptions);
+    state.opts = found === undefined ? {} : (found as ActionOptions);
+  });
+}
+/**
+ * Builds an isolated root program named `pd` for a single test, with
+ * Commander's `exitOverride()` enabled on it.
+ */
+function freshProgram(): Command {
+  const root = new Command();
+  root.name('pd');
+  root.exitOverride();
+  return root;
+}
+/**
+ * Registration and parse-level checks for `pd rulecode`. Each test builds its own
+ * program via `freshProgram()` and registers the real command tree on it, so option
+ * declarations are inspected (or parsed) exactly as `registerRulecodeCommand` made them.
+ */
+describe('pd rulecode — flag wiring (CLI gate rule 7)', () => {
+  // ── spec subcommand ───────────────────────────────────────────────────────
+  it('registers spec subcommand with --json and --workspace', () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const specCmd = rulecodeCmd.commands.find((c) => c.name() === 'spec');
+    expect(specCmd).toBeDefined();
+    const jsonOpt = specCmd!.options.find((o) => o.long === '--json');
+    expect(jsonOpt).toBeDefined();
+    const wsOpt = specCmd!.options.find((o) => o.long === '--workspace');
+    expect(wsOpt).toBeDefined();
+    expect(wsOpt?.short).toBe('-w');
+  });
+  it('spec subcommand does NOT register --code', () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const specCmd = rulecodeCmd.commands.find((c) => c.name() === 'spec');
+    const codeOpt = specCmd!.options.find((o) => o.long === '--code');
+    expect(codeOpt).toBeUndefined();
+  });
+  // ── validate subcommand ───────────────────────────────────────────────────
+  it('registers validate subcommand with --code, --code-file, --json, --workspace', () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const validateCmd = rulecodeCmd.commands.find((c) => c.name() === 'validate');
+    expect(validateCmd).toBeDefined();
+    const codeOpt = validateCmd!.options.find((o) => o.long === '--code');
+    expect(codeOpt).toBeDefined();
+    const codeFileOpt = validateCmd!.options.find((o) => o.long === '--code-file');
+    expect(codeFileOpt).toBeDefined();
+    const jsonOpt = validateCmd!.options.find((o) => o.long === '--json');
+    expect(jsonOpt).toBeDefined();
+    const wsOpt = validateCmd!.options.find((o) => o.long === '--workspace');
+    expect(wsOpt).toBeDefined();
+    expect(wsOpt?.short).toBe('-w');
+  });
+  it('validate --code is NOT required at parser level (can use --code-file instead)', async () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const validateCmd = rulecodeCmd.commands.find((c) => c.name() === 'validate')!;
+    const captured: CapturedAction = { opts: null };
+    attachCapture(validateCmd, captured);
+    // parseAsync should NOT reject when --code is missing (handler validates)
+    await program.parseAsync(['node', 'pd', 'rulecode', 'validate', '--json']);
+    expect(captured.opts).not.toBeNull();
+    expect(captured.opts!.code).toBeUndefined();
+  });
+  // ── replay subcommand ─────────────────────────────────────────────────────
+  it('registers replay subcommand with --code, --code-file, --golden-trace, --json, --workspace', () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const replayCmd = rulecodeCmd.commands.find((c) => c.name() === 'replay');
+    expect(replayCmd).toBeDefined();
+    const codeOpt = replayCmd!.options.find((o) => o.long === '--code');
+    expect(codeOpt).toBeDefined();
+    const codeFileOpt = replayCmd!.options.find((o) => o.long === '--code-file');
+    expect(codeFileOpt).toBeDefined();
+    const gtOpt = replayCmd!.options.find((o) => o.long === '--golden-trace');
+    expect(gtOpt).toBeDefined();
+    const jsonOpt = replayCmd!.options.find((o) => o.long === '--json');
+    expect(jsonOpt).toBeDefined();
+    const wsOpt = replayCmd!.options.find((o) => o.long === '--workspace');
+    expect(wsOpt).toBeDefined();
+    expect(wsOpt?.short).toBe('-w');
+  });
+  it('replay --golden-trace is required', () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const replayCmd = rulecodeCmd.commands.find((c) => c.name() === 'replay');
+    const gtOpt = replayCmd!.options.find((o) => o.long === '--golden-trace');
+    // `Option.required` only says the flag takes a mandatory value (`<arg>`); it is
+    // also true for a plain `.option()`. `Option.mandatory` is the property that
+    // `requiredOption()` sets, so that is what proves the option must be supplied.
+    expect(gtOpt?.required).toBe(true);
+    expect(gtOpt?.mandatory).toBe(true);
+  });
+  // ── No accidental negations ───────────────────────────────────────────────
+  it('does NOT register --no-json on any subcommand', () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    for (const sub of rulecodeCmd.commands) {
+      const noJson = sub.options.find((o) => o.long === '--no-json');
+      expect(noJson).toBeUndefined();
+    }
+  });
+  // ── Parser-level dispatch ─────────────────────────────────────────────────
+  // These tests swap the real action for a capturing one, so only parsing is exercised.
+  it('parseAsync dispatches spec subcommand with json=true', async () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const specCmd = rulecodeCmd.commands.find((c) => c.name() === 'spec')!;
+    const captured: CapturedAction = { opts: null };
+    attachCapture(specCmd, captured);
+    await program.parseAsync(['node', 'pd', 'rulecode', 'spec', '--json']);
+    expect(captured.opts).not.toBeNull();
+    expect(captured.opts!.json).toBe(true);
+  });
+  it('parseAsync dispatches validate with --code', async () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const validateCmd = rulecodeCmd.commands.find((c) => c.name() === 'validate')!;
+    const captured: CapturedAction = { opts: null };
+    attachCapture(validateCmd, captured);
+    await program.parseAsync([
+      'node', 'pd', 'rulecode', 'validate',
+      '--code', 'function evaluate(input, helpers) { return { decision: "allow", matched: false, reason: "x" }; }',
+      '--json',
+    ]);
+    expect(captured.opts).not.toBeNull();
+    expect(captured.opts!.code).toContain('function evaluate');
+    expect(captured.opts!.json).toBe(true);
+  });
+  it('parseAsync dispatches replay with --code and --golden-trace', async () => {
+    const program = freshProgram();
+    const rulecodeCmd = registerRulecodeCommand(program);
+    const replayCmd = rulecodeCmd.commands.find((c) => c.name() === 'replay')!;
+    const captured: CapturedAction = { opts: null };
+    attachCapture(replayCmd, captured);
+    await program.parseAsync([
+      'node', 'pd', 'rulecode', 'replay',
+      '--code', 'function evaluate(input, helpers) { return { decision: "allow", matched: false, reason: "x" }; }',
+      '--golden-trace', '/tmp/trace.json',
+      '--json',
+    ]);
+    expect(captured.opts).not.toBeNull();
+    expect(captured.opts!.code).toContain('function evaluate');
+    expect(captured.opts!.goldenTrace).toBe('/tmp/trace.json');
+    expect(captured.opts!.json).toBe(true);
+  });
+  it('parseAsync rejects replay without --golden-trace (requiredOption)', async () => {
+    const program = freshProgram();
+    registerRulecodeCommand(program);
+    await expect(
+      program.parseAsync([
+        'node', 'pd', 'rulecode', 'replay',
+        '--code', 'function evaluate() {}',
+      ]),
+    ).rejects.toThrow(/golden-trace/);
+  });
+});

package/src/commands/__tests__/rulecode-handler.test.ts ADDED Viewed

@@ -0,0 +1,369 @@
+/**
+ * Handler tests for `pd rulecode spec|validate|replay` (PRI-439 Phase 5).
+ *
+ * Tests the actual handler logic (not Commander parser wiring — that's in
+ * rulecode-flag-wiring.test.ts). Verifies:
+ *   - spec returns the canonical RuleCode dialect spec text
+ *   - validate detects forbidden patterns, missing return fields, matched=false
+ *   - validate passes clean code
+ *   - replay runs sandbox replay against a golden trace file
+ *   - failure paths include structured reason + nextAction (CLI gate rule 6)
+ *   - --json outputs exactly one parseable JSON object (CLI gate rule 1)
+ *   - missing --code/--code-file fails loud with reason (ERR-009)
+ *   - missing/malformed --golden-trace fails loud with reason (ERR-009)
+ */
+import { describe, it, expect, vi } from 'vitest';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import * as os from 'node:os';
+import {
+  handleRulecodeSpec,
+  handleRulecodeValidate,
+  handleRulecodeReplay,
+} from '../rulecode.js';
+// ── Helpers ──────────────────────────────────────────────────────────────────
+/**
+ * Runs `fn` while capturing everything written through `console.log` / `console.error`.
+ *
+ * `process.exitCode` is cleared before the call, read once `fn` settles, and cleared
+ * again afterwards so one handler's exit status never leaks into the next test.
+ *
+ * @param fn Handler invocation to execute.
+ * @returns Captured stdout and stderr (per-call chunks concatenated with no separator)
+ *   plus the `process.exitCode` observed after `fn` finished.
+ * @throws Re-throws whatever `fn` rejects with, after the console spies have been
+ *   restored and `process.exitCode` has been cleared.
+ */
+async function runHandler<T>(fn: () => Promise<T>): Promise<{ stdout: string; stderr: string; exitCode: number | undefined }> {
+  const stdoutChunks: string[] = [];
+  const stderrChunks: string[] = [];
+  const logSpy = vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => {
+    stdoutChunks.push(args.map(String).join(' '));
+  });
+  const errorSpy = vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => {
+    stderrChunks.push(args.map(String).join(' '));
+  });
+  process.exitCode = undefined;
+  let exitCode: number | undefined;
+  try {
+    await fn();
+  } finally {
+    logSpy.mockRestore();
+    errorSpy.mockRestore();
+    // Snapshot and reset inside `finally`: a handler that sets an exit code and then
+    // throws must not leave it behind for later tests or the runner's own exit status.
+    exitCode = process.exitCode as number | undefined;
+    process.exitCode = undefined;
+  }
+  return {
+    stdout: stdoutChunks.join(''),
+    stderr: stderrChunks.join(''),
+    exitCode,
+  };
+}
+/** Parses captured stdout as one JSON document; `JSON.parse` throws if the text is not valid JSON. */
+function parseJson(stdout: string): unknown {
+  const parsed: unknown = JSON.parse(stdout);
+  return parsed;
+}
+// Rule source whose every return carries decision, matched and reason: it blocks when
+// helpers.getToolName() is 'Bash' and allows otherwise. Used as the passing fixture.
+const CLEAN_CODE = `function evaluate(input, helpers) {
+  if (helpers.getToolName() === 'Bash') {
+    return { decision: "block", matched: true, reason: "bash commands blocked" };
+  }
+  return { decision: "allow", matched: false, reason: "non-bash allowed" };
+}`;
+// Rule source that calls require('fs'); used by the 'detects forbidden patterns' test.
+const FORBIDDEN_CODE = `function evaluate(input, helpers) {
+  require('fs');
+  return { decision: "allow", matched: false, reason: "x" };
+}`;
+// Rule source with one branch returning only { matched: true } (no decision or reason);
+// used by the 'detects missing return fields' test.
+const MISSING_FIELDS_CODE = `function evaluate(input, helpers) {
+  if (helpers.isRiskPath()) {
+    return { matched: true };
+  }
+  return { decision: "allow", matched: false, reason: "safe" };
+}`;
+// ── Tests ────────────────────────────────────────────────────────────────────
+/** Handler-level checks for `handleRulecodeSpec` in JSON and plain-text modes. */
+describe('pd rulecode spec', () => {
+  it('returns the canonical spec text as JSON', async () => {
+    const result = await runHandler(() => handleRulecodeSpec({ json: true }));
+    const payload = parseJson(result.stdout) as { status: string; spec: string };
+    expect(payload.status).toBe('ok');
+    // Section headings the spec text is expected to contain, checked in order.
+    for (const heading of ['RuleCode Dialect Spec', 'CANONICAL FORM', 'FORBIDDEN PATTERNS']) {
+      expect(payload.spec).toContain(heading);
+    }
+    expect(result.exitCode).toBeUndefined();
+  });
+  it('outputs text when --json is false', async () => {
+    const result = await runHandler(() => handleRulecodeSpec({ json: false }));
+    expect(result.stdout).toContain('RuleCode Dialect Spec');
+    // Plain-text mode must not begin with a JSON object.
+    expect(result.stdout.startsWith('{')).toBe(false);
+    expect(result.exitCode).toBeUndefined();
+  });
+  it('does not set exit code on success', async () => {
+    const result = await runHandler(() => handleRulecodeSpec({ json: true }));
+    expect(result.exitCode).toBeUndefined();
+  });
+});
+/** Handler-level checks for `handleRulecodeValidate`: inline code, code files, and failure reporting. */
+describe('pd rulecode validate', () => {
+  /** Every field the tests below read from the handler's JSON output. */
+  interface ValidateReport {
+    status: string;
+    valid: boolean;
+    violationCount: number;
+    violations: string[];
+    reason?: string;
+    nextAction?: string;
+  }
+  /** Invokes the handler in JSON mode and returns the parsed report together with the exit code. */
+  async function validateAsJson(
+    source: { code?: string; codeFile?: string },
+  ): Promise<{ report: ValidateReport; exitCode: number | undefined }> {
+    const { stdout, exitCode } = await runHandler(() =>
+      handleRulecodeValidate({ ...source, json: true }),
+    );
+    return { report: parseJson(stdout) as ValidateReport, exitCode };
+  }
+  it('passes clean code with valid=true', async () => {
+    const { report, exitCode } = await validateAsJson({ code: CLEAN_CODE });
+    expect(report.status).toBe('ok');
+    expect(report.valid).toBe(true);
+    expect(report.violationCount).toBe(0);
+    expect(report.violations).toEqual([]);
+    expect(exitCode).toBeUndefined();
+  });
+  it('detects forbidden patterns', async () => {
+    const { report, exitCode } = await validateAsJson({ code: FORBIDDEN_CODE });
+    expect(report.status).toBe('failed');
+    expect(report.valid).toBe(false);
+    expect(report.violationCount).toBeGreaterThan(0);
+    const mentionsForbidden = report.violations.some((v) => v.includes('forbidden pattern'));
+    expect(mentionsForbidden).toBe(true);
+    expect(report.reason).toBeDefined();
+    expect(report.nextAction).toBeDefined();
+    expect(exitCode).toBe(1);
+  });
+  it('detects missing return fields', async () => {
+    const { report, exitCode } = await validateAsJson({ code: MISSING_FIELDS_CODE });
+    expect(report.valid).toBe(false);
+    expect(report.violations.length).toBeGreaterThan(0);
+    expect(exitCode).toBe(1);
+  });
+  it('fails loud when no --code or --code-file provided', async () => {
+    const { report, exitCode } = await validateAsJson({});
+    expect(report.status).toBe('failed');
+    expect(report.valid).toBe(false);
+    expect(report.reason).toContain('no code provided');
+    expect(report.nextAction).toContain('--code');
+    expect(exitCode).toBe(1);
+  });
+  it('reads code from --code-file', async () => {
+    // Write the clean fixture into a throwaway directory that is removed afterwards.
+    const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-rulecode-test-'));
+    const codeFile = path.join(scratchDir, 'rule.js');
+    fs.writeFileSync(codeFile, CLEAN_CODE, 'utf8');
+    try {
+      const { report, exitCode } = await validateAsJson({ codeFile });
+      expect(report.valid).toBe(true);
+      expect(exitCode).toBeUndefined();
+    } finally {
+      fs.rmSync(scratchDir, { recursive: true, force: true });
+    }
+  });
+  it('fails loud when --code-file does not exist', async () => {
+    const { report, exitCode } = await validateAsJson({ codeFile: '/nonexistent/path/rule.js' });
+    expect(report.status).toBe('failed');
+    expect(report.reason).toContain('cannot read');
+    expect(report.nextAction).toBeDefined();
+    expect(exitCode).toBe(1);
+  });
+});
+/** Handler-level checks for `handleRulecodeReplay`: the passing path and each golden-trace / code failure path. */
+describe('pd rulecode replay', () => {
+  const GOLDEN_TRACE_CASES = [
+    {
+      caseId: 'pos-1',
+      kind: 'positive' as const,
+      toolName: 'Write',
+      params: { normalizedPath: 'src/safe.ts' },
+      expectedDecision: 'allow' as const,
+    },
+    {
+      caseId: 'neg-1',
+      kind: 'negative' as const,
+      toolName: 'Bash',
+      params: { command: 'rm -rf /' },
+      expectedDecision: 'block' as const,
+    },
+  ];
+  /** Every field the tests below read from the handler's JSON output. */
+  interface ReplayReport {
+    status: string;
+    decision: string;
+    reason?: string;
+    nextAction?: string;
+  }
+  /**
+   * Writes `contents` to `trace.json` in a fresh temp directory, runs `body` with
+   * that path, and removes the directory afterwards whether or not `body` throws.
+   */
+  async function withTraceFile(
+    contents: string,
+    body: (traceFile: string) => Promise<void>,
+  ): Promise<void> {
+    const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-rulecode-replay-'));
+    const traceFile = path.join(scratchDir, 'trace.json');
+    fs.writeFileSync(traceFile, contents, 'utf8');
+    try {
+      await body(traceFile);
+    } finally {
+      fs.rmSync(scratchDir, { recursive: true, force: true });
+    }
+  }
+  /** Same as `withTraceFile`, but serialises `cases` as pretty-printed JSON first. */
+  function withGoldenTrace(
+    cases: unknown,
+    body: (traceFile: string) => Promise<void>,
+  ): Promise<void> {
+    return withTraceFile(JSON.stringify(cases, null, 2), body);
+  }
+  /** Invokes the handler in JSON mode and returns the parsed report together with the exit code. */
+  async function replayAsJson(
+    request: { code?: string; goldenTrace: string },
+  ): Promise<{ report: ReplayReport; exitCode: number | undefined }> {
+    const { stdout, exitCode } = await runHandler(() =>
+      handleRulecodeReplay({ ...request, json: true }),
+    );
+    return { report: parseJson(stdout) as ReplayReport, exitCode };
+  }
+  it('passes replay with clean code and valid golden trace', async () => {
+    await withGoldenTrace(GOLDEN_TRACE_CASES, async (traceFile) => {
+      const { report, exitCode } = await replayAsJson({ code: CLEAN_CODE, goldenTrace: traceFile });
+      expect(report.status).toBe('ok');
+      expect(report.decision).toBe('accepted_shadow');
+      expect(exitCode).toBeUndefined();
+    });
+  });
+  it('fails loud when --golden-trace file does not exist', async () => {
+    const { report, exitCode } = await replayAsJson({
+      code: CLEAN_CODE,
+      goldenTrace: '/nonexistent/trace.json',
+    });
+    expect(report.status).toBe('failed');
+    expect(report.reason).toContain('cannot read');
+    expect(exitCode).toBe(1);
+  });
+  it('fails loud when golden trace is not valid JSON', async () => {
+    await withTraceFile('{ not valid json', async (traceFile) => {
+      const { report, exitCode } = await replayAsJson({ code: CLEAN_CODE, goldenTrace: traceFile });
+      expect(report.status).toBe('failed');
+      expect(report.reason).toContain('not valid JSON');
+      expect(exitCode).toBe(1);
+    });
+  });
+  it('fails loud when golden trace is not an array', async () => {
+    await withGoldenTrace({ not: 'an array' }, async (traceFile) => {
+      const { report, exitCode } = await replayAsJson({ code: CLEAN_CODE, goldenTrace: traceFile });
+      expect(report.status).toBe('failed');
+      expect(report.reason).toContain('must contain a JSON array');
+      expect(exitCode).toBe(1);
+    });
+  });
+  it('fails loud when golden trace has fewer than 2 cases', async () => {
+    await withGoldenTrace([GOLDEN_TRACE_CASES[0]], async (traceFile) => {
+      const { report, exitCode } = await replayAsJson({ code: CLEAN_CODE, goldenTrace: traceFile });
+      expect(report.status).toBe('failed');
+      expect(report.reason).toContain('at least 2 cases');
+      expect(exitCode).toBe(1);
+    });
+  });
+  it('fails loud when a golden trace case is malformed', async () => {
+    const cases = [
+      { caseId: 'x' }, // missing required fields
+      GOLDEN_TRACE_CASES[1],
+    ];
+    await withGoldenTrace(cases, async (traceFile) => {
+      const { report, exitCode } = await replayAsJson({ code: CLEAN_CODE, goldenTrace: traceFile });
+      expect(report.status).toBe('failed');
+      expect(report.reason).toContain('malformed');
+      expect(exitCode).toBe(1);
+    });
+  });
+  it('fails loud when no --code or --code-file provided', async () => {
+    await withGoldenTrace(GOLDEN_TRACE_CASES, async (traceFile) => {
+      const { report, exitCode } = await replayAsJson({ goldenTrace: traceFile });
+      expect(report.status).toBe('failed');
+      expect(report.reason).toContain('no code provided');
+      expect(exitCode).toBe(1);
+    });
+  });
+  it('reports sandbox failures with structured reason + nextAction', async () => {
+    // Code with forbidden pattern — sandbox will reject
+    const badCode = `function evaluate(input, helpers) {
+      eval('1');
+      return { decision: "allow", matched: false, reason: "x" };
+    }`;
+    await withGoldenTrace(GOLDEN_TRACE_CASES, async (traceFile) => {
+      const { report, exitCode } = await replayAsJson({ code: badCode, goldenTrace: traceFile });
+      expect(report.status).toBe('failed');
+      expect(report.decision).not.toBe('accepted_shadow');
+      expect(report.reason).toBeDefined();
+      expect(report.nextAction).toBeDefined();
+      expect(exitCode).toBe(1);
+    });
+  });
+});