npm - agent-harness-kit - Versions diffs - 0.8.0 → 0.10.0 - Mend

agent-harness-kit 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/src/templates/.claude/skills/map-domain/scripts/domain-map.mjs ADDED Viewed

@@ -0,0 +1,145 @@
+#!/usr/bin/env node
+// domain-map.mjs — deterministic step for /map-domain.
+// Renders harness.config.json#domains as a markdown doc with embedded
+// mermaid graph + drift check against the filesystem.
+//
+// Usage:
+//   domain-map.mjs [--out docs/architecture/domain-map.md]
+//   domain-map.mjs --stdout
+import { readFileSync, existsSync, writeFileSync, mkdirSync, statSync, readdirSync } from "node:fs";
+import { resolve, dirname } from "node:path";
+// Project root for every path this script resolves: CLAUDE_PROJECT_DIR when
+// it is set to a non-empty value, otherwise the current working directory.
+const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
+function parseArgs(argv) {
+  const out = { outPath: null, stdout: false };
+  for (let i = 0; i < argv.length; i++) {
+    if (argv[i] === "--out") out.outPath = argv[++i];
+    else if (argv[i] === "--stdout") out.stdout = true;
+  }
+  if (!out.outPath && !out.stdout) out.stdout = true;
+  return out;
+}
+function loadConfig() {
+  const p = resolve(ROOT, "harness.config.json");
+  if (!existsSync(p)) {
+    console.error("domain-map: harness.config.json not found at repo root");
+    process.exit(2);
+  }
+  try { return JSON.parse(readFileSync(p, "utf8")); }
+  catch (e) {
+    console.error(`domain-map: harness.config.json is not valid JSON: ${e.message}`);
+    process.exit(2);
+  }
+}
+function dirState(abs) {
+  try {
+    const st = statSync(abs);
+    if (!st.isDirectory()) return "missing";
+    const entries = readdirSync(abs);
+    if (entries.length === 0) return "empty";
+    return "present";
+  } catch {
+    return "missing";
+  }
+}
+function driftBadge(state) {
+  if (state === "present") return "✓";
+  if (state === "empty") return "?";
+  return "✗";
+}
+function safeId(s) {
+  return String(s).replace(/[^a-zA-Z0-9_]/g, "_");
+}
+function buildMermaid(domains) {
+  const lines = [];
+  lines.push("```mermaid");
+  lines.push("flowchart LR");
+  let domainIdx = 0;
+  const driftRows = [];
+  for (const d of domains) {
+    domainIdx++;
+    const dName = d.name || `domain${domainIdx}`;
+    const root = d.root || "";
+    const layers = Array.isArray(d.layers) ? d.layers : [];
+    const dId = safeId(dName);
+    lines.push(`  subgraph ${dId} ["${dName} (${root}/)"]`);
+    let prev = null;
+    for (const layer of layers) {
+      const abs = resolve(ROOT, root, layer);
+      const state = dirState(abs);
+      const badge = driftBadge(state);
+      const node = `${dId}__${safeId(layer)}`;
+      lines.push(`    ${node}["${layer} ${badge}"]`);
+      if (prev) lines.push(`    ${prev} --> ${node}`);
+      prev = node;
+      driftRows.push({ domain: dName, layer, root, state, badge });
+    }
+    lines.push("  end");
+  }
+  lines.push("```");
+  return { mermaid: lines.join("\n"), driftRows };
+}
+function buildMarkdown(cfg) {
+  const domains = Array.isArray(cfg.domains) ? cfg.domains : [];
+  const { mermaid, driftRows } = buildMermaid(domains);
+  const drift = driftRows.filter((r) => r.state !== "present");
+  const md = [];
+  md.push(`# Domain map`);
+  md.push("");
+  md.push(`Generated by \`/map-domain\` from \`harness.config.json\`. Re-run after editing domain/layer config.`);
+  md.push("");
+  md.push(`- domains: ${domains.length}`);
+  md.push(`- layers (total): ${driftRows.length}`);
+  md.push(`- drift entries: ${drift.length}`);
+  md.push("");
+  md.push(`## Diagram`);
+  md.push("");
+  md.push(mermaid);
+  md.push("");
+  md.push(`## Drift table`);
+  md.push("");
+  md.push(`| domain | layer | root | state | badge |`);
+  md.push(`|---|---|---|---|---|`);
+  for (const r of driftRows) {
+    md.push(`| ${r.domain} | ${r.layer} | \`${r.root}/${r.layer}/\` | ${r.state} | ${r.badge} |`);
+  }
+  md.push("");
+  if (drift.length > 0) {
+    md.push(`## Resolutions`);
+    md.push("");
+    for (const d of drift) {
+      if (d.state === "missing") {
+        md.push(`- \`${d.root}/${d.layer}/\` is **missing**. Either create the directory (with a README.md) or remove "${d.layer}" from harness.config.json#domains[${d.domain}].layers.`);
+      } else if (d.state === "empty") {
+        md.push(`- \`${d.root}/${d.layer}/\` exists but is empty. Add at least a README.md or seed file, or drop the layer from config.`);
+      }
+    }
+    md.push("");
+  }
+  const tail = { domains: domains.length, layers: driftRows.length, drift_count: drift.length };
+  md.push(`<!-- machine-tail: ${JSON.stringify(tail)} -->`);
+  return { md: md.join("\n") + "\n", drift_count: drift.length };
+}
+function main() {
+  const { outPath, stdout } = parseArgs(process.argv.slice(2));
+  const cfg = loadConfig();
+  const { md, drift_count } = buildMarkdown(cfg);
+  if (outPath) {
+    const abs = resolve(ROOT, outPath);
+    mkdirSync(dirname(abs), { recursive: true });
+    writeFileSync(abs, md);
+    process.stdout.write(JSON.stringify({ written: outPath, drift_count }) + "\n");
+  }
+  if (stdout) process.stdout.write(md);
+}
+main();

package/src/templates/.claude/skills/propose-harness-improvement/SKILL.md.vi ADDED Viewed

@@ -0,0 +1,49 @@
+---
+name: propose-harness-improvement
+description: Use this skill whenever the agent makes a mistake, the user observes an avoidable failure, a pattern recurs, or someone says "the agent keeps doing X". Files an "Engineer the Harness" entry — Mitchell Hashimoto's discipline: every failure becomes a permanent prevention mechanism. Always invoke this instead of just fixing the immediate symptom.
+allowed-tools: Read, Edit, Write, Bash(git diff:*)
+suggested-turns: 8
+---
+## Các bước
+1. **Triage.** Hỏi: "Vừa rồi sai cái gì? Hành vi mong muốn của agent là
+   gì? Triệu chứng là gì?"
+2. **Phân loại.** Một trong:
+   - **(a) Thiếu context** — agent không biết một điều gì đó. Fix: thêm
+     vào `docs/`.
+   - **(b) Thiếu rule** — agent làm một việc bị một unwritten rule cấm.
+     Fix: gọi `/structural-test-author`.
+   - **(c) Thiếu tool/skill** — agent với lấy sai tool. Fix: gọi
+     `/write-skill`.
+   - **(d) Wrong layer / architecture** — cấu trúc đã mời gọi sai lầm.
+     Fix: viết ADR qua `/add-adr`.
+   - **(e) Wrong instruction in prompt** — failure truy ngược về một
+     skill/agent prompt đã ambiguous, gây hiểu sai, hoặc under-constrained.
+     Agent đã làm theo prompt chính xác nhưng chính prompt đã dẫn sai.
+     Fix: edit file vi phạm tại `.claude/skills/<name>/SKILL.md` hoặc
+     `.claude/agents/<name>.md`. Sau đó chạy lại `/eval-runner` để xác
+     nhận regression đã đóng.
+3. **Append entry** vào `docs/agent-failures.md` với: date, symptom, fix,
+   fix-type, file modified.
+4. **Áp dụng fix tại nơi đúng.** KHÔNG BAO GIỜ dán đè bằng một câu "be
+   careful" vào CLAUDE.md trừ khi rule (a) áp dụng — và ngay cả khi đó,
+   chỉ làm pointer đến doc dài hơn.
+5. **Update PROGRESS.** Append `harness-improvement: <slug>` vào
+   `.harness/PROGRESS.md`.
+## Output contract
+```
+### Failure: <one-line summary>
+### Classification: (a|b|c|d|e) <name>
+### Fix applied at: <file:line>
+### docs/agent-failures.md entry: §<n>
+```
+## Anti-patterns (block on these)
+- Không thêm câu "be careful with X" mơ hồ vào CLAUDE.md.
+- Không thêm rule mà enforcement của nó cũng dựa trên LLM.
+- Không dùng skill này để log những cleanup ideas không liên quan —
+  chúng đi vào `docs/tech-debt-tracker.md`.

package/src/templates/.claude/skills/propose-harness-improvement/scripts/improvement-bundle.mjs ADDED Viewed

@@ -0,0 +1,172 @@
+#!/usr/bin/env node
+// improvement-bundle.mjs — deterministic step for /propose-harness-improvement.
+// Replaces the "ask the agent to summarize recent failures" LLM turn with a
+// mechanical sweep over telemetry + git history + bypass log.
+//
+// Output (JSON, stdout or --out):
+//   {
+//     window_days: <n>,
+//     recent_failures: [ {ts, event, source, detail} ],
+//     recurring_patterns: [ {pattern, count, sample_ts} ],
+//     classification: { context, rule, tool_skill, architecture, prompt },
+//     fix_targets: [ {file, why} ],
+//     recent_fix_commits: [ "<abbrev-sha> <subject>", ... ]
+//   }
+//
+// Classification rubric mirrors the (a)-(e) buckets in the SKILL.md:
+//   (a) context        — pretooluse denials referencing rules in docs/
+//   (b) rule           — structural-test failures / baseline drift
+//   (c) tool/skill     — bypass.log entries / missing-skill prompt-guard hits
+//   (d) architecture   — layer-violation patterns appearing >=3 times
+//   (e) prompt         — skill_invoked followed by failure within same session
+//                        (not computed by this script yet; the count is always 0)
+//
+// The buckets are heuristic; an LLM still makes the final call. The point is
+// to hand it a dense, factual digest instead of forcing it to scan files
+// blind.
+import { readFileSync, existsSync, writeFileSync, mkdirSync } from "node:fs";
+import { resolve, dirname } from "node:path";
+import { spawnSync } from "node:child_process";
+// Project root for every path and git command in this script:
+// CLAUDE_PROJECT_DIR when set to a non-empty value, otherwise the cwd.
+const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
+function parseArgs(argv) {
+  const out = { window: 14, out: null };
+  for (let i = 0; i < argv.length; i++) {
+    if (argv[i] === "--window") out.window = Number(argv[++i]) || 14;
+    else if (argv[i] === "--out") out.out = argv[++i];
+  }
+  return out;
+}
+function readJsonl(path) {
+  if (!existsSync(path)) return [];
+  const body = readFileSync(path, "utf8");
+  const out = [];
+  for (const line of body.split("\n")) {
+    if (!line.trim()) continue;
+    try { out.push(JSON.parse(line)); } catch { /* skip malformed */ }
+  }
+  return out;
+}
+function isWithin(ts, days) {
+  const t = Date.parse(ts);
+  if (!Number.isFinite(t)) return false;
+  return (Date.now() - t) <= days * 24 * 3600 * 1000;
+}
+function gitLogFixes(days) {
+  const since = `${days}.days`;
+  const r = spawnSync("git", ["log", `--since=${since}`, "--oneline", "--grep=fix\\|revert\\|hotfix"], {
+    cwd: ROOT, encoding: "utf8",
+  });
+  if (r.status !== 0) return [];
+  return (r.stdout || "").split("\n").filter(Boolean).slice(0, 50);
+}
+function summariseFailures(telemetry, bypass, windowDays) {
+  const failures = [];
+  for (const rec of telemetry) {
+    if (!rec.ts || !isWithin(rec.ts, windowDays)) continue;
+    if (rec.event === "structural_test_fail" || rec.event === "precompletion_block" ||
+        rec.event === "permission_denied" || rec.event === "userprompt_block") {
+      failures.push({
+        ts: rec.ts,
+        event: rec.event,
+        source: rec.source || rec.rule || "(unspecified)",
+        detail: (rec.reason || rec.detail || rec.skill || "").slice(0, 200),
+      });
+    }
+  }
+  for (const rec of bypass) {
+    if (!rec.ts || !isWithin(rec.ts, windowDays)) continue;
+    failures.push({
+      ts: rec.ts,
+      event: "bypass",
+      source: rec.rule || rec.bypass || "(unspecified)",
+      detail: (rec.command || rec.file || "").slice(0, 200),
+    });
+  }
+  failures.sort((a, b) => a.ts.localeCompare(b.ts));
+  return failures.slice(-40);
+}
+function recurringPatterns(failures) {
+  const counts = new Map();
+  const samples = new Map();
+  for (const f of failures) {
+    const key = `${f.event}::${f.source}`;
+    counts.set(key, (counts.get(key) || 0) + 1);
+    if (!samples.has(key)) samples.set(key, f.ts);
+  }
+  const out = [];
+  for (const [key, count] of counts) {
+    if (count >= 2) out.push({ pattern: key, count, sample_ts: samples.get(key) });
+  }
+  out.sort((a, b) => b.count - a.count);
+  return out.slice(0, 20);
+}
+function classify(failures, recurring) {
+  const buckets = { context: 0, rule: 0, tool_skill: 0, architecture: 0, prompt: 0 };
+  for (const f of failures) {
+    if (f.event === "structural_test_fail") buckets.rule++;
+    else if (f.event === "precompletion_block") buckets.rule++;
+    else if (f.event === "permission_denied") buckets.context++;
+    else if (f.event === "userprompt_block") buckets.context++;
+    else if (f.event === "bypass") buckets.tool_skill++;
+  }
+  for (const r of recurring) {
+    if (r.count >= 3 && r.pattern.startsWith("structural_test_fail::")) {
+      buckets.architecture++;
+    }
+  }
+  return buckets;
+}
+function fixTargets(buckets) {
+  const out = [];
+  if (buckets.rule > 0) {
+    out.push({ file: "harness.config.json", why: "structural rule lives here; consider tightening" });
+    out.push({ file: ".harness/structural-baseline.json", why: "review whether baseline entries should drain" });
+  }
+  if (buckets.context > 0) {
+    out.push({ file: "docs/golden-principles.md", why: "context gap surfaced via permission denials" });
+    out.push({ file: "CLAUDE.md", why: "consider a pointer (not a paste) to relevant doc" });
+  }
+  if (buckets.tool_skill > 0) {
+    out.push({ file: ".claude/skills/", why: "missing skill or wrong skill chosen — write or edit one" });
+  }
+  if (buckets.architecture > 0) {
+    out.push({ file: "docs/adr/", why: "recurring violation suggests an ADR is needed" });
+  }
+  if (buckets.prompt > 0) {
+    out.push({ file: ".claude/skills/<name>/SKILL.md", why: "prompt ambiguity led the agent astray" });
+  }
+  return out;
+}
+function main() {
+  const { window: windowDays, out: outPath } = parseArgs(process.argv.slice(2));
+  const telemetry = readJsonl(resolve(ROOT, ".harness/telemetry.jsonl"));
+  const bypass = readJsonl(resolve(ROOT, ".harness/bypass.log"));
+  const recentFailures = summariseFailures(telemetry, bypass, windowDays);
+  const recurring = recurringPatterns(recentFailures);
+  const classification = classify(recentFailures, recurring);
+  const targets = fixTargets(classification);
+  const fixCommits = gitLogFixes(windowDays);
+  const payload = {
+    window_days: windowDays,
+    recent_failures: recentFailures,
+    recurring_patterns: recurring,
+    classification,
+    fix_targets: targets,
+    recent_fix_commits: fixCommits,
+  };
+  const text = JSON.stringify(payload, null, 2);
+  if (outPath) writeFileSync(resolve(ROOT, outPath), text + "\n");
+  else process.stdout.write(text + "\n");
+}
+main();

package/src/templates/.claude/skills/refactor-feature/SKILL.md ADDED Viewed

@@ -0,0 +1,60 @@
+---
+name: refactor-feature
+description: Use this skill when restructuring a feature in feature_list.json — splitting steps, merging steps, renaming, or marking a previously-failing step done. The side-car diffs feature_list.json#steps before/after and rejects the edit when a step.done transition is not accompanied by a test reference. Forces "no done without proof".
+allowed-tools: Read, Edit, Bash(git diff:*, node .claude/skills/refactor-feature/scripts/feature-diff.mjs:*)
+suggested-turns: 6
+isolation: worktree
+---
+## When to invoke
+- Re-decomposing a feature (one becomes many, or vice versa).
+- Marking `passes: false → true` for a step that was previously WIP.
+- Renaming feature ids (this is the dangerous case — the side-car catches
+  silent renames that orphan PROGRESS.md references).
+## Pre-flight (side-car gate)
+Run the diff side-car BEFORE any feature_list.json edit lands:
+```
+node .claude/skills/refactor-feature/scripts/feature-diff.mjs \
+  --before-ref HEAD --after-file feature_list.json
+```
+Side-car contract:
+- Exits 0 + JSON when changes are coherent.
+- Exits 2 + JSON with `violations: [...]` when:
+  - A step's `passes` flipped `false → true` without a test entry under
+    `step.tests` (or `step.testCommit`).
+  - A step's `id` changed without a `renamed_from` field (silent rename).
+  - A step disappeared without an entry in `step.replaced_by`.
+## Steps
+1. **Capture before-state.** `git show HEAD:feature_list.json > /tmp/before.json`
+2. **Edit.** Make the refactor in your working copy.
+3. **Run the gate.** Side-car compares HEAD vs working copy. Address any
+   violation before staging.
+4. **Stage + test.** If `passes` flipped true, the test must exist and be
+   referenced in `step.tests`.
+5. **Commit with a body explaining the refactor.** Use commit trailer
+   `Refactor-Feature: <feature_id>` so /review-this-pr can group changes.
+## Output contract
+```
+feature_list refactor: <id>
+steps_changed: <N>
+done_transitions: <M> (each with a test reference)
+renames: <list of id→id>
+gate: passed
+```
+## Anti-patterns
+- Don't mark `passes: true` first and "add tests later" — the side-car
+  blocks at the boundary on purpose. Flip the bit only AFTER the test
+  exists.
+- Don't delete a step without `replaced_by` — orphaned PROGRESS.md
+  entries get out of sync with the live feature list.

package/src/templates/.claude/skills/refactor-feature/SKILL.md.vi ADDED Viewed

@@ -0,0 +1,64 @@
+<!-- LOCALE_TODO: translate body to vi -->
+<!-- Source: .claude/skills/refactor-feature/SKILL.md -->
+<!-- Edit only the markdown body — keep frontmatter verbatim so the kit's renderer + Claude Code parse it identically across locales. -->
+---
+name: refactor-feature
+description: Use this skill when restructuring a feature in feature_list.json — splitting steps, merging steps, renaming, or marking a previously-failing step done. The side-car diffs feature_list.json#steps before/after and rejects the edit when a step.done transition is not accompanied by a test reference. Forces "no done without proof".
+allowed-tools: Read, Edit, Bash(git diff:*, node .claude/skills/refactor-feature/scripts/feature-diff.mjs:*)
+suggested-turns: 6
+isolation: worktree
+---
+## When to invoke
+- Re-decomposing a feature (one becomes many, or vice versa).
+- Marking `passes: false → true` for a step that was previously WIP.
+- Renaming feature ids (this is the dangerous case — the side-car catches
+  silent renames that orphan PROGRESS.md references).
+## Pre-flight (side-car gate)
+Run the diff side-car BEFORE any feature_list.json edit lands:
+```
+node .claude/skills/refactor-feature/scripts/feature-diff.mjs \
+  --before-ref HEAD --after-file feature_list.json
+```
+Side-car contract:
+- Exits 0 + JSON when changes are coherent.
+- Exits 2 + JSON with `violations: [...]` when:
+  - A step's `passes` flipped `false → true` without a test entry under
+    `step.tests` (or `step.testCommit`).
+  - A step's `id` changed without a `renamed_from` field (silent rename).
+  - A step disappeared without an entry in `step.replaced_by`.
+## Steps
+1. **Capture before-state.** `git show HEAD:feature_list.json > /tmp/before.json`
+2. **Edit.** Make the refactor in your working copy.
+3. **Run the gate.** Side-car compares HEAD vs working copy. Address any
+   violation before staging.
+4. **Stage + test.** If `passes` flipped true, the test must exist and be
+   referenced in `step.tests`.
+5. **Commit with a body explaining the refactor.** Use commit trailer
+   `Refactor-Feature: <feature_id>` so /review-this-pr can group changes.
+## Output contract
+```
+feature_list refactor: <id>
+steps_changed: <N>
+done_transitions: <M> (each with a test reference)
+renames: <list of id→id>
+gate: passed
+```
+## Anti-patterns
+- Don't mark `passes: true` first and "add tests later" — the side-car
+  blocks at the boundary on purpose. Flip the bit only AFTER the test
+  exists.
+- Don't delete a step without `replaced_by` — orphaned PROGRESS.md
+  entries get out of sync with the live feature list.

package/src/templates/.claude/skills/refactor-feature/scripts/feature-diff.mjs ADDED Viewed

@@ -0,0 +1,146 @@
+#!/usr/bin/env node
+// feature-diff.mjs — deterministic gate for /refactor-feature.
+// Diffs feature_list.json#features[*].steps[*] between a base ref and the
+// current working copy. Returns violations when:
+//   - step.passes flipped false → true without step.tests[] or step.testCommit
+//   - step.id silently renamed (no renamed_from)
+//   - step disappeared without replaced_by
+//
+// Exit codes:
+//   0 → no violations
+//   2 → violations present (printed as JSON to stdout)
+//   3 → input error (missing ref / unreadable file)
+import { readFileSync, existsSync } from "node:fs";
+import { resolve } from "node:path";
+import { spawnSync } from "node:child_process";
+// Directory used as cwd for git and as the base for the working-copy file:
+// CLAUDE_PROJECT_DIR when set to a non-empty value, otherwise the cwd.
+const ROOT = process.env.CLAUDE_PROJECT_DIR || process.cwd();
+function parseArgs(argv) {
+  const out = { beforeRef: "HEAD", afterFile: "feature_list.json" };
+  for (let i = 0; i < argv.length; i++) {
+    if (argv[i] === "--before-ref") out.beforeRef = argv[++i];
+    else if (argv[i] === "--after-file") out.afterFile = argv[++i];
+  }
+  return out;
+}
+function gitShow(ref, path) {
+  const r = spawnSync("git", ["show", `${ref}:${path}`], { cwd: ROOT, encoding: "utf8" });
+  if (r.status !== 0) return null;
+  return r.stdout;
+}
+function safeJSON(s, label) {
+  if (!s) return null;
+  try { return JSON.parse(s); }
+  catch (e) {
+    console.error(`feature-diff: invalid JSON in ${label}: ${e.message}`);
+    process.exit(3);
+  }
+}
+function indexSteps(featureList) {
+  // Returns { [stepId]: { featureId, step } }.
+  const idx = new Map();
+  for (const f of (featureList?.features || [])) {
+    for (const s of (f.steps || [])) {
+      if (s && s.id) idx.set(s.id, { featureId: f.id, step: s });
+    }
+  }
+  return idx;
+}
+function diff(before, after) {
+  const beforeIdx = indexSteps(before);
+  const afterIdx = indexSteps(after);
+  const violations = [];
+  const renames = [];
+  const doneTransitions = [];
+  // Disappearances + done-transitions (work over before).
+  for (const [id, { featureId, step }] of beforeIdx) {
+    const post = afterIdx.get(id);
+    if (!post) {
+      // Disappeared. Allowed only when a replaced_by exists in the BEFORE
+      // version OR an AFTER step references this id under renamed_from.
+      let renamedAway = false;
+      for (const [newId, { step: newStep }] of afterIdx) {
+        if (Array.isArray(newStep.renamed_from) && newStep.renamed_from.includes(id)) {
+          renamedAway = true;
+          renames.push({ from: id, to: newId, kind: "renamed_from" });
+          break;
+        }
+        if (newStep.renamed_from === id) {
+          renamedAway = true;
+          renames.push({ from: id, to: newId, kind: "renamed_from" });
+          break;
+        }
+      }
+      if (!renamedAway && !step.replaced_by) {
+        violations.push({
+          kind: "step_disappeared",
+          step_id: id,
+          feature_id: featureId,
+          fix: `Add 'replaced_by: <new_step_id>' to the step before deleting, OR mark the new step's 'renamed_from'.`,
+        });
+      }
+      continue;
+    }
+    // passes transition false → true.
+    if (step.passes === false && post.step.passes === true) {
+      doneTransitions.push({ step_id: id, feature_id: featureId });
+      const hasTests = Array.isArray(post.step.tests) && post.step.tests.length > 0;
+      const hasCommit = typeof post.step.testCommit === "string" && post.step.testCommit.length > 0;
+      if (!hasTests && !hasCommit) {
+        violations.push({
+          kind: "done_without_proof",
+          step_id: id,
+          feature_id: featureId,
+          fix: `Add 'tests: [...]' (test file paths) or 'testCommit: <sha>' before flipping passes:true.`,
+        });
+      }
+    }
+  }
+  // Newly-introduced steps with renamed_from referring to nonexistent ids
+  // (paranoia: catches typos in the renamed_from value).
+  for (const [id, { step }] of afterIdx) {
+    if (beforeIdx.has(id)) continue;
+    const refs = Array.isArray(step.renamed_from) ? step.renamed_from
+               : (typeof step.renamed_from === "string" ? [step.renamed_from] : []);
+    for (const ref of refs) {
+      if (!beforeIdx.has(ref)) {
+        violations.push({
+          kind: "renamed_from_typo",
+          step_id: id,
+          missing_ref: ref,
+          fix: `'renamed_from' must reference a step that existed at HEAD. Check the spelling.`,
+        });
+      }
+    }
+  }
+  return { violations, renames, doneTransitions };
+}
+function main() {
+  const { beforeRef, afterFile } = parseArgs(process.argv.slice(2));
+  const beforeRaw = gitShow(beforeRef, afterFile);
+  if (beforeRaw === null) {
+    // First-time addition — nothing to diff.
+    process.stdout.write(JSON.stringify({ violations: [], note: `no prior ${afterFile} at ${beforeRef}` }) + "\n");
+    process.exit(0);
+  }
+  const afterPath = resolve(ROOT, afterFile);
+  if (!existsSync(afterPath)) {
+    console.error(`feature-diff: missing ${afterFile} in working copy`);
+    process.exit(3);
+  }
+  const before = safeJSON(beforeRaw, `${beforeRef}:${afterFile}`);
+  const after = safeJSON(readFileSync(afterPath, "utf8"), afterFile);
+  const result = diff(before, after);
+  process.stdout.write(JSON.stringify(result, null, 2) + "\n");
+  if (result.violations.length > 0) process.exit(2);
+}
+main();