npm - @doidor/agentrig - Versions diffs - 0.5.3 - Mend

@doidor/agentrig 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/LICENSE +21 -0
package/README.md +224 -0
package/dist/agent/claude.js +125 -0
package/dist/agent/claude.js.map +1 -0
package/dist/agent/copilot.js +147 -0
package/dist/agent/copilot.js.map +1 -0
package/dist/agent/index.js +17 -0
package/dist/agent/index.js.map +1 -0
package/dist/agent/provider.js +10 -0
package/dist/agent/provider.js.map +1 -0
package/dist/cli.js +169 -0
package/dist/cli.js.map +1 -0
package/dist/commands/compile.js +42 -0
package/dist/commands/compile.js.map +1 -0
package/dist/commands/dashboard.js +35 -0
package/dist/commands/dashboard.js.map +1 -0
package/dist/commands/doctor.js +40 -0
package/dist/commands/doctor.js.map +1 -0
package/dist/commands/eval.js +178 -0
package/dist/commands/eval.js.map +1 -0
package/dist/commands/init.js +100 -0
package/dist/commands/init.js.map +1 -0
package/dist/commands/update.js +176 -0
package/dist/commands/update.js.map +1 -0
package/dist/core/activity.js +80 -0
package/dist/core/activity.js.map +1 -0
package/dist/core/audit.js +112 -0
package/dist/core/audit.js.map +1 -0
package/dist/core/compile.js +250 -0
package/dist/core/compile.js.map +1 -0
package/dist/core/fsutil.js +45 -0
package/dist/core/fsutil.js.map +1 -0
package/dist/core/install.js +97 -0
package/dist/core/install.js.map +1 -0
package/dist/core/knowledge.js +34 -0
package/dist/core/knowledge.js.map +1 -0
package/dist/core/logger.js +31 -0
package/dist/core/logger.js.map +1 -0
package/dist/core/paths.js +22 -0
package/dist/core/paths.js.map +1 -0
package/dist/core/setupsteps.js +72 -0
package/dist/core/setupsteps.js.map +1 -0
package/dist/core/state.js +19 -0
package/dist/core/state.js.map +1 -0
package/dist/core/surfaces.js +62 -0
package/dist/core/surfaces.js.map +1 -0
package/dist/prompts/index.js +117 -0
package/dist/prompts/index.js.map +1 -0
package/dist/version.js +26 -0
package/dist/version.js.map +1 -0
package/knowledge/PRINCIPLES.md +106 -0
package/knowledge/manifest.json +247 -0
package/knowledge/templates/AGENTS.md +66 -0
package/knowledge/templates/AGENTS.package.example.md +19 -0
package/knowledge/templates/agents/README.md +33 -0
package/knowledge/templates/agents/developer.md +7 -0
package/knowledge/templates/agents/developer.yml +7 -0
package/knowledge/templates/agents/judge.md +6 -0
package/knowledge/templates/agents/judge.yml +6 -0
package/knowledge/templates/agents/reviewer.md +6 -0
package/knowledge/templates/agents/reviewer.yml +7 -0
package/knowledge/templates/agents/triager.md +8 -0
package/knowledge/templates/agents/triager.yml +8 -0
package/knowledge/templates/dashboard/dashboard.mjs +261 -0
package/knowledge/templates/eval/RUBRIC.md +94 -0
package/knowledge/templates/eval/axes.json +56 -0
package/knowledge/templates/eval/checks.json +304 -0
package/knowledge/templates/eval/sandbox/eval-rules.md +23 -0
package/knowledge/templates/eval/scenarios/README.md +24 -0
package/knowledge/templates/eval/scenarios/add-small-feature.md +28 -0
package/knowledge/templates/eval/scenarios/fix-failing-test.md +27 -0
package/knowledge/templates/eval/scenarios/review-catches-bug.md +30 -0
package/knowledge/templates/eval/score.mjs +257 -0
package/knowledge/templates/eval/static-audit.mjs +112 -0
package/knowledge/templates/harness/ORCHESTRATION.md +53 -0
package/knowledge/templates/harness/state-machine.yml +105 -0
package/knowledge/templates/mcp/mcp.json +12 -0
package/knowledge/templates/rules/README.md +32 -0
package/knowledge/templates/rules/code-review.md +26 -0
package/knowledge/templates/rules/coding-standards.md +15 -0
package/knowledge/templates/rules/no-debug-logging.md +16 -0
package/knowledge/templates/rules/security.md +23 -0
package/knowledge/templates/scripts/repair-worktrees.sh +124 -0
package/knowledge/templates/skills/fix-ci/SKILL.md +17 -0
package/knowledge/templates/skills/harness-eval/SKILL.md +83 -0
package/knowledge/templates/skills/self-verify/SKILL.md +25 -0
package/knowledge/templates/skills/skill-authoring/SKILL.md +35 -0
package/knowledge/templates/skills/skill-improver/SKILL.md +23 -0
package/knowledge/templates/skills/verify-loop/SKILL.md +35 -0
package/knowledge/templates/wiki/README.md +23 -0
package/knowledge/templates/wiki/_TEMPLATE.md +16 -0
package/knowledge/templates/wiki/index.md +29 -0
package/knowledge/templates/wiki/troubleshooting.md +14 -0
package/package.json +70 -0

package/knowledge/templates/dashboard/dashboard.mjs ADDED Viewed

@@ -0,0 +1,261 @@
+#!/usr/bin/env node
+// AgentRig harness dashboard. Dependency-free. Surfaces, in one place:
+//   • the agent roster (roles + models)            (principle 2)
+//   • live GitHub tasks per harness label via `gh` (principle 3 — system of record)
+//   • the static Harness Score + per-principle      (principle 6)
+//   • the latest dynamic eval summary               (principle 6)
+//   • the harness hard limits                       (principle 10)
+// Usage:
+//   node .agentrig/dashboard/dashboard.mjs            terminal dashboard
+//   node .agentrig/dashboard/dashboard.mjs --json     machine-readable
+//   node .agentrig/dashboard/dashboard.mjs --html [file]   write a self-contained HTML page
+//   node .agentrig/dashboard/dashboard.mjs --no-tasks skip live GitHub lookups (offline mode)
+import { readFileSync, existsSync, readdirSync, writeFileSync } from "node:fs";
+import { execFileSync } from "node:child_process";
+import { fileURLToPath } from "node:url";
+import { dirname, join, resolve } from "node:path";
+const scriptDir = dirname(fileURLToPath(import.meta.url));
+const repoRoot = resolve(scriptDir, "..", "..");
+const args = process.argv.slice(2);
+const asJson = args.includes("--json");
+const htmlIdx = args.indexOf("--html");
+const asHtml = htmlIdx >= 0;
+const htmlOut = asHtml ? args[htmlIdx + 1] && !args[htmlIdx + 1].startsWith("-") ? args[htmlIdx + 1] : join(scriptDir, "dashboard.html") : null;
+const noTasks = args.includes("--no-tasks");
+const rel = (p) => resolve(repoRoot, p);
+const read = (p) => (existsSync(rel(p)) ? readFileSync(rel(p), "utf8") : null);
+function runNode(scriptRelPath, scriptArgs) {
+  try {
+    const out = execFileSync(process.execPath, [rel(scriptRelPath), ...scriptArgs], {
+      encoding: "utf8",
+      stdio: ["ignore", "pipe", "ignore"],
+    });
+    return JSON.parse(out);
+  } catch {
+    return null;
+  }
+}
+// --- Agent roster -----------------------------------------------------------
+function loadRoster() {
+  const dir = rel(".agentrig/agents");
+  if (!existsSync(dir)) return [];
+  return readdirSync(dir)
+    .filter((f) => f.endsWith(".yml"))
+    .map((f) => {
+      const text = readFileSync(join(dir, f), "utf8");
+      const get = (k) => (text.match(new RegExp("^\\s*" + k + "\\s*:\\s*(.+)\\s*$", "m")) || [])[1]?.trim() ?? null;
+      return { role: get("role") || f.replace(/\.yml$/, ""), model: get("model"), tier: get("model_tier") };
+    })
+    .sort((a, b) => a.role.localeCompare(b.role));
+}
+// --- State<->label map from the state machine -------------------------------
+function loadStateLabels() {
+  const text = read(".agentrig/harness/state-machine.yml");
+  if (!text) return {};
+  const lines = text.split("\n");
+  const map = {};
+  let inStateMap = false;
+  let baseIndent = null;
+  for (const line of lines) {
+    if (/^\s*state_map:\s*$/.test(line)) {
+      inStateMap = true;
+      baseIndent = null;
+      continue;
+    }
+    if (inStateMap) {
+      if (line.trim() === "") continue;
+      const indent = line.length - line.trimStart().length;
+      const m = line.match(/^\s*([a-z_]+)\s*:\s*([A-Za-z0-9_-]+)\s*$/);
+      if (baseIndent === null && m) baseIndent = indent;
+      if (m && indent === baseIndent) map[m[1]] = m[2];
+      else if (indent <= (baseIndent ?? 0) - 1 || /^\s*[a-z_]+:\s*$/.test(line)) {
+        if (!m) break;
+      }
+    }
+  }
+  return map;
+}
+function loadLimits() {
+  const text = read(".agentrig/harness/state-machine.yml");
+  if (!text) return {};
+  const out = {};
+  const block = text.split(/^\s*limits:\s*$/m)[1];
+  if (!block) return out;
+  for (const line of block.split("\n")) {
+    const m = line.match(/^\s{2,}([a-z_]+)\s*:\s*(\d+)\s*$/);
+    if (m) out[m[1]] = Number(m[2]);
+    else if (/^\S/.test(line) && line.trim() !== "") break;
+  }
+  return out;
+}
+// --- Live GitHub tasks via gh ----------------------------------------------
+function ghAvailable() {
+  try {
+    execFileSync("gh", ["auth", "status"], { stdio: "ignore" });
+    return true;
+  } catch {
+    return false;
+  }
+}
+function ghList(kind, label) {
+  // kind: "issue" | "pr"
+  try {
+    const out = execFileSync(
+      "gh",
+      [kind, "list", "--label", label, "--state", "open", "--limit", "30", "--json", "number,title,url,assignees"],
+      { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] },
+    );
+    return JSON.parse(out).map((x) => ({
+      kind,
+      number: x.number,
+      title: x.title,
+      url: x.url,
+      assignees: (x.assignees || []).map((a) => a.login),
+    }));
+  } catch {
+    return [];
+  }
+}
+function loadTasks(stateLabels) {
+  if (noTasks) return { available: false, reason: "skipped (--no-tasks)", byState: {} };
+  if (!ghAvailable()) return { available: false, reason: "gh not installed or not authenticated", byState: {} };
+  const byState = {};
+  for (const [state, label] of Object.entries(stateLabels)) {
+    byState[state] = { label, items: [...ghList("issue", label), ...ghList("pr", label)] };
+  }
+  return { available: true, reason: null, byState };
+}
+// --- Gather everything ------------------------------------------------------
+const audit = runNode(".agentrig/eval/static-audit.mjs", ["--json"]);
+const evals = runNode(".agentrig/eval/score.mjs", ["report", "--json"]) || { overall: 0, scenarios: [], axes: [] };
+const roster = loadRoster();
+const stateLabels = loadStateLabels();
+const limits = loadLimits();
+const tasks = loadTasks(stateLabels);
+const data = {
+  generatedAt: new Date().toISOString(),
+  repo: repoRoot,
+  harnessScore: audit?.harnessScore ?? null,
+  principles: audit?.principles ?? [],
+  roster,
+  tasks,
+  evals,
+  limits,
+};
+// --- Render -----------------------------------------------------------------
+if (asJson) {
+  console.log(JSON.stringify(data, null, 2));
+  process.exit(0);
+}
+if (asHtml) {
+  writeFileSync(htmlOut, renderHtml(data));
+  console.log(`Wrote ${htmlOut}`);
+  process.exit(0);
+}
+renderTerminal(data);
+function renderTerminal(d) {
+  const useColor = process.stdout.isTTY && !process.env.NO_COLOR;
+  const c = (code, s) => (useColor ? `\x1b[${code}m${s}\x1b[0m` : s);
+  const bold = (s) => c("1", s), dim = (s) => c("2", s), green = (s) => c("32", s), yellow = (s) => c("33", s), red = (s) => c("31", s), cyan = (s) => c("36", s);
+  const rule = dim("─".repeat(64));
+  console.log(`\n${bold("AgentRig — harness dashboard")}  ${dim(d.repo)}`);
+  console.log(rule);
+  const scoreColor = d.harnessScore == null ? dim : d.harnessScore >= 80 ? green : d.harnessScore >= 50 ? yellow : red;
+  console.log(`${bold("Harness Score")}  ${scoreColor(d.harnessScore == null ? "n/a" : d.harnessScore + "%")}`);
+  if (d.principles.length) {
+    const weak = d.principles.filter((p) => p.score < 1).map((p) => `P${p.principle} ${(p.score * 100).toFixed(0)}%`);
+    console.log(dim(`  weak principles: ${weak.length ? weak.join(", ") : "none — all full credit"}`));
+  }
+  console.log(`\n${bold("Agents")} ${dim(`(${d.roster.length} roles)`)}`);
+  for (const a of d.roster) console.log(`  ${cyan(a.role.padEnd(11))} ${(a.model || "?").padEnd(20)} ${dim(a.tier || "")}`);
+  console.log(`\n${bold("Tasks")}`);
+  if (!d.tasks.available) {
+    console.log(dim(`  unavailable — ${d.tasks.reason}`));
+  } else {
+    let total = 0;
+    for (const [state, info] of Object.entries(d.tasks.byState)) {
+      const items = info.items;
+      total += items.length;
+      const head = `  ${state.padEnd(16)} ${dim(info.label)}  ${bold(String(items.length))}`;
+      console.log(head);
+      for (const it of items.slice(0, 8)) {
+        const who = it.assignees.length ? dim(` @${it.assignees.join(", @")}`) : dim(" unassigned");
+        console.log(`      ${it.kind === "pr" ? "PR" : "# "}${it.number} ${it.title.slice(0, 48)}${who}`);
+      }
+    }
+    if (total === 0) console.log(dim("  no open tasks carrying harness labels"));
+  }
+  console.log(`\n${bold("Evals")} ${dim("(dynamic)")}`);
+  const evalRows = d.evals.results || d.evals.scenarios || [];
+  if (!evalRows.length) {
+    console.log(dim("  no dynamic eval runs yet — `agentrig eval --dynamic`"));
+  } else {
+    console.log(`  overall ${bold(d.evals.overall.toFixed(2))} across ${evalRows.length} result(s)`);
+    for (const s of evalRows) {
+      const label = `${s.type ? s.type + "/" : ""}${s.scenario}${s.variant ? " [" + s.variant + "]" : ""}`;
+      console.log(`    ${s.pass ? green("PASS") : red("FAIL")} ${label.padEnd(28)} ${s.aggregate.toFixed(2)} ${dim("(" + s.judge + ")")}`);
+    }
+  }
+  if (Object.keys(d.limits).length) {
+    console.log(`\n${bold("Limits")}`);
+    console.log(dim("  " + Object.entries(d.limits).map(([k, v]) => `${k}=${v}`).join("  ")));
+  }
+  console.log("");
+}
+function renderHtml(d) {
+  const esc = (s) => String(s).replace(/[&<>]/g, (m) => ({ "&": "&amp;", "<": "&lt;", ">": "&gt;" }[m]));
+  const scoreClass = d.harnessScore == null ? "na" : d.harnessScore >= 80 ? "good" : d.harnessScore >= 50 ? "warn" : "bad";
+  const rosterRows = d.roster.map((a) => `<tr><td>${esc(a.role)}</td><td>${esc(a.model || "?")}</td><td>${esc(a.tier || "")}</td></tr>`).join("");
+  let tasksHtml;
+  if (!d.tasks.available) {
+    tasksHtml = `<p class="muted">Tasks unavailable — ${esc(d.tasks.reason)}</p>`;
+  } else {
+    tasksHtml = Object.entries(d.tasks.byState).map(([state, info]) => {
+      const items = info.items.map((it) => `<li><span class="tag">${it.kind === "pr" ? "PR" : "#"}${it.number}</span> <a href="${esc(it.url)}">${esc(it.title)}</a> <span class="muted">${it.assignees.length ? "@" + it.assignees.map(esc).join(", @") : "unassigned"}</span></li>`).join("");
+      return `<div class="state"><h4>${esc(state)} <span class="muted">${esc(info.label)} · ${info.items.length}</span></h4><ul>${items || '<li class="muted">none</li>'}</ul></div>`;
+    }).join("");
+  }
+  const evalList = d.evals.results || d.evals.scenarios || [];
+  const evalRows = evalList.map((s) => `<tr><td>${s.pass ? "✅" : "❌"}</td><td>${esc((s.type ? s.type + "/" : "") + s.scenario + (s.variant ? " [" + s.variant + "]" : ""))}</td><td>${s.aggregate.toFixed(2)}</td><td class="muted">${esc(s.judge)}</td></tr>`).join("");
+  const limits = Object.entries(d.limits).map(([k, v]) => `<code>${esc(k)}=${esc(v)}</code>`).join(" ");
+  return `<!doctype html><html><head><meta charset="utf-8"><title>AgentRig dashboard</title>
+<style>
+:root{color-scheme:light dark}body{font:14px/1.5 system-ui,sans-serif;margin:2rem auto;max-width:880px;padding:0 1rem}
+h1{font-size:1.3rem}h2{font-size:1rem;border-bottom:1px solid #8884;padding-bottom:.2rem;margin-top:2rem}
+.score{font-size:2rem;font-weight:700}.good{color:#1a7f37}.warn{color:#9a6700}.bad{color:#cf222e}.na{color:#888}
+table{border-collapse:collapse;width:100%}td,th{text-align:left;padding:.25rem .5rem;border-bottom:1px solid #8882}
+.muted{color:#888}.tag{display:inline-block;background:#8882;border-radius:4px;padding:0 .35rem;font-size:.8em}
+.state h4{margin:.6rem 0 .2rem}code{background:#8882;border-radius:4px;padding:0 .3rem}
+</style></head><body>
+<h1>AgentRig — harness dashboard</h1>
+<p class="muted">${esc(d.repo)} · generated ${esc(d.generatedAt)}</p>
+<h2>Harness Score</h2><p class="score ${scoreClass}">${d.harnessScore == null ? "n/a" : d.harnessScore + "%"}</p>
+<h2>Agents (${d.roster.length})</h2><table><tr><th>Role</th><th>Model</th><th>Tier</th></tr>${rosterRows}</table>
+<h2>Tasks</h2>${tasksHtml}
+<h2>Evals</h2>${evalRows ? `<table><tr><th></th><th>Scenario</th><th>Score</th><th>Judge</th></tr>${evalRows}</table><p class="muted">overall ${d.evals.overall.toFixed(2)}</p>` : '<p class="muted">No dynamic eval runs yet.</p>'}
+${limits ? `<h2>Limits</h2><p>${limits}</p>` : ""}
+</body></html>`;
+}

package/knowledge/templates/eval/RUBRIC.md ADDED Viewed

@@ -0,0 +1,94 @@
+# Harness evaluation rubric (principle 6)
+Two layers. **Layer A** is deterministic and model-free; **Layer B** is an independent,
+model-judged behavioral eval. Both write to `.agentrig/eval/results/` via `score.mjs`
+(never hand-edit JSON). The machine-readable rubric registry lives in
+[`axes.json`](./axes.json) — `score.mjs` validates every score against it.
+---
+## Layer A — Static harness audit
+Scored automatically by `static-audit.mjs` from the checks declared in `checks.json`. Each check maps to a principle and earns **0 / 0.5 / 1.0**.
+The aggregate is the **Harness Score** (0–100%). Run:
+```bash
+node .agentrig/eval/static-audit.mjs           # or: agentrig eval --static
+```
+Treat any principle scoring < 1.0 as a missing/weak artifact to fix.
+---
+## Layer B — Dynamic behavioral eval
+For each scenario, run the task through the harness, then have an **independent judge model**
+(different from the producer) score the result. Scoring is **strict 3-tier: 0 / 0.5 / 1.0**.
+Three rules, enforced by `score.mjs` against `axes.json`:
+1. **Issue code required.** Any axis < 1.0 (and observed) must carry an issue code **from that
+   axis's bounded registry** plus a one-line **evidence** string. Invented codes are rejected.
+2. **Confidence-gated.** An axis you couldn't observe is scored `na` (confidence 0) and excluded
+   from rollups — partial observability never contaminates the total.
+3. **Rollups are recomputed from axes.** Category and aggregate scores come from the axis data, not
+   from anything the judge asserts.
+### Multi-rubric lifecycle
+The eval covers the whole lifecycle, not just the final patch. Three rubric **types**, linked by the
+same `--task` id so you get a spec → run → review view:
+| `--type` | What it scores | Categories |
+|----------|----------------|------------|
+| `spec`   | task/issue spec quality (before work) | spec_quality (clarity, acceptance_criteria, scope_bounded, testability, context) |
+| `run`    | the implementation run | output_quality, agent_behavior, long_term_impact |
+| `review` | the reviewer's own behavior | review_quality (finding_correctness, severity_calibration, false_positive_rate, coverage, actionability, independence, blocking_decision) |
+### `run` axes (the most common)
+- **Output Quality** — `correctness`, `scope`, `tests`, `clarity`
+- **Agent Behavior** — `self_verification`, `gate_compliance`, `tool_discipline`, `escalation`
+- **Long-Term Impact** — `memory`, `regression_risk`, `maintainability`
+See `axes.json` for the full per-axis issue-code registries (e.g. `OQ-SCOPE-CHURN`,
+`AB-VERIFY-REDHANDOFF`, `LT-REGRESS-LIKELY`).
+### Saving and reading scores
+```bash
+# Save one rubric (any axis < 1.0 needs CODE:evidence; use `=na` for unobserved axes)
+node .agentrig/eval/score.mjs save --type run --task add-small-feature \
+  --scenario add-small-feature --judge <model> [--variant v2] [--run RID] \
+  --axis 'correctness=1.0' \
+  --axis 'scope=0.5:OQ-SCOPE-CHURN:left package-lock churn in the diff' \
+  --axis 'tests=na'
+node .agentrig/eval/score.mjs report                     # latest per scenario/variant + per-axis means
+node .agentrig/eval/score.mjs compare --scenario <id>    # A/B variants side by side
+```
+### A/B variant evaluation
+Run the **same scenario** under different harness versions (a prompt/skill/rule change) and save each
+under a `--variant`. `score.mjs compare` puts them side by side. **A change that lowers the score is
+a regression even if it "feels" better.** For deeper diffing, keep each run's `diff.patch` /
+`output` artifacts next to the score (see the `harness-eval` skill).
+### Harness lift — does it actually help? (with vs without)
+Prove the harness earns its keep in *your* repo by comparing a harness-on run to a harness-off
+baseline:
+```bash
+agentrig eval --dynamic --scenario <id> --variant harness    # harness ON
+agentrig eval --dynamic --scenario <id> --variant baseline   # bare agent, no AGENTS.md/rules/skills
+node .agentrig/eval/score.mjs compare --scenario <id> --baseline baseline
+```
+`compare --baseline` prints the per-axis and aggregate **delta** and a `HELPS`/`HURTS` verdict. A
+positive aggregate delta means installing AgentRig improved agent behavior here.
+### Threshold
+A scenario passes if its aggregate ≥ **0.8** (`passThreshold` in `axes.json`) with no observed axis
+at 0.
+---
+## Sandboxing
+Run dynamic evals under the guardrails in [`sandbox/eval-rules.md`](./sandbox/eval-rules.md): the
+agent works in a throwaway worktree and must **not push, open PRs, or merge** — the eval measures
+behavior, it must not mutate real branches.

package/knowledge/templates/eval/axes.json ADDED Viewed

@@ -0,0 +1,56 @@
+{
+  "$schema": "agentrig-eval-axes/1",
+  "description": "Rubric registry for the dynamic harness eval. Defines, per rubric TYPE, the categories, their axes, and a BOUNDED issue-code list per axis. score.mjs validates judge output against this: scores must be 0/0.5/1.0, and any axis < 1.0 (with confidence > 0) must carry an evidence string and an issue code drawn from that axis's list. Inspired by epichan's agent_scoring/issue_spec/review_scoring registries.",
+  "tiers": [0, 0.5, 1.0],
+  "passThreshold": 0.8,
+  "types": {
+    "run": {
+      "label": "Implementation run (the harness doing a task)",
+      "categories": {
+        "output_quality": {
+          "correctness": ["OQ-CORRECT-WRONG", "OQ-CORRECT-PARTIAL", "OQ-CORRECT-EDGE"],
+          "scope": ["OQ-SCOPE-CHURN", "OQ-SCOPE-UNRELATED", "OQ-SCOPE-INCOMPLETE"],
+          "tests": ["OQ-TESTS-MISSING", "OQ-TESTS-WEAK", "OQ-TESTS-BROKEN"],
+          "clarity": ["OQ-CLARITY-NAMING", "OQ-CLARITY-COMPLEXITY", "OQ-CLARITY-COMMENTS"]
+        },
+        "agent_behavior": {
+          "self_verification": ["AB-VERIFY-SKIPPED", "AB-VERIFY-REDHANDOFF", "AB-VERIFY-PARTIAL"],
+          "gate_compliance": ["AB-GATE-SKIPPED", "AB-GATE-HUMANLABEL", "AB-GATE-ORDER"],
+          "tool_discipline": ["AB-TOOLS-OVERLIMIT", "AB-TOOLS-UNSCOPED", "AB-TOOLS-NOISE"],
+          "escalation": ["AB-ESCALATE-LATE", "AB-ESCALATE-THRASH", "AB-ESCALATE-NONE"]
+        },
+        "long_term_impact": {
+          "memory": ["LT-MEMORY-NOLOG", "LT-MEMORY-REPEAT", "LT-MEMORY-DUP"],
+          "regression_risk": ["LT-REGRESS-LIKELY", "LT-REGRESS-UNTESTED"],
+          "maintainability": ["LT-MAINTAIN-DEBT", "LT-MAINTAIN-COUPLING"]
+        }
+      }
+    },
+    "spec": {
+      "label": "Task/issue spec quality (before implementation)",
+      "categories": {
+        "spec_quality": {
+          "clarity": ["SP-CLARITY-VAGUE", "SP-CLARITY-AMBIGUOUS"],
+          "acceptance_criteria": ["SP-AC-MISSING", "SP-AC-UNTESTABLE"],
+          "scope_bounded": ["SP-SCOPE-TOOBIG", "SP-SCOPE-UNBOUNDED"],
+          "testability": ["SP-TEST-NOORACLE", "SP-TEST-NOREPRO"],
+          "context": ["SP-CONTEXT-MISSING", "SP-CONTEXT-STALE"]
+        }
+      }
+    },
+    "review": {
+      "label": "Review process quality (the reviewer's behavior)",
+      "categories": {
+        "review_quality": {
+          "finding_correctness": ["RV-FIND-WRONG", "RV-FIND-UNSUPPORTED"],
+          "severity_calibration": ["RV-SEV-OVER", "RV-SEV-UNDER"],
+          "false_positive_rate": ["RV-FP-NOISE", "RV-FP-STYLE"],
+          "coverage": ["RV-COV-MISSEDBUG", "RV-COV-SHALLOW"],
+          "actionability": ["RV-ACT-VAGUE", "RV-ACT-NOREPRO"],
+          "independence": ["RV-IND-SAMEMODEL", "RV-IND-RUBBERSTAMP"],
+          "blocking_decision": ["RV-BLOCK-WRONGPASS", "RV-BLOCK-WRONGFAIL"]
+        }
+      }
+    }
+  }
+}