kanban-system 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +76 -0
- package/CLAUDE.md +108 -0
- package/README.md +272 -0
- package/agents/_TEMPLATE.md +42 -0
- package/agents/backend-agent.md +81 -0
- package/agents/deploy-gate-agent.md +73 -0
- package/agents/frontend-agent.md +73 -0
- package/agents/monitor-agent.md +65 -0
- package/agents/orchestrator.md +91 -0
- package/agents/reviewer-codex.md +51 -0
- package/bin/cli.js +171 -0
- package/config.example.js +99 -0
- package/docs/adapting-to-your-project.md +155 -0
- package/docs/example-apex.md +86 -0
- package/docs/the-pattern.md +92 -0
- package/hooks/launchd.plist.template +66 -0
- package/hooks/pre-push.sample +61 -0
- package/lib/config.cjs +138 -0
- package/lib/detect/_template.cjs +63 -0
- package/lib/detect/rules.json +28 -0
- package/lib/detect/sentry.cjs +86 -0
- package/lib/detect/vercel.cjs +62 -0
- package/lib/gate/index.cjs +182 -0
- package/lib/runner/adapters/both.cjs +33 -0
- package/lib/runner/adapters/claude.cjs +119 -0
- package/lib/runner/adapters/codex.cjs +43 -0
- package/lib/runner/adapters/reviewer.cjs +91 -0
- package/lib/runner/budget.cjs +75 -0
- package/lib/runner/index.cjs +93 -0
- package/lib/runner/result-merger.cjs +58 -0
- package/lib/runner/worktree-manager.cjs +64 -0
- package/lib/watch/scheduler.cjs +164 -0
- package/package.json +59 -0
- package/playbooks/_TEMPLATE.html +54 -0
- package/playbooks/build-fail.html +57 -0
- package/playbooks/deploy-rollback.html +53 -0
- package/playbooks/e2e-regression.html +58 -0
- package/playbooks/playbook.css +26 -0
- package/playbooks/sentry-spike.html +53 -0
- package/server/kanban.cjs +1152 -0
- package/skills/archive.md +18 -0
- package/skills/gate.md +22 -0
- package/skills/standup.md +24 -0
- package/skills/triage.md +24 -0
- package/ui/kanban.html +628 -0
- package/ui/styles/kanban.css +436 -0
- package/ui/styles/progress.css +315 -0
- package/ui/styles/tokens.css +291 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_meta": {
|
|
3
|
+
"version": "1.0",
|
|
4
|
+
"doc": "Declarative anomaly rules. Each block maps a detector's signals to a severity + a routing target. The scheduler re-reads this on every sweep, so you can tune thresholds without a restart. Whether a detector actually runs is decided by config.js -> detectors (or the WATCH_ENABLED env var); the `enabled` flag here is only the fallback when neither is set.",
|
|
5
|
+
"_envHints": {
|
|
6
|
+
"sentry": ["SENTRY_AUTH_TOKEN", "SENTRY_ORG_SLUG", "SENTRY_PROJECT_SLUG"],
|
|
7
|
+
"vercel": ["VERCEL_TOKEN", "VERCEL_PROJECT_ID", "VERCEL_TEAM_ID"]
|
|
8
|
+
}
|
|
9
|
+
},
|
|
10
|
+
"rules": [
|
|
11
|
+
{
|
|
12
|
+
"detector": "sentry",
|
|
13
|
+
"enabled": false,
|
|
14
|
+
"signals": [
|
|
15
|
+
{ "id": "error-rate-spike", "window": "1h", "metric": "error_rate", "threshold": "> 3x rolling baseline", "severity": "high", "routesTo": "frontend-agent", "note": "Error rate well above the rolling baseline — usually a regression from the last deploy." },
|
|
16
|
+
{ "id": "new-issue-spike", "window": "1h", "metric": "new_issues", "threshold": "> 5 in window", "severity": "medium", "routesTo": "frontend-agent" },
|
|
17
|
+
{ "id": "heartbeat", "window": "12h", "metric": "—", "threshold": "alive", "severity": "low", "routesTo": "orchestrator" }
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"detector": "vercel",
|
|
22
|
+
"enabled": false,
|
|
23
|
+
"signals": [
|
|
24
|
+
{ "id": "deploy-failure", "window": "any", "metric": "build_state", "threshold": "= ERROR", "severity": "high", "routesTo": "deploy-gate-agent" }
|
|
25
|
+
]
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sentry detector — polls the Sentry API for error-rate spikes and new issues.
|
|
3
|
+
*
|
|
4
|
+
* Env: SENTRY_AUTH_TOKEN, SENTRY_ORG_SLUG, SENTRY_PROJECT_SLUG
|
|
5
|
+
*
|
|
6
|
+
* Degrades gracefully when env is missing — emits a single low-severity
|
|
7
|
+
* "config-missing" alert (deduped 24h) so the operator sees the gap on the board,
|
|
8
|
+
* instead of crashing the sweep.
|
|
9
|
+
*/
|
|
10
|
+
const TOKEN = process.env.SENTRY_AUTH_TOKEN || "";
|
|
11
|
+
const ORG = process.env.SENTRY_ORG_SLUG || "";
|
|
12
|
+
const PROJECT = process.env.SENTRY_PROJECT_SLUG || "";
|
|
13
|
+
const BASE = "https://sentry.io/api/0";
|
|
14
|
+
|
|
15
|
+
/**
 * Authenticated GET against the Sentry REST API.
 * Rejects with `sentry <path>: <status>` on any non-2xx response.
 */
async function sentryFetch(pathname) {
  const response = await fetch(`${BASE}${pathname}`, {
    headers: { Authorization: `Bearer ${TOKEN}` },
  });
  if (!response.ok) {
    throw new Error(`sentry ${pathname}: ${response.status}`);
  }
  return response.json();
}
|
|
20
|
+
|
|
21
|
+
/**
 * Sweep entry point for the Sentry detector.
 *
 * @param {object} ruleSet - this detector's block from rules.json (not read here;
 *   the thresholds below are hard-coded — NOTE(review): presumably meant to mirror
 *   rules.json, confirm they stay in sync).
 * @param {object} state - mutable scheduler state. Reads/writes
 *   state["sentry:baseline:error_rate"]; reads state.alerts[...] timestamps for
 *   dedupe. NOTE(review): nothing in this function writes state.alerts — it looks
 *   like the scheduler records alert timestamps after a sweep; verify against the caller.
 * @returns {Promise<Array<object>>} zero or more alert objects of shape
 *   { source, signal, severity, message, threshold, value, routesTo, evidence }.
 */
async function run(ruleSet, state) {
  // Missing env: degrade gracefully — one low-severity nudge, deduped to 24h,
  // instead of crashing the sweep.
  if (!TOKEN || !ORG || !PROJECT) {
    const k = "sentry:config-missing";
    if (state.alerts[k] && Date.now() - state.alerts[k] < 24 * 3600 * 1000) return [];
    return [{
      source: "sentry", signal: "config-missing", severity: "low",
      message: "SENTRY_AUTH_TOKEN / SENTRY_ORG_SLUG / SENTRY_PROJECT_SLUG not set — Sentry polling disabled. Add them to .env.",
      threshold: "env present", value: "missing", routesTo: "orchestrator",
      evidence: { needs: ["SENTRY_AUTH_TOKEN", "SENTRY_ORG_SLUG", "SENTRY_PROJECT_SLUG"] },
    }];
  }

  const alerts = [];
  const baselineKey = "sentry:baseline:error_rate";
  // Previous EMA baseline in events/hour; null on the very first sweep.
  const baseline = state[baselineKey] || null;
  try {
    // Hourly event stats — legacy endpoint that accepts a project slug.
    // Returns [[unix_ts, count], ...]; the last bucket is the in-progress hour.
    const stats = await sentryFetch(`/projects/${ORG}/${PROJECT}/stats/?stat=received&since=${Math.floor(Date.now() / 1000) - 6 * 3600}`);
    const counts = (Array.isArray(stats) ? stats : []).map((b) => b[1] || 0);
    const recent = counts.slice(-1)[0] || 0; // in-progress hour
    const prior = counts.slice(0, -1); // completed buckets in the 6h window
    const avgPriorHours = prior.length ? prior.reduce((a, b) => a + b, 0) / prior.length : 0;

    // Spike = >3x the EMA baseline AND more than 5 events/h (floor avoids
    // flagging noise on near-idle projects). Evaluated BEFORE the baseline
    // update below so the spike itself can't inflate the comparison value.
    if (baseline && recent > baseline * 3 && recent > 5) {
      alerts.push({
        source: "sentry", signal: "error-rate-spike", severity: "high",
        message: `Sentry error rate ${recent}/h — ${(recent / Math.max(1, baseline)).toFixed(1)}× baseline (${baseline.toFixed(1)}/h).`,
        threshold: `${(baseline * 3).toFixed(1)}/h`, value: `${recent}/h`, routesTo: "frontend-agent",
        evidence: { window: "1h current bucket", baseline, recent, hourlyAvg: avgPriorHours },
      });
    }
    // Rolling EMA baseline (alpha 0.2 for hourly samples); seeded from the
    // larger of the current bucket and the 6h average on the first sweep.
    state[baselineKey] = baseline ? baseline * 0.8 + recent * 0.2 : Math.max(recent, avgPriorHours);

    // New issues — Sentry rejects statsPeriod=1h, so fetch 24h and filter client-side.
    const qs = new URLSearchParams({ statsPeriod: "24h", query: "is:unresolved", limit: "100" }).toString();
    const issues24h = await sentryFetch(`/projects/${ORG}/${PROJECT}/issues/?${qs}`);
    const oneHourAgo = Date.now() - 60 * 60 * 1000;
    const newIssues1h = (Array.isArray(issues24h) ? issues24h : []).filter((i) => i.firstSeen && new Date(i.firstSeen).getTime() >= oneHourAgo);
    if (newIssues1h.length > 5) {
      alerts.push({
        source: "sentry", signal: "new-issue-spike", severity: "medium",
        message: `${newIssues1h.length} new unresolved issues in the last hour (24h total: ${issues24h.length}).`,
        threshold: "> 5", value: `${newIssues1h.length}`, routesTo: "frontend-agent",
        // Only the top 3 issues are attached, to keep the alert payload small.
        evidence: { topIssues: newIssues1h.slice(0, 3).map((i) => ({ title: i.title, count: i.count, firstSeen: i.firstSeen, link: i.permalink })) },
      });
    }

    // Heartbeat — confirm the path is alive (deduped 12h)
    const hbKey = "sentry:heartbeat";
    if (!state.alerts[hbKey] || Date.now() - state.alerts[hbKey] > 12 * 3600 * 1000) {
      alerts.push({
        source: "sentry", signal: "heartbeat", severity: "low",
        message: `Sentry polling OK. Last 1h: ${recent} events; 6h avg ${avgPriorHours.toFixed(1)}/h; baseline ${(state[baselineKey] || 0).toFixed(1)}.`,
        threshold: "alive", value: "ok", routesTo: "orchestrator",
        evidence: { recent, avgPriorHours, baseline: state[baselineKey], org: ORG, project: PROJECT },
      });
    }
  } catch (e) {
    // Report API failures as a low-severity alert instead of throwing, so one
    // broken detector cannot abort the whole sweep.
    return [{ source: "sentry", signal: "api-error", severity: "low", message: `Sentry API error: ${e.message}`, routesTo: "orchestrator", evidence: { error: e.message } }];
  }
  return alerts;
}

module.exports = { run };
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vercel detector — polls the Vercel API for deploy state (and is a stub for 5xx
|
|
3
|
+
* rate / bundle delta, which need a log-drain integration to do properly).
|
|
4
|
+
*
|
|
5
|
+
* Env: VERCEL_TOKEN, VERCEL_PROJECT_ID, VERCEL_TEAM_ID (only for team accounts)
|
|
6
|
+
*
|
|
7
|
+
* Degrades gracefully when env is missing.
|
|
8
|
+
*/
|
|
9
|
+
const TOKEN = process.env.VERCEL_TOKEN || "";
|
|
10
|
+
const PROJECT = process.env.VERCEL_PROJECT_ID || "";
|
|
11
|
+
const TEAM = process.env.VERCEL_TEAM_ID || "";
|
|
12
|
+
const BASE = "https://api.vercel.com";
|
|
13
|
+
|
|
14
|
+
/**
 * Authenticated GET against the Vercel REST API, appending `teamId` when
 * VERCEL_TEAM_ID is set. Rejects with `vercel <path>: <status>` on non-2xx.
 *
 * Improvement: build the URL with the WHATWG URL API instead of hand-picking
 * a "?" vs "&" separator — searchParams.set() appends the teamId correctly
 * whether or not `pathname` already carries a query string.
 */
async function vercelFetch(pathname) {
  const url = new URL(pathname, BASE);
  if (TEAM) url.searchParams.set("teamId", TEAM);
  const r = await fetch(url, { headers: { Authorization: `Bearer ${TOKEN}` } });
  if (!r.ok) throw new Error(`vercel ${pathname}: ${r.status}`);
  return await r.json();
}
|
|
21
|
+
|
|
22
|
+
/**
 * Sweep entry point for the Vercel detector.
 *
 * @param {object} ruleSet - this detector's block from rules.json (not read here).
 * @param {object} state - mutable scheduler state; reads state.alerts[...] for
 *   dedupe and records the most recent READY deploy under
 *   state["vercel:last-deploy:<uid>"]. NOTE(review): alert timestamps in
 *   state.alerts appear to be written by the scheduler — verify against the caller.
 * @returns {Promise<Array<object>>} zero or more alert objects.
 *
 * Fix: the config-missing and api-error alerts now carry an `evidence` payload,
 * matching the alert shape emitted by the sibling sentry detector.
 */
async function run(ruleSet, state) {
  // Missing env: one low-severity nudge, deduped to 24h, instead of crashing.
  if (!TOKEN || !PROJECT) {
    const k = "vercel:config-missing";
    if (state.alerts[k] && Date.now() - state.alerts[k] < 24 * 3600 * 1000) return [];
    return [{
      source: "vercel", signal: "config-missing", severity: "low",
      message: "VERCEL_TOKEN / VERCEL_PROJECT_ID not set — Vercel polling disabled. Add them to .env.",
      threshold: "env present", value: "missing", routesTo: "orchestrator",
      evidence: { needs: ["VERCEL_TOKEN", "VERCEL_PROJECT_ID"] },
    }];
  }

  const alerts = [];
  try {
    const deploys = await vercelFetch(`/v6/deployments?projectId=${PROJECT}&limit=5`);
    const list = deploys.deployments || [];
    // First failed/canceled deploy in the 5 most recent; deduped per uid so a
    // persistent failure only alerts once.
    const failed = list.find((d) => d.state === "ERROR" || d.state === "CANCELED");
    if (failed) {
      const key = `vercel:deploy-fail:${failed.uid}`;
      if (!state.alerts[key]) {
        alerts.push({
          source: "vercel", signal: "deploy-failure", severity: "high",
          message: `Vercel deploy ${failed.uid} state=${failed.state} (${(failed.meta && failed.meta.githubCommitMessage) || failed.name}).`,
          threshold: "state != ERROR/CANCELED", value: failed.state, routesTo: "deploy-gate-agent",
          evidence: { uid: failed.uid, branch: failed.meta && failed.meta.githubCommitRef, commit: failed.meta && (failed.meta.githubCommitSha || "").slice(0, 7), url: failed.url },
        });
      }
    }
    // Bundle delta / 5xx burst would need a log-drain integration; out of scope here.
    // Note the most recent READY deploy so a future bundle-inspect step has a marker.
    const recent = list.find((d) => d.state === "READY");
    if (recent) {
      const seenKey = `vercel:last-deploy:${recent.uid}`;
      if (!state[seenKey]) state[seenKey] = recent.created;
    }
  } catch (e) {
    // Report API failures instead of throwing so the sweep survives.
    return [{ source: "vercel", signal: "api-error", severity: "low", message: `Vercel API error: ${e.message}`, routesTo: "orchestrator", evidence: { error: e.message } }];
  }
  return alerts;
}

module.exports = { run };
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Pre-deploy gate.
|
|
4
|
+
*
|
|
5
|
+
* Runs config.js → deployCommands serially (fail-fast) from config.js → repoPath,
|
|
6
|
+
* then an optional bundle-inspection stage (if config.js → buildOutputDir is set).
|
|
7
|
+
* Each stage writes its own log under data/runs/gate-<ts>/, plus a summary report.md.
|
|
8
|
+
* On failure, auto-creates a kanban task in the "needs human" column (disable with
|
|
9
|
+
* GATE_NO_KANBAN=1).
|
|
10
|
+
*
|
|
11
|
+
* Exit codes: 0 = pass; N = the (1-based) index of the failed deployCommands stage;
|
|
12
|
+
* if bundle inspection warns and STRICT_BUNDLE=1, exit = deployCommands.length + 1.
|
|
13
|
+
*
|
|
14
|
+
* Env: GATE_TIMEOUT_MS (per stage, default 600000), STRICT_BUNDLE, GATE_NO_KANBAN.
|
|
15
|
+
*
|
|
16
|
+
* Usage: npm run gate (or node lib/gate/index.cjs)
|
|
17
|
+
*/
|
|
18
|
+
const fs = require("fs");
|
|
19
|
+
const http = require("http");
|
|
20
|
+
const path = require("path");
|
|
21
|
+
const { spawnSync } = require("child_process");
|
|
22
|
+
const config = require("../config.cjs");
|
|
23
|
+
|
|
24
|
+
const REPO = config.repoPath;
|
|
25
|
+
const HARNESS_ROOT = config.repoRoot;
|
|
26
|
+
const RUNS_DIR = path.join(HARNESS_ROOT, "data", "runs");
|
|
27
|
+
const GATE_TIMEOUT = config.gateTimeoutMs;
|
|
28
|
+
const KANBAN_PORT = config.port;
|
|
29
|
+
|
|
30
|
+
/** Filesystem-safe UTC timestamp, e.g. "2024-01-02T03-04-05". */
function ts() {
  return new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
}

/** Create directory `p` (and parents) if it does not already exist. */
function ensureDir(p) {
  if (!fs.existsSync(p)) {
    fs.mkdirSync(p, { recursive: true });
  }
}

/** Current git branch of the app repo; "unknown" when git is unavailable. */
function currentBranch() {
  try {
    const out = spawnSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], { cwd: REPO, encoding: "utf-8" }).stdout || "";
    return out.trim() || "unknown";
  } catch {
    return "unknown";
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Execute one gate stage as a child process and capture its full transcript.
 * Writes `<dir>/<name>.log` with exit code, duration, command line, stdout and
 * stderr. A killed or timed-out process (null status) is mapped to -1.
 */
function runStage(name, cmd, args, dir, env = {}) {
  const startedAt = Date.now();
  const logFile = path.join(dir, `${name}.log`);
  process.stdout.write(`[gate] ${name} → ${cmd} ${args.join(" ")} (cwd=${REPO})\n`);
  const child = spawnSync(cmd, args, {
    cwd: REPO,
    timeout: GATE_TIMEOUT,
    encoding: "utf-8",
    env: { ...process.env, FORCE_COLOR: "0", ...env },
  });
  const stdout = child.stdout || "";
  const stderr = child.stderr || "";
  const status = child.status === null ? -1 : child.status;
  fs.writeFileSync(
    logFile,
    `# ${name}\nexit_code: ${status}\nduration_ms: ${Date.now() - startedAt}\ncwd: ${REPO}\ncmd: ${cmd} ${args.join(" ")}\n\n## stdout\n${stdout}\n\n## stderr\n${stderr}\n`,
  );
  return { name, passed: status === 0, status, duration: Date.now() - startedAt, logFile, stdout, stderr };
}
|
|
47
|
+
|
|
48
|
+
/**
 * Recursively measure every regular file under `root`.
 * Returns { total, count, largest } where `largest` is the five biggest files
 * as { path: <relative to root>, size } sorted by descending size.
 */
function walkSize(root) {
  const files = [];
  let total = 0;
  const visit = (dir) => {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        visit(full);
      } else if (entry.isFile()) {
        const size = fs.statSync(full).size;
        total += size;
        files.push({ path: path.relative(root, full), size });
      }
    }
  };
  visit(root);
  files.sort((a, b) => b.size - a.size);
  return { total, count: files.length, largest: files.slice(0, 5) };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Optional bundle-inspection stage: measures the built output directory and
 * warns when total size grew more than 10% since the last passing gate
 * (baseline read from data/runs/last-gate.json, written by finalize()).
 *
 * Skips (as a pass) when config.buildOutputDir is unset or the directory is
 * missing. Returns a stage-shaped result; `warning` is set on a size regression.
 *
 * Fix: the stage-name expression was duplicated verbatim in the early-return
 * path — computed once now.
 */
function inspectBundle(dir) {
  // Stage name follows the NN-prefix convention of the deployCommands stages.
  const stageName = `${String(config.deployCommands.length + 1).padStart(2, "0")}-inspect`;
  if (!config.buildOutputDir) return { name: stageName, passed: true, status: 0, duration: 0, skipped: true, note: "buildOutputDir not configured" };
  const outDir = path.join(REPO, config.buildOutputDir);
  if (!fs.existsSync(outDir)) return { name: stageName, passed: true, status: 0, duration: 0, skipped: true, note: `${config.buildOutputDir}/ not found` };
  const sizes = walkSize(outDir);
  const totalKB = Math.round(sizes.total / 1024);
  fs.writeFileSync(path.join(dir, `${stageName}.log`),
    `total: ${totalKB} KB\nfile_count: ${sizes.count}\nlargest:\n` + sizes.largest.map((f) => ` ${(f.size / 1024).toFixed(1)} KB ${f.path}`).join("\n"));
  // Baseline from the last passing gate; a missing or corrupt file just means
  // no delta check this run.
  let baseline = null;
  const lastGateFile = path.join(RUNS_DIR, "last-gate.json");
  if (fs.existsSync(lastGateFile)) { try { baseline = JSON.parse(fs.readFileSync(lastGateFile, "utf-8")); } catch {} }
  let warning = null;
  if (baseline && baseline.totalKB) {
    const delta = ((totalKB - baseline.totalKB) / baseline.totalKB) * 100;
    if (delta > 10) warning = `bundle +${delta.toFixed(1)}% (was ${baseline.totalKB} KB, now ${totalKB} KB)`;
  }
  return { name: stageName, passed: !warning, status: warning ? 1 : 0, duration: 0, totalKB, fileCount: sizes.count, warning };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Wrap up a gate run: write report.md, persist the bundle baseline on a pass,
 * and (on failure, unless GATE_NO_KANBAN=1) kick off the async kanban notification.
 *
 * @param {string} dir - this run's directory under data/runs/
 * @param {string} tag - run tag (timestamp or caller-supplied)
 * @param {Array<object>} stages - stage results from runStage()/inspectBundle()
 * @param {number} exitCode - 0 = pass; otherwise the 1-based failed stage index
 * @returns {object} { passed, exitCode, stages, reportPath, dir, pending }
 *   where `pending` is the in-flight notify Promise (or null) that the CLI
 *   entry point awaits before process.exit().
 */
function finalize(dir, tag, stages, exitCode) {
  const reportPath = path.join(dir, "report.md");
  const totalDuration = stages.reduce((n, s) => n + (s.duration || 0), 0);
  const passed = exitCode === 0;
  const branch = currentBranch();
  // Markdown summary: verdict header plus one table row per stage.
  let md = `# Gate Run · ${tag}\n\n**Branch**: ${branch}\n**Verdict**: ${passed ? "✓ PASS" : "✗ FAIL"} (exit ${exitCode})\n**Total duration**: ${(totalDuration / 1000).toFixed(1)}s\n\n## Stages\n\n| # | stage | status | duration | note |\n|---|---|---|---|---|\n`;
  stages.forEach((s, i) => {
    const status = s.skipped ? "⊘ skipped" : s.passed ? "✓ pass" : "✗ fail";
    md += `| ${i + 1} | ${s.name} | ${status} | ${s.duration ? (s.duration / 1000).toFixed(1) + "s" : "—"} | ${s.warning || s.note || ""} |\n`;
  });
  md += `\n## Logs\n\n`;
  stages.forEach((s) => { if (s.logFile) md += `- ${path.relative(HARNESS_ROOT, s.logFile)}\n`; });
  fs.writeFileSync(reportPath, md);

  // Only a passing run updates the bundle-size baseline, so a failed build
  // can't poison future delta comparisons.
  if (passed) {
    const insp = stages.find((s) => s.name.endsWith("-inspect"));
    if (insp && insp.totalKB) { ensureDir(RUNS_DIR); fs.writeFileSync(path.join(RUNS_DIR, "last-gate.json"), JSON.stringify({ tag, totalKB: insp.totalKB, fileCount: insp.fileCount, completedAt: new Date().toISOString() }, null, 2)); }
  }

  // Fire-and-track the kanban notification; failures there are logged, never thrown.
  let pending = null;
  if (!passed && process.env.GATE_NO_KANBAN !== "1") {
    pending = notifyFailure({ tag, dir, branch, stages, exitCode, reportPath }).catch((err) => process.stderr.write(`[gate] kanban notify failed: ${err.message}\n`));
  }
  process.stdout.write(`\n[gate] ${passed ? "✓ PASS" : "✗ FAIL"} (exit ${exitCode}) — report: ${reportPath}\n`);
  return { passed, exitCode, stages, reportPath, dir, pending };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Run the full pre-deploy gate: each config.deployCommands stage serially
 * (fail-fast), then the optional bundle-inspection stage.
 *
 * @param {object} [opts] - { tag } to override the run tag (defaults to ts()).
 * @returns {object} finalize() result; exitCode is 0 on pass, the 1-based
 *   index of the failed stage, or deployCommands.length + 1 when bundle
 *   inspection warns under STRICT_BUNDLE=1.
 */
function runGate(opts = {}) {
  const tag = opts.tag || ts();
  const dir = path.join(RUNS_DIR, `gate-${tag}`);
  ensureDir(dir);
  const stages = [];

  // No configured commands: warn the operator but treat as a pass, so a fresh
  // install doesn't block pushes.
  if (!config.deployCommands.length) {
    process.stdout.write("[gate] config.js → deployCommands is empty. Set your build/test commands. Treating as pass.\n");
    return finalize(dir, tag, [{ name: "00-noop", passed: true, status: 0, duration: 0, skipped: true, note: "no deployCommands configured" }], 0);
  }

  // Serial, fail-fast: the exit code is the 1-based index of the failed stage.
  let i = 0;
  for (const dc of config.deployCommands) {
    i++;
    // Default stage name: zero-padded index + command with non-alphanumerics stripped.
    const name = dc.name || `${String(i).padStart(2, "0")}-${(dc.cmd || "stage").replace(/[^a-z0-9]+/gi, "")}`;
    const s = runStage(name, dc.cmd, dc.args || [], dir, dc.env || {});
    stages.push(s);
    if (!s.passed) return finalize(dir, tag, stages, i);
  }

  // Bundle inspection only fails the gate when STRICT_BUNDLE=1; otherwise the
  // warning is recorded in the report but the run still passes.
  const insp = inspectBundle(dir);
  stages.push(insp);
  let exitCode = 0;
  if (!insp.passed && process.env.STRICT_BUNDLE === "1") exitCode = config.deployCommands.length + 1;
  return finalize(dir, tag, stages, exitCode);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Create a high-priority kanban task for a failed gate run, then fire a
 * best-effort Slack ping for it.
 *
 * @param {object} ctx - { tag, dir, branch, stages, exitCode, reportPath }
 * @returns {Promise<object|null>} the created task (as returned by the kanban
 *   API) — the caller attaches the .catch(), so rejections propagate here.
 */
function notifyFailure({ tag, dir, branch, stages, exitCode, reportPath }) {
  // First non-skipped failing stage; falls back to the raw exit code when the
  // failure didn't come from a stage (e.g. strict-bundle exit).
  const failed = stages.find((s) => !s.passed && !s.skipped);
  const stageName = failed ? failed.name : `exit-${exitCode}`;
  const reportRel = path.relative(HARNESS_ROOT, reportPath);
  const stageRows = stages.map((s, i) => `${i + 1}. ${s.name} — ${s.skipped ? "skipped" : s.passed ? "pass" : "FAIL"}${s.warning ? ` (${s.warning})` : ""}${s.note ? ` (${s.note})` : ""}`).join("\n");
  const description = [
    `Gate run **${tag}** blocked the push on branch \`${branch}\`.`,
    "",
    `**Failed stage**: ${stageName} (exit ${exitCode})`,
    `**Report**: ${reportRel}`,
    `**Run dir**: ${path.relative(HARNESS_ROOT, dir)}`,
    "", "## Stages", stageRows,
    "", "Resolve the failed stage, then re-push. To bypass (audited): `KANBAN_GATE_BYPASS=1 git push`.",
  ].join("\n");
  const payload = {
    subject: `[BUILD-FAIL] ${branch}: ${stageName} (gate ${tag})`,
    description, status: "in_review", priority: "high", agent: "deploy-gate-agent",
    reportPath: reportRel, reportSummary: `${stageName} failed (exit ${exitCode})`,
  };
  return postKanban("/api/tasks", payload).then((task) => {
    if (task && task.id) {
      // Slack ping is best-effort: its failure must not fail the notify chain.
      postKanban(`/api/tasks/${task.id}/slack`, { text: `[BLOCKED] deploy-gate-agent: ${payload.subject}. Stage ${stageName} failed. Report: ${reportRel}` }).catch(() => {});
      process.stdout.write(`[gate] kanban task #${task.id} created (status=in_review, priority=high)\n`);
    }
    return task;
  });
}
|
|
162
|
+
|
|
163
|
+
/**
 * POST a JSON body to the local kanban server and resolve with the parsed
 * JSON response (null when the body isn't JSON). Rejects on non-2xx status,
 * socket error, or a 4s timeout.
 */
function postKanban(pathname, body) {
  return new Promise((resolve, reject) => {
    const payload = JSON.stringify(body);
    const options = {
      host: "127.0.0.1",
      port: KANBAN_PORT,
      path: pathname,
      method: "POST",
      headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(payload) },
      timeout: 4000,
    };
    const req = http.request(options, (res) => {
      let raw = "";
      res.on("data", (chunk) => { raw += chunk; });
      res.on("end", () => {
        if (res.statusCode >= 200 && res.statusCode < 300) {
          try { resolve(JSON.parse(raw)); } catch { resolve(null); }
        } else {
          reject(new Error(`HTTP ${res.statusCode}: ${raw.slice(0, 200)}`));
        }
      });
    });
    req.on("error", reject);
    req.on("timeout", () => req.destroy(new Error("timeout")));
    req.write(payload);
    req.end();
  });
}
|
|
176
|
+
|
|
177
|
+
module.exports = { runGate, inspectBundle };

// CLI entry point: run the gate, wait for any in-flight kanban notification
// (exposed by finalize() as `pending`) to settle, then exit with the gate's
// stage-indexed exit code so the pre-push hook can act on it.
if (require.main === module) {
  const result = runGate();
  Promise.resolve(result.pending).finally(() => process.exit(result.exitCode));
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* "both" adapter — runs Claude and Codex in parallel on independent worktrees,
|
|
3
|
+
* compares their verdicts, returns a combined result. Disagreement → needs_human.
|
|
4
|
+
*/
|
|
5
|
+
const claude = require("./claude.cjs");
|
|
6
|
+
const codex = require("./codex.cjs");
|
|
7
|
+
const wtm = require("../worktree-manager.cjs");
|
|
8
|
+
const merger = require("../result-merger.cjs");
|
|
9
|
+
|
|
10
|
+
/**
 * Run Claude and Codex in parallel, each in its own isolated git worktree,
 * then merge their verdicts. Disagreement routes the task to a human.
 *
 * @param {object} task - kanban task ({ id, agent, ... })
 * @param {object} [opts] - passed through to both adapters
 * @returns {Promise<object>} combined result with both raw results, the merged
 *   verdict/confidence, and needsHuman when the two runners disagreed.
 *
 * Fixes vs. the original:
 *  - wt2 was created before the try block, so a createWorktree failure for
 *    codex leaked the claude worktree — it is now created inside the try.
 *  - removeWorktree(wt1) throwing skipped removeWorktree(wt2); cleanup is now
 *    nested in try/finally so both removals are always attempted.
 */
async function run(task, opts = {}) {
  const start = Date.now();
  const wt1 = wtm.createWorktree(task.id, "claude");
  let wt2 = null;
  let r1, r2;
  try {
    wt2 = wtm.createWorktree(task.id, "codex");
    [r1, r2] = await Promise.all([
      claude.run(task, { ...opts, worktree: wt1 }),
      codex.run(task, { ...opts, worktree: wt2 }),
    ]);
  } finally {
    // Always attempt both removals, even if the first one throws.
    try {
      wtm.removeWorktree(wt1);
    } finally {
      if (wt2) wtm.removeWorktree(wt2);
    }
  }
  const merged = merger.compare(r1, r2);
  return {
    runner: "both", duration_ms: Date.now() - start,
    claude: r1, codex: r2,
    agreement: merged.agreement, verdict: merged.verdict, confidence: merged.confidence,
    diffPath: merged.diffPath, needsHuman: merged.agreement === "disagreed",
  };
}

module.exports = { run };
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude CLI adapter — spawns `claude` to execute a task inside the isolated git
|
|
3
|
+
* worktree it's handed. Captures output under data/runs/task-<id>/claude/.
|
|
4
|
+
*
|
|
5
|
+
* Falls back to a deterministic stub verdict if the `claude` CLI isn't on PATH
|
|
6
|
+
* (useful in CI / before the CLI is installed).
|
|
7
|
+
*
|
|
8
|
+
* The agent definition for the task is pulled from <repo-root>/agents/<task.agent>.md.
|
|
9
|
+
*/
|
|
10
|
+
const fs = require("fs");
|
|
11
|
+
const path = require("path");
|
|
12
|
+
const { spawnSync, execSync } = require("child_process");
|
|
13
|
+
const config = require("../../config.cjs");
|
|
14
|
+
|
|
15
|
+
const RUNS_DIR = path.join(config.repoRoot, "data", "runs");
|
|
16
|
+
const AGENTS_DIR = path.join(config.repoRoot, "agents");
|
|
17
|
+
|
|
18
|
+
/** True when a `claude` executable is resolvable on PATH (via `which`). */
function isClaudeAvailable() {
  try {
    execSync("which claude", { stdio: "ignore" });
    return true;
  } catch {
    return false;
  }
}

/** Create directory `p` (and parents) if it does not already exist. */
function ensureDir(p) {
  if (!fs.existsSync(p)) {
    fs.mkdirSync(p, { recursive: true });
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * Load the markdown agent definition `<AGENTS_DIR>/<name>.md`.
 * Returns null for a falsy name or when the file does not exist.
 */
function loadAgentDef(name) {
  if (!name) return null;
  const file = path.join(AGENTS_DIR, `${name}.md`);
  if (!fs.existsSync(file)) return null;
  return fs.readFileSync(file, "utf-8");
}
|
|
26
|
+
|
|
27
|
+
/**
 * Assemble the prompt handed to an executor CLI: task metadata, the agent
 * definition, the task description, and the required report format. Pure
 * string construction; output is identical to the previous array/join form.
 */
function buildPrompt(task, agentDef) {
  const definition = agentDef || "(no agent definition found for this agent)";
  const description = task.description || "(no description)";
  return `# Task Execution

**Task ID**: ${task.id}
**Subject**: ${task.subject}
**Agent**: ${task.agent || "unassigned"}
**Status**: ${task.status}
**Application repo**: ${config.repoPath}

## Agent Definition

${definition}

## Task Description

${description}

## Required Output

Produce a single markdown report with this structure:

\`\`\`markdown
---
verdict: pass | fail | flag | needs_human
confidence: 0.0-1.0
---

## Summary
<one paragraph>

## Findings
- <bullet, with file:line where applicable>

## Recommended action
<one of: merge | regenerate | escalate | hold>
\`\`\`

Execute the task per your agent definition. Do not ask clarifying questions — make best-effort decisions and document them.`;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Extract verdict, confidence, and summary from an executor's markdown report.
 * Expects YAML-ish frontmatter (--- verdict/confidence ---) and a "## Summary"
 * section; any missing piece falls back to needs_human / 0 / "".
 */
function parseVerdict(md) {
  const result = { verdict: "needs_human", confidence: 0, summary: "" };
  const front = md.match(/^---\n([\s\S]*?)\n---/);
  if (front) {
    const verdictMatch = front[1].match(/^verdict:\s*(\w+)/m);
    if (verdictMatch) result.verdict = verdictMatch[1];
    const confMatch = front[1].match(/^confidence:\s*([\d.]+)/m);
    if (confMatch) result.confidence = parseFloat(confMatch[1]);
  }
  const summaryMatch = md.match(/## Summary\s*\n([\s\S]*?)(?=\n##|$)/);
  if (summaryMatch) result.summary = summaryMatch[1].trim();
  return result;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Deterministic needs_human report used when the runner CLI is not installed,
 * so the pipeline still produces a well-formed verdict instead of crashing.
 */
function stubVerdict(task, runner) {
  return `---
verdict: needs_human
confidence: 0.5
---

## Summary
[stub:${runner}] CLI not on PATH. Task #${task.id} needs manual execution (or install the ${runner} CLI).

## Findings
- ${runner} CLI not found — install it or add it to PATH for the runner

## Recommended action
escalate`;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Execute a task with the `claude` CLI (or the stub fallback when it isn't on
 * PATH). Writes prompt.md, stdout.md, report.md and optionally stderr.log
 * under data/runs/task-<id>/claude/.
 *
 * @param {object} task - kanban task ({ id, subject, agent, status, description })
 * @param {object} [opts] - { worktree, timeout } — worktree.path becomes the cwd
 *   (falls back to config.repoPath); timeout defaults to 600000 ms.
 * @returns {Promise<object>} { runner, mode: "live"|"stub", duration_ms, status,
 *   verdict, confidence, reportPath, summary }
 */
async function run(task, opts = {}) {
  const wt = opts.worktree;
  const runDir = path.join(RUNS_DIR, `task-${task.id}`, "claude");
  ensureDir(runDir);
  const start = Date.now();
  const prompt = buildPrompt(task, loadAgentDef(task.agent));
  fs.writeFileSync(path.join(runDir, "prompt.md"), prompt);

  // status -1 = never ran / killed (spawnSync reports null on kill/timeout).
  let stdout = "", stderr = "", status = -1, mode = "live";
  if (!isClaudeAvailable()) {
    // CLI missing: fall back to a deterministic needs_human stub report.
    mode = "stub"; stdout = stubVerdict(task, "claude");
  } else {
    const r = spawnSync("claude", ["--print", "--model", "opus", prompt], {
      cwd: wt ? wt.path : config.repoPath, encoding: "utf-8", timeout: opts.timeout || 600000,
      env: { ...process.env, FORCE_COLOR: "0" },
    });
    stdout = r.stdout || ""; stderr = r.stderr || ""; status = r.status === null ? -1 : r.status;
  }
  fs.writeFileSync(path.join(runDir, "stdout.md"), stdout);
  if (stderr) fs.writeFileSync(path.join(runDir, "stderr.log"), stderr);
  const parsed = parseVerdict(stdout);
  // report.md is a copy of stdout — downstream consumers read the report path.
  const reportPath = path.join(runDir, "report.md");
  fs.writeFileSync(reportPath, stdout);
  return { runner: "claude", mode, duration_ms: Date.now() - start, status, verdict: parsed.verdict, confidence: parsed.confidence, reportPath, summary: parsed.summary };
}

// Helpers are exported so codex.cjs can reuse the same prompt/verdict handling.
module.exports = { run, isClaudeAvailable, parseVerdict, stubVerdict, buildPrompt, loadAgentDef };
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codex CLI adapter — same shape as claude.cjs, but spawns the `codex` CLI.
|
|
3
|
+
* Falls back to a stub verdict if `codex` isn't on PATH.
|
|
4
|
+
*/
|
|
5
|
+
const fs = require("fs");
|
|
6
|
+
const path = require("path");
|
|
7
|
+
const { spawnSync, execSync } = require("child_process");
|
|
8
|
+
const config = require("../../config.cjs");
|
|
9
|
+
const claude = require("./claude.cjs");
|
|
10
|
+
|
|
11
|
+
const RUNS_DIR = path.join(config.repoRoot, "data", "runs");
|
|
12
|
+
|
|
13
|
+
/** True when a `codex` executable is resolvable on PATH (via `which`). */
function isCodexAvailable() {
  try {
    execSync("which codex", { stdio: "ignore" });
    return true;
  } catch {
    return false;
  }
}

/** Create directory `p` (and parents) if it does not already exist. */
function ensureDir(p) {
  if (!fs.existsSync(p)) {
    fs.mkdirSync(p, { recursive: true });
  }
}
|
|
15
|
+
|
|
16
|
+
/**
 * Run a task through the Codex CLI and return a normalized result object.
 *
 * Artifacts (prompt.md, stdout.md, stderr.log, report.md) are written under
 * data/runs/task-<id>/codex/.
 *
 * @param {object} task - Kanban task; `id` names the run directory and
 *   `agent` selects the agent definition used to build the prompt.
 * @param {object} [opts] - Options.
 * @param {object} [opts.worktree] - Worktree descriptor; when present its
 *   `path` becomes the CLI's cwd, otherwise config.repoPath is used.
 * @param {number} [opts.timeout] - Max CLI runtime in ms (default 600000).
 * @returns {Promise<object>} { runner, mode, duration_ms, status, verdict,
 *   confidence, reportPath, summary } — same shape as the Claude adapter.
 */
async function run(task, opts = {}) {
  const wt = opts.worktree;
  const runDir = path.join(RUNS_DIR, `task-${task.id}`, "codex");
  ensureDir(runDir);
  const start = Date.now();
  // Reuse the same prompt builder + agent loader as the Claude adapter, for parity.
  const prompt = claude.buildPrompt(task, claude.loadAgentDef(task.agent));
  fs.writeFileSync(path.join(runDir, "prompt.md"), prompt);

  let stdout = "";
  let stderr = "";
  let status = -1;
  let mode = "live";
  if (!isCodexAvailable()) {
    // No `codex` on PATH — fall back to a canned verdict so the pipeline keeps moving.
    mode = "stub";
    stdout = claude.stubVerdict(task, "codex");
  } else {
    const r = spawnSync("codex", ["exec", "--quiet", prompt], {
      cwd: wt ? wt.path : config.repoPath,
      encoding: "utf-8",
      // `??` (not `||`) so an explicit caller-supplied 0 is not replaced.
      timeout: opts.timeout ?? 600000,
      env: { ...process.env, FORCE_COLOR: "0" },
    });
    stdout = r.stdout || "";
    stderr = r.stderr || "";
    status = r.status === null ? -1 : r.status;
    // BUGFIX: spawnSync reports spawn failures and timeout kills via `r.error`
    // (with status === null); previously that error was silently dropped and
    // the run looked like an empty-output exit. Preserve it in the stderr log.
    if (r.error) {
      stderr += `${stderr ? "\n" : ""}[spawn error] ${r.error.message}`;
    }
  }
  fs.writeFileSync(path.join(runDir, "stdout.md"), stdout);
  if (stderr) fs.writeFileSync(path.join(runDir, "stderr.log"), stderr);
  const parsed = claude.parseVerdict(stdout);
  // report.md mirrors stdout.md so downstream consumers have a stable filename.
  const reportPath = path.join(runDir, "report.md");
  fs.writeFileSync(reportPath, stdout);
  return {
    runner: "codex",
    mode,
    duration_ms: Date.now() - start,
    status,
    verdict: parsed.verdict,
    confidence: parsed.confidence,
    reportPath,
    summary: parsed.summary,
  };
}
|
|
42
|
+
|
|
43
|
+
// `run` mirrors the Claude adapter's result shape; `isCodexAvailable` lets
// callers (e.g. the reviewer flow) probe for the CLI before dispatching.
module.exports = { run, isCodexAvailable };
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reviewer adapter — one model executes, the other reviews the result.
|
|
3
|
+
*
|
|
4
|
+
* reviewer:codex — Claude does the work, Codex reviews
|
|
5
|
+
* reviewer:claude — Codex does the work, Claude reviews
|
|
6
|
+
*
|
|
7
|
+
* The reviewer gets the executor's full report (no worktree, no code change) and is
|
|
8
|
+
* asked to flag concerns. If it flags a needs_human/fail issue, the final verdict is
|
|
9
|
+
* downgraded to needs_human and the task moves to the "needs human" column.
|
|
10
|
+
*/
|
|
11
|
+
const fs = require("fs");
|
|
12
|
+
const path = require("path");
|
|
13
|
+
const claude = require("./claude.cjs");
|
|
14
|
+
const codex = require("./codex.cjs");
|
|
15
|
+
const wtm = require("../worktree-manager.cjs");
|
|
16
|
+
const config = require("../../config.cjs");
|
|
17
|
+
|
|
18
|
+
const RUNS_DIR = path.join(config.repoRoot, "data", "runs");
|
|
19
|
+
|
|
20
|
+
/**
 * Cross-model reviewer flow: one adapter executes the task in an isolated
 * worktree, the other reviews the executor's report and can veto it.
 *
 * @param {object} task - Kanban task (`id`, `subject`, `agent`, `description`).
 * @param {object} [opts] - Runner options. `opts.runner` selects the mode:
 *   "reviewer:codex" → Claude executes / Codex reviews (default);
 *   "reviewer:claude" → the opposite. Remaining options are forwarded to
 *   both adapter runs.
 * @returns {Promise<object>} Merged result: executor + reviewer sub-results,
 *   `agreement` ("agreed" | "partial" | "disagreed"), the final verdict, the
 *   lower of the two confidences, and `needsHuman`.
 */
async function run(task, opts = {}) {
  const start = Date.now();
  const mode = opts.runner || "reviewer:codex";
  const reviewerName = mode.split(":")[1];
  const executorName = reviewerName === "codex" ? "claude" : "codex";

  // Stage 1 — executor does the work in an isolated worktree.
  const wt = wtm.createWorktree(task.id, executorName);
  let exec;
  try {
    const adapter = executorName === "claude" ? claude : codex;
    exec = await adapter.run(task, { ...opts, worktree: wt });
  } finally {
    // Always clean up the worktree, even if the executor throws.
    wtm.removeWorktree(wt);
  }

  // Stage 2 — reviewer inspects the executor's report (no worktree, no code change).
  const reviewerAdapter = reviewerName === "claude" ? claude : codex;
  const reviewTask = {
    id: `${task.id}-review`,
    subject: `Review: ${task.subject}`,
    agent: task.agent,
    status: "in_review",
    description: [
      `## Original task #${task.id}`,
      task.description || "",
      "",
      `## Executor (${executorName}) report`,
      "",
      `verdict: ${exec.verdict} (confidence: ${exec.confidence})`,
      "",
      exec.summary,
      "",
      "## Review request",
      "Inspect the executor's verdict, summary, and findings. Surface any risk the executor missed.",
      "Output the same frontmatter format. If you concur, set verdict = same. If you find blocking issues, set verdict = needs_human and explain in Findings.",
    ].join("\n"),
  };
  const review = await reviewerAdapter.run(reviewTask, { ...opts });

  let finalVerdict = exec.verdict;
  let agreement = "agreed";
  if (review.verdict === "needs_human" || review.verdict === "fail") {
    // Reviewer veto: downgrade and route to the "needs human" column.
    finalVerdict = "needs_human";
    agreement = "disagreed";
  } else if (review.verdict !== "same" && review.verdict !== exec.verdict) {
    // BUGFIX: the review prompt above tells a concurring reviewer to answer
    // verdict = "same"; previously "same" never matched exec.verdict and a
    // full concurrence was misreported as "partial" agreement.
    agreement = "partial";
  }

  const dir = path.join(RUNS_DIR, `task-${task.id}`);
  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
  fs.writeFileSync(path.join(dir, "reviewer-summary.md"), [
    `# Reviewer flow — ${mode}`,
    "",
    `**Executor**: ${executorName} → verdict=${exec.verdict} (conf ${exec.confidence})`,
    `**Reviewer**: ${reviewerName} → verdict=${review.verdict} (conf ${review.confidence})`,
    `**Agreement**: ${agreement}`,
    `**Final verdict**: ${finalVerdict}`,
    "",
    "## Executor summary",
    exec.summary || "—",
    "",
    "## Reviewer summary",
    review.summary || "—",
  ].join("\n"));

  return {
    runner: mode,
    duration_ms: Date.now() - start,
    executor: exec,
    reviewer: review,
    agreement,
    verdict: finalVerdict,
    // Overall confidence is the weaker of the two opinions.
    confidence: Math.min(exec.confidence || 0, review.confidence || 0),
    needsHuman: finalVerdict === "needs_human",
    reportPath: exec.reportPath,
  };
}
|
|
90
|
+
|
|
91
|
+
// Single entry point: the reviewer flow is only ever invoked as a runner.
module.exports = { run };
|