npm - role-os - Versions diffs - 2.9.0 → 2.9.1 - Mend

role-os 2.9.0 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/CHANGELOG.md +37 -0
package/README.es.md +28 -11
package/README.fr.md +25 -8
package/README.hi.md +25 -8
package/README.it.md +28 -11
package/README.ja.md +27 -10
package/README.md +25 -8
package/README.pt-BR.md +25 -8
package/README.zh.md +25 -8
package/bin/roleos.mjs +3 -2
package/package.json +1 -1
package/src/artifacts.mjs +14 -7
package/src/audit-cmd.mjs +23 -23
package/src/brainstorm-roles.mjs +6 -0
package/src/citation-panel.mjs +26 -1
package/src/composite.mjs +4 -0
package/src/entry.mjs +2 -2
package/src/hooks.mjs +107 -27
package/src/knowledge/analyze-artifact-evidence.mjs +19 -9
package/src/knowledge/fallback-policy.mjs +19 -7
package/src/knowledge/resolve-overlay.mjs +21 -8
package/src/knowledge/retrieve-for-dispatch.mjs +9 -4
package/src/mission-run.mjs +11 -2
package/src/packs-cmd.mjs +1 -1
package/src/review.mjs +11 -2
package/src/role-dossiers.json +1 -1
package/src/route.mjs +41 -8
package/src/run-cmd.mjs +0 -1
package/src/run.mjs +67 -15
package/src/session.mjs +3 -1
package/src/specialist/capability-gate.mjs +35 -18
package/src/specialist/dispatch.mjs +8 -3
package/src/specialist/registry.mjs +6 -0
package/src/specialist/shadow.mjs +13 -3
package/src/specialist/state.mjs +94 -26
package/src/state-machine.mjs +2 -2
package/src/status.mjs +4 -2
package/src/swarm/build-gate.mjs +11 -2
package/src/swarm/persist-bridge.mjs +4 -3
package/src/swarm-cmd.mjs +88 -46
package/src/verify-citations-cmd.mjs +17 -1
package/src/verify-citations.mjs +31 -7
package/starter-pack/README.md +22 -14
package/starter-pack/handbook.md +4 -4
package/starter-pack/policy/routing-rules.md +42 -0
package/starter-pack/policy/tool-permissions.md +21 -0
package/starter-pack/workflows/full-treatment.md +27 -16

package/src/specialist/shadow.mjs CHANGED Viewed

@@ -67,6 +67,13 @@ export function recordProbe(eventsPath, probe) {
  * narrow fine-tunes show step changes, so an early halt on a small sample would be a noise
  * trigger, not a real disagreement signal).
  *
+ * Only probes recorded AFTER the role's most recent clear-halt event count. A clear-halt is
+ * an operator decision that the disagreement evidence before it is adjudicated; without this
+ * boundary the stale disagreeing probes keep dominating the window and the role re-halts on
+ * the very next probe — the documented recovery command could never actually recover a role.
+ * The fresh-start window also restarts the ≥N thin-sample guard, so a cleared role gets a
+ * full new sample before it can halt again.
+ *
  * @param {string} eventsPath
  * @param {string} role
  * @param {number} [N]
@@ -74,8 +81,10 @@ export function recordProbe(eventsPath, probe) {
  * @returns {{ probes: number, agreed: number, rate: number, shouldHalt: boolean }}
  */
 export function checkHalt(eventsPath, role, N = SHADOW_DEFAULTS.N, tau = SHADOW_DEFAULTS.TAU) {
-  const events = readEvents(eventsPath, { role, kind: "shadow-probe" });
-  const window = events.slice(-N);
+  const events = readEvents(eventsPath, { role, kind: ["shadow-probe", "clear-halt"] });
+  const lastClear = events.map((e) => e.kind).lastIndexOf("clear-halt");
+  const probesSinceClear = events.slice(lastClear + 1).filter((e) => e.kind === "shadow-probe");
+  const window = probesSinceClear.slice(-N);
   const probes = window.length;
   const agreed = window.filter((e) => e.data && e.data.agreed === true).length;
   const rate = probes === 0 ? 1 : agreed / probes;
@@ -94,7 +103,8 @@ export function contrastiveHaltMessage({ role, probes, rate, tau }) {
   return (
     `specialist for role "${role}" halted: shadow-probe agreement ${pct}% over the last ` +
     `${probes} probes < required ${required}% (τ=${tau}). The specialist's verdicts have ` +
-    `drifted from Claude's on the same inputs. Clear with: roleos specialist clear-halt ${role}`
+    // Role names contain spaces ("Token Budget Analyst") — the copy-pasteable command must quote.
+    `drifted from Claude's on the same inputs. Clear with: roleos specialist clear-halt "${role}"`
   );
 }

package/src/specialist/state.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Specialist runtime state — quota counters + shadow-probe counter + halt state, per role.
+ * Specialist runtime state — quota window + shadow-probe counter + halt state.
  *
  * Hides one secret family (Parnas): the persistence of routing counters. Callers see
  * `get/inc/setHalt/getHalt`; they never touch the on-disk format. The on-disk format is a
@@ -8,20 +8,35 @@
  * State default path: `<repo>/.role-os/specialist-state.json`. Override with
  * `ROLEOS_SPECIALIST_STATE_PATH`.
  *
- * Quota: a sliding-window counter. We store the last `window` dispatch timestamps so the
- * window is a true rolling window, not aligned to wall clock. (A wall-clock window can be
- * timed against the edge, which the workload-quota anti-collapse argument is meant to
- * prevent.)
+ * Quota (v2): a sliding window over the last `windowSize` dispatches of EITHER route. Every
+ * dispatch is recorded as `{ t, route }` with route "specialist" | "claude"; the specialist
+ * share is count(route === "specialist") over the last `windowSize` entries. Recording BOTH
+ * routes is what makes the window actually roll — Claude-routed traffic pushes old specialist
+ * entries out, so the specialist keeps receiving its quota share indefinitely. (v1 recorded
+ * only specialist dispatches and never aged them out, so every role locked out permanently
+ * after `windowSize × quota` dispatches — the opposite of the policy's "sliding" promise.)
+ * The window counts dispatches, not seconds, so it is a true rolling window not aligned to
+ * wall clock (a wall-clock window can be timed against the edge, which the workload-quota
+ * anti-collapse argument is meant to prevent).
  */
 import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
 import { dirname } from "node:path";
-export const STATE_SCHEMA = "roleos-specialist-state/v1";
+export const STATE_SCHEMA = "roleos-specialist-state/v2";
+const STATE_SCHEMA_V1 = "roleos-specialist-state/v1";
+/** Default quota window (dispatches), matching the dispatcher's DEFAULT_WINDOW. */
+export const DEFAULT_WINDOW = 200;
+/**
+ * @typedef {object} DispatchRecord
+ * @property {number} t                        unix-ms timestamp
+ * @property {"specialist"|"claude"} route     where the dispatch was actually served
+ */
 /**
  * @typedef {object} RoleState
- * @property {number[]} dispatch_timestamps   sliding window of dispatch unix-ms
  * @property {number} probe_counter            count of dispatches since the last shadow probe
  * @property {object|null} halt                { reason, since } or null when not halted
  */
@@ -29,23 +44,32 @@ export const STATE_SCHEMA = "roleos-specialist-state/v1";
 /**
  * @typedef {object} StateFile
  * @property {string} schema
+ * @property {DispatchRecord[]} dispatches     sliding window of recent dispatches (both routes)
  * @property {Object<string, RoleState>} roles
  */
 export function emptyState() {
-  return { schema: STATE_SCHEMA, roles: {} };
+  return { schema: STATE_SCHEMA, dispatches: [], roles: {} };
 }
 export function loadState(path) {
   if (!existsSync(path)) return emptyState();
   try {
     const raw = JSON.parse(readFileSync(path, "utf8"));
+    // Tolerant v1 migration: a v1 file loads as v2 (old bare timestamps -> route "specialist")
+    // instead of erroring; the migrated shape is persisted on the next saveState.
+    if (raw && raw.schema === STATE_SCHEMA_V1 && typeof raw.roles === "object") {
+      return migrateV1(raw);
+    }
     if (!raw || raw.schema !== STATE_SCHEMA || typeof raw.roles !== "object") {
       // Refuse to silently accept a mis-shaped state file. Caller decides what to do.
       const err = new Error(`state file schema mismatch: got "${raw && raw.schema}", expected "${STATE_SCHEMA}"`);
       err.code = "STATE_SCHEMA_MISMATCH";
       throw err;
     }
+    raw.dispatches = (Array.isArray(raw.dispatches) ? raw.dispatches : [])
+      .map(normalizeDispatchRecord)
+      .filter(Boolean);
     return raw;
   } catch (err) {
     if (err.code === "STATE_SCHEMA_MISMATCH") throw err;
@@ -55,6 +79,35 @@ export function loadState(path) {
   }
 }
+/**
+ * v1 -> v2 migration. v1 recorded only specialist dispatches, as bare unix-ms numbers in a
+ * per-role `dispatch_timestamps` array. Each becomes `{ t, route: "specialist" }` in the
+ * shared v2 window (sorted oldest-first); role slots keep probe_counter/halt.
+ */
+function migrateV1(raw) {
+  const dispatches = [];
+  const roles = {};
+  for (const [role, slot] of Object.entries(raw.roles || {})) {
+    if (!slot || typeof slot !== "object") continue;
+    for (const t of Array.isArray(slot.dispatch_timestamps) ? slot.dispatch_timestamps : []) {
+      if (typeof t === "number" && Number.isFinite(t)) dispatches.push({ t, route: "specialist" });
+    }
+    roles[role] = {
+      probe_counter: typeof slot.probe_counter === "number" ? slot.probe_counter : 0,
+      halt: slot.halt ? { reason: slot.halt.reason, since: slot.halt.since } : null,
+    };
+  }
+  dispatches.sort((a, b) => a.t - b.t);
+  return { schema: STATE_SCHEMA, dispatches, roles };
+}
+/** Tolerate hand-edited windows: bare numbers read as v1-style specialist timestamps. */
+function normalizeDispatchRecord(r) {
+  if (typeof r === "number" && Number.isFinite(r)) return { t: r, route: "specialist" };
+  if (!r || typeof r !== "object" || typeof r.t !== "number" || !Number.isFinite(r.t)) return null;
+  return { t: r.t, route: r.route === "claude" ? "claude" : "specialist" };
+}
 export function saveState(path, state) {
   mkdirSync(dirname(path), { recursive: true });
   writeFileSync(path, JSON.stringify(state, null, 2) + "\n", "utf8");
@@ -63,39 +116,54 @@ export function saveState(path, state) {
 /** Get or create a role's slot in the state object. Mutates and returns the slot. */
 export function ensureRole(state, role) {
   if (!state.roles[role]) {
-    state.roles[role] = { dispatch_timestamps: [], probe_counter: 0, halt: null };
+    state.roles[role] = { probe_counter: 0, halt: null };
   }
   return state.roles[role];
 }
 /**
- * Record a specialist dispatch in the sliding window. Pure — returns updated state.
- * `windowSize` is in dispatches, not seconds; we keep the last `windowSize` timestamps.
- * `nowMs` must be supplied (no Date.now() inside this function for testability).
+ * Record a dispatch in the sliding window — EVERY dispatch, both routes. Pure — returns
+ * updated state. The window only rolls if Claude-routed traffic is recorded too; that is what
+ * lets the specialist share recover after quota pressure instead of locking out permanently.
+ *
+ * Signature (cross-agent contract): `recordDispatch(state, route)` with route "specialist" |
+ * "claude". Trailing params are optional: `windowSize` trims the stored window (dispatches,
+ * not seconds; default 200) and `nowMs` injects the timestamp (testability — defaults to
+ * Date.now()). Any route value that is not "claude" counts as "specialist" — the conservative
+ * direction (legacy callers only ever recorded specialist dispatches, and over-counting
+ * tightens the quota, which fails open to Claude).
  */
-export function recordDispatch(state, role, windowSize, nowMs) {
-  const slot = ensureRole(state, role);
-  slot.dispatch_timestamps.push(nowMs);
-  if (slot.dispatch_timestamps.length > windowSize) {
-    slot.dispatch_timestamps.splice(0, slot.dispatch_timestamps.length - windowSize);
+export function recordDispatch(state, route, windowSize = DEFAULT_WINDOW, nowMs = Date.now()) {
+  if (!Array.isArray(state.dispatches)) state.dispatches = [];
+  state.dispatches.push({ t: nowMs, route: route === "claude" ? "claude" : "specialist" });
+  const max = Math.max(1, windowSize);
+  if (state.dispatches.length > max) {
+    state.dispatches.splice(0, state.dispatches.length - max);
   }
   return state;
 }
 /**
  * Build a QuotaState view for the gate. `used` is "how many of the last `windowSize`
- * dispatches went to the specialist" — but here EVERY tracked timestamp is a specialist
- * dispatch (Claude calls are not tracked), so `used = dispatch_timestamps.length` and
- * `window` accounts for both — the gate computes share-if-added.
+ * dispatches (both routes) went to the specialist"; `window` is the fixed denominator —
+ * the gate computes share-if-added.
+ *
+ * Important: with fewer than `windowSize` dispatches recorded, the quota check is generous
+ * (a small numerator over a full-size denominator). That is intentional — the quota cap is
+ * meant to prevent collapse at scale, not to gate a cold start.
  *
- * Important: this caps `window` at `windowSize`. With fewer than `windowSize` dispatches,
- * the quota check is generous (a small denominator means small share). That is intentional
- * — the quota cap is meant to prevent collapse at scale, not to gate a cold start.
+ * `role` is kept for call-site/API stability; the v2 window is shared across roles (the state
+ * file is the repo's dispatch ledger). The 2-arg form `quotaStateFor(state, windowSize)` is
+ * tolerated.
  */
 export function quotaStateFor(state, role, windowSize) {
-  const slot = state.roles[role];
-  const used = slot ? slot.dispatch_timestamps.length : 0;
-  return { used, window: windowSize };
+  if (typeof role === "number" && windowSize === undefined) {
+    windowSize = role;
+  }
+  const size = Math.max(1, typeof windowSize === "number" && Number.isFinite(windowSize) ? windowSize : DEFAULT_WINDOW);
+  const entries = Array.isArray(state.dispatches) ? state.dispatches.slice(-size) : [];
+  const used = entries.filter((d) => d && d.route === "specialist").length;
+  return { used, window: size };
 }
 export function incrementProbeCounter(state, role) {

package/src/state-machine.mjs CHANGED Viewed

@@ -11,12 +11,12 @@
  * Key: current status. Value: array of allowed next statuses.
  */
 export const STEP_TRANSITIONS = {
-  pending:   ["active"],
+  pending:   ["active", "blocked"], // blocked: upstream failure or operator block
   active:    ["completed", "partial", "failed", "blocked"],
   completed: ["pending"],          // re-opened by escalation
   partial:   ["pending"],          // retried
   failed:    ["pending"],          // retried
-  blocked:   ["pending"],          // unblocked
+  blocked:   ["pending"],          // unblocked / retried / reopened
   skipped:   [],                   // terminal
 };

package/src/status.mjs CHANGED Viewed

@@ -9,7 +9,8 @@ function parsePacket(filePath) {
   if (!content) return null;
   const get = (heading) => {
-    const re = new RegExp(`## ${heading}\\n([\\s\\S]*?)(?=\\n## |\\n---|$)`);
+    // \r?\n keeps CRLF packets (git autocrlf / Windows editors) parseable
+    const re = new RegExp(`## ${heading}\\r?\\n([\\s\\S]*?)(?=\\r?\\n## |\\r?\\n---|$)`);
     const m = content.match(re);
     return m ? m[1].trim() : null;
   };
@@ -48,7 +49,8 @@ function parseVerdict(filePath) {
   if (!content) return null;
   const get = (heading) => {
-    const re = new RegExp(`## ${heading}\\n([\\s\\S]*?)(?=\\n## |$)`);
+    // \r?\n keeps CRLF verdicts (git autocrlf / Windows editors) parseable
+    const re = new RegExp(`## ${heading}\\r?\\n([\\s\\S]*?)(?=\\r?\\n## |$)`);
     const m = content.match(re);
     return m ? m[1].trim() : null;
   };

package/src/swarm/build-gate.mjs CHANGED Viewed

@@ -77,7 +77,7 @@ export function detectBuildSystem(cwd) {
  * @param {object} [options]
  * @param {object} [options.buildSystem] - Override auto-detected build system
  * @param {number} [options.timeout] - Per-command timeout in ms (default: 120000)
- * @returns {{ pass: boolean, lint: StepResult, typecheck: StepResult, test: StepResult, duration: number }}
+ * @returns {{ pass: boolean, vacuous: boolean, reason: string|null, lint: StepResult, typecheck: StepResult, test: StepResult, duration: number }}
  *
  * @typedef {{ status: "pass"|"fail"|"skip", output: string, duration: number }} StepResult
  */
@@ -90,10 +90,19 @@ export function runBuildGate(cwd, options = {}) {
   const typecheck = runStep(bs.typecheckCmd, cwd, timeout);
   const test = runStep(bs.testCmd, cwd, timeout);
-  const pass = lint.status !== "fail" && typecheck.status !== "fail" && test.status !== "fail";
+  // A gate that ran nothing verified nothing — fail loudly instead of
+  // passing vacuously, so an undetected build system can't silently
+  // disable the after-every-wave safety check (ANDON_AUTHORITY).
+  const vacuous = lint.status === "skip" && typecheck.status === "skip" && test.status === "skip";
+  const pass = !vacuous &&
+    lint.status !== "fail" && typecheck.status !== "fail" && test.status !== "fail";
   return {
     pass,
+    vacuous,
+    reason: vacuous
+      ? `No verification commands found (build system: ${bs.type}) — the gate could not verify anything. Add lint/typecheck/test commands or pass options.buildSystem.`
+      : null,
     lint,
     typecheck,
     test,

package/src/swarm/persist-bridge.mjs CHANGED Viewed

@@ -2,8 +2,9 @@
  * Evidence Persistence Bridge — Optional connection to dogfood-lab/testing-os.
  *
  * Converts swarm wave results into dogfood submission format and audit DB
- * payloads. The core swarm mission works without this — it's activated by
- * the --persist-evidence flag on `roleos swarm`.
+ * payloads. The core swarm mission works without this — this module is a
+ * library consumed by external tooling (e.g. dogfood-lab/testing-os); the
+ * roleos CLI does not currently invoke it.
  *
  * This mirrors the logic from dogfood-lab/testing-os/packages/dogfood-swarm/persist-results.js
  * but produces the payloads without requiring testing-os to be present.
@@ -70,7 +71,7 @@ export function buildScenarioResults(waveReports) {
       product_surface: surfaceFromDomain(domain),
       verdict: deriveVerdict(allFindings),
       step_results: [
-        { step: "audit", status: allFindings.length > 0 ? "pass" : "pass" },
+        { step: "audit", status: allFindings.length > 0 ? "pass" : "skip" },
         { step: "remediate", status: allRemediations.length > 0 ? "pass" : "skip" },
       ],
       evidence: {

package/src/swarm-cmd.mjs CHANGED Viewed

@@ -5,8 +5,8 @@
  * roleos swarm manifest               Show the swarm manifest
  * roleos swarm manifest --generate    Auto-detect domains and generate manifest
  * roleos swarm status                 Show swarm run progress
- * roleos swarm findings               List all findings by severity
- * roleos swarm approve                Approve the current feature gate
+ * roleos swarm findings               List findings captured from wave reports
+ * roleos swarm approve                Approve the current user gate
  * roleos swarm verify                 Run Phase 9 final verification
  *
  * This is a first-class shortcut into the dogfood-swarm mission.
@@ -14,9 +14,9 @@
  */
 import { existsSync, readFileSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
+import { join, resolve } from "node:path";
 import {
-  createPersistentRun, listRuns, loadRun, getPosition,
+  createPersistentRun, listRuns, loadRun, getPosition, saveRun,
 } from "./run.mjs";
 import {
   generateSwarmManifest, validateSwarmManifest,
@@ -25,6 +25,19 @@ import {
 // ── Constants ────────────────────────────────────────────────────────────────
 const MANIFEST_FILE = "swarm-manifest.json";
+const DEFAULT_STAGES = ["health-a", "health-b", "health-c", "feature", "treatment"];
+/**
+ * Filter listRuns output down to swarm runs.
+ * missionKey is authoritative; task keywords cover legacy runs.
+ */
+function filterSwarmRuns(runs) {
+  return runs.filter(r =>
+    r.missionKey === "dogfood-swarm" ||
+    r.task.toLowerCase().includes("swarm") ||
+    r.task.toLowerCase().includes("dogfood")
+  );
+}
 // ── Main dispatch ────────────────────────────────────────────────────────────
@@ -60,7 +73,7 @@ export async function swarmCommand(args) {
 // ── roleos swarm [run] ──────────────────────────────────────────────────────
-function cmdRun(extraArgs) {
+async function cmdRun(extraArgs) {
   const cwd = process.cwd();
   const manifestPath = join(cwd, MANIFEST_FILE);
@@ -99,11 +112,18 @@ function cmdRun(extraArgs) {
     ? extraArgs.join(" ")
     : `Dogfood swarm of ${manifest.repo || "current repo"}`;
-  // Create persistent run via the dogfood-swarm mission
-  const run = createPersistentRun(taskDesc, cwd, { forceMission: "dogfood-swarm" });
+  // Create persistent run via the dogfood-swarm mission.
+  // Forwarding the manifest routes step construction through buildSwarmSteps,
+  // so steps carry stage/domain/gate metadata and scale with the domains.
+  const run = await createPersistentRun(taskDesc, cwd, {
+    forceMission: "dogfood-swarm",
+    manifest,
+  });
   const domainCount = manifest.domains?.length || 0;
-  const stageCount = manifest.stages?.length || 4;
+  const stages = Array.isArray(manifest.stages) && manifest.stages.length > 0
+    ? manifest.stages
+    : DEFAULT_STAGES;
   console.log(`\nDogfood Swarm Started`);
   console.log(`─────────────────────`);
@@ -111,18 +131,19 @@ function cmdRun(extraArgs) {
   console.log(`Repo:     ${manifest.repo || "unknown"}`);
   console.log(`Type:     ${manifest.repoType || "unknown"}`);
   console.log(`Domains:  ${domainCount}`);
-  console.log(`Stages:   ${stageCount} (health-a → health-b → health-c → feature)`);
+  console.log(`Stages:   ${stages.length} (${stages.join(" → ")})`);
   console.log(`Steps:    ${run.steps.length}`);
   console.log(`\nDomain Agents:`);
   for (const d of manifest.domains || []) {
     console.log(`  - ${d.id}: ${d.role} (${d.patterns.length} patterns)`);
   }
   console.log(`\nStage Pipeline:`);
-  console.log(`  1. Health-A  Bug/Security Fix     (loop until 0 CRITICAL + 0 HIGH)`);
-  console.log(`  2. Health-B  Proactive Hardening   (user review gate)`);
-  console.log(`  3. Health-C  Humanization          (loop until 0 CRITICAL + 0 HIGH)`);
-  console.log(`  4. Feature   Capability Audit      (user approval gate)`);
-  console.log(`  5. Final     Synthesis + Verdict`);
+  console.log(`  1. Health-A   Bug/Security Fix      (loop until 0 CRITICAL + 0 HIGH)`);
+  console.log(`  2. Health-B   Proactive Hardening   (user review gate)`);
+  console.log(`  3. Health-C   Humanization          (loop until 0 CRITICAL + 0 HIGH)`);
+  console.log(`  4. Feature    Capability Audit      (user approval gate)`);
+  console.log(`  5. Treatment  Full Treatment        (shipcheck, docs, handbook — user gate)`);
+  console.log(`  6. Final      Synthesis + Verdict`);
   console.log(`\nRun 'roleos next' to begin the first wave.`);
   console.log(`Run 'roleos swarm status' to check progress.\n`);
 }
@@ -209,11 +230,7 @@ function generateManifestFile(cwd, manifestPath) {
 function cmdStatus() {
   const cwd = process.cwd();
-  const runs = listRuns(cwd);
-  const swarmRuns = runs.filter(r =>
-    r.task.toLowerCase().includes("swarm") ||
-    r.task.toLowerCase().includes("dogfood")
-  );
+  const swarmRuns = filterSwarmRuns(listRuns(cwd));
   if (swarmRuns.length === 0) {
     console.log("\nNo swarm runs found. Start one with: roleos swarm\n");
@@ -258,11 +275,7 @@ function cmdStatus() {
 function cmdFindings() {
   const cwd = process.cwd();
-  const runs = listRuns(cwd);
-  const swarmRuns = runs.filter(r =>
-    r.task.toLowerCase().includes("swarm") ||
-    r.task.toLowerCase().includes("dogfood")
-  );
+  const swarmRuns = filterSwarmRuns(listRuns(cwd));
   if (swarmRuns.length === 0) {
     console.log("\nNo swarm runs found.\n");
@@ -275,12 +288,22 @@ function cmdFindings() {
     return;
   }
-  // Extract findings from wave-report artifacts
+  // Extract findings from wave-report artifacts.
+  // step.artifact is usually a short reference (often a file path) — when it
+  // points at a readable file, scan the file content instead of the reference.
   const findings = [];
   for (const step of full.steps) {
     if (step.produces === "wave-report" && step.artifact) {
-      // Try to parse findings from artifact
-      const match = step.artifact.match(/## findings\n([\s\S]*?)(?=\n## |$)/i);
+      let body = step.artifact;
+      try {
+        const artifactPath = resolve(cwd, step.artifact);
+        if (existsSync(artifactPath)) {
+          body = readFileSync(artifactPath, "utf-8");
+        }
+      } catch { /* not a readable file — treat the reference as inline content */ }
+      // Normalize line endings so CRLF artifacts parse on Windows checkouts
+      const match = body.replace(/\r\n/g, "\n").match(/## findings\n([\s\S]*?)(?=\n## |$)/i);
       if (match) {
         findings.push({
           domain: step.domain || "unknown",
@@ -292,7 +315,9 @@ function cmdFindings() {
   }
   if (findings.length === 0) {
-    console.log("\nNo findings captured yet. Run waves first.\n");
+    console.log("\nNo findings captured yet. Run waves first.");
+    console.log("Findings are read from each wave-report artifact's '## Findings' section");
+    console.log("(complete steps with a wave-report file path to make them scannable).\n");
     return;
   }
@@ -309,11 +334,7 @@ function cmdFindings() {
 function cmdApprove() {
   const cwd = process.cwd();
-  const runs = listRuns(cwd);
-  const swarmRuns = runs.filter(r =>
-    r.task.toLowerCase().includes("swarm") ||
-    r.task.toLowerCase().includes("dogfood")
-  );
+  const swarmRuns = filterSwarmRuns(listRuns(cwd));
   if (swarmRuns.length === 0) {
     console.log("\nNo swarm runs found.\n");
@@ -326,9 +347,10 @@ function cmdApprove() {
     return;
   }
-  // Find the next gate step waiting for approval
+  // Find the next gate step waiting for approval (not yet approved)
   const gateStep = full.steps.find(s =>
-    s.isGate && s.userApproval && s.status === "active"
+    s.isGate && s.userApproval && s.status === "active" &&
+    s.userApprovalStatus !== "approved"
   );
   if (!gateStep) {
@@ -337,8 +359,26 @@ function cmdApprove() {
     return;
   }
-  console.log(`\nApproved: ${gateStep.stage} gate`);
-  console.log(`The swarm will proceed to the next stage.\n`);
+  // Record the approval on the persisted run — an approval that isn't
+  // saved is not a control.
+  const approvedAt = new Date().toISOString();
+  gateStep.userApprovalStatus = "approved";
+  gateStep.approvedAt = approvedAt;
+  gateStep.note = gateStep.note
+    ? `${gateStep.note}; user approved ${gateStep.stage} gate`
+    : `User approved ${gateStep.stage} gate`;
+  full.interventions = full.interventions || [];
+  full.interventions.push({
+    type: "gate-approval",
+    stepIndex: gateStep.index,
+    stage: gateStep.stage,
+    timestamp: approvedAt,
+  });
+  saveRun(cwd, full);
+  console.log(`\nApproved: ${gateStep.stage} gate (recorded at ${approvedAt})`);
+  console.log(`The swarm will proceed to the next stage.`);
+  console.log(`Complete the gate step with 'roleos complete <swarm-gate-artifact>' to advance.\n`);
 }
 // ── roleos swarm verify ─────────────────────────────────────────────────────
@@ -358,10 +398,13 @@ function cmdVerify() {
   console.log(`\nSwarm Verification`);
   console.log(`──────────────────`);
+  let healthy = true;
   // 1. Manifest valid
   if (validation.valid) {
     console.log(`  [PASS] Manifest is valid`);
   } else {
+    healthy = false;
     console.log(`  [FAIL] Manifest has ${validation.issues.length} issue(s)`);
     for (const i of validation.issues) console.log(`         - ${i}`);
   }
@@ -371,15 +414,12 @@ function cmdVerify() {
   if (domainCount >= 1 && domainCount <= 10) {
     console.log(`  [PASS] ${domainCount} domains (within 1-10 range)`);
   } else {
+    healthy = false;
     console.log(`  [FAIL] ${domainCount} domains (must be 1-10)`);
   }
   // 3. Check for swarm run
-  const runs = listRuns(cwd);
-  const swarmRuns = runs.filter(r =>
-    r.task.toLowerCase().includes("swarm") ||
-    r.task.toLowerCase().includes("dogfood")
-  );
+  const swarmRuns = filterSwarmRuns(listRuns(cwd));
   if (swarmRuns.length > 0) {
     const latest = swarmRuns[0];
@@ -393,7 +433,8 @@ function cmdVerify() {
     console.log(`  [INFO] No swarm runs yet — run 'roleos swarm' to start`);
   }
-  console.log("");
+  console.log(`\n${healthy ? "Swarm infrastructure verified." : "Verification failed — fix the issues above and re-run."}\n`);
+  if (!healthy) process.exit(1);
 }
 // ── Help ────────────────────────────────────────────────────────────────────
@@ -407,16 +448,17 @@ Usage:
   roleos swarm manifest            Show the swarm manifest
   roleos swarm manifest --generate Auto-detect domains and generate manifest
   roleos swarm status              Show swarm run progress
-  roleos swarm findings            List all findings by severity
-  roleos swarm approve             Approve the current feature gate
+  roleos swarm findings            List findings captured from wave reports
+  roleos swarm approve             Approve the current user gate
   roleos swarm verify              Verify manifest and run state
   roleos swarm help                Show this help
-The swarm runs 4 stages in sequence:
+The swarm runs 5 stages in sequence:
   1. Health-A   Bug/Security Fix      (loops until 0 CRITICAL + 0 HIGH)
   2. Health-B   Proactive Hardening   (user review gate)
   3. Health-C   Humanization          (loops until 0 CRITICAL + 0 HIGH)
   4. Feature    Capability Audit      (user approval before execution)
+  5. Treatment  Full Treatment        (shipcheck, docs, handbook — user gate)
 Each stage dispatches parallel domain agents with exclusive file ownership.
 A build gate (lint + typecheck + test) runs after every wave.

package/src/verify-citations-cmd.mjs CHANGED Viewed

@@ -7,7 +7,7 @@
  * operator can branch on it.
  */
-import { writeFileSync } from "node:fs";
+import { writeFileSync, existsSync } from "node:fs";
 import { resolve, dirname, basename } from "node:path";
 import { runCitationGate } from "./verify-citations.mjs";
@@ -25,6 +25,22 @@ export async function verifyCitationsCommand(args) {
     throw err;
   }
+  // The CLI positional is a dispatch FILE (inline-markdown input is the library API). Validate it
+  // up front: a missing file or odd extension must fail loudly — never silently degrade into
+  // scanning the path STRING for citations and reporting "no citations found".
+  if (!existsSync(dispatch)) {
+    const err = new Error(`dispatch file not found: ${dispatch}`);
+    err.exitCode = 1;
+    err.hint = "Pass the path to an existing research dispatch (.md, .markdown, or .json).";
+    throw err;
+  }
+  if (!/\.(md|markdown|json)$/i.test(dispatch)) {
+    const err = new Error(`unsupported dispatch file extension: ${dispatch}`);
+    err.exitCode = 1;
+    err.hint = "Supported extensions: .md, .markdown, .json (matched case-insensitively).";
+    throw err;
+  }
   const result = runCitationGate(dispatch, {
     provider: flags.provider || "ollama",
     ...(typeof flags.intent === "string" ? { intent: flags.intent } : {}),