role-os 2.9.0 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +37 -0
  2. package/README.es.md +28 -11
  3. package/README.fr.md +25 -8
  4. package/README.hi.md +25 -8
  5. package/README.it.md +28 -11
  6. package/README.ja.md +27 -10
  7. package/README.md +25 -8
  8. package/README.pt-BR.md +25 -8
  9. package/README.zh.md +25 -8
  10. package/bin/roleos.mjs +3 -2
  11. package/package.json +1 -1
  12. package/src/artifacts.mjs +14 -7
  13. package/src/audit-cmd.mjs +23 -23
  14. package/src/brainstorm-roles.mjs +6 -0
  15. package/src/citation-panel.mjs +26 -1
  16. package/src/composite.mjs +4 -0
  17. package/src/entry.mjs +2 -2
  18. package/src/hooks.mjs +107 -27
  19. package/src/knowledge/analyze-artifact-evidence.mjs +19 -9
  20. package/src/knowledge/fallback-policy.mjs +19 -7
  21. package/src/knowledge/resolve-overlay.mjs +21 -8
  22. package/src/knowledge/retrieve-for-dispatch.mjs +9 -4
  23. package/src/mission-run.mjs +11 -2
  24. package/src/packs-cmd.mjs +1 -1
  25. package/src/review.mjs +11 -2
  26. package/src/role-dossiers.json +1 -1
  27. package/src/route.mjs +41 -8
  28. package/src/run-cmd.mjs +0 -1
  29. package/src/run.mjs +67 -15
  30. package/src/session.mjs +3 -1
  31. package/src/specialist/capability-gate.mjs +35 -18
  32. package/src/specialist/dispatch.mjs +8 -3
  33. package/src/specialist/registry.mjs +6 -0
  34. package/src/specialist/shadow.mjs +13 -3
  35. package/src/specialist/state.mjs +94 -26
  36. package/src/state-machine.mjs +2 -2
  37. package/src/status.mjs +4 -2
  38. package/src/swarm/build-gate.mjs +11 -2
  39. package/src/swarm/persist-bridge.mjs +4 -3
  40. package/src/swarm-cmd.mjs +88 -46
  41. package/src/verify-citations-cmd.mjs +17 -1
  42. package/src/verify-citations.mjs +31 -7
  43. package/starter-pack/README.md +22 -14
  44. package/starter-pack/handbook.md +4 -4
  45. package/starter-pack/policy/routing-rules.md +42 -0
  46. package/starter-pack/policy/tool-permissions.md +21 -0
  47. package/starter-pack/workflows/full-treatment.md +27 -16
@@ -67,6 +67,13 @@ export function recordProbe(eventsPath, probe) {
67
67
  * narrow fine-tunes show step changes, so an early halt on a small sample would be a noise
68
68
  * trigger, not a real disagreement signal).
69
69
  *
70
+ * Only probes recorded AFTER the role's most recent clear-halt event count. A clear-halt is
71
+ * an operator decision that the disagreement evidence before it is adjudicated; without this
72
+ * boundary the stale disagreeing probes keep dominating the window and the role re-halts on
73
+ * the very next probe — the documented recovery command could never actually recover a role.
74
+ * The fresh-start window also restarts the ≥N thin-sample guard, so a cleared role gets a
75
+ * full new sample before it can halt again.
76
+ *
70
77
  * @param {string} eventsPath
71
78
  * @param {string} role
72
79
  * @param {number} [N]
@@ -74,8 +81,10 @@ export function recordProbe(eventsPath, probe) {
74
81
  * @returns {{ probes: number, agreed: number, rate: number, shouldHalt: boolean }}
75
82
  */
76
83
  export function checkHalt(eventsPath, role, N = SHADOW_DEFAULTS.N, tau = SHADOW_DEFAULTS.TAU) {
77
- const events = readEvents(eventsPath, { role, kind: "shadow-probe" });
78
- const window = events.slice(-N);
84
+ const events = readEvents(eventsPath, { role, kind: ["shadow-probe", "clear-halt"] });
85
+ const lastClear = events.map((e) => e.kind).lastIndexOf("clear-halt");
86
+ const probesSinceClear = events.slice(lastClear + 1).filter((e) => e.kind === "shadow-probe");
87
+ const window = probesSinceClear.slice(-N);
79
88
  const probes = window.length;
80
89
  const agreed = window.filter((e) => e.data && e.data.agreed === true).length;
81
90
  const rate = probes === 0 ? 1 : agreed / probes;
@@ -94,7 +103,8 @@ export function contrastiveHaltMessage({ role, probes, rate, tau }) {
94
103
  return (
95
104
  `specialist for role "${role}" halted: shadow-probe agreement ${pct}% over the last ` +
96
105
  `${probes} probes < required ${required}% (τ=${tau}). The specialist's verdicts have ` +
97
- `drifted from Claude's on the same inputs. Clear with: roleos specialist clear-halt ${role}`
106
+ // Role names contain spaces ("Token Budget Analyst") — the copy-pasteable command must quote.
107
+ `drifted from Claude's on the same inputs. Clear with: roleos specialist clear-halt "${role}"`
98
108
  );
99
109
  }
100
110
 
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Specialist runtime state — quota counters + shadow-probe counter + halt state, per role.
2
+ * Specialist runtime state — quota window + shadow-probe counter + halt state.
3
3
  *
4
4
  * Hides one secret family (Parnas): the persistence of routing counters. Callers see
5
5
  * `get/inc/setHalt/getHalt`; they never touch the on-disk format. The on-disk format is a
@@ -8,20 +8,35 @@
8
8
  * State default path: `<repo>/.role-os/specialist-state.json`. Override with
9
9
  * `ROLEOS_SPECIALIST_STATE_PATH`.
10
10
  *
11
- * Quota: a sliding-window counter. We store the last `window` dispatch timestamps so the
12
- * window is a true rolling window, not aligned to wall clock. (A wall-clock window can be
13
- * timed against the edge, which the workload-quota anti-collapse argument is meant to
14
- * prevent.)
11
+ * Quota (v2): a sliding window over the last `windowSize` dispatches of EITHER route. Every
12
+ * dispatch is recorded as `{ t, route }` with route "specialist" | "claude"; the specialist
13
+ * share is count(route === "specialist") over the last `windowSize` entries. Recording BOTH
14
+ * routes is what makes the window actually roll — Claude-routed traffic pushes old specialist
15
+ * entries out, so the specialist keeps receiving its quota share indefinitely. (v1 recorded
16
+ * only specialist dispatches and never aged them out, so every role locked out permanently
17
+ * after `windowSize × quota` dispatches — the opposite of the policy's "sliding" promise.)
18
+ * The window counts dispatches, not seconds, so it is a true rolling window not aligned to
19
+ * wall clock (a wall-clock window can be timed against the edge, which the workload-quota
20
+ * anti-collapse argument is meant to prevent).
15
21
  */
16
22
 
17
23
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
18
24
  import { dirname } from "node:path";
19
25
 
20
- export const STATE_SCHEMA = "roleos-specialist-state/v1";
26
+ export const STATE_SCHEMA = "roleos-specialist-state/v2";
27
+ const STATE_SCHEMA_V1 = "roleos-specialist-state/v1";
28
+
29
+ /** Default quota window (dispatches), matching the dispatcher's DEFAULT_WINDOW. */
30
+ export const DEFAULT_WINDOW = 200;
31
+
32
+ /**
33
+ * @typedef {object} DispatchRecord
34
+ * @property {number} t unix-ms timestamp
35
+ * @property {"specialist"|"claude"} route where the dispatch was actually served
36
+ */
21
37
 
22
38
  /**
23
39
  * @typedef {object} RoleState
24
- * @property {number[]} dispatch_timestamps sliding window of dispatch unix-ms
25
40
  * @property {number} probe_counter count of dispatches since the last shadow probe
26
41
  * @property {object|null} halt { reason, since } or null when not halted
27
42
  */
@@ -29,23 +44,32 @@ export const STATE_SCHEMA = "roleos-specialist-state/v1";
29
44
  /**
30
45
  * @typedef {object} StateFile
31
46
  * @property {string} schema
47
+ * @property {DispatchRecord[]} dispatches sliding window of recent dispatches (both routes)
32
48
  * @property {Object<string, RoleState>} roles
33
49
  */
34
50
 
35
51
  export function emptyState() {
36
- return { schema: STATE_SCHEMA, roles: {} };
52
+ return { schema: STATE_SCHEMA, dispatches: [], roles: {} };
37
53
  }
38
54
 
39
55
  export function loadState(path) {
40
56
  if (!existsSync(path)) return emptyState();
41
57
  try {
42
58
  const raw = JSON.parse(readFileSync(path, "utf8"));
59
+ // Tolerant v1 migration: a v1 file loads as v2 (old bare timestamps -> route "specialist")
60
+ // instead of erroring; the migrated shape is persisted on the next saveState.
61
+ if (raw && raw.schema === STATE_SCHEMA_V1 && typeof raw.roles === "object") {
62
+ return migrateV1(raw);
63
+ }
43
64
  if (!raw || raw.schema !== STATE_SCHEMA || typeof raw.roles !== "object") {
44
65
  // Refuse to silently accept a mis-shaped state file. Caller decides what to do.
45
66
  const err = new Error(`state file schema mismatch: got "${raw && raw.schema}", expected "${STATE_SCHEMA}"`);
46
67
  err.code = "STATE_SCHEMA_MISMATCH";
47
68
  throw err;
48
69
  }
70
+ raw.dispatches = (Array.isArray(raw.dispatches) ? raw.dispatches : [])
71
+ .map(normalizeDispatchRecord)
72
+ .filter(Boolean);
49
73
  return raw;
50
74
  } catch (err) {
51
75
  if (err.code === "STATE_SCHEMA_MISMATCH") throw err;
@@ -55,6 +79,35 @@ export function loadState(path) {
55
79
  }
56
80
  }
57
81
 
82
+ /**
83
+ * v1 -> v2 migration. v1 recorded only specialist dispatches, as bare unix-ms numbers in a
84
+ * per-role `dispatch_timestamps` array. Each becomes `{ t, route: "specialist" }` in the
85
+ * shared v2 window (sorted oldest-first); role slots keep probe_counter/halt.
86
+ */
87
+ function migrateV1(raw) {
88
+ const dispatches = [];
89
+ const roles = {};
90
+ for (const [role, slot] of Object.entries(raw.roles || {})) {
91
+ if (!slot || typeof slot !== "object") continue;
92
+ for (const t of Array.isArray(slot.dispatch_timestamps) ? slot.dispatch_timestamps : []) {
93
+ if (typeof t === "number" && Number.isFinite(t)) dispatches.push({ t, route: "specialist" });
94
+ }
95
+ roles[role] = {
96
+ probe_counter: typeof slot.probe_counter === "number" ? slot.probe_counter : 0,
97
+ halt: slot.halt ? { reason: slot.halt.reason, since: slot.halt.since } : null,
98
+ };
99
+ }
100
+ dispatches.sort((a, b) => a.t - b.t);
101
+ return { schema: STATE_SCHEMA, dispatches, roles };
102
+ }
103
+
104
+ /** Tolerate hand-edited windows: bare numbers read as v1-style specialist timestamps. */
105
+ function normalizeDispatchRecord(r) {
106
+ if (typeof r === "number" && Number.isFinite(r)) return { t: r, route: "specialist" };
107
+ if (!r || typeof r !== "object" || typeof r.t !== "number" || !Number.isFinite(r.t)) return null;
108
+ return { t: r.t, route: r.route === "claude" ? "claude" : "specialist" };
109
+ }
110
+
58
111
  export function saveState(path, state) {
59
112
  mkdirSync(dirname(path), { recursive: true });
60
113
  writeFileSync(path, JSON.stringify(state, null, 2) + "\n", "utf8");
@@ -63,39 +116,54 @@ export function saveState(path, state) {
63
116
  /** Get or create a role's slot in the state object. Mutates and returns the slot. */
64
117
  export function ensureRole(state, role) {
65
118
  if (!state.roles[role]) {
66
- state.roles[role] = { dispatch_timestamps: [], probe_counter: 0, halt: null };
119
+ state.roles[role] = { probe_counter: 0, halt: null };
67
120
  }
68
121
  return state.roles[role];
69
122
  }
70
123
 
71
124
  /**
72
- * Record a specialist dispatch in the sliding window. Pure — returns updated state.
73
- * `windowSize` is in dispatches, not seconds; we keep the last `windowSize` timestamps.
74
- * `nowMs` must be supplied (no Date.now() inside this function for testability).
125
+ * Record a dispatch in the sliding window — EVERY dispatch, both routes. Pure — returns
126
+ * updated state. The window only rolls if Claude-routed traffic is recorded too; that is what
127
+ * lets the specialist share recover after quota pressure instead of locking out permanently.
128
+ *
129
+ * Signature (cross-agent contract): `recordDispatch(state, route)` with route "specialist" |
130
+ * "claude". Trailing params are optional: `windowSize` trims the stored window (dispatches,
131
+ * not seconds; default 200) and `nowMs` injects the timestamp (testability — defaults to
132
+ * Date.now()). Any route value that is not "claude" counts as "specialist" — the conservative
133
+ * direction (legacy callers only ever recorded specialist dispatches, and over-counting
134
+ * tightens the quota, which fails open to Claude).
75
135
  */
76
- export function recordDispatch(state, role, windowSize, nowMs) {
77
- const slot = ensureRole(state, role);
78
- slot.dispatch_timestamps.push(nowMs);
79
- if (slot.dispatch_timestamps.length > windowSize) {
80
- slot.dispatch_timestamps.splice(0, slot.dispatch_timestamps.length - windowSize);
136
+ export function recordDispatch(state, route, windowSize = DEFAULT_WINDOW, nowMs = Date.now()) {
137
+ if (!Array.isArray(state.dispatches)) state.dispatches = [];
138
+ state.dispatches.push({ t: nowMs, route: route === "claude" ? "claude" : "specialist" });
139
+ const max = Math.max(1, windowSize);
140
+ if (state.dispatches.length > max) {
141
+ state.dispatches.splice(0, state.dispatches.length - max);
81
142
  }
82
143
  return state;
83
144
  }
84
145
 
85
146
  /**
86
147
  * Build a QuotaState view for the gate. `used` is "how many of the last `windowSize`
87
- * dispatches went to the specialist" but here EVERY tracked timestamp is a specialist
88
- * dispatch (Claude calls are not tracked), so `used = dispatch_timestamps.length` and
89
- * `window` accounts for both — the gate computes share-if-added.
148
+ * dispatches (both routes) went to the specialist"; `window` is the fixed denominator —
149
+ * the gate computes share-if-added.
150
+ *
151
+ * Important: with fewer than `windowSize` dispatches recorded, the quota check is generous
152
+ * (a small numerator over a full-size denominator). That is intentional — the quota cap is
153
+ * meant to prevent collapse at scale, not to gate a cold start.
90
154
  *
91
- * Important: this caps `window` at `windowSize`. With fewer than `windowSize` dispatches,
92
- * the quota check is generous (a small denominator means small share). That is intentional
93
- * — the quota cap is meant to prevent collapse at scale, not to gate a cold start.
155
+ * `role` is kept for call-site/API stability; the v2 window is shared across roles (the state
156
+ * file is the repo's dispatch ledger). The 2-arg form `quotaStateFor(state, windowSize)` is
157
+ * tolerated.
94
158
  */
95
159
  export function quotaStateFor(state, role, windowSize) {
96
- const slot = state.roles[role];
97
- const used = slot ? slot.dispatch_timestamps.length : 0;
98
- return { used, window: windowSize };
160
+ if (typeof role === "number" && windowSize === undefined) {
161
+ windowSize = role;
162
+ }
163
+ const size = Math.max(1, typeof windowSize === "number" && Number.isFinite(windowSize) ? windowSize : DEFAULT_WINDOW);
164
+ const entries = Array.isArray(state.dispatches) ? state.dispatches.slice(-size) : [];
165
+ const used = entries.filter((d) => d && d.route === "specialist").length;
166
+ return { used, window: size };
99
167
  }
100
168
 
101
169
  export function incrementProbeCounter(state, role) {
@@ -11,12 +11,12 @@
11
11
  * Key: current status. Value: array of allowed next statuses.
12
12
  */
13
13
  export const STEP_TRANSITIONS = {
14
- pending: ["active"],
14
+ pending: ["active", "blocked"], // blocked: upstream failure or operator block
15
15
  active: ["completed", "partial", "failed", "blocked"],
16
16
  completed: ["pending"], // re-opened by escalation
17
17
  partial: ["pending"], // retried
18
18
  failed: ["pending"], // retried
19
- blocked: ["pending"], // unblocked
19
+ blocked: ["pending"], // unblocked / retried / reopened
20
20
  skipped: [], // terminal
21
21
  };
22
22
 
package/src/status.mjs CHANGED
@@ -9,7 +9,8 @@ function parsePacket(filePath) {
9
9
  if (!content) return null;
10
10
 
11
11
  const get = (heading) => {
12
- const re = new RegExp(`## ${heading}\\n([\\s\\S]*?)(?=\\n## |\\n---|$)`);
12
+ // \r?\n keeps CRLF packets (git autocrlf / Windows editors) parseable
13
+ const re = new RegExp(`## ${heading}\\r?\\n([\\s\\S]*?)(?=\\r?\\n## |\\r?\\n---|$)`);
13
14
  const m = content.match(re);
14
15
  return m ? m[1].trim() : null;
15
16
  };
@@ -48,7 +49,8 @@ function parseVerdict(filePath) {
48
49
  if (!content) return null;
49
50
 
50
51
  const get = (heading) => {
51
- const re = new RegExp(`## ${heading}\\n([\\s\\S]*?)(?=\\n## |$)`);
52
+ // \r?\n keeps CRLF verdicts (git autocrlf / Windows editors) parseable
53
+ const re = new RegExp(`## ${heading}\\r?\\n([\\s\\S]*?)(?=\\r?\\n## |$)`);
52
54
  const m = content.match(re);
53
55
  return m ? m[1].trim() : null;
54
56
  };
@@ -77,7 +77,7 @@ export function detectBuildSystem(cwd) {
77
77
  * @param {object} [options]
78
78
  * @param {object} [options.buildSystem] - Override auto-detected build system
79
79
  * @param {number} [options.timeout] - Per-command timeout in ms (default: 120000)
80
- * @returns {{ pass: boolean, lint: StepResult, typecheck: StepResult, test: StepResult, duration: number }}
80
+ * @returns {{ pass: boolean, vacuous: boolean, reason: string|null, lint: StepResult, typecheck: StepResult, test: StepResult, duration: number }}
81
81
  *
82
82
  * @typedef {{ status: "pass"|"fail"|"skip", output: string, duration: number }} StepResult
83
83
  */
@@ -90,10 +90,19 @@ export function runBuildGate(cwd, options = {}) {
90
90
  const typecheck = runStep(bs.typecheckCmd, cwd, timeout);
91
91
  const test = runStep(bs.testCmd, cwd, timeout);
92
92
 
93
- const pass = lint.status !== "fail" && typecheck.status !== "fail" && test.status !== "fail";
93
+ // A gate that ran nothing verified nothing — fail loudly instead of
94
+ // passing vacuously, so an undetected build system can't silently
95
+ // disable the after-every-wave safety check (ANDON_AUTHORITY).
96
+ const vacuous = lint.status === "skip" && typecheck.status === "skip" && test.status === "skip";
97
+ const pass = !vacuous &&
98
+ lint.status !== "fail" && typecheck.status !== "fail" && test.status !== "fail";
94
99
 
95
100
  return {
96
101
  pass,
102
+ vacuous,
103
+ reason: vacuous
104
+ ? `No verification commands found (build system: ${bs.type}) — the gate could not verify anything. Add lint/typecheck/test commands or pass options.buildSystem.`
105
+ : null,
97
106
  lint,
98
107
  typecheck,
99
108
  test,
@@ -2,8 +2,9 @@
2
2
  * Evidence Persistence Bridge — Optional connection to dogfood-lab/testing-os.
3
3
  *
4
4
  * Converts swarm wave results into dogfood submission format and audit DB
5
- * payloads. The core swarm mission works without this — it's activated by
6
- * the --persist-evidence flag on `roleos swarm`.
5
+ * payloads. The core swarm mission works without this — this module is a
6
+ * library consumed by external tooling (e.g. dogfood-lab/testing-os); the
7
+ * roleos CLI does not currently invoke it.
7
8
  *
8
9
  * This mirrors the logic from dogfood-lab/testing-os/packages/dogfood-swarm/persist-results.js
9
10
  * but produces the payloads without requiring testing-os to be present.
@@ -70,7 +71,7 @@ export function buildScenarioResults(waveReports) {
70
71
  product_surface: surfaceFromDomain(domain),
71
72
  verdict: deriveVerdict(allFindings),
72
73
  step_results: [
73
- { step: "audit", status: allFindings.length > 0 ? "pass" : "pass" },
74
+ { step: "audit", status: allFindings.length > 0 ? "pass" : "skip" },
74
75
  { step: "remediate", status: allRemediations.length > 0 ? "pass" : "skip" },
75
76
  ],
76
77
  evidence: {
package/src/swarm-cmd.mjs CHANGED
@@ -5,8 +5,8 @@
5
5
  * roleos swarm manifest Show the swarm manifest
6
6
  * roleos swarm manifest --generate Auto-detect domains and generate manifest
7
7
  * roleos swarm status Show swarm run progress
8
- * roleos swarm findings List all findings by severity
9
- * roleos swarm approve Approve the current feature gate
8
+ * roleos swarm findings List findings captured from wave reports
9
+ * roleos swarm approve Approve the current user gate
10
10
  * roleos swarm verify Run Phase 9 final verification
11
11
  *
12
12
  * This is a first-class shortcut into the dogfood-swarm mission.
@@ -14,9 +14,9 @@
14
14
  */
15
15
 
16
16
  import { existsSync, readFileSync, writeFileSync } from "node:fs";
17
- import { join } from "node:path";
17
+ import { join, resolve } from "node:path";
18
18
  import {
19
- createPersistentRun, listRuns, loadRun, getPosition,
19
+ createPersistentRun, listRuns, loadRun, getPosition, saveRun,
20
20
  } from "./run.mjs";
21
21
  import {
22
22
  generateSwarmManifest, validateSwarmManifest,
@@ -25,6 +25,19 @@ import {
25
25
  // ── Constants ────────────────────────────────────────────────────────────────
26
26
 
27
27
  const MANIFEST_FILE = "swarm-manifest.json";
28
+ const DEFAULT_STAGES = ["health-a", "health-b", "health-c", "feature", "treatment"];
29
+
30
+ /**
31
+ * Filter listRuns output down to swarm runs.
32
+ * missionKey is authoritative; task keywords cover legacy runs.
33
+ */
34
+ function filterSwarmRuns(runs) {
35
+ return runs.filter(r =>
36
+ r.missionKey === "dogfood-swarm" ||
37
+ r.task.toLowerCase().includes("swarm") ||
38
+ r.task.toLowerCase().includes("dogfood")
39
+ );
40
+ }
28
41
 
29
42
  // ── Main dispatch ────────────────────────────────────────────────────────────
30
43
 
@@ -60,7 +73,7 @@ export async function swarmCommand(args) {
60
73
 
61
74
  // ── roleos swarm [run] ──────────────────────────────────────────────────────
62
75
 
63
- function cmdRun(extraArgs) {
76
+ async function cmdRun(extraArgs) {
64
77
  const cwd = process.cwd();
65
78
  const manifestPath = join(cwd, MANIFEST_FILE);
66
79
 
@@ -99,11 +112,18 @@ function cmdRun(extraArgs) {
99
112
  ? extraArgs.join(" ")
100
113
  : `Dogfood swarm of ${manifest.repo || "current repo"}`;
101
114
 
102
- // Create persistent run via the dogfood-swarm mission
103
- const run = createPersistentRun(taskDesc, cwd, { forceMission: "dogfood-swarm" });
115
+ // Create persistent run via the dogfood-swarm mission.
116
+ // Forwarding the manifest routes step construction through buildSwarmSteps,
117
+ // so steps carry stage/domain/gate metadata and scale with the domains.
118
+ const run = await createPersistentRun(taskDesc, cwd, {
119
+ forceMission: "dogfood-swarm",
120
+ manifest,
121
+ });
104
122
 
105
123
  const domainCount = manifest.domains?.length || 0;
106
- const stageCount = manifest.stages?.length || 4;
124
+ const stages = Array.isArray(manifest.stages) && manifest.stages.length > 0
125
+ ? manifest.stages
126
+ : DEFAULT_STAGES;
107
127
 
108
128
  console.log(`\nDogfood Swarm Started`);
109
129
  console.log(`─────────────────────`);
@@ -111,18 +131,19 @@ function cmdRun(extraArgs) {
111
131
  console.log(`Repo: ${manifest.repo || "unknown"}`);
112
132
  console.log(`Type: ${manifest.repoType || "unknown"}`);
113
133
  console.log(`Domains: ${domainCount}`);
114
- console.log(`Stages: ${stageCount} (health-a → health-b → health-c → feature)`);
134
+ console.log(`Stages: ${stages.length} (${stages.join(" → ")})`);
115
135
  console.log(`Steps: ${run.steps.length}`);
116
136
  console.log(`\nDomain Agents:`);
117
137
  for (const d of manifest.domains || []) {
118
138
  console.log(` - ${d.id}: ${d.role} (${d.patterns.length} patterns)`);
119
139
  }
120
140
  console.log(`\nStage Pipeline:`);
121
- console.log(` 1. Health-A Bug/Security Fix (loop until 0 CRITICAL + 0 HIGH)`);
122
- console.log(` 2. Health-B Proactive Hardening (user review gate)`);
123
- console.log(` 3. Health-C Humanization (loop until 0 CRITICAL + 0 HIGH)`);
124
- console.log(` 4. Feature Capability Audit (user approval gate)`);
125
- console.log(` 5. Final Synthesis + Verdict`);
141
+ console.log(` 1. Health-A Bug/Security Fix (loop until 0 CRITICAL + 0 HIGH)`);
142
+ console.log(` 2. Health-B Proactive Hardening (user review gate)`);
143
+ console.log(` 3. Health-C Humanization (loop until 0 CRITICAL + 0 HIGH)`);
144
+ console.log(` 4. Feature Capability Audit (user approval gate)`);
145
+ console.log(` 5. Treatment Full Treatment (shipcheck, docs, handbook — user gate)`);
146
+ console.log(` 6. Final Synthesis + Verdict`);
126
147
  console.log(`\nRun 'roleos next' to begin the first wave.`);
127
148
  console.log(`Run 'roleos swarm status' to check progress.\n`);
128
149
  }
@@ -209,11 +230,7 @@ function generateManifestFile(cwd, manifestPath) {
209
230
 
210
231
  function cmdStatus() {
211
232
  const cwd = process.cwd();
212
- const runs = listRuns(cwd);
213
- const swarmRuns = runs.filter(r =>
214
- r.task.toLowerCase().includes("swarm") ||
215
- r.task.toLowerCase().includes("dogfood")
216
- );
233
+ const swarmRuns = filterSwarmRuns(listRuns(cwd));
217
234
 
218
235
  if (swarmRuns.length === 0) {
219
236
  console.log("\nNo swarm runs found. Start one with: roleos swarm\n");
@@ -258,11 +275,7 @@ function cmdStatus() {
258
275
 
259
276
  function cmdFindings() {
260
277
  const cwd = process.cwd();
261
- const runs = listRuns(cwd);
262
- const swarmRuns = runs.filter(r =>
263
- r.task.toLowerCase().includes("swarm") ||
264
- r.task.toLowerCase().includes("dogfood")
265
- );
278
+ const swarmRuns = filterSwarmRuns(listRuns(cwd));
266
279
 
267
280
  if (swarmRuns.length === 0) {
268
281
  console.log("\nNo swarm runs found.\n");
@@ -275,12 +288,22 @@ function cmdFindings() {
275
288
  return;
276
289
  }
277
290
 
278
- // Extract findings from wave-report artifacts
291
+ // Extract findings from wave-report artifacts.
292
+ // step.artifact is usually a short reference (often a file path) — when it
293
+ // points at a readable file, scan the file content instead of the reference.
279
294
  const findings = [];
280
295
  for (const step of full.steps) {
281
296
  if (step.produces === "wave-report" && step.artifact) {
282
- // Try to parse findings from artifact
283
- const match = step.artifact.match(/## findings\n([\s\S]*?)(?=\n## |$)/i);
297
+ let body = step.artifact;
298
+ try {
299
+ const artifactPath = resolve(cwd, step.artifact);
300
+ if (existsSync(artifactPath)) {
301
+ body = readFileSync(artifactPath, "utf-8");
302
+ }
303
+ } catch { /* not a readable file — treat the reference as inline content */ }
304
+
305
+ // Normalize line endings so CRLF artifacts parse on Windows checkouts
306
+ const match = body.replace(/\r\n/g, "\n").match(/## findings\n([\s\S]*?)(?=\n## |$)/i);
284
307
  if (match) {
285
308
  findings.push({
286
309
  domain: step.domain || "unknown",
@@ -292,7 +315,9 @@ function cmdFindings() {
292
315
  }
293
316
 
294
317
  if (findings.length === 0) {
295
- console.log("\nNo findings captured yet. Run waves first.\n");
318
+ console.log("\nNo findings captured yet. Run waves first.");
319
+ console.log("Findings are read from each wave-report artifact's '## Findings' section");
320
+ console.log("(complete steps with a wave-report file path to make them scannable).\n");
296
321
  return;
297
322
  }
298
323
 
@@ -309,11 +334,7 @@ function cmdFindings() {
309
334
 
310
335
  function cmdApprove() {
311
336
  const cwd = process.cwd();
312
- const runs = listRuns(cwd);
313
- const swarmRuns = runs.filter(r =>
314
- r.task.toLowerCase().includes("swarm") ||
315
- r.task.toLowerCase().includes("dogfood")
316
- );
337
+ const swarmRuns = filterSwarmRuns(listRuns(cwd));
317
338
 
318
339
  if (swarmRuns.length === 0) {
319
340
  console.log("\nNo swarm runs found.\n");
@@ -326,9 +347,10 @@ function cmdApprove() {
326
347
  return;
327
348
  }
328
349
 
329
- // Find the next gate step waiting for approval
350
+ // Find the next gate step waiting for approval (not yet approved)
330
351
  const gateStep = full.steps.find(s =>
331
- s.isGate && s.userApproval && s.status === "active"
352
+ s.isGate && s.userApproval && s.status === "active" &&
353
+ s.userApprovalStatus !== "approved"
332
354
  );
333
355
 
334
356
  if (!gateStep) {
@@ -337,8 +359,26 @@ function cmdApprove() {
337
359
  return;
338
360
  }
339
361
 
340
- console.log(`\nApproved: ${gateStep.stage} gate`);
341
- console.log(`The swarm will proceed to the next stage.\n`);
362
+ // Record the approval on the persisted run — an approval that isn't
363
+ // saved is not a control.
364
+ const approvedAt = new Date().toISOString();
365
+ gateStep.userApprovalStatus = "approved";
366
+ gateStep.approvedAt = approvedAt;
367
+ gateStep.note = gateStep.note
368
+ ? `${gateStep.note}; user approved ${gateStep.stage} gate`
369
+ : `User approved ${gateStep.stage} gate`;
370
+ full.interventions = full.interventions || [];
371
+ full.interventions.push({
372
+ type: "gate-approval",
373
+ stepIndex: gateStep.index,
374
+ stage: gateStep.stage,
375
+ timestamp: approvedAt,
376
+ });
377
+ saveRun(cwd, full);
378
+
379
+ console.log(`\nApproved: ${gateStep.stage} gate (recorded at ${approvedAt})`);
380
+ console.log(`The swarm will proceed to the next stage.`);
381
+ console.log(`Complete the gate step with 'roleos complete <swarm-gate-artifact>' to advance.\n`);
342
382
  }
343
383
 
344
384
  // ── roleos swarm verify ─────────────────────────────────────────────────────
@@ -358,10 +398,13 @@ function cmdVerify() {
358
398
  console.log(`\nSwarm Verification`);
359
399
  console.log(`──────────────────`);
360
400
 
401
+ let healthy = true;
402
+
361
403
  // 1. Manifest valid
362
404
  if (validation.valid) {
363
405
  console.log(` [PASS] Manifest is valid`);
364
406
  } else {
407
+ healthy = false;
365
408
  console.log(` [FAIL] Manifest has ${validation.issues.length} issue(s)`);
366
409
  for (const i of validation.issues) console.log(` - ${i}`);
367
410
  }
@@ -371,15 +414,12 @@ function cmdVerify() {
371
414
  if (domainCount >= 1 && domainCount <= 10) {
372
415
  console.log(` [PASS] ${domainCount} domains (within 1-10 range)`);
373
416
  } else {
417
+ healthy = false;
374
418
  console.log(` [FAIL] ${domainCount} domains (must be 1-10)`);
375
419
  }
376
420
 
377
421
  // 3. Check for swarm run
378
- const runs = listRuns(cwd);
379
- const swarmRuns = runs.filter(r =>
380
- r.task.toLowerCase().includes("swarm") ||
381
- r.task.toLowerCase().includes("dogfood")
382
- );
422
+ const swarmRuns = filterSwarmRuns(listRuns(cwd));
383
423
 
384
424
  if (swarmRuns.length > 0) {
385
425
  const latest = swarmRuns[0];
@@ -393,7 +433,8 @@ function cmdVerify() {
393
433
  console.log(` [INFO] No swarm runs yet — run 'roleos swarm' to start`);
394
434
  }
395
435
 
396
- console.log("");
436
+ console.log(`\n${healthy ? "Swarm infrastructure verified." : "Verification failed — fix the issues above and re-run."}\n`);
437
+ if (!healthy) process.exit(1);
397
438
  }
398
439
 
399
440
  // ── Help ────────────────────────────────────────────────────────────────────
@@ -407,16 +448,17 @@ Usage:
407
448
  roleos swarm manifest Show the swarm manifest
408
449
  roleos swarm manifest --generate Auto-detect domains and generate manifest
409
450
  roleos swarm status Show swarm run progress
410
- roleos swarm findings List all findings by severity
411
- roleos swarm approve Approve the current feature gate
451
+ roleos swarm findings List findings captured from wave reports
452
+ roleos swarm approve Approve the current user gate
412
453
  roleos swarm verify Verify manifest and run state
413
454
  roleos swarm help Show this help
414
455
 
415
- The swarm runs 4 stages in sequence:
456
+ The swarm runs 5 stages in sequence:
416
457
  1. Health-A Bug/Security Fix (loops until 0 CRITICAL + 0 HIGH)
417
458
  2. Health-B Proactive Hardening (user review gate)
418
459
  3. Health-C Humanization (loops until 0 CRITICAL + 0 HIGH)
419
460
  4. Feature Capability Audit (user approval before execution)
461
+ 5. Treatment Full Treatment (shipcheck, docs, handbook — user gate)
420
462
 
421
463
  Each stage dispatches parallel domain agents with exclusive file ownership.
422
464
  A build gate (lint + typecheck + test) runs after every wave.
@@ -7,7 +7,7 @@
7
7
  * operator can branch on it.
8
8
  */
9
9
 
10
- import { writeFileSync } from "node:fs";
10
+ import { writeFileSync, existsSync } from "node:fs";
11
11
  import { resolve, dirname, basename } from "node:path";
12
12
  import { runCitationGate } from "./verify-citations.mjs";
13
13
 
@@ -25,6 +25,22 @@ export async function verifyCitationsCommand(args) {
25
25
  throw err;
26
26
  }
27
27
 
28
+ // The CLI positional is a dispatch FILE (inline-markdown input is the library API). Validate it
29
+ // up front: a missing file or odd extension must fail loudly — never silently degrade into
30
+ // scanning the path STRING for citations and reporting "no citations found".
31
+ if (!existsSync(dispatch)) {
32
+ const err = new Error(`dispatch file not found: ${dispatch}`);
33
+ err.exitCode = 1;
34
+ err.hint = "Pass the path to an existing research dispatch (.md, .markdown, or .json).";
35
+ throw err;
36
+ }
37
+ if (!/\.(md|markdown|json)$/i.test(dispatch)) {
38
+ const err = new Error(`unsupported dispatch file extension: ${dispatch}`);
39
+ err.exitCode = 1;
40
+ err.hint = "Supported extensions: .md, .markdown, .json (matched case-insensitively).";
41
+ throw err;
42
+ }
43
+
28
44
  const result = runCitationGate(dispatch, {
29
45
  provider: flags.provider || "ollama",
30
46
  ...(typeof flags.intent === "string" ? { intent: flags.intent } : {}),