role-os 2.9.0 → 2.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.es.md +28 -11
- package/README.fr.md +25 -8
- package/README.hi.md +25 -8
- package/README.it.md +28 -11
- package/README.ja.md +27 -10
- package/README.md +25 -8
- package/README.pt-BR.md +25 -8
- package/README.zh.md +25 -8
- package/bin/roleos.mjs +3 -2
- package/package.json +1 -1
- package/src/artifacts.mjs +14 -7
- package/src/audit-cmd.mjs +23 -23
- package/src/brainstorm-roles.mjs +6 -0
- package/src/citation-panel.mjs +26 -1
- package/src/composite.mjs +4 -0
- package/src/entry.mjs +2 -2
- package/src/hooks.mjs +107 -27
- package/src/knowledge/analyze-artifact-evidence.mjs +19 -9
- package/src/knowledge/fallback-policy.mjs +19 -7
- package/src/knowledge/resolve-overlay.mjs +21 -8
- package/src/knowledge/retrieve-for-dispatch.mjs +9 -4
- package/src/mission-run.mjs +11 -2
- package/src/packs-cmd.mjs +1 -1
- package/src/review.mjs +11 -2
- package/src/role-dossiers.json +1 -1
- package/src/route.mjs +41 -8
- package/src/run-cmd.mjs +0 -1
- package/src/run.mjs +67 -15
- package/src/session.mjs +3 -1
- package/src/specialist/capability-gate.mjs +35 -18
- package/src/specialist/dispatch.mjs +8 -3
- package/src/specialist/registry.mjs +6 -0
- package/src/specialist/shadow.mjs +13 -3
- package/src/specialist/state.mjs +94 -26
- package/src/state-machine.mjs +2 -2
- package/src/status.mjs +4 -2
- package/src/swarm/build-gate.mjs +11 -2
- package/src/swarm/persist-bridge.mjs +4 -3
- package/src/swarm-cmd.mjs +88 -46
- package/src/verify-citations-cmd.mjs +17 -1
- package/src/verify-citations.mjs +31 -7
- package/starter-pack/README.md +22 -14
- package/starter-pack/handbook.md +4 -4
- package/starter-pack/policy/routing-rules.md +42 -0
- package/starter-pack/policy/tool-permissions.md +21 -0
- package/starter-pack/workflows/full-treatment.md +27 -16
|
@@ -67,6 +67,13 @@ export function recordProbe(eventsPath, probe) {
|
|
|
67
67
|
* narrow fine-tunes show step changes, so an early halt on a small sample would be a noise
|
|
68
68
|
* trigger, not a real disagreement signal).
|
|
69
69
|
*
|
|
70
|
+
* Only probes recorded AFTER the role's most recent clear-halt event count. A clear-halt is
|
|
71
|
+
* an operator decision that the disagreement evidence before it is adjudicated; without this
|
|
72
|
+
* boundary the stale disagreeing probes keep dominating the window and the role re-halts on
|
|
73
|
+
* the very next probe — the documented recovery command could never actually recover a role.
|
|
74
|
+
* The fresh-start window also restarts the ≥N thin-sample guard, so a cleared role gets a
|
|
75
|
+
* full new sample before it can halt again.
|
|
76
|
+
*
|
|
70
77
|
* @param {string} eventsPath
|
|
71
78
|
* @param {string} role
|
|
72
79
|
* @param {number} [N]
|
|
@@ -74,8 +81,10 @@ export function recordProbe(eventsPath, probe) {
|
|
|
74
81
|
* @returns {{ probes: number, agreed: number, rate: number, shouldHalt: boolean }}
|
|
75
82
|
*/
|
|
76
83
|
export function checkHalt(eventsPath, role, N = SHADOW_DEFAULTS.N, tau = SHADOW_DEFAULTS.TAU) {
|
|
77
|
-
const events = readEvents(eventsPath, { role, kind: "shadow-probe" });
|
|
78
|
-
const
|
|
84
|
+
const events = readEvents(eventsPath, { role, kind: ["shadow-probe", "clear-halt"] });
|
|
85
|
+
const lastClear = events.map((e) => e.kind).lastIndexOf("clear-halt");
|
|
86
|
+
const probesSinceClear = events.slice(lastClear + 1).filter((e) => e.kind === "shadow-probe");
|
|
87
|
+
const window = probesSinceClear.slice(-N);
|
|
79
88
|
const probes = window.length;
|
|
80
89
|
const agreed = window.filter((e) => e.data && e.data.agreed === true).length;
|
|
81
90
|
const rate = probes === 0 ? 1 : agreed / probes;
|
|
@@ -94,7 +103,8 @@ export function contrastiveHaltMessage({ role, probes, rate, tau }) {
|
|
|
94
103
|
return (
|
|
95
104
|
`specialist for role "${role}" halted: shadow-probe agreement ${pct}% over the last ` +
|
|
96
105
|
`${probes} probes < required ${required}% (τ=${tau}). The specialist's verdicts have ` +
|
|
97
|
-
|
|
106
|
+
// Role names contain spaces ("Token Budget Analyst") — the copy-pasteable command must quote.
|
|
107
|
+
`drifted from Claude's on the same inputs. Clear with: roleos specialist clear-halt "${role}"`
|
|
98
108
|
);
|
|
99
109
|
}
|
|
100
110
|
|
package/src/specialist/state.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Specialist runtime state — quota
|
|
2
|
+
* Specialist runtime state — quota window + shadow-probe counter + halt state.
|
|
3
3
|
*
|
|
4
4
|
* Hides one secret family (Parnas): the persistence of routing counters. Callers see
|
|
5
5
|
* `get/inc/setHalt/getHalt`; they never touch the on-disk format. The on-disk format is a
|
|
@@ -8,20 +8,35 @@
|
|
|
8
8
|
* State default path: `<repo>/.role-os/specialist-state.json`. Override with
|
|
9
9
|
* `ROLEOS_SPECIALIST_STATE_PATH`.
|
|
10
10
|
*
|
|
11
|
-
* Quota: a sliding
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
11
|
+
* Quota (v2): a sliding window over the last `windowSize` dispatches of EITHER route. Every
|
|
12
|
+
* dispatch is recorded as `{ t, route }` with route "specialist" | "claude"; the specialist
|
|
13
|
+
* share is count(route === "specialist") over the last `windowSize` entries. Recording BOTH
|
|
14
|
+
* routes is what makes the window actually roll — Claude-routed traffic pushes old specialist
|
|
15
|
+
* entries out, so the specialist keeps receiving its quota share indefinitely. (v1 recorded
|
|
16
|
+
* only specialist dispatches and never aged them out, so every role locked out permanently
|
|
17
|
+
* after `windowSize × quota` dispatches — the opposite of the policy's "sliding" promise.)
|
|
18
|
+
* The window counts dispatches, not seconds, so it is a true rolling window not aligned to
|
|
19
|
+
* wall clock (a wall-clock window can be timed against the edge, which the workload-quota
|
|
20
|
+
* anti-collapse argument is meant to prevent).
|
|
15
21
|
*/
|
|
16
22
|
|
|
17
23
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
|
|
18
24
|
import { dirname } from "node:path";
|
|
19
25
|
|
|
20
|
-
export const STATE_SCHEMA = "roleos-specialist-state/
|
|
26
|
+
export const STATE_SCHEMA = "roleos-specialist-state/v2";
|
|
27
|
+
const STATE_SCHEMA_V1 = "roleos-specialist-state/v1";
|
|
28
|
+
|
|
29
|
+
/** Default quota window (dispatches), matching the dispatcher's DEFAULT_WINDOW. */
|
|
30
|
+
export const DEFAULT_WINDOW = 200;
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* @typedef {object} DispatchRecord
|
|
34
|
+
* @property {number} t unix-ms timestamp
|
|
35
|
+
* @property {"specialist"|"claude"} route where the dispatch was actually served
|
|
36
|
+
*/
|
|
21
37
|
|
|
22
38
|
/**
|
|
23
39
|
* @typedef {object} RoleState
|
|
24
|
-
* @property {number[]} dispatch_timestamps sliding window of dispatch unix-ms
|
|
25
40
|
* @property {number} probe_counter count of dispatches since the last shadow probe
|
|
26
41
|
* @property {object|null} halt { reason, since } or null when not halted
|
|
27
42
|
*/
|
|
@@ -29,23 +44,32 @@ export const STATE_SCHEMA = "roleos-specialist-state/v1";
|
|
|
29
44
|
/**
|
|
30
45
|
* @typedef {object} StateFile
|
|
31
46
|
* @property {string} schema
|
|
47
|
+
* @property {DispatchRecord[]} dispatches sliding window of recent dispatches (both routes)
|
|
32
48
|
* @property {Object<string, RoleState>} roles
|
|
33
49
|
*/
|
|
34
50
|
|
|
35
51
|
/** Create a blank v2 state: current schema tag, an empty dispatch window, and no role slots. */
export function emptyState() {
  const blank = {
    schema: STATE_SCHEMA,
    dispatches: [],
    roles: {},
  };
  return blank;
}
|
|
38
54
|
|
|
39
55
|
export function loadState(path) {
|
|
40
56
|
if (!existsSync(path)) return emptyState();
|
|
41
57
|
try {
|
|
42
58
|
const raw = JSON.parse(readFileSync(path, "utf8"));
|
|
59
|
+
// Tolerant v1 migration: a v1 file loads as v2 (old bare timestamps -> route "specialist")
|
|
60
|
+
// instead of erroring; the migrated shape is persisted on the next saveState.
|
|
61
|
+
if (raw && raw.schema === STATE_SCHEMA_V1 && typeof raw.roles === "object") {
|
|
62
|
+
return migrateV1(raw);
|
|
63
|
+
}
|
|
43
64
|
if (!raw || raw.schema !== STATE_SCHEMA || typeof raw.roles !== "object") {
|
|
44
65
|
// Refuse to silently accept a mis-shaped state file. Caller decides what to do.
|
|
45
66
|
const err = new Error(`state file schema mismatch: got "${raw && raw.schema}", expected "${STATE_SCHEMA}"`);
|
|
46
67
|
err.code = "STATE_SCHEMA_MISMATCH";
|
|
47
68
|
throw err;
|
|
48
69
|
}
|
|
70
|
+
raw.dispatches = (Array.isArray(raw.dispatches) ? raw.dispatches : [])
|
|
71
|
+
.map(normalizeDispatchRecord)
|
|
72
|
+
.filter(Boolean);
|
|
49
73
|
return raw;
|
|
50
74
|
} catch (err) {
|
|
51
75
|
if (err.code === "STATE_SCHEMA_MISMATCH") throw err;
|
|
@@ -55,6 +79,35 @@ export function loadState(path) {
|
|
|
55
79
|
}
|
|
56
80
|
}
|
|
57
81
|
|
|
82
|
+
/**
 * v1 -> v2 migration. v1 recorded only specialist dispatches, as bare unix-ms numbers in a
 * per-role `dispatch_timestamps` array. Each finite timestamp becomes
 * `{ t, route: "specialist" }` in the shared v2 window (sorted oldest-first); role slots keep
 * only `probe_counter` and `halt`.
 *
 * Role slots are collected as entries and materialized with `Object.fromEntries`, which
 * defines own data properties. A plain `roles[name] = slot` assignment would, for a role
 * named "__proto__" (a legal own key after JSON.parse), replace the prototype of `roles`
 * and silently drop that role's counters and halt state.
 *
 * @param {{ roles?: Object<string, object> }} raw parsed v1 state file
 * @returns {{ schema: string, dispatches: Array<{ t: number, route: "specialist" }>, roles: Object<string, { probe_counter: number, halt: object|null }> }}
 */
function migrateV1(raw) {
  const dispatches = [];
  const roleEntries = [];
  for (const [role, slot] of Object.entries(raw.roles || {})) {
    // Null or primitive slots carry nothing worth migrating.
    if (!slot || typeof slot !== "object") continue;
    const stamps = Array.isArray(slot.dispatch_timestamps) ? slot.dispatch_timestamps : [];
    for (const t of stamps) {
      // Drop anything that is not a finite number rather than inventing a timestamp.
      if (typeof t === "number" && Number.isFinite(t)) dispatches.push({ t, route: "specialist" });
    }
    roleEntries.push([
      role,
      {
        probe_counter: typeof slot.probe_counter === "number" ? slot.probe_counter : 0,
        // Copy only the two documented halt fields; a truthy halt stays a halt.
        halt: slot.halt ? { reason: slot.halt.reason, since: slot.halt.since } : null,
      },
    ]);
  }
  // Per-role arrays are merged into one window, so re-establish oldest-first order.
  dispatches.sort((a, b) => a.t - b.t);
  return { schema: STATE_SCHEMA, dispatches, roles: Object.fromEntries(roleEntries) };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Coerce one stored window entry into a canonical `{ t, route }` record, or `null` when it
 * cannot be salvaged. Bare finite numbers are accepted as v1-style specialist timestamps so
 * hand-edited windows keep loading; any route other than "claude" is read as "specialist".
 */
function normalizeDispatchRecord(r) {
  const isFiniteNumber = (v) => typeof v === "number" && Number.isFinite(v);

  if (isFiniteNumber(r)) {
    return { t: r, route: "specialist" };
  }

  const usable = Boolean(r) && typeof r === "object" && isFiniteNumber(r.t);
  if (!usable) {
    return null;
  }

  const route = r.route === "claude" ? "claude" : "specialist";
  return { t: r.t, route };
}
|
|
110
|
+
|
|
58
111
|
export function saveState(path, state) {
|
|
59
112
|
mkdirSync(dirname(path), { recursive: true });
|
|
60
113
|
writeFileSync(path, JSON.stringify(state, null, 2) + "\n", "utf8");
|
|
@@ -63,39 +116,54 @@ export function saveState(path, state) {
|
|
|
63
116
|
/**
 * Get or create a role's slot in the state object. Mutates `state.roles` and returns the slot.
 *
 * The lookup is restricted to OWN properties of `state.roles`. A bare `state.roles[role]`
 * truthiness test also resolves inherited members, so a role named "constructor" would hand
 * back the `Object` function and "__proto__" would hand back `Object.prototype` — and any
 * field the caller then writes on the "slot" would leak onto every object in the process.
 *
 * @param {{ roles: Object<string, object> }} state
 * @param {string} role
 * @returns {{ probe_counter: number, halt: object|null }} the existing or freshly created slot
 */
export function ensureRole(state, role) {
  const hasSlot =
    Object.prototype.hasOwnProperty.call(state.roles, role) && Boolean(state.roles[role]);
  if (!hasSlot) {
    // defineProperty creates an own data property even for the key "__proto__", where a
    // plain assignment would invoke the prototype setter instead.
    Object.defineProperty(state.roles, role, {
      value: { probe_counter: 0, halt: null },
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return state.roles[role];
}
|
|
70
123
|
|
|
71
124
|
/**
 * Record a dispatch in the sliding window — EVERY dispatch, both routes. Mutates `state` in
 * place and returns the same object. The window only rolls if Claude-routed traffic is
 * recorded too; that is what lets the specialist share recover after quota pressure instead
 * of locking out permanently.
 *
 * Signature (cross-agent contract): `recordDispatch(state, route)` with route "specialist" |
 * "claude". Trailing params are optional: `windowSize` trims the stored window (dispatches,
 * not seconds; default 200) and `nowMs` injects the timestamp (testability — defaults to
 * Date.now()). Any route value that is not "claude" counts as "specialist" — the conservative
 * direction (legacy callers only ever recorded specialist dispatches, and over-counting
 * tightens the quota, which fails open to Claude).
 *
 * Defensive normalization of the optional params:
 *  - a `windowSize` that is not a finite number (NaN, null, a string) falls back to
 *    DEFAULT_WINDOW. Unchecked, NaN makes the length comparison always false so the window
 *    is never trimmed, and null coerces to 0 so the window collapses to a single entry.
 *  - a fractional `windowSize` is floored so the stored length matches an integer window.
 *  - a `nowMs` that is not a finite number falls back to Date.now(); loadState discards
 *    records whose `t` is not finite, so storing one would lose the dispatch on reload.
 *
 * @param {{ dispatches?: Array<{ t: number, route: string }> }} state
 * @param {string} route "claude" or anything else (counted as "specialist")
 * @param {number} [windowSize] maximum number of records kept
 * @param {number} [nowMs] unix-ms timestamp for the new record
 * @returns {object} the same `state` object
 */
export function recordDispatch(state, route, windowSize = DEFAULT_WINDOW, nowMs = Date.now()) {
  if (!Array.isArray(state.dispatches)) state.dispatches = [];

  const t = typeof nowMs === "number" && Number.isFinite(nowMs) ? nowMs : Date.now();
  state.dispatches.push({ t, route: route === "claude" ? "claude" : "specialist" });

  const requested =
    typeof windowSize === "number" && Number.isFinite(windowSize) ? windowSize : DEFAULT_WINDOW;
  const max = Math.max(1, Math.floor(requested));
  if (state.dispatches.length > max) {
    // Drop the oldest entries so exactly `max` remain.
    state.dispatches.splice(0, state.dispatches.length - max);
  }
  return state;
}
|
|
84
145
|
|
|
85
146
|
/**
 * Build a QuotaState view for the gate. `used` is "how many of the last `windowSize`
 * dispatches (both routes) went to the specialist"; `window` is the fixed denominator —
 * the gate computes share-if-added.
 *
 * Important: with fewer than `windowSize` dispatches recorded, the quota check is generous
 * (a small numerator over a full-size denominator). That is intentional — the quota cap is
 * meant to prevent collapse at scale, not to gate a cold start.
 *
 * `role` is kept for call-site/API stability; the v2 window is shared across roles (the state
 * file is the repo's dispatch ledger). The 2-arg form `quotaStateFor(state, windowSize)` is
 * tolerated.
 *
 * The window size is normalized to an integer >= 1: values that are not finite numbers fall
 * back to DEFAULT_WINDOW, and fractional values are floored so the reported denominator
 * matches the number of entries `slice` actually inspects (slice truncates a fractional
 * index, which previously left `window` fractional while `used` was counted over fewer
 * entries).
 *
 * @param {{ dispatches?: Array<{ t: number, route: string }> }} state
 * @param {string|number} role role name (unused), or the window size in the 2-arg form
 * @param {number} [windowSize]
 * @returns {{ used: number, window: number }}
 */
export function quotaStateFor(state, role, windowSize) {
  // 2-arg form: a numeric second argument with no third argument is the window size.
  const requested = typeof role === "number" && windowSize === undefined ? role : windowSize;
  const finite =
    typeof requested === "number" && Number.isFinite(requested) ? requested : DEFAULT_WINDOW;
  const size = Math.max(1, Math.floor(finite));

  const entries = Array.isArray(state.dispatches) ? state.dispatches.slice(-size) : [];
  const used = entries.filter((d) => d && d.route === "specialist").length;
  return { used, window: size };
}
|
|
100
168
|
|
|
101
169
|
export function incrementProbeCounter(state, role) {
|
package/src/state-machine.mjs
CHANGED
|
@@ -11,12 +11,12 @@
|
|
|
11
11
|
* Key: current status. Value: array of allowed next statuses.
|
|
12
12
|
*/
|
|
13
13
|
export const STEP_TRANSITIONS = {
  // A pending step either starts, or is blocked (upstream failure or operator block).
  pending: ["active", "blocked"],
  active: ["completed", "partial", "failed", "blocked"],
  // Re-opened by escalation.
  completed: ["pending"],
  // Retried.
  partial: ["pending"],
  // Retried.
  failed: ["pending"],
  // Unblocked / retried / reopened.
  blocked: ["pending"],
  // Terminal: no way out.
  skipped: [],
};
|
|
22
22
|
|
package/src/status.mjs
CHANGED
|
@@ -9,7 +9,8 @@ function parsePacket(filePath) {
|
|
|
9
9
|
if (!content) return null;
|
|
10
10
|
|
|
11
11
|
const get = (heading) => {
|
|
12
|
-
|
|
12
|
+
// \r?\n keeps CRLF packets (git autocrlf / Windows editors) parseable
|
|
13
|
+
const re = new RegExp(`## ${heading}\\r?\\n([\\s\\S]*?)(?=\\r?\\n## |\\r?\\n---|$)`);
|
|
13
14
|
const m = content.match(re);
|
|
14
15
|
return m ? m[1].trim() : null;
|
|
15
16
|
};
|
|
@@ -48,7 +49,8 @@ function parseVerdict(filePath) {
|
|
|
48
49
|
if (!content) return null;
|
|
49
50
|
|
|
50
51
|
const get = (heading) => {
|
|
51
|
-
|
|
52
|
+
// \r?\n keeps CRLF verdicts (git autocrlf / Windows editors) parseable
|
|
53
|
+
const re = new RegExp(`## ${heading}\\r?\\n([\\s\\S]*?)(?=\\r?\\n## |$)`);
|
|
52
54
|
const m = content.match(re);
|
|
53
55
|
return m ? m[1].trim() : null;
|
|
54
56
|
};
|
package/src/swarm/build-gate.mjs
CHANGED
|
@@ -77,7 +77,7 @@ export function detectBuildSystem(cwd) {
|
|
|
77
77
|
* @param {object} [options]
|
|
78
78
|
* @param {object} [options.buildSystem] - Override auto-detected build system
|
|
79
79
|
* @param {number} [options.timeout] - Per-command timeout in ms (default: 120000)
|
|
80
|
-
* @returns {{ pass: boolean, lint: StepResult, typecheck: StepResult, test: StepResult, duration: number }}
|
|
80
|
+
* @returns {{ pass: boolean, vacuous: boolean, reason: string|null, lint: StepResult, typecheck: StepResult, test: StepResult, duration: number }}
|
|
81
81
|
*
|
|
82
82
|
* @typedef {{ status: "pass"|"fail"|"skip", output: string, duration: number }} StepResult
|
|
83
83
|
*/
|
|
@@ -90,10 +90,19 @@ export function runBuildGate(cwd, options = {}) {
|
|
|
90
90
|
const typecheck = runStep(bs.typecheckCmd, cwd, timeout);
|
|
91
91
|
const test = runStep(bs.testCmd, cwd, timeout);
|
|
92
92
|
|
|
93
|
-
|
|
93
|
+
// A gate that ran nothing verified nothing — fail loudly instead of
|
|
94
|
+
// passing vacuously, so an undetected build system can't silently
|
|
95
|
+
// disable the after-every-wave safety check (ANDON_AUTHORITY).
|
|
96
|
+
const vacuous = lint.status === "skip" && typecheck.status === "skip" && test.status === "skip";
|
|
97
|
+
const pass = !vacuous &&
|
|
98
|
+
lint.status !== "fail" && typecheck.status !== "fail" && test.status !== "fail";
|
|
94
99
|
|
|
95
100
|
return {
|
|
96
101
|
pass,
|
|
102
|
+
vacuous,
|
|
103
|
+
reason: vacuous
|
|
104
|
+
? `No verification commands found (build system: ${bs.type}) — the gate could not verify anything. Add lint/typecheck/test commands or pass options.buildSystem.`
|
|
105
|
+
: null,
|
|
97
106
|
lint,
|
|
98
107
|
typecheck,
|
|
99
108
|
test,
|
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
* Evidence Persistence Bridge — Optional connection to dogfood-lab/testing-os.
|
|
3
3
|
*
|
|
4
4
|
* Converts swarm wave results into dogfood submission format and audit DB
|
|
5
|
-
* payloads. The core swarm mission works without this —
|
|
6
|
-
*
|
|
5
|
+
* payloads. The core swarm mission works without this — this module is a
|
|
6
|
+
* library consumed by external tooling (e.g. dogfood-lab/testing-os); the
|
|
7
|
+
* roleos CLI does not currently invoke it.
|
|
7
8
|
*
|
|
8
9
|
* This mirrors the logic from dogfood-lab/testing-os/packages/dogfood-swarm/persist-results.js
|
|
9
10
|
* but produces the payloads without requiring testing-os to be present.
|
|
@@ -70,7 +71,7 @@ export function buildScenarioResults(waveReports) {
|
|
|
70
71
|
product_surface: surfaceFromDomain(domain),
|
|
71
72
|
verdict: deriveVerdict(allFindings),
|
|
72
73
|
step_results: [
|
|
73
|
-
{ step: "audit", status: allFindings.length > 0 ? "pass" : "
|
|
74
|
+
{ step: "audit", status: allFindings.length > 0 ? "pass" : "skip" },
|
|
74
75
|
{ step: "remediate", status: allRemediations.length > 0 ? "pass" : "skip" },
|
|
75
76
|
],
|
|
76
77
|
evidence: {
|
package/src/swarm-cmd.mjs
CHANGED
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
* roleos swarm manifest Show the swarm manifest
|
|
6
6
|
* roleos swarm manifest --generate Auto-detect domains and generate manifest
|
|
7
7
|
* roleos swarm status Show swarm run progress
|
|
8
|
-
* roleos swarm findings List
|
|
9
|
-
* roleos swarm approve Approve the current
|
|
8
|
+
* roleos swarm findings List findings captured from wave reports
|
|
9
|
+
* roleos swarm approve Approve the current user gate
|
|
10
10
|
* roleos swarm verify Run Phase 9 final verification
|
|
11
11
|
*
|
|
12
12
|
* This is a first-class shortcut into the dogfood-swarm mission.
|
|
@@ -14,9 +14,9 @@
|
|
|
14
14
|
*/
|
|
15
15
|
|
|
16
16
|
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
17
|
-
import { join } from "node:path";
|
|
17
|
+
import { join, resolve } from "node:path";
|
|
18
18
|
import {
|
|
19
|
-
createPersistentRun, listRuns, loadRun, getPosition,
|
|
19
|
+
createPersistentRun, listRuns, loadRun, getPosition, saveRun,
|
|
20
20
|
} from "./run.mjs";
|
|
21
21
|
import {
|
|
22
22
|
generateSwarmManifest, validateSwarmManifest,
|
|
@@ -25,6 +25,19 @@ import {
|
|
|
25
25
|
// ── Constants ────────────────────────────────────────────────────────────────
|
|
26
26
|
|
|
27
27
|
const MANIFEST_FILE = "swarm-manifest.json";
|
|
28
|
+
const DEFAULT_STAGES = ["health-a", "health-b", "health-c", "feature", "treatment"];
|
|
29
|
+
|
|
30
|
+
/**
 * Filter listRuns output down to swarm runs.
 * missionKey is authoritative; task keywords ("swarm" / "dogfood", matched
 * case-insensitively) cover legacy runs.
 *
 * A run summary whose `task` is missing or not a string is treated as having an empty
 * task instead of throwing, so one malformed run record cannot take down every swarm
 * subcommand that lists runs. Null/undefined entries are skipped.
 *
 * @param {Array<{ missionKey?: string, task?: string }>} runs
 * @returns {Array<object>} the subset of `runs` that are swarm runs, in original order
 */
function filterSwarmRuns(runs) {
  return runs.filter((r) => {
    if (!r) return false;
    if (r.missionKey === "dogfood-swarm") return true;
    // Lowercase once; fall back to "" when the summary carries no usable task text.
    const task = typeof r.task === "string" ? r.task.toLowerCase() : "";
    return task.includes("swarm") || task.includes("dogfood");
  });
}
|
|
28
41
|
|
|
29
42
|
// ── Main dispatch ────────────────────────────────────────────────────────────
|
|
30
43
|
|
|
@@ -60,7 +73,7 @@ export async function swarmCommand(args) {
|
|
|
60
73
|
|
|
61
74
|
// ── roleos swarm [run] ──────────────────────────────────────────────────────
|
|
62
75
|
|
|
63
|
-
function cmdRun(extraArgs) {
|
|
76
|
+
async function cmdRun(extraArgs) {
|
|
64
77
|
const cwd = process.cwd();
|
|
65
78
|
const manifestPath = join(cwd, MANIFEST_FILE);
|
|
66
79
|
|
|
@@ -99,11 +112,18 @@ function cmdRun(extraArgs) {
|
|
|
99
112
|
? extraArgs.join(" ")
|
|
100
113
|
: `Dogfood swarm of ${manifest.repo || "current repo"}`;
|
|
101
114
|
|
|
102
|
-
// Create persistent run via the dogfood-swarm mission
|
|
103
|
-
|
|
115
|
+
// Create persistent run via the dogfood-swarm mission.
|
|
116
|
+
// Forwarding the manifest routes step construction through buildSwarmSteps,
|
|
117
|
+
// so steps carry stage/domain/gate metadata and scale with the domains.
|
|
118
|
+
const run = await createPersistentRun(taskDesc, cwd, {
|
|
119
|
+
forceMission: "dogfood-swarm",
|
|
120
|
+
manifest,
|
|
121
|
+
});
|
|
104
122
|
|
|
105
123
|
const domainCount = manifest.domains?.length || 0;
|
|
106
|
-
const
|
|
124
|
+
const stages = Array.isArray(manifest.stages) && manifest.stages.length > 0
|
|
125
|
+
? manifest.stages
|
|
126
|
+
: DEFAULT_STAGES;
|
|
107
127
|
|
|
108
128
|
console.log(`\nDogfood Swarm Started`);
|
|
109
129
|
console.log(`─────────────────────`);
|
|
@@ -111,18 +131,19 @@ function cmdRun(extraArgs) {
|
|
|
111
131
|
console.log(`Repo: ${manifest.repo || "unknown"}`);
|
|
112
132
|
console.log(`Type: ${manifest.repoType || "unknown"}`);
|
|
113
133
|
console.log(`Domains: ${domainCount}`);
|
|
114
|
-
console.log(`Stages: ${
|
|
134
|
+
console.log(`Stages: ${stages.length} (${stages.join(" → ")})`);
|
|
115
135
|
console.log(`Steps: ${run.steps.length}`);
|
|
116
136
|
console.log(`\nDomain Agents:`);
|
|
117
137
|
for (const d of manifest.domains || []) {
|
|
118
138
|
console.log(` - ${d.id}: ${d.role} (${d.patterns.length} patterns)`);
|
|
119
139
|
}
|
|
120
140
|
console.log(`\nStage Pipeline:`);
|
|
121
|
-
console.log(` 1. Health-A
|
|
122
|
-
console.log(` 2. Health-B
|
|
123
|
-
console.log(` 3. Health-C
|
|
124
|
-
console.log(` 4. Feature
|
|
125
|
-
console.log(` 5.
|
|
141
|
+
console.log(` 1. Health-A Bug/Security Fix (loop until 0 CRITICAL + 0 HIGH)`);
|
|
142
|
+
console.log(` 2. Health-B Proactive Hardening (user review gate)`);
|
|
143
|
+
console.log(` 3. Health-C Humanization (loop until 0 CRITICAL + 0 HIGH)`);
|
|
144
|
+
console.log(` 4. Feature Capability Audit (user approval gate)`);
|
|
145
|
+
console.log(` 5. Treatment Full Treatment (shipcheck, docs, handbook — user gate)`);
|
|
146
|
+
console.log(` 6. Final Synthesis + Verdict`);
|
|
126
147
|
console.log(`\nRun 'roleos next' to begin the first wave.`);
|
|
127
148
|
console.log(`Run 'roleos swarm status' to check progress.\n`);
|
|
128
149
|
}
|
|
@@ -209,11 +230,7 @@ function generateManifestFile(cwd, manifestPath) {
|
|
|
209
230
|
|
|
210
231
|
function cmdStatus() {
|
|
211
232
|
const cwd = process.cwd();
|
|
212
|
-
const
|
|
213
|
-
const swarmRuns = runs.filter(r =>
|
|
214
|
-
r.task.toLowerCase().includes("swarm") ||
|
|
215
|
-
r.task.toLowerCase().includes("dogfood")
|
|
216
|
-
);
|
|
233
|
+
const swarmRuns = filterSwarmRuns(listRuns(cwd));
|
|
217
234
|
|
|
218
235
|
if (swarmRuns.length === 0) {
|
|
219
236
|
console.log("\nNo swarm runs found. Start one with: roleos swarm\n");
|
|
@@ -258,11 +275,7 @@ function cmdStatus() {
|
|
|
258
275
|
|
|
259
276
|
function cmdFindings() {
|
|
260
277
|
const cwd = process.cwd();
|
|
261
|
-
const
|
|
262
|
-
const swarmRuns = runs.filter(r =>
|
|
263
|
-
r.task.toLowerCase().includes("swarm") ||
|
|
264
|
-
r.task.toLowerCase().includes("dogfood")
|
|
265
|
-
);
|
|
278
|
+
const swarmRuns = filterSwarmRuns(listRuns(cwd));
|
|
266
279
|
|
|
267
280
|
if (swarmRuns.length === 0) {
|
|
268
281
|
console.log("\nNo swarm runs found.\n");
|
|
@@ -275,12 +288,22 @@ function cmdFindings() {
|
|
|
275
288
|
return;
|
|
276
289
|
}
|
|
277
290
|
|
|
278
|
-
// Extract findings from wave-report artifacts
|
|
291
|
+
// Extract findings from wave-report artifacts.
|
|
292
|
+
// step.artifact is usually a short reference (often a file path) — when it
|
|
293
|
+
// points at a readable file, scan the file content instead of the reference.
|
|
279
294
|
const findings = [];
|
|
280
295
|
for (const step of full.steps) {
|
|
281
296
|
if (step.produces === "wave-report" && step.artifact) {
|
|
282
|
-
|
|
283
|
-
|
|
297
|
+
let body = step.artifact;
|
|
298
|
+
try {
|
|
299
|
+
const artifactPath = resolve(cwd, step.artifact);
|
|
300
|
+
if (existsSync(artifactPath)) {
|
|
301
|
+
body = readFileSync(artifactPath, "utf-8");
|
|
302
|
+
}
|
|
303
|
+
} catch { /* not a readable file — treat the reference as inline content */ }
|
|
304
|
+
|
|
305
|
+
// Normalize line endings so CRLF artifacts parse on Windows checkouts
|
|
306
|
+
const match = body.replace(/\r\n/g, "\n").match(/## findings\n([\s\S]*?)(?=\n## |$)/i);
|
|
284
307
|
if (match) {
|
|
285
308
|
findings.push({
|
|
286
309
|
domain: step.domain || "unknown",
|
|
@@ -292,7 +315,9 @@ function cmdFindings() {
|
|
|
292
315
|
}
|
|
293
316
|
|
|
294
317
|
if (findings.length === 0) {
|
|
295
|
-
console.log("\nNo findings captured yet. Run waves first
|
|
318
|
+
console.log("\nNo findings captured yet. Run waves first.");
|
|
319
|
+
console.log("Findings are read from each wave-report artifact's '## Findings' section");
|
|
320
|
+
console.log("(complete steps with a wave-report file path to make them scannable).\n");
|
|
296
321
|
return;
|
|
297
322
|
}
|
|
298
323
|
|
|
@@ -309,11 +334,7 @@ function cmdFindings() {
|
|
|
309
334
|
|
|
310
335
|
function cmdApprove() {
|
|
311
336
|
const cwd = process.cwd();
|
|
312
|
-
const
|
|
313
|
-
const swarmRuns = runs.filter(r =>
|
|
314
|
-
r.task.toLowerCase().includes("swarm") ||
|
|
315
|
-
r.task.toLowerCase().includes("dogfood")
|
|
316
|
-
);
|
|
337
|
+
const swarmRuns = filterSwarmRuns(listRuns(cwd));
|
|
317
338
|
|
|
318
339
|
if (swarmRuns.length === 0) {
|
|
319
340
|
console.log("\nNo swarm runs found.\n");
|
|
@@ -326,9 +347,10 @@ function cmdApprove() {
|
|
|
326
347
|
return;
|
|
327
348
|
}
|
|
328
349
|
|
|
329
|
-
// Find the next gate step waiting for approval
|
|
350
|
+
// Find the next gate step waiting for approval (not yet approved)
|
|
330
351
|
const gateStep = full.steps.find(s =>
|
|
331
|
-
s.isGate && s.userApproval && s.status === "active"
|
|
352
|
+
s.isGate && s.userApproval && s.status === "active" &&
|
|
353
|
+
s.userApprovalStatus !== "approved"
|
|
332
354
|
);
|
|
333
355
|
|
|
334
356
|
if (!gateStep) {
|
|
@@ -337,8 +359,26 @@ function cmdApprove() {
|
|
|
337
359
|
return;
|
|
338
360
|
}
|
|
339
361
|
|
|
340
|
-
|
|
341
|
-
|
|
362
|
+
// Record the approval on the persisted run — an approval that isn't
|
|
363
|
+
// saved is not a control.
|
|
364
|
+
const approvedAt = new Date().toISOString();
|
|
365
|
+
gateStep.userApprovalStatus = "approved";
|
|
366
|
+
gateStep.approvedAt = approvedAt;
|
|
367
|
+
gateStep.note = gateStep.note
|
|
368
|
+
? `${gateStep.note}; user approved ${gateStep.stage} gate`
|
|
369
|
+
: `User approved ${gateStep.stage} gate`;
|
|
370
|
+
full.interventions = full.interventions || [];
|
|
371
|
+
full.interventions.push({
|
|
372
|
+
type: "gate-approval",
|
|
373
|
+
stepIndex: gateStep.index,
|
|
374
|
+
stage: gateStep.stage,
|
|
375
|
+
timestamp: approvedAt,
|
|
376
|
+
});
|
|
377
|
+
saveRun(cwd, full);
|
|
378
|
+
|
|
379
|
+
console.log(`\nApproved: ${gateStep.stage} gate (recorded at ${approvedAt})`);
|
|
380
|
+
console.log(`The swarm will proceed to the next stage.`);
|
|
381
|
+
console.log(`Complete the gate step with 'roleos complete <swarm-gate-artifact>' to advance.\n`);
|
|
342
382
|
}
|
|
343
383
|
|
|
344
384
|
// ── roleos swarm verify ─────────────────────────────────────────────────────
|
|
@@ -358,10 +398,13 @@ function cmdVerify() {
|
|
|
358
398
|
console.log(`\nSwarm Verification`);
|
|
359
399
|
console.log(`──────────────────`);
|
|
360
400
|
|
|
401
|
+
let healthy = true;
|
|
402
|
+
|
|
361
403
|
// 1. Manifest valid
|
|
362
404
|
if (validation.valid) {
|
|
363
405
|
console.log(` [PASS] Manifest is valid`);
|
|
364
406
|
} else {
|
|
407
|
+
healthy = false;
|
|
365
408
|
console.log(` [FAIL] Manifest has ${validation.issues.length} issue(s)`);
|
|
366
409
|
for (const i of validation.issues) console.log(` - ${i}`);
|
|
367
410
|
}
|
|
@@ -371,15 +414,12 @@ function cmdVerify() {
|
|
|
371
414
|
if (domainCount >= 1 && domainCount <= 10) {
|
|
372
415
|
console.log(` [PASS] ${domainCount} domains (within 1-10 range)`);
|
|
373
416
|
} else {
|
|
417
|
+
healthy = false;
|
|
374
418
|
console.log(` [FAIL] ${domainCount} domains (must be 1-10)`);
|
|
375
419
|
}
|
|
376
420
|
|
|
377
421
|
// 3. Check for swarm run
|
|
378
|
-
const
|
|
379
|
-
const swarmRuns = runs.filter(r =>
|
|
380
|
-
r.task.toLowerCase().includes("swarm") ||
|
|
381
|
-
r.task.toLowerCase().includes("dogfood")
|
|
382
|
-
);
|
|
422
|
+
const swarmRuns = filterSwarmRuns(listRuns(cwd));
|
|
383
423
|
|
|
384
424
|
if (swarmRuns.length > 0) {
|
|
385
425
|
const latest = swarmRuns[0];
|
|
@@ -393,7 +433,8 @@ function cmdVerify() {
|
|
|
393
433
|
console.log(` [INFO] No swarm runs yet — run 'roleos swarm' to start`);
|
|
394
434
|
}
|
|
395
435
|
|
|
396
|
-
console.log("");
|
|
436
|
+
console.log(`\n${healthy ? "Swarm infrastructure verified." : "Verification failed — fix the issues above and re-run."}\n`);
|
|
437
|
+
if (!healthy) process.exit(1);
|
|
397
438
|
}
|
|
398
439
|
|
|
399
440
|
// ── Help ────────────────────────────────────────────────────────────────────
|
|
@@ -407,16 +448,17 @@ Usage:
|
|
|
407
448
|
roleos swarm manifest Show the swarm manifest
|
|
408
449
|
roleos swarm manifest --generate Auto-detect domains and generate manifest
|
|
409
450
|
roleos swarm status Show swarm run progress
|
|
410
|
-
roleos swarm findings List
|
|
411
|
-
roleos swarm approve Approve the current
|
|
451
|
+
roleos swarm findings List findings captured from wave reports
|
|
452
|
+
roleos swarm approve Approve the current user gate
|
|
412
453
|
roleos swarm verify Verify manifest and run state
|
|
413
454
|
roleos swarm help Show this help
|
|
414
455
|
|
|
415
|
-
The swarm runs
|
|
456
|
+
The swarm runs 5 stages in sequence:
|
|
416
457
|
1. Health-A Bug/Security Fix (loops until 0 CRITICAL + 0 HIGH)
|
|
417
458
|
2. Health-B Proactive Hardening (user review gate)
|
|
418
459
|
3. Health-C Humanization (loops until 0 CRITICAL + 0 HIGH)
|
|
419
460
|
4. Feature Capability Audit (user approval before execution)
|
|
461
|
+
5. Treatment Full Treatment (shipcheck, docs, handbook — user gate)
|
|
420
462
|
|
|
421
463
|
Each stage dispatches parallel domain agents with exclusive file ownership.
|
|
422
464
|
A build gate (lint + typecheck + test) runs after every wave.
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* operator can branch on it.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import { writeFileSync } from "node:fs";
|
|
10
|
+
import { writeFileSync, existsSync } from "node:fs";
|
|
11
11
|
import { resolve, dirname, basename } from "node:path";
|
|
12
12
|
import { runCitationGate } from "./verify-citations.mjs";
|
|
13
13
|
|
|
@@ -25,6 +25,22 @@ export async function verifyCitationsCommand(args) {
|
|
|
25
25
|
throw err;
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
// The CLI positional is a dispatch FILE (inline-markdown input is the library API). Validate it
|
|
29
|
+
// up front: a missing file or odd extension must fail loudly — never silently degrade into
|
|
30
|
+
// scanning the path STRING for citations and reporting "no citations found".
|
|
31
|
+
if (!existsSync(dispatch)) {
|
|
32
|
+
const err = new Error(`dispatch file not found: ${dispatch}`);
|
|
33
|
+
err.exitCode = 1;
|
|
34
|
+
err.hint = "Pass the path to an existing research dispatch (.md, .markdown, or .json).";
|
|
35
|
+
throw err;
|
|
36
|
+
}
|
|
37
|
+
if (!/\.(md|markdown|json)$/i.test(dispatch)) {
|
|
38
|
+
const err = new Error(`unsupported dispatch file extension: ${dispatch}`);
|
|
39
|
+
err.exitCode = 1;
|
|
40
|
+
err.hint = "Supported extensions: .md, .markdown, .json (matched case-insensitively).";
|
|
41
|
+
throw err;
|
|
42
|
+
}
|
|
43
|
+
|
|
28
44
|
const result = runCitationGate(dispatch, {
|
|
29
45
|
provider: flags.provider || "ollama",
|
|
30
46
|
...(typeof flags.intent === "string" ? { intent: flags.intent } : {}),
|