role-os 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +472 -437
- package/README.es.md +319 -319
- package/README.fr.md +319 -319
- package/README.hi.md +319 -319
- package/README.it.md +319 -319
- package/README.ja.md +319 -319
- package/README.md +387 -387
- package/README.pt-BR.md +319 -319
- package/README.zh.md +322 -322
- package/bin/roleos.mjs +230 -225
- package/package.json +51 -51
- package/src/artifacts.mjs +693 -647
- package/src/brainstorm-render.mjs +462 -462
- package/src/brainstorm-roles.mjs +817 -817
- package/src/brainstorm.mjs +778 -778
- package/src/citation-panel.mjs +249 -0
- package/src/dispatch.mjs +265 -265
- package/src/mission-run.mjs +1 -1
- package/src/mission.mjs +655 -638
- package/src/packs.mjs +467 -467
- package/src/route.mjs +766 -766
- package/src/run-cmd.mjs +408 -408
- package/src/run.mjs +1000 -1000
- package/src/swarm/domain-detect.mjs +1 -1
- package/src/swarm/persist-bridge.mjs +4 -4
- package/src/verify-citations-cmd.mjs +138 -0
- package/src/verify-citations.mjs +522 -0
- package/starter-pack/agents/engineering/caption-auditor.md +61 -0
- package/starter-pack/agents/engineering/monster-taxonomy-verifier.md +62 -0
- package/starter-pack/agents/engineering/red-teamer.md +75 -0
- package/starter-pack/policy/tool-permissions.md +19 -0
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local-panel seat — a SECOND, family-different verifier seat for the citation gate, layered on
|
|
3
|
+
* prism. Where prism RETRIEVES (the deterministic existence floor + the source title/abstract) and
|
|
4
|
+
* runs its own groundedness lens, this seat re-judges each citation prism vouched for with an
|
|
5
|
+
* independent grounded-entailment PANEL running entirely on local models (Qwen + Mistral via
|
|
6
|
+
* llama-swap, the `offload` CLI). It is decorrelated from the Claude generator by construction
|
|
7
|
+
* (no Anthropic model in the panel) and from prism's single groundedness model (3 seats, ≥2
|
|
8
|
+
* families, conservative majority).
|
|
9
|
+
*
|
|
10
|
+
* Why it can only TIGHTEN: the panel's measured property (tensor-engine-knowledge wave-5 #156) is
|
|
11
|
+
* ZERO false-confirms — a 3-seat conservative-majority panel never stamps a false claim
|
|
12
|
+
* "supported". So a panel DISAGREEMENT on a citation prism marked `supported` is a real
|
|
13
|
+
* false-confirm signal: we downgrade that citation's gate accept -> escalate (a human checkpoint
|
|
14
|
+
* with a contrastive message). The panel NEVER turns a non-accept into an accept, and the
|
|
15
|
+
* deterministic existence floor (`fabricated` -> blocking) always dominates. EXTERNAL_VERIFIER
|
|
16
|
+
* (workflow-standard #6), now runnable locally for free.
|
|
17
|
+
*
|
|
18
|
+
* Read-only (shells the read-only `offload verify`); no compensator. Mirrors the inject-`exec`,
|
|
19
|
+
* closed-gate-on-unreachable discipline of verify-citations.mjs. See
|
|
20
|
+
* design/citation-verification-runner.md (Local-panel seat).
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { execFileSync } from "node:child_process";
|
|
24
|
+
|
|
25
|
+
// How the offload CLI is launched. Each value is taken from the environment when the variable is
// set to a non-empty string (OFFLOAD_PYTHON / OFFLOAD_SCRIPT) and otherwise falls back to the
// literal default below.
// NOTE(review): the script default is an absolute, machine-specific path — confirm it is meant to
// ship as the package-wide default rather than only being supplied via OFFLOAD_SCRIPT.
const { env: offloadEnv } = process;

/** Interpreter used to run the offload script. */
export const DEFAULT_OFFLOAD_PYTHON = offloadEnv.OFFLOAD_PYTHON
  ? offloadEnv.OFFLOAD_PYTHON
  : "python";

/** Path of the offload script handed to the interpreter as its first argument. */
export const DEFAULT_OFFLOAD_SCRIPT = offloadEnv.OFFLOAD_SCRIPT
  ? offloadEnv.OFFLOAD_SCRIPT
  : "E:/AI-Models/studio-local/offload.py";
|
|
29
|
+
|
|
30
|
+
/**
 * Assemble the evidence text the panel judges a claim against.
 *
 * The result is made of up to two trimmed parts joined by a blank line: a `Title: …` line built
 * from `source_title`, followed by `span`. A part that is missing, falsy, or whitespace-only is
 * left out entirely.
 *
 * @param {object} [fields]
 * @param {string} [fields.source_title] title of the retrieved source
 * @param {string} [fields.span] the supporting span to judge the claim against
 * @returns {string} the evidence text, or "" when neither part has any content
 */
export function buildEvidence({ source_title, span } = {}) {
  const parts = [];

  const heading = (source_title || "").trim();
  if (heading) parts.push(`Title: ${heading}`);

  const excerpt = (span || "").trim();
  if (excerpt) parts.push(excerpt);

  // Joining an empty list yields "", which is the documented "nothing to judge" result.
  return parts.join("\n\n");
}
|
|
43
|
+
|
|
44
|
+
/**
 * Default exec: runs `cmd` with `args` through execFileSync (no shell, so arguments are passed
 * verbatim) and returns whatever the child printed even when it exits non-zero.
 *
 * @param {string} cmd executable to run
 * @param {string[]} args argument vector
 * @param {object} opts
 * @param {number} opts.timeout kill the child after this many milliseconds
 * @param {string} opts.cwd working directory for the child
 * @param {object} opts.env environment for the child
 * @returns {{status: number, stdout: string, stderr: string}} exit status (1 when the child has no
 *   numeric status, e.g. it was killed) plus captured output
 * @throws the original error when the executable cannot be found (ENOENT), so the caller can
 *   treat the whole panel as not runnable
 */
function defaultOffloadExec(cmd, args, { timeout, cwd, env }) {
  try {
    const stdout = execFileSync(cmd, args, {
      cwd,
      timeout,
      env,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      maxBuffer: 16 * 1024 * 1024,
    });
    return { status: 0, stdout, stderr: "" };
  } catch (err) {
    if (err.code === "ENOENT") throw err; // missing python/script -> caller escalates

    const stderr = (err.stderr || "").toString();
    // Spawn-level failures (timeout, buffer overflow, permission denied) carry a string `code`
    // and typically leave stderr empty. Surface their message so the reason is not lost; a plain
    // non-zero exit keeps its (possibly empty) stderr untouched.
    const isSpawnFailure = typeof err.code === "string";
    return {
      status: err.status ?? 1,
      stdout: (err.stdout || "").toString(),
      stderr: stderr || (isSpawnFailure ? String(err.message || err.code) : ""),
    };
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Best-effort JSON parse of a child's output.
 *
 * First tries the whole trimmed text. If that fails, retries on the slice running from the first
 * "{" to the last "}" so that surrounding non-JSON text does not defeat the parse.
 *
 * @param {string} [text] raw output; a falsy value is treated as empty
 * @returns {*} the parsed value, or null when the text is empty or nothing parseable is found
 */
function tryParseJson(text) {
  const raw = (text || "").trim();
  if (raw === "") return null;

  // Wraps JSON.parse so that a legitimately parsed `null` is distinguishable from a failure.
  const attempt = (candidate) => {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch {
      return { ok: false, value: null };
    }
  };

  const whole = attempt(raw);
  if (whole.ok) return whole.value;

  const open = raw.indexOf("{");
  const close = raw.lastIndexOf("}");
  if (open === -1 || close <= open) return null;

  return attempt(raw.slice(open, close + 1)).value;
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* @typedef {object} PanelCitation
|
|
87
|
+
* @property {string|null} id
|
|
88
|
+
* @property {string|null} identifier
|
|
89
|
+
* @property {string} claim
|
|
90
|
+
* @property {string} evidence
|
|
91
|
+
*/
|
|
92
|
+
|
|
93
|
+
/**
 * @typedef {object} PanelResult
 * @property {boolean} requested always true when this ran
 * @property {boolean} reachable true once at least one call returned a parseable panel verdict
 * @property {string[]} seats distinct model tags reported in the panel JSON (`seats[].model`)
 * @property {number} checked citations for which the panel returned a parseable verdict
 * @property {object[]} perCitation { id, identifier, panel_verdict, seats }
 * @property {object[]} disagreements { id, identifier, prism, panel } where prism=supported, panel≠supported
 * @property {string} [detail] first/last failure description, present only when something failed
 * @property {boolean} [unreachable] present (true) only when something errored AND no call
 *   produced a parseable verdict
 */

/**
 * Run the offload entailment panel over the citations prism marked `supported`. Each citation with
 * evidence triggers one `<python> <script> verify --panel --json --claim … --evidence …` call; the
 * lower-cased `verdict` from the returned JSON is recorded per citation, and every verdict other
 * than "supported" is also recorded as a disagreement.
 *
 * Per-citation outcomes recorded in `perCitation[].panel_verdict`:
 *  - "no_evidence": the citation had no evidence text, so no call was made (not a disagreement);
 *  - "error": the call threw, or its output held no JSON object with a string `verdict`;
 *  - anything else: the panel's own verdict.
 *
 * NOTE(review): a citation that errors while other citations succeed is neither a disagreement nor
 * `unreachable` — confirm downstream consumers handle per-citation "error" verdicts as intended.
 *
 * @param {PanelCitation[]} supported citations prism vouched for, with evidence already built
 * @param {object} [options]
 * @param {Function} [options.exec] injectable (cmd,args,{timeout,cwd,env}) -> {status,stdout,stderr}
 * @param {string} [options.python] default DEFAULT_OFFLOAD_PYTHON
 * @param {string} [options.script] default DEFAULT_OFFLOAD_SCRIPT
 * @param {string} [options.base] LLAMASWAP_BASE passed to the child; when empty the child's
 *   inherited environment is left as is
 * @param {number} [options.timeout] per-call ms (default 300000)
 * @param {string} [options.cwd] working directory for the child (default process.cwd())
 * @returns {PanelResult}
 */
export function runOffloadPanel(supported, options = {}) {
  const {
    exec = defaultOffloadExec,
    python = DEFAULT_OFFLOAD_PYTHON,
    script = DEFAULT_OFFLOAD_SCRIPT,
    base = process.env.LLAMASWAP_BASE || "",
    timeout = 300_000,
    cwd = process.cwd(),
  } = options;

  // Force UTF-8 on the Python side so non-ASCII claim/evidence text survives the round trip.
  const env = {
    ...process.env,
    PYTHONIOENCODING: "utf-8",
    PYTHONUTF8: "1",
    ...(base ? { LLAMASWAP_BASE: base } : {}),
  };

  const perCitation = [];
  const disagreements = [];
  const seatModels = new Set();
  let reachable = false;
  let anyError = false;
  let detail = "";
  // Counted directly as verdicts arrive. Matching entries back to disagreements by `id` would
  // miscount whenever ids are null or repeated (both allowed by PanelCitation).
  let checked = 0;

  for (const c of supported) {
    if (!c.evidence) {
      // prism marked it supported but surfaced no span/title to re-judge — note, do not downgrade
      // (absence of evidence is not a contradiction); surfaced in the report.
      perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: "no_evidence", seats: [] });
      continue;
    }

    const args = [
      script, "verify", "--panel", "--json",
      "--claim", c.claim,
      "--evidence", c.evidence,
    ];

    let res;
    try {
      res = exec(python, args, { timeout, cwd, env });
    } catch (err) {
      // ENOENT (no python / no script) or spawn failure -> the panel is unreachable as a whole.
      detail = `offload not runnable: ${err.code || err.message}`;
      anyError = true;
      perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: "error", seats: [] });
      if (err.code === "ENOENT") break; // no point retrying the rest with a missing binary
      continue;
    }

    const parsed = tryParseJson((res.stdout || "").toString());
    if (!parsed || typeof parsed.verdict !== "string") {
      anyError = true;
      // Keep the first failure description; later ones are usually repeats of the same cause.
      detail = detail || `offload produced no parseable panel JSON (exit ${res.status}): ${(res.stderr || res.stdout || "").toString().slice(0, 200)}`;
      perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: "error", seats: [] });
      continue;
    }

    reachable = true;
    checked += 1;

    // Tolerate null/odd seat entries instead of crashing on otherwise valid panel JSON.
    const seats = Array.isArray(parsed.seats)
      ? parsed.seats.map((s) => s?.model).filter(Boolean)
      : [];
    for (const model of seats) seatModels.add(model);

    const verdict = String(parsed.verdict).toLowerCase();
    perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: verdict, seats });
    if (verdict !== "supported") {
      disagreements.push({ id: c.id, identifier: c.identifier, prism: "supported", panel: verdict });
    }
  }

  return {
    requested: true,
    reachable,
    seats: [...seatModels],
    checked,
    perCitation,
    disagreements,
    ...(detail ? { detail } : {}),
    // unreachable iff we never got a single parseable verdict AND something errored
    ...(anyError && !reachable ? { unreachable: true } : {}),
  };
}
|
|
196
|
+
|
|
197
|
+
/**
 * Build the human-facing escalation message for panel disagreements. It states the total number
 * of disagreeing citations, then spells out up to the first three — each naming what prism
 * concluded and what the panel found instead — and appends a "(+N more)" suffix for the rest.
 *
 * @param {object[]} disagreements entries shaped { id, identifier, panel }; `identifier` is used
 *   as the label when truthy, otherwise `id`
 * @returns {string} the escalation message
 */
function contrastiveDetail(disagreements) {
  const total = disagreements.length;

  const shown = [];
  for (const entry of disagreements.slice(0, 3)) {
    const label = entry.identifier || entry.id;
    shown.push(
      `${label}: prism read the source as SUPPORTING the claim; the local Qwen+Mistral panel found "${entry.panel}" on prism's own span`,
    );
  }

  const overflow = total > 3 ? ` (+${total - 3} more)` : "";
  return `local entailment panel disagrees with prism on ${total} citation(s) — review before accepting. ${shown.join("; ")}${overflow}`;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Fold a panel result into a gate result. The panel can only tighten the gate: the sole change it
 * ever makes is turning a passing gate into an `escalate`.
 *
 * Outcomes, in the order they are checked:
 *  1. gate is blocking or not passing -> returned as is, with the panel attached as `local_panel`;
 *  2. panel flagged `unreachable`     -> escalate, reason "local_panel_unreachable";
 *  3. panel has ≥1 disagreement       -> escalate, reason "local_panel_disagreement";
 *  4. otherwise                       -> the pass stands, with the panel attached.
 *
 * An escalation sets verdict "escalate", pass false, advisory true, plus `reason` and `detail`.
 * The input gate object is never mutated; a shallow copy is returned.
 *
 * @param {object} gate gate result; `blocking` and `pass` are the fields read here
 * @param {PanelResult} panel
 * @returns {object} copy of the gate (possibly downgraded) with `local_panel` attached
 */
export function applyLocalPanel(gate, panel) {
  const annotated = { ...gate, local_panel: panel };

  // The existence floor and any prior non-pass dominate; the panel then only annotates.
  const stillAccepting = !gate.blocking && gate.pass;
  if (!stillAccepting) return annotated;

  const escalate = (reason, detail) => ({
    ...annotated,
    verdict: "escalate",
    pass: false,
    advisory: true,
    reason,
    detail,
  });

  if (panel.unreachable) {
    // An unreachable verifier closes the gate rather than letting the pass through.
    return escalate(
      "local_panel_unreachable",
      panel.detail || "the local verifier panel could not be reached (offload/llama-swap down)",
    );
  }

  return panel.disagreements.length > 0
    ? escalate("local_panel_disagreement", contrastiveDetail(panel.disagreements))
    : annotated; // panel agrees (or had nothing to challenge) -> pass stands
}
|