role-os 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,249 @@
1
+ /**
2
+ * Local-panel seat — a SECOND, family-different verifier seat for the citation gate, layered on
3
+ * prism. Where prism RETRIEVES (the deterministic existence floor + the source title/abstract) and
4
+ * runs its own groundedness lens, this seat re-judges each citation prism vouched for with an
5
+ * independent grounded-entailment PANEL running entirely on local models (Qwen + Mistral via
6
+ * llama-swap, the `offload` CLI). It is decorrelated from the Claude generator by construction
7
+ * (no Anthropic model in the panel) and from prism's single groundedness model (3 seats, ≥2
8
+ * families, conservative majority).
9
+ *
10
+ * Why it can only TIGHTEN: the panel's measured property (tensor-engine-knowledge wave-5 #156) is
11
+ * ZERO false-confirms — a 3-seat conservative-majority panel never stamps a false claim
12
+ * "supported". So a panel DISAGREEMENT on a citation prism marked `supported` is a real
13
+ * false-confirm signal: we downgrade that citation's gate accept -> escalate (a human checkpoint
14
+ * with a contrastive message). The panel NEVER turns a non-accept into an accept, and the
15
+ * deterministic existence floor (`fabricated` -> blocking) always dominates. EXTERNAL_VERIFIER
16
+ * (workflow-standard #6), now runnable locally for free.
17
+ *
18
+ * Read-only (shells the read-only `offload verify`); no compensator. Mirrors the inject-`exec`,
19
+ * closed-gate-on-unreachable discipline of verify-citations.mjs. See
20
+ * design/citation-verification-runner.md (Local-panel seat).
21
+ */
22
+
23
+ import { execFileSync } from "node:child_process";
24
+
/**
 * Interpreter and script path used to launch the offload CLI. Either one can be overridden for a
 * non-default rig through the OFFLOAD_PYTHON / OFFLOAD_SCRIPT environment variables; an unset or
 * empty variable falls back to the built-in default below.
 */
export const DEFAULT_OFFLOAD_PYTHON = process.env.OFFLOAD_PYTHON
  ? process.env.OFFLOAD_PYTHON
  : "python";
export const DEFAULT_OFFLOAD_SCRIPT = process.env.OFFLOAD_SCRIPT
  ? process.env.OFFLOAD_SCRIPT
  : "E:/AI-Models/studio-local/offload.py";
29
+
/**
 * Build the evidence string the panel judges the claim against: the retrieved source title plus
 * the single supporting span. Thin by design — only what the upstream verifier itself surfaced.
 *
 * Both fields are trimmed. The title, when present, is rendered as `Title: <title>` and separated
 * from the span by a blank line. Missing, empty, or non-string fields are treated as absent, and a
 * null/undefined argument is treated as an empty citation, so this never throws on sparse input.
 *
 * @param {{ source_title?: string|null, span?: string|null }|null} [citation]
 * @returns {string} evidence, or "" when there is nothing to judge against.
 */
export function buildEvidence(citation = {}) {
  // `= {}` only covers undefined; `?? {}` also covers an explicit null.
  const { source_title, span } = citation ?? {};
  // Only real strings count as evidence; anything else (null, numbers, objects) is ignored.
  const clean = (value) => (typeof value === "string" ? value.trim() : "");

  const title = clean(source_title);
  const body = clean(span);
  return [title ? `Title: ${title}` : "", body].filter(Boolean).join("\n\n");
}
43
+
/**
 * Default exec: runs `cmd` with `args` through execFileSync (no shell, so args are passed
 * verbatim) and reports the outcome as `{ status, stdout, stderr }` instead of throwing on a
 * non-zero exit, so the caller can still read whatever the child printed.
 *
 * - Success: `{ status: 0, stdout, stderr: "" }`.
 * - ENOENT (command not found): rethrown, so the caller can treat the tool as not runnable.
 * - Any other failure: `status` is the child's exit status (1 when there is none, e.g. timeout),
 *   with captured stdout/stderr. For spawn-level failures that carry an error code (ETIMEDOUT,
 *   EACCES, ENOBUFS, ...) and produced no stderr, `stderr` carries the error message so the
 *   reason is not lost from diagnostics.
 *
 * @param {string} cmd
 * @param {string[]} args
 * @param {{ timeout?: number, cwd?: string, env?: object }} [options]
 * @returns {{ status: number, stdout: string, stderr: string }}
 * @throws {Error} the original spawn error when its code is ENOENT
 */
function defaultOffloadExec(cmd, args, { timeout, cwd, env } = {}) {
  try {
    const stdout = execFileSync(cmd, args, {
      cwd,
      timeout,
      env,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      maxBuffer: 16 * 1024 * 1024,
    });
    return { status: 0, stdout, stderr: "" };
  } catch (err) {
    if (err.code === "ENOENT") throw err; // missing command -> caller escalates
    const stderr = (err.stderr || "").toString();
    // A plain non-zero exit has no `code`; only spawn-level errors do. Keep the child's own
    // stderr when it exists, otherwise surface why the spawn failed.
    const spawnReason = err.code ? String(err.message || err.code) : "";
    return {
      status: err.status ?? 1,
      stdout: (err.stdout || "").toString(),
      stderr: stderr || spawnReason,
    };
  }
}
65
+
/**
 * Best-effort JSON extraction from command output.
 *
 * First tries to parse the whole trimmed text. If that fails, retries on the slice running from
 * the first "{" to the last "}" (so a JSON object wrapped in other output can still be read).
 *
 * @param {string|null|undefined} text
 * @returns {*} the parsed value, or null when the text is empty or nothing parseable is found
 */
function tryParseJson(text) {
  const raw = (text || "").trim();
  if (raw.length === 0) return null;

  // Parse without throwing; `ok` distinguishes a failed parse from a parsed `null`.
  const attempt = (candidate) => {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch {
      return { ok: false, value: null };
    }
  };

  const whole = attempt(raw);
  if (whole.ok) return whole.value;

  const open = raw.indexOf("{");
  const close = raw.lastIndexOf("}");
  if (open === -1 || close <= open) return null;
  return attempt(raw.slice(open, close + 1)).value;
}
84
+
/**
 * @typedef {object} PanelCitation
 * @property {string|null} id
 * @property {string|null} identifier
 * @property {string} claim
 * @property {string} evidence
 */

/**
 * @typedef {object} PanelResult
 * @property {boolean} requested always true when this ran
 * @property {boolean} reachable true iff at least one call returned a parseable panel verdict
 * @property {string[]} seats the distinct model tags reported by the panel (PIN_PER_STEP)
 * @property {number} checked citations for which the panel returned a parseable verdict
 * @property {object[]} perCitation { id, identifier, panel_verdict, seats }
 * @property {object[]} disagreements { id, identifier, prism, panel } where prism=supported, panel≠supported
 * @property {string} [detail] first parse-failure message, or the latest exec-failure message
 * @property {boolean} [unreachable] present (true) only when some call failed and no call ever
 *   produced a parseable verdict
 */

/**
 * Run the offload entailment panel over the citations prism marked `supported`. Each citation with
 * evidence triggers one `<python> <script> verify --panel --json --claim … --evidence …` call; the
 * seat models reported in the panel JSON are collected for the receipt (PIN_PER_STEP).
 *
 * Per-citation outcomes recorded in `perCitation[].panel_verdict`:
 * - `"no_evidence"` — the citation had an empty `evidence`; no call is made and no disagreement
 *   is recorded.
 * - `"error"` — `exec` threw, or the output had no parseable JSON with a string `verdict`.
 *   An ENOENT from `exec` stops the loop (the remaining citations are not recorded).
 * - otherwise the lower-cased panel verdict; anything other than `"supported"` is also pushed
 *   to `disagreements`.
 *
 * NOTE(review): a citation whose call errors while another call succeeds is neither a
 * disagreement nor `unreachable`, so it does not downgrade the gate — confirm this is intended.
 *
 * @param {PanelCitation[]} supported citations prism vouched for, with evidence already built
 * @param {object} [options]
 * @param {Function} [options.exec] injectable (cmd,args,{timeout,cwd,env}) -> {status,stdout,stderr}
 * @param {string} [options.python] default DEFAULT_OFFLOAD_PYTHON
 * @param {string} [options.script] default DEFAULT_OFFLOAD_SCRIPT
 * @param {string} [options.base] LLAMASWAP_BASE passed to the child (default: the parent's own
 *   LLAMASWAP_BASE, if any)
 * @param {number} [options.timeout] per-call ms (default 300000)
 * @param {string} [options.cwd] default process.cwd()
 * @returns {PanelResult}
 */
export function runOffloadPanel(supported, options = {}) {
  const {
    exec = defaultOffloadExec,
    python = DEFAULT_OFFLOAD_PYTHON,
    script = DEFAULT_OFFLOAD_SCRIPT,
    base = process.env.LLAMASWAP_BASE || "",
    timeout = 300_000,
    cwd = process.cwd(),
  } = options ?? {};

  // Force UTF-8 I/O in the Python child so non-ASCII claims/evidence round-trip intact.
  const env = {
    ...process.env,
    PYTHONIOENCODING: "utf-8",
    PYTHONUTF8: "1",
    ...(base ? { LLAMASWAP_BASE: base } : {}),
  };

  const perCitation = [];
  const disagreements = [];
  const seatModels = new Set();
  // Counted directly: ids may be null or repeated, so matching rows by id would over-count.
  let checked = 0;
  let reachable = false;
  let anyError = false;
  let detail = "";

  for (const c of supported ?? []) {
    if (!c.evidence) {
      // prism marked it supported but surfaced no span/title to re-judge — note, do not downgrade
      // (absence of evidence is not a contradiction); surfaced in the report.
      perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: "no_evidence", seats: [] });
      continue;
    }
    const args = [
      script, "verify", "--panel", "--json",
      "--claim", c.claim,
      "--evidence", c.evidence,
    ];
    let res;
    try {
      res = exec(python, args, { timeout, cwd, env });
    } catch (err) {
      // ENOENT (command not found) or another spawn failure thrown by exec.
      detail = `offload not runnable: ${err.code || err.message}`;
      anyError = true;
      perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: "error", seats: [] });
      if (err.code === "ENOENT") break; // no point retrying the rest with a missing binary
      continue;
    }
    // `res?.` tolerates an injected exec that returns nothing: treated as an unparseable result.
    const parsed = tryParseJson((res?.stdout || "").toString());
    if (!parsed || typeof parsed.verdict !== "string") {
      anyError = true;
      detail = detail || `offload produced no parseable panel JSON (exit ${res?.status}): ${(res?.stderr || res?.stdout || "").toString().slice(0, 200)}`;
      perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: "error", seats: [] });
      continue;
    }
    reachable = true;
    checked += 1;
    // Skip malformed (null / model-less) seat entries rather than failing the whole run.
    const seats = Array.isArray(parsed.seats) ? parsed.seats.map((s) => s?.model).filter(Boolean) : [];
    seats.forEach((m) => seatModels.add(m));
    const verdict = String(parsed.verdict).toLowerCase();
    perCitation.push({ id: c.id, identifier: c.identifier, panel_verdict: verdict, seats });
    if (verdict !== "supported") {
      disagreements.push({ id: c.id, identifier: c.identifier, prism: "supported", panel: verdict });
    }
  }

  return {
    requested: true,
    reachable,
    seats: [...seatModels],
    checked,
    perCitation,
    disagreements,
    ...(detail ? { detail } : {}),
    // unreachable iff we never got a single parseable verdict AND something errored
    ...(anyError && !reachable ? { unreachable: true } : {}),
  };
}
196
+
/**
 * Compose the contrastive escalation message: state what prism concluded, then what the local
 * panel found, so the reviewer sees the disagreement itself rather than a bare "uncertain".
 *
 * At most the first three disagreements are spelled out (each labelled by `identifier`, falling
 * back to `id`); any remainder is summarised as " (+N more)".
 *
 * @param {object[]} disagreements entries shaped { id, identifier, prism, panel }
 * @returns {string}
 */
function contrastiveDetail(disagreements) {
  const SHOWN = 3;
  const total = disagreements.length;

  const parts = [];
  for (const entry of disagreements.slice(0, SHOWN)) {
    const label = entry.identifier || entry.id;
    parts.push(
      `${label}: prism read the source as SUPPORTING the claim; the local Qwen+Mistral panel found "${entry.panel}" on prism's own span`,
    );
  }

  let message = `local entailment panel disagrees with prism on ${total} citation(s) — review before accepting. ${parts.join("; ")}`;
  if (total > SHOWN) {
    message += ` (+${total - SHOWN} more)`;
  }
  return message;
}
210
+
/**
 * Fold the local panel's result into a gate result. The panel can only tighten: the sole change
 * it ever makes is turning a passing gate into an escalation.
 *
 * - gate blocking or not passing        -> returned unchanged apart from the attached `local_panel`
 * - gate passing + `panel.unreachable`  -> escalate, reason `local_panel_unreachable`
 * - gate passing + ≥1 disagreement      -> escalate, reason `local_panel_disagreement`
 * - gate passing, none of the above     -> pass stands
 *
 * An escalation sets `verdict: "escalate"`, `pass: false`, `advisory: true`, plus `reason` and
 * `detail`. The input `gate` object is never mutated; a copy is returned.
 *
 * @param {object} gate GateResult from gateCitations / runCitationGate
 * @param {PanelResult} panel
 * @returns {object} a copy of gate (possibly downgraded), with `local_panel` attached
 */
export function applyLocalPanel(gate, panel) {
  const withPanel = { ...gate, local_panel: panel };

  // The existence floor and any non-pass dominate: the panel may only annotate those.
  const gateIsPassing = !gate.blocking && Boolean(gate.pass);
  if (!gateIsPassing) {
    return withPanel;
  }

  const escalate = (reason, detail) => ({
    ...withPanel,
    verdict: "escalate",
    pass: false,
    advisory: true,
    reason,
    detail,
  });

  // Unreachable is checked first: a panel that could not run closes the gate.
  if (panel.unreachable) {
    const fallback = "the local verifier panel could not be reached (offload/llama-swap down)";
    return escalate("local_panel_unreachable", panel.detail || fallback);
  }

  const hasDisagreement = panel.disagreements.length > 0;
  return hasDisagreement
    ? escalate("local_panel_disagreement", contrastiveDetail(panel.disagreements))
    : withPanel;
}