role-os 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/bin/roleos.mjs +10 -0
- package/package.json +1 -1
- package/src/specialist/budget-consult.mjs +120 -0
- package/src/specialist/client.mjs +131 -0
- package/src/specialist/dispatch.mjs +237 -0
- package/src/specialist/events.mjs +56 -0
- package/src/specialist/gate.mjs +202 -0
- package/src/specialist/registry.mjs +219 -0
- package/src/specialist/shadow.mjs +122 -0
- package/src/specialist/state.mjs +125 -0
- package/src/specialist-cmd.mjs +378 -0
- package/starter-pack/policy/specialist-tier.md +288 -0
- package/starter-pack/schemas/specialist.md +155 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 2.7.0
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
#### Token Budget Analyst — production budget consult (opt-in, default-off)
|
|
8
|
+
|
|
9
|
+
- **`src/specialist/budget-consult.mjs`** — wires the budgeter specialist into dispatch assembly. `consultBudgetForManifest(manifest)` / `buildDispatchManifestWithBudget(options)` consult the Token Budget Analyst per dispatch step (via the proven `dispatchSpecialist` path), attaching an advisory `budgetForecast` + `budgetReceipt` to each step.
|
|
10
|
+
- **Opt-in** via `ROLEOS_BUDGET_CONSULT` (default **off** — production dispatch is byte-identical until the flip, which is a release decision). **Fail-open** to the deterministic baseline `max(ctx*1.5, 50000)` (not Claude); the consult swallows any error into a receipt so it can never break manifest assembly. **Advisory** — it never blocks or gates a dispatch. Compensator: `roleos specialist rollback <role> <version>`.
|
|
11
|
+
- Also lands the **Token Budget Analyst dataset tooling** under `tools/token-budget-dataset/` (the v0.1 harvester + puzzle curriculum + review/freeze pipeline that produced the budgeter's training corpus). Not part of the published CLI package (`files` ships `bin`/`src`/`starter-pack`).
|
|
12
|
+
|
|
13
|
+
### Tests
|
|
14
|
+
- 9 new tests (`specialist-budget-consult`: off=no-op, specialist forecast, fail-open on backend-down + no-registry, never-throws). **1334 total, all green.**
|
|
15
|
+
|
|
3
16
|
## 2.6.0
|
|
4
17
|
|
|
5
18
|
### Changed
|
package/bin/roleos.mjs
CHANGED
|
@@ -16,6 +16,7 @@ import { auditCommand } from "../src/audit-cmd.mjs";
|
|
|
16
16
|
import { swarmCommand } from "../src/swarm-cmd.mjs";
|
|
17
17
|
import { startCommand } from "../src/entry-cmd.mjs";
|
|
18
18
|
import { verifyCitationsCommand } from "../src/verify-citations-cmd.mjs";
|
|
19
|
+
import { specialistCommand } from "../src/specialist-cmd.mjs";
|
|
19
20
|
import {
|
|
20
21
|
runCommand, resumeCommand, nextCommand, explainCommand,
|
|
21
22
|
completeCommand, failCommand, retryCommand, rerouteCommand,
|
|
@@ -75,6 +76,12 @@ Usage:
|
|
|
75
76
|
roleos swarm approve Approve the current feature gate
|
|
76
77
|
roleos swarm verify Verify manifest and run state
|
|
77
78
|
roleos verify-citations <dispatch> Verify a research dispatch's citations via prism (gate)
|
|
79
|
+
roleos specialist list List all specialists in the registry (active version + cert)
|
|
80
|
+
roleos specialist status [--role] Show registry + halt + quota state per role
|
|
81
|
+
roleos specialist register <r> <f> Register a new version for a role
|
|
82
|
+
roleos specialist promote <r> <v> Promote a certified version to active (refused on L0)
|
|
83
|
+
roleos specialist rollback <r> <v> NAMED COMPENSATOR — pointer-swap to a prior certified version
|
|
84
|
+
roleos specialist clear-halt <r> Clear a shadow-probe halt on a role
|
|
78
85
|
roleos mission list List all missions
|
|
79
86
|
roleos mission show <key> Show full mission detail
|
|
80
87
|
roleos mission suggest <text> Suggest a mission for a task
|
|
@@ -206,6 +213,9 @@ try {
|
|
|
206
213
|
case "verify-citations":
|
|
207
214
|
await verifyCitationsCommand(args);
|
|
208
215
|
break;
|
|
216
|
+
case "specialist":
|
|
217
|
+
await specialistCommand(args);
|
|
218
|
+
break;
|
|
209
219
|
case "mission":
|
|
210
220
|
await missionCommand(args);
|
|
211
221
|
break;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "role-os",
|
|
3
|
-
"version": "2.6.0",
|
|
3
|
+
"version": "2.7.0",
|
|
4
4
|
"description": "Role OS — a multi-Claude operating system where 61 specialized roles execute work through contracts, conflict detection, escalation, and structured evidence. 10 team packs, 9 missions including dogfood swarm (multi-pass convergence), deep audit with manifest-scaled dynamic dispatch, and brainstorm with traceable disagreement.",
|
|
5
5
|
"homepage": "https://mcp-tool-shop-org.github.io/role-os/",
|
|
6
6
|
"bugs": {
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Budget consult — the production seam that consults the Token Budget Analyst specialist for each step
|
|
3
|
+
* of a dispatch manifest, attaching an ADVISORY budget forecast + receipt.
|
|
4
|
+
*
|
|
5
|
+
* This is the B-DEFAULT wiring: it puts the proven dispatchSpecialist pattern (wire_test.mjs) on the
|
|
6
|
+
* production dispatch-assembly path. Three properties make it safe to land before the default-on flip:
|
|
7
|
+
*
|
|
8
|
+
* - OPT-IN, default OFF (ROLEOS_BUDGET_CONSULT). With the flag off, buildDispatchManifestWithBudget
|
|
9
|
+
* returns EXACTLY what buildDispatchManifest returns — the production dispatch is byte-identical
|
|
10
|
+
* until the flip (a Mike-gated release decision).
|
|
11
|
+
* - FAIL-OPEN to the DETERMINISTIC baseline max(ctx*1.5, 50000) — NOT Claude (the budgeter's
|
|
12
|
+
* contracted fallback). dispatchSpecialist is itself fail-open in three places (gate, specialist
|
|
13
|
+
* call, consumer reject); on top of that, this consult swallows any error into an error-receipt so
|
|
14
|
+
* a budget consult can NEVER break manifest assembly.
|
|
15
|
+
* - ADVISORY. The forecast/receipt is attached to each step for the audit trail; it never blocks or
|
|
16
|
+
* gates the dispatch. The budgeter forecasts spend; it does not stop work.
|
|
17
|
+
*
|
|
18
|
+
* Reject conditions (lockdown doctrine): the flag turns it off; fail-open means it can't halt a
|
|
19
|
+
* dispatch; the shadow-probe halt + `roleos specialist rollback <role> <version>` revert a bad adapter.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { buildDispatchManifest } from "../dispatch.mjs";
|
|
23
|
+
import { dispatchSpecialist } from "./dispatch.mjs";
|
|
24
|
+
|
|
25
|
+
export const BUDGET_ROLE = "Token Budget Analyst";
|
|
26
|
+
const CHARS_PER_TOKEN = 3.5;
|
|
27
|
+
const OUTPUT_TOKENS_PER_TURN = 300;
|
|
28
|
+
|
|
29
|
+
/**
 * The budgeter's contracted DETERMINISTIC fail-open baseline (not Claude): max(ctx*1.5, 50000).
 *
 * A missing, non-numeric, or non-finite `context_tokens` is treated as 0, so the returned
 * `spend_weighted` is always a finite number >= 50000 (never NaN). This function is the fallback
 * of last resort, so it must not itself produce an unusable forecast.
 *
 * @param {object} [input]
 * @param {number} [input.context_tokens] estimated context size in tokens
 * @returns {{ spend_weighted: number, source: string }}
 */
export function deterministicBudget(input) {
  const coerced = Number(input?.context_tokens);
  const contextTokens = Number.isFinite(coerced) ? coerced : 0;
  return {
    spend_weighted: Math.max(Math.round(contextTokens * 1.5), 50000),
    source: "deterministic-baseline",
  };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Shadow-probe comparator: true when the specialist's spend forecast lies within 25% of the
 * deterministic baseline's forecast.
 *
 * The specialist figure is read from `spend_weighted`, falling back to `verdict.spend_weighted`;
 * a missing figure on either side counts as 0. A zero baseline uses a tolerance base of 1.
 *
 * @param {object} [specialistVerdict]
 * @param {object} [baselineVerdict]
 * @returns {boolean}
 */
export function budgetAgree(specialistVerdict, baselineVerdict) {
  const forecast =
    specialistVerdict?.spend_weighted ?? specialistVerdict?.verdict?.spend_weighted ?? 0;
  const baseline = baselineVerdict?.spend_weighted ?? 0;
  const tolerance = 0.25 * (baseline || 1);
  const gap = Math.abs(forecast - baseline);
  return gap <= tolerance;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Build the budgeter's input record for one manifest step.
 *
 * `context_tokens` is the step's `systemPrompt` length divided by CHARS_PER_TOKEN (0 when the
 * prompt is not a string); `output_estimate` is `maxTurns` (1 when falsy) times
 * OUTPUT_TOKENS_PER_TURN. Both are rounded to the nearest integer.
 *
 * @param {object} [step] a manifest step
 * @param {number} totalSteps passed through unchanged as `steps`
 * @returns {{ context_tokens: number, steps: number, output_estimate: number }}
 */
export function budgetInputForStep(step, totalSteps) {
  const prompt = step?.systemPrompt;
  const promptChars = typeof prompt === "string" ? prompt.length : 0;
  const turns = step?.maxTurns || 1;
  return {
    context_tokens: Math.round(promptChars / CHARS_PER_TOKEN),
    steps: totalSteps,
    output_estimate: Math.round(turns * OUTPUT_TOKENS_PER_TURN),
  };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Report whether the budget consult is switched on.
 *
 * Reads `ROLEOS_BUDGET_CONSULT` from the environment; only the exact values "1" and "true"
 * enable it. Anything else — including unset — leaves the consult OFF (the default).
 *
 * @returns {boolean}
 */
export function budgetConsultEnabled() {
  const flag = process.env.ROLEOS_BUDGET_CONSULT;
  return ["1", "true"].includes(flag);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Consult the Token Budget Analyst for every step of a built manifest, mutating each step with
 * `budgetForecast` (the spend estimate) + `budgetReceipt` (the roleos-specialist-receipt/v1 audit
 * record). No-op when not enabled, when `steps` is empty, or when `steps` is not an array.
 *
 * NEVER throws: a failing consult is recorded on the step as a `consult-error` receipt, and the
 * step's forecast falls back to the deterministic baseline so readers of `budgetForecast` always
 * find a usable estimate. Entries in `steps` that are not objects cannot be annotated and are
 * skipped.
 *
 * @param {object} manifest           a DispatchManifest from buildDispatchManifest
 * @param {object} [opts]             null is treated like an empty options object
 * @param {boolean} [opts.enabled]    default: budgetConsultEnabled()
 * @param {object} [opts.paths]       { registry, state, events } for dispatchSpecialist
 * @param {string} [opts.nowIso]      default: the current time, taken per step
 * @param {Function}[opts.httpFn]     injectable HTTP for the specialist call (tests)
 * @param {object} [opts.classifier]
 * @param {object} [opts.shadow]
 * @returns {Promise<object>} the same manifest, enriched
 */
export async function consultBudgetForManifest(manifest, opts = {}) {
  // `opts = {}` only covers undefined; an explicit null must not break the never-throws contract.
  const { enabled = budgetConsultEnabled(), paths, nowIso, httpFn, classifier, shadow } = opts ?? {};
  if (!enabled || !Array.isArray(manifest?.steps) || manifest.steps.length === 0) return manifest;

  const totalSteps = manifest.steps.length;

  // Keep this loop sequential: every dispatchSpecialist call loads and then re-saves the
  // specialist state file, so running the consults in parallel would race on that file.
  for (const step of manifest.steps) {
    // A null/primitive entry has nowhere to hang a forecast; touching it would throw.
    if (step === null || typeof step !== "object") continue;

    let input;
    try {
      input = budgetInputForStep(step, totalSteps);
      const { result, receipt } = await dispatchSpecialist({
        role: BUDGET_ROLE,
        input,
        claudeFn: async (i) => deterministicBudget(i),
        agreeFn: budgetAgree,
        traceId: `${manifest.runId || "run"}-${step.packetId ?? step.stepIndex}-budget`,
        nowIso: nowIso || new Date().toISOString(),
        ...(paths ? { paths } : {}),
        ...(httpFn ? { httpFn } : {}),
        ...(classifier ? { classifier } : {}),
        ...(shadow ? { shadow } : {}),
      });
      step.budgetForecast = result;
      step.budgetReceipt = receipt;
    } catch (err) {
      // A budget consult must never break manifest assembly — fall back to the deterministic
      // baseline for the forecast, record the error in the receipt, and move on.
      step.budgetForecast = deterministicBudget(input);
      step.budgetReceipt = {
        schema: "roleos-specialist-receipt/v1",
        role: BUDGET_ROLE,
        source: "consult-error",
        error: String(err && err.message ? err.message : err),
      };
    }
  }
  return manifest;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Budget-aware dispatch assembly: build the manifest with buildDispatchManifest, then hand it to
 * consultBudgetForManifest, which attaches a per-step budget forecast when the consult is
 * enabled and returns the manifest untouched when it is not.
 *
 * Intended as the production entry point to call in place of buildDispatchManifest once the
 * consult is turned on.
 *
 * @param {object} options        buildDispatchManifest options
 * @param {object} [consultOpts]  consultBudgetForManifest options
 * @returns {Promise<object>} the manifest, enriched when the consult is enabled
 */
export async function buildDispatchManifestWithBudget(options, consultOpts = {}) {
  return consultBudgetForManifest(buildDispatchManifest(options), consultOpts);
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Specialist HTTP client — POST to `<backend_url>/verify` per the Specialist HTTP contract in
|
|
3
|
+
* `starter-pack/policy/specialist-tier.md`.
|
|
4
|
+
*
|
|
5
|
+
* Hides one secret family (Parnas): the vLLM HTTP wire format and the fail-open mapping from
|
|
6
|
+
* (timeout | non-200 | malformed JSON | adapter_id mismatch) to a result the dispatcher can
|
|
7
|
+
* use to fail open without leaking transport details upward.
|
|
8
|
+
*
|
|
9
|
+
* The `httpFn` is injectable so tests do not need a live server. Production default uses the
|
|
10
|
+
* global `fetch` (Node 18+, required by package.json engines).
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* @typedef {object} SpecialistCallResult
|
|
15
|
+
* @property {boolean} ok
|
|
16
|
+
* @property {object} [verdict] role-specific opaque verdict from the backend
|
|
17
|
+
* @property {number} [score] backend self-reported score in [0, 1] (INFORMATIONAL only)
|
|
18
|
+
* @property {string} [adapter_id] echo of the adapter pin
|
|
19
|
+
* @property {string} [base_model] echo of the base model
|
|
20
|
+
* @property {number} [duration_ms] backend-reported duration
|
|
21
|
+
* @property {string} [error] on !ok — fail-open reason
|
|
22
|
+
* @property {string} [detail] on !ok — human-readable detail
|
|
23
|
+
* @property {number} [status] HTTP status (when relevant)
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
const DEFAULT_TIMEOUT_MS = 10_000;
|
|
27
|
+
|
|
28
|
+
/**
 * Default httpFn: a thin wrapper over global fetch that returns the same shape as the
 * injectable contract — { ok, status, json|null, error|null }. Never throws.
 *
 * One timer bounds both the request and the body read. If it fires at either stage the result
 * is `{ ok: false, status: 0, json: null, error: "timeout" }`; a timeout during the body read is
 * not reported as a JSON parse failure.
 *
 * @param {string} url
 * @param {object} options
 * @param {string} options.method
 * @param {object} options.headers
 * @param {string} options.body
 * @param {number} options.timeoutMs  abort the call after this many milliseconds
 * @returns {Promise<{ok: boolean, status: number, json: (object|null), error: (string|null)}>}
 */
async function defaultHttpFn(url, { method, headers, body, timeoutMs }) {
  // AbortController is available on Node 18+ (engines: >=18.0.0).
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), timeoutMs);
  const timedOut = { ok: false, status: 0, json: null, error: "timeout" };
  // Thrown values are not guaranteed to be Error instances.
  const messageOf = (err) => String(err?.message ?? err);
  try {
    const res = await fetch(url, { method, headers, body, signal: ac.signal });
    let json = null;
    let parseError = null;
    try {
      json = await res.json();
    } catch (err) {
      // The timer is still armed while the body streams in. If it fired, the failure is a
      // timeout, not malformed JSON — report it as one.
      if (ac.signal.aborted) return timedOut;
      parseError = messageOf(err);
    }
    return { ok: res.ok, status: res.status, json, error: parseError };
  } catch (err) {
    if (ac.signal.aborted || err?.name === "AbortError") return timedOut;
    return { ok: false, status: 0, json: null, error: messageOf(err) };
  } finally {
    clearTimeout(timer);
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Call the specialist backend. Always returns a result object; never throws on transport
 * failure. A failed call returns `{ ok: false, error, detail }` and the dispatcher fails open
 * to Claude — that's the contract.
 *
 * Failure codes reported in `error`:
 *   - `no_backend_url` / `no_adapter_id` — required argument missing or not a string
 *   - `unserializable_input`             — the request body could not be JSON-encoded
 *   - `transport_error`                  — httpFn threw, or returned something that is not an object
 *   - `http_<status>`                    — the backend answered with a non-OK status
 *   - the httpFn's own error string      — no response at all (status 0, e.g. "timeout")
 *   - `malformed_response` / `missing_verdict` / `adapter_id_mismatch` — bad response payload
 *
 * @param {object} params
 * @param {string} params.backendUrl
 * @param {string} params.adapterId
 * @param {string} params.role
 * @param {*}      params.input
 * @param {string} params.traceId
 * @param {number} [params.timeoutMs=10000]
 * @param {Function} [params.httpFn] injectable for tests: (url, opts) => Promise<{ok,status,json,error}>
 * @returns {Promise<SpecialistCallResult>}
 */
export async function callSpecialist({
  backendUrl,
  adapterId,
  role,
  input,
  traceId,
  timeoutMs = DEFAULT_TIMEOUT_MS,
  httpFn = defaultHttpFn,
}) {
  if (typeof backendUrl !== "string" || !backendUrl) {
    return { ok: false, error: "no_backend_url", detail: "backendUrl is required" };
  }
  if (typeof adapterId !== "string" || !adapterId) {
    return { ok: false, error: "no_adapter_id", detail: "adapterId is required" };
  }
  const url = `${backendUrl.replace(/\/+$/, "")}/verify`;

  // JSON.stringify throws on cyclic structures and BigInt; that must fail open, not crash.
  let body;
  try {
    body = JSON.stringify({ adapter_id: adapterId, role, input, trace_id: traceId });
  } catch (err) {
    return {
      ok: false,
      error: "unserializable_input",
      detail: `could not serialize the request body: ${String(err?.message ?? err)}`,
    };
  }

  // An injected httpFn is not guaranteed to honour the "never throws" contract of the default.
  const t0 = Date.now();
  let res;
  try {
    res = await httpFn(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body,
      timeoutMs,
    });
  } catch (err) {
    return {
      ok: false,
      error: "transport_error",
      detail: `specialist backend call to ${url} threw: ${String(err?.message ?? err)}`,
      status: 0,
    };
  }
  const wireMs = Date.now() - t0;

  if (!res || typeof res !== "object") {
    return {
      ok: false,
      error: "transport_error",
      detail: `httpFn returned no result object for ${url}`,
      status: 0,
    };
  }

  if (!res.ok) {
    // Prefer http_<status> when the backend gave us a status (e.g. 500 with no body —
    // the parse error is incidental, the HTTP status is what matters). Fall back to the
    // transport error string only when there's no status to report (status === 0 = no
    // response, e.g. timeout / ECONNREFUSED).
    const error = res.status > 0 ? `http_${res.status}` : (res.error || "transport_error");
    return {
      ok: false,
      error,
      detail: `specialist backend at ${url} returned status=${res.status || 0}${res.error ? ` (${res.error})` : ""}`,
      status: res.status || 0,
    };
  }
  if (!res.json || typeof res.json !== "object") {
    return { ok: false, error: "malformed_response", detail: "backend response was not a JSON object", status: res.status };
  }
  const { verdict, score, adapter_id: echoedAdapterId, base_model, duration_ms } = res.json;
  if (verdict === undefined) {
    return { ok: false, error: "missing_verdict", detail: "backend response had no `verdict` field", status: res.status };
  }
  if (typeof echoedAdapterId !== "string" || echoedAdapterId !== adapterId) {
    // Adapter-id echo mismatch: the backend served a different adapter than we pinned.
    // This is a load-bearing safety check — the registry's `adapter_id` is the pin, and a
    // different adapter could be a different specialist entirely. Fail open.
    return {
      ok: false,
      error: "adapter_id_mismatch",
      detail: `pinned adapter_id="${adapterId}", backend echoed "${echoedAdapterId}"`,
      status: res.status,
    };
  }
  return {
    ok: true,
    verdict,
    score: typeof score === "number" ? score : undefined,
    adapter_id: echoedAdapterId,
    base_model: typeof base_model === "string" ? base_model : undefined,
    // Prefer the backend's own timing; fall back to the wall-clock time measured here.
    duration_ms: typeof duration_ms === "number" ? duration_ms : wireMs,
  };
}
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Specialist dispatch — the consumer-facing entry point. Wires registry → gate → (specialist
|
|
3
|
+
* | Claude) → shadow probe → halt check, and returns a receipt the caller can attach to its
|
|
4
|
+
* own audit trail.
|
|
5
|
+
*
|
|
6
|
+
* The consumer supplies the Claude path (`claudeFn`) and the domain-aware agreement
|
|
7
|
+
* comparator (`agreeFn`). This keeps the tier consumer-agnostic — `verify-citations` will
|
|
8
|
+
* supply prism-aware functions; the Token Budget Analyst (budget-consult.mjs) supplies budget-aware
|
|
9
|
+
* ones; an external consumer can supply anything.
|
|
10
|
+
*
|
|
11
|
+
* The dispatch is fail-open in three places:
|
|
12
|
+
* 1. The gate fails open (registry/gate-level rejects → Claude).
|
|
13
|
+
* 2. The specialist call fails open (transport/parse/adapter-mismatch errors → Claude).
|
|
14
|
+
* 3. A consumer-side reject of the specialist's verdict (its own guards, e.g. prism's
|
|
15
|
+
* submodularity check) is reported back through `consumerReject` and the caller can
|
|
16
|
+
* retry against Claude. The dispatcher itself doesn't see consumer guards — it just
|
|
17
|
+
* surfaces the specialist's verdict and lets the caller apply them.
|
|
18
|
+
*
|
|
19
|
+
* Shadow probes happen on the Kth specialist dispatch (the gate routed to a specialist).
|
|
20
|
+
* They never fire when the gate routed to Claude — there is nothing to compare. After a
|
|
21
|
+
* probe, the halt check runs and may flip the role into a halted state.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { loadRegistry } from "./registry.mjs";
|
|
25
|
+
import { gate, defaultClassifier } from "./gate.mjs";
|
|
26
|
+
import { callSpecialist } from "./client.mjs";
|
|
27
|
+
import {
|
|
28
|
+
loadState,
|
|
29
|
+
saveState,
|
|
30
|
+
emptyState,
|
|
31
|
+
quotaStateFor,
|
|
32
|
+
recordDispatch,
|
|
33
|
+
incrementProbeCounter,
|
|
34
|
+
resetProbeCounter,
|
|
35
|
+
getHalt,
|
|
36
|
+
setHalt,
|
|
37
|
+
} from "./state.mjs";
|
|
38
|
+
import {
|
|
39
|
+
shouldShadowProbe,
|
|
40
|
+
recordProbe,
|
|
41
|
+
checkHalt,
|
|
42
|
+
contrastiveHaltMessage,
|
|
43
|
+
appendHaltEvent,
|
|
44
|
+
SHADOW_DEFAULTS,
|
|
45
|
+
} from "./shadow.mjs";
|
|
46
|
+
|
|
47
|
+
const DEFAULT_REGISTRY_PATH = ".role-os/specialists.json";
|
|
48
|
+
const DEFAULT_STATE_PATH = ".role-os/specialist-state.json";
|
|
49
|
+
const DEFAULT_EVENTS_PATH = ".role-os/specialist-events.jsonl";
|
|
50
|
+
const DEFAULT_WINDOW = 200;
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* @typedef {object} DispatchReceipt
|
|
54
|
+
* @property {string} schema
|
|
55
|
+
* @property {string} role
|
|
56
|
+
* @property {string} ts ISO-8601
|
|
57
|
+
* @property {string} trace_id
|
|
58
|
+
* @property {"specialist"|"claude"} route gate's decision
|
|
59
|
+
* @property {"specialist"|"claude"} source where `result` actually came from
|
|
60
|
+
* @property {object} decision from gate.mjs
|
|
61
|
+
* @property {object} [specialist_call]
|
|
62
|
+
* @property {object} [shadow]
|
|
63
|
+
*/
|
|
64
|
+
|
|
65
|
+
/**
 * Run a dispatch through the specialist tier.
 *
 * Flow: load registry + state → gate decision → call the specialist (or `claudeFn`) → optional
 * shadow probe with halt check → best-effort state save → build the receipt. A failed specialist
 * call falls back to `claudeFn`; the receipt then records `route: "specialist"` alongside
 * `source: "claude"`.
 *
 * @param {object} params
 * @param {string} params.role
 * @param {*}      params.input
 * @param {(input:any) => Promise<*>} params.claudeFn   the role's Claude-backed path
 * @param {(specialistVerdict:any, claudeVerdict:any) => boolean} [params.agreeFn]
 *   domain comparator for shadow probes; defaults to strict deep-equal
 * @param {string} params.traceId
 * @param {string} params.nowIso                        ISO-8601; injected for testability
 * @param {object} [params.paths]                       { registry, state, events } file paths
 * @param {object} [params.classifier]                  gate classifier (scoreFn + oodFn)
 * @param {Function} [params.httpFn]                    injectable HTTP for the specialist call
 * @param {object} [params.shadow]                      { K, N, tau } overrides
 * @param {number} [params.windowSize]                  quota window size; default 200
 * @param {number} [params.timeoutMs]                   specialist call timeout
 * @returns {Promise<{ result: *, receipt: DispatchReceipt }>}
 * @throws {Error} when `claudeFn` is not a function, or `traceId` / `nowIso` is not a non-empty string
 */
export async function dispatchSpecialist({
  role,
  input,
  claudeFn,
  agreeFn = strictEqualVerdicts,
  traceId,
  nowIso,
  paths = {},
  classifier = defaultClassifier,
  httpFn,
  shadow: shadowCfg = {},
  windowSize = DEFAULT_WINDOW,
  timeoutMs,
}) {
  if (typeof claudeFn !== "function") {
    throw new Error("dispatchSpecialist: claudeFn (the Claude-backed path) is required");
  }
  if (typeof traceId !== "string" || !traceId) {
    throw new Error("dispatchSpecialist: traceId is required");
  }
  if (typeof nowIso !== "string" || !nowIso) {
    throw new Error("dispatchSpecialist: nowIso is required");
  }

  // Path precedence: explicit `paths` argument, then environment variable, then built-in default.
  const registryPath = paths.registry || process.env.ROLEOS_SPECIALISTS_PATH || DEFAULT_REGISTRY_PATH;
  const statePath = paths.state || process.env.ROLEOS_SPECIALIST_STATE_PATH || DEFAULT_STATE_PATH;
  const eventsPath = paths.events || process.env.ROLEOS_SPECIALIST_EVENTS_PATH || DEFAULT_EVENTS_PATH;

  const K = shadowCfg.K ?? SHADOW_DEFAULTS.K;
  const N = shadowCfg.N ?? SHADOW_DEFAULTS.N;
  const tau = shadowCfg.tau ?? SHADOW_DEFAULTS.TAU;

  // ── Load registry + state ────────────────────────────────────────────────────────────────
  const { byRole, errors: regErrors } = loadRegistry(registryPath);
  let state;
  try { state = loadState(statePath); }
  catch { state = emptyState(); } // a corrupt state file resets; we re-record on the next dispatch
  const entry = byRole.get(role) || null;
  const haltState = entry ? getHalt(state, role) : { halted: false };

  // ── Gate decision ────────────────────────────────────────────────────────────────────────
  const quotaState = quotaStateFor(state, role, windowSize);
  const decision = entry
    ? gate({ role, input, registryEntry: entry, quotaState, haltState, classifier })
    : { route: "claude", reason: "no_registry_entry", score: 0, ood: false, quotaOk: true, detail: regErrors.length ? `registry errors: ${regErrors.join("; ")}` : "no registry entry for role" };

  // ── Route to specialist or Claude ────────────────────────────────────────────────────────
  let specialistCall = null;
  let result;
  let source;
  if (decision.route === "specialist" && entry) {
    const active = entry.versions.find((v) => v.id === entry.active_version);
    specialistCall = await callSpecialist({
      backendUrl: entry.backend_url,
      // If `active_version` matches no entry in `versions`, `active` is undefined. Passing
      // undefined makes callSpecialist answer `no_adapter_id`, so the dispatch fails open to
      // Claude below instead of throwing a TypeError here.
      adapterId: active?.adapter_id,
      role,
      input,
      traceId,
      ...(timeoutMs !== undefined ? { timeoutMs } : {}),
      ...(httpFn ? { httpFn } : {}),
    });
    if (specialistCall.ok) {
      result = specialistCall.verdict;
      source = "specialist";
      recordDispatch(state, role, windowSize, parseIsoMs(nowIso));
    } else {
      // Specialist call failed → fail open to Claude. The gate's "route" was specialist, but
      // the realized source is Claude. Both are recorded in the receipt.
      result = await claudeFn(input);
      source = "claude";
    }
  } else {
    result = await claudeFn(input);
    source = "claude";
  }

  // ── Shadow probe ─────────────────────────────────────────────────────────────────────────
  // Probes only fire when the dispatch actually went to a specialist (source === "specialist").
  // A failed-open dispatch already ran Claude; there is nothing left to probe.
  let shadow = null;
  if (source === "specialist") {
    const c = incrementProbeCounter(state, role);
    if (shouldShadowProbe(c, K)) {
      const claudeVerdict = await claudeFn(input);
      // safeAgree treats a throwing comparator as a disagreement.
      const agreed = !!safeAgree(agreeFn, result, claudeVerdict);
      recordProbe(eventsPath, {
        role,
        ts: nowIso,
        trace_id: traceId,
        agreed,
        specialist_summary: summarize(result),
        claude_summary: summarize(claudeVerdict),
      });
      resetProbeCounter(state, role);
      const { probes, rate, shouldHalt } = checkHalt(eventsPath, role, N, tau);
      shadow = { fired: true, agreed, probes, rate, halt_triggered: shouldHalt };
      // Only record a halt on the transition into the halted state, not on every later probe.
      if (shouldHalt && !getHalt(state, role).halted) {
        const reason = contrastiveHaltMessage({ role, probes, rate, tau });
        setHalt(state, role, { reason, since: nowIso });
        appendHaltEvent(eventsPath, { role, ts: nowIso, reason, probes, agreed: probes - Math.round(probes * (1 - rate)), rate, tau });
      }
    } else {
      shadow = { fired: false, counter: c };
    }
  }

  // ── Persist state ────────────────────────────────────────────────────────────────────────
  try { saveState(statePath, state); } catch { /* best-effort */ }

  // ── Receipt ──────────────────────────────────────────────────────────────────────────────
  const receipt = {
    schema: "roleos-specialist-receipt/v1",
    role,
    ts: nowIso,
    trace_id: traceId,
    route: decision.route,
    source,
    decision,
    ...(specialistCall ? { specialist_call: stripVerdictFromCall(specialistCall) } : {}),
    ...(shadow ? { shadow } : {}),
  };

  return { result, receipt };
}
|
|
208
|
+
|
|
209
|
+
/**
 * Default verdict comparator: two verdicts agree when their JSON serializations are the same
 * string. Key order therefore matters. A verdict that cannot be serialized counts as a
 * disagreement.
 */
function strictEqualVerdicts(a, b) {
  let left;
  let right;
  try {
    left = JSON.stringify(a);
    right = JSON.stringify(b);
  } catch {
    return false;
  }
  return left === right;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Invoke the comparator `fn(a, b)` and return whatever it returns; a comparator that throws is
 * reported as `false` (no agreement) instead of propagating the error.
 */
function safeAgree(fn, a, b) {
  let outcome = false;
  try {
    outcome = fn(a, b);
  } catch {
    outcome = false;
  }
  return outcome;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Convert a timestamp string to epoch milliseconds via Date.parse, returning 0 when the string
 * cannot be parsed.
 */
function parseIsoMs(iso) {
  const ms = Date.parse(iso);
  if (!Number.isFinite(ms)) return 0;
  return ms;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Operator-facing one-liner for the shadow-probe log.
 *
 * null/undefined → "(none)"; strings are cut to 80 characters; numbers and booleans are
 * stringified; everything else is JSON-encoded and cut to 120 characters, or reported as
 * "(unserializable)" when that fails.
 */
function summarize(v) {
  if (v == null) return "(none)";
  switch (typeof v) {
    case "string":
      return v.slice(0, 80);
    case "number":
    case "boolean":
      return String(v);
    default:
      try {
        return JSON.stringify(v).slice(0, 120);
      } catch {
        return "(unserializable)";
      }
  }
}
|
|
232
|
+
|
|
233
|
+
/**
 * Return a shallow copy of a specialist call result without its `verdict` field, so the receipt
 * does not repeat what the caller already receives as `result`.
 */
function stripVerdictFromCall(call) {
  const { verdict: _omitted, ...withoutVerdict } = call;
  return withoutVerdict;
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Specialist event log — append-only JSONL of operator actions (promote, rollback, halt,
|
|
3
|
+
* clear-halt) and shadow-probe outcomes. Default path:
|
|
4
|
+
* `<repo>/.role-os/specialist-events.jsonl`. Override with `ROLEOS_SPECIALIST_EVENTS_PATH`.
|
|
5
|
+
*
|
|
6
|
+
* Hides one secret family: the history representation. Operators read the log; the rest of
|
|
7
|
+
* the tier sees `appendEvent` / `readEvents` and does not parse JSONL.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { appendFileSync, readFileSync, existsSync, mkdirSync } from "node:fs";
|
|
11
|
+
import { dirname } from "node:path";
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* @typedef {object} SpecialistEvent
|
|
15
|
+
* @property {string} kind one of: promote | rollback | halt | clear-halt | shadow-probe
|
|
16
|
+
* @property {string} role
|
|
17
|
+
* @property {string} ts ISO-8601
|
|
18
|
+
* @property {object} [data] kind-specific payload
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/**
 * Append one event to the JSONL log as a single JSON line, creating the log's parent directory
 * (recursively) first if it does not exist.
 *
 * @param {string} path   log file path
 * @param {object} event  the event to record
 */
export function appendEvent(path, event) {
  const parentDir = dirname(path);
  mkdirSync(parentDir, { recursive: true });
  const line = `${JSON.stringify(event)}\n`;
  appendFileSync(path, line, "utf8");
}
|
|
26
|
+
|
|
27
|
+
/**
 * Load events from the JSONL log in file order (oldest first), optionally narrowed by role
 * and/or kind. A missing file yields an empty list. Blank lines, lines that are not valid JSON,
 * and lines that parse to a non-object are dropped without complaint — the log is
 * operator-edited in a pinch.
 *
 * @param {string} path
 * @param {object} [filter]
 * @param {string} [filter.role]           keep only events for this role
 * @param {string|string[]} [filter.kind]  keep only events of this kind / one of these kinds
 * @returns {SpecialistEvent[]}
 */
export function readEvents(path, filter = {}) {
  if (!existsSync(path)) return [];
  const raw = readFileSync(path, "utf8");

  let wantedKinds = null;
  if (filter.kind) {
    wantedKinds = new Set(Array.isArray(filter.kind) ? filter.kind : [filter.kind]);
  }

  // JSON.parse never yields undefined, so undefined is a safe "did not parse" marker.
  const parseLine = (line) => {
    try {
      return JSON.parse(line);
    } catch {
      return undefined;
    }
  };

  return raw
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .map(parseLine)
    .filter((ev) => {
      if (!ev || typeof ev !== "object") return false;
      if (filter.role && ev.role !== filter.role) return false;
      return !wantedKinds || wantedKinds.has(ev.kind);
    });
}
|