role-os 2.7.1 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/README.es.md +14 -1
- package/README.fr.md +130 -117
- package/README.hi.md +125 -112
- package/README.it.md +14 -1
- package/README.ja.md +14 -1
- package/README.md +14 -1
- package/README.pt-BR.md +14 -1
- package/README.zh.md +136 -123
- package/package.json +1 -1
- package/src/dispatch.mjs +3 -1
- package/src/dossier-block.mjs +74 -0
- package/src/hooks.mjs +125 -14
- package/src/role-dossiers.json +962 -0
- package/src/specialist/capability-gate.mjs +124 -0
- package/src/specialist/conformance-consult.mjs +322 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capability gate — deterministic least-privilege (POLA) on IRREVERSIBLE tool calls. The one
|
|
3
|
+
* security primitive worth owning internally (memory: oversight-specialist-mint-strategy.md,
|
|
4
|
+
* 2026-06-08 posture): it bounds what any agent action can DO, so a WRONG verdict — an honest crew
|
|
5
|
+
* mistake OR (later) an adversarial flip — can never trigger an unauthorized irreversible action.
|
|
6
|
+
* The PREVENTIVE complement to NAMED_COMPENSATORS (which undoes an irreversible action; this stops
|
|
7
|
+
* the unauthorized one before it happens). Same action set, two halves.
|
|
8
|
+
*
|
|
9
|
+
* Grounded in the object-capability model / POLA and CaMeL (Debenedetti et al. 2025,
|
|
10
|
+
* arXiv:2503.18813: control/data-flow separation + capabilities, model UNMODIFIED). Deterministic,
|
|
11
|
+
* NO model — least-privilege, not an arms race.
|
|
12
|
+
*
|
|
13
|
+
* Safe by construction:
|
|
14
|
+
* - OPT-IN, default OFF (ROLEOS_CAPABILITY_GATE). Disabled => never denies (pure no-op), so it can
|
|
15
|
+
* never disrupt an existing flow until the director turns it on.
|
|
16
|
+
* - Scoped to a SMALL gated set (the NAMED_COMPENSATORS irreversible-action list). Every non-gated
|
|
17
|
+
* tool and every read-only tool is untouched.
|
|
18
|
+
* - FAIL-CLOSED for the gated set: a gated action with NO matching capability grant is denied, with
|
|
19
|
+
* a reason telling the director how to grant it. (Distinct from the conformance floor, which is
|
|
20
|
+
* advisory / fail-open — a missed nonconformance is cheap; an unauthorized irreversible action is
|
|
21
|
+
* not, so its asymmetry runs the other way.)
|
|
22
|
+
*
|
|
23
|
+
* The grant manifest (`.claude/role-os/capabilities.json`, director-authored) maps an action id to a
|
|
24
|
+
* grant, e.g.: { "npm:publish": { "granted": true, "scope": "@mcptoolshop/roll", "expires": "2026-07-01" } }
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
28
|
+
import { join } from "node:path";
|
|
29
|
+
|
|
30
|
+
export const CAPABILITIES_FILE = ".claude/role-os/capabilities.json";
|
|
31
|
+
|
|
32
|
+
/**
 * Report whether the capability gate is switched on.
 *
 * The gate is opt-in: only the exact values "1" or "true" in the
 * ROLEOS_CAPABILITY_GATE environment variable enable it; anything else
 * (including an unset variable) leaves it off.
 *
 * @returns {boolean} true when the gate is enabled
 */
export function capabilityGateEnabled() {
  const enablingValues = ["1", "true"];
  return enablingValues.includes(process.env.ROLEOS_CAPABILITY_GATE);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build a gated-action matcher for shell commands.
 *
 * The returned predicate is true only for a "Bash" tool call whose `command`
 * is a string matched by `re`.
 */
const _bash = (re) => {
  return function matchesBashCommand(toolName, call) {
    if (toolName !== "Bash") return false;
    const command = call?.command;
    if (typeof command !== "string") return false;
    return re.test(command);
  };
};
|
|
40
|
+
|
|
41
|
+
/**
 * The GATED SET: the irreversible / world-touching actions the capability gate guards.
 *
 * Every entry has the shape { id, label, test(toolName, call) -> boolean }. Matching is a
 * deterministic regex test on the Bash command text and is deliberately broad: a benign match
 * only costs the director a one-line grant.
 */
export const GATED_ACTIONS = [
  ["npm:publish", "npm/pnpm/yarn publish", /\b(?:npm|pnpm|yarn)\s+publish\b/],
  ["pypi:publish", "PyPI publish (twine/uv)", /\btwine\s+upload\b|\buv\s+publish\b/],
  ["gh:release", "gh release create", /\bgh\s+release\s+create\b/],
  ["gh:pr-create", "gh pr create", /\bgh\s+pr\s+create\b/],
  ["gh:repo-edit", "gh repo edit/delete", /\bgh\s+repo\s+(?:edit|delete)\b/],
  ["git:push", "git push", /\bgit\s+push\b/],
  ["pages:deploy", "GitHub Pages / gh-pages deploy", /\bgh-pages\b|\bpages\s+deploy\b/],
].map(([id, label, pattern]) => ({ id, label, test: _bash(pattern) }));
|
|
56
|
+
|
|
57
|
+
/**
 * Load the capability manifest found under `cwd`.
 *
 * Any problem — file absent, unreadable, invalid JSON, or JSON that is not an object —
 * yields an empty manifest, which means nothing is granted.
 *
 * @param {string} cwd directory the manifest path is resolved against
 * @returns {object} the parsed manifest, or {} when it cannot be used
 */
export function loadCapabilities(cwd) {
  let parsed;
  try {
    const manifestPath = join(cwd, CAPABILITIES_FILE);
    if (!existsSync(manifestPath)) return {};
    parsed = JSON.parse(readFileSync(manifestPath, "utf-8"));
  } catch {
    return {};
  }
  if (parsed !== null && typeof parsed === "object") return parsed;
  return {};
}
|
|
68
|
+
|
|
69
|
+
/**
 * Is `actionId` granted in the manifest?
 *
 * A grant counts only when its entry is an object with `granted: true` and, if an `expires`
 * value is present, that value is a parseable date string that is not in the past.
 *
 * Fail-closed on expiry: an `expires` that cannot be interpreted (an unparseable string, or a
 * non-string value) DENIES the grant. Silently ignoring it would turn a mistyped expiry into a
 * grant that never expires.
 *
 * Note: `scope` is not evaluated here; only `granted` and `expires` are.
 *
 * @param {object} manifest capability manifest (action id -> grant)
 * @param {string} actionId gated action id, e.g. "npm:publish"
 * @param {number} now      current time in epoch milliseconds
 * @returns {boolean} true when the action is currently granted
 */
function _granted(manifest, actionId, now) {
  const g = manifest && manifest[actionId];
  if (!g || typeof g !== "object" || g.granted !== true) return false;
  if (g.expires == null) return true; // no expiry configured
  if (typeof g.expires !== "string") return false; // uninterpretable expiry -> deny
  const t = Date.parse(g.expires);
  if (Number.isNaN(t)) return false; // unparseable expiry -> deny
  return !(t < now); // expired grants are denied
}
|
|
79
|
+
|
|
80
|
+
/**
 * Capability gate for a proposed tool call.
 *
 * OPT-IN: when disabled it ALWAYS returns { denied:false } (pure no-op). When enabled, a call
 * that matches one or more gated irreversible actions is allowed ONLY if the manifest grants
 * EVERY matched capability id; otherwise it is DENIED (fail-closed) with a reason telling the
 * director how to grant the first missing one.
 *
 * All matches are checked, not just the first: a compound command such as
 * `npm publish && git push` matches two gated actions, and a grant for one must not
 * authorize the other.
 *
 * @param {string} cwd        directory the manifest is loaded from
 * @param {string} toolName   name of the tool being called
 * @param {object} toolInput  the tool call's arguments
 * @param {object} [opts] - { force } run the gate regardless of the env flag (tests);
 *   { capabilities } inject a manifest (tests); { now } epoch ms for expiry (tests)
 * @returns {{ denied: boolean, action?: string, reason?: string }}
 */
export function capabilityGate(cwd, toolName, toolInput, opts = {}) {
  if (!opts.force && !capabilityGateEnabled()) return { denied: false };
  // Populated only once matching has completed, so the catch below can tell "a gated action
  // matched but the grant could not be evaluated" apart from "nothing matched".
  let matched = [];
  try {
    const call = toolInput && typeof toolInput === "object" ? toolInput : {};
    matched = GATED_ACTIONS.filter((a) => a.test(toolName, call));
    if (matched.length === 0) return { denied: false }; // not an irreversible action -> allow
    const manifest = opts.capabilities || loadCapabilities(cwd);
    const now = typeof opts.now === "number" ? opts.now : Date.now();
    const blocked = matched.find((a) => !_granted(manifest, a.id, now));
    if (!blocked) return { denied: false }; // every matched capability is granted
    return {
      denied: true,
      action: blocked.id,
      reason:
        `Capability gate: "${blocked.label}" is an irreversible action requiring an explicit grant. ` +
        `No capability "${blocked.id}" is granted in ${CAPABILITIES_FILE}. To authorize it, the ` +
        `director adds {"${blocked.id}": {"granted": true}} (optionally with "scope"/"expires").`,
    };
  } catch {
    // A gate that errors must not silently allow an irreversible action: if a gated action matched
    // but the grant cannot be evaluated, fail CLOSED (deny). If nothing matched, allow.
    if (matched.length > 0) {
      const first = matched[0];
      return {
        denied: true,
        action: first.id,
        reason: `Capability gate errored evaluating the grant for "${first.id}"; failing closed on an irreversible action.`,
      };
    }
    return { denied: false };
  }
}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conformance consult — the production seam that checks a proposed tool CALL against its TOOL's schema
|
|
3
|
+
* and documented contract, attaching an ADVISORY conformance verdict + receipt. Oversight wedge #1.
|
|
4
|
+
*
|
|
5
|
+
* Two-stage, mirroring prism's deterministic-floor + LLM-ceiling design:
|
|
6
|
+
* 1. DETERMINISTIC schema floor (L1 type / L2 required / L3 enum-range) — mechanical + infallible,
|
|
7
|
+
* NO model. A floor violation short-circuits to `nonconformant` with the offending clause; the
|
|
8
|
+
* LLM is never consulted for something a validator can prove.
|
|
9
|
+
* 2. LLM specialist (L4 semantic-contract / L5 intent-vs-action) via the proven dispatchSpecialist
|
|
10
|
+
* fail-open gate — only when the floor passes.
|
|
11
|
+
*
|
|
12
|
+
* Safe by construction:
|
|
13
|
+
* - OPT-IN, default OFF (ROLEOS_CONFORMANCE_CONSULT). Disabled => `{verdict: null}`, a pure no-op.
|
|
14
|
+
* - ADVISORY. It returns a verdict + receipt; it never blocks a dispatch (callers decide).
|
|
15
|
+
* - FAIL-OPEN to ABSTAIN (escalate), NEVER to `conformant`. The cost-asymmetric error is a false
|
|
16
|
+
* `conformant` (a bad call waved through), so an unavailable/erroring LLM degrades to "I can't
|
|
17
|
+
* verify the semantics — escalate", never to "looks fine".
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { dispatchSpecialist } from "./dispatch.mjs";
|
|
21
|
+
|
|
22
|
+
/** Role name passed to dispatchSpecialist and stamped on the receipts built in this module. */
export const CONFORMANCE_ROLE = "Tool-Call Conformance";
|
|
23
|
+
|
|
24
|
+
/** Type predicates for the schema floor's L1 check, keyed by the schema `type` name. */
const _TYPE_OK = {
  string(v) {
    return typeof v === "string";
  },
  integer(v) {
    return Number.isInteger(v);
  },
  number(v) {
    return typeof v === "number";
  },
  boolean(v) {
    return typeof v === "boolean";
  },
  array(v) {
    return Array.isArray(v);
  },
  object(v) {
    // Plain-object shape only: null and arrays are excluded.
    return typeof v === "object" && v !== null && !Array.isArray(v);
  },
};
|
|
32
|
+
|
|
33
|
+
/**
 * Deterministic schema floor (L1-L3). Returns { verdict: "nonconformant"|null, violations: string[] }.
 * `null` verdict = no mechanical violation found -> defer to the LLM for the semantic contract.
 *
 * Checks, in order:
 *   - L2: every `required` param is an OWN property of the call (inherited names such as
 *         `constructor` or `toString` do not count as supplied);
 *   - L1: each supplied arg that has a declared, known `type` satisfies it;
 *   - L3: each supplied arg is in its declared `enum` and does not exceed its declared `max`.
 * Args with no matching param are ignored here (left to the contract check). Param entries
 * that are not objects are skipped.
 *
 * @param {object} tool  tool definition; only `tool.params` is read
 * @param {object} call  the proposed arguments
 * @returns {{ verdict: "nonconformant"|null, violations: string[] }}
 */
export function schemaFloor(tool, call) {
  const own = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const declared = Array.isArray(tool?.params) ? tool.params : [];
  const params = declared.filter((p) => p && typeof p === "object");
  const args = call && typeof call === "object" ? call : {};
  // A Map (keyed by the stringified name) cannot resolve inherited Object.prototype members.
  const byName = new Map(params.map((p) => [String(p.name), p]));
  const violations = [];
  for (const p of params) { // L2 required
    if (p.required && !own(args, p.name)) violations.push(`missing required '${p.name}'`);
  }
  for (const [k, v] of Object.entries(args)) { // L1 type + L3 enum/range
    const p = byName.get(k);
    if (!p) continue; // unknown arg: not floor-fatal (leave to contract)
    if (p.type && own(_TYPE_OK, p.type) && !_TYPE_OK[p.type](v)) violations.push(`'${k}' should be ${p.type}`);
    if (Array.isArray(p.enum) && !p.enum.includes(v)) violations.push(`'${k}' not in enum ${JSON.stringify(p.enum)}`);
    if (typeof p.max === "number" && typeof v === "number" && v > p.max) violations.push(`'${k}' exceeds max ${p.max}`);
  }
  return violations.length ? { verdict: "nonconformant", violations } : { verdict: null, violations: [] };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Turn a constraint OPERAND into a concrete value, using the call args and optional state.
 *
 * Operand forms, checked in this order for object operands:
 *   - "name"     (bare string) -> the call field `name`
 *   - { value }  -> that literal
 *   - { state }  -> key lookup in `state`, only when `state` is an object (else undefined)
 *   - { field }  -> the named call field
 *   - { len }    -> element count of an array call field, or key count of an object call
 *                   field; undefined for anything else
 *   - { enum }   -> that literal array
 *   - any other value (number, boolean, array, unrecognised object, null) -> returned unchanged
 *
 * @param {*} op          the operand
 * @param {object} call   call arguments (may be null/undefined)
 * @param {*} [state]     structured state, if any
 * @returns {*} the resolved value, or undefined when it cannot be resolved
 */
export function resolveOperand(op, call, state) {
  const fromCall = (key) => (call ? call[key] : undefined);

  if (typeof op === "string") return fromCall(op); // bare = call field ref

  const isSpec = Boolean(op) && typeof op === "object" && !Array.isArray(op);
  if (!isSpec) return op; // numeric / array / boolean literal

  if ("value" in op) return op.value;
  if ("state" in op) {
    if (state && typeof state === "object") return state[op.state];
    return undefined;
  }
  if ("field" in op) return fromCall(op.field);
  if ("len" in op) {
    const target = fromCall(op.len);
    if (Array.isArray(target)) return target.length;
    if (target && typeof target === "object") return Object.keys(target).length;
    return undefined;
  }
  if ("enum" in op) return op.enum;
  return op;
}
|
|
81
|
+
|
|
82
|
+
/** Coerce to a number: numbers pass through, non-blank numeric strings are parsed, all else is NaN. */
const _NUM = (x) => {
  if (typeof x === "number") return x;
  if (typeof x !== "string" || x.trim() === "") return NaN;
  return Number(x); // NaN for a non-numeric string
};

/** Tolerance used when comparing two numbers for equality. */
const _EPS = 1e-9;

/** Equality: numbers compare within _EPS, everything else by strict identity. */
const _EQ = (a, b) => {
  const bothNumbers = typeof a === "number" && typeof b === "number";
  return bothNumbers ? Math.abs(a - b) < _EPS : a === b;
};

/** Comparison operators of the constraint DSL, keyed by op name. */
const _CMP = {
  lt: (a, b) => a < b,
  le: (a, b) => a <= b,
  gt: (a, b) => a > b,
  ge: (a, b) => a >= b,
  eq: _EQ,
  ne: (a, b) => !_EQ(a, b),
};
|
|
90
|
+
/**
 * Human-readable label for an operand, used in violation messages.
 * Object operands are labelled by the first of len / value / state / field they carry;
 * other objects are JSON-encoded and non-objects are stringified.
 */
const _opLabel = (op) => {
  if (!op || typeof op !== "object") return String(op);
  if ("len" in op) return `len(${op.len})`;
  if ("value" in op) return JSON.stringify(op.value);
  if ("state" in op) return `state.${op.state}`;
  if ("field" in op) return op.field;
  return JSON.stringify(op);
};
|
|
92
|
+
|
|
93
|
+
/**
 * Gather the numeric addends for a `sum` constraint.
 *   - `of` is a string: the elements of that array call field, or the values of that object
 *     call field;
 *   - `of` is an array: the call fields it names, one addend each.
 * Returns null when the source is missing or of another shape, or when any addend is not numeric.
 */
function _collectSum(of, call) {
  let raw;
  if (typeof of === "string") {
    const held = call ? call[of] : undefined;
    if (Array.isArray(held)) raw = held;
    else if (held && typeof held === "object") raw = Object.values(held);
    else return null;
  } else if (Array.isArray(of)) { // a list of scalar field names to add
    raw = of.map((name) => (call ? call[name] : undefined));
  } else {
    return null;
  }
  const addends = raw.map((item) => _NUM(item));
  for (const n of addends) {
    if (Number.isNaN(n)) return null;
  }
  return addends;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Deterministic CONTRACT floor (computable L4). Evaluates `tool.constraints` — a small structured
 * DSL for relational/arithmetic contracts (summation, additive bounds, cardinality, ordering,
 * mutual-exclusion) that a checker evaluates exactly. A constraint that cannot be evaluated from
 * (call + state) is SKIPPED (deferred to the LLM); the floor only ever PROVES a violation, never
 * asserts conformance.
 *
 * Constraint kinds:
 *   - cmp      { left, op, right }        ordering ops compare numerically; eq/ne compare values
 *   - sum      { of, op?, vs }            sum of addends against a cap; `op` defaults to eq
 *   - present  { fields, count? }         exactly `count` (default 1) of the fields supplied
 *   - requires { when, require }          when the condition holds, `require` fields must be supplied
 *   - distinct { fields }                 the listed fields must hold different values
 *   - member   { field, in }              the field must be an element of the resolved array
 *   - char_at  { string, index, equals, offset? }  substring at index must equal the expected text
 *
 * "Supplied" means an OWN property of the call: inherited names (`constructor`, `toString`, ...)
 * never count as present. Malformed constraint pieces (non-array `fields`, non-object `when`)
 * are treated as empty rather than throwing.
 *
 * @param {object} tool   tool definition; only `tool.constraints` is read
 * @param {object} call   the proposed arguments
 * @param {*} [state]     structured state used to resolve `{ state }` operands
 * @returns {{ verdict: "nonconformant"|null, violations: string[] }}
 */
export function contractFloor(tool, call, state) {
  const constraints = Array.isArray(tool?.constraints) ? tool.constraints : [];
  const args = call && typeof call === "object" ? call : {};
  const has = (key) => Object.prototype.hasOwnProperty.call(args, key);
  const own = (key) => (has(key) ? args[key] : undefined);
  const list = (x) => (Array.isArray(x) ? x : []);
  // Resolve a comparison op by OWN key so names like "toString" are not mistaken for operators.
  const cmpFn = (op) =>
    (typeof op === "string" && Object.prototype.hasOwnProperty.call(_CMP, op) ? _CMP[op] : undefined);
  const v = [];
  for (const c of constraints) {
    if (!c || typeof c !== "object") continue;
    switch (c.kind) {
      case "cmp": {
        const a = resolveOperand(c.left, args, state);
        const b = resolveOperand(c.right, args, state);
        if (a === undefined || b === undefined) break;
        const fn = cmpFn(c.op);
        if (!fn) break;
        if (["lt", "le", "gt", "ge"].includes(c.op)) {
          const na = _NUM(a);
          const nb = _NUM(b);
          if (Number.isNaN(na) || Number.isNaN(nb)) break;
          if (!fn(na, nb)) v.push(`${_opLabel(c.left)}(${na}) not ${c.op} ${_opLabel(c.right)}(${nb})`);
        } else if (!fn(a, b)) {
          v.push(`${_opLabel(c.left)}(${JSON.stringify(a)}) not ${c.op} ${_opLabel(c.right)}(${JSON.stringify(b)})`);
        }
        break;
      }
      case "sum": {
        const vals = _collectSum(c.of, args);
        const cap = _NUM(resolveOperand(c.vs, args, state));
        if (vals === null || Number.isNaN(cap)) break;
        const s = vals.reduce((x, y) => x + y, 0);
        // Report the operator actually applied: a missing/unknown op falls back to eq.
        const opName = cmpFn(c.op) ? c.op : "eq";
        if (!_CMP[opName](s, cap)) v.push(`sum(${_opLabel(c.of)})=${s} not ${opName} ${cap}`);
        break;
      }
      case "present": { // exactly `count` (default 1) of the listed optional fields present
        const want = c.count == null ? 1 : c.count;
        const fields = list(c.fields);
        const cnt = fields.filter((f) => has(f)).length;
        if (cnt !== want) v.push(`expected exactly ${want} of [${fields.join(", ")}] present, got ${cnt}`);
        break;
      }
      case "requires": { // conditional co-requirement
        const w = c.when && typeof c.when === "object" ? c.when : {};
        const val = own(w.field);
        const hit = "equals" in w ? val === w.equals : Array.isArray(w.in) ? w.in.includes(val) : has(w.field);
        if (hit) {
          const missing = list(c.require).filter((f) => !has(f));
          if (missing.length) v.push(`when ${w.field}=${JSON.stringify(val)} requires [${missing.join(", ")}] present`);
        }
        break;
      }
      case "distinct": {
        const fields = list(c.fields);
        const vals = fields.map((f) => own(f));
        if (vals.some((x) => x === undefined)) break;
        const seen = new Set();
        for (const x of vals) {
          const k = x && typeof x === "object" ? JSON.stringify(x) : String(x);
          if (seen.has(k)) { v.push(`fields [${fields.join(", ")}] must be distinct (duplicate ${k})`); break; }
          seen.add(k);
        }
        break;
      }
      case "member": {
        const val = own(c.field);
        const set = resolveOperand(c.in, args, state);
        if (val === undefined || !Array.isArray(set)) break;
        if (!set.includes(val)) v.push(`'${c.field}'=${JSON.stringify(val)} not in ${JSON.stringify(set)}`);
        break;
      }
      case "char_at": {
        // `offset` (default 0) adjusts for index bases — e.g. offset:-1 for a 1-based `position`.
        const s = own(c.string);
        const want = resolveOperand(c.equals, args, state);
        const idx = _NUM(resolveOperand(c.index, args, state)) + (typeof c.offset === "number" ? c.offset : 0);
        if (typeof s !== "string" || want == null) break;
        // An index that is not a non-negative integer cannot be evaluated: skip rather than
        // guess (a negative start would otherwise be counted from the end of the string).
        if (!Number.isInteger(idx) || idx < 0) break;
        // Compare text to text: a numeric expected value must not fail merely on its type.
        const expected = String(want);
        const got = s.slice(idx, idx + expected.length);
        if (got !== expected) v.push(`${c.string}[${idx}..]='${got}' != expected '${expected}'`);
        break;
      }
      default:
        break; // unknown kind: ignore (forward-compatible), never assert conformance
    }
  }
  return v.length ? { verdict: "nonconformant", violations: v } : { verdict: null, violations: [] };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Report whether the conformance consult is switched on.
 *
 * Off by default: only the exact values "1" or "true" in the
 * ROLEOS_CONFORMANCE_CONSULT environment variable enable it.
 *
 * @returns {boolean} true when the consult is enabled
 */
export function conformanceConsultEnabled() {
  const enablingValues = ["1", "true"];
  return enablingValues.includes(process.env.ROLEOS_CONFORMANCE_CONSULT);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Render a tool (plus optional intent and state) as the multi-line EVIDENCE text.
 *
 * Lines, in order: TOOL, CONTRACT, PARAMS (each param keyed by name, with `name` removed from
 * its value), then STATE and INTENT when they are truthy. An explicit `state` argument takes
 * precedence over `tool.state`; a string state is emitted verbatim, anything else as JSON.
 *
 * @param {object} tool      { name, contract, params, state? }
 * @param {string} [intent]  the stated goal
 * @param {*} [state]        state override
 * @returns {string} the evidence text
 */
export function evidenceFor(tool, intent, state) {
  const params = Array.isArray(tool?.params) ? tool.params : [];
  const schema = {};
  params.forEach((param) => {
    const { name, ...details } = param;
    schema[name] = details;
  });

  const lines = [
    `TOOL: ${tool?.name}`,
    `CONTRACT: ${tool?.contract}`,
    `PARAMS (JSON schema): ${JSON.stringify(schema)}`,
  ];

  const effectiveState = state ?? tool?.state;
  if (effectiveState) {
    const rendered = typeof effectiveState === "string" ? effectiveState : JSON.stringify(effectiveState);
    lines.push(`STATE: ${rendered}`);
  }
  if (intent) lines.push(`INTENT: ${intent}`);

  return lines.join("\n");
}
|
|
214
|
+
|
|
215
|
+
/**
 * Render the proposed call as the CLAIM text: `CALL: <tool name>(<args as JSON>)`.
 * A null/undefined call is rendered as an empty argument object.
 *
 * @param {object} tool  tool definition; only `tool.name` is read
 * @param {object} call  the proposed arguments
 * @returns {string} the claim text
 */
export function claimFor(tool, call) {
  const toolName = tool?.name;
  const payload = call ?? {};
  return `CALL: ${toolName}(${JSON.stringify(payload)})`;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Merge catalogued constraints (and structured state) into a tool definition.
 *
 * The catalog is keyed by tool name. When it has no entry for the tool, the tool is returned
 * as-is. Otherwise a copy is returned in which:
 *   - `constraints` is the tool's own non-empty list if it has one, else the entry's list
 *     (or an empty array);
 *   - `state` is set from the entry's `state_struct` only when the tool has no state of its own.
 *
 * @param {object} tool     tool definition
 * @param {object} catalog  map of tool name -> { constraints?, state_struct? }
 * @returns {object} the enriched copy, or the original tool when nothing applies
 */
export function withToolConstraints(tool, catalog) {
  if (!tool || !catalog) return tool;
  const entry = catalog[tool.name];
  if (!entry) return tool;

  const hasInlineConstraints = tool.constraints && tool.constraints.length;
  const enriched = {
    ...tool,
    constraints: hasInlineConstraints ? tool.constraints : entry.constraints || [],
  };
  if (entry.state_struct && tool.state == null) {
    enriched.state = entry.state_struct;
  }
  return enriched;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Compare two verdicts for agreement (used by shadow probes).
 * Each side may be a bare label or an object carrying `verdict`; the labels are compared strictly.
 *
 * @param {*} s  first verdict (string or { verdict })
 * @param {*} c  second verdict (string or { verdict })
 * @returns {boolean} true when both resolve to the same label
 */
export function conformanceAgree(s, c) {
  const labelOf = (v) => {
    if (v && typeof v === "object") return v.verdict;
    return v;
  };
  const left = labelOf(s);
  const right = labelOf(c);
  return left === right;
}
|
|
241
|
+
|
|
242
|
+
/**
 * Check one tool-call's conformance. Deterministic floor first, then the LLM specialist via the
 * fail-open gate. Advisory + opt-in; errors from the floor and from the specialist dispatch are
 * caught and never propagate to the caller.
 *
 * The two floor rungs (schema, contract) are evaluated INDEPENDENTLY: if one rung throws, the
 * violations proven by the other are still honoured, and the failure is recorded in
 * `floor.error`. A proven violation is never discarded because an unrelated rung errored.
 *
 * @param {object} args
 * @param {object} args.tool      { name, contract, params:[{name,type,required,enum?,max?}] }
 * @param {object} args.call      the proposed arguments
 * @param {string} [args.intent]  the stated goal (enables the L5 intent check)
 * @param {*}      [args.state]   state override; defaults to `tool.state`
 * @param {object} [opts]
 * @param {boolean} [opts.enabled]   default: conformanceConsultEnabled()
 * @param {object}  [opts.paths]     { registry, state, events } for dispatchSpecialist
 * @param {string}  [opts.nowIso]
 * @param {string}  [opts.traceId]
 * @param {Function}[opts.httpFn]    injectable HTTP for the specialist call (tests)
 * @param {object}  [opts.classifier]
 * @param {object}  [opts.shadow]
 * @returns {Promise<{verdict: string|null, source: string, receipt?: object, floor?: object}>}
 */
export async function consultConformance({ tool, call, intent, state } = {}, opts = {}) {
  const { enabled = conformanceConsultEnabled(), paths, nowIso, traceId, httpFn, classifier, shadow } = opts ?? {};
  if (!enabled) return { verdict: null, source: "disabled" };

  const ts = nowIso || new Date().toISOString();
  const st = state ?? tool?.state;
  const describe = (err) => String(err && err.message ? err.message : err);

  // 1) Deterministic floor — a PROVABLE violation never needs the LLM. Two rungs:
  //    1a schema (L1 type / L2 required / L3 enum-range), 1b computable contract (relational/
  //    arithmetic L4). Each rung runs in isolation; both only ever PROVE, never assert conformance.
  const rungErrors = [];
  const runRung = (rung) => {
    try {
      const out = rung();
      return Array.isArray(out?.violations) ? out.violations : [];
    } catch (err) {
      rungErrors.push(describe(err));
      return [];
    }
  };
  const schemaViolations = runRung(() => schemaFloor(tool, call));
  const contractViolations = runRung(() => contractFloor(tool, call, st));
  const violations = [...schemaViolations, ...contractViolations];
  const floor = {
    verdict: violations.length ? "nonconformant" : null,
    violations,
    schema: schemaViolations,
    contract: contractViolations,
    ...(rungErrors.length ? { error: rungErrors.join("; ") } : {}),
  };

  if (floor.verdict === "nonconformant") {
    const reason = contractViolations.length && !schemaViolations.length
      ? "contract_violation" : "schema_violation";
    return {
      verdict: "nonconformant",
      source: "floor",
      floor,
      receipt: {
        schema: "roleos-specialist-receipt/v1", role: CONFORMANCE_ROLE, ts, source: "floor",
        decision: { route: "floor", reason, detail: floor.violations.join("; ") },
      },
    };
  }

  // 2) Floor passed -> consult the LLM specialist (residual semantic-contract + L5 intent).
  try {
    const { result, receipt } = await dispatchSpecialist({
      role: CONFORMANCE_ROLE,
      input: { tool, call, intent, state: st, evidence: evidenceFor(tool, intent, st), claim: claimFor(tool, call) },
      // SAFE fail-open: never "conformant" — an unverifiable semantic check escalates, not waves through.
      claudeFn: async () => ({ verdict: "abstain", source: "floor-pass-llm-unavailable" }),
      agreeFn: conformanceAgree,
      traceId: traceId || `conformance-${ts}`,
      nowIso: ts,
      ...(paths ? { paths } : {}),
      ...(httpFn ? { httpFn } : {}),
      ...(classifier ? { classifier } : {}),
      ...(shadow ? { shadow } : {}),
    });
    const verdict = result && typeof result === "object" && "verdict" in result ? result.verdict : result;
    return { verdict: verdict ?? "abstain", source: receipt.source, receipt, floor };
  } catch (err) {
    // A conformance consult must never break the caller — escalate (abstain), never wave through.
    return {
      verdict: "abstain", source: "consult-error", floor,
      receipt: {
        schema: "roleos-specialist-receipt/v1", role: CONFORMANCE_ROLE, ts, source: "consult-error",
        error: describe(err),
      },
    };
  }
}
|