role-os 2.7.1 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.es.md +7 -0
- package/README.fr.md +124 -117
- package/README.hi.md +119 -112
- package/README.it.md +7 -0
- package/README.ja.md +7 -0
- package/README.md +7 -0
- package/README.pt-BR.md +7 -0
- package/README.zh.md +130 -123
- package/package.json +1 -1
- package/src/hooks.mjs +125 -14
- package/src/specialist/capability-gate.mjs +124 -0
- package/src/specialist/conformance-consult.mjs +322 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conformance consult — the production seam that checks a proposed tool CALL against its TOOL's schema
|
|
3
|
+
* and documented contract, attaching an ADVISORY conformance verdict + receipt. Oversight wedge #1.
|
|
4
|
+
*
|
|
5
|
+
* Two-stage, mirroring prism's deterministic-floor + LLM-ceiling design:
|
|
6
|
+
* 1. DETERMINISTIC schema floor (L1 type / L2 required / L3 enum-range) — mechanical + infallible,
|
|
7
|
+
* NO model. A floor violation short-circuits to `nonconformant` with the offending clause; the
|
|
8
|
+
* LLM is never consulted for something a validator can prove.
|
|
9
|
+
* 2. LLM specialist (L4 semantic-contract / L5 intent-vs-action) via the proven dispatchSpecialist
|
|
10
|
+
* fail-open gate — only when the floor passes.
|
|
11
|
+
*
|
|
12
|
+
* Safe by construction:
|
|
13
|
+
* - OPT-IN, default OFF (ROLEOS_CONFORMANCE_CONSULT). Disabled => `{verdict: null}`, a pure no-op.
|
|
14
|
+
* - ADVISORY. It returns a verdict + receipt; it never blocks a dispatch (callers decide).
|
|
15
|
+
* - FAIL-OPEN to ABSTAIN (escalate), NEVER to `conformant`. The cost-asymmetric error is a false
|
|
16
|
+
* `conformant` (a bad call waved through), so an unavailable/erroring LLM degrades to "I can't
|
|
17
|
+
* verify the semantics — escalate", never to "looks fine".
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { dispatchSpecialist } from "./dispatch.mjs";
|
|
21
|
+
|
|
22
|
+
// Role name this module passes to dispatchSpecialist and stamps on the receipts it builds itself.
export const CONFORMANCE_ROLE = "Tool-Call Conformance";
|
|
23
|
+
|
|
24
|
+
// Per-type predicates for the L1 type check, keyed by the `type` name a param may declare.
const _TYPE_OK = {
  string: (v) => typeof v === "string",
  integer: (v) => Number.isInteger(v),
  number: (v) => typeof v === "number",
  boolean: (v) => typeof v === "boolean",
  array: (v) => Array.isArray(v),
  object: (v) => v !== null && typeof v === "object" && !Array.isArray(v),
};

/**
 * Deterministic schema floor: L1 type, L2 required, L3 enum / max.
 *
 * Only ever PROVES a violation. A `null` verdict means "no mechanical violation found",
 * not "conformant" — the caller defers to the LLM for the semantic contract.
 *
 * @param {object} tool  tool definition; only `tool.params` (array of {name,type,required,enum?,max?}) is read
 * @param {object} call  the proposed arguments; a non-object is treated as `{}`
 * @returns {{verdict: "nonconformant"|null, violations: string[]}}
 */
export function schemaFloor(tool, call) {
  // Own-property test: `in` / plain-object lookup would also see Object.prototype members, so a
  // required param called e.g. "toString" would never be reported missing.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  const declared = Array.isArray(tool?.params) ? tool.params : [];
  // Malformed entries (null / primitives) cannot be checked; skip them instead of throwing.
  const params = declared.filter((p) => p !== null && typeof p === "object");
  const args = call && typeof call === "object" ? call : {};
  const byName = new Map(params.map((p) => [String(p.name), p]));
  const violations = [];

  // L2 required
  for (const p of params) {
    if (p.required && !hasOwn(args, p.name)) violations.push(`missing required '${p.name}'`);
  }

  // L1 type + L3 enum/max
  for (const [key, value] of Object.entries(args)) {
    const p = byName.get(key);
    if (!p) continue; // unknown arg: not floor-fatal (left to the contract check)
    const typeOk = typeof p.type === "string" && hasOwn(_TYPE_OK, p.type) ? _TYPE_OK[p.type] : null;
    if (typeOk && !typeOk(value)) violations.push(`'${key}' should be ${p.type}`);
    if (Array.isArray(p.enum) && !p.enum.includes(value)) {
      violations.push(`'${key}' not in enum ${JSON.stringify(p.enum)}`);
    }
    if (typeof p.max === "number" && typeof value === "number" && value > p.max) {
      violations.push(`'${key}' exceeds max ${p.max}`);
    }
  }

  return violations.length ? { verdict: "nonconformant", violations } : { verdict: null, violations: [] };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Resolve a constraint OPERAND against the call args and (optional) structured state.
 *
 * Operand forms, checked in this order for object operands:
 *   - { value } -> the literal `value`
 *   - { state } -> a structured-state key (object state only; prose state resolves to undefined)
 *   - { field } -> an explicit call-field reference
 *   - { len }   -> the length of an array call field, or the key count of an object call field
 *   - { enum }  -> the literal array
 * A bare string is a call-field reference (the common case). Anything else — numbers, booleans,
 * arrays, null, or an object with none of the keys above — is returned unchanged as a literal.
 *
 * Fields are read as OWN properties only, so a reference such as "toString" or "constructor"
 * resolves to `undefined` instead of an inherited Object.prototype member.
 *
 * @param {*} op       the operand from the constraint
 * @param {object} call   the proposed arguments
 * @param {*} [state]  structured state object (a string/prose state cannot be indexed)
 * @returns {*} the resolved value, or `undefined` when the reference cannot be resolved
 */
export function resolveOperand(op, call, state) {
  const own = (obj, key) => (Object.prototype.hasOwnProperty.call(obj, key) ? obj[key] : undefined);
  const fromCall = (key) => (call ? own(call, key) : undefined);

  if (op && typeof op === "object" && !Array.isArray(op)) {
    if ("value" in op) return op.value;
    if ("state" in op) return state && typeof state === "object" ? own(state, op.state) : undefined;
    if ("field" in op) return fromCall(op.field);
    if ("len" in op) {
      const target = fromCall(op.len);
      if (Array.isArray(target)) return target.length;
      if (target && typeof target === "object") return Object.keys(target).length;
      return undefined;
    }
    if ("enum" in op) return op.enum;
  }
  if (typeof op === "string") return fromCall(op); // bare string = call field reference
  return op; // numeric / array / boolean literal
}
|
|
81
|
+
|
|
82
|
+
// Numeric coercion for ordering/summation: numbers pass through, non-blank numeric strings are
// parsed, everything else is NaN (which callers treat as "cannot evaluate").
const _NUM = (x) => {
  if (typeof x === "number") return x;
  if (typeof x !== "string" || x.trim() === "") return NaN;
  return Number(x);
};

// Absolute tolerance used when comparing two numbers for equality.
const _EPS = 1e-9;

// Equality for resolved operand values.
//  - numbers: equal within _EPS (identical values, including +/-Infinity, are equal)
//  - arrays / plain objects: structural comparison, because operands come from separately parsed
//    data and would never be reference-equal — reference equality would "prove" a violation for
//    every `eq` over structurally identical values
//  - everything else: strict equality
const _EQ = (a, b) => {
  if (typeof a === "number" && typeof b === "number") return a === b || Math.abs(a - b) < _EPS;
  if (a === b) return true;
  if (a === null || b === null || typeof a !== "object" || typeof b !== "object") return false;
  if (Array.isArray(a) !== Array.isArray(b)) return false;
  if (Array.isArray(a)) return a.length === b.length && a.every((item, i) => _EQ(item, b[i]));
  const keysA = Object.keys(a);
  const keysB = Object.keys(b);
  return (
    keysA.length === keysB.length &&
    keysA.every((k) => Object.prototype.hasOwnProperty.call(b, k) && _EQ(a[k], b[k]))
  );
};

// Comparison operators by name. Null-prototype + frozen so a lookup with an arbitrary `op` string
// (e.g. "__proto__", "constructor") yields undefined rather than an inherited member.
const _CMP = Object.freeze(
  Object.assign(Object.create(null), {
    lt: (a, b) => a < b,
    le: (a, b) => a <= b,
    gt: (a, b) => a > b,
    ge: (a, b) => a >= b,
    eq: _EQ,
    ne: (a, b) => !_EQ(a, b),
  }),
);

// Human-readable label for an operand, used in violation messages.
const _opLabel = (op) => {
  if (!op || typeof op !== "object") return String(op);
  if ("len" in op) return `len(${op.len})`;
  if ("value" in op) return JSON.stringify(op.value);
  if ("state" in op) return `state.${op.state}`;
  if ("field" in op) return op.field;
  return JSON.stringify(op);
};
|
|
92
|
+
|
|
93
|
+
// Gather the numeric terms a `sum` constraint adds up.
//   - `of` is a string: the named call field must be an array (its items) or an object (its values)
//   - `of` is an array: a list of scalar call-field names whose values are the terms
// Returns the terms as numbers, or null when the source is missing or any term is not numeric.
function _collectSum(of, call) {
  let terms;
  if (typeof of === "string") {
    const holder = call ? call[of] : undefined;
    if (Array.isArray(holder)) {
      terms = holder;
    } else if (holder && typeof holder === "object") {
      terms = Object.values(holder);
    } else {
      return null;
    }
  } else if (Array.isArray(of)) {
    terms = of.map((fieldName) => (call ? call[fieldName] : undefined));
  } else {
    return null;
  }
  const numbers = terms.map((term) => _NUM(term));
  return numbers.some((n) => Number.isNaN(n)) ? null : numbers;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Deterministic CONTRACT floor (computable L4). Evaluates `tool.constraints` — a small structured
 * DSL for relational/arithmetic contracts (summation, additive bounds, cardinality, ordering,
 * mutual exclusion) that a checker decides exactly and an LLM handles unreliably.
 *
 * The floor only ever PROVES a violation; it never asserts conformance. A constraint that cannot
 * be evaluated from (call + state) — unresolved operand, non-numeric value, malformed constraint
 * shape, unknown `kind` — is SKIPPED and left to the LLM.
 *
 * Supported kinds:
 *   - cmp      { left, op: lt|le|gt|ge|eq|ne, right }
 *   - sum      { of, op?, vs }            (`op` defaults to eq)
 *   - present  { fields, count? }         exactly `count` (default 1) of `fields` present
 *   - requires { when: {field, equals?|in?}, require }
 *   - distinct { fields }
 *   - member   { field, in }
 *   - char_at  { string, index, equals, offset? }
 *
 * Field presence is decided on OWN properties of the call, so inherited names such as
 * "toString" are never counted as supplied arguments.
 *
 * @param {object} tool    tool definition; only `tool.constraints` is read
 * @param {object} call    the proposed arguments; a non-object is treated as `{}`
 * @param {*} [state]      structured state, forwarded to resolveOperand
 * @returns {{verdict: "nonconformant"|null, violations: string[]}}
 */
export function contractFloor(tool, call, state) {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const constraints = Array.isArray(tool?.constraints) ? tool.constraints : [];
  const args = call && typeof call === "object" ? call : {};
  const arg = (key) => (hasOwn(args, key) ? args[key] : undefined);
  // Only names defined on the operator table itself count as operators.
  const opFn = (name) => (typeof name === "string" && hasOwn(_CMP, name) ? _CMP[name] : undefined);
  const v = [];

  for (const c of constraints) {
    if (!c || typeof c !== "object") continue;
    switch (c.kind) {
      case "cmp": {
        const a = resolveOperand(c.left, args, state);
        const b = resolveOperand(c.right, args, state);
        if (a === undefined || b === undefined) break;
        const fn = opFn(c.op);
        if (!fn) break;
        if (["lt", "le", "gt", "ge"].includes(c.op)) {
          // Ordering is numeric only; non-numeric operands cannot be evaluated.
          const na = _NUM(a);
          const nb = _NUM(b);
          if (Number.isNaN(na) || Number.isNaN(nb)) break;
          if (!fn(na, nb)) v.push(`${_opLabel(c.left)}(${na}) not ${c.op} ${_opLabel(c.right)}(${nb})`);
        } else if (!fn(a, b)) {
          v.push(`${_opLabel(c.left)}(${JSON.stringify(a)}) not ${c.op} ${_opLabel(c.right)}(${JSON.stringify(b)})`);
        }
        break;
      }
      case "sum": {
        const vals = _collectSum(c.of, args);
        const cap = _NUM(resolveOperand(c.vs, args, state));
        if (vals === null || Number.isNaN(cap)) break;
        const total = vals.reduce((x, y) => x + y, 0);
        // A missing/unknown op falls back to `eq`; report the operator that was actually applied.
        const opName = opFn(c.op) ? c.op : "eq";
        if (!_CMP[opName](total, cap)) v.push(`sum(${_opLabel(c.of)})=${total} not ${opName} ${cap}`);
        break;
      }
      case "present": { // exactly `count` (default 1) of the listed optional fields present
        if (!Array.isArray(c.fields)) break; // nothing to count: cannot evaluate
        const want = c.count == null ? 1 : c.count;
        const cnt = c.fields.filter((f) => hasOwn(args, f)).length;
        if (cnt !== want) v.push(`expected exactly ${want} of [${c.fields.join(", ")}] present, got ${cnt}`);
        break;
      }
      case "requires": { // conditional co-requirement
        const w = c.when;
        if (!w || typeof w !== "object") break;
        const supplied = hasOwn(args, w.field);
        const val = supplied ? args[w.field] : undefined;
        let hit;
        if ("equals" in w) hit = val === w.equals;
        else if (Array.isArray(w.in)) hit = w.in.includes(val);
        else hit = supplied;
        if (hit) {
          const required = Array.isArray(c.require) ? c.require : [];
          const missing = required.filter((f) => !hasOwn(args, f));
          if (missing.length) v.push(`when ${w.field}=${JSON.stringify(val)} requires [${missing.join(", ")}] present`);
        }
        break;
      }
      case "distinct": {
        const fields = Array.isArray(c.fields) ? c.fields : [];
        const vals = fields.map((f) => arg(f));
        if (vals.some((x) => x === undefined)) break;
        const seen = new Set();
        for (const x of vals) {
          const k = x && typeof x === "object" ? JSON.stringify(x) : String(x);
          if (seen.has(k)) {
            v.push(`fields [${fields.join(", ")}] must be distinct (duplicate ${k})`);
            break;
          }
          seen.add(k);
        }
        break;
      }
      case "member": {
        const val = arg(c.field);
        const set = resolveOperand(c.in, args, state);
        if (val === undefined || !Array.isArray(set)) break;
        if (!set.includes(val)) v.push(`'${c.field}'=${JSON.stringify(val)} not in ${JSON.stringify(set)}`);
        break;
      }
      case "char_at": {
        // `offset` (default 0) adjusts for index bases — e.g. offset:-1 for a 1-based `position`.
        const s = arg(c.string);
        const want = resolveOperand(c.equals, args, state);
        const idx = _NUM(resolveOperand(c.index, args, state)) + (typeof c.offset === "number" ? c.offset : 0);
        if (typeof s !== "string") break;
        if (typeof want !== "string" && typeof want !== "number") break;
        // A negative or fractional index does not address a position in the string; comparing
        // whatever a wrapped/truncated read returns would not prove anything, so skip.
        if (!Number.isInteger(idx) || idx < 0) break;
        // Compare as text: `want` may be numeric (e.g. a digit), the extracted slice is always a string.
        const expected = String(want);
        const got = s.slice(idx, idx + expected.length);
        if (got !== expected) v.push(`${c.string}[${idx}..]='${got}' != expected '${want}'`);
        break;
      }
      default:
        break; // unknown kind: ignore (forward-compatible), never assert conformance
    }
  }
  return v.length ? { verdict: "nonconformant", violations: v } : { verdict: null, violations: [] };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Report whether the conformance consult is switched on via ROLEOS_CONFORMANCE_CONSULT.
 * Only the exact values "1" and "true" enable it; unset or anything else leaves it OFF
 * (the default — turning it on is a gated release decision).
 *
 * @returns {boolean}
 */
export function conformanceConsultEnabled() {
  const flag = process.env.ROLEOS_CONFORMANCE_CONSULT;
  return ["1", "true"].includes(flag);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Serialize a tool (+ state, + intent) into the EVIDENCE text handed to the specialist.
 *
 * Layout, one item per line:
 *   TOOL: <name>
 *   CONTRACT: <contract>
 *   PARAMS (JSON schema): <JSON object keyed by param name, each value the param minus `name`>
 *   STATE: <state>      (only when a state is available; strings verbatim, otherwise JSON)
 *   INTENT: <intent>    (only when an intent is given)
 *
 * @param {object} tool      { name, contract, params, state? }
 * @param {string} [intent]  the stated goal
 * @param {*} [state]        explicit state; falls back to `tool.state` when null/undefined
 * @returns {string}
 */
export function evidenceFor(tool, intent, state) {
  const declared = Array.isArray(tool?.params) ? tool.params : [];
  // Skip malformed entries (null / primitives): destructuring them would throw or yield junk keys.
  const params = declared.filter((p) => p !== null && typeof p === "object");
  // Object.fromEntries defines every key as an own data property, so a param literally named
  // "__proto__" is serialized like any other instead of being swallowed by the prototype setter.
  const schema = Object.fromEntries(params.map(({ name, ...rest }) => [name, rest]));

  const lines = [
    `TOOL: ${tool?.name}`,
    `CONTRACT: ${tool?.contract}`,
    `PARAMS (JSON schema): ${JSON.stringify(schema)}`,
  ];
  const st = state ?? tool?.state;
  if (st) lines.push(`STATE: ${typeof st === "string" ? st : JSON.stringify(st)}`);
  if (intent) lines.push(`INTENT: ${intent}`);
  return lines.join("\n");
}
|
|
214
|
+
|
|
215
|
+
/**
 * Render the proposed call as the CLAIM text: `CALL: <tool name>(<JSON arguments>)`.
 * A null/undefined `call` is rendered as an empty argument object.
 *
 * @param {object} tool  only `tool.name` is read
 * @param {object} call  the proposed arguments
 * @returns {string}
 */
export function claimFor(tool, call) {
  const toolName = tool?.name;
  const payload = JSON.stringify(call ?? {});
  return `CALL: ${toolName}(${payload})`;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Return a copy of `tool` enriched from a rollout CATALOG keyed by tool name
 * (tools/conformance-dataset/tool-constraints.json), so the deterministic contract floor can
 * apply to any catalogued tool.
 *
 *   - constraints: the tool's own non-empty inline `constraints` win; otherwise the catalog
 *     entry's `constraints` (or an empty list) are used.
 *   - state: the entry's `state_struct` is attached only when the tool has no `state` of its own.
 *
 * When the tool or catalog is missing, or the catalog has no entry for `tool.name`, the original
 * `tool` is returned untouched.
 *
 * @param {object} tool
 * @param {object} catalog  map of tool name -> { constraints?, state_struct? }
 * @returns {object}
 */
export function withToolConstraints(tool, catalog) {
  if (!tool || !catalog) return tool;
  const entry = catalog[tool.name];
  if (!entry) return tool;

  const hasInlineConstraints = Boolean(tool.constraints && tool.constraints.length);
  const enriched = {
    ...tool,
    constraints: hasInlineConstraints ? tool.constraints : entry.constraints || [],
  };
  if (entry.state_struct && tool.state == null) {
    enriched.state = entry.state_struct;
  }
  return enriched;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Verdict comparator for shadow probes. Each side may be a bare label or an object carrying a
 * `verdict` property; the two labels are compared with strict equality.
 *
 * @param {string|{verdict: *}} s
 * @param {string|{verdict: *}} c
 * @returns {boolean}
 */
export function conformanceAgree(s, c) {
  const labelOf = (side) => {
    if (side && typeof side === "object") return side.verdict;
    return side;
  };
  const left = labelOf(s);
  const right = labelOf(c);
  return left === right;
}
|
|
241
|
+
|
|
242
|
+
/**
 * Check one tool-call's conformance. Deterministic floor first, then the LLM specialist via the
 * fail-open gate. Advisory + opt-in + NEVER throws.
 *
 * Outcomes:
 *   - disabled            -> { verdict: null, source: "disabled" }
 *   - floor proves a violation -> { verdict: "nonconformant", source: "floor", floor, receipt }
 *   - floor passes        -> the specialist's verdict ("abstain" if it returns none)
 *   - anything throws during the specialist consult -> { verdict: "abstain", source: "consult-error", ... }
 *
 * The two floor rungs (schema, contract) are evaluated independently: if one throws, its error is
 * recorded on `floor.error` and the violations proven by the other rung still count.
 *
 * @param {object}  args
 * @param {object}  args.tool      { name, contract, params:[{name,type,required,enum?,max?}], constraints?, state? }
 * @param {object}  args.call      the proposed arguments
 * @param {string}  [args.intent]  the stated goal (enables the L5 intent check)
 * @param {*}       [args.state]   state for the contract floor / evidence; defaults to `tool.state`
 * @param {object}  [opts]
 * @param {boolean} [opts.enabled] default: conformanceConsultEnabled()
 * @param {object}  [opts.paths]   { registry, state, events } for dispatchSpecialist
 * @param {string}  [opts.nowIso]
 * @param {string}  [opts.traceId]
 * @param {Function}[opts.httpFn]  injectable HTTP for the specialist call (tests)
 * @param {object}  [opts.classifier]
 * @param {object}  [opts.shadow]
 * @returns {Promise<{verdict: string|null, source: string, receipt?: object, floor?: object}>}
 */
export async function consultConformance(args = {}, opts = {}) {
  // `null` does not trigger parameter defaults; coalesce so a null argument cannot throw.
  const { tool, call, intent, state } = args ?? {};
  const { enabled = conformanceConsultEnabled(), paths, nowIso, traceId, httpFn, classifier, shadow } = opts ?? {};
  if (!enabled) return { verdict: null, source: "disabled" };

  const ts = nowIso || new Date().toISOString();
  const st = state ?? tool?.state;
  const errText = (err) => String(err && err.message ? err.message : err);

  // 1) Deterministic floor — a PROVABLE violation never needs the LLM. Two rungs:
  //    1a schema (L1 type / L2 required / L3 enum-range), 1b computable contract (relational /
  //    arithmetic L4). Either proving a violation short-circuits to `nonconformant`; both only
  //    ever PROVE, never assert conformance. Each rung is isolated so a failure in one cannot
  //    erase what the other has already proven.
  const runRung = (check) => {
    try {
      const out = check();
      return { violations: Array.isArray(out?.violations) ? out.violations : [] };
    } catch (err) {
      return { violations: [], error: errText(err) };
    }
  };
  const schema = runRung(() => schemaFloor(tool, call));
  const contract = runRung(() => contractFloor(tool, call, st));
  const violations = [...schema.violations, ...contract.violations];
  const floorErrors = [schema.error, contract.error].filter((e) => e !== undefined);
  const floor = {
    verdict: violations.length ? "nonconformant" : null,
    violations,
    schema: schema.violations,
    contract: contract.violations,
    ...(floorErrors.length ? { error: floorErrors.join("; ") } : {}),
  };

  if (floor.verdict === "nonconformant") {
    // Labelled a contract violation only when the schema rung is clean.
    const reason = floor.contract.length && !floor.schema.length ? "contract_violation" : "schema_violation";
    return {
      verdict: "nonconformant",
      source: "floor",
      floor,
      receipt: {
        schema: "roleos-specialist-receipt/v1",
        role: CONFORMANCE_ROLE,
        ts,
        source: "floor",
        decision: { route: "floor", reason, detail: floor.violations.join("; ") },
      },
    };
  }

  // 2) Floor passed -> consult the LLM specialist (residual semantic-contract + L5 intent).
  try {
    const { result, receipt } = await dispatchSpecialist({
      role: CONFORMANCE_ROLE,
      input: { tool, call, intent, state: st, evidence: evidenceFor(tool, intent, st), claim: claimFor(tool, call) },
      // SAFE fail-open: never "conformant" — an unverifiable semantic check escalates, not waves through.
      claudeFn: async () => ({ verdict: "abstain", source: "floor-pass-llm-unavailable" }),
      agreeFn: conformanceAgree,
      traceId: traceId || `conformance-${ts}`,
      nowIso: ts,
      ...(paths ? { paths } : {}),
      ...(httpFn ? { httpFn } : {}),
      ...(classifier ? { classifier } : {}),
      ...(shadow ? { shadow } : {}),
    });
    const verdict = result && typeof result === "object" && "verdict" in result ? result.verdict : result;
    return { verdict: verdict ?? "abstain", source: receipt.source, receipt, floor };
  } catch (err) {
    // A conformance consult must never break the caller — escalate (abstain), never wave through.
    return {
      verdict: "abstain",
      source: "consult-error",
      floor,
      receipt: {
        schema: "roleos-specialist-receipt/v1",
        role: CONFORMANCE_ROLE,
        ts,
        source: "consult-error",
        error: errText(err),
      },
    };
  }
}
|