open-multi-agent-kit 0.78.2 → 0.78.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -2
- package/MATURITY.md +2 -2
- package/README.md +4 -4
- package/dist/benchmark/contracts.d.ts +116 -0
- package/dist/benchmark/contracts.js +6 -0
- package/dist/benchmark/fixtures.d.ts +11 -0
- package/dist/benchmark/fixtures.js +124 -0
- package/dist/benchmark/harness.d.ts +13 -0
- package/dist/benchmark/harness.js +191 -0
- package/dist/benchmark/shadow-mode.d.ts +17 -0
- package/dist/benchmark/shadow-mode.js +95 -0
- package/dist/cli/release-promotion-gate.js +14 -4
- package/dist/commands/merge.js +102 -56
- package/dist/contracts/provider-health.d.ts +37 -0
- package/dist/contracts/provider-health.js +49 -1
- package/dist/evidence/evidence-trust-score.d.ts +101 -0
- package/dist/evidence/evidence-trust-score.js +408 -0
- package/dist/evidence/index.d.ts +2 -0
- package/dist/evidence/index.js +1 -0
- package/dist/native/linux-x64/omk-safety +0 -0
- package/dist/orchestration/merge-arbiter.d.ts +91 -0
- package/dist/orchestration/merge-arbiter.js +376 -0
- package/dist/providers/health.d.ts +3 -0
- package/dist/providers/health.js +46 -0
- package/dist/providers/index.d.ts +1 -0
- package/dist/providers/index.js +1 -0
- package/dist/providers/provider-health.d.ts +8 -1
- package/dist/providers/provider-health.js +39 -0
- package/dist/providers/provider-task-runner.js +31 -0
- package/dist/providers/provider.d.ts +2 -0
- package/dist/providers/router.js +80 -3
- package/dist/providers/types.d.ts +4 -0
- package/dist/runtime/contracts/weakness-remediation.d.ts +6 -0
- package/dist/runtime/provider-maturity-gate.d.ts +2 -0
- package/dist/runtime/provider-maturity-gate.js +26 -0
- package/dist/runtime/tool-dispatch-contracts.d.ts +24 -3
- package/dist/runtime/tool-dispatch-contracts.js +42 -2
- package/dist/runtime/weakness-remediation-index.d.ts +1 -1
- package/dist/runtime/weakness-remediation-index.js +1 -1
- package/dist/safety/enforcement-engine.d.ts +89 -0
- package/dist/safety/enforcement-engine.js +279 -0
- package/dist/safety/tool-authority-gate.d.ts +40 -0
- package/dist/safety/tool-authority-gate.js +92 -0
- package/dist/schema/evidence.schema.d.ts +2 -2
- package/dist/schema/proof-bundle.schema.d.ts +2 -2
- package/docs/benchmark-design.md +122 -0
- package/docs/getting-started.md +1 -1
- package/docs/provider-maturity.md +1 -1
- package/docs/versioning.md +3 -3
- package/package.json +7 -3
package/dist/providers/router.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { PROVIDER_CAPABILITY_ORDINAL } from "../contracts/provider-health.js";
|
|
1
2
|
import { DEFAULT_AUTHORITY_PROVIDER, resolveFallbackProvider } from "./types.js";
|
|
2
3
|
const DEEPSEEK_READ_ONLY_ROLES = new Set([
|
|
3
4
|
"explorer",
|
|
@@ -41,7 +42,17 @@ export function routeProvider(input) {
|
|
|
41
42
|
const role = input.role.toLowerCase();
|
|
42
43
|
const seed = `${input.nodeId ?? ""}:${role}:${input.taskType}`;
|
|
43
44
|
const authorityProvider = resolveFallbackProvider(input.authorityProvider);
|
|
44
|
-
const
|
|
45
|
+
const authorityVector = input.providerHealthVectors?.[authorityProvider];
|
|
46
|
+
const authorityProviderAllowed = providerVectorAllowsAuthority(authorityVector);
|
|
47
|
+
const authorityProviderFallbackOnly = authorityVector
|
|
48
|
+
? !providerVectorQuotaOk(authorityVector) && providerVectorAllowsDirect(authorityVector)
|
|
49
|
+
: false;
|
|
50
|
+
const authorityDecision = (reason, confidence, deepseek, extra = {}) => {
|
|
51
|
+
if (!authorityProviderAllowed || authorityProviderFallbackOnly) {
|
|
52
|
+
return authorityProviderDecision(authorityProvider, `${providerLabel(authorityProvider)} ${authorityProviderFallbackOnly ? "quota exhausted" : "auth insufficient"}; using fallback-only route`, Math.max(0.5, confidence - 0.15), deepseek, extra);
|
|
53
|
+
}
|
|
54
|
+
return authorityProviderDecision(authorityProvider, reason, confidence, deepseek, extra);
|
|
55
|
+
};
|
|
45
56
|
const directDeepSeekAllowed = canUseDirectDeepSeek(role, input);
|
|
46
57
|
const dedicatedDeepSeekAgent = isDedicatedDeepSeekAgent(input);
|
|
47
58
|
const withRouteEnsemble = (decision, winner) => ({
|
|
@@ -60,6 +71,11 @@ export function routeProvider(input) {
|
|
|
60
71
|
if (role === "orchestrator" || role === "merger" || role === "integrator") {
|
|
61
72
|
return withRouteEnsemble(authorityDecision("Core orchestration and merge authority stay with the configured authority provider", 1), "safety-gate");
|
|
62
73
|
}
|
|
74
|
+
// Hard constraint: if authority provider is blocked by vector, skip to safety gate
|
|
75
|
+
// unless an external provider is explicitly requested; that provider's availability is checked separately below.
|
|
76
|
+
if (!authorityProviderAllowed && !requestedExternalProvider(input)) {
|
|
77
|
+
return withRouteEnsemble(authorityProviderDecision(authorityProvider, `${providerLabel(authorityProvider)} auth/quota insufficient for authority lanes; safety gate active`, 0.5), "safety-gate");
|
|
78
|
+
}
|
|
63
79
|
const externalProvider = requestedExternalProvider(input);
|
|
64
80
|
if (externalProvider) {
|
|
65
81
|
if (!isProviderAvailable(input, externalProvider)) {
|
|
@@ -69,6 +85,12 @@ export function routeProvider(input) {
|
|
|
69
85
|
}
|
|
70
86
|
// If the external provider is the authority provider, route to it as authority
|
|
71
87
|
if (externalProvider === authorityProvider) {
|
|
88
|
+
// Hard constraint: auth != ok → exclude authority lanes
|
|
89
|
+
if (!authorityProviderAllowed) {
|
|
90
|
+
return withRouteEnsemble(authorityDecision(`${providerLabel(externalProvider)} auth/quota insufficient for authority; using configured fallback`, 0.86, undefined, {
|
|
91
|
+
providerModel: genericProviderModelRef(input, externalProvider, "veto"),
|
|
92
|
+
}), "safety-gate");
|
|
93
|
+
}
|
|
72
94
|
return withRouteEnsemble(authorityDecision(`${providerLabel(externalProvider)} is the configured authority provider`, 0.9, undefined, {
|
|
73
95
|
providerModel: genericProviderModelRef(input, externalProvider, "authority"),
|
|
74
96
|
}), "authority-provider");
|
|
@@ -178,8 +200,41 @@ function requestedExternalProvider(input) {
|
|
|
178
200
|
/**
 * Tell whether a requested provider value names a generic external provider.
 *
 * Only strings qualify, and the three identifiers "auto", "kimi" and
 * "deepseek" are excluded; every other string (including the empty string)
 * is accepted.
 *
 * @param value Candidate provider identifier of any type.
 * @returns `true` for a string other than "auto", "kimi" or "deepseek".
 */
function isGenericExternalProvider(value) {
    if (typeof value !== "string") {
        return false;
    }
    switch (value) {
        case "auto":
        case "kimi":
        case "deepseek":
            return false;
        default:
            return true;
    }
}
|
|
203
|
+
/**
 * Check the quota dimension of a provider health vector.
 *
 * A missing vector imposes no constraint and counts as OK. Otherwise the
 * vector's `quota` level must rank at or above `quota_available` in
 * `PROVIDER_CAPABILITY_ORDINAL`.
 *
 * @param vector Capability vector for the provider, or a falsy value.
 * @returns `true` when no vector was supplied or the quota rank is sufficient.
 */
function providerVectorQuotaOk(vector) {
    const requiredLevel = "quota_available";
    return !vector || PROVIDER_CAPABILITY_ORDINAL[vector.quota] >= PROVIDER_CAPABILITY_ORDINAL[requiredLevel];
}
|
|
208
|
+
/**
 * Decide whether a health vector permits authority-lane routing.
 *
 * A missing vector imposes no constraint and is treated as allowed. Otherwise
 * `auth`, `model` and `quota` must rank at or above `auth_valid`,
 * `model_available` and `quota_available` respectively in
 * `PROVIDER_CAPABILITY_ORDINAL`. Checks run in that order and stop at the
 * first failure.
 *
 * @param vector Capability vector for the provider, or a falsy value.
 * @returns `true` when no vector was supplied or every requirement is met.
 */
function providerVectorAllowsAuthority(vector) {
    if (!vector) {
        return true;
    }
    // [vector field, minimum level] pairs, evaluated in order.
    const requirements = [
        ["auth", "auth_valid"],
        ["model", "model_available"],
        ["quota", "quota_available"],
    ];
    return requirements.every(([field, minimum]) => PROVIDER_CAPABILITY_ORDINAL[vector[field]] >= PROVIDER_CAPABILITY_ORDINAL[minimum]);
}
|
|
215
|
+
/**
 * Decide whether a health vector permits direct (non-advisory) use of a provider.
 *
 * A missing vector imposes no constraint and is treated as allowed. Otherwise
 * `auth` must rank at least `auth_valid`, `model` at least `model_available`
 * and `quota` at least `quota_available` in `PROVIDER_CAPABILITY_ORDINAL`.
 *
 * NOTE(review): because quota is one of the requirements here, an expression
 * of the form `!providerVectorQuotaOk(v) && providerVectorAllowsDirect(v)`
 * can never be true for the same vector — confirm that callers relying on
 * such a combination behave as intended.
 *
 * @param vector Capability vector for the provider, or a falsy value.
 * @returns `true` when no vector was supplied or all three requirements hold.
 */
function providerVectorAllowsDirect(vector) {
    if (!vector) {
        return true;
    }
    const rank = PROVIDER_CAPABILITY_ORDINAL;
    if (!(rank[vector.auth] >= rank["auth_valid"])) {
        return false;
    }
    if (!(rank[vector.model] >= rank["model_available"])) {
        return false;
    }
    return rank[vector.quota] >= rank["quota_available"];
}
|
|
222
|
+
/**
 * Decide whether a health vector permits advisory use of a provider.
 *
 * Only the `auth` dimension is consulted: it must rank at or above
 * `auth_present` in `PROVIDER_CAPABILITY_ORDINAL`. A missing vector imposes
 * no constraint and is treated as allowed.
 *
 * @param vector Capability vector for the provider, or a falsy value.
 * @returns `true` when no vector was supplied or the auth rank is sufficient.
 */
function providerVectorAllowsAdvisory(vector) {
    return vector
        ? PROVIDER_CAPABILITY_ORDINAL[vector.auth] >= PROVIDER_CAPABILITY_ORDINAL["auth_present"]
        : true;
}
|
|
181
227
|
/**
 * Report whether a provider may be treated as fully available for routing.
 *
 * When `input.providerHealthVectors` holds a vector for the provider it acts
 * as a hard constraint: the provider is unavailable if its quota check fails
 * or if its auth level does not rank at least `auth_valid`. An auth level with
 * no entry in `PROVIDER_CAPABILITY_ORDINAL` is rejected as well, matching the
 * fail-closed behaviour of the `>=`-based vector predicates in this file.
 *
 * If the vector (when present) passes, the explicit
 * `input.providerAvailability` entry decides; a provider with no explicit
 * entry counts as available.
 *
 * @param input Route input carrying optional `providerAvailability` and
 *   `providerHealthVectors` maps keyed by provider id.
 * @param provider Provider id to look up.
 * @returns `false` when a vector blocks the provider; otherwise the explicit
 *   availability value, or `true` when none is recorded.
 */
function isProviderAvailable(input, provider) {
    const explicit = input.providerAvailability?.[provider];
    const vector = input.providerHealthVectors?.[provider];
    if (vector) {
        // Hard constraint: quota exhausted → fallback only (not fully available)
        if (!providerVectorQuotaOk(vector))
            return false;
        // Hard constraint: auth not valid → not available for any lane.
        // Expressed as a negated `>=` so that an unrecognised auth level
        // (ordinal lookup yields undefined) is rejected; `undefined < n`
        // evaluates to false and would have let such a vector through.
        if (!(PROVIDER_CAPABILITY_ORDINAL[vector.auth] >= PROVIDER_CAPABILITY_ORDINAL["auth_valid"]))
            return false;
    }
    return explicit === undefined ? true : explicit;
}
|
|
185
240
|
function canUseGenericDirectProvider(role, input) {
|
|
@@ -187,7 +242,15 @@ function canUseGenericDirectProvider(role, input) {
|
|
|
187
242
|
return false;
|
|
188
243
|
if (input.needsMcp || input.needsToolCalling)
|
|
189
244
|
return false;
|
|
190
|
-
|
|
245
|
+
if (!input.readOnly && !GENERIC_EXTERNAL_READ_ONLY_ROLES.has(role))
|
|
246
|
+
return false;
|
|
247
|
+
const externalProvider = requestedExternalProvider(input);
|
|
248
|
+
if (externalProvider) {
|
|
249
|
+
const vector = input.providerHealthVectors?.[externalProvider];
|
|
250
|
+
if (vector && !providerVectorAllowsDirect(vector))
|
|
251
|
+
return false;
|
|
252
|
+
}
|
|
253
|
+
return true;
|
|
191
254
|
}
|
|
192
255
|
function canUseGenericAdvisoryProvider(role, input) {
|
|
193
256
|
if (!GENERIC_EXTERNAL_ADVISORY_FILE_ROLES.has(role))
|
|
@@ -196,6 +259,12 @@ function canUseGenericAdvisoryProvider(role, input) {
|
|
|
196
259
|
return false;
|
|
197
260
|
if (input.needsMcp || input.needsToolCalling)
|
|
198
261
|
return false;
|
|
262
|
+
const externalProvider = requestedExternalProvider(input);
|
|
263
|
+
if (externalProvider) {
|
|
264
|
+
const vector = input.providerHealthVectors?.[externalProvider];
|
|
265
|
+
if (vector && !providerVectorAllowsAdvisory(vector))
|
|
266
|
+
return false;
|
|
267
|
+
}
|
|
199
268
|
return true;
|
|
200
269
|
}
|
|
201
270
|
function canUseDeepSeekProAdvisory(role, input) {
|
|
@@ -205,12 +274,20 @@ function canUseDeepSeekProAdvisory(role, input) {
|
|
|
205
274
|
return false;
|
|
206
275
|
if (input.needsMcp || input.needsToolCalling)
|
|
207
276
|
return false;
|
|
277
|
+
const vector = input.providerHealthVectors?.["deepseek"];
|
|
278
|
+
if (vector && !providerVectorAllowsAdvisory(vector))
|
|
279
|
+
return false;
|
|
208
280
|
return true;
|
|
209
281
|
}
|
|
210
282
|
/**
 * Decide whether a task may be routed directly to DeepSeek.
 *
 * All of the following must hold:
 *  - `input.risk` is exactly "read";
 *  - `input.readOnly` is strictly `true`, or the role is listed in
 *    `DEEPSEEK_READ_ONLY_ROLES`;
 *  - if a "deepseek" entry exists in `input.providerHealthVectors`, it passes
 *    `providerVectorAllowsDirect`.
 *
 * @param role Role name looked up in `DEEPSEEK_READ_ONLY_ROLES`.
 * @param input Route input (`risk`, `readOnly`, optional `providerHealthVectors`).
 * @returns `true` when direct DeepSeek routing is permitted.
 */
function canUseDirectDeepSeek(role, input) {
    if (input.risk !== "read") {
        return false;
    }
    if (input.readOnly !== true && !DEEPSEEK_READ_ONLY_ROLES.has(role)) {
        return false;
    }
    const deepseekVector = input.providerHealthVectors?.["deepseek"];
    // No vector means no health constraint to apply.
    return !deepseekVector || providerVectorAllowsDirect(deepseekVector);
}
|
|
215
292
|
function buildProviderRouteEnsemble(options) {
|
|
216
293
|
const { input, role, decision, winner, directDeepSeekAllowed } = options;
|
|
@@ -71,6 +71,8 @@ export interface ProviderRouteInput {
|
|
|
71
71
|
authorityProvider?: ProviderId;
|
|
72
72
|
preferredModel?: string;
|
|
73
73
|
preferredDeepSeekTier?: DeepSeekModelTier;
|
|
74
|
+
/** v2 capability vectors for hard-constraint filtering. */
|
|
75
|
+
providerHealthVectors?: Partial<Record<ProviderId, import("../contracts/provider-health.js").ProviderHealthVector>>;
|
|
74
76
|
}
|
|
75
77
|
export interface ProviderRouteDecision {
|
|
76
78
|
provider: ProviderId;
|
|
@@ -90,6 +92,8 @@ export interface ProviderAvailability {
|
|
|
90
92
|
checkedAt: number;
|
|
91
93
|
reason?: string;
|
|
92
94
|
disableForRun: boolean;
|
|
95
|
+
/** v2 capability vector (optional; present when profiler v2 is active). */
|
|
96
|
+
healthVector?: import("../contracts/provider-health.js").ProviderHealthVector;
|
|
93
97
|
}
|
|
94
98
|
export interface AgentProvider {
|
|
95
99
|
id: ProviderId;
|
|
@@ -55,6 +55,12 @@ export interface ReleasePromotionInputs {
|
|
|
55
55
|
readonly maturity?: number;
|
|
56
56
|
/** Algorithm 8 — minimal verified demo run gate. Hard block when false/undefined. */
|
|
57
57
|
readonly demoRun?: boolean;
|
|
58
|
+
/** Stable claim gate — live/recorded benchmark must pass before stable verdict. */
|
|
59
|
+
readonly liveBenchmarkPass?: boolean;
|
|
60
|
+
/** Stable claim gate — must be exactly 0 before stable verdict. */
|
|
61
|
+
readonly sandboxViolationCount?: number;
|
|
62
|
+
/** Stable claim gate — package/lock/docs/proof/bin invariant. */
|
|
63
|
+
readonly versionConsistency?: number;
|
|
58
64
|
}
|
|
59
65
|
export interface ReleasePromotionResult {
|
|
60
66
|
readonly score: number;
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
* verdict.
|
|
7
7
|
*/
|
|
8
8
|
import type { AdapterTestKind, AdapterTestResult, ProviderAuthorityClass } from "./contracts/evidence.js";
|
|
9
|
+
import type { ProviderHealthVector } from "../contracts/provider-health.js";
|
|
9
10
|
/** Maturity evaluation result for a single provider. */
|
|
10
11
|
export interface MaturityResult {
|
|
11
12
|
/** Computed maturity score M_p ∈ [0, 1]. */
|
|
@@ -39,3 +40,4 @@ export interface ProviderMaturityTable {
|
|
|
39
40
|
}
|
|
40
41
|
export declare function createProviderMaturityTable(): ProviderMaturityTable;
|
|
41
42
|
export declare function createProviderMaturityGate(): ProviderMaturityGate;
|
|
43
|
+
export declare function evaluateProviderFromVector(gate: ProviderMaturityGate, vector: ProviderHealthVector): MaturityResult;
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* produces a maturity score M_p, an authority class, and a pass/fail
|
|
6
6
|
* verdict.
|
|
7
7
|
*/
|
|
8
|
+
import { PROVIDER_CAPABILITY_ORDINAL } from "../contracts/provider-health.js";
|
|
8
9
|
// ── Constants ───────────────────────────────────────────────────
|
|
9
10
|
const WEIGHT_AUTH = 0.10;
|
|
10
11
|
const WEIGHT_READ = 0.10;
|
|
@@ -72,6 +73,28 @@ export function createProviderMaturityTable() {
|
|
|
72
73
|
},
|
|
73
74
|
};
|
|
74
75
|
}
|
|
76
|
+
/**
 * Coerce a raw score into the closed unit interval.
 * Non-numeric and non-finite values (undefined, NaN, ±Infinity) become 0 so a
 * malformed vector can never raise a score.
 */
function toUnitIntervalScore(value) {
    if (typeof value !== "number" || !Number.isFinite(value)) {
        return 0;
    }
    return Math.min(1, Math.max(0, value));
}
/**
 * Score how far a capability level has progressed toward a target level:
 * 1.0 once the target ordinal is reached, otherwise the ordinal ratio clamped
 * to [0, 1] (0 when either ordinal is unknown).
 */
function capabilityProgressScore(level, targetLevel) {
    const reached = PROVIDER_CAPABILITY_ORDINAL[level];
    const required = PROVIDER_CAPABILITY_ORDINAL[targetLevel];
    if (reached >= required) {
        return 1.0;
    }
    return toUnitIntervalScore(reached / required);
}
/**
 * Translate a provider health vector into the adapter-test result list that
 * the maturity gate evaluates.
 *
 * Mapping (one result per kind, in this order):
 *  - auth:     progress of `vector.auth` toward `auth_valid`
 *  - read / write / shell: 1.0 when the matching `supports*` flag is truthy, else 0.0
 *  - mcp:      progress of `vector.binary` toward `tool_contract_verified`
 *  - merge, evidence: `vector.evidencePassRate7d`
 *  - fallback: `1 - vector.failureEwma`
 *
 * Every score is clamped to [0, 1]; missing or non-finite inputs score 0, so
 * out-of-range or absent vector fields cannot produce NaN or scores outside
 * the unit interval. `passed` is `score >= 0.5`.
 *
 * @param vector Provider health vector to convert.
 * @returns Array of `{ kind, passed, score }` results.
 */
function vectorToAdapterResults(vector) {
    const toResult = (kind, score) => ({ kind, passed: score >= 0.5, score });
    const flagScore = (flag) => (flag ? 1.0 : 0.0);
    const evidenceRate = toUnitIntervalScore(vector.evidencePassRate7d);
    return [
        toResult("auth", capabilityProgressScore(vector.auth, "auth_valid")),
        toResult("read", flagScore(vector.supportsRead)),
        toResult("write", flagScore(vector.supportsWrite)),
        toResult("shell", flagScore(vector.supportsShell)),
        toResult("mcp", capabilityProgressScore(vector.binary, "tool_contract_verified")),
        toResult("merge", evidenceRate),
        toResult("evidence", evidenceRate),
        toResult("fallback", toUnitIntervalScore(1.0 - vector.failureEwma)),
    ];
}
|
|
75
98
|
export function createProviderMaturityGate() {
|
|
76
99
|
return {
|
|
77
100
|
evaluate(results) {
|
|
@@ -99,3 +122,6 @@ export function createProviderMaturityGate() {
|
|
|
99
122
|
},
|
|
100
123
|
};
|
|
101
124
|
}
|
|
125
|
+
/**
 * Evaluate a provider's maturity from its health vector.
 *
 * The vector is first converted with `vectorToAdapterResults`, and the
 * resulting list is handed to `gate.evaluate`.
 *
 * @param gate Maturity gate exposing `evaluate(results)`.
 * @param vector Provider health vector to evaluate.
 * @returns Whatever `gate.evaluate` returns for the converted results.
 */
export function evaluateProviderFromVector(gate, vector) {
    const adapterResults = vectorToAdapterResults(vector);
    return gate.evaluate(adapterResults);
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { OmkToolCall, OmkToolDefinition } from "./tool-registry-contract.js";
|
|
2
|
-
import { type ToolAuthorityDecision, type ToolOp } from "../safety/tool-authority-gate.js";
|
|
2
|
+
import { type ToolAuthorityDecision, type ToolOp, type ToolOpV2 } from "../safety/tool-authority-gate.js";
|
|
3
3
|
import type { ProviderAuthorityLevel } from "../contracts/provider-health.js";
|
|
4
|
+
import type { EnforcementProof } from "../safety/enforcement-engine.js";
|
|
4
5
|
export interface ToolDispatchResult<R = unknown> {
|
|
5
6
|
readonly call: OmkToolCall;
|
|
6
7
|
readonly status: "fulfilled" | "rejected";
|
|
@@ -15,13 +16,15 @@ export type ToolAuthorityMode = "shadow" | "enforce";
|
|
|
15
16
|
*/
|
|
16
17
|
export interface ToolAuthorityDecisionRecord {
|
|
17
18
|
readonly toolName: string;
|
|
18
|
-
readonly op: ToolOp;
|
|
19
|
+
readonly op: ToolOp | ToolOpV2;
|
|
19
20
|
readonly decision: ToolAuthorityDecision;
|
|
20
21
|
readonly mode: ToolAuthorityMode;
|
|
21
22
|
/** True only when the verdict actually rejected the call (enforce + block). */
|
|
22
23
|
readonly enforced: boolean;
|
|
23
24
|
/** Redacted, human-readable reason. Never includes args or secret values. */
|
|
24
25
|
readonly reason: string;
|
|
26
|
+
/** v2 enforcement proof hash when available. */
|
|
27
|
+
readonly policyHash?: string;
|
|
25
28
|
}
|
|
26
29
|
/**
|
|
27
30
|
* Authority wiring for one dispatch turn. All inputs are non-secret enum/bool
|
|
@@ -42,6 +45,12 @@ export interface ToolAuthorityWiring {
|
|
|
42
45
|
readonly enforce?: boolean;
|
|
43
46
|
/** Optional sink for computed verdicts (invoked in both shadow and enforce). */
|
|
44
47
|
readonly onDecision?: (record: ToolAuthorityDecisionRecord) => void;
|
|
48
|
+
/**
|
|
49
|
+
* v2 enforcement proof from the adapter / runtime.
|
|
50
|
+
* When present, the gate uses policy-dependent capability resolution.
|
|
51
|
+
* Runtimes without a valid proof cannot enter authority lanes.
|
|
52
|
+
*/
|
|
53
|
+
readonly enforcementProof?: EnforcementProof;
|
|
45
54
|
}
|
|
46
55
|
/**
|
|
47
56
|
* Resolve the global enforcement opt-in from the environment. Default OFF means
|
|
@@ -52,8 +61,9 @@ export declare function resolveToolAuthorityEnforcement(env?: Record<string, str
|
|
|
52
61
|
/** Error used to reject a tool call rejected by the authority gate (enforce mode). */
|
|
53
62
|
export declare class ToolAuthorityBlockedError extends Error {
|
|
54
63
|
readonly toolName: string;
|
|
55
|
-
readonly op: ToolOp;
|
|
64
|
+
readonly op: ToolOp | ToolOpV2;
|
|
56
65
|
readonly decision: ToolAuthorityDecision;
|
|
66
|
+
readonly policyHash?: string;
|
|
57
67
|
constructor(record: ToolAuthorityDecisionRecord);
|
|
58
68
|
}
|
|
59
69
|
/** Compute the gate verdict for a single call. Pure (no IO, no env reads). */
|
|
@@ -61,4 +71,15 @@ export declare function evaluateToolAuthority(toolName: string, wiring: ToolAuth
|
|
|
61
71
|
readonly record: ToolAuthorityDecisionRecord;
|
|
62
72
|
readonly blocked: boolean;
|
|
63
73
|
};
|
|
74
|
+
/**
|
|
75
|
+
* Compute the gate verdict for a single call using v2 enforcement proof.
|
|
76
|
+
* Pure (no IO, no env reads).
|
|
77
|
+
*
|
|
78
|
+
* If `enforcementProof` is present, the gate uses policy-dependent capability
|
|
79
|
+
* resolution. Runtimes without a valid proof cannot enter authority lanes.
|
|
80
|
+
*/
|
|
81
|
+
export declare function evaluateToolAuthorityV2(toolName: string, wiring: ToolAuthorityWiring): {
|
|
82
|
+
readonly record: ToolAuthorityDecisionRecord;
|
|
83
|
+
readonly blocked: boolean;
|
|
84
|
+
};
|
|
64
85
|
export declare function dispatchToolCallsByContract<A, R>(calls: readonly OmkToolCall<A>[], registry: ReadonlyMap<string, OmkToolDefinition<A, R>>, dispatchOne: (call: OmkToolCall<A>) => Promise<R>, authority?: ToolAuthorityWiring): Promise<ToolDispatchResult<R>[]>;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createToolExecutionBatches } from "./tool-registry-contract.js";
|
|
2
|
-
import { decideToolAuthority, mapToolNameToOp, } from "../safety/tool-authority-gate.js";
|
|
2
|
+
import { decideToolAuthority, decideToolAuthorityV2, mapToolNameToOp, } from "../safety/tool-authority-gate.js";
|
|
3
3
|
const ENFORCE_PATTERN = /^(1|true|yes|on)$/i;
|
|
4
4
|
/**
|
|
5
5
|
* Resolve the global enforcement opt-in from the environment. Default OFF means
|
|
@@ -20,12 +20,14 @@ export class ToolAuthorityBlockedError extends Error {
|
|
|
20
20
|
toolName;
|
|
21
21
|
op;
|
|
22
22
|
decision;
|
|
23
|
+
policyHash;
|
|
23
24
|
constructor(record) {
|
|
24
25
|
super(record.reason);
|
|
25
26
|
this.name = "ToolAuthorityBlockedError";
|
|
26
27
|
this.toolName = record.toolName;
|
|
27
28
|
this.op = record.op;
|
|
28
29
|
this.decision = record.decision;
|
|
30
|
+
this.policyHash = record.policyHash;
|
|
29
31
|
}
|
|
30
32
|
}
|
|
31
33
|
/** Compute the gate verdict for a single call. Pure (no IO, no env reads). */
|
|
@@ -57,6 +59,44 @@ export function evaluateToolAuthority(toolName, wiring) {
|
|
|
57
59
|
blocked,
|
|
58
60
|
};
|
|
59
61
|
}
|
|
62
|
+
/**
 * Compute the gate verdict for a single tool call, preferring the v2
 * enforcement proof when the wiring carries one.
 *
 * With `wiring.enforcementProof` set, the decision comes from
 * `decideToolAuthorityV2` and the returned record carries the proof's
 * `policyHash`. A call is reported as blocked only when `wiring.enforce` is
 * strictly `true` and the decision is "block", or "ask" without a TTY.
 *
 * When `wiring.enforcementProof` is absent, the call is delegated unchanged
 * to the legacy `evaluateToolAuthority`.
 *
 * @param toolName Name of the tool being invoked.
 * @param wiring Authority wiring for this dispatch turn.
 * @returns `{ record, blocked }` describing the verdict.
 */
export function evaluateToolAuthorityV2(toolName, wiring) {
    const op = mapToolNameToOp(toolName);
    const proof = wiring.enforcementProof;
    if (!proof) {
        // Fall back to legacy evaluation when no proof is present.
        return evaluateToolAuthority(toolName, wiring);
    }
    const decision = decideToolAuthorityV2({
        op,
        writeAuthority: wiring.writeAuthority,
        shellAuthority: wiring.shellAuthority,
        approvalPolicy: wiring.approvalPolicy,
        sandboxMode: wiring.sandboxMode,
        tty: wiring.tty,
        enforcementProof: proof,
    });
    const enforce = wiring.enforce === true;
    // "ask" cannot be answered without a TTY, so it counts as a block there.
    const wouldBlock = decision === "block" || (decision === "ask" && !wiring.tty);
    const blocked = enforce && wouldBlock;
    const record = {
        toolName,
        op,
        decision,
        mode: enforce ? "enforce" : "shadow",
        enforced: blocked,
        reason: redactedAuthorityReason(op, decision, wiring),
        policyHash: proof.policyHash,
    };
    return { record, blocked };
}
|
|
60
100
|
/**
|
|
61
101
|
* Wrap a dispatch function with the authority checkpoint. In shadow mode the
|
|
62
102
|
* wrapper records the verdict and always delegates to `dispatchOne`. In enforce
|
|
@@ -64,7 +104,7 @@ export function evaluateToolAuthority(toolName, wiring) {
|
|
|
64
104
|
*/
|
|
65
105
|
function buildGatedDispatch(wiring, dispatchOne) {
|
|
66
106
|
return async (call) => {
|
|
67
|
-
const { record, blocked } =
|
|
107
|
+
const { record, blocked } = evaluateToolAuthorityV2(call.toolName, wiring);
|
|
68
108
|
wiring.onDecision?.(record);
|
|
69
109
|
if (blocked) {
|
|
70
110
|
throw new ToolAuthorityBlockedError(record);
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
export { type IntegrationResultKind, type WeaknessRemediationState, type AdvancedControlLoopInput, type AdvancedControlLoopResult, type AdvancedControlLoop, type AdvancedControlLoopOptions, createAdvancedControlLoop, } from "./advanced-control-loop.js";
|
|
6
6
|
export { type SurfaceItem, type ScoredSurfaceItem, type MandatoryAnchor, type CompressionResult, type PublicSurfaceCompressorOptions, computeSurfaceScore, enforceFlowInvariant, PublicSurfaceCompressor, } from "./public-surface.js";
|
|
7
7
|
export { type ProofBundleScores, type TrustScoreResult, type ProofBundleTrustEngine, type DeriveScoresOptions, createProofBundleTrustEngine, } from "./proof-bundle-trust.js";
|
|
8
|
-
export { type MaturityResult, type ProviderMaturityGate, createProviderMaturityGate, } from "./provider-maturity-gate.js";
|
|
8
|
+
export { type MaturityResult, type ProviderMaturityGate, createProviderMaturityGate, evaluateProviderFromVector, } from "./provider-maturity-gate.js";
|
|
9
9
|
export { type RuntimeScoreV2, type RuntimeRouterDecisionV2, type RouterV2Options, type RouterV2ScoringEngine, type BlastRadiusParams, type EvidenceHistoryEntry, type NodeIntent, } from "./contracts/router-v2.js";
|
|
10
10
|
export { createRouterV2ScoringEngine, } from "./router-v2-scoring.js";
|
|
11
11
|
export { type ReleasePromotionInputs, type ReleasePromotionResult, type ReleaseVerdict, RELEASE_GATE_WEIGHTS, TAU_EVIDENCE, TAU_EVIDENCE_HIGH, TAU_PROOF, TAU_STABLE, BETA_PRIOR_ALPHA0, BETA_PRIOR_BETA0, SURFACE_BUDGET_K, } from "./contracts/weakness-remediation.js";
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
export { createAdvancedControlLoop, } from "./advanced-control-loop.js";
|
|
6
6
|
export { computeSurfaceScore, enforceFlowInvariant, PublicSurfaceCompressor, } from "./public-surface.js";
|
|
7
7
|
export { createProofBundleTrustEngine, } from "./proof-bundle-trust.js";
|
|
8
|
-
export { createProviderMaturityGate, } from "./provider-maturity-gate.js";
|
|
8
|
+
export { createProviderMaturityGate, evaluateProviderFromVector, } from "./provider-maturity-gate.js";
|
|
9
9
|
export { createRouterV2ScoringEngine, } from "./router-v2-scoring.js";
|
|
10
10
|
export { RELEASE_GATE_WEIGHTS, TAU_EVIDENCE, TAU_EVIDENCE_HIGH, TAU_PROOF, TAU_STABLE, BETA_PRIOR_ALPHA0, BETA_PRIOR_BETA0, SURFACE_BUDGET_K, } from "./contracts/weakness-remediation.js";
|
|
11
11
|
export { createReleasePromotionGate, } from "../cli/release-promotion-gate.js";
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Policy / Sandbox Enforcement Engine v2
|
|
3
|
+
*
|
|
4
|
+
* Capability lattice with conservative policy combination.
|
|
5
|
+
* effectivePolicy = minByAuthority(userPolicy, repoPolicy, providerPolicy, adapterPolicy, riskPolicy)
|
|
6
|
+
*
|
|
7
|
+
* Conservative by default. Any ambiguity → block.
|
|
8
|
+
*/
|
|
9
|
+
export type SandboxCapability = "read" | "write" | "shell" | "network" | "secret_read" | "secret_write" | "merge" | "publish";
|
|
10
|
+
export declare const ALL_CAPABILITIES: readonly SandboxCapability[];
|
|
11
|
+
export type CapabilityLevel = "none" | "advisory" | "direct" | "full";
|
|
12
|
+
export interface CapabilityLattice {
|
|
13
|
+
read: CapabilityLevel;
|
|
14
|
+
write: CapabilityLevel;
|
|
15
|
+
shell: CapabilityLevel;
|
|
16
|
+
network: CapabilityLevel;
|
|
17
|
+
secret_read: CapabilityLevel;
|
|
18
|
+
secret_write: CapabilityLevel;
|
|
19
|
+
merge: CapabilityLevel;
|
|
20
|
+
publish: CapabilityLevel;
|
|
21
|
+
}
|
|
22
|
+
export type SandboxMode = "read-only" | "workspace-write" | "network-isolated" | "unrestricted";
|
|
23
|
+
export type ApprovalPolicy = "interactive" | "auto" | "yolo" | "block";
|
|
24
|
+
export interface PolicyLayer {
|
|
25
|
+
readonly source: "user" | "repo" | "provider" | "adapter" | "risk";
|
|
26
|
+
/** Partial lattice — omitted capabilities mean "no opinion" (inherit from other layers). */
|
|
27
|
+
readonly lattice: Partial<CapabilityLattice>;
|
|
28
|
+
readonly sandboxMode?: SandboxMode;
|
|
29
|
+
readonly approvalPolicy?: ApprovalPolicy;
|
|
30
|
+
}
|
|
31
|
+
export interface CombinedPolicy {
|
|
32
|
+
readonly lattice: Readonly<CapabilityLattice>;
|
|
33
|
+
readonly sandboxMode: SandboxMode;
|
|
34
|
+
readonly approvalPolicy: ApprovalPolicy;
|
|
35
|
+
/** Ordered list of sources that contributed to the combination. */
|
|
36
|
+
readonly sources: readonly PolicyLayer["source"][];
|
|
37
|
+
}
|
|
38
|
+
export interface EnforcementProof {
|
|
39
|
+
readonly sandboxMode: SandboxMode;
|
|
40
|
+
/** Which policy layers were active in the final combination. */
|
|
41
|
+
readonly enforcedBy: readonly string[];
|
|
42
|
+
/** Capabilities fully blocked (level === "none" or sandbox hard floor). */
|
|
43
|
+
readonly blockedCapabilities: readonly SandboxCapability[];
|
|
44
|
+
/** Capabilities that require explicit approval (level === "advisory" or interactive policy). */
|
|
45
|
+
readonly approvalRequired: readonly SandboxCapability[];
|
|
46
|
+
/** Deterministic hash of the combined policy for audit / replay. */
|
|
47
|
+
readonly policyHash: string;
|
|
48
|
+
}
|
|
49
|
+
export declare function rankOf(level: CapabilityLevel): number;
|
|
50
|
+
export declare function defaultLattice(): CapabilityLattice;
|
|
51
|
+
/**
|
|
52
|
+
* Combine multiple policy layers by taking the **most restrictive**
|
|
53
|
+
* (minimum) authority level for each capability.
|
|
54
|
+
*
|
|
55
|
+
* If no layer expresses an opinion on a capability, it defaults to "full".
|
|
56
|
+
* If any layer expresses a sandbox mode, the most restrictive mode wins.
|
|
57
|
+
* If any layer expresses an approval policy, the most restrictive wins.
|
|
58
|
+
*/
|
|
59
|
+
export declare function combinePoliciesByMinAuthority(layers: readonly PolicyLayer[]): CombinedPolicy;
|
|
60
|
+
/**
|
|
61
|
+
* Compute the enforcement proof from a combined policy.
|
|
62
|
+
*
|
|
63
|
+
* Rules:
|
|
64
|
+
* 1. read-only sandbox blocks write, shell, network, merge, publish.
|
|
65
|
+
* 2. network-isolated sandbox blocks network.
|
|
66
|
+
* 3. Any capability with level "none" is blocked.
|
|
67
|
+
* 4. Any capability with level "advisory" requires approval.
|
|
68
|
+
* 5. interactive policy requires approval for non-read capabilities.
|
|
69
|
+
* 6. block policy blocks everything except read.
|
|
70
|
+
*/
|
|
71
|
+
export declare function computeEnforcementProof(combined: CombinedPolicy): EnforcementProof;
|
|
72
|
+
/**
|
|
73
|
+
* Returns true when the runtime/adapter has provided a valid enforcement proof.
|
|
74
|
+
* Runtimes without enforcement proof cannot enter authority lanes.
|
|
75
|
+
*/
|
|
76
|
+
export declare function hasValidEnforcementProof(proof: unknown): proof is EnforcementProof;
|
|
77
|
+
export declare function policyLayerFromLegacyAuthorities(source: PolicyLayer["source"], options: {
|
|
78
|
+
writeAuthority?: "none" | "advisory" | "direct" | "full";
|
|
79
|
+
shellAuthority?: "none" | "advisory" | "direct" | "full";
|
|
80
|
+
sandboxMode?: SandboxMode;
|
|
81
|
+
approvalPolicy?: ApprovalPolicy;
|
|
82
|
+
}): PolicyLayer;
|
|
83
|
+
export type ToolOpV2 = "read" | "write" | "shell" | "merge" | "network" | "secret";
|
|
84
|
+
/**
|
|
85
|
+
* Map a capability-lattice capability to the coarse ToolOpV2 class used by the gate.
|
|
86
|
+
* This preserves backward compatibility with the existing 4-class gate while
|
|
87
|
+
* allowing the new lattice to express finer-grained restrictions.
|
|
88
|
+
*/
|
|
89
|
+
export declare function capabilityToToolOp(cap: SandboxCapability): ToolOpV2;
|