open-multi-agent-kit 0.78.1 → 0.78.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/MATURITY.md +4 -0
- package/README.md +70 -1
- package/dist/benchmark/contracts.d.ts +116 -0
- package/dist/benchmark/contracts.js +6 -0
- package/dist/benchmark/fixtures.d.ts +11 -0
- package/dist/benchmark/fixtures.js +121 -0
- package/dist/benchmark/harness.d.ts +13 -0
- package/dist/benchmark/harness.js +191 -0
- package/dist/benchmark/shadow-mode.d.ts +17 -0
- package/dist/benchmark/shadow-mode.js +96 -0
- package/dist/cli/register-spec-agent-goal-commands.js +45 -0
- package/dist/cli/release-promotion-gate.d.ts +14 -0
- package/dist/cli/release-promotion-gate.js +71 -0
- package/dist/cli/v2/release-commands.d.ts +29 -0
- package/dist/cli/v2/release-commands.js +95 -0
- package/dist/commands/chat/native-root-loop.js +14 -1
- package/dist/commands/chat/slash/commands/session.js +19 -1
- package/dist/commands/goal-interview.d.ts +18 -0
- package/dist/commands/goal-interview.js +396 -0
- package/dist/commands/merge.js +102 -56
- package/dist/contracts/interview.d.ts +106 -0
- package/dist/contracts/interview.js +9 -0
- package/dist/contracts/provider-health.d.ts +37 -0
- package/dist/contracts/provider-health.js +49 -1
- package/dist/evidence/evidence-trust-score.d.ts +101 -0
- package/dist/evidence/evidence-trust-score.js +408 -0
- package/dist/evidence/index.d.ts +6 -0
- package/dist/evidence/index.js +3 -0
- package/dist/evidence/proof-trust-cli.d.ts +8 -0
- package/dist/evidence/proof-trust-cli.js +27 -0
- package/dist/evidence/proof-trust.d.ts +14 -0
- package/dist/evidence/proof-trust.js +381 -0
- package/dist/evidence/regression-proof-matrix.d.ts +42 -0
- package/dist/evidence/regression-proof-matrix.js +72 -0
- package/dist/goal/intent-frame.d.ts +6 -0
- package/dist/goal/intent-frame.js +21 -9
- package/dist/goal/interview-assimilation.d.ts +13 -0
- package/dist/goal/interview-assimilation.js +383 -0
- package/dist/goal/interview-question-bank.d.ts +11 -0
- package/dist/goal/interview-question-bank.js +225 -0
- package/dist/goal/interview-scoring.d.ts +31 -0
- package/dist/goal/interview-scoring.js +187 -0
- package/dist/goal/interview-session.d.ts +25 -0
- package/dist/goal/interview-session.js +116 -0
- package/dist/input/input-envelope.d.ts +22 -0
- package/dist/input/input-envelope.js +1 -0
- package/dist/orchestration/merge-arbiter.d.ts +91 -0
- package/dist/orchestration/merge-arbiter.js +376 -0
- package/dist/providers/health.d.ts +3 -0
- package/dist/providers/health.js +46 -0
- package/dist/providers/index.d.ts +1 -0
- package/dist/providers/index.js +1 -0
- package/dist/providers/provider-health.d.ts +8 -1
- package/dist/providers/provider-health.js +39 -0
- package/dist/providers/provider-task-runner.js +31 -0
- package/dist/providers/provider.d.ts +2 -0
- package/dist/providers/router.js +87 -3
- package/dist/providers/types.d.ts +4 -0
- package/dist/runtime/advanced-control-loop.d.ts +60 -0
- package/dist/runtime/advanced-control-loop.js +136 -0
- package/dist/runtime/agent-runtime.d.ts +10 -0
- package/dist/runtime/blast-radius.d.ts +10 -0
- package/dist/runtime/blast-radius.js +14 -0
- package/dist/runtime/contracts/evidence.d.ts +87 -0
- package/dist/runtime/contracts/evidence.js +7 -0
- package/dist/runtime/contracts/router-v2.d.ts +44 -0
- package/dist/runtime/contracts/router-v2.js +4 -0
- package/dist/runtime/contracts/weakness-remediation.d.ts +67 -0
- package/dist/runtime/contracts/weakness-remediation.js +36 -0
- package/dist/runtime/kimi-api-runtime.js +59 -1
- package/dist/runtime/proof-bundle-trust.d.ts +74 -0
- package/dist/runtime/proof-bundle-trust.js +100 -0
- package/dist/runtime/provider-maturity-gate.d.ts +43 -0
- package/dist/runtime/provider-maturity-gate.js +129 -0
- package/dist/runtime/public-surface.d.ts +93 -0
- package/dist/runtime/public-surface.js +146 -0
- package/dist/runtime/router-v2-scoring.d.ts +11 -0
- package/dist/runtime/router-v2-scoring.js +151 -0
- package/dist/runtime/tool-dispatch-contracts.d.ts +24 -3
- package/dist/runtime/tool-dispatch-contracts.js +42 -2
- package/dist/runtime/weakness-remediation-index.d.ts +27 -0
- package/dist/runtime/weakness-remediation-index.js +37 -0
- package/dist/safety/enforcement-engine.d.ts +89 -0
- package/dist/safety/enforcement-engine.js +279 -0
- package/dist/safety/tool-authority-gate.d.ts +40 -0
- package/dist/safety/tool-authority-gate.js +92 -0
- package/dist/schema/evidence.schema.d.ts +2 -2
- package/dist/schema/proof-bundle.schema.d.ts +28 -28
- package/dist/util/clipboard-image.d.ts +49 -0
- package/dist/util/clipboard-image.js +263 -0
- package/docs/2026-06-09/critical-issues.md +20 -0
- package/docs/2026-06-09/improvements.md +14 -0
- package/docs/2026-06-09/init-checklist.md +25 -0
- package/docs/2026-06-09/plan.md +20 -0
- package/docs/benchmark-design.md +122 -0
- package/docs/github-organic-promotion.md +127 -0
- package/docs/native-root-runtime-algorithms.md +301 -0
- package/package.json +8 -4
- package/readmeasset/ASSET_INDEX.md +1 -0
- package/templates/skills/agents/omk-agent-reach-websearch/SKILL.md +55 -0
- package/templates/skills/kimi/omk-agent-reach-websearch/SKILL.md +55 -0
package/dist/providers/router.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { PROVIDER_CAPABILITY_ORDINAL } from "../contracts/provider-health.js";
|
|
1
2
|
import { DEFAULT_AUTHORITY_PROVIDER, resolveFallbackProvider } from "./types.js";
|
|
2
3
|
const DEEPSEEK_READ_ONLY_ROLES = new Set([
|
|
3
4
|
"explorer",
|
|
@@ -41,7 +42,17 @@ export function routeProvider(input) {
|
|
|
41
42
|
const role = input.role.toLowerCase();
|
|
42
43
|
const seed = `${input.nodeId ?? ""}:${role}:${input.taskType}`;
|
|
43
44
|
const authorityProvider = resolveFallbackProvider(input.authorityProvider);
|
|
44
|
-
const
|
|
45
|
+
const authorityVector = input.providerHealthVectors?.[authorityProvider];
|
|
46
|
+
const authorityProviderAllowed = providerVectorAllowsAuthority(authorityVector);
|
|
47
|
+
const authorityProviderFallbackOnly = authorityVector
|
|
48
|
+
? !providerVectorQuotaOk(authorityVector) && providerVectorAllowsDirect(authorityVector)
|
|
49
|
+
: false;
|
|
50
|
+
const authorityDecision = (reason, confidence, deepseek, extra = {}) => {
|
|
51
|
+
if (!authorityProviderAllowed || authorityProviderFallbackOnly) {
|
|
52
|
+
return authorityProviderDecision(authorityProvider, `${providerLabel(authorityProvider)} ${authorityProviderFallbackOnly ? "quota exhausted" : "auth insufficient"}; using fallback-only route`, Math.max(0.5, confidence - 0.15), deepseek, extra);
|
|
53
|
+
}
|
|
54
|
+
return authorityProviderDecision(authorityProvider, reason, confidence, deepseek, extra);
|
|
55
|
+
};
|
|
45
56
|
const directDeepSeekAllowed = canUseDirectDeepSeek(role, input);
|
|
46
57
|
const dedicatedDeepSeekAgent = isDedicatedDeepSeekAgent(input);
|
|
47
58
|
const withRouteEnsemble = (decision, winner) => ({
|
|
@@ -60,6 +71,11 @@ export function routeProvider(input) {
|
|
|
60
71
|
if (role === "orchestrator" || role === "merger" || role === "integrator") {
|
|
61
72
|
return withRouteEnsemble(authorityDecision("Core orchestration and merge authority stay with the configured authority provider", 1), "safety-gate");
|
|
62
73
|
}
|
|
74
|
+
// Hard constraint: if authority provider is blocked by vector, skip to safety gate
|
|
75
|
+
// unless an explicit, fully-available external provider is requested.
|
|
76
|
+
if (!authorityProviderAllowed && !requestedExternalProvider(input)) {
|
|
77
|
+
return withRouteEnsemble(authorityProviderDecision(authorityProvider, `${providerLabel(authorityProvider)} auth/quota insufficient for authority lanes; safety gate active`, 0.5), "safety-gate");
|
|
78
|
+
}
|
|
63
79
|
const externalProvider = requestedExternalProvider(input);
|
|
64
80
|
if (externalProvider) {
|
|
65
81
|
if (!isProviderAvailable(input, externalProvider)) {
|
|
@@ -69,6 +85,12 @@ export function routeProvider(input) {
|
|
|
69
85
|
}
|
|
70
86
|
// If the external provider is the authority provider, route to it as authority
|
|
71
87
|
if (externalProvider === authorityProvider) {
|
|
88
|
+
// Hard constraint: auth != ok → exclude authority lanes
|
|
89
|
+
if (!authorityProviderAllowed) {
|
|
90
|
+
return withRouteEnsemble(authorityDecision(`${providerLabel(externalProvider)} auth/quota insufficient for authority; using configured fallback`, 0.86, undefined, {
|
|
91
|
+
providerModel: genericProviderModelRef(input, externalProvider, "veto"),
|
|
92
|
+
}), "safety-gate");
|
|
93
|
+
}
|
|
72
94
|
return withRouteEnsemble(authorityDecision(`${providerLabel(externalProvider)} is the configured authority provider`, 0.9, undefined, {
|
|
73
95
|
providerModel: genericProviderModelRef(input, externalProvider, "authority"),
|
|
74
96
|
}), "authority-provider");
|
|
@@ -178,8 +200,48 @@ function requestedExternalProvider(input) {
|
|
|
178
200
|
/**
 * Reports whether `value` names a generic external provider.
 *
 * A value qualifies when it is a string and is not one of the reserved
 * identifiers "auto", "kimi" or "deepseek". Non-string values never qualify.
 */
function isGenericExternalProvider(value) {
    if (typeof value !== "string") {
        return false;
    }
    switch (value) {
        case "auto":
        case "kimi":
        case "deepseek":
            return false;
        default:
            return true;
    }
}
|
|
203
|
+
/**
 * Checks whether a provider health vector's `auth` state ranks at or above `minState`
 * in PROVIDER_CAPABILITY_ORDINAL.
 *
 * A missing vector imposes no constraint and yields `true`.
 * NOTE(review): only the `auth` dimension is compared, whatever `minState` is passed.
 */
function providerVectorMeets(vector, minState) {
    return !vector
        || PROVIDER_CAPABILITY_ORDINAL[vector.auth] >= PROVIDER_CAPABILITY_ORDINAL[minState];
}
|
|
210
|
+
/**
 * Checks whether the vector's `quota` state ranks at or above "quota_available".
 *
 * A missing vector imposes no constraint and yields `true`.
 */
function providerVectorQuotaOk(vector) {
    if (vector) {
        const quotaRank = PROVIDER_CAPABILITY_ORDINAL[vector.quota];
        const neededRank = PROVIDER_CAPABILITY_ORDINAL["quota_available"];
        return quotaRank >= neededRank;
    }
    return true;
}
|
|
215
|
+
/**
 * Decides whether a provider may be used on authority lanes.
 *
 * All three dimensions must reach their threshold: auth >= "auth_valid",
 * model >= "model_available", quota >= "quota_available".
 * A missing vector imposes no constraint and yields `true`.
 */
function providerVectorAllowsAuthority(vector) {
    if (!vector) {
        return true;
    }
    const rank = PROVIDER_CAPABILITY_ORDINAL;
    const authValid = rank[vector.auth] >= rank["auth_valid"];
    const modelAvailable = rank[vector.model] >= rank["model_available"];
    const quotaAvailable = rank[vector.quota] >= rank["quota_available"];
    return authValid && modelAvailable && quotaAvailable;
}
|
|
222
|
+
/**
 * Decides whether a provider may be used on direct lanes.
 *
 * Requires auth >= "auth_valid", model >= "model_available" and
 * quota >= "quota_available". A missing vector yields `true`.
 *
 * NOTE(review): these thresholds are identical to providerVectorAllowsAuthority.
 * Because quota must be available here, the routeProvider expression
 * `!providerVectorQuotaOk(v) && providerVectorAllowsDirect(v)` can never be true,
 * so its "quota exhausted" fallback-only branch looks unreachable — confirm intent.
 */
function providerVectorAllowsDirect(vector) {
    if (!vector) {
        return true;
    }
    const rank = PROVIDER_CAPABILITY_ORDINAL;
    const authValid = rank[vector.auth] >= rank["auth_valid"];
    const modelAvailable = rank[vector.model] >= rank["model_available"];
    const quotaAvailable = rank[vector.quota] >= rank["quota_available"];
    return authValid && modelAvailable && quotaAvailable;
}
|
|
229
|
+
/**
 * Decides whether a provider may be used on advisory lanes.
 *
 * Only the `auth` dimension is checked, against the "auth_present" threshold.
 * A missing vector imposes no constraint and yields `true`.
 */
function providerVectorAllowsAdvisory(vector) {
    if (vector) {
        const authRank = PROVIDER_CAPABILITY_ORDINAL[vector.auth];
        return authRank >= PROVIDER_CAPABILITY_ORDINAL["auth_present"];
    }
    return true;
}
|
|
181
234
|
/**
 * Reports whether `provider` is fully available for routing.
 *
 * When a v2 health vector exists for the provider it acts as a hard constraint:
 *  - quota below "quota_available" → not available (fallback only);
 *  - auth below "auth_valid"       → not available for any lane.
 * Otherwise the explicit `input.providerAvailability[provider]` flag is returned,
 * defaulting to `true` when no flag was supplied.
 *
 * @param input    Route input carrying optional `providerHealthVectors` and `providerAvailability`.
 * @param provider Provider id to look up.
 * @returns The explicit availability flag, `true` when none is set, or `false` when the vector vetoes.
 */
function isProviderAvailable(input, provider) {
    const explicit = input.providerAvailability?.[provider];
    const vector = input.providerHealthVectors?.[provider];
    if (vector) {
        // Hard constraint: quota exhausted → fallback only (not fully available)
        if (!providerVectorQuotaOk(vector))
            return false;
        // Hard constraint: auth not valid → not available for any lane.
        // Written as a negated `>=` so that an auth state missing from the ordinal
        // table (lookup is `undefined`, every comparison is false) fails closed,
        // matching the providerVector* helpers. The previous `<` form let it through.
        if (!(PROVIDER_CAPABILITY_ORDINAL[vector.auth] >= PROVIDER_CAPABILITY_ORDINAL["auth_valid"]))
            return false;
    }
    return explicit === undefined ? true : explicit;
}
|
|
185
247
|
function canUseGenericDirectProvider(role, input) {
|
|
@@ -187,7 +249,15 @@ function canUseGenericDirectProvider(role, input) {
|
|
|
187
249
|
return false;
|
|
188
250
|
if (input.needsMcp || input.needsToolCalling)
|
|
189
251
|
return false;
|
|
190
|
-
|
|
252
|
+
if (!input.readOnly && !GENERIC_EXTERNAL_READ_ONLY_ROLES.has(role))
|
|
253
|
+
return false;
|
|
254
|
+
const externalProvider = requestedExternalProvider(input);
|
|
255
|
+
if (externalProvider) {
|
|
256
|
+
const vector = input.providerHealthVectors?.[externalProvider];
|
|
257
|
+
if (vector && !providerVectorAllowsDirect(vector))
|
|
258
|
+
return false;
|
|
259
|
+
}
|
|
260
|
+
return true;
|
|
191
261
|
}
|
|
192
262
|
function canUseGenericAdvisoryProvider(role, input) {
|
|
193
263
|
if (!GENERIC_EXTERNAL_ADVISORY_FILE_ROLES.has(role))
|
|
@@ -196,6 +266,12 @@ function canUseGenericAdvisoryProvider(role, input) {
|
|
|
196
266
|
return false;
|
|
197
267
|
if (input.needsMcp || input.needsToolCalling)
|
|
198
268
|
return false;
|
|
269
|
+
const externalProvider = requestedExternalProvider(input);
|
|
270
|
+
if (externalProvider) {
|
|
271
|
+
const vector = input.providerHealthVectors?.[externalProvider];
|
|
272
|
+
if (vector && !providerVectorAllowsAdvisory(vector))
|
|
273
|
+
return false;
|
|
274
|
+
}
|
|
199
275
|
return true;
|
|
200
276
|
}
|
|
201
277
|
function canUseDeepSeekProAdvisory(role, input) {
|
|
@@ -205,12 +281,20 @@ function canUseDeepSeekProAdvisory(role, input) {
|
|
|
205
281
|
return false;
|
|
206
282
|
if (input.needsMcp || input.needsToolCalling)
|
|
207
283
|
return false;
|
|
284
|
+
const vector = input.providerHealthVectors?.["deepseek"];
|
|
285
|
+
if (vector && !providerVectorAllowsAdvisory(vector))
|
|
286
|
+
return false;
|
|
208
287
|
return true;
|
|
209
288
|
}
|
|
210
289
|
/**
 * Decides whether a task may be routed directly to DeepSeek.
 *
 * All of the following must hold:
 *  - `input.risk` is exactly "read";
 *  - the task is flagged `readOnly === true` or the role is in DEEPSEEK_READ_ONLY_ROLES;
 *  - if a health vector exists for "deepseek", it passes providerVectorAllowsDirect.
 */
function canUseDirectDeepSeek(role, input) {
    if (input.risk !== "read") {
        return false;
    }
    const readOnlyLane = input.readOnly === true || DEEPSEEK_READ_ONLY_ROLES.has(role);
    if (!readOnlyLane) {
        return false;
    }
    const deepseekHealth = input.providerHealthVectors?.["deepseek"];
    // No vector means no v2 constraint; otherwise the vector decides.
    return !deepseekHealth || providerVectorAllowsDirect(deepseekHealth);
}
|
|
215
299
|
function buildProviderRouteEnsemble(options) {
|
|
216
300
|
const { input, role, decision, winner, directDeepSeekAllowed } = options;
|
|
@@ -71,6 +71,8 @@ export interface ProviderRouteInput {
|
|
|
71
71
|
authorityProvider?: ProviderId;
|
|
72
72
|
preferredModel?: string;
|
|
73
73
|
preferredDeepSeekTier?: DeepSeekModelTier;
|
|
74
|
+
/** v2 capability vectors for hard-constraint filtering. */
|
|
75
|
+
providerHealthVectors?: Partial<Record<ProviderId, import("../contracts/provider-health.js").ProviderHealthVector>>;
|
|
74
76
|
}
|
|
75
77
|
export interface ProviderRouteDecision {
|
|
76
78
|
provider: ProviderId;
|
|
@@ -90,6 +92,8 @@ export interface ProviderAvailability {
|
|
|
90
92
|
checkedAt: number;
|
|
91
93
|
reason?: string;
|
|
92
94
|
disableForRun: boolean;
|
|
95
|
+
/** v2 capability vector (optional; present when profiler v2 is active). */
|
|
96
|
+
healthVector?: import("../contracts/provider-health.js").ProviderHealthVector;
|
|
93
97
|
}
|
|
94
98
|
export interface AgentProvider {
|
|
95
99
|
id: ProviderId;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advanced Control Loop — Phase 6 of OMK Weakness Remediation (Algorithm 12).
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates Phases 1–5 into a single turn-gating engine:
|
|
5
|
+
* 1. Public surface compression
|
|
6
|
+
* 2. Proof bundle trust evaluation
|
|
7
|
+
* 3. Provider maturity gating
|
|
8
|
+
* 4. Evidence-calibrated router v2
|
|
9
|
+
* 5. Release gate advisory
|
|
10
|
+
*/
|
|
11
|
+
import type { CompressionResult, SurfaceItem } from "./public-surface.js";
|
|
12
|
+
import type { ProofBundleScores, TrustScoreResult } from "./proof-bundle-trust.js";
|
|
13
|
+
import type { MaturityResult } from "./provider-maturity-gate.js";
|
|
14
|
+
import type { RuntimeRouterDecisionV2, EvidenceHistoryEntry, NodeIntent } from "./contracts/router-v2.js";
|
|
15
|
+
import type { ReleasePromotionInputs, ReleasePromotionResult } from "./contracts/weakness-remediation.js";
|
|
16
|
+
import type { ReleasePromotionGate } from "../cli/release-promotion-gate.js";
|
|
17
|
+
import type { AdapterTestResult } from "./contracts/evidence.js";
|
|
18
|
+
import type { AgentRuntime } from "./agent-runtime.js";
|
|
19
|
+
/**
 * Outcome of one control-loop turn.
 *
 * Per the implementation in advanced-control-loop.js:
 * - "verified": every gate passed.
 * - "blocked": the public-surface invariant failed, or the provider maturity gate
 *   did not pass / returned authority class "disabled".
 * - "handoff": proof trust fell below τ_proof, or the best router score fell below τ_evidence.
 */
export type IntegrationResultKind = "verified" | "blocked" | "handoff";
|
|
20
|
+
/**
 * Snapshot of every phase result gathered during a turn.
 *
 * When the loop stops early, phases that did not run are filled in by the
 * implementation: `proofTrust` / `providerMaturity` become zero-score placeholders
 * ("no-claim" / "disabled", `passed: false`), and `routerV2Decision` / `releaseGate` are `null`.
 */
export interface WeaknessRemediationState {
    /** Phase 1 result of compressing the input surface items. */
    readonly publicSurface: CompressionResult;
    /** Phase 2 result, or the zero-score placeholder if the phase was not reached. */
    readonly proofTrust: TrustScoreResult;
    /** Phase 3 result, or the zero-score placeholder if the phase was not reached. */
    readonly providerMaturity: MaturityResult;
    /** Phase 4 routing decision; `null` if the phase was not reached. */
    readonly routerV2Decision: RuntimeRouterDecisionV2 | null;
    /** Phase 5 release gate result; `null` if not reached or not evaluated. */
    readonly releaseGate: ReleasePromotionResult | null;
}
|
|
27
|
+
/** Per-turn input to {@link AdvancedControlLoop.run}. */
export interface AdvancedControlLoopInput {
    /** Turn identifier; echoed in the result and embedded in `evidenceContractPath`. */
    readonly turnId: string;
    /** Intent passed to the router v2 engine when selecting a runtime. */
    readonly intent: NodeIntent;
    /** Items handed to the public-surface compressor (phase 1). */
    readonly surfaceItems: readonly SurfaceItem[];
    /** Scores handed to the proof-bundle trust engine (phase 2). */
    readonly proofScores: ProofBundleScores;
    /** Adapter test results handed to the provider maturity gate (phase 3). */
    readonly providerTests: readonly AdapterTestResult[];
    /** Candidate runtimes for router v2 selection (phase 4); copied before use. */
    readonly candidates: readonly AgentRuntime[];
    /** Evidence history for router v2 scoring (phase 4); copied before use. */
    readonly evidenceHistory: readonly EvidenceHistoryEntry[];
    /** Inputs for the release gate (phase 5); the gate is skipped when absent. */
    readonly releaseInputs?: ReleasePromotionInputs;
    /**
     * Remaining retry budget. On a proof-trust handoff it is reported as-is (default 0);
     * on a router handoff it is reported decremented by one, floored at 0 (default 1 before decrement).
     */
    readonly retryBudget?: number;
}
|
|
38
|
+
/** Result of one control-loop turn. */
export interface AdvancedControlLoopResult {
    /** Overall outcome of the turn. */
    readonly kind: IntegrationResultKind;
    /** The `turnId` from the input, echoed back. */
    readonly turnId: string;
    /** Phase results collected up to the point the loop stopped. */
    readonly state: WeaknessRemediationState;
    /** Set by the implementation only on "handoff" results. */
    readonly replan?: {
        /** `true` for a router-evidence handoff, `false` for a proof-trust handoff. */
        readonly triggered: boolean;
        /** Human-readable explanation naming the score and the threshold it missed. */
        readonly reason: string;
        /** Retry budget left after this turn. */
        readonly retryBudgetRemaining: number;
    };
    /**
     * Path string of the form `<evidenceContractDir>/<turnId>-<kind>.json`.
     * NOTE(review): the visible implementation only builds this string; whether a file
     * is written there is not shown — confirm with the caller.
     */
    readonly evidenceContractPath: string;
}
|
|
49
|
+
/** Turn-gating engine returned by `createAdvancedControlLoop`. */
export interface AdvancedControlLoop {
    /** Runs the gates in order for one turn and resolves with the first stopping outcome. */
    readonly run: (input: AdvancedControlLoopInput) => Promise<AdvancedControlLoopResult>;
}
|
|
52
|
+
/** Construction-time options for `createAdvancedControlLoop`. */
export interface AdvancedControlLoopOptions {
    /**
     * Budget forwarded to the public-surface compressor.
     * The implementation forwards it only when truthy, so `0` behaves like "unset".
     */
    readonly publicSurfaceBudget?: number;
    /** Minimum proof trust score; defaults to `TAU_PROOF`. */
    readonly tauProof?: number;
    /** Minimum bayesian evidence score for the best runtime; defaults to `TAU_EVIDENCE`. */
    readonly tauEvidence?: number;
    /** Release gate evaluated in phase 5; the phase is skipped when absent. */
    readonly releaseGate?: ReleasePromotionGate;
    /** Switch for phase 5; defaults to `true`. */
    readonly releaseGateEnabled?: boolean;
    /** Directory prefix for `evidenceContractPath`; defaults to ".omk/evidence". */
    readonly evidenceContractDir?: string;
}
|
|
60
|
+
/**
 * Creates a control loop that gates a turn through public-surface compression,
 * proof trust, provider maturity, router v2 selection and an optional release gate.
 *
 * @param options Thresholds and collaborators; every field is optional.
 * @returns An engine whose `run` evaluates one turn.
 */
export declare function createAdvancedControlLoop(options?: AdvancedControlLoopOptions): AdvancedControlLoop;
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advanced Control Loop — Phase 6 of OMK Weakness Remediation (Algorithm 12).
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates Phases 1–5 into a single turn-gating engine:
|
|
5
|
+
* 1. Public surface compression
|
|
6
|
+
* 2. Proof bundle trust evaluation
|
|
7
|
+
* 3. Provider maturity gating
|
|
8
|
+
* 4. Evidence-calibrated router v2
|
|
9
|
+
* 5. Release gate advisory
|
|
10
|
+
*/
|
|
11
|
+
import { PublicSurfaceCompressor } from "./public-surface.js";
|
|
12
|
+
import { createProofBundleTrustEngine } from "./proof-bundle-trust.js";
|
|
13
|
+
import { TAU_PROOF } from "./contracts/weakness-remediation.js";
|
|
14
|
+
import { createProviderMaturityGate } from "./provider-maturity-gate.js";
|
|
15
|
+
import { createRouterV2ScoringEngine } from "./router-v2-scoring.js";
|
|
16
|
+
import { TAU_EVIDENCE } from "./contracts/weakness-remediation.js";
|
|
17
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
18
|
+
/** Clamps `n` into the closed interval [0, 1]; NaN propagates as NaN. */
function clamp01(n) {
    const cappedAtOne = Math.min(1, n);
    return Math.max(0, cappedAtOne);
}
|
|
21
|
+
/**
 * Assembles the frozen per-turn state snapshot.
 *
 * `proofTrust` and `providerMaturity` may be null/undefined when their phase did not
 * run; each is then replaced by a frozen zero-score placeholder so consumers always
 * see a fully shaped object. The remaining arguments are stored as given.
 */
function buildState(publicSurface, proofTrust, providerMaturity, routerV2Decision, releaseGate) {
    // Placeholder for a proof-trust phase that never ran: no claim is permitted.
    const emptyProofTrust = () => Object.freeze({
        score: 0,
        permissionLevel: "no-claim",
        passed: false,
        breakdown: Object.freeze({
            schema: 0,
            hashes: 0,
            commands: 0,
            stdout: 0,
            decisions: 0,
            evidence: 0,
            limitations: 0,
            replay: 0,
        }),
    });
    // Placeholder for a maturity phase that never ran: provider treated as disabled.
    const emptyProviderMaturity = () => Object.freeze({
        score: 0,
        authorityClass: "disabled",
        passed: false,
        subScores: Object.freeze({
            auth: 0,
            read: 0,
            write: 0,
            shell: 0,
            mcp: 0,
            merge: 0,
            evidence: 0,
            fallback: 0,
        }),
    });
    const snapshot = {
        publicSurface,
        proofTrust: proofTrust ?? emptyProofTrust(),
        providerMaturity: providerMaturity ?? emptyProviderMaturity(),
        routerV2Decision,
        releaseGate,
    };
    return Object.freeze(snapshot);
}
|
|
60
|
+
// ─── Factory ─────────────────────────────────────────────────────────────────
|
|
61
|
+
/**
 * Builds the Phase 6 control loop that gates a turn through phases 1–5.
 *
 * Defaults: `tauProof = TAU_PROOF`, `tauEvidence = TAU_EVIDENCE`,
 * `releaseGateEnabled = true`, `evidenceContractDir = ".omk/evidence"`.
 * `publicSurfaceBudget` is forwarded to the compressor only when truthy, so a
 * budget of `0` is treated the same as "not provided".
 *
 * @param options Optional thresholds and collaborators.
 * @returns An object whose async `run(input)` resolves to a frozen turn result.
 */
export function createAdvancedControlLoop(options = {}) {
    const {
        publicSurfaceBudget,
        tauProof = TAU_PROOF,
        tauEvidence = TAU_EVIDENCE,
        releaseGate,
        releaseGateEnabled = true,
        evidenceContractDir = ".omk/evidence",
    } = options;
    const compressor = new PublicSurfaceCompressor(publicSurfaceBudget ? { budget: publicSurfaceBudget } : {});
    const proofEngine = createProofBundleTrustEngine();
    const maturityGate = createProviderMaturityGate();
    const routerV2Engine = createRouterV2ScoringEngine();
    // Freezes a turn result; `replan` is included only when supplied, and the
    // evidence path suffix is the result kind ("blocked" | "handoff" | "verified").
    const seal = (kind, turnId, state, replan) => Object.freeze({
        kind,
        turnId,
        state,
        ...(replan ? { replan } : {}),
        evidenceContractPath: `${evidenceContractDir}/${turnId}-${kind}.json`,
    });
    const run = async (input) => {
        // Phase 1 — Public surface compression
        const publicSurface = compressor.compress(input.surfaceItems);
        if (!publicSurface.invariantPassed) {
            return seal("blocked", input.turnId, buildState(publicSurface, null, null, null, null));
        }
        // Phase 2 — Proof bundle trust evaluation
        const proofTrust = proofEngine.evaluate(input.proofScores);
        if (proofTrust.score < tauProof) {
            return seal("handoff", input.turnId, buildState(publicSurface, proofTrust, null, null, null), {
                triggered: false,
                reason: `Proof trust ${clamp01(proofTrust.score).toFixed(3)} < τ_proof ${tauProof}`,
                retryBudgetRemaining: input.retryBudget ?? 0,
            });
        }
        // Phase 3 — Provider maturity gating
        const providerMaturity = maturityGate.evaluate(input.providerTests);
        const maturityRejected = !providerMaturity.passed || providerMaturity.authorityClass === "disabled";
        if (maturityRejected) {
            return seal("blocked", input.turnId, buildState(publicSurface, proofTrust, providerMaturity, null, null));
        }
        // Phase 4 — Evidence-calibrated router v2 (inputs are copied before being handed over)
        const routerV2Decision = routerV2Engine.select([...input.candidates], input.intent, [...input.evidenceHistory]);
        const bestScore = routerV2Decision.scores[0];
        if (!bestScore || bestScore.bayesianEvidenceScore < tauEvidence) {
            const shownScore = bestScore ? clamp01(bestScore.bayesianEvidenceScore).toFixed(3) : "n/a";
            return seal("handoff", input.turnId, buildState(publicSurface, proofTrust, providerMaturity, routerV2Decision, null), {
                triggered: true,
                reason: `Router v2 bayesian evidence ${shownScore} < τ_evidence ${tauEvidence}`,
                // One retry is consumed by this handoff; never reported below zero.
                retryBudgetRemaining: Math.max(0, (input.retryBudget ?? 1) - 1),
            });
        }
        // Phase 5 — Release gate advisory (optional)
        const releaseGateResult = releaseGateEnabled && releaseGate && input.releaseInputs
            ? releaseGate.evaluate(input.releaseInputs)
            : null;
        return seal("verified", input.turnId, buildState(publicSurface, proofTrust, providerMaturity, routerV2Decision, releaseGateResult));
    };
    return { run };
}
|
|
@@ -91,12 +91,22 @@ export interface ProviderPolicy {
|
|
|
91
91
|
/** Runtime capabilities extended with an optional structured-output flag. */
export interface CapabilityManifest extends RuntimeCapabilities {
    /** NOTE(review): presumably signals support for structured (schema-shaped) output — confirm. */
    readonly structuredOutput?: boolean;
}
|
|
94
|
+
/** A single image/file attached to an {@link AgentTask}. */
export interface AgentTaskAttachment {
    /** Display name of the attachment. */
    readonly name: string;
    /** Filesystem path, when the attachment has one. */
    readonly path?: string;
    /** MIME type of the content. */
    readonly mimeType: string;
    /** Content carried inline; presumably a `data:` URI — TODO confirm against the producer. */
    readonly dataUri: string;
    /** File extension; whether it includes the leading dot is not shown here — verify. */
    readonly ext: string;
    /** How the attachment entered the task. */
    readonly source: "clipboard" | "file" | "drag";
}
|
|
94
102
|
/** Unit of work handed to an agent runtime. */
export interface AgentTask {
    /** Prompt text for the task. */
    readonly prompt: string;
    /** Context supplied with the task. */
    readonly context: AgentContext;
    /** Tool manifest supplied with the task. */
    readonly tools: ToolManifest;
    /** Provider policy supplied with the task. */
    readonly providerPolicy: ProviderPolicy;
    /** Capability manifest supplied with the task. */
    readonly capabilities: CapabilityManifest;
    /** Images/files attached to this task (clipboard paste, --image, drag). */
    readonly attachments?: readonly AgentTaskAttachment[];
}
|
|
101
111
|
export interface AgentResult {
|
|
102
112
|
readonly output: string;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Blast radius penalty — penalizes runtimes for high-risk, wide-impact tasks.
|
|
3
|
+
*
|
|
4
|
+
* Higher downstream dependency counts, larger affected file surfaces, and
|
|
5
|
+
* global side-effects increase the penalty, nudging the router toward
|
|
6
|
+
* more mature or lower-blast-radius runtimes.
|
|
7
|
+
*/
|
|
8
|
+
import type { BlastRadiusParams } from "./contracts/router-v2.js";
|
|
9
|
+
export type { BlastRadiusParams } from "./contracts/router-v2.js";
|
|
10
|
+
/**
 * Computes the blast-radius penalty for a task.
 *
 * Per the implementation in blast-radius.js: 0.03 per downstream node (capped at 0.15),
 * 0.01 per affected file (capped at 0.10), plus 0.10 for global side effects;
 * the total is capped at 0.30.
 *
 * @param params Downstream node count, affected file count and global side-effect flag.
 * @returns The penalty, at most 0.30.
 */
export declare function computeBlastRadiusPenalty(params: BlastRadiusParams): number;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Blast radius penalty — penalizes runtimes for high-risk, wide-impact tasks.
|
|
3
|
+
*
|
|
4
|
+
* Higher downstream dependency counts, larger affected file surfaces, and
|
|
5
|
+
* global side-effects increase the penalty, nudging the router toward
|
|
6
|
+
* more mature or lower-blast-radius runtimes.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Computes the blast-radius penalty for a task.
 *
 * Components:
 *  - downstream nodes: 0.03 each, capped at 0.15;
 *  - affected files:   0.01 each, capped at 0.10;
 *  - global side effects: flat 0.10.
 * The sum is capped at 0.30.
 *
 * Counts that are negative or NaN are treated as 0 so the result is always a
 * number in [0, 0.30]; previously they produced a negative or NaN penalty.
 *
 * @param params `{ downstreamNodeCount, affectedFileCount, hasGlobalSideEffects }`.
 * @returns Penalty in the range [0, 0.30].
 */
export function computeBlastRadiusPenalty(params) {
    const { downstreamNodeCount, affectedFileCount, hasGlobalSideEffects } = params;
    // `count > 0` is false for NaN and negatives; Infinity passes and is capped below.
    const atLeastZero = (count) => (count > 0 ? count : 0);
    const downstreamPenalty = Math.min(0.15, atLeastZero(downstreamNodeCount) * 0.03);
    const filePenalty = Math.min(0.10, atLeastZero(affectedFileCount) * 0.01);
    const sideEffectPenalty = hasGlobalSideEffects ? 0.10 : 0.0;
    return Math.min(0.30, downstreamPenalty + filePenalty + sideEffectPenalty);
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evidence contracts for OMK Weakness Remediation.
|
|
3
|
+
*
|
|
4
|
+
* Core interfaces that bridge reasoning traces, runtime decisions,
|
|
5
|
+
* provider maturity, and release gates into a verifiable evidence model.
|
|
6
|
+
*/
|
|
7
|
+
/** Kind (category) of a single evidence artifact. */
export type EvidenceKind = "test" | "diff" | "command" | "screenshot" | "trace" | "metric" | "audit" | "review";
|
|
9
|
+
/** Verdict state of an evidence item or bundle. */
export type EvidenceVerdict = "pass" | "fail" | "partial" | "pending";
|
|
11
|
+
/** A single, auditable piece of evidence. */
export interface EvidenceItem {
    /** Identifier of this evidence item. */
    readonly id: string;
    /** Category of the artifact. */
    readonly kind: EvidenceKind;
    /** Where the evidence came from. */
    readonly source: string;
    /** Human-readable description. */
    readonly description: string;
    /** Verdict recorded for this item. */
    readonly verdict: EvidenceVerdict;
    /** When the evidence was recorded; presumably an ISO-8601 string — TODO confirm. */
    readonly timestamp: string;
    /** Confidence in the verdict; presumably in [0, 1] — TODO confirm. */
    readonly confidence: number;
    /** Trace this evidence is linked to, if any. */
    readonly linkedTraceId?: string;
    /** File paths this evidence is linked to. */
    readonly linkedFilePaths: readonly string[];
    /** Free-form additional data. */
    readonly metadata?: Readonly<Record<string, unknown>>;
}
|
|
24
|
+
/** A curated bundle of evidence items with a collective verdict. */
export interface ProofBundle {
    /** Identifier of the bundle. */
    readonly id: string;
    /** Display name of the bundle. */
    readonly name: string;
    /** Evidence items contained in the bundle. */
    readonly items: readonly EvidenceItem[];
    /** Creation time; presumably an ISO-8601 string — TODO confirm. */
    readonly createdAt: string;
    /** Collective verdict for the bundle. */
    readonly verdict: EvidenceVerdict;
    /** Coverage as a percentage; whether the scale is 0–100 or 0–1 is not shown — verify. */
    readonly coveragePercent: number;
    /** Human-readable summary. */
    readonly summary: string;
}
|
|
34
|
+
/** Maturity tier for a provider or runtime surface. */
export type MaturityTier = "experimental" | "preview" | "stable" | "deprecated";
|
|
36
|
+
/** Maturity assessment for a provider/runtime. */
export interface ProviderMaturity {
    /** Provider this assessment describes. */
    readonly providerId: string;
    /** Assessed maturity tier. */
    readonly tier: MaturityTier;
    /** Number of runs the assessment is based on. */
    readonly runCount: number;
    /** Fraction of passing runs; presumably in [0, 1] — TODO confirm. */
    readonly passRate: number;
    /** Time of the last verification; presumably an ISO-8601 string — TODO confirm. */
    readonly lastVerifiedAt: string;
    /** Known issues recorded for the provider. */
    readonly knownIssues: readonly string[];
    /** NOTE(review): looks like a multiplier applied to budgets — confirm against consumers. */
    readonly recommendedBudgetFactor: number;
}
|
|
46
|
+
/** Normalized record of a runtime routing decision. */
export interface RuntimeRouterDecision {
    /** Identifier of this decision record. */
    readonly decisionId: string;
    /** Turn the decision belongs to. */
    readonly turnId: string;
    /** When the decision was made; presumably an ISO-8601 string — TODO confirm. */
    readonly timestamp: string;
    /** Category of the intent being routed. */
    readonly intentCategory: string;
    /** Identifier of the runtime that was chosen. */
    readonly selectedRuntimeId: string;
    /** Identifiers of every runtime that was considered. */
    readonly candidatesConsidered: readonly string[];
    /** Confidence in the decision; presumably in [0, 1] — TODO confirm. */
    readonly confidence: number;
    /** Whether a fallback runtime was used. */
    readonly fallbackUsed: boolean;
    /** Latency in milliseconds; what interval is measured is not shown here — verify. */
    readonly latencyMs: number;
}
|
|
58
|
+
/**
 * Permission level derived from a proof bundle trust score.
 * "no-claim" is also the value the advanced control loop substitutes when the
 * proof-trust phase did not run.
 */
export type ClaimPermissionLevel = "strong-public-claim" | "qualified-public-claim" | "internal-claim-only" | "no-claim";
|
|
60
|
+
/**
 * Authority class derived from provider maturity score and sub-scores.
 * The advanced control loop blocks a turn whose maturity result has class "disabled".
 */
export type ProviderAuthorityClass = "merge-authority" | "write-authority" | "review-authority" | "read-only-advisory" | "disabled";
|
|
62
|
+
/**
 * Kinds of adapter tests used in provider maturity assessment.
 * The members match the `subScores` keys of the maturity placeholder built by the
 * advanced control loop.
 */
export type AdapterTestKind = "auth" | "read" | "write" | "shell" | "mcp" | "merge" | "evidence" | "fallback";
|
|
64
|
+
/** Result of a single adapter test; a list of these is fed to the provider maturity gate. */
export interface AdapterTestResult {
    /** Which adapter capability was tested. */
    readonly kind: AdapterTestKind;
    /** Whether the test passed. */
    readonly passed: boolean;
    /** Score for the test; presumably in [0, 1] — TODO confirm. */
    readonly score: number;
    /** Optional free-form details. */
    readonly details?: string;
}
|
|
71
|
+
/** Per-gate check result. */
export interface GateCheck {
    /** Name of the gate that was checked. */
    readonly gate: string;
    /** Whether the gate passed. */
    readonly passed: boolean;
    /** Human-readable outcome message. */
    readonly message: string;
    /** Identifiers of the evidence items associated with this check. */
    readonly evidenceIds: readonly string[];
}
|
|
78
|
+
/** Result of a full release gate evaluation. */
export interface ReleaseGateResult {
    /** Identifier of the evaluation run. */
    readonly runId: string;
    /** When the evaluation ran; presumably an ISO-8601 string — TODO confirm. */
    readonly timestamp: string;
    /** Overall outcome of the evaluation. */
    readonly overallPass: boolean;
    /** Individual gate check results. */
    readonly checks: readonly GateCheck[];
    /** Names of gates classified as required. */
    readonly requiredGates: readonly string[];
    /** Names of gates classified as optional. */
    readonly optionalGates: readonly string[];
    /** Human-readable summary. */
    readonly summary: string;
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Router v2 contracts — Evidence-Calibrated Runtime Router (Algorithm 6)
|
|
3
|
+
*/
|
|
4
|
+
import type { AgentRuntime } from "../agent-runtime.js";
|
|
5
|
+
/** Intent of a node/turn, passed to the router v2 engine when scoring and selecting runtimes. */
export type NodeIntent = "research" | "planning" | "coding" | "debugging" | "refactor" | "review" | "test-generation" | "documentation" | "shell-operation";
|
|
6
|
+
/** One historical outcome record supplied to router v2 scoring. */
export interface EvidenceHistoryEntry {
    /** Runtime the outcome belongs to; presumably a runtime id — verify against the scorer. */
    readonly runtime: string;
    /** Intent the run was serving (typed as a plain string here, not `NodeIntent`). */
    readonly intent: string;
    /** Whether the run passed. */
    readonly passed: boolean;
    /** When the run happened; presumably an ISO-8601 string — TODO confirm. */
    readonly timestamp: string;
    /** Node the run belonged to. */
    readonly nodeId: string;
}
|
|
13
|
+
/**
 * Score breakdown for a single runtime as produced by the Router v2 scoring
 * engine.
 *
 * NOTE(review): ranges and the formula combining the components into
 * `composite` are not part of this declaration — confirm against the
 * scoring implementation.
 */
export interface RuntimeScoreV2 {
    /** Identifier of the scored runtime. */
    readonly runtimeId: string;
    /** Bayesian evidence score component. */
    readonly bayesianEvidenceScore: number;
    /** Confidence component. */
    readonly confidence: number;
    /** Capability-fit component. */
    readonly capabilityFit: number;
    /** Maturity component. */
    readonly maturityScore: number;
    /** Latency component. */
    readonly latencyScore: number;
    /** Cost component. */
    readonly costScore: number;
    /** Penalty component for recent failures. */
    readonly recentFailurePenalty: number;
    /** Penalty component for blast radius. */
    readonly blastRadiusPenalty: number;
    /** Composite score. */
    readonly composite: number;
}
|
|
25
|
+
/**
 * Routing decision returned by `RouterV2ScoringEngine.select`.
 *
 * NOTE(review): `fallbacks` and `scores` are mutable array types even though
 * the properties themselves are `readonly`; other contracts in this package
 * use `readonly T[]`. Left unchanged here to keep the public type identical.
 */
export interface RuntimeRouterDecisionV2 {
    /** The selected runtime. */
    readonly runtime: AgentRuntime;
    /** Textual reason for the selection. */
    readonly reason: string;
    /** Fallback runtimes. */
    readonly fallbacks: AgentRuntime[];
    /** Intent the decision was made for. */
    readonly intent: NodeIntent;
    /** Score breakdowns attached to the decision. */
    readonly scores: RuntimeScoreV2[];
}
|
|
32
|
+
/** Inputs describing the blast radius of a node, used by Router v2 options. */
export interface BlastRadiusParams {
    /** Number of downstream nodes. */
    readonly downstreamNodeCount: number;
    /** Number of affected files. */
    readonly affectedFileCount: number;
    /** Whether the node has global side effects. */
    readonly hasGlobalSideEffects: boolean;
}
|
|
37
|
+
/** Optional settings for Router v2. Both fields may be omitted. */
export interface RouterV2Options {
    /**
     * Toggle for blast-radius handling.
     * NOTE(review): the default when omitted is not stated here — confirm with the router implementation.
     */
    readonly enableBlastRadius?: boolean;
    /** Blast-radius inputs, when available. */
    readonly blastRadiusParams?: BlastRadiusParams;
}
|
|
41
|
+
/** Contract for the Router v2 scoring engine. */
export interface RouterV2ScoringEngine {
    /**
     * Produce the score breakdown for one runtime.
     *
     * @param runtime - Runtime to score.
     * @param intent - Intent the score is computed for.
     * @param history - Evidence history entries made available to the scorer.
     * @returns The score breakdown for `runtime`.
     */
    score(runtime: AgentRuntime, intent: NodeIntent, history: EvidenceHistoryEntry[]): RuntimeScoreV2;
    /**
     * Choose a runtime from a list of candidates.
     *
     * NOTE(review): behaviour for an empty `candidates` list is not stated in
     * this declaration — confirm with the implementation.
     *
     * @param candidates - Runtimes to choose from.
     * @param intent - Intent the selection is made for.
     * @param history - Evidence history entries made available to the selector.
     * @returns The routing decision.
     */
    select(candidates: AgentRuntime[], intent: NodeIntent, history: EvidenceHistoryEntry[]): RuntimeRouterDecisionV2;
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Weakness Remediation contracts — shared constants and Phase 5 types.
|
|
3
|
+
*
|
|
4
|
+
* - Phase 2 Proof Bundle Trust, Phase 3 Provider Maturity Gate,
|
|
5
|
+
* and Phase 1 Public Surface Compression share thresholds here.
|
|
6
|
+
* - Phase 5 Release Promotion Gate types live here as well.
|
|
7
|
+
*/
|
|
8
|
+
/** Evidence trust threshold for standard claims. */
export declare const TAU_EVIDENCE = 0.75;
/** Evidence trust threshold for high-confidence claims. */
export declare const TAU_EVIDENCE_HIGH = 0.85;
/** Proof bundle trust threshold. */
export declare const TAU_PROOF = 0.85;
/** Stability/maturity threshold for fully-trusted surfaces. */
export declare const TAU_STABLE = 0.9;
/** Beta prior α₀ for Bayesian run-count scoring. */
export declare const BETA_PRIOR_ALPHA0 = 1;
/** Beta prior β₀ for Bayesian run-count scoring. */
export declare const BETA_PRIOR_BETA0 = 1;
/** Default public surface budget K (max items). */
export declare const SURFACE_BUDGET_K = 8;
|
|
22
|
+
/**
 * Algorithm 8 release gate weights.
 *
 * Each key is typed as its exact numeric literal and marked `readonly`.
 * NOTE(review): the ten declared weights add up to 1.20, not 1.00 —
 * confirm whether the gate normalises them or the total is intentional.
 */
export declare const RELEASE_GATE_WEIGHTS: {
    readonly ci: 0.15;
    readonly build: 0.1;
    readonly types: 0.1;
    readonly tests: 0.1;
    readonly install: 0.1;
    readonly demo: 0.15;
    readonly proof: 0.15;
    readonly maturity: 0.1;
    readonly docs: 0.1;
    readonly regression: 0.15;
};
|
|
35
|
+
/** Verdict reported by the release promotion gate: one of three string literals. */
export type ReleaseVerdict =
    | "block"
    | "pre-release"
    | "stable";
|
|
36
|
+
/**
 * Inputs to the Phase 5 release promotion gate.
 *
 * The first five fields are required; everything else is optional.
 * NOTE(review): the required numeric fields carry no documented range here —
 * presumably 0–1 like the Algorithm 8 dimensions below; confirm with the gate
 * implementation.
 */
export interface ReleasePromotionInputs {
    /** CI input. */
    readonly ci: number;
    /** Docs input. */
    readonly docs: number;
    /** Proof median input. */
    readonly proofMedian: number;
    /** Regression severity input. */
    readonly regressionSeverity: number;
    /** Fresh-install smoke input. */
    readonly freshInstallSmoke: number;
    /** Backward-compat: old callers may still pass schema. */
    readonly schema?: number;
    /** Backward-compat: old callers may still pass providerMinimum. */
    readonly providerMinimum?: number;
    /** Backward-compat: old callers may still pass semver. */
    readonly semver?: number;
    /** Algorithm 8 — build dimension (0–1). */
    readonly build?: number;
    /** Algorithm 8 — type-check dimension (0–1). */
    readonly types?: number;
    /** Algorithm 8 — test dimension (0–1). */
    readonly tests?: number;
    /** Algorithm 8 — maturity dimension (0–1). Falls back to providerMinimum. */
    readonly maturity?: number;
    /** Algorithm 8 — minimal verified demo run gate. Hard block when false/undefined. */
    readonly demoRun?: boolean;
}
|
|
59
|
+
/** Outcome returned by `ReleasePromotionGate.evaluate`. */
export interface ReleasePromotionResult {
    /** Numeric score produced by the gate. */
    readonly score: number;
    /** Verdict produced by the gate. */
    readonly verdict: ReleaseVerdict;
    /**
     * Whether promotion is blocked.
     * NOTE(review): the relationship to `verdict === "block"` is not stated here — confirm with the gate implementation.
     */
    readonly blocked: boolean;
    /** Reasons accompanying the outcome (read-only list). */
    readonly reasons: readonly string[];
}
|
|
65
|
+
/** Contract for the Phase 5 release promotion gate. */
export interface ReleasePromotionGate {
    /**
     * Evaluate a set of promotion inputs.
     *
     * @param inputs - Gate inputs to evaluate.
     * @returns The score, verdict, blocked flag and reasons for these inputs.
     */
    evaluate(inputs: ReleasePromotionInputs): ReleasePromotionResult;
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Weakness Remediation contracts — shared constants and Phase 5 types.
|
|
3
|
+
*
|
|
4
|
+
* - Phase 2 Proof Bundle Trust, Phase 3 Provider Maturity Gate,
|
|
5
|
+
* and Phase 1 Public Surface Compression share thresholds here.
|
|
6
|
+
* - Phase 5 Release Promotion Gate types live here as well.
|
|
7
|
+
*/
|
|
8
|
+
// ── Shared constants (Phase 1–3) ────────────────────────────────
/** Evidence trust threshold for standard claims. */
export const TAU_EVIDENCE = 0.75;
/** Evidence trust threshold for high-confidence claims. */
export const TAU_EVIDENCE_HIGH = 0.85;
/** Proof bundle trust threshold. */
export const TAU_PROOF = 0.85;
/** Stability/maturity threshold for fully-trusted surfaces. */
export const TAU_STABLE = 0.90;
/** Beta prior α₀ for Bayesian run-count scoring. */
export const BETA_PRIOR_ALPHA0 = 1;
/** Beta prior β₀ for Bayesian run-count scoring. */
export const BETA_PRIOR_BETA0 = 1;
/** Default public surface budget K (max items). */
export const SURFACE_BUDGET_K = 8;
|
|
23
|
+
// ── Phase 5 Release Promotion Gate ──────────────────────────────
|
|
24
|
+
/** Algorithm 8 release gate weights. */
|
|
25
|
+
export const RELEASE_GATE_WEIGHTS = {
|
|
26
|
+
ci: 0.15,
|
|
27
|
+
build: 0.10,
|
|
28
|
+
types: 0.10,
|
|
29
|
+
tests: 0.10,
|
|
30
|
+
install: 0.10,
|
|
31
|
+
demo: 0.15,
|
|
32
|
+
proof: 0.15,
|
|
33
|
+
maturity: 0.10,
|
|
34
|
+
docs: 0.10,
|
|
35
|
+
regression: 0.15,
|
|
36
|
+
};
|