open-multi-agent-kit 0.78.0 → 0.78.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +56 -15
- package/MATURITY.md +6 -2
- package/README.md +125 -26
- package/ROADMAP.md +36 -28
- package/dist/cli/register-basic-commands.js +3 -2
- package/dist/cli/register-mcp-dag-cron-screenshot-commands.js +2 -0
- package/dist/cli/register-spec-agent-goal-commands.js +45 -0
- package/dist/cli/register-tool-commands.js +11 -0
- package/dist/cli/register-workflow-commands.js +1 -0
- package/dist/cli/registry/tooling.js +3 -2
- package/dist/cli/release-promotion-gate.d.ts +14 -0
- package/dist/cli/release-promotion-gate.js +71 -0
- package/dist/cli/v2/release-commands.d.ts +29 -0
- package/dist/cli/v2/release-commands.js +95 -0
- package/dist/commands/chat/core.js +5 -0
- package/dist/commands/chat/native-root-loop.js +74 -1
- package/dist/commands/chat/slash/commands/session.js +19 -1
- package/dist/commands/dag-from-spec.d.ts +1 -0
- package/dist/commands/dag-from-spec.js +61 -1
- package/dist/commands/goal-interview.d.ts +18 -0
- package/dist/commands/goal-interview.js +396 -0
- package/dist/commands/graph.d.ts +62 -0
- package/dist/commands/graph.js +182 -0
- package/dist/commands/merge.d.ts +1 -0
- package/dist/commands/merge.js +88 -0
- package/dist/commands/parallel/core.js +3 -3
- package/dist/commands/provider.js +5 -3
- package/dist/commands/star.js +6 -1
- package/dist/commands/summary.d.ts +4 -1
- package/dist/commands/summary.js +103 -1
- package/dist/commands/team.d.ts +1 -0
- package/dist/commands/team.js +38 -0
- package/dist/contracts/interview.d.ts +106 -0
- package/dist/contracts/interview.js +9 -0
- package/dist/contracts/provider-health.d.ts +42 -0
- package/dist/contracts/provider-health.js +9 -0
- package/dist/evidence/index.d.ts +4 -0
- package/dist/evidence/index.js +2 -0
- package/dist/evidence/proof-trust-cli.d.ts +8 -0
- package/dist/evidence/proof-trust-cli.js +27 -0
- package/dist/evidence/proof-trust.d.ts +14 -0
- package/dist/evidence/proof-trust.js +381 -0
- package/dist/evidence/regression-proof-matrix.d.ts +42 -0
- package/dist/evidence/regression-proof-matrix.js +72 -0
- package/dist/goal/intent-frame.d.ts +30 -0
- package/dist/goal/intent-frame.js +39 -9
- package/dist/goal/interview-assimilation.d.ts +13 -0
- package/dist/goal/interview-assimilation.js +383 -0
- package/dist/goal/interview-question-bank.d.ts +11 -0
- package/dist/goal/interview-question-bank.js +225 -0
- package/dist/goal/interview-scoring.d.ts +31 -0
- package/dist/goal/interview-scoring.js +187 -0
- package/dist/goal/interview-session.d.ts +25 -0
- package/dist/goal/interview-session.js +116 -0
- package/dist/input/input-envelope.d.ts +22 -0
- package/dist/input/input-envelope.js +1 -0
- package/dist/memory/local-graph-memory-store.d.ts +15 -0
- package/dist/memory/local-graph-memory-store.js +176 -0
- package/dist/memory/memory-store.d.ts +18 -0
- package/dist/memory/memory-store.js +18 -0
- package/dist/orchestration/adaptorch-topology.d.ts +59 -0
- package/dist/orchestration/adaptorch-topology.js +194 -0
- package/dist/orchestration/capability-routing.d.ts +23 -0
- package/dist/orchestration/capability-routing.js +56 -0
- package/dist/orchestration/dag-compiler-types.d.ts +3 -0
- package/dist/orchestration/dag-compiler.js +14 -1
- package/dist/orchestration/parallel-orchestrator.d.ts +6 -0
- package/dist/orchestration/parallel-orchestrator.js +31 -0
- package/dist/providers/provider-health.d.ts +39 -0
- package/dist/providers/provider-health.js +161 -0
- package/dist/runtime/advanced-control-loop.d.ts +60 -0
- package/dist/runtime/advanced-control-loop.js +136 -0
- package/dist/runtime/agent-runtime.d.ts +10 -0
- package/dist/runtime/blast-radius.d.ts +10 -0
- package/dist/runtime/blast-radius.js +14 -0
- package/dist/runtime/context-broker.d.ts +13 -4
- package/dist/runtime/context-broker.js +14 -1
- package/dist/runtime/contracts/evidence.d.ts +87 -0
- package/dist/runtime/contracts/evidence.js +7 -0
- package/dist/runtime/contracts/router-v2.d.ts +44 -0
- package/dist/runtime/contracts/router-v2.js +4 -0
- package/dist/runtime/contracts/weakness-remediation.d.ts +67 -0
- package/dist/runtime/contracts/weakness-remediation.js +36 -0
- package/dist/runtime/headroom-policy.d.ts +37 -0
- package/dist/runtime/headroom-policy.js +122 -0
- package/dist/runtime/kimi-api-runtime.js +59 -1
- package/dist/runtime/ouroboros-policy.d.ts +57 -0
- package/dist/runtime/ouroboros-policy.js +134 -0
- package/dist/runtime/proof-bundle-trust.d.ts +74 -0
- package/dist/runtime/proof-bundle-trust.js +100 -0
- package/dist/runtime/provider-maturity-gate.d.ts +41 -0
- package/dist/runtime/provider-maturity-gate.js +101 -0
- package/dist/runtime/public-surface.d.ts +93 -0
- package/dist/runtime/public-surface.js +146 -0
- package/dist/runtime/router-v2-scoring.d.ts +11 -0
- package/dist/runtime/router-v2-scoring.js +151 -0
- package/dist/runtime/runtime-backed-task-runner.js +9 -1
- package/dist/runtime/tool-dispatch-contracts.d.ts +57 -1
- package/dist/runtime/tool-dispatch-contracts.js +79 -3
- package/dist/runtime/weakness-remediation-index.d.ts +27 -0
- package/dist/runtime/weakness-remediation-index.js +37 -0
- package/dist/safety/tool-authority-gate.d.ts +62 -0
- package/dist/safety/tool-authority-gate.js +108 -0
- package/dist/schema/proof-bundle.schema.d.ts +26 -26
- package/dist/schema/provider.schema.d.ts +4 -4
- package/dist/util/clipboard-image.d.ts +49 -0
- package/dist/util/clipboard-image.js +263 -0
- package/dist/util/first-run-star.d.ts +9 -0
- package/dist/util/first-run-star.js +42 -1
- package/dist/util/terminal-input.d.ts +20 -0
- package/dist/util/terminal-input.js +32 -0
- package/dist/util/update-check.d.ts +6 -1
- package/dist/util/update-check.js +35 -1
- package/docs/2026-06-08/critical-issues.md +20 -0
- package/docs/2026-06-08/improvements.md +14 -0
- package/docs/2026-06-08/init-checklist.md +25 -0
- package/docs/2026-06-08/plan.md +20 -0
- package/docs/2026-06-09/critical-issues.md +20 -0
- package/docs/2026-06-09/improvements.md +14 -0
- package/docs/2026-06-09/init-checklist.md +25 -0
- package/docs/2026-06-09/plan.md +20 -0
- package/docs/getting-started.md +31 -3
- package/docs/github-organic-promotion.md +127 -0
- package/docs/integrations/ouroboros.md +96 -0
- package/docs/native-root-runtime-algorithms.md +301 -0
- package/docs/provider-maturity.md +1 -1
- package/docs/versioning.md +3 -3
- package/package.json +4 -3
- package/readmeasset/ASSET_INDEX.md +1 -0
- package/templates/skills/agents/omk-agent-reach-websearch/SKILL.md +55 -0
- package/templates/skills/kimi/omk-agent-reach-websearch/SKILL.md +55 -0
- package/dist/native/linux-x64/omk-safety +0 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider Maturity Gate — Phase 3 of OMK Weakness Remediation.
|
|
3
|
+
*
|
|
4
|
+
* Evaluates a provider/runtime across 8 adapter test dimensions and
|
|
5
|
+
* produces a maturity score M_p, an authority class, and a pass/fail
|
|
6
|
+
* verdict.
|
|
7
|
+
*/
|
|
8
|
+
// ── Constants ───────────────────────────────────────────────────
|
|
9
|
+
// Per-dimension weights of the composite maturity score. One weight per
// adapter test dimension; the eight values add up to 1.0.
const WEIGHT_AUTH = 0.1;
const WEIGHT_READ = 0.1;
const WEIGHT_WRITE = 0.15;
const WEIGHT_SHELL = 0.1;
const WEIGHT_MCP = 0.15;
const WEIGHT_MERGE = 0.15;
const WEIGHT_EVIDENCE = 0.15;
const WEIGHT_FALLBACK = 0.1;

// Merge authority: composite score plus merge and evidence sub-score floors.
const MERGE_AUTHORITY_THRESHOLD = 0.9;
const MERGE_SUBSCORE_THRESHOLD = 0.9;
const EVIDENCE_SUBSCORE_THRESHOLD_FOR_MERGE = 0.85;

// Write authority: composite score plus write sub-score floor.
const WRITE_AUTHORITY_THRESHOLD = 0.8;
const WRITE_SUBSCORE_THRESHOLD = 0.85;

// Review authority: composite score plus read sub-score floor.
const REVIEW_AUTHORITY_THRESHOLD = 0.7;
const READ_SUBSCORE_THRESHOLD = 0.9;

// Lowest tier that still counts as enabled; below this the class is "disabled".
const READ_ONLY_ADVISORY_THRESHOLD = 0.55;
|
|
25
|
+
// ── Helpers ─────────────────────────────────────────────────────
|
|
26
|
+
/**
 * Clamp a value into the closed interval [0, 1].
 *
 * Inputs that cannot be clamped (NaN, undefined, or anything else that
 * coerces to NaN) resolve to 0 instead of leaking NaN into the weighted
 * score and the authority-class comparisons that consume the result.
 *
 * @param n Value to clamp.
 * @returns A number in [0, 1].
 */
function clamp01(n) {
    const clamped = Math.max(0, Math.min(1, n));
    // Math.min/Math.max propagate NaN, so normalise it after clamping.
    return Number.isNaN(clamped) ? 0 : clamped;
}
|
|
29
|
+
/**
 * Map a composite maturity score and its sub-scores to an authority class.
 *
 * Tiers are checked from most to least privileged and the first match wins.
 * A tier matches when the composite score reaches its floor and its
 * sub-score requirement (if any) holds.
 *
 * @param score Composite maturity score.
 * @param subScores Per-dimension sub-scores (reads merge, evidence, write, read).
 * @returns "merge-authority", "write-authority", "review-authority",
 *          "read-only-advisory" or "disabled".
 */
function computeAuthorityClass(score, subScores) {
    // Sub-score checks are thunks so they run only after the composite floor
    // for that tier has been met, matching the original short-circuit order.
    const tiers = [
        [
            "merge-authority",
            MERGE_AUTHORITY_THRESHOLD,
            () => subScores.merge >= MERGE_SUBSCORE_THRESHOLD &&
                subScores.evidence >= EVIDENCE_SUBSCORE_THRESHOLD_FOR_MERGE,
        ],
        [
            "write-authority",
            WRITE_AUTHORITY_THRESHOLD,
            () => subScores.write >= WRITE_SUBSCORE_THRESHOLD,
        ],
        [
            "review-authority",
            REVIEW_AUTHORITY_THRESHOLD,
            () => subScores.read >= READ_SUBSCORE_THRESHOLD,
        ],
        ["read-only-advisory", READ_ONLY_ADVISORY_THRESHOLD, () => true],
    ];
    for (const [authorityClass, minimumScore, subScoresOk] of tiers) {
        if (score >= minimumScore && subScoresOk()) {
            return authorityClass;
        }
    }
    return "disabled";
}
|
|
48
|
+
/**
 * Collapse a list of adapter test results into one sub-score per dimension.
 *
 * Every known dimension starts at 0, so a dimension with no result scores 0.
 * When a dimension appears more than once, the last result wins. Results
 * whose `kind` is not one of the eight known dimensions are ignored rather
 * than being written onto the map as stray keys.
 *
 * @param results Iterable of `{ kind, score }` test results.
 * @returns Plain object with auth/read/write/shell/mcp/merge/evidence/fallback,
 *          each passed through clamp01.
 */
function buildSubScoreMap(results) {
    const map = {
        auth: 0,
        read: 0,
        write: 0,
        shell: 0,
        mcp: 0,
        merge: 0,
        evidence: 0,
        fallback: 0,
    };
    for (const r of results) {
        // Own-property check (not `in`) so inherited names such as
        // "toString" or "constructor" are not treated as dimensions.
        if (Object.prototype.hasOwnProperty.call(map, r.kind)) {
            map[r.kind] = clamp01(r.score);
        }
    }
    return map;
}
|
|
64
|
+
/**
 * Create an in-memory registry of maturity results keyed by provider id.
 *
 * @returns An object with `lookup(providerId)`, which yields the stored
 *          result or undefined, and `register(providerId, result)`, which
 *          stores or overwrites the entry for that provider.
 */
export function createProviderMaturityTable() {
    const entries = new Map();
    const lookup = (providerId) => entries.get(providerId);
    const register = (providerId, result) => {
        entries.set(providerId, result);
    };
    return { lookup, register };
}
|
|
75
|
+
/**
 * Create the provider maturity gate.
 *
 * @returns An object with:
 *   - `evaluate(results)`: builds the clamped sub-score map, computes the
 *     weighted composite score, derives the authority class and returns a
 *     frozen `{ score, authorityClass, passed, subScores }` record. `passed`
 *     is true for every class except "disabled".
 *   - `getSubScore(results, kind)`: clamped score for one dimension, or 0
 *     when no result of that kind exists.
 */
export function createProviderMaturityGate() {
    return {
        evaluate(results) {
            const subScores = Object.freeze(buildSubScoreMap(results));
            const score = WEIGHT_AUTH * subScores.auth +
                WEIGHT_READ * subScores.read +
                WEIGHT_WRITE * subScores.write +
                WEIGHT_SHELL * subScores.shell +
                WEIGHT_MCP * subScores.mcp +
                WEIGHT_MERGE * subScores.merge +
                WEIGHT_EVIDENCE * subScores.evidence +
                WEIGHT_FALLBACK * subScores.fallback;
            const finalScore = clamp01(score);
            const authorityClass = computeAuthorityClass(finalScore, subScores);
            return Object.freeze({
                score: finalScore,
                authorityClass,
                passed: authorityClass !== "disabled",
                subScores,
            });
        },
        getSubScore(results, kind) {
            // Scan the whole list so the last entry of a kind wins. This is
            // the same rule evaluate() applies through buildSubScoreMap, so
            // both methods report the same value when a kind is duplicated.
            let subScore = 0;
            for (const r of results) {
                if (r.kind === kind) {
                    subScore = clamp01(r.score);
                }
            }
            return subScore;
        },
    };
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public Surface Compression — Phase 1 of OMK Weakness Remediation.
|
|
3
|
+
*
|
|
4
|
+
* Takes a candidate set of runtime surfaces, scores each item,
|
|
5
|
+
* enforces mandatory anchors, applies budget K, and returns
|
|
6
|
+
* public surface S and hidden set H.
|
|
7
|
+
*
|
|
8
|
+
* Also enforces the 5-step flow invariant:
|
|
9
|
+
* goal → dag → route → verify → replay
|
|
10
|
+
*/
|
|
11
|
+
/** A candidate surface item (e.g., a tool, MCP server, skill, or runtime). */
|
|
12
|
+
export interface SurfaceItem {
|
|
13
|
+
readonly id: string;
|
|
14
|
+
readonly name: string;
|
|
15
|
+
readonly category: "tool" | "mcp" | "skill" | "runtime" | "hook";
|
|
16
|
+
/** How often this surface is invoked per 100 turns. */
|
|
17
|
+
readonly usage: number;
|
|
18
|
+
/** Contribution score from verified runs [0, 1]. */
|
|
19
|
+
readonly verifiedRunContribution: number;
|
|
20
|
+
/** Contribution score from evidence items [0, 1]. */
|
|
21
|
+
readonly evidenceContribution: number;
|
|
22
|
+
/** Onboarding difficulty/cost [0, 1]. */
|
|
23
|
+
readonly onboardingCost: number;
|
|
24
|
+
/** Explainability burden [0, 1]. */
|
|
25
|
+
readonly explainabilityCost: number;
|
|
26
|
+
/** Risk of lineage drift [0, 1]. */
|
|
27
|
+
readonly lineageRisk: number;
|
|
28
|
+
}
|
|
29
|
+
/** Scored surface item with computed score. */
|
|
30
|
+
export interface ScoredSurfaceItem extends SurfaceItem {
|
|
31
|
+
readonly score: number;
|
|
32
|
+
}
|
|
33
|
+
/** Mandatory anchor identifiers. */
|
|
34
|
+
export type MandatoryAnchor = "goal" | "dag" | "route" | "verify" | "replay";
|
|
35
|
+
/** Compression result: public surface S and hidden set H. */
|
|
36
|
+
export interface CompressionResult {
|
|
37
|
+
readonly publicSurface: readonly ScoredSurfaceItem[];
|
|
38
|
+
readonly hiddenSet: readonly ScoredSurfaceItem[];
|
|
39
|
+
readonly mandatoryAnchors: readonly MandatoryAnchor[];
|
|
40
|
+
readonly budget: number;
|
|
41
|
+
readonly invariantPassed: boolean;
|
|
42
|
+
readonly invariantViolations: readonly string[];
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Compute surface score from item metrics.
|
|
46
|
+
*
|
|
47
|
+
* Formula:
|
|
48
|
+
* 0.30 * usage
|
|
49
|
+
* + 0.30 * verifiedRunContribution
|
|
50
|
+
* + 0.20 * evidenceContribution
|
|
51
|
+
* - 0.10 * onboardingCost
|
|
52
|
+
* - 0.05 * explainabilityCost
|
|
53
|
+
* - 0.05 * lineageRisk
|
|
54
|
+
*/
|
|
55
|
+
export declare function computeSurfaceScore(item: SurfaceItem): number;
|
|
56
|
+
/**
|
|
57
|
+
* Validate the 5-step flow invariant against the public surface.
|
|
58
|
+
*
|
|
59
|
+
* Invariant: The public surface must contain all mandatory anchors
|
|
60
|
+
* in order: goal → dag → route → verify → replay.
|
|
61
|
+
*
|
|
62
|
+
* Returns violations as human-readable strings.
|
|
63
|
+
*/
|
|
64
|
+
export declare function enforceFlowInvariant(publicSurface: readonly ScoredSurfaceItem[]): {
|
|
65
|
+
readonly passed: boolean;
|
|
66
|
+
readonly violations: readonly string[];
|
|
67
|
+
};
|
|
68
|
+
export interface PublicSurfaceCompressorOptions {
|
|
69
|
+
/** Maximum number of items in the public surface (default 5). */
|
|
70
|
+
readonly budget?: number;
|
|
71
|
+
/** Optional custom scoring function. */
|
|
72
|
+
readonly scoreFn?: (item: SurfaceItem) => number;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Compresses a candidate surface set into public (S) and hidden (H) subsets.
|
|
76
|
+
*
|
|
77
|
+
* Rules:
|
|
78
|
+
* 1. Mandatory anchors A = {goal, dag, route, verify, replay} are always in S.
|
|
79
|
+
* 2. Remaining slots are filled by highest score until budget K is reached.
|
|
80
|
+
* 3. The 5-step flow invariant is enforced and reported.
|
|
81
|
+
*/
|
|
82
|
+
export declare class PublicSurfaceCompressor {
|
|
83
|
+
private readonly budget;
|
|
84
|
+
private readonly scoreFn;
|
|
85
|
+
constructor(options?: PublicSurfaceCompressorOptions);
|
|
86
|
+
/**
|
|
87
|
+
* Compress candidates into public surface S and hidden set H.
|
|
88
|
+
*
|
|
89
|
+
* @param candidates All candidate surface items.
|
|
90
|
+
* @returns CompressionResult with S, H, and invariant status.
|
|
91
|
+
*/
|
|
92
|
+
compress(candidates: readonly SurfaceItem[]): CompressionResult;
|
|
93
|
+
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public Surface Compression — Phase 1 of OMK Weakness Remediation.
|
|
3
|
+
*
|
|
4
|
+
* Takes a candidate set of runtime surfaces, scores each item,
|
|
5
|
+
* enforces mandatory anchors, applies budget K, and returns
|
|
6
|
+
* public surface S and hidden set H.
|
|
7
|
+
*
|
|
8
|
+
* Also enforces the 5-step flow invariant:
|
|
9
|
+
* goal → dag → route → verify → replay
|
|
10
|
+
*/
|
|
11
|
+
// ── Constants ───────────────────────────────────────────────────
|
|
12
|
+
/**
 * Mandatory anchors in canonical flow order: goal → dag → route → verify → replay.
 *
 * Frozen because this same array instance is returned to callers as
 * `CompressionResult.mandatoryAnchors`; without the freeze a caller could
 * push to or reorder it and change the anchor set for every later call.
 */
const MANDATORY_ANCHORS = Object.freeze([
    "goal",
    "dag",
    "route",
    "verify",
    "replay",
]);
/** Public-surface budget K used when the caller does not supply one. */
const DEFAULT_BUDGET = 5;
|
|
20
|
+
// ── Scoring ─────────────────────────────────────────────────────
|
|
21
|
+
/**
 * Compute surface score from item metrics.
 *
 * Formula:
 *     0.30 * usage
 *   + 0.30 * verifiedRunContribution
 *   + 0.20 * evidenceContribution
 *   - 0.10 * onboardingCost
 *   - 0.05 * explainabilityCost
 *   - 0.05 * lineageRisk
 *
 * The result is clamped to [0, 1]. If any metric is missing or NaN the raw
 * value is NaN; that case returns 0 so the item ranks last instead of
 * producing a NaN score that makes score-based sorting unreliable.
 *
 * NOTE(review): the declaration file describes `usage` as invocations per
 * 100 turns, not a [0, 1] ratio. Nothing here normalises it, so a large
 * usage value saturates the score on its own. Confirm the intended unit.
 *
 * @param item Surface item carrying the six metrics above.
 * @returns Score in [0, 1].
 */
export function computeSurfaceScore(item) {
    const raw = 0.30 * item.usage +
        0.30 * item.verifiedRunContribution +
        0.20 * item.evidenceContribution -
        0.10 * item.onboardingCost -
        0.05 * item.explainabilityCost -
        0.05 * item.lineageRisk;
    if (Number.isNaN(raw)) {
        return 0;
    }
    // Clamp to [0, 1]
    return Math.max(0, Math.min(1, raw));
}
|
|
42
|
+
// ── Invariant Enforcement ───────────────────────────────────────
|
|
43
|
+
/**
 * Check the 5-step flow invariant (goal → dag → route → verify → replay)
 * against a public surface.
 *
 * First reports every mandatory anchor whose id is absent. Only when none
 * is missing does it compare the position of each anchor's first occurrence
 * with its predecessor's and report any pair that is out of order.
 *
 * @param publicSurface Items of the public surface; only `id` is read.
 * @returns `{ passed, violations }` where `violations` is a frozen array of
 *          human-readable strings and `passed` is true when it is empty.
 */
export function enforceFlowInvariant(publicSurface) {
    const surfaceIds = publicSurface.map((entry) => entry.id);
    const violations = MANDATORY_ANCHORS
        .filter((anchor) => !surfaceIds.includes(anchor))
        .map((anchor) => `Missing mandatory anchor: ${anchor}`);
    const allAnchorsPresent = violations.length === 0;
    if (allAnchorsPresent) {
        const positions = MANDATORY_ANCHORS.map((anchor) => surfaceIds.indexOf(anchor));
        positions.forEach((position, index) => {
            if (index === 0) {
                return;
            }
            if (position < positions[index - 1]) {
                violations.push(`Flow order violation: ${MANDATORY_ANCHORS[index - 1]} must precede ${MANDATORY_ANCHORS[index]}`);
            }
        });
    }
    const passed = violations.length === 0;
    return {
        passed,
        violations: Object.freeze(violations),
    };
}
|
|
72
|
+
/**
 * Compresses a candidate surface set into public (S) and hidden (H) subsets.
 *
 * Rules:
 *   1. Mandatory anchors A = {goal, dag, route, verify, replay} are always in S.
 *   2. Remaining slots are filled by highest score until budget K is reached.
 *   3. The 5-step flow invariant is enforced and reported.
 *
 * Every candidate ends up in exactly one of S or H: a candidate that repeats
 * an anchor id already taken by an earlier candidate is placed in H rather
 * than being dropped.
 */
export class PublicSurfaceCompressor {
    budget;
    scoreFn;
    /**
     * @param options Optional `budget` (raised to at least the number of
     *        mandatory anchors; a NaN budget falls back to the default) and
     *        optional `scoreFn` replacing the default scoring function.
     */
    constructor(options = {}) {
        const requestedBudget = options.budget ?? DEFAULT_BUDGET;
        // Math.max propagates NaN, which would surface as `budget: NaN` in
        // every result; treat it the same as "not provided".
        const usableBudget = Number.isNaN(requestedBudget) ? DEFAULT_BUDGET : requestedBudget;
        this.budget = Math.max(MANDATORY_ANCHORS.length, usableBudget);
        this.scoreFn = options.scoreFn ?? computeSurfaceScore;
    }
    /**
     * Compress candidates into public surface S and hidden set H.
     *
     * @param candidates All candidate surface items.
     * @returns CompressionResult with S, H, and invariant status.
     */
    compress(candidates) {
        // Partition mandatory vs elective while scoring, in candidate order.
        const anchorItems = new Map();
        const duplicateAnchorItems = [];
        const electiveItems = [];
        for (const item of candidates) {
            const scoredItem = { ...item, score: this.scoreFn(item) };
            if (!MANDATORY_ANCHORS.includes(scoredItem.id)) {
                electiveItems.push(scoredItem);
            }
            else if (anchorItems.has(scoredItem.id)) {
                // The first candidate with an anchor id fills the anchor slot;
                // later ones are kept (hidden) instead of silently vanishing.
                duplicateAnchorItems.push(scoredItem);
            }
            else {
                anchorItems.set(scoredItem.id, scoredItem);
            }
        }
        // Anchors in canonical order; inject a zero-score placeholder for
        // any anchor that no candidate supplied.
        const orderedMandatory = MANDATORY_ANCHORS.map((anchor) => anchorItems.get(anchor) ?? {
            id: anchor,
            name: anchor,
            category: "runtime",
            usage: 0,
            verifiedRunContribution: 0,
            evidenceContribution: 0,
            onboardingCost: 0,
            explainabilityCost: 0,
            lineageRisk: 0,
            score: 0,
        });
        // Electives by score, highest first.
        electiveItems.sort((a, b) => b.score - a.score);
        const remainingSlots = Math.max(0, this.budget - orderedMandatory.length);
        const publicSurface = Object.freeze([
            ...orderedMandatory,
            ...electiveItems.slice(0, remainingSlots),
        ]);
        const hiddenSet = Object.freeze([
            ...electiveItems.slice(remainingSlots),
            ...duplicateAnchorItems,
        ]);
        const invariant = enforceFlowInvariant(publicSurface);
        return Object.freeze({
            publicSurface,
            hiddenSet,
            mandatoryAnchors: MANDATORY_ANCHORS,
            budget: this.budget,
            invariantPassed: invariant.passed,
            invariantViolations: invariant.violations,
        });
    }
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Router V2 Scoring Engine — Bayesian-smoothed evidence calibration (Algorithm 6).
|
|
3
|
+
*
|
|
4
|
+
* Composite formula:
|
|
5
|
+
* 0.25*E + 0.15*conf + 0.20*cap + 0.15*mat + 0.10*lat + 0.10*cost
|
|
6
|
+
* - 0.15*pen - 0.10*blast
|
|
7
|
+
*/
|
|
8
|
+
import type { AgentRuntime } from "./agent-runtime.js";
|
|
9
|
+
import type { EvidenceHistoryEntry, NodeIntent, RuntimeScoreV2, RouterV2Options, RouterV2ScoringEngine, BlastRadiusParams } from "./contracts/router-v2.js";
|
|
10
|
+
export declare function createRouterV2ScoringEngine(options?: RouterV2Options, blastRadiusFn?: (params: BlastRadiusParams) => number): RouterV2ScoringEngine;
|
|
11
|
+
export declare function scoreRuntimes(candidates: AgentRuntime[], intent: NodeIntent, history: EvidenceHistoryEntry[], options?: RouterV2Options): RuntimeScoreV2[];
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Router V2 Scoring Engine — Bayesian-smoothed evidence calibration (Algorithm 6).
|
|
3
|
+
*
|
|
4
|
+
* Composite formula:
|
|
5
|
+
* 0.25*E + 0.15*conf + 0.20*cap + 0.15*mat + 0.10*lat + 0.10*cost
|
|
6
|
+
* - 0.15*pen - 0.10*blast
|
|
7
|
+
*/
|
|
8
|
+
import { computeBlastRadiusPenalty } from "./blast-radius.js";
|
|
9
|
+
// Prior pseudo-counts for the Bayesian-smoothed evidence score.
const ALPHA_0 = 1;
const BETA_0 = 1;
// For each node intent: the capabilities that count toward capability fit,
// as [capability, weight] pairs.
const INTENT_CAPABILITY_WEIGHTS = {
    research: [
        ["read", 0.35],
        ["review", 0.2],
        ["toolCalling", 0.15],
        ["vision", 0.1],
    ],
    planning: [
        ["read", 0.3],
        ["review", 0.2],
        ["toolCalling", 0.15],
    ],
    coding: [
        ["write", 0.3],
        ["patch", 0.25],
        ["shell", 0.15],
        ["toolCalling", 0.1],
    ],
    debugging: [
        ["read", 0.2],
        ["write", 0.2],
        ["patch", 0.2],
        ["shell", 0.15],
        ["toolCalling", 0.1],
    ],
    refactor: [
        ["write", 0.25],
        ["patch", 0.25],
        ["review", 0.15],
        ["toolCalling", 0.1],
    ],
    review: [
        ["review", 0.35],
        ["read", 0.25],
        ["toolCalling", 0.1],
    ],
    "test-generation": [
        ["write", 0.25],
        ["patch", 0.2],
        ["review", 0.15],
        ["toolCalling", 0.1],
    ],
    documentation: [
        ["read", 0.25],
        ["write", 0.15],
        ["review", 0.15],
        ["toolCalling", 0.1],
    ],
    "shell-operation": [
        ["shell", 0.4],
        ["read", 0.15],
        ["write", 0.1],
    ],
};
|
|
22
|
+
/**
 * Tell whether a capability flag is switched on for a runtime.
 *
 * "toolCalling" and "streaming" each accept either of two spellings
 * (`toolCalling`/`supportsToolCalling`, `streaming`/`supportsStreaming`).
 * Every other capability is looked up by its own name. Only the literal
 * boolean `true` counts as enabled.
 *
 * @param capabilities Capability record of the runtime.
 * @param capability Capability name to test.
 * @returns true when the capability is enabled.
 */
function runtimeCapabilityEnabled(capabilities, capability) {
    switch (capability) {
        case "toolCalling":
            return capabilities.toolCalling === true || capabilities.supportsToolCalling === true;
        case "streaming":
            return capabilities.streaming === true || capabilities.supportsStreaming === true;
        default:
            return capabilities[capability] === true;
    }
}
|
|
31
|
+
/**
 * Score how well a runtime's capabilities match a node intent.
 *
 * Sums the weight of every capability listed for the intent in
 * INTENT_CAPABILITY_WEIGHTS that the runtime has enabled, then adds up to
 * 0.1 each for `maxTokens` and `maxContextTokens` (value / 1,000,000,
 * capped at 0.1). The total is capped at 1: without the cap some intents
 * overshoot (for example the debugging weights sum to 0.85 and the two
 * token bonuses add up to 0.2).
 *
 * @param runtime Runtime whose `capabilities` are inspected.
 * @param intent Node intent used to pick the weight list.
 * @returns 0 when the runtime has no capabilities record, else a fit in [0, 1].
 */
function computeCapabilityFit(runtime, intent) {
    const caps = runtime.capabilities;
    if (!caps)
        return 0;
    // An intent without a weight list contributes no capability weight
    // instead of throwing "undefined is not iterable".
    const weightedCapabilities = INTENT_CAPABILITY_WEIGHTS[intent] ?? [];
    let score = 0;
    for (const [capability, weight] of weightedCapabilities) {
        if (runtimeCapabilityEnabled(caps, capability))
            score += weight;
    }
    if (caps.maxTokens != null && caps.maxTokens > 0) {
        score += Math.min(0.1, caps.maxTokens / 1_000_000);
    }
    if (caps.maxContextTokens != null && caps.maxContextTokens > 0) {
        score += Math.min(0.1, caps.maxContextTokens / 1_000_000);
    }
    return Math.min(1, score);
}
|
|
48
|
+
function computeMaturityScore(runtime) {
|
|
49
|
+
const caps = runtime.capabilities;
|
|
50
|
+
if (!caps)
|
|
51
|
+
return 0.5;
|
|
52
|
+
const capabilityCount = [
|
|
53
|
+
caps.read,
|
|
54
|
+
caps.write,
|
|
55
|
+
caps.shell,
|
|
56
|
+
caps.patch,
|
|
57
|
+
caps.review,
|
|
58
|
+
caps.merge,
|
|
59
|
+
caps.vision,
|
|
60
|
+
caps.mcp,
|
|
61
|
+
caps.toolCalling,
|
|
62
|
+
caps.supportsToolCalling,
|
|
63
|
+
].filter(Boolean).length;
|
|
64
|
+
const breadthScore = Math.min(1, capabilityCount / 8);
|
|
65
|
+
const priorityScore = Math.max(0, Math.min(1, runtime.priority / 100));
|
|
66
|
+
return 0.6 * breadthScore + 0.4 * priorityScore;
|
|
67
|
+
}
|
|
68
|
+
/**
 * Latency score based only on streaming support.
 *
 * @param runtime Runtime whose optional `capabilities` are inspected.
 * @returns 0.85 when `supportsStreaming` or `streaming` is exactly `true`,
 *          otherwise 0.70 (also when there is no capabilities record).
 */
function computeLatencyScore(runtime) {
    const caps = runtime.capabilities;
    const canStream = caps?.supportsStreaming === true || caps?.streaming === true;
    if (canStream) {
        return 0.85;
    }
    return 0.70;
}
|
|
73
|
+
/**
 * Cost score derived from the runtime's priority.
 *
 * @param runtime Runtime whose `priority` is read.
 * @returns 0.75 when priority is greater than 50, otherwise 0.90.
 */
function computeCostScore(runtime) {
    if (runtime.priority > 50) {
        return 0.75;
    }
    return 0.90;
}
|
|
76
|
+
/**
 * Build a Router V2 scoring engine.
 *
 * Composite formula:
 *   0.25*E + 0.15*conf + 0.20*cap + 0.15*mat + 0.10*lat + 0.10*cost
 *   - 0.15*pen - 0.10*blast
 *
 * @param options Optional `enableBlastRadius` (default false) and
 *        `blastRadiusParams` (default: no downstream nodes, no affected
 *        files, no global side effects).
 * @param blastRadiusFn Function turning blast-radius params into a penalty;
 *        only called when `enableBlastRadius` is true.
 * @returns `{ score, select }`:
 *   - `score(runtime, intent, history)` returns the score breakdown for one runtime.
 *   - `select(candidates, intent, history)` scores all candidates, picks the
 *     highest composite as primary and returns the rest as ordered fallbacks.
 * @throws {TypeError} from `select` when `candidates` is empty.
 */
export function createRouterV2ScoringEngine(options = {}, blastRadiusFn = computeBlastRadiusPenalty) {
    const { enableBlastRadius = false, blastRadiusParams = { downstreamNodeCount: 0, affectedFileCount: 0, hasGlobalSideEffects: false }, } = options;
    // At most this many failures contribute to the failure penalty.
    const MAX_PENALIZED_FAILURES = 5;
    function score(runtime, intent, history) {
        const runtimeHistory = history.filter((e) => e.runtime === runtime.id);
        const totalAttempts = runtimeHistory.length;
        const passedAttempts = runtimeHistory.filter((e) => e.passed).length;
        // Bayesian smoothing with α₀=1, β₀=1
        const bayesianEvidenceScore = (ALPHA_0 + passedAttempts) / (ALPHA_0 + BETA_0 + totalAttempts);
        // Confidence grows linearly with sample size: 0.1 with no history,
        // reaching the cap of 1 at nine attempts.
        const confidence = Math.min(1, totalAttempts / 10 + 0.1);
        // Only the number of failures (capped) feeds the penalty, so the
        // failures are counted directly. The previous timestamp sort never
        // affected the result and threw when a timestamp was not a string.
        const failedAttempts = totalAttempts - passedAttempts;
        const penalizedFailures = Math.min(MAX_PENALIZED_FAILURES, failedAttempts);
        const recentFailurePenalty = Math.min(0.3, penalizedFailures * 0.06);
        const capabilityFit = computeCapabilityFit(runtime, intent);
        const maturityScore = computeMaturityScore(runtime);
        const latencyScore = computeLatencyScore(runtime);
        const costScore = computeCostScore(runtime);
        const blastRadiusPenalty = enableBlastRadius ? blastRadiusFn(blastRadiusParams) : 0;
        const composite = 0.25 * bayesianEvidenceScore +
            0.15 * confidence +
            0.20 * capabilityFit +
            0.15 * maturityScore +
            0.10 * latencyScore +
            0.10 * costScore -
            0.15 * recentFailurePenalty -
            0.10 * blastRadiusPenalty;
        return {
            runtimeId: runtime.id,
            bayesianEvidenceScore,
            confidence,
            capabilityFit,
            maturityScore,
            latencyScore,
            costScore,
            recentFailurePenalty,
            blastRadiusPenalty,
            composite,
        };
    }
    function select(candidates, intent, history) {
        if (candidates.length === 0) {
            // Fail with a descriptive message instead of an opaque
            // "cannot read properties of undefined" further down.
            throw new TypeError("Router V2 select() requires at least one candidate runtime");
        }
        const scored = candidates.map((runtime) => ({
            runtime,
            score: score(runtime, intent, history),
        }));
        // Highest composite first; `scored` is a fresh array, so sorting in
        // place does not touch the caller's `candidates`.
        scored.sort((a, b) => b.score.composite - a.score.composite);
        const primary = scored[0].runtime;
        const fallbacks = scored.slice(1).map((s) => s.runtime);
        const bestScore = scored[0].score;
        const reason = [
            `intent=${intent}`,
            `bayesianE=${bestScore.bayesianEvidenceScore.toFixed(2)}`,
            `confidence=${bestScore.confidence.toFixed(2)}`,
            `capability=${bestScore.capabilityFit.toFixed(2)}`,
            `maturity=${bestScore.maturityScore.toFixed(2)}`,
            `latency=${bestScore.latencyScore.toFixed(2)}`,
            `cost=${bestScore.costScore.toFixed(2)}`,
            `penalty=${bestScore.recentFailurePenalty.toFixed(2)}`,
            `blast=${bestScore.blastRadiusPenalty.toFixed(2)}`,
            `composite=${bestScore.composite.toFixed(3)}`,
        ].join("; ");
        return {
            runtime: primary,
            reason,
            fallbacks,
            intent,
            scores: scored.map((s) => s.score),
        };
    }
    return { score, select };
}
|
|
148
|
+
/**
 * Score every candidate runtime for an intent without selecting one.
 *
 * Builds a scoring engine from `options` (using the engine's default
 * blast-radius function) and returns one score breakdown per candidate,
 * in the same order as `candidates`.
 *
 * @param candidates Runtimes to score.
 * @param intent Node intent the runtimes are scored against.
 * @param history Evidence history entries shared by all candidates.
 * @param options Router V2 options forwarded to the engine.
 * @returns Array of score breakdowns, one per candidate.
 */
export function scoreRuntimes(candidates, intent, history, options = {}) {
    const { score } = createRouterV2ScoringEngine(options);
    return candidates.map((candidate) => score(candidate, intent, history));
}
|
|
@@ -14,6 +14,7 @@ import { applyTaskRunContextToAgentTask, envFromWorkerManifest } from "./worker-
|
|
|
14
14
|
import { createRuntimeRegistry } from "./runtime-registry.js";
|
|
15
15
|
import { createRuntimeRouter } from "./runtime-router.js";
|
|
16
16
|
import { createContextBroker } from "./context-broker.js";
|
|
17
|
+
import { maybeCompactWithHeadroom } from "./headroom-policy.js";
|
|
17
18
|
import { DeepSeekRuntime } from "./deepseek-runtime.js";
|
|
18
19
|
import { CodexRuntime } from "./codex-runtime.js";
|
|
19
20
|
import { createOpencodeCliAdapter } from "../adapters/opencode/opencode-cli-adapter.js";
|
|
@@ -128,7 +129,14 @@ export async function createRuntimeBackedTaskRunner(options) {
|
|
|
128
129
|
startedAt: new Date().toISOString(),
|
|
129
130
|
}
|
|
130
131
|
: undefined;
|
|
131
|
-
const { capsule } = await contextBroker.buildCapsule(node, runState);
|
|
132
|
+
const { capsule, headroomDecision } = await contextBroker.buildCapsule(node, runState);
|
|
133
|
+
// CTX guard: compact via headroom before the context window crosses the threshold (~90%).
|
|
134
|
+
if (headroomDecision?.shouldCompact) {
|
|
135
|
+
await maybeCompactWithHeadroom({
|
|
136
|
+
decision: headroomDecision,
|
|
137
|
+
text: JSON.stringify(capsule),
|
|
138
|
+
}).catch(() => undefined);
|
|
139
|
+
}
|
|
132
140
|
const routing = capsule.node.routing;
|
|
133
141
|
const providerFallbackChain = options.fallbackChain
|
|
134
142
|
?? (routing?.fallbackProvider ? [routing.fallbackProvider] : []);
|
|
@@ -1,8 +1,64 @@
|
|
|
1
1
|
import type { OmkToolCall, OmkToolDefinition } from "./tool-registry-contract.js";
|
|
2
|
+
import { type ToolAuthorityDecision, type ToolOp } from "../safety/tool-authority-gate.js";
|
|
3
|
+
import type { ProviderAuthorityLevel } from "../contracts/provider-health.js";
|
|
2
4
|
export interface ToolDispatchResult<R = unknown> {
|
|
3
5
|
readonly call: OmkToolCall;
|
|
4
6
|
readonly status: "fulfilled" | "rejected";
|
|
5
7
|
readonly value?: R;
|
|
6
8
|
readonly reason?: unknown;
|
|
7
9
|
}
|
|
8
|
-
|
|
10
|
+
/** Shadow = record only; enforce = a block/ask(non-TTY) verdict rejects the call. */
|
|
11
|
+
export type ToolAuthorityMode = "shadow" | "enforce";
|
|
12
|
+
/**
|
|
13
|
+
* Per-call verdict recorded at the dispatch checkpoint. Carries only coarse,
|
|
14
|
+
* non-secret signals (op class, authority levels, policy) — never tool args.
|
|
15
|
+
*/
|
|
16
|
+
export interface ToolAuthorityDecisionRecord {
|
|
17
|
+
readonly toolName: string;
|
|
18
|
+
readonly op: ToolOp;
|
|
19
|
+
readonly decision: ToolAuthorityDecision;
|
|
20
|
+
readonly mode: ToolAuthorityMode;
|
|
21
|
+
/** True only when the verdict actually rejected the call (enforce + block). */
|
|
22
|
+
readonly enforced: boolean;
|
|
23
|
+
/** Redacted, human-readable reason. Never includes args or secret values. */
|
|
24
|
+
readonly reason: string;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Authority wiring for one dispatch turn. All inputs are non-secret enum/bool
|
|
28
|
+
* signals. When omitted from {@link dispatchToolCallsByContract}, dispatch is
|
|
29
|
+
* byte-identical to the ungated path.
|
|
30
|
+
*/
|
|
31
|
+
export interface ToolAuthorityWiring {
|
|
32
|
+
readonly writeAuthority: ProviderAuthorityLevel;
|
|
33
|
+
readonly shellAuthority: ProviderAuthorityLevel;
|
|
34
|
+
readonly approvalPolicy: "interactive" | "auto" | "yolo" | "block";
|
|
35
|
+
readonly sandboxMode: "read-only" | "workspace-write";
|
|
36
|
+
readonly tty: boolean;
|
|
37
|
+
/**
|
|
38
|
+
* Enforcement opt-in. Default `false` => shadow mode (zero behavior change):
|
|
39
|
+
* verdicts are computed and recorded but never block. When `true`, a `block`
|
|
40
|
+
* verdict (and `ask` in a non-TTY context, fail-closed) rejects the call.
|
|
41
|
+
*/
|
|
42
|
+
readonly enforce?: boolean;
|
|
43
|
+
/** Optional sink for computed verdicts (invoked in both shadow and enforce). */
|
|
44
|
+
readonly onDecision?: (record: ToolAuthorityDecisionRecord) => void;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Resolve the global enforcement opt-in from the environment. Default OFF means
|
|
48
|
+
* the gate runs in shadow mode (record only). Set `OMK_TOOL_AUTHORITY_ENFORCE=1`
|
|
49
|
+
* to enable fail-closed enforcement at the dispatch checkpoint.
|
|
50
|
+
*/
|
|
51
|
+
export declare function resolveToolAuthorityEnforcement(env?: Record<string, string | undefined>): boolean;
|
|
52
|
+
/** Error used to reject a tool call rejected by the authority gate (enforce mode). */
|
|
53
|
+
export declare class ToolAuthorityBlockedError extends Error {
|
|
54
|
+
readonly toolName: string;
|
|
55
|
+
readonly op: ToolOp;
|
|
56
|
+
readonly decision: ToolAuthorityDecision;
|
|
57
|
+
constructor(record: ToolAuthorityDecisionRecord);
|
|
58
|
+
}
|
|
59
|
+
/** Compute the gate verdict for a single call. Pure (no IO, no env reads). */
|
|
60
|
+
export declare function evaluateToolAuthority(toolName: string, wiring: ToolAuthorityWiring): {
|
|
61
|
+
readonly record: ToolAuthorityDecisionRecord;
|
|
62
|
+
readonly blocked: boolean;
|
|
63
|
+
};
|
|
64
|
+
export declare function dispatchToolCallsByContract<A, R>(calls: readonly OmkToolCall<A>[], registry: ReadonlyMap<string, OmkToolDefinition<A, R>>, dispatchOne: (call: OmkToolCall<A>) => Promise<R>, authority?: ToolAuthorityWiring): Promise<ToolDispatchResult<R>[]>;
|