open-multi-agent-kit 0.78.1 → 0.78.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/MATURITY.md +4 -0
- package/README.md +70 -1
- package/dist/benchmark/contracts.d.ts +116 -0
- package/dist/benchmark/contracts.js +6 -0
- package/dist/benchmark/fixtures.d.ts +11 -0
- package/dist/benchmark/fixtures.js +121 -0
- package/dist/benchmark/harness.d.ts +13 -0
- package/dist/benchmark/harness.js +191 -0
- package/dist/benchmark/shadow-mode.d.ts +17 -0
- package/dist/benchmark/shadow-mode.js +96 -0
- package/dist/cli/register-spec-agent-goal-commands.js +45 -0
- package/dist/cli/release-promotion-gate.d.ts +14 -0
- package/dist/cli/release-promotion-gate.js +71 -0
- package/dist/cli/v2/release-commands.d.ts +29 -0
- package/dist/cli/v2/release-commands.js +95 -0
- package/dist/commands/chat/native-root-loop.js +14 -1
- package/dist/commands/chat/slash/commands/session.js +19 -1
- package/dist/commands/goal-interview.d.ts +18 -0
- package/dist/commands/goal-interview.js +396 -0
- package/dist/commands/merge.js +102 -56
- package/dist/contracts/interview.d.ts +106 -0
- package/dist/contracts/interview.js +9 -0
- package/dist/contracts/provider-health.d.ts +37 -0
- package/dist/contracts/provider-health.js +49 -1
- package/dist/evidence/evidence-trust-score.d.ts +101 -0
- package/dist/evidence/evidence-trust-score.js +408 -0
- package/dist/evidence/index.d.ts +6 -0
- package/dist/evidence/index.js +3 -0
- package/dist/evidence/proof-trust-cli.d.ts +8 -0
- package/dist/evidence/proof-trust-cli.js +27 -0
- package/dist/evidence/proof-trust.d.ts +14 -0
- package/dist/evidence/proof-trust.js +381 -0
- package/dist/evidence/regression-proof-matrix.d.ts +42 -0
- package/dist/evidence/regression-proof-matrix.js +72 -0
- package/dist/goal/intent-frame.d.ts +6 -0
- package/dist/goal/intent-frame.js +21 -9
- package/dist/goal/interview-assimilation.d.ts +13 -0
- package/dist/goal/interview-assimilation.js +383 -0
- package/dist/goal/interview-question-bank.d.ts +11 -0
- package/dist/goal/interview-question-bank.js +225 -0
- package/dist/goal/interview-scoring.d.ts +31 -0
- package/dist/goal/interview-scoring.js +187 -0
- package/dist/goal/interview-session.d.ts +25 -0
- package/dist/goal/interview-session.js +116 -0
- package/dist/input/input-envelope.d.ts +22 -0
- package/dist/input/input-envelope.js +1 -0
- package/dist/orchestration/merge-arbiter.d.ts +91 -0
- package/dist/orchestration/merge-arbiter.js +376 -0
- package/dist/providers/health.d.ts +3 -0
- package/dist/providers/health.js +46 -0
- package/dist/providers/index.d.ts +1 -0
- package/dist/providers/index.js +1 -0
- package/dist/providers/provider-health.d.ts +8 -1
- package/dist/providers/provider-health.js +39 -0
- package/dist/providers/provider-task-runner.js +31 -0
- package/dist/providers/provider.d.ts +2 -0
- package/dist/providers/router.js +87 -3
- package/dist/providers/types.d.ts +4 -0
- package/dist/runtime/advanced-control-loop.d.ts +60 -0
- package/dist/runtime/advanced-control-loop.js +136 -0
- package/dist/runtime/agent-runtime.d.ts +10 -0
- package/dist/runtime/blast-radius.d.ts +10 -0
- package/dist/runtime/blast-radius.js +14 -0
- package/dist/runtime/contracts/evidence.d.ts +87 -0
- package/dist/runtime/contracts/evidence.js +7 -0
- package/dist/runtime/contracts/router-v2.d.ts +44 -0
- package/dist/runtime/contracts/router-v2.js +4 -0
- package/dist/runtime/contracts/weakness-remediation.d.ts +67 -0
- package/dist/runtime/contracts/weakness-remediation.js +36 -0
- package/dist/runtime/kimi-api-runtime.js +59 -1
- package/dist/runtime/proof-bundle-trust.d.ts +74 -0
- package/dist/runtime/proof-bundle-trust.js +100 -0
- package/dist/runtime/provider-maturity-gate.d.ts +43 -0
- package/dist/runtime/provider-maturity-gate.js +129 -0
- package/dist/runtime/public-surface.d.ts +93 -0
- package/dist/runtime/public-surface.js +146 -0
- package/dist/runtime/router-v2-scoring.d.ts +11 -0
- package/dist/runtime/router-v2-scoring.js +151 -0
- package/dist/runtime/tool-dispatch-contracts.d.ts +24 -3
- package/dist/runtime/tool-dispatch-contracts.js +42 -2
- package/dist/runtime/weakness-remediation-index.d.ts +27 -0
- package/dist/runtime/weakness-remediation-index.js +37 -0
- package/dist/safety/enforcement-engine.d.ts +89 -0
- package/dist/safety/enforcement-engine.js +279 -0
- package/dist/safety/tool-authority-gate.d.ts +40 -0
- package/dist/safety/tool-authority-gate.js +92 -0
- package/dist/schema/evidence.schema.d.ts +2 -2
- package/dist/schema/proof-bundle.schema.d.ts +28 -28
- package/dist/util/clipboard-image.d.ts +49 -0
- package/dist/util/clipboard-image.js +263 -0
- package/docs/2026-06-09/critical-issues.md +20 -0
- package/docs/2026-06-09/improvements.md +14 -0
- package/docs/2026-06-09/init-checklist.md +25 -0
- package/docs/2026-06-09/plan.md +20 -0
- package/docs/benchmark-design.md +122 -0
- package/docs/github-organic-promotion.md +127 -0
- package/docs/native-root-runtime-algorithms.md +301 -0
- package/package.json +8 -4
- package/readmeasset/ASSET_INDEX.md +1 -0
- package/templates/skills/agents/omk-agent-reach-websearch/SKILL.md +55 -0
- package/templates/skills/kimi/omk-agent-reach-websearch/SKILL.md +55 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proof Trust MVP — Algorithm 3
|
|
3
|
+
*
|
|
4
|
+
* Evaluates a proof bundle against a run directory, computing a trust score
|
|
5
|
+
* based on weighted field presence and validity.
|
|
6
|
+
*/
|
|
7
|
+
/** Engine that scores a proof bundle by checking which expected parts are present and valid. */
export interface ProofTrustMvpEngine {
    /**
     * Evaluate one proof bundle.
     *
     * @param runDir - Run directory; the implementation looks here for fallback
     *   artifacts (e.g. `commands.sh`, `decisions.jsonl`, `evidence.jsonl`,
     *   `limitations.md`, `replay.json`, `inspect.json`, any `*.out` entry)
     *   when the bundle does not supply a usable path.
     * @param bundle - Untyped bundle value; it is inspected at runtime, so any
     *   value (including non-objects) may be passed.
     * @returns The trust score together with the labels of the failed checks.
     */
    evaluate(runDir: string, bundle: unknown): Promise<ProofTrustResult>;
}
/** Outcome of a proof-trust evaluation. */
export interface ProofTrustResult {
    /** Sum of the weights of all checks that passed, rounded to two decimals. */
    readonly trustScore: number;
    /**
     * Labels of the checks that failed, in evaluation order. Possible values:
     * "schema", "commands", "stdout", "artifact-hashes", "decision-trace",
     * "weak-evidence", "limitations", "replay-or-inspect".
     */
    readonly missingFields: readonly string[];
}
/**
 * Create a proof-trust engine.
 *
 * @param schemas - Optional schema registry. NOTE(review): the implementation
 *   shipped alongside this declaration performs no JSON Schema validation
 *   with it yet — confirm before relying on it.
 */
export declare function createProofTrustMvpEngine(schemas?: Readonly<Record<string, unknown>>): ProofTrustMvpEngine;
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proof Trust MVP — Algorithm 3
|
|
3
|
+
*
|
|
4
|
+
* Evaluates a proof bundle against a run directory, computing a trust score
|
|
5
|
+
* based on weighted field presence and validity.
|
|
6
|
+
*/
|
|
7
|
+
import { createHash } from "node:crypto";
|
|
8
|
+
import { existsSync } from "node:fs";
|
|
9
|
+
import { readFile, readdir, stat } from "node:fs/promises";
|
|
10
|
+
import { isAbsolute, join, relative } from "node:path";
|
|
11
|
+
// ─── Constants ─────────────────────────────────────────────────────────────
|
|
12
|
+
/** Schema version a bundle must declare to pass the schema check. */
const EXPECTED_SCHEMA_VERSION = "omk.proof-bundle.v1";
/** Schema version every decision-trace line must declare. */
const DECISION_SCHEMA_VERSION = "omk.decision.v1";
/** Minimum share of evidence records whose status is "passed". */
const EVIDENCE_PASS_RATIO = 0.75;
/** Top-level keys that must exist on a bundle for the schema check to pass. */
const REQUIRED_BUNDLE_FIELDS = [
    "proofId",
    "title",
    "omkVersion",
    "runtimeVersion",
    "commit",
    "runId",
    "providerPolicy",
    "scenario",
    "files",
    "verdict",
    "knownLimitations",
    "checksums",
];
/** Contribution of each check to the trust score; the weights sum to 1. */
const WEIGHTS = {
    schema: 0.15,
    commands: 0.15,
    stdout: 0.10,
    hashes: 0.15,
    decisions: 0.15,
    evidence: 0.15,
    limitations: 0.05,
    replay: 0.10,
};
/** Label reported in `missingFields` when the corresponding check fails. */
const FIELD_TO_MISSING = {
    schema: "schema",
    commands: "commands",
    stdout: "stdout",
    hashes: "artifact-hashes",
    decisions: "decision-trace",
    evidence: "weak-evidence",
    limitations: "limitations",
    replay: "replay-or-inspect",
};
// ─── Helpers ───────────────────────────────────────────────────────────────
/** True for non-null, non-array objects. */
function isObject(value) {
    return value !== null && typeof value === "object" && !Array.isArray(value);
}
/** True for strings with at least one character. */
function isNonEmptyString(value) {
    return typeof value === "string" && value.length > 0;
}
/**
 * Resolve a bundle-supplied relative path against `root`.
 * Returns undefined for non-strings, absolute paths, and anything containing
 * a ".." segment or otherwise escaping `root`.
 */
function resolveRepoPath(root, path) {
    if (!isNonEmptyString(path) || isAbsolute(path))
        return undefined;
    if (path.split(/[\\/]+/).includes(".."))
        return undefined;
    const absolute = join(root, path);
    const back = relative(root, absolute);
    return back.startsWith("..") || isAbsolute(back) ? undefined : absolute;
}
/** Hex-encoded SHA-256 of a file's bytes; rejects if the file cannot be read. */
async function digestFile(filePath) {
    return createHash("sha256").update(await readFile(filePath)).digest("hex");
}
/** File content as UTF-8, or undefined when the path is unset or unreadable. */
async function readTextOrUndefined(filePath) {
    if (!filePath)
        return undefined;
    try {
        return await readFile(filePath, "utf8");
    }
    catch {
        // Missing or unreadable artifacts simply count as absent.
        return undefined;
    }
}
/** True when the file is readable and contains non-whitespace text. */
async function hasNonEmptyText(filePath) {
    const content = await readTextOrUndefined(filePath);
    return content !== undefined && content.trim().length > 0;
}
/** Non-blank lines of a file; empty when the file is unset or unreadable. */
async function readNonBlankLines(filePath) {
    const content = await readTextOrUndefined(filePath);
    if (content === undefined)
        return [];
    return content.split(/\r?\n/).filter((line) => line.trim().length > 0);
}
/** Parse one JSONL line; undefined unless it is valid JSON holding an object. */
function parseJsonObject(line) {
    try {
        const parsed = JSON.parse(line);
        return isObject(parsed) ? parsed : undefined;
    }
    catch {
        return undefined;
    }
}
/** True when the file has at least one line and every line is a v1 decision record. */
async function isCompleteDecisionTrace(filePath) {
    const lines = await readNonBlankLines(filePath);
    return lines.length > 0 && lines.every((line) => {
        const record = parseJsonObject(line);
        return record !== undefined && record.schemaVersion === DECISION_SCHEMA_VERSION;
    });
}
/** Evidence records of a JSONL file; lines that are not JSON objects are skipped. */
async function readEvidenceRecords(filePath) {
    const lines = await readNonBlankLines(filePath);
    return lines.map((line) => parseJsonObject(line)).filter((record) => record !== undefined);
}
/** True when stat() succeeds and reports a size above zero. */
async function hasNonZeroSize(filePath) {
    if (!filePath)
        return false;
    try {
        return (await stat(filePath)).size > 0;
    }
    catch {
        return false;
    }
}
/** True when the directory lists at least one entry ending in ".out". */
async function hasOutEntry(dir) {
    try {
        return (await readdir(dir)).some((entry) => entry.endsWith(".out"));
    }
    catch {
        return false;
    }
}
/** Version string matches and every required top-level key exists. */
function hasValidSchema(bundle) {
    return isObject(bundle) &&
        bundle.schemaVersion === EXPECTED_SCHEMA_VERSION &&
        REQUIRED_BUNDLE_FIELDS.every((field) => field in bundle);
}
/**
 * Verify that every declared file has a matching SHA-256 checksum and that no
 * checksum refers to an undeclared file. A bundle that declares no file paths
 * fails: with nothing hashed there is nothing to trust.
 */
async function verifyArtifactHashes(root, bundle) {
    if (!isObject(bundle) || !isObject(bundle.checksums) || !isObject(bundle.files))
        return false;
    const checksums = bundle.checksums;
    const declaredPaths = Object.values(bundle.files).filter((value) => isNonEmptyString(value));
    if (declaredPaths.length === 0)
        return false;
    for (const path of declaredPaths) {
        const resolved = resolveRepoPath(root, path);
        const expected = checksums[path];
        if (!resolved || !isNonEmptyString(expected))
            return false;
        try {
            if ((await digestFile(resolved)) !== expected)
                return false;
        }
        catch {
            // Missing or unreadable artifact: the hash cannot be confirmed.
            return false;
        }
    }
    const declared = new Set(declaredPaths);
    return Object.keys(checksums).every((checksumPath) => declared.has(checksumPath));
}
// ─── Factory ───────────────────────────────────────────────────────────────
/**
 * Create the proof-trust engine.
 *
 * @param schemas Reserved for future JSON Schema validation; currently unused
 *   (the MVP checks the schema version string and required keys only).
 * @returns An engine whose `evaluate(runDir, bundle)` resolves to
 *   `{ trustScore, missingFields }`: the rounded sum of the weights of the
 *   checks that passed and the labels of those that failed, in check order.
 *   Bundle-supplied paths are resolved against `process.cwd()`; `runDir` is
 *   used for fallback artifacts.
 */
export function createProofTrustMvpEngine(schemas) {
    return {
        async evaluate(runDir, bundle) {
            const root = process.cwd();
            const files = isObject(bundle) && isObject(bundle.files) ? bundle.files : {};
            // Safe absolute path for a `bundle.files` entry, or undefined.
            const declared = (key) => resolveRepoPath(root, files[key]);
            // One predicate per WEIGHTS key. Each tries the bundle-declared
            // artifact first and falls back to a conventional name in runDir.
            const checks = {
                schema: async () => hasValidSchema(bundle),
                commands: async () => (await hasNonEmptyText(declared("commands"))) ||
                    (await hasNonEmptyText(join(runDir, "commands.sh"))),
                stdout: async () => (await hasNonZeroSize(declared("stdout"))) ||
                    (await hasOutEntry(runDir)),
                hashes: async () => verifyArtifactHashes(root, bundle),
                decisions: async () => (await isCompleteDecisionTrace(declared("decisionsJsonl"))) ||
                    (await isCompleteDecisionTrace(join(runDir, "decisions.jsonl"))),
                evidence: async () => {
                    let records = await readEvidenceRecords(declared("evidenceJsonl"));
                    if (records.length === 0) {
                        records = await readEvidenceRecords(join(runDir, "evidence.jsonl"));
                    }
                    if (records.length === 0)
                        return false;
                    const passed = records.filter((record) => record.status === "passed").length;
                    return passed / records.length >= EVIDENCE_PASS_RATIO;
                },
                limitations: async () => (await hasNonEmptyText(declared("limitations"))) ||
                    (await hasNonEmptyText(join(runDir, "limitations.md"))),
                replay: async () => {
                    const fromBundle = ["replay", "inspectJson"].some((key) => {
                        const resolved = declared(key);
                        return resolved !== undefined && existsSync(resolved);
                    });
                    return fromBundle ||
                        existsSync(join(runDir, "replay.json")) ||
                        existsSync(join(runDir, "inspect.json"));
                },
            };
            const missingFields = [];
            let trustScore = 0;
            // WEIGHTS key order defines both evaluation and reporting order.
            for (const [field, weight] of Object.entries(WEIGHTS)) {
                if (await checks[field]()) {
                    trustScore += weight;
                }
                else {
                    missingFields.push(FIELD_TO_MISSING[field]);
                }
            }
            return {
                // Rounding hides float noise from summing the decimal weights.
                trustScore: Math.round(trustScore * 100) / 100,
                missingFields: Object.freeze(missingFields),
            };
        },
    };
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression Proof Matrix — Algorithm 9
|
|
3
|
+
*
|
|
4
|
+
* Verifies that Algorithms 1~8 are alive via tests, proof bundles,
|
|
5
|
+
* decision traces, and CLI surfaces.
|
|
6
|
+
*/
|
|
7
|
+
import type { ProofTrustResult } from "./proof-trust.js";
|
|
8
|
+
/** Specification for a single algorithm in the regression matrix. */
export interface AlgorithmSpec {
    /** Identifier; used as the key in `coverageByAlgorithm` and quoted in failure reasons. */
    readonly name: string;
    /** Test count; any value above 0 contributes 0.35 to the coverage score. */
    readonly tests: number;
    /** Proof-bundle count; any value above 0 contributes 0.30. */
    readonly proofBundles: number;
    /** Decision-trace count; any value above 0 contributes 0.20. */
    readonly decisionTraces: number;
    /** CLI reachability; "reachable" contributes 0.15. */
    readonly cliSurface: "reachable" | "unreachable";
}
/** Release candidate global gate inputs. */
export interface ReleaseCandidate {
    /**
     * Median proof trust; used only when none of the supplied proof bundles
     * carries a numeric `trustScore`. Compared against the proof-trust threshold.
     */
    readonly medianProofTrust: number;
    /** Must be exactly 1 for the gate to pass. */
    readonly routerShadowSafety: number;
    /** Must be exactly 1 for the gate to pass. */
    readonly providerAuthorityInvariant: number;
    /** Must be exactly 1 for the gate to pass. */
    readonly minimalVerifiedDemo: number;
}
/** Result of evaluating the regression proof matrix. */
export interface RegressionProofMatrixResult {
    /** "pass" only when `reasons` is empty. */
    readonly verdict: "pass" | "fail";
    /** Coverage score per algorithm name, rounded to two decimals. */
    readonly coverageByAlgorithm: Readonly<Record<string, number>>;
    /** One human-readable message per failed per-algorithm or global gate. */
    readonly reasons: readonly string[];
}
/** Engine interface. */
export interface RegressionProofMatrixEngine {
    /**
     * Score every algorithm and apply the global release gates.
     *
     * @param algorithmSet - Algorithms to score.
     * @param testSuite - NOTE(review): not read by the implementation shipped
     *   alongside this declaration; coverage is derived from `algorithmSet` only.
     * @param proofBundles - Bundles whose numeric `trustScore` values feed the
     *   median proof-trust gate.
     * @param releaseCandidate - Global gate inputs.
     */
    evaluate(algorithmSet: readonly AlgorithmSpec[], testSuite: {
        readonly testsByAlgorithm: Readonly<Record<string, number>>;
    }, proofBundles: ReadonlyArray<Readonly<{
        algorithm: string;
    } & Partial<ProofTrustResult>>>, releaseCandidate: ReleaseCandidate): RegressionProofMatrixResult;
}
/** Optional configuration for the engine factory. */
export interface RegressionProofMatrixOptions {
    /** Minimum per-algorithm coverage; defaults to TAU_EVIDENCE. */
    readonly coverageThreshold?: number;
    /** Minimum median proof trust; defaults to TAU_EVIDENCE. */
    readonly proofTrustThreshold?: number;
}
/** Create a regression proof matrix engine with optional threshold overrides. */
export declare function createRegressionProofMatrixEngine(options?: RegressionProofMatrixOptions): RegressionProofMatrixEngine;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression Proof Matrix — Algorithm 9
|
|
3
|
+
*
|
|
4
|
+
* Verifies that Algorithms 1~8 are alive via tests, proof bundles,
|
|
5
|
+
* decision traces, and CLI surfaces.
|
|
6
|
+
*/
|
|
7
|
+
import { TAU_EVIDENCE } from "../runtime/contracts/weakness-remediation.js";
|
|
8
|
+
// ─── Constants ─────────────────────────────────────────────────────────────
|
|
9
|
+
const DEFAULT_COVERAGE_THRESHOLD = TAU_EVIDENCE; // 0.75
|
|
10
|
+
const DEFAULT_PROOF_TRUST_THRESHOLD = TAU_EVIDENCE; // 0.75
|
|
11
|
+
// ─── Factory ───────────────────────────────────────────────────────────────
|
|
12
|
+
/**
 * Create the regression proof matrix engine (Algorithm 9).
 *
 * @param options Optional `coverageThreshold` / `proofTrustThreshold`
 *   overrides; each falls back to its module default when omitted.
 * @returns An engine whose `evaluate` scores every algorithm, applies the
 *   global release gates, and returns `{ verdict, coverageByAlgorithm, reasons }`.
 *   The verdict is "pass" only when no reason was recorded.
 */
export function createRegressionProofMatrixEngine(options) {
    const coverageThreshold = options?.coverageThreshold ?? DEFAULT_COVERAGE_THRESHOLD;
    const proofTrustThreshold = options?.proofTrustThreshold ?? DEFAULT_PROOF_TRUST_THRESHOLD;
    return {
        evaluate(algorithmSet, testSuite, proofBundles, releaseCandidate) {
            const reasons = [];
            const coverageByAlgorithm = {};
            // Per-algorithm coverage. Points are summed as integers so the
            // value compared against the threshold is exactly the value
            // reported: summing 0.35 + 0.30 + 0.20 + 0.15 as floats gives
            // 0.9999999999999999, which would fail a threshold of 1.
            for (const alg of algorithmSet) {
                const points = (alg.tests > 0 ? 35 : 0) +
                    (alg.proofBundles > 0 ? 30 : 0) +
                    (alg.decisionTraces > 0 ? 20 : 0) +
                    (alg.cliSurface === "reachable" ? 15 : 0);
                const coverage = points / 100;
                coverageByAlgorithm[alg.name] = coverage;
                if (coverage < coverageThreshold) {
                    reasons.push(`Algorithm "${alg.name}" coverage ${coverage.toFixed(2)} < threshold ${coverageThreshold}`);
                }
            }
            // Global gate: median proof trust. Scores carried by the bundles
            // take precedence over the release candidate's own figure.
            const proofTrustScores = proofBundles
                .map((pb) => pb.trustScore)
                .filter((score) => typeof score === "number");
            const medianProofTrust = proofTrustScores.length > 0
                ? computeMedian(proofTrustScores)
                : releaseCandidate.medianProofTrust;
            if (typeof medianProofTrust !== "number" || Number.isNaN(medianProofTrust)) {
                // Fail closed: `NaN < threshold` is false and would otherwise
                // let a missing or corrupt score through the gate.
                reasons.push(`medianProofTrust ${String(medianProofTrust)} is not a valid number`);
            }
            else if (medianProofTrust < proofTrustThreshold) {
                reasons.push(`medianProofTrust ${medianProofTrust.toFixed(2)} < threshold ${proofTrustThreshold}`);
            }
            // Global gates: each invariant must be exactly 1.
            if (releaseCandidate.routerShadowSafety !== 1) {
                reasons.push(`routerShadowSafety ${releaseCandidate.routerShadowSafety} !== 1`);
            }
            if (releaseCandidate.providerAuthorityInvariant !== 1) {
                reasons.push(`providerAuthorityInvariant ${releaseCandidate.providerAuthorityInvariant} !== 1`);
            }
            if (releaseCandidate.minimalVerifiedDemo !== 1) {
                reasons.push(`minimalVerifiedDemo ${releaseCandidate.minimalVerifiedDemo} !== 1`);
            }
            return {
                verdict: reasons.length === 0 ? "pass" : "fail",
                coverageByAlgorithm: Object.freeze(coverageByAlgorithm),
                reasons: Object.freeze(reasons),
            };
        },
    };
}
|
|
62
|
+
// ─── Helpers ───────────────────────────────────────────────────────────────
|
|
63
|
+
/**
 * Median of a list of numbers.
 * Returns 0 for an empty list; the input array is left untouched.
 */
function computeMedian(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const ascending = values.slice().sort((left, right) => left - right);
    const upper = Math.floor(count / 2);
    // Odd count: the single middle element. Even count: mean of the two middle ones.
    return count % 2 === 1
        ? ascending[upper]
        : (ascending[upper - 1] + ascending[upper]) / 2;
}
|
|
@@ -57,3 +57,9 @@ export declare function evaluatePromptNovelty(input: {
|
|
|
57
57
|
action: NextAction;
|
|
58
58
|
targetAtomId?: string;
|
|
59
59
|
}): PromptNoveltyReport;
|
|
60
|
+
/**
 * Redact high-confidence secrets from free text while preserving layout
 * (no normalization). Use before persisting or echoing prompts/answers so
 * tokens never land in interview artifacts, GoalSpec fields, or --json output.
 *
 * @param text - Free text to scan.
 * @returns The text with every match of the built-in secret patterns replaced
 *   by a `[REDACTED_…]` placeholder; text without matches is returned unchanged.
 */
export declare function redactSecretText(text: string): string;
|
|
@@ -176,20 +176,32 @@ export function evaluatePromptNovelty(input) {
|
|
|
176
176
|
targetAtomId,
|
|
177
177
|
};
|
|
178
178
|
}
|
|
179
|
+
/**
 * High-confidence secret patterns, applied in order. Every pattern carries the
 * `g` flag so all occurrences are replaced. Replacement strings contain no `$`
 * sequences, so they are inserted literally.
 */
const SECRET_REDACTION_PATTERNS = [
    { name: "bearer-token", pattern: /Bearer\s+[A-Za-z0-9._~+/=-]{12,}/gi, replacement: "Bearer [REDACTED_TOKEN]" },
    { name: "openai-key", pattern: /\bsk-[A-Za-z0-9_-]{16,}\b/g, replacement: "[REDACTED_API_KEY]" },
    { name: "github-token", pattern: /\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{16,}\b/g, replacement: "[REDACTED_GITHUB_TOKEN]" },
    { name: "aws-key", pattern: /\bAKIA[0-9A-Z]{16}\b/g, replacement: "[REDACTED_AWS_KEY]" },
    // The value may be unquoted, or wrapped in double quotes, single quotes,
    // or backticks. Quoted values may contain spaces but not line breaks.
    // Without the quoted alternatives, NAME="value" assignments are skipped
    // because the unquoted value class excludes quote characters.
    { name: "env-secret", pattern: /\b[A-Z][A-Z0-9_]*(?:SECRET|TOKEN|KEY|PASSWORD)\s*=\s*(?:"[^"\r\n]{6,}"|'[^'\r\n]{6,}'|`[^`\r\n]{6,}`|[^\s`'"]{6,})/g, replacement: "[REDACTED_ENV_SECRET]" },
    { name: "private-key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g, replacement: "[REDACTED_PRIVATE_KEY]" },
];
/**
 * Redact high-confidence secrets from free text while preserving layout
 * (no normalization). Use before persisting or echoing prompts/answers so
 * tokens never land in interview artifacts, GoalSpec fields, or --json output.
 *
 * @param text Free text to scan.
 * @returns The text with every pattern match replaced by its placeholder;
 *   text without matches is returned unchanged.
 */
export function redactSecretText(text) {
    return SECRET_REDACTION_PATTERNS.reduce((current, item) => current.replace(item.pattern, item.replacement), text);
}
|
|
179
199
|
function sanitizePrompt(rawPrompt) {
|
|
180
200
|
const normalized = normalizeText(rawPrompt);
|
|
181
|
-
const patterns = [
|
|
182
|
-
{ name: "bearer-token", pattern: /Bearer\s+[A-Za-z0-9._~+/=-]{12,}/gi, replacement: "Bearer [REDACTED_TOKEN]" },
|
|
183
|
-
{ name: "openai-key", pattern: /\bsk-[A-Za-z0-9_-]{16,}\b/g, replacement: "[REDACTED_API_KEY]" },
|
|
184
|
-
{ name: "github-token", pattern: /\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{16,}\b/g, replacement: "[REDACTED_GITHUB_TOKEN]" },
|
|
185
|
-
{ name: "aws-key", pattern: /\bAKIA[0-9A-Z]{16}\b/g, replacement: "[REDACTED_AWS_KEY]" },
|
|
186
|
-
{ name: "env-secret", pattern: /\b[A-Z][A-Z0-9_]*(?:SECRET|TOKEN|KEY|PASSWORD)\s*=\s*[^\s`'"]{6,}/g, replacement: "[REDACTED_ENV_SECRET]" },
|
|
187
|
-
{ name: "private-key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g, replacement: "[REDACTED_PRIVATE_KEY]" },
|
|
188
|
-
];
|
|
189
201
|
let redacted = normalized;
|
|
190
202
|
let redactionCount = 0;
|
|
191
203
|
const diagnostics = [];
|
|
192
|
-
for (const item of
|
|
204
|
+
for (const item of SECRET_REDACTION_PATTERNS) {
|
|
193
205
|
let count = 0;
|
|
194
206
|
redacted = redacted.replace(item.pattern, () => {
|
|
195
207
|
count += 1;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { GoalSpec } from "../contracts/goal.js";
|
|
2
|
+
import type { InterviewAnswer, InterviewApplyResult, InterviewFinding, InterviewQuestion, InterviewSeed, InterviewSpecDelta } from "../contracts/interview.js";
|
|
3
|
+
/**
 * Takes an interview seed, its questions and the collected answers (plus an
 * optional existing goal) and returns findings, a spec delta and a list of
 * contradictions.
 *
 * NOTE(review): only this declaration is visible here; how findings and
 * contradictions are derived lives in interview-assimilation.js — confirm
 * against the implementation before documenting further.
 */
export declare function assimilateAnswers(input: {
    seed: InterviewSeed;
    questions: InterviewQuestion[];
    answers: InterviewAnswer[];
    goal?: GoalSpec;
}): {
    findings: InterviewFinding[];
    specDelta: InterviewSpecDelta;
    contradictions: string[];
};
/**
 * Takes a goal and an interview spec delta and returns an apply result.
 *
 * NOTE(review): presumably applies the delta to the goal; whether `goal` is
 * mutated or copied cannot be told from this declaration — verify in
 * interview-assimilation.js.
 */
export declare function applyInterviewDelta(goal: GoalSpec, delta: InterviewSpecDelta): InterviewApplyResult;
|