prism-mcp-server 7.3.1 → 7.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -194
- package/dist/cli.js +50 -0
- package/dist/darkfactory/clawInvocation.js +62 -7
- package/dist/darkfactory/runner.js +288 -24
- package/dist/darkfactory/safetyController.js +48 -22
- package/dist/darkfactory/schema.js +2 -0
- package/dist/dashboard/ui.js +2617 -2051
- package/dist/dashboard/ui.tmp.js +3475 -0
- package/dist/errors.js +29 -0
- package/dist/server.js +19 -0
- package/dist/storage/sqlite.js +199 -7
- package/dist/storage/supabase.js +143 -3
- package/dist/tools/routerExperience.js +14 -0
- package/dist/verification/clawValidator.js +2 -1
- package/dist/verification/cliHandler.js +325 -0
- package/dist/verification/gatekeeper.js +39 -0
- package/dist/verification/renameDetector.js +170 -0
- package/dist/verification/runner.js +27 -5
- package/dist/verification/schema.js +18 -0
- package/dist/verification/severityPolicy.js +5 -1
- package/package.json +5 -2
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import { computeRubricHash } from './schema.js';
|
|
3
|
+
// ─── Constants ────────────────────────────────────────────────────────────────
|
|
4
|
+
/**
 * H5 fix: Centralize the harness file path as a constant.
 *
 * The path is relative, so it is resolved against the process's current
 * working directory at the moment it is handed to `fs.readFile`. The same
 * constant is also interpolated into the user-facing error messages printed
 * by the renderers in this module.
 */
const DEFAULT_HARNESS_PATH = './verification_harness.json';
|
|
6
|
+
// ─── Utilities ────────────────────────────────────────────────────────────────
|
|
7
|
+
/**
 * M11 fix: reusable CI / strict-mode detection.
 *
 * Strict verification is considered active when any of the following
 * environment variables carries an enabling value:
 *   - `CI` set to `'true'` or `'1'`
 *   - `GITHUB_ACTIONS`, `GITLAB_CI` or `PRISM_STRICT_VERIFICATION` set to `'true'`
 *
 * @returns {boolean} `true` when strict verification policy applies.
 */
export function isStrictVerificationEnv() {
    const { CI, GITHUB_ACTIONS, GITLAB_CI, PRISM_STRICT_VERIFICATION } = process.env;
    if (CI === 'true' || CI === '1') {
        return true;
    }
    // The remaining switches only recognise the literal string 'true'.
    return [GITHUB_ACTIONS, GITLAB_CI, PRISM_STRICT_VERIFICATION].includes('true');
}
|
|
15
|
+
// ─── Renderers ────────────────────────────────────────────────────────────────
|
|
16
|
+
/**
 * Render a VerifyStatusResult as human-readable console output.
 *
 * In JSON mode the result is serialized verbatim to stdout and nothing else
 * is printed. Otherwise the function prints, in order: the project banner,
 * the last-run summary, and then exactly one of the harness states
 * (missing / invalid JSON / synchronized / drift). Drift output uses
 * `console.warn` or `console.error` depending on `drift.policy`, followed by
 * the optional structured diff.
 *
 * @param result   Object produced by `computeVerifyStatus`.
 * @param jsonMode When truthy, emit machine-readable JSON instead of text.
 */
function renderVerifyStatus(result, jsonMode) {
    if (jsonMode) {
        process.stdout.write(JSON.stringify(result, null, 2) + '\n');
        return;
    }
    console.log(`\n🔍 Checking verification status for project: ${result.project}...`);
    if (result.no_runs) {
        console.log('⚠️ No previous verification runs found.');
        return;
    }
    const r = result.last_run;
    const overrideBadge = r.gate_override
        ? `[OVERRIDDEN${r.override_reason ? `: ${r.override_reason}` : ''}] `
        : '';
    const passText = r.passed ? 'YES' : 'NO';
    console.log(`✅ Last Run: ${r.run_at} | Passed: ${overrideBadge}${passText}`);
    console.log(` Pass Rate: ${(r.pass_rate * 100).toFixed(1)}% | Critical Failures: ${r.critical_failures}`);
    console.log(` Coverage Score: ${(r.coverage_score * 100).toFixed(1)}% | Gate Action: ${r.gate_action}`);
    if (result.harness_missing) {
        console.log('\nℹ️ No local verification_harness.json found to check against.');
        return;
    }
    if (result.harness_invalid_json) {
        console.error(`\n❌ Invalid JSON in ${DEFAULT_HARNESS_PATH}.`);
        return;
    }
    if (result.synchronized) {
        console.log('\n✨ Harness is synchronized.');
        return;
    }
    // Drift output — phrasing differs only by policy outcome, not unrelated wording
    const d = result.drift;
    // The stored hash is copied straight from the last run record and may be
    // absent there; render a placeholder instead of crashing on `.slice`.
    const shortHash = (hash) => (typeof hash === 'string' ? `${hash.slice(0, 8)}...` : '(none)');
    const hashLine = ` Stored Hash: ${shortHash(d.stored_hash)} Local Hash: ${shortHash(d.local_hash)}`;
    if (d.policy === 'bypassed') {
        console.warn('\n🚨 [BYPASSED] Configuration drift detected.');
        console.warn(hashLine);
        console.warn(` Drift block bypassed via --force. Recommended: run 'prism verify generate' to realign.`);
    }
    else if (d.policy === 'blocked') {
        console.error('\n🚫 [BLOCKED] Configuration drift detected — CI environment enforces strict policy.');
        console.error(hashLine);
        console.error(` Action: run 'prism verify generate' before merging to update your harness.`);
    }
    else {
        // 'warn' — local dev
        console.warn('\n⚠️ [DRIFT] Configuration drift detected.');
        console.warn(hashLine);
        console.warn(` Recommended: run 'prism verify generate' to update your harness.`);
    }
    // Structured diff is optional: it is only present when the historical
    // harness could be loaded for comparison.
    if (d.diff_counts) {
        console.log(`\n Diff Summary: +${d.diff_counts.added} added, ~${d.diff_counts.modified} modified, -${d.diff_counts.removed} removed`);
    }
    if (d.diff) {
        console.log('\n Changes Detected:');
        for (const add of d.diff.added) {
            console.log(` + ${add.id}: ${add.description}`);
        }
        for (const mod of d.diff.modified) {
            const keys = mod.changed_keys;
            const keySuffix = keys?.length ? ` [${keys.join(', ')}]` : '';
            console.log(` ~ ${mod.id}: ${mod.description}${keySuffix}`);
        }
        for (const rem of d.diff.removed) {
            console.log(` - ${rem.id}: ${rem.description}`);
        }
    }
}
|
|
83
|
+
/**
 * Print a GenerateHarnessResult for humans, or as raw JSON.
 *
 * JSON mode writes the serialized result to stdout and stops. Text mode
 * prints a banner and then the message for the first flag that is set, in
 * the order: file_missing, invalid_json, already_exists, success. When no
 * flag is set only the banner is printed.
 *
 * @param result   Object produced by `computeGenerateHarness`.
 * @param jsonMode When truthy, emit machine-readable JSON instead of text.
 */
function renderGenerateHarness(result, jsonMode) {
    if (jsonMode) {
        const payload = JSON.stringify(result, null, 2);
        process.stdout.write(payload + '\n');
        return;
    }
    console.log(`\n🛠 Generating/Refreshing harness for project: ${result.project}...`);
    // Evaluated lazily so the hash is only touched by the branches that print it.
    const hashPrefix = () => result.rubric_hash?.slice(0, 12);
    if (result.file_missing) {
        console.error(`❌ Failed to read ${DEFAULT_HARNESS_PATH}. Does the file exist?`);
    }
    else if (result.invalid_json) {
        console.error(`❌ Invalid JSON in ${DEFAULT_HARNESS_PATH}.`);
    }
    else if (result.already_exists) {
        console.warn(`\n⚠️ A harness with rubric hash ${hashPrefix()}... already exists.`);
        console.warn(' Use --force to re-register anyway.');
    }
    else if (result.success) {
        console.log('✅ Harness registered successfully.');
        console.log(` Hash: ${hashPrefix()}...`);
        console.log(` Tests: ${result.test_count} assertions.`);
    }
}
|
|
109
|
+
// ─── Handlers ─────────────────────────────────────────────────────────────────
|
|
110
|
+
/**
 * Core logic for `verify status`.
 *
 * Compares the rubric hash recorded on the most recent verification run with
 * the hash of the local harness file and reports whether they have drifted.
 * Returns a VerifyStatusResult — callers decide how to render it and whether
 * to set the process exit code.
 *
 * Outcomes:
 *   - no stored runs                → `no_runs: true`
 *   - harness file unreadable       → `harness_missing: true` (exit_code 0)
 *   - harness is not valid JSON, or is JSON without a hashable `tests`
 *     array                         → `harness_invalid_json: true`, exit_code 1
 *   - hashes equal                  → `synchronized: true`
 *   - hashes differ                 → `synchronized: false` plus `drift`, whose
 *     `policy` is 'bypassed' (force), 'blocked' (strict env, exit_code 1)
 *     or 'warn'
 *
 * @param storage Backend exposing `listVerificationRuns(project, userId)` and,
 *                optionally, `getVerificationHarness(hash, userId)`.
 * @param project Project identifier echoed into the result.
 * @param force   When true, drift is reported but never blocks.
 * @param userId  Passed through to the storage calls.
 * @returns The status result object described above.
 */
export async function computeVerifyStatus(storage, project, force = false, userId = 'default') {
    const base = {
        schema_version: 1,
        project,
        no_runs: false,
        harness_missing: false,
        harness_invalid_json: false,
        synchronized: null,
        recommended_action: null,
        exit_code: 0,
    };
    // 1. Get latest run
    const runs = await storage.listVerificationRuns(project, userId);
    const lastRun = runs?.[0];
    if (!lastRun) {
        return { ...base, no_runs: true, recommended_action: 'run prism verify generate' };
    }
    base.last_run = {
        run_at: lastRun.run_at,
        passed: lastRun.passed,
        pass_rate: lastRun.pass_rate,
        critical_failures: lastRun.critical_failures,
        coverage_score: lastRun.coverage_score,
        gate_action: lastRun.gate_action,
        gate_override: lastRun.gate_override ?? false,
        override_reason: lastRun.override_reason,
    };
    // 2. Drift detection — C5 fix: separate readFile and JSON.parse error paths
    let harnessRaw;
    try {
        harnessRaw = await fs.readFile(DEFAULT_HARNESS_PATH, 'utf-8');
    }
    catch {
        return { ...base, harness_missing: true };
    }
    let localHarness;
    try {
        localHarness = JSON.parse(harnessRaw);
    }
    catch {
        return { ...base, harness_invalid_json: true, exit_code: 1 };
    }
    // Syntactically valid JSON can still be unusable as a harness (`{}`,
    // `null`, `tests` not an array, entries the hasher cannot sort). Report
    // that through the existing invalid-harness flag instead of letting a
    // TypeError escape from the hashing step.
    const invalidHarness = { ...base, harness_invalid_json: true, exit_code: 1 };
    if (!Array.isArray(localHarness?.tests)) {
        return invalidHarness;
    }
    let localHash;
    try {
        localHash = computeRubricHash(localHarness.tests);
    }
    catch {
        return invalidHarness;
    }
    const storedHash = lastRun.rubric_hash;
    if (localHash === storedHash) {
        return { ...base, synchronized: true };
    }
    // Drift detected
    const strictEnv = isStrictVerificationEnv();
    // Phase 2 Diagnostics: Compute Structured Diff
    let diff;
    let diffCounts;
    try {
        const historicalHarness = await storage.getVerificationHarness?.(storedHash, userId);
        if (historicalHarness) {
            diff = diffHarnessTests(historicalHarness.tests, localHarness.tests);
            // Diagnostics v2: Compute summary counts
            diffCounts = {
                added: diff.added.length,
                removed: diff.removed.length,
                modified: diff.modified.length,
            };
        }
    }
    catch {
        // Deliberate best-effort: failure to load or diff the historical harness
        // only skips the structured diff. `diff`/`diff_counts` are optional in
        // the result, and the drift verdict below does not depend on them.
        diff = undefined;
        diffCounts = undefined;
    }
    const driftBase = (diff && diffCounts)
        ? { stored_hash: storedHash, local_hash: localHash, strict_env: strictEnv, diff, diff_counts: diffCounts }
        : { stored_hash: storedHash, local_hash: localHash, strict_env: strictEnv };
    // --force wins over the strict environment; only 'blocked' is a failure.
    const policy = force ? 'bypassed' : (strictEnv ? 'blocked' : 'warn');
    return {
        ...base,
        synchronized: false,
        drift: { ...driftBase, policy },
        recommended_action: "run 'prism verify generate' to update your harness",
        exit_code: policy === 'blocked' ? 1 : 0,
    };
}
/**
 * Compute the by-ID structural diff between the stored and the local tests.
 * Each list in the result is sorted by test id; modified entries are the
 * local test plus a sorted `changed_keys` array of differing top-level fields.
 */
function diffHarnessTests(storedTests, localTests) {
    const byId = (a, b) => a.id.localeCompare(b.id);
    const storedMap = new Map(storedTests.map((t) => [t.id, t]));
    const localMap = new Map(localTests.map((t) => [t.id, t]));
    const diff = { added: [], removed: [], modified: [] };
    for (const [id, localTest] of localMap) {
        const storedTest = storedMap.get(id);
        if (!storedTest) {
            diff.added.push(localTest);
            continue;
        }
        // Compare JSON stringification for deep equality
        if (JSON.stringify(storedTest) === JSON.stringify(localTest)) {
            continue;
        }
        const allKeys = new Set([...Object.keys(storedTest), ...Object.keys(localTest)]);
        const changedKeys = [...allKeys]
            .filter((key) => JSON.stringify(storedTest[key]) !== JSON.stringify(localTest[key]))
            .sort();
        diff.modified.push({ ...localTest, changed_keys: changedKeys });
    }
    for (const [id, storedTest] of storedMap) {
        if (!localMap.has(id)) {
            diff.removed.push(storedTest);
        }
    }
    // Ensure stable ordering by ID
    diff.added.sort(byId);
    diff.removed.sort(byId);
    diff.modified.sort(byId);
    return diff;
}
|
|
248
|
+
/**
 * CLI entry-point for `verify status`.
 *
 * Delegates to `computeVerifyStatus`, hands the result to
 * `renderVerifyStatus` (text or JSON), and propagates a non-zero
 * `exit_code` through `process.exitCode`.
 */
export async function handleVerifyStatus(storage, project, force = false, userId = 'default', jsonMode = false) {
    const status = await computeVerifyStatus(storage, project, force, userId);
    renderVerifyStatus(status, jsonMode);
    const { exit_code: code } = status;
    if (code === 0) {
        return;
    }
    // process.exitCode (not process.exit()) lets pending I/O flush and keeps
    // test teardown clean.
    process.exitCode = code;
}
|
|
260
|
+
/**
 * Core logic for `verify generate`.
 *
 * Reads the local harness file, hashes its tests and registers the harness
 * with storage. Returns a GenerateHarnessResult; callers decide how to
 * render it and whether to set the process exit code.
 *
 * Outcomes:
 *   - file unreadable                         → `file_missing: true`, exit_code 1
 *   - not valid JSON, or JSON without a
 *     hashable `tests` array                  → `invalid_json: true`, exit_code 1
 *   - same hash already stored and not forced → `already_exists: true`, exit_code 0
 *   - saved                                   → `success: true` with
 *     `rubric_hash` and `test_count`
 *
 * Errors thrown by `storage.saveVerificationHarness` propagate to the caller.
 *
 * @param storage Backend exposing `saveVerificationHarness(harness, userId)`
 *                and, optionally, `getVerificationHarness(hash, userId)`.
 * @param project Project identifier stamped onto the saved harness.
 * @param force   When true, skip the already-exists check.
 * @param userId  Passed through to the storage calls.
 */
export async function computeGenerateHarness(storage, project, force = false, userId = 'default') {
    const base = {
        schema_version: 1,
        project,
        success: false,
        already_exists: false,
        file_missing: false,
        invalid_json: false,
        exit_code: 0,
    };
    let raw;
    try {
        raw = await fs.readFile(DEFAULT_HARNESS_PATH, 'utf-8');
    }
    catch {
        return { ...base, file_missing: true, exit_code: 1 };
    }
    let harnessData;
    try {
        harnessData = JSON.parse(raw);
    }
    catch {
        return { ...base, invalid_json: true, exit_code: 1 };
    }
    // Syntactically valid JSON can still be unusable as a harness (`{}`,
    // `null`, `tests` not an array, entries the hasher cannot sort). Report
    // it via the existing invalid flag rather than crashing while hashing.
    if (!Array.isArray(harnessData?.tests)) {
        return { ...base, invalid_json: true, exit_code: 1 };
    }
    let rubric_hash;
    try {
        rubric_hash = computeRubricHash(harnessData.tests);
    }
    catch {
        return { ...base, invalid_json: true, exit_code: 1 };
    }
    // H3 fix: If not --force, check if a harness already exists for this hash
    if (!force) {
        try {
            const existing = await storage.getVerificationHarness?.(rubric_hash, userId);
            if (existing) {
                return { ...base, already_exists: true, rubric_hash, exit_code: 0 };
            }
        }
        catch {
            // Deliberate best-effort: a backend that cannot answer the lookup
            // must not prevent registration; fall through and save.
        }
    }
    const harness = {
        ...harnessData,
        project,
        created_at: new Date().toISOString(),
        rubric_hash,
    };
    await storage.saveVerificationHarness(harness, userId);
    return {
        ...base,
        success: true,
        rubric_hash,
        test_count: harness.tests.length,
        exit_code: 0,
    };
}
|
|
316
|
+
/**
 * CLI entry-point for `verify generate`.
 *
 * Delegates to `computeGenerateHarness`, hands the result to
 * `renderGenerateHarness` (text or JSON), and propagates a non-zero
 * `exit_code` through `process.exitCode`.
 */
export async function handleGenerateHarness(storage, project, force = false, userId = 'default', jsonMode = false) {
    const outcome = await computeGenerateHarness(storage, project, force, userId);
    renderGenerateHarness(outcome, jsonMode);
    const { exit_code: code } = outcome;
    if (code === 0) {
        return;
    }
    process.exitCode = code;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { VerificationGateError } from "../errors.js";
|
|
2
|
+
export class Gatekeeper {
    /**
     * Decide whether the pipeline may proceed after a verification run.
     *
     * Works on a shallow copy of `result`, so the caller's object is never
     * modified. With `options.forceBypass === true` the gate is skipped: the
     * copy is marked `gate_override: true` and given an `override_reason`
     * (the existing one, or a default naming the OS user from the
     * `USER` / `USERNAME` environment variables).
     *
     * Without a bypass the decision follows `gate_action`:
     *   - "continue" → allowed (warns when critical failures are present)
     *   - "block"    → not allowed
     *   - "abort"    → throws
     *   - anything else → not allowed (fail-safe)
     *
     * @param result  - The output of the VerificationRunner
     * @param options - Configuration including audit bypasses
     * @returns `{ canContinue, validatedResult }` — whether downstream
     *          execution is allowed, plus the (possibly annotated) copy.
     * @throws {VerificationGateError} when `gate_action` is "abort" and no
     *         bypass was requested.
     */
    static executeGate(result, options) {
        const validatedResult = { ...result };
        const allow = () => ({ canContinue: true, validatedResult });
        const deny = () => ({ canContinue: false, validatedResult });
        const passRatePct = () => (validatedResult.pass_rate * 100).toFixed(1);

        if (options?.forceBypass === true) {
            console.warn(`\n⚠️ [OVERRIDDEN] Verification Gate bypassed via administrator override.`);
            // Record who bypassed the gate so the override is auditable.
            const actor = process.env.USER || process.env.USERNAME || 'unknown_user';
            validatedResult.gate_override = true;
            validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass by ${actor}`;
            return allow();
        }

        const action = validatedResult.gate_action;
        if (action === "continue") {
            if (validatedResult.critical_failures > 0) {
                console.warn(`\n⚠️ [CONTINUE] Harness passed but ${validatedResult.critical_failures} critical assertion(s) failed.`);
            }
            return allow();
        }
        if (action === "block") {
            console.error(`\n🚫 [BLOCK] Harness blocked execution. ${passRatePct()}% pass rate.`);
            return deny();
        }
        if (action === "abort") {
            console.error(`\n💥 [ABORT] Critical failures detected. Pipeline aborted.`);
            throw new VerificationGateError(`Pipeline blocked by verification harness. Gate action evaluated to ABORT. Pass rate: ${passRatePct()}%`, validatedResult);
        }
        // Unrecognised action: refuse rather than guess.
        console.error(`\n⚠️ [UNKNOWN ACTION] Invalid gate action type: ${validatedResult.gate_action}. Failsafe blocking.`);
        return deny();
    }
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rename Detection Heuristic Engine
|
|
3
|
+
*
|
|
4
|
+
* Isolated module that detects probable test assertion renames by computing
|
|
5
|
+
* similarity scores between removed and added tests. This module is ONLY
|
|
6
|
+
* invoked when rename detection is explicitly enabled via --rename-detection
|
|
7
|
+
* flag or PRISM_RENAME_DETECTION=true env var.
|
|
8
|
+
*
|
|
9
|
+
* When this module is NOT invoked, the strict-by-ID deterministic behavior
|
|
10
|
+
* of v7.3.2 is preserved byte-for-byte.
|
|
11
|
+
*
|
|
12
|
+
* Algorithm: Greedy bipartite matching on composite similarity scores.
|
|
13
|
+
* - Field overlap via Jaccard coefficient over non-ID field values
|
|
14
|
+
* - Description similarity via normalized Levenshtein distance
|
|
15
|
+
* - Greedy highest-score-first, one-to-one matching (no test matched twice)
|
|
16
|
+
*
|
|
17
|
+
* @module renameDetector
|
|
18
|
+
*/
|
|
19
|
+
// ─── Constants ────────────────────────────────────────────────────────────────
|
|
20
|
+
/**
 * Minimum allowed threshold (too low = excessive false positives).
 * Lower bound applied by `clampThreshold`.
 */
export const MIN_THRESHOLD = 0.50;
/**
 * Maximum allowed threshold (too high = nothing ever matches).
 * Upper bound applied by `clampThreshold`.
 */
export const MAX_THRESHOLD = 0.95;
/**
 * Default similarity threshold when not specified.
 * Used as the default value of `detectRenames`' `threshold` parameter.
 */
export const DEFAULT_THRESHOLD = 0.70;
|
|
26
|
+
// ─── Similarity Functions ─────────────────────────────────────────────────────
|
|
27
|
+
/**
 * Similarity of two strings derived from their edit distance.
 *
 * Computes the Levenshtein distance with a two-row dynamic program and
 * normalizes it by the longer string's length:
 * `1 - distance / max(a.length, b.length)`.
 *
 * @param a First string.
 * @param b Second string.
 * @returns 1.0 for identical strings, 0.0 when exactly one is empty,
 *          otherwise a value in [0, 1].
 */
export function levenshteinSimilarity(a, b) {
    if (a === b) {
        return 1.0;
    }
    if (a.length === 0 || b.length === 0) {
        return 0.0;
    }
    const rows = a.length;
    const cols = b.length;
    // `above` holds the previous DP row, `row` the one being filled.
    let above = Array.from({ length: cols + 1 }, (_, j) => j);
    let row = new Array(cols + 1);
    for (let i = 1; i <= rows; i++) {
        row[0] = i;
        for (let j = 1; j <= cols; j++) {
            const substitution = above[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
            const insertion = row[j - 1] + 1;
            const deletion = above[j] + 1;
            row[j] = Math.min(insertion, deletion, substitution);
        }
        // Exchange the buffers instead of copying element by element.
        [above, row] = [row, above];
    }
    const distance = above[cols];
    return 1.0 - distance / Math.max(rows, cols);
}
|
|
58
|
+
/**
 * Field-overlap score between two TestAssertion objects, ignoring `id`.
 *
 * Takes the union of both objects' own keys (minus `id`) and returns the
 * fraction of those keys whose values serialize to the same JSON text in
 * both objects. A key present on only one side counts as a mismatch.
 *
 * @returns 0.0 (no field agrees) to 1.0 (every field agrees); 1.0 when
 *          neither object has any non-ID field.
 */
export function fieldJaccardSimilarity(a, b) {
    const notId = (key) => key !== 'id';
    const fields = new Set([
        ...Object.keys(a).filter(notId),
        ...Object.keys(b).filter(notId),
    ]);
    if (fields.size === 0) {
        return 1.0; // Both empty — trivially identical
    }
    const agreeing = [...fields].reduce(
        (count, field) => (JSON.stringify(a[field]) === JSON.stringify(b[field]) ? count + 1 : count),
        0,
    );
    return agreeing / fields.size;
}
|
|
78
|
+
/**
 * Blend the two similarity signals into one score for a removed/added pair.
 *
 * Weighting: 40% field overlap (`fieldJaccardSimilarity`) plus 60%
 * description similarity (`levenshteinSimilarity`). A missing or empty
 * description is treated as the empty string.
 *
 * The description is weighted more heavily on the stated assumption that
 * operators who rename a test usually keep its intent, so the description
 * carries most of the semantic signal.
 */
export function compositeSimilarity(removed, added) {
    const FIELD_WEIGHT = 0.4;
    const DESCRIPTION_WEIGHT = 0.6;
    const fieldScore = fieldJaccardSimilarity(removed, added);
    const oldDescription = removed.description || '';
    const newDescription = added.description || '';
    const descriptionScore = levenshteinSimilarity(oldDescription, newDescription);
    return FIELD_WEIGHT * fieldScore + DESCRIPTION_WEIGHT * descriptionScore;
}
|
|
91
|
+
/**
 * List the top-level fields (excluding `id`) whose values differ between
 * two TestAssertions, compared by JSON serialization.
 *
 * @returns The differing field names, sorted with the default string order.
 */
function computeChangedKeys(removed, added) {
    const fieldNames = new Set();
    for (const source of [removed, added]) {
        for (const key of Object.keys(source)) {
            if (key !== 'id') {
                fieldNames.add(key);
            }
        }
    }
    const differs = (key) => JSON.stringify(removed[key]) !== JSON.stringify(added[key]);
    return [...fieldNames].filter(differs).sort();
}
|
|
108
|
+
// ─── Core Detection ───────────────────────────────────────────────────────────
|
|
109
|
+
/**
 * Clamp and validate the threshold value.
 *
 * The value is coerced with `Number()` and limited to the inclusive range
 * [MIN_THRESHOLD, MAX_THRESHOLD]. A value that is not a number at all
 * (NaN, `undefined`, an unparseable string) falls back to
 * DEFAULT_THRESHOLD: NaN would otherwise pass through `Math.min`/`Math.max`
 * unchanged and make every `similarity >= threshold` comparison false,
 * silently disabling rename detection.
 *
 * @param threshold Requested similarity threshold.
 * @returns A finite threshold within [MIN_THRESHOLD, MAX_THRESHOLD].
 */
export function clampThreshold(threshold) {
    const requested = Number(threshold);
    if (Number.isNaN(requested)) {
        return DEFAULT_THRESHOLD;
    }
    return Math.max(MIN_THRESHOLD, Math.min(MAX_THRESHOLD, requested));
}
|
|
115
|
+
/**
 * Pair up removed and added test assertions that are probably renames.
 *
 * Every removed/added pair is scored with `compositeSimilarity`; pairs at or
 * above the clamped threshold become candidates. Candidates are then taken
 * greedily from highest to lowest score, and each removed or added test can
 * take part in at most one match. Scoring is O(n*m) for n removed and m
 * added tests.
 *
 * @param added     Tests present locally but not in stored harness
 * @param removed   Tests present in stored harness but not locally
 * @param threshold Minimum similarity to consider a rename (0.50-0.95)
 * @returns `{ renamed, residualAdded, residualRemoved }`: the matches
 *          (sorted by the removed test's id, similarity rounded to three
 *          decimals) and the tests left unmatched on either side.
 */
export function detectRenames(added, removed, threshold = DEFAULT_THRESHOLD) {
    const cutoff = clampThreshold(threshold);
    // Edge case: nothing to match
    if (added.length === 0 || removed.length === 0) {
        return { renamed: [], residualAdded: [...added], residualRemoved: [...removed] };
    }
    // Step 1: Score every removed × added pair, keeping those at or above the cutoff
    const candidates = removed.flatMap((oldTest, removedIdx) => added
        .map((newTest, addedIdx) => ({
            removedIdx,
            addedIdx,
            similarity: compositeSimilarity(oldTest, newTest),
        }))
        .filter((candidate) => candidate.similarity >= cutoff));
    // Step 2: Highest similarity first (greedy)
    candidates.sort((x, y) => y.similarity - x.similarity);
    // Step 3: Greedy one-to-one matching
    const usedRemoved = new Set();
    const usedAdded = new Set();
    const renamed = [];
    for (const { removedIdx, addedIdx, similarity } of candidates) {
        const available = !usedRemoved.has(removedIdx) && !usedAdded.has(addedIdx);
        if (available) {
            usedRemoved.add(removedIdx);
            usedAdded.add(addedIdx);
            const oldTest = removed[removedIdx];
            const newTest = added[addedIdx];
            renamed.push({
                removed: oldTest,
                added: newTest,
                similarity: Math.round(similarity * 1000) / 1000, // 3 decimal places
                changed_keys: computeChangedKeys(oldTest, newTest),
            });
        }
    }
    // Step 4: Whatever was not matched stays in added/removed
    const residualAdded = added.filter((_, idx) => !usedAdded.has(idx));
    const residualRemoved = removed.filter((_, idx) => !usedRemoved.has(idx));
    // Order by the old id so output is deterministic within the heuristic domain
    renamed.sort((x, y) => x.removed.id.localeCompare(y.removed.id));
    return { renamed, residualAdded, residualRemoved };
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as fs from "fs";
|
|
2
2
|
import { getQuickJS } from "quickjs-emscripten";
|
|
3
|
-
import { TestSuiteSchema, } from "./schema.js";
|
|
3
|
+
import { TestSuiteSchema, computeRubricHash, } from "./schema.js";
|
|
4
4
|
import { evaluateSeverityGates, resolveEffectiveSeverity } from "./severityPolicy.js";
|
|
5
5
|
// ─── Utilities ──────────────────────────────────────────────
|
|
6
6
|
/** Deeply match objects (expected ⊆ actual) */
|
|
@@ -8,6 +8,10 @@ function deepMatch(actual, expected) {
|
|
|
8
8
|
if (typeof expected !== 'object' || expected === null) {
|
|
9
9
|
return actual === expected;
|
|
10
10
|
}
|
|
11
|
+
// H1 fix: Guard against null/undefined/primitive actual before iterating
|
|
12
|
+
if (typeof actual !== 'object' || actual === null) {
|
|
13
|
+
return false;
|
|
14
|
+
}
|
|
11
15
|
for (const key of Object.keys(expected)) {
|
|
12
16
|
if (typeof actual[key] === 'object') {
|
|
13
17
|
if (!deepMatch(actual[key], expected[key]))
|
|
@@ -187,6 +191,16 @@ const DEFAULT_CONFIG = {
|
|
|
187
191
|
};
|
|
188
192
|
// ─── v7.2.0: Enhanced Verification Runner ───────────────────
|
|
189
193
|
export class VerificationRunner {
|
|
194
|
+
/**
|
|
195
|
+
* Validates that the provided tests match the expected rubric hash from the harness.
|
|
196
|
+
* Throws an error if the hash does not match, ensuring test integrity.
|
|
197
|
+
*/
|
|
198
|
+
static verifyRubricHash(tests, harness) {
|
|
199
|
+
const computed = computeRubricHash(tests);
|
|
200
|
+
if (computed !== harness.rubric_hash) {
|
|
201
|
+
throw new Error(`Rubric hash mismatch. Expected ${harness.rubric_hash}, but computeRubricHash returned ${computed}. The tests have been modified since the harness was created.`);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
190
204
|
/**
|
|
191
205
|
* v7.2.0 enhanced suite runner.
|
|
192
206
|
*
|
|
@@ -195,16 +209,21 @@ export class VerificationRunner {
|
|
|
195
209
|
* - Retry logic for transient failures
|
|
196
210
|
* - Dependency chain resolution
|
|
197
211
|
* - Structured VerificationResult with per-layer breakdown
|
|
212
|
+
* - Rubric hash validation if a harness is provided
|
|
198
213
|
*/
|
|
199
214
|
static async runSuite(jsonContent, options) {
|
|
200
215
|
const startTime = Date.now();
|
|
201
216
|
const config = options?.config ?? DEFAULT_CONFIG;
|
|
202
217
|
const filterLayers = options?.layers ?? config.layers;
|
|
203
218
|
const minSeverity = options?.minSeverity;
|
|
219
|
+
const harness = options?.harness;
|
|
204
220
|
let assertionResults = [];
|
|
205
221
|
try {
|
|
206
222
|
const parsed = JSON.parse(jsonContent);
|
|
207
223
|
const suite = TestSuiteSchema.parse(parsed);
|
|
224
|
+
if (harness) {
|
|
225
|
+
VerificationRunner.verifyRubricHash(suite.tests, harness);
|
|
226
|
+
}
|
|
208
227
|
const { preparedById, orderedIds, precomputed } = prepareAssertions(suite.tests, filterLayers, minSeverity, config);
|
|
209
228
|
const outcomes = new Map();
|
|
210
229
|
const resultById = new Map(precomputed);
|
|
@@ -438,11 +457,14 @@ export class VerificationRunner {
|
|
|
438
457
|
ops++;
|
|
439
458
|
return ops > 10000;
|
|
440
459
|
});
|
|
441
|
-
//
|
|
442
|
-
//
|
|
443
|
-
//
|
|
460
|
+
// C3 fix: Use vm.newString() + vm.setProp() to safely pass JSON
|
|
461
|
+
// into the VM without any string escaping. This prevents injection
|
|
462
|
+
// attacks from crafted input values containing quotes or backslashes.
|
|
444
463
|
const inputsJson = JSON.stringify(inputs);
|
|
445
|
-
const
|
|
464
|
+
const inputsJsonHandle = vm.newString(inputsJson);
|
|
465
|
+
vm.setProp(vm.global, "__inputsJson", inputsJsonHandle);
|
|
466
|
+
inputsJsonHandle.dispose();
|
|
467
|
+
const parseResult = vm.evalCode(`JSON.parse(__inputsJson)`);
|
|
446
468
|
if (parseResult.error) {
|
|
447
469
|
const err = vm.dump(parseResult.error);
|
|
448
470
|
parseResult.error.dispose();
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { createHash } from "crypto";
|
|
2
3
|
// ─── v7.2.0: Severity Levels ────────────────────────────────
|
|
3
4
|
// warn → log and continue
|
|
4
5
|
// gate → block progression until resolved
|
|
@@ -44,3 +45,20 @@ export const TestAssertionSchema = z.object({
|
|
|
44
45
|
export const TestSuiteSchema = z.object({
|
|
45
46
|
tests: z.array(TestAssertionSchema)
|
|
46
47
|
});
|
|
48
|
+
// ─── v7.2.0: Rubric Hash Utility ─────────────────────────────
|
|
49
|
+
/**
 * Compute a SHA-256 fingerprint of a list of test assertions.
 *
 * A copy of the list is sorted by `id` and serialized with
 * `JSON.stringify` before hashing, so the order in which tests appear in
 * the input does not change the result. The input array is not mutated.
 *
 * NOTE(review): ordering uses `String.prototype.localeCompare` with the
 * default locale, and the serialization keeps each test's own key order —
 * confirm both are identical wherever hashes are compared.
 *
 * @param tests - The array of TestAssertion to hash
 * @returns Lowercase hex SHA-256 digest
 */
export function computeRubricHash(tests) {
    const ordered = Array.from(tests);
    ordered.sort((left, right) => left.id.localeCompare(right.id));
    const hasher = createHash("sha256");
    hasher.update(JSON.stringify(ordered));
    return hasher.digest("hex");
}
|
|
@@ -17,7 +17,11 @@ function severityRank(s) {
|
|
|
17
17
|
case "warn": return 0;
|
|
18
18
|
case "gate": return 1;
|
|
19
19
|
case "abort": return 2;
|
|
20
|
-
default:
|
|
20
|
+
default: {
|
|
21
|
+
// M4 fix: Exhaustive check — future SeverityLevel additions will cause a compile error
|
|
22
|
+
const _exhaustive = s;
|
|
23
|
+
return _exhaustive;
|
|
24
|
+
}
|
|
21
25
|
}
|
|
22
26
|
}
|
|
23
27
|
/**
|