prism-mcp-server 7.3.1 → 7.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,325 @@
1
+ import * as fs from 'fs/promises';
2
+ import { computeRubricHash } from './schema.js';
3
+ // ─── Constants ────────────────────────────────────────────────────────────────
4
/**
 * Single source of truth for the local harness file location.
 * The path is relative, so file reads resolve it against the process working directory.
 */
const DEFAULT_HARNESS_PATH = './verification_harness.json';
6
+ // ─── Utilities ────────────────────────────────────────────────────────────────
7
/**
 * Report whether the current process should enforce strict verification policy.
 *
 * Strict mode is on when `CI` is exactly 'true' or '1', or when any of
 * `GITHUB_ACTIONS`, `GITLAB_CI`, or `PRISM_STRICT_VERIFICATION` is exactly 'true'.
 *
 * @returns {boolean} true when at least one of the recognised variables opts in.
 */
export function isStrictVerificationEnv() {
  const { CI, GITHUB_ACTIONS, GITLAB_CI, PRISM_STRICT_VERIFICATION } = process.env;
  // Generic CI flag: both spellings used by common CI providers are accepted.
  if (CI === 'true' || CI === '1') {
    return true;
  }
  // Provider-specific flags and the explicit opt-in only accept the literal 'true'.
  return [GITHUB_ACTIONS, GITLAB_CI, PRISM_STRICT_VERIFICATION].includes('true');
}
15
+ // ─── Renderers ────────────────────────────────────────────────────────────────
16
/**
 * Render a VerifyStatusResult either as pretty-printed JSON or as human-readable console output.
 *
 * In JSON mode the result is written verbatim to stdout and nothing else is printed.
 * In human mode the function prints the last run summary, then exactly one of:
 * harness missing, invalid JSON, synchronized, or a drift report (optionally with a diff).
 *
 * @param {object} result - Result object produced by `computeVerifyStatus`.
 * @param {boolean} jsonMode - When true, emit JSON only.
 * @returns {void}
 */
function renderVerifyStatus(result, jsonMode) {
  if (jsonMode) {
    process.stdout.write(JSON.stringify(result, null, 2) + '\n');
    return;
  }
  console.log(`\n🔍 Checking verification status for project: ${result.project}...`);
  if (result.no_runs) {
    console.log('⚠️ No previous verification runs found.');
    return;
  }
  const r = result.last_run;
  const overrideBadge = r.gate_override
    ? `[OVERRIDDEN${r.override_reason ? `: ${r.override_reason}` : ''}] `
    : '';
  const passText = r.passed ? 'YES' : 'NO';
  console.log(`✅ Last Run: ${r.run_at} | Passed: ${overrideBadge}${passText}`);
  console.log(` Pass Rate: ${(r.pass_rate * 100).toFixed(1)}% | Critical Failures: ${r.critical_failures}`);
  console.log(` Coverage Score: ${(r.coverage_score * 100).toFixed(1)}% | Gate Action: ${r.gate_action}`);
  if (result.harness_missing) {
    console.log('\nℹ️ No local verification_harness.json found to check against.');
    return;
  }
  if (result.harness_invalid_json) {
    console.error(`\n❌ Invalid JSON in ${DEFAULT_HARNESS_PATH}.`);
    return;
  }
  if (result.synchronized) {
    console.log('\n✨ Harness is synchronized.');
    return;
  }
  // Drift output — phrasing differs only by policy outcome, not unrelated wording
  const d = result.drift;
  // The stored hash is copied from the last run without validation, so it may be
  // absent; abbreviating it must not crash the human-readable report.
  const shortHash = (hash) => (typeof hash === 'string' ? `${hash.slice(0, 8)}...` : 'unknown');
  const hashLine = ` Stored Hash: ${shortHash(d.stored_hash)} Local Hash: ${shortHash(d.local_hash)}`;
  if (d.policy === 'bypassed') {
    console.warn('\n🚨 [BYPASSED] Configuration drift detected.');
    console.warn(hashLine);
    console.warn(` Drift block bypassed via --force. Recommended: run 'prism verify generate' to realign.`);
  } else if (d.policy === 'blocked') {
    console.error('\n🚫 [BLOCKED] Configuration drift detected — CI environment enforces strict policy.');
    console.error(hashLine);
    console.error(` Action: run 'prism verify generate' before merging to update your harness.`);
  } else {
    // 'warn' — local dev
    console.warn('\n⚠️ [DRIFT] Configuration drift detected.');
    console.warn(hashLine);
    console.warn(` Recommended: run 'prism verify generate' to update your harness.`);
  }
  // Diff details are optional: they are only present when the stored harness could be loaded.
  if (d.diff_counts) {
    console.log(`\n Diff Summary: +${d.diff_counts.added} added, ~${d.diff_counts.modified} modified, -${d.diff_counts.removed} removed`);
  }
  if (d.diff) {
    console.log('\n Changes Detected:');
    for (const add of d.diff.added) {
      console.log(` + ${add.id}: ${add.description}`);
    }
    for (const mod of d.diff.modified) {
      const keys = mod.changed_keys;
      const keySuffix = keys?.length ? ` [${keys.join(', ')}]` : '';
      console.log(` ~ ${mod.id}: ${mod.description}${keySuffix}`);
    }
    for (const rem of d.diff.removed) {
      console.log(` - ${rem.id}: ${rem.description}`);
    }
  }
}
83
/**
 * Present a GenerateHarnessResult to the user.
 *
 * JSON mode writes the raw result to stdout. Human mode prints a heading and then
 * the first applicable outcome, checked in this order: file missing, invalid JSON,
 * hash already registered, success.
 *
 * @param {object} result - Result object produced by `computeGenerateHarness`.
 * @param {boolean} jsonMode - When true, emit JSON only.
 * @returns {void}
 */
function renderGenerateHarness(result, jsonMode) {
  if (jsonMode) {
    process.stdout.write(JSON.stringify(result, null, 2) + '\n');
    return;
  }

  // Abbreviated hash for display; evaluated lazily so it is only read when printed.
  const hashPreview = () => result.rubric_hash?.slice(0, 12);

  console.log(`\n🛠 Generating/Refreshing harness for project: ${result.project}...`);

  if (result.file_missing) {
    console.error(`❌ Failed to read ${DEFAULT_HARNESS_PATH}. Does the file exist?`);
  } else if (result.invalid_json) {
    console.error(`❌ Invalid JSON in ${DEFAULT_HARNESS_PATH}.`);
  } else if (result.already_exists) {
    console.warn(`\n⚠️ A harness with rubric hash ${hashPreview()}... already exists.`);
    console.warn(' Use --force to re-register anyway.');
  } else if (result.success) {
    console.log('✅ Harness registered successfully.');
    console.log(` Hash: ${hashPreview()}...`);
    console.log(` Tests: ${result.test_count} assertions.`);
  }
}
109
+ // ─── Handlers ─────────────────────────────────────────────────────────────────
110
/**
 * Build the structured added/removed/modified diff between two test lists, matched by `id`.
 * Modified entries are the local test plus a sorted `changed_keys` list of top-level fields
 * whose JSON serialisation differs. Each list is sorted by `id` for stable output.
 */
function buildStructuredDiff(storedTests, localTests) {
  const storedById = new Map(storedTests.map((t) => [t.id, t]));
  const localById = new Map(localTests.map((t) => [t.id, t]));
  const diff = { added: [], removed: [], modified: [] };
  for (const [id, localTest] of localById) {
    const storedTest = storedById.get(id);
    if (!storedTest) {
      diff.added.push(localTest);
      continue;
    }
    // Serialised comparison stands in for deep equality.
    if (JSON.stringify(storedTest) === JSON.stringify(localTest)) {
      continue;
    }
    const changedKeys = [...new Set([...Object.keys(storedTest), ...Object.keys(localTest)])]
      .filter((key) => JSON.stringify(storedTest[key]) !== JSON.stringify(localTest[key]))
      .sort();
    diff.modified.push({ ...localTest, changed_keys: changedKeys });
  }
  for (const [id, storedTest] of storedById) {
    if (!localById.has(id)) {
      diff.removed.push(storedTest);
    }
  }
  const byId = (a, b) => a.id.localeCompare(b.id);
  diff.added.sort(byId);
  diff.removed.sort(byId);
  diff.modified.sort(byId);
  const diffCounts = {
    added: diff.added.length,
    removed: diff.removed.length,
    modified: diff.modified.length,
  };
  return { diff, diffCounts };
}
/**
 * Core logic for `verify status`.
 *
 * Loads the most recent verification run, compares the rubric hash of the local
 * harness file against the hash recorded on that run, and classifies any drift.
 * Returns a VerifyStatusResult — callers decide how to render it and whether to exit.
 *
 * Outcomes:
 * - no runs stored            → `no_runs: true`
 * - harness file unreadable   → `harness_missing: true`
 * - harness not valid JSON, or valid JSON without a `tests` array
 *                             → `harness_invalid_json: true`, `exit_code: 1`
 * - hashes equal              → `synchronized: true`
 * - hashes differ             → `synchronized: false` plus `drift` with policy
 *   'bypassed' (force), 'blocked' (strict environment, `exit_code: 1`) or 'warn'.
 *
 * @param {object} storage - Backend exposing `listVerificationRuns` and, optionally, `getVerificationHarness`.
 * @param {string} project - Project identifier passed to storage and echoed in the result.
 * @param {boolean} [force=false] - When true, drift is reported but never blocks.
 * @param {string} [userId='default'] - User scope passed to storage.
 * @returns {Promise<object>} The status result.
 */
export async function computeVerifyStatus(storage, project, force = false, userId = 'default') {
  const base = {
    schema_version: 1,
    project,
    no_runs: false,
    harness_missing: false,
    harness_invalid_json: false,
    synchronized: null,
    recommended_action: null,
    exit_code: 0,
  };
  // 1. Get latest run
  const runs = await storage.listVerificationRuns(project, userId);
  const lastRun = runs[0];
  if (!lastRun) {
    return { ...base, no_runs: true, recommended_action: 'run prism verify generate' };
  }
  base.last_run = {
    run_at: lastRun.run_at,
    passed: lastRun.passed,
    pass_rate: lastRun.pass_rate,
    critical_failures: lastRun.critical_failures,
    coverage_score: lastRun.coverage_score,
    gate_action: lastRun.gate_action,
    gate_override: lastRun.gate_override ?? false,
    override_reason: lastRun.override_reason,
  };
  // 2. Drift detection — read and parse are separate so each failure gets its own flag.
  let harnessRaw;
  try {
    harnessRaw = await fs.readFile(DEFAULT_HARNESS_PATH, 'utf-8');
  } catch {
    return { ...base, harness_missing: true };
  }
  let localHarness;
  try {
    localHarness = JSON.parse(harnessRaw);
  } catch {
    return { ...base, harness_invalid_json: true, exit_code: 1 };
  }
  // Valid JSON is not necessarily a valid harness: `null`, `{}` or a non-array
  // `tests` would otherwise crash the hash computation with a TypeError.
  if (!Array.isArray(localHarness?.tests)) {
    return { ...base, harness_invalid_json: true, exit_code: 1 };
  }
  const localHash = computeRubricHash(localHarness.tests);
  const storedHash = lastRun.rubric_hash;
  if (localHash === storedHash) {
    return { ...base, synchronized: true };
  }
  // Drift detected
  const strictEnv = isStrictVerificationEnv();
  // Structured diff is best-effort: it needs the harness that produced the stored hash.
  let structured;
  try {
    const historicalHarness = await storage.getVerificationHarness?.(storedHash, userId);
    if (historicalHarness) {
      structured = buildStructuredDiff(historicalHarness.tests, localHarness.tests);
    }
  } catch {
    // Deliberate: failure to load or diff the historical harness only drops the optional
    // `diff`/`diff_counts` fields; the drift verdict itself is still reported.
  }
  const drift = { stored_hash: storedHash, local_hash: localHash, strict_env: strictEnv };
  if (structured) {
    drift.diff = structured.diff;
    drift.diff_counts = structured.diffCounts;
  }
  // --force wins over the strict environment; otherwise strict environments block.
  let policy = 'warn';
  if (force) {
    policy = 'bypassed';
  } else if (strictEnv) {
    policy = 'blocked';
  }
  drift.policy = policy;
  return {
    ...base,
    synchronized: false,
    drift,
    recommended_action: "run 'prism verify generate' to update your harness",
    exit_code: policy === 'blocked' ? 1 : 0,
  };
}
248
/**
 * CLI entry point for `verify status`.
 *
 * Delegates to `computeVerifyStatus`, hands the result to `renderVerifyStatus`,
 * and records a non-zero exit code on the process without terminating it.
 *
 * @param {object} storage - Storage backend forwarded to `computeVerifyStatus`.
 * @param {string} project - Project identifier.
 * @param {boolean} [force=false] - Forwarded to `computeVerifyStatus`.
 * @param {string} [userId='default'] - Forwarded to `computeVerifyStatus`.
 * @param {boolean} [jsonMode=false] - Forwarded to the renderer.
 * @returns {Promise<void>}
 */
export async function handleVerifyStatus(storage, project, force = false, userId = 'default', jsonMode = false) {
  const status = await computeVerifyStatus(storage, project, force, userId);
  renderVerifyStatus(status, jsonMode);
  const { exit_code: exitCode } = status;
  if (exitCode === 0) {
    return;
  }
  // Setting process.exitCode (rather than calling process.exit) lets pending work and tests finish.
  process.exitCode = exitCode;
}
260
/**
 * Core logic for `verify generate`.
 *
 * Reads the local harness file, computes its rubric hash, and registers it with
 * storage unless an identical hash is already registered (skipped with `force`).
 *
 * Outcomes:
 * - file unreadable → `file_missing: true`, `exit_code: 1`
 * - not valid JSON, or valid JSON without a `tests` array
 *                   → `invalid_json: true`, `exit_code: 1`
 * - hash already registered and not forced → `already_exists: true`
 * - otherwise the harness is saved → `success: true` with `rubric_hash` and `test_count`
 *
 * Errors thrown by `storage.saveVerificationHarness` propagate to the caller.
 *
 * @param {object} storage - Backend exposing `saveVerificationHarness` and, optionally, `getVerificationHarness`.
 * @param {string} project - Project identifier stamped onto the saved harness.
 * @param {boolean} [force=false] - When true, skip the already-registered check.
 * @param {string} [userId='default'] - User scope passed to storage.
 * @returns {Promise<object>} The generate result.
 */
export async function computeGenerateHarness(storage, project, force = false, userId = 'default') {
  const base = {
    schema_version: 1,
    project,
    success: false,
    already_exists: false,
    file_missing: false,
    invalid_json: false,
    exit_code: 0,
  };
  let raw;
  try {
    raw = await fs.readFile(DEFAULT_HARNESS_PATH, 'utf-8');
  } catch {
    return { ...base, file_missing: true, exit_code: 1 };
  }
  let harnessData;
  try {
    harnessData = JSON.parse(raw);
  } catch {
    return { ...base, invalid_json: true, exit_code: 1 };
  }
  // Valid JSON is not necessarily a valid harness: without a `tests` array the hash
  // computation and the test count below would throw a TypeError.
  if (!Array.isArray(harnessData?.tests)) {
    return { ...base, invalid_json: true, exit_code: 1 };
  }
  const rubric_hash = computeRubricHash(harnessData.tests);
  // Without --force, refuse to re-register a hash that storage already knows.
  if (!force) {
    try {
      const existing = await storage.getVerificationHarness?.(rubric_hash, userId);
      if (existing) {
        return { ...base, already_exists: true, rubric_hash, exit_code: 0 };
      }
    } catch {
      // Deliberate best-effort: a failing lookup must not prevent registration; proceed.
    }
  }
  // File contents come first so project, timestamp and hash always reflect this registration.
  const harness = {
    ...harnessData,
    project,
    created_at: new Date().toISOString(),
    rubric_hash,
  };
  await storage.saveVerificationHarness(harness, userId);
  return {
    ...base,
    success: true,
    rubric_hash,
    test_count: harness.tests.length,
    exit_code: 0,
  };
}
316
/**
 * CLI entry point for `verify generate`.
 *
 * Delegates to `computeGenerateHarness`, renders the result, and records a
 * non-zero exit code on the process without terminating it.
 *
 * @param {object} storage - Storage backend forwarded to `computeGenerateHarness`.
 * @param {string} project - Project identifier.
 * @param {boolean} [force=false] - Forwarded to `computeGenerateHarness`.
 * @param {string} [userId='default'] - Forwarded to `computeGenerateHarness`.
 * @param {boolean} [jsonMode=false] - Forwarded to the renderer.
 * @returns {Promise<void>}
 */
export async function handleGenerateHarness(storage, project, force = false, userId = 'default', jsonMode = false) {
  const outcome = await computeGenerateHarness(storage, project, force, userId);
  renderGenerateHarness(outcome, jsonMode);
  const { exit_code: exitCode } = outcome;
  if (exitCode === 0) {
    return;
  }
  process.exitCode = exitCode;
}
@@ -0,0 +1,39 @@
1
+ import { VerificationGateError } from "../errors.js";
2
export class Gatekeeper {
  /**
   * Decide whether the pipeline may proceed after a verification run.
   *
   * Works on a shallow copy of `result`, so the caller's object is never modified.
   * With `options.forceBypass === true` the gate is overridden: the copy is marked
   * `gate_override` and given an `override_reason` (an existing reason is kept).
   * Otherwise the copy's `gate_action` decides: "continue" allows, "block" denies,
   * "abort" throws, and anything else denies as a failsafe.
   *
   * @param result - The output of the VerificationRunner
   * @param options - Optional settings; only `forceBypass` is read here
   * @returns `{ canContinue, validatedResult }`
   * @throws VerificationGateError when the gate action is "abort" and no bypass is given
   */
  static executeGate(result, options) {
    const validatedResult = { ...result };
    const allow = () => ({ canContinue: true, validatedResult });
    const deny = () => ({ canContinue: false, validatedResult });

    if (options?.forceBypass === true) {
      console.warn(`\n⚠️ [OVERRIDDEN] Verification Gate bypassed via administrator override.`);
      // Audit trail: record that the gate was overridden and, if no reason was supplied, by whom.
      validatedResult.gate_override = true;
      if (!validatedResult.override_reason) {
        const actor = process.env.USER || process.env.USERNAME || 'unknown_user';
        validatedResult.override_reason = `CLI --force bypass by ${actor}`;
      }
      return allow();
    }

    const gateAction = validatedResult.gate_action;
    if (gateAction === "continue") {
      if (validatedResult.critical_failures > 0) {
        console.warn(`\n⚠️ [CONTINUE] Harness passed but ${validatedResult.critical_failures} critical assertion(s) failed.`);
      }
      return allow();
    }
    if (gateAction === "block") {
      console.error(`\n🚫 [BLOCK] Harness blocked execution. ${(validatedResult.pass_rate * 100).toFixed(1)}% pass rate.`);
      return deny();
    }
    if (gateAction === "abort") {
      console.error(`\n💥 [ABORT] Critical failures detected. Pipeline aborted.`);
      throw new VerificationGateError(`Pipeline blocked by verification harness. Gate action evaluated to ABORT. Pass rate: ${(validatedResult.pass_rate * 100).toFixed(1)}%`, validatedResult);
    }
    // Unrecognised action: fail closed.
    console.error(`\n⚠️ [UNKNOWN ACTION] Invalid gate action type: ${validatedResult.gate_action}. Failsafe blocking.`);
    return deny();
  }
}
@@ -0,0 +1,170 @@
1
+ /**
2
+ * Rename Detection Heuristic Engine
3
+ *
4
+ * Isolated module that detects probable test assertion renames by computing
5
+ * similarity scores between removed and added tests. This module is ONLY
6
+ * invoked when rename detection is explicitly enabled via --rename-detection
7
+ * flag or PRISM_RENAME_DETECTION=true env var.
8
+ *
9
+ * When this module is NOT invoked, the strict-by-ID deterministic behavior
10
+ * of v7.3.2 is preserved byte-for-byte.
11
+ *
12
+ * Algorithm: Greedy bipartite matching on composite similarity scores.
13
+ * - Field overlap via Jaccard coefficient over non-ID field values
14
+ * - Description similarity via normalized Levenshtein distance
15
+ * - Greedy highest-score-first, one-to-one matching (no test matched twice)
16
+ *
17
+ * @module renameDetector
18
+ */
19
+ // ─── Constants ────────────────────────────────────────────────────────────────
20
/** Floor for the similarity threshold; lower values would admit too many false-positive renames. */
export const MIN_THRESHOLD = 0.5;
/** Ceiling for the similarity threshold; higher values would make matches practically impossible. */
export const MAX_THRESHOLD = 0.95;
/** Threshold used when the caller does not supply one. */
export const DEFAULT_THRESHOLD = 0.7;
26
+ // ─── Similarity Functions ─────────────────────────────────────────────────────
27
/**
 * Similarity of two strings derived from their Levenshtein edit distance:
 * `1 - distance / max(len(a), len(b))`.
 *
 * Identical strings score 1.0; if exactly one string is empty the score is 0.0.
 * Characters are compared by string index, i.e. per UTF-16 code unit.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity in the range 0.0 to 1.0.
 */
export function levenshteinSimilarity(a, b) {
  if (a === b) {
    return 1.0;
  }
  const rows = a.length;
  const cols = b.length;
  if (rows === 0 || cols === 0) {
    return 0.0;
  }
  // Two-row dynamic programme: only the previous row is needed to fill the current one.
  let previousRow = Array.from({ length: cols + 1 }, (_, j) => j);
  let currentRow = new Array(cols + 1);
  for (let i = 1; i <= rows; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= cols; j++) {
      const substitution = previousRow[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      const insertion = currentRow[j - 1] + 1;
      const deletion = previousRow[j] + 1;
      currentRow[j] = Math.min(insertion, deletion, substitution);
    }
    // The finished row becomes "previous"; the old one is fully overwritten next round.
    [previousRow, currentRow] = [currentRow, previousRow];
  }
  const distance = previousRow[cols];
  return 1.0 - distance / Math.max(rows, cols);
}
58
/**
 * Fraction of non-`id` fields on which two objects agree.
 *
 * The denominator is the union of both objects' own keys (excluding `id`); a field
 * counts as agreeing when both sides serialise to the same JSON. Two objects with
 * no fields other than `id` are treated as identical.
 *
 * @param {object} a - First test assertion.
 * @param {object} b - Second test assertion.
 * @returns {number} Score from 0.0 (no agreeing field) to 1.0 (all fields agree).
 */
export function fieldJaccardSimilarity(a, b) {
  const fieldNames = new Set();
  for (const source of [a, b]) {
    for (const name of Object.keys(source)) {
      if (name !== 'id') {
        fieldNames.add(name);
      }
    }
  }
  if (fieldNames.size === 0) {
    return 1.0;
  }
  const agreeing = [...fieldNames]
    .filter((name) => JSON.stringify(a[name]) === JSON.stringify(b[name]))
    .length;
  return agreeing / fieldNames.size;
}
78
/**
 * Blend field agreement and description similarity into one score.
 *
 * The result is 40% `fieldJaccardSimilarity` plus 60% `levenshteinSimilarity` of the
 * two descriptions; a missing or empty description is compared as the empty string.
 * The description carries the larger weight because a renamed test usually keeps
 * its intent even when other fields are restructured.
 *
 * @param {object} removed - Test that disappeared from the harness.
 * @param {object} added - Test that appeared in the harness.
 * @returns {number} Weighted similarity score.
 */
export function compositeSimilarity(removed, added) {
  const fieldScore = fieldJaccardSimilarity(removed, added);
  const oldDescription = removed.description || '';
  const newDescription = added.description || '';
  const descriptionScore = levenshteinSimilarity(oldDescription, newDescription);
  return 0.4 * fieldScore + 0.6 * descriptionScore;
}
91
/**
 * List the top-level fields, other than `id`, whose JSON serialisation differs
 * between two test assertions. Keys present on only one side count as changed
 * unless their value serialises the same as a missing value.
 *
 * @param {object} removed - The old test assertion.
 * @param {object} added - The new test assertion.
 * @returns {string[]} Changed field names in default sort order.
 */
function computeChangedKeys(removed, added) {
  const candidateKeys = new Set([...Object.keys(removed), ...Object.keys(added)]);
  candidateKeys.delete('id');
  return [...candidateKeys]
    .filter((key) => JSON.stringify(removed[key]) !== JSON.stringify(added[key]))
    .sort();
}
108
+ // ─── Core Detection ───────────────────────────────────────────────────────────
109
/**
 * Validate a similarity threshold and clamp it into [MIN_THRESHOLD, MAX_THRESHOLD].
 *
 * A value that is not a number after coercion (for example `NaN`, `undefined`, or an
 * unparsable string) falls back to DEFAULT_THRESHOLD. Without that fallback NaN would
 * pass straight through `Math.min`/`Math.max`, and every `similarity >= NaN`
 * comparison downstream would be false, silently disabling rename detection.
 *
 * @param {number} threshold - Requested threshold.
 * @returns {number} A usable threshold within the allowed range.
 */
export function clampThreshold(threshold) {
  const requested = Number(threshold);
  if (Number.isNaN(requested)) {
    return DEFAULT_THRESHOLD;
  }
  return Math.max(MIN_THRESHOLD, Math.min(MAX_THRESHOLD, requested));
}
115
/**
 * Pair up removed and added tests that look like renames of one another.
 *
 * Every removed/added pair is scored with `compositeSimilarity`; pairs at or above
 * the clamped threshold become candidates. Candidates are taken best-score-first,
 * and a test that is already part of a match is never matched again. Equal scores
 * keep their enumeration order (removed index first, then added index).
 *
 * Note the parameter order: `added` comes before `removed`.
 *
 * @param added Tests present locally but not in the stored harness
 * @param removed Tests present in the stored harness but not locally
 * @param threshold Minimum similarity to consider a rename; clamped via `clampThreshold`
 * @returns `{ renamed, residualAdded, residualRemoved }` — `renamed` is sorted by the
 *          removed test's id; residuals keep their input order
 */
export function detectRenames(added, removed, threshold = DEFAULT_THRESHOLD) {
  const cutoff = clampThreshold(threshold);
  // Nothing to pair when either side is empty; hand back copies of the inputs.
  if (added.length === 0 || removed.length === 0) {
    return { renamed: [], residualAdded: [...added], residualRemoved: [...removed] };
  }

  // Score all pairs and keep those that clear the cutoff.
  const candidates = [];
  for (const [removedIdx, removedTest] of removed.entries()) {
    for (const [addedIdx, addedTest] of added.entries()) {
      const similarity = compositeSimilarity(removedTest, addedTest);
      if (similarity >= cutoff) {
        candidates.push({ removedIdx, addedIdx, similarity });
      }
    }
  }

  // Best candidates first.
  candidates.sort((left, right) => right.similarity - left.similarity);

  // Greedy one-to-one assignment.
  const usedRemoved = new Set();
  const usedAdded = new Set();
  const renamed = [];
  for (const { removedIdx, addedIdx, similarity } of candidates) {
    const alreadyPaired = usedRemoved.has(removedIdx) || usedAdded.has(addedIdx);
    if (!alreadyPaired) {
      usedRemoved.add(removedIdx);
      usedAdded.add(addedIdx);
      const oldTest = removed[removedIdx];
      const newTest = added[addedIdx];
      renamed.push({
        removed: oldTest,
        added: newTest,
        similarity: Math.round(similarity * 1000) / 1000, // keep three decimal places
        changed_keys: computeChangedKeys(oldTest, newTest),
      });
    }
  }

  // Whatever was not paired remains a plain addition or removal.
  const residualAdded = added.filter((_, index) => !usedAdded.has(index));
  const residualRemoved = removed.filter((_, index) => !usedRemoved.has(index));

  // Order renames by the old id so output does not depend on score order.
  renamed.sort((left, right) => left.removed.id.localeCompare(right.removed.id));
  return { renamed, residualAdded, residualRemoved };
}
@@ -1,6 +1,6 @@
1
1
  import * as fs from "fs";
2
2
  import { getQuickJS } from "quickjs-emscripten";
3
- import { TestSuiteSchema, } from "./schema.js";
3
+ import { TestSuiteSchema, computeRubricHash, } from "./schema.js";
4
4
  import { evaluateSeverityGates, resolveEffectiveSeverity } from "./severityPolicy.js";
5
5
  // ─── Utilities ──────────────────────────────────────────────
6
6
  /** Deeply match objects (expected ⊆ actual) */
@@ -8,6 +8,10 @@ function deepMatch(actual, expected) {
8
8
  if (typeof expected !== 'object' || expected === null) {
9
9
  return actual === expected;
10
10
  }
11
+ // H1 fix: Guard against null/undefined/primitive actual before iterating
12
+ if (typeof actual !== 'object' || actual === null) {
13
+ return false;
14
+ }
11
15
  for (const key of Object.keys(expected)) {
12
16
  if (typeof actual[key] === 'object') {
13
17
  if (!deepMatch(actual[key], expected[key]))
@@ -187,6 +191,16 @@ const DEFAULT_CONFIG = {
187
191
  };
188
192
  // ─── v7.2.0: Enhanced Verification Runner ───────────────────
189
193
  export class VerificationRunner {
194
+ /**
195
+ * Validates that the provided tests match the expected rubric hash from the harness.
196
+ * Throws an error if the hash does not match, ensuring test integrity.
197
+ */
198
+ static verifyRubricHash(tests, harness) {
199
+ const computed = computeRubricHash(tests);
200
+ if (computed !== harness.rubric_hash) {
201
+ throw new Error(`Rubric hash mismatch. Expected ${harness.rubric_hash}, but computeRubricHash returned ${computed}. The tests have been modified since the harness was created.`);
202
+ }
203
+ }
190
204
  /**
191
205
  * v7.2.0 enhanced suite runner.
192
206
  *
@@ -195,16 +209,21 @@ export class VerificationRunner {
195
209
  * - Retry logic for transient failures
196
210
  * - Dependency chain resolution
197
211
  * - Structured VerificationResult with per-layer breakdown
212
+ * - Rubric hash validation if a harness is provided
198
213
  */
199
214
  static async runSuite(jsonContent, options) {
200
215
  const startTime = Date.now();
201
216
  const config = options?.config ?? DEFAULT_CONFIG;
202
217
  const filterLayers = options?.layers ?? config.layers;
203
218
  const minSeverity = options?.minSeverity;
219
+ const harness = options?.harness;
204
220
  let assertionResults = [];
205
221
  try {
206
222
  const parsed = JSON.parse(jsonContent);
207
223
  const suite = TestSuiteSchema.parse(parsed);
224
+ if (harness) {
225
+ VerificationRunner.verifyRubricHash(suite.tests, harness);
226
+ }
208
227
  const { preparedById, orderedIds, precomputed } = prepareAssertions(suite.tests, filterLayers, minSeverity, config);
209
228
  const outcomes = new Map();
210
229
  const resultById = new Map(precomputed);
@@ -438,11 +457,14 @@ export class VerificationRunner {
438
457
  ops++;
439
458
  return ops > 10000;
440
459
  });
441
- // v7.2.0 FIX: Properly inject inputs as a JSON string literal,
442
- // then JSON.parse inside the VM. The previous approach broke on
443
- // object/array inputs due to unquoted interpolation.
460
+ // C3 fix: Use vm.newString() + vm.setProp() to safely pass JSON
461
+ // into the VM without any string escaping. This prevents injection
462
+ // attacks from crafted input values containing quotes or backslashes.
444
463
  const inputsJson = JSON.stringify(inputs);
445
- const parseResult = vm.evalCode(`JSON.parse('${inputsJson.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}')`);
464
+ const inputsJsonHandle = vm.newString(inputsJson);
465
+ vm.setProp(vm.global, "__inputsJson", inputsJsonHandle);
466
+ inputsJsonHandle.dispose();
467
+ const parseResult = vm.evalCode(`JSON.parse(__inputsJson)`);
446
468
  if (parseResult.error) {
447
469
  const err = vm.dump(parseResult.error);
448
470
  parseResult.error.dispose();
@@ -1,4 +1,5 @@
1
1
  import { z } from "zod";
2
+ import { createHash } from "crypto";
2
3
  // ─── v7.2.0: Severity Levels ────────────────────────────────
3
4
  // warn → log and continue
4
5
  // gate → block progression until resolved
@@ -44,3 +45,20 @@ export const TestAssertionSchema = z.object({
44
45
  export const TestSuiteSchema = z.object({
45
46
  tests: z.array(TestAssertionSchema)
46
47
  });
48
+ // ─── v7.2.0: Rubric Hash Utility ─────────────────────────────
49
/**
 * Produce a SHA-256 fingerprint of a list of test assertions.
 *
 * A copy of the list is ordered by `id` before serialisation, so the order in
 * which tests appear in the input does not influence the digest. The input
 * array itself is left untouched.
 *
 * NOTE(review): ordering uses `String.prototype.localeCompare`, and the digest also
 * depends on each test's property order as emitted by `JSON.stringify`. Confirm that
 * both are acceptable for cross-environment stability before changing either, since
 * any change alters previously stored hashes.
 *
 * @param tests - The array of TestAssertion to hash
 * @returns Lowercase hex SHA-256 digest
 */
export function computeRubricHash(tests) {
  const byId = (left, right) => left.id.localeCompare(right.id);
  const canonical = JSON.stringify([...tests].sort(byId));
  const hasher = createHash("sha256");
  hasher.update(canonical);
  return hasher.digest("hex");
}
@@ -17,7 +17,11 @@ function severityRank(s) {
17
17
  case "warn": return 0;
18
18
  case "gate": return 1;
19
19
  case "abort": return 2;
20
- default: return 0;
20
+ default: {
21
+ // M4 fix: Exhaustive check — future SeverityLevel additions will cause a compile error
22
+ const _exhaustive = s;
23
+ return _exhaustive;
24
+ }
21
25
  }
22
26
  }
23
27
  /**