prism-mcp-server 7.3.1 → 7.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -19
- package/dist/cli.js +50 -0
- package/dist/darkfactory/runner.js +101 -2
- package/dist/dashboard/ui.js +2617 -2051
- package/dist/dashboard/ui.tmp.js +3475 -0
- package/dist/errors.js +29 -0
- package/dist/storage/sqlite.js +155 -0
- package/dist/storage/supabase.js +116 -0
- package/dist/tools/routerExperience.js +14 -0
- package/dist/verification/clawValidator.js +2 -1
- package/dist/verification/cliHandler.js +325 -0
- package/dist/verification/gatekeeper.js +39 -0
- package/dist/verification/renameDetector.js +170 -0
- package/dist/verification/runner.js +27 -5
- package/dist/verification/schema.js +18 -0
- package/dist/verification/severityPolicy.js +5 -1
- package/package.json +4 -1
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { VerificationGateError } from "../errors.js";
|
|
2
|
+
export class Gatekeeper {
    /**
     * Reviews a ValidationResult and determines if execution is permitted to continue.
     * Throws `VerificationGateError` strictly on "abort" if bypass isn't provided.
     *
     * @param result - The output of the VerificationRunner
     * @param options - Configuration including audit bypasses
     * @returns `true` if downstream pipeline execution is allowed
     */
    static executeGate(result, options) {
        // Never mutate the caller's result object — work on a shallow copy.
        const validatedResult = { ...result };
        // Administrator override short-circuits every gate action, including abort.
        if (options?.forceBypass === true) {
            console.warn(`\n⚠️ [OVERRIDDEN] Verification Gate bypassed via administrator override.`);
            // Enforce immutability and record audit trail context via environment variables
            validatedResult.gate_override = true;
            const actor = process.env.USER || process.env.USERNAME || 'unknown_user';
            validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass by ${actor}`;
            return { canContinue: true, validatedResult };
        }
        const action = validatedResult.gate_action;
        if (action === "continue") {
            // Harness allows progress, but surface any critical assertion failures.
            if (validatedResult.critical_failures > 0) {
                console.warn(`\n⚠️ [CONTINUE] Harness passed but ${validatedResult.critical_failures} critical assertion(s) failed.`);
            }
            return { canContinue: true, validatedResult };
        }
        if (action === "block") {
            console.error(`\n🚫 [BLOCK] Harness blocked execution. ${(validatedResult.pass_rate * 100).toFixed(1)}% pass rate.`);
            return { canContinue: false, validatedResult };
        }
        if (action === "abort") {
            console.error(`\n💥 [ABORT] Critical failures detected. Pipeline aborted.`);
            throw new VerificationGateError(`Pipeline blocked by verification harness. Gate action evaluated to ABORT. Pass rate: ${(validatedResult.pass_rate * 100).toFixed(1)}%`, validatedResult);
        }
        // Unknown gate action — fail closed rather than letting the pipeline run.
        console.error(`\n⚠️ [UNKNOWN ACTION] Invalid gate action type: ${validatedResult.gate_action}. Failsafe blocking.`);
        return { canContinue: false, validatedResult };
    }
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rename Detection Heuristic Engine
|
|
3
|
+
*
|
|
4
|
+
* Isolated module that detects probable test assertion renames by computing
|
|
5
|
+
* similarity scores between removed and added tests. This module is ONLY
|
|
6
|
+
* invoked when rename detection is explicitly enabled via --rename-detection
|
|
7
|
+
* flag or PRISM_RENAME_DETECTION=true env var.
|
|
8
|
+
*
|
|
9
|
+
* When this module is NOT invoked, the strict-by-ID deterministic behavior
|
|
10
|
+
* of v7.3.2 is preserved byte-for-byte.
|
|
11
|
+
*
|
|
12
|
+
* Algorithm: Greedy bipartite matching on composite similarity scores.
|
|
13
|
+
* - Field overlap via Jaccard coefficient over non-ID field values
|
|
14
|
+
* - Description similarity via normalized Levenshtein distance
|
|
15
|
+
* - Greedy highest-score-first, one-to-one matching (no test matched twice)
|
|
16
|
+
*
|
|
17
|
+
* @module renameDetector
|
|
18
|
+
*/
|
|
19
|
+
// ─── Constants ────────────────────────────────────────────────────────────────
/** Minimum allowed threshold (too low = excessive false positives) */
export const MIN_THRESHOLD = 0.50;
/** Maximum allowed threshold (too high = nothing ever matches) */
export const MAX_THRESHOLD = 0.95;
/** Default similarity threshold when not specified; see clampThreshold for enforcement */
export const DEFAULT_THRESHOLD = 0.70;
|
|
26
|
+
// ─── Similarity Functions ─────────────────────────────────────────────────────
|
|
27
|
+
/**
 * Normalized Levenshtein similarity between two strings.
 * Returns 0.0 (completely different) to 1.0 (identical); the raw edit
 * distance is divided by the length of the longer string.
 */
export function levenshteinSimilarity(a, b) {
    if (a === b) {
        return 1.0;
    }
    if (a.length === 0 || b.length === 0) {
        return 0.0;
    }
    const maxLen = Math.max(a.length, b.length);
    // Two-row Wagner-Fischer: previousRow holds distances for the prefix of `a`
    // that is one character shorter than the one currentRow is being filled for.
    const previousRow = Array.from({ length: b.length + 1 }, (_, j) => j);
    const currentRow = new Array(b.length + 1);
    for (let i = 1; i <= a.length; i++) {
        currentRow[0] = i;
        for (let j = 1; j <= b.length; j++) {
            const substitutionCost = a[i - 1] === b[j - 1] ? 0 : 1;
            currentRow[j] = Math.min(
                currentRow[j - 1] + 1,                 // insertion
                previousRow[j] + 1,                    // deletion
                previousRow[j - 1] + substitutionCost  // substitution
            );
        }
        // Roll the rows forward for the next iteration.
        for (let j = 0; j <= b.length; j++) {
            previousRow[j] = currentRow[j];
        }
    }
    return 1.0 - previousRow[b.length] / maxLen;
}
|
|
58
|
+
/**
 * Jaccard similarity coefficient over the non-ID field values of two
 * TestAssertion objects: matching values / union of field names.
 * Returns 0.0 (no overlap) to 1.0 (identical field values).
 */
export function fieldJaccardSimilarity(a, b) {
    const unionKeys = new Set();
    for (const source of [a, b]) {
        for (const key of Object.keys(source)) {
            if (key !== 'id') {
                unionKeys.add(key);
            }
        }
    }
    if (unionKeys.size === 0) {
        // Neither side carries any non-ID fields — trivially identical.
        return 1.0;
    }
    let matchCount = 0;
    for (const key of unionKeys) {
        // JSON serialization gives structural (deep) value comparison.
        if (JSON.stringify(a[key]) === JSON.stringify(b[key])) {
            matchCount += 1;
        }
    }
    return matchCount / unionKeys.size;
}
|
|
78
|
+
/**
 * Composite similarity between two TestAssertions.
 * Weights: 40% Jaccard field overlap + 60% Levenshtein on description.
 *
 * The heavier description weight reflects the observation that operators
 * typically rename tests when restructuring but preserve the intent —
 * the description carries the most semantic signal.
 */
export function compositeSimilarity(removed, added) {
    const fieldScore = fieldJaccardSimilarity(removed, added);
    // Missing descriptions degrade gracefully to the empty string (score 0 unless both empty).
    const descriptionScore = levenshteinSimilarity(removed.description || '', added.description || '');
    return 0.4 * fieldScore + 0.6 * descriptionScore;
}
|
|
91
|
+
/**
 * List the non-ID field names whose (JSON-serialized) values differ
 * between the two assertions. Result is sorted alphabetically.
 */
function computeChangedKeys(removed, added) {
    const candidateKeys = new Set();
    for (const source of [removed, added]) {
        for (const key of Object.keys(source)) {
            if (key !== 'id') {
                candidateKeys.add(key);
            }
        }
    }
    const changed = [...candidateKeys].filter(
        (key) => JSON.stringify(removed[key]) !== JSON.stringify(added[key])
    );
    return changed.sort();
}
|
|
108
|
+
// ─── Core Detection ───────────────────────────────────────────────────────────
|
|
109
|
+
/**
 * Clamp the threshold into the supported [MIN_THRESHOLD, MAX_THRESHOLD] range.
 * Out-of-range values are pinned to the nearest bound.
 */
export function clampThreshold(threshold) {
    if (threshold < MIN_THRESHOLD) {
        return MIN_THRESHOLD;
    }
    if (threshold > MAX_THRESHOLD) {
        return MAX_THRESHOLD;
    }
    return threshold;
}
|
|
115
|
+
/**
 * Detect probable renames between removed and added test assertion sets.
 *
 * Uses greedy bipartite matching: compute all pairwise similarity scores,
 * sort descending, and greedily assign one-to-one matches above threshold.
 * This is O(n*m) where n=|removed|, m=|added| — acceptable for test suites
 * which are typically <100 assertions.
 *
 * @param added Tests present locally but not in stored harness
 * @param removed Tests present in stored harness but not locally
 * @param threshold Minimum similarity to consider a rename (0.50-0.95)
 * @returns Detected renames and residual unmatched tests
 */
export function detectRenames(added, removed, threshold = DEFAULT_THRESHOLD) {
    const effectiveThreshold = clampThreshold(threshold);
    // Fast path: one side empty means nothing can possibly match.
    if (added.length === 0 || removed.length === 0) {
        return { renamed: [], residualAdded: [...added], residualRemoved: [...removed] };
    }
    // Score every (removed, added) pair that clears the threshold.
    // Generation order (removed-major, added-minor) matters: Array.sort is
    // stable, so equal scores keep this order and output stays deterministic.
    const candidates = [];
    removed.forEach((removedTest, removedIdx) => {
        added.forEach((addedTest, addedIdx) => {
            const similarity = compositeSimilarity(removedTest, addedTest);
            if (similarity >= effectiveThreshold) {
                candidates.push({ removedIdx, addedIdx, similarity });
            }
        });
    });
    candidates.sort((x, y) => y.similarity - x.similarity);
    // Greedy one-to-one assignment, best scores first; no test matched twice.
    const usedRemoved = new Set();
    const usedAdded = new Set();
    const renamed = [];
    for (const { removedIdx, addedIdx, similarity } of candidates) {
        if (usedRemoved.has(removedIdx) || usedAdded.has(addedIdx)) {
            continue;
        }
        usedRemoved.add(removedIdx);
        usedAdded.add(addedIdx);
        renamed.push({
            removed: removed[removedIdx],
            added: added[addedIdx],
            similarity: Math.round(similarity * 1000) / 1000, // 3 decimal places
            changed_keys: computeChangedKeys(removed[removedIdx], added[addedIdx]),
        });
    }
    // Unmatched tests remain in the residual added/removed sets.
    const residualAdded = added.filter((_, i) => !usedAdded.has(i));
    const residualRemoved = removed.filter((_, i) => !usedRemoved.has(i));
    // Sort renamed by old id for deterministic output within the heuristic domain.
    renamed.sort((x, y) => x.removed.id.localeCompare(y.removed.id));
    return { renamed, residualAdded, residualRemoved };
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as fs from "fs";
|
|
2
2
|
import { getQuickJS } from "quickjs-emscripten";
|
|
3
|
-
import { TestSuiteSchema, } from "./schema.js";
|
|
3
|
+
import { TestSuiteSchema, computeRubricHash, } from "./schema.js";
|
|
4
4
|
import { evaluateSeverityGates, resolveEffectiveSeverity } from "./severityPolicy.js";
|
|
5
5
|
// ─── Utilities ──────────────────────────────────────────────
|
|
6
6
|
/** Deeply match objects (expected ⊆ actual) */
|
|
@@ -8,6 +8,10 @@ function deepMatch(actual, expected) {
|
|
|
8
8
|
if (typeof expected !== 'object' || expected === null) {
|
|
9
9
|
return actual === expected;
|
|
10
10
|
}
|
|
11
|
+
// H1 fix: Guard against null/undefined/primitive actual before iterating
|
|
12
|
+
if (typeof actual !== 'object' || actual === null) {
|
|
13
|
+
return false;
|
|
14
|
+
}
|
|
11
15
|
for (const key of Object.keys(expected)) {
|
|
12
16
|
if (typeof actual[key] === 'object') {
|
|
13
17
|
if (!deepMatch(actual[key], expected[key]))
|
|
@@ -187,6 +191,16 @@ const DEFAULT_CONFIG = {
|
|
|
187
191
|
};
|
|
188
192
|
// ─── v7.2.0: Enhanced Verification Runner ───────────────────
|
|
189
193
|
export class VerificationRunner {
|
|
194
|
+
/**
|
|
195
|
+
* Validates that the provided tests match the expected rubric hash from the harness.
|
|
196
|
+
* Throws an error if the hash does not match, ensuring test integrity.
|
|
197
|
+
*/
|
|
198
|
+
static verifyRubricHash(tests, harness) {
|
|
199
|
+
const computed = computeRubricHash(tests);
|
|
200
|
+
if (computed !== harness.rubric_hash) {
|
|
201
|
+
throw new Error(`Rubric hash mismatch. Expected ${harness.rubric_hash}, but computeRubricHash returned ${computed}. The tests have been modified since the harness was created.`);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
190
204
|
/**
|
|
191
205
|
* v7.2.0 enhanced suite runner.
|
|
192
206
|
*
|
|
@@ -195,16 +209,21 @@ export class VerificationRunner {
|
|
|
195
209
|
* - Retry logic for transient failures
|
|
196
210
|
* - Dependency chain resolution
|
|
197
211
|
* - Structured VerificationResult with per-layer breakdown
|
|
212
|
+
* - Rubric hash validation if a harness is provided
|
|
198
213
|
*/
|
|
199
214
|
static async runSuite(jsonContent, options) {
|
|
200
215
|
const startTime = Date.now();
|
|
201
216
|
const config = options?.config ?? DEFAULT_CONFIG;
|
|
202
217
|
const filterLayers = options?.layers ?? config.layers;
|
|
203
218
|
const minSeverity = options?.minSeverity;
|
|
219
|
+
const harness = options?.harness;
|
|
204
220
|
let assertionResults = [];
|
|
205
221
|
try {
|
|
206
222
|
const parsed = JSON.parse(jsonContent);
|
|
207
223
|
const suite = TestSuiteSchema.parse(parsed);
|
|
224
|
+
if (harness) {
|
|
225
|
+
VerificationRunner.verifyRubricHash(suite.tests, harness);
|
|
226
|
+
}
|
|
208
227
|
const { preparedById, orderedIds, precomputed } = prepareAssertions(suite.tests, filterLayers, minSeverity, config);
|
|
209
228
|
const outcomes = new Map();
|
|
210
229
|
const resultById = new Map(precomputed);
|
|
@@ -438,11 +457,14 @@ export class VerificationRunner {
|
|
|
438
457
|
ops++;
|
|
439
458
|
return ops > 10000;
|
|
440
459
|
});
|
|
441
|
-
//
|
|
442
|
-
//
|
|
443
|
-
//
|
|
460
|
+
// C3 fix: Use vm.newString() + vm.setProp() to safely pass JSON
|
|
461
|
+
// into the VM without any string escaping. This prevents injection
|
|
462
|
+
// attacks from crafted input values containing quotes or backslashes.
|
|
444
463
|
const inputsJson = JSON.stringify(inputs);
|
|
445
|
-
const
|
|
464
|
+
const inputsJsonHandle = vm.newString(inputsJson);
|
|
465
|
+
vm.setProp(vm.global, "__inputsJson", inputsJsonHandle);
|
|
466
|
+
inputsJsonHandle.dispose();
|
|
467
|
+
const parseResult = vm.evalCode(`JSON.parse(__inputsJson)`);
|
|
446
468
|
if (parseResult.error) {
|
|
447
469
|
const err = vm.dump(parseResult.error);
|
|
448
470
|
parseResult.error.dispose();
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { createHash } from "crypto";
|
|
2
3
|
// ─── v7.2.0: Severity Levels ────────────────────────────────
|
|
3
4
|
// warn → log and continue
|
|
4
5
|
// gate → block progression until resolved
|
|
@@ -44,3 +45,20 @@ export const TestAssertionSchema = z.object({
|
|
|
44
45
|
export const TestSuiteSchema = z.object({
|
|
45
46
|
tests: z.array(TestAssertionSchema)
|
|
46
47
|
});
|
|
48
|
+
// ─── v7.2.0: Rubric Hash Utility ─────────────────────────────
/**
 * Compute a deterministic SHA-256 hash over the test assertions.
 *
 * Assertions are sorted by `id` before hashing so insertion order does
 * NOT affect the digest across environments.
 * NOTE(review): key order *within* each assertion object still affects
 * JSON.stringify output — presumably stable because assertions come from
 * the same schema parser; confirm if assertions originate elsewhere.
 *
 * @param tests - The array of TestAssertion to hash
 * @returns Lowercase hex SHA-256 digest
 */
export function computeRubricHash(tests) {
    const byId = [...tests].sort((x, y) => x.id.localeCompare(y.id));
    const canonical = JSON.stringify(byId);
    return createHash("sha256").update(canonical).digest("hex");
}
|
|
@@ -17,7 +17,11 @@ function severityRank(s) {
|
|
|
17
17
|
case "warn": return 0;
|
|
18
18
|
case "gate": return 1;
|
|
19
19
|
case "abort": return 2;
|
|
20
|
-
default:
|
|
20
|
+
default: {
|
|
21
|
+
// M4 fix: Exhaustive check — future SeverityLevel additions will cause a compile error
|
|
22
|
+
const _exhaustive = s;
|
|
23
|
+
return _exhaustive;
|
|
24
|
+
}
|
|
21
25
|
}
|
|
22
26
|
}
|
|
23
27
|
/**
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prism-mcp-server",
|
|
3
|
-
"version": "7.3.
|
|
3
|
+
"version": "7.3.3",
|
|
4
4
|
"mcpName": "io.github.dcostenco/prism-mcp",
|
|
5
5
|
"description": "The Mind Palace for AI Agents — fail-closed Dark Factory autonomous pipelines (3-gate parse→type→scope validation), persistent memory (SQLite/Supabase), ACT-R cognitive retrieval, behavioral learning & IDE rules sync, multi-agent Hivemind, time travel, visual dashboard. Zero-config local mode.",
|
|
6
6
|
"module": "index.ts",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"main": "dist/server.js",
|
|
9
9
|
"bin": {
|
|
10
|
+
"prism": "dist/cli.js",
|
|
10
11
|
"prism-mcp-server": "dist/server.js",
|
|
11
12
|
"prism-import": "dist/utils/universalImporter.js"
|
|
12
13
|
},
|
|
@@ -15,6 +16,7 @@
|
|
|
15
16
|
],
|
|
16
17
|
"scripts": {
|
|
17
18
|
"build": "tsc",
|
|
19
|
+
"lint:dashboard": "node scripts/lint-dashboard-es5.cjs",
|
|
18
20
|
"start": "node dist/server.js",
|
|
19
21
|
"test": "vitest run",
|
|
20
22
|
"test:watch": "vitest",
|
|
@@ -107,6 +109,7 @@
|
|
|
107
109
|
"@supabase/supabase-js": "^2.99.3",
|
|
108
110
|
"@tavily/core": "^0.6.0",
|
|
109
111
|
"cheerio": "^1.2.0",
|
|
112
|
+
"commander": "^14.0.3",
|
|
110
113
|
"dotenv": "^16.5.0",
|
|
111
114
|
"fflate": "^0.8.2",
|
|
112
115
|
"jsdom": "^29.0.1",
|