prism-mcp-server 7.3.1 → 7.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -194
- package/dist/cli.js +50 -0
- package/dist/darkfactory/clawInvocation.js +62 -7
- package/dist/darkfactory/runner.js +288 -24
- package/dist/darkfactory/safetyController.js +48 -22
- package/dist/darkfactory/schema.js +2 -0
- package/dist/dashboard/ui.js +2617 -2051
- package/dist/dashboard/ui.tmp.js +3475 -0
- package/dist/errors.js +29 -0
- package/dist/server.js +19 -0
- package/dist/storage/sqlite.js +199 -7
- package/dist/storage/supabase.js +143 -3
- package/dist/tools/routerExperience.js +14 -0
- package/dist/verification/clawValidator.js +2 -1
- package/dist/verification/cliHandler.js +325 -0
- package/dist/verification/gatekeeper.js +39 -0
- package/dist/verification/renameDetector.js +170 -0
- package/dist/verification/runner.js +27 -5
- package/dist/verification/schema.js +18 -0
- package/dist/verification/severityPolicy.js +5 -1
- package/package.json +5 -2
package/dist/errors.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Thrown when a verification harness gate action evaluates to "abort".
 * Indicates a strict security or operational policy failure that should halt
 * downstream execution immediately.
 */
export class VerificationGateError extends Error {
    /** Verification result attached by the thrower; may be absent. */
    result;
    /**
     * @param {string} message - Human-readable description of the gate failure.
     * @param {object} [result] - Verification result; `toJSON()` reads its
     *   `project`, `critical_failures`, `pass_rate` and `result_json` fields.
     */
    constructor(message, result) {
        super(message);
        this.name = "VerificationGateError";
        this.result = result;
        // Maintain V8 stack trace natively
        if (Error.captureStackTrace) {
            Error.captureStackTrace(this, VerificationGateError);
        }
    }
    /**
     * Helper to dump the JSON representation of the failure block for logs.
     *
     * Tolerates a missing `result`: the result-derived fields are then
     * `undefined` (and therefore omitted by `JSON.stringify`) instead of the
     * serialization itself throwing while an error is being logged.
     *
     * @returns {{message: string, project: *, critical_failures: *, pass_rate: *, result_json: *}}
     */
    toJSON() {
        const { project, critical_failures, pass_rate, result_json } = this.result ?? {};
        return {
            message: this.message,
            project,
            critical_failures,
            pass_rate,
            result_json,
        };
    }
}
|
package/dist/server.js
CHANGED
|
@@ -1184,6 +1184,25 @@ export async function startServer() {
|
|
|
1184
1184
|
console.error(`[DarkFactory] Startup failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
1185
1185
|
});
|
|
1186
1186
|
}
|
|
1187
|
+
// ─── v7.4: TurboQuant Compressor Async Warmup ────────────
|
|
1188
|
+
// The first call to getDefaultCompressor() triggers construction of a
|
|
1189
|
+
// 768×768 rotation matrix (~15ms of synchronous CPU). Pre-warm via
|
|
1190
|
+
// setImmediate so it runs after the current event-loop tick completes,
|
|
1191
|
+
// preventing the stdio handshake from being blocked during startup.
|
|
1192
|
+
// Fire-and-forget — non-critical; subsequent calls hit the singleton cache.
|
|
1193
|
+
setImmediate(() => {
|
|
1194
|
+
try {
|
|
1195
|
+
// Dynamic import avoids loading turboquant.ts at module-parse time
|
|
1196
|
+
// (the construction side-effects run only when actually needed).
|
|
1197
|
+
import("./utils/turboquant.js").then(({ getDefaultCompressor }) => {
|
|
1198
|
+
getDefaultCompressor();
|
|
1199
|
+
console.error("[Prism] TurboQuant compressor pre-warmed (rotation matrix ready)");
|
|
1200
|
+
}).catch(err => {
|
|
1201
|
+
console.error(`[TurboQuant] Warmup failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
1202
|
+
});
|
|
1203
|
+
}
|
|
1204
|
+
catch { /* warmup is a best-effort optimization */ }
|
|
1205
|
+
});
|
|
1187
1206
|
// Keep the process alive — without this, Node.js would exit
|
|
1188
1207
|
// because there are no active event loop handles after the
|
|
1189
1208
|
// synchronous setup completes.
|
package/dist/storage/sqlite.js
CHANGED
|
@@ -566,14 +566,103 @@ export class SqliteStorage {
|
|
|
566
566
|
status TEXT NOT NULL,
|
|
567
567
|
current_step TEXT NOT NULL,
|
|
568
568
|
iteration INTEGER NOT NULL,
|
|
569
|
+
eval_revisions INTEGER DEFAULT 0,
|
|
569
570
|
started_at TEXT NOT NULL,
|
|
570
571
|
updated_at TEXT NOT NULL,
|
|
571
572
|
spec TEXT NOT NULL,
|
|
572
573
|
error TEXT,
|
|
573
|
-
last_heartbeat TEXT
|
|
574
|
+
last_heartbeat TEXT,
|
|
575
|
+
contract_payload TEXT,
|
|
576
|
+
notes TEXT
|
|
574
577
|
)
|
|
575
578
|
`);
|
|
579
|
+
// ─── v7.4.0 Migration: Adversarial Eval Revisions ─────────
|
|
580
|
+
try {
|
|
581
|
+
await this.db.execute(`ALTER TABLE dark_factory_pipelines ADD COLUMN eval_revisions INTEGER DEFAULT 0`);
|
|
582
|
+
debugLog("[SqliteStorage] v7.4.0 migration: added eval_revisions column");
|
|
583
|
+
// Defensive backfill — SQLite serves the ADD COLUMN default for pre-existing rows
|
|
584
|
+
// as well, so this UPDATE only normalizes rows that hold an explicit NULL.
|
|
585
|
+
await this.db.execute(`UPDATE dark_factory_pipelines SET eval_revisions = 0 WHERE eval_revisions IS NULL`);
|
|
586
|
+
debugLog("[SqliteStorage] v7.4.0 migration: backfilled eval_revisions = 0");
|
|
587
|
+
}
|
|
588
|
+
catch (e) {
|
|
589
|
+
if (!e.message?.includes("duplicate column name"))
|
|
590
|
+
throw e;
|
|
591
|
+
}
|
|
592
|
+
try {
|
|
593
|
+
await this.db.execute(`ALTER TABLE dark_factory_pipelines ADD COLUMN contract_payload TEXT`);
|
|
594
|
+
await this.db.execute(`ALTER TABLE dark_factory_pipelines ADD COLUMN notes TEXT`);
|
|
595
|
+
}
|
|
596
|
+
catch (e) {
|
|
597
|
+
if (!e.message?.includes("duplicate column name"))
|
|
598
|
+
throw e;
|
|
599
|
+
}
|
|
576
600
|
await this.db.execute(`CREATE INDEX IF NOT EXISTS idx_pipelines_status ON dark_factory_pipelines(user_id, project, status)`);
|
|
601
|
+
// ─── v7.2.0 Migration: Verification Harness ────────────────
|
|
602
|
+
await this.db.execute(`
|
|
603
|
+
CREATE TABLE IF NOT EXISTS verification_harnesses (
|
|
604
|
+
rubric_hash TEXT PRIMARY KEY,
|
|
605
|
+
project TEXT NOT NULL,
|
|
606
|
+
conversation_id TEXT NOT NULL,
|
|
607
|
+
created_at TEXT NOT NULL,
|
|
608
|
+
min_pass_rate REAL NOT NULL,
|
|
609
|
+
tests TEXT NOT NULL,
|
|
610
|
+
metadata TEXT,
|
|
611
|
+
user_id TEXT NOT NULL DEFAULT 'default'
|
|
612
|
+
)
|
|
613
|
+
`);
|
|
614
|
+
await this.db.execute(`
|
|
615
|
+
CREATE TABLE IF NOT EXISTS verification_runs (
|
|
616
|
+
id TEXT PRIMARY KEY,
|
|
617
|
+
rubric_hash TEXT NOT NULL,
|
|
618
|
+
project TEXT NOT NULL,
|
|
619
|
+
conversation_id TEXT NOT NULL,
|
|
620
|
+
run_at TEXT NOT NULL,
|
|
621
|
+
passed INTEGER NOT NULL,
|
|
622
|
+
pass_rate REAL NOT NULL,
|
|
623
|
+
critical_failures INTEGER NOT NULL,
|
|
624
|
+
coverage_score REAL NOT NULL,
|
|
625
|
+
result_json TEXT NOT NULL,
|
|
626
|
+
gate_action TEXT NOT NULL,
|
|
627
|
+
gate_override INTEGER,
|
|
628
|
+
override_reason TEXT,
|
|
629
|
+
user_id TEXT NOT NULL DEFAULT 'default',
|
|
630
|
+
FOREIGN KEY(rubric_hash) REFERENCES verification_harnesses(rubric_hash)
|
|
631
|
+
)
|
|
632
|
+
`);
|
|
633
|
+
await this.db.execute(`CREATE INDEX IF NOT EXISTS idx_verification_runs_project ON verification_runs(project, run_at DESC)`);
|
|
634
|
+
// ─── v7.3 Migration: Pipeline Orchestration Overrides ────────
|
|
635
|
+
try {
|
|
636
|
+
await this.db.execute(`ALTER TABLE verification_runs ADD COLUMN gate_override INTEGER`);
|
|
637
|
+
}
|
|
638
|
+
catch (e) {
|
|
639
|
+
if (!e.message?.includes('duplicate column name'))
|
|
640
|
+
console.warn('Migration warning:', e.message);
|
|
641
|
+
}
|
|
642
|
+
try {
|
|
643
|
+
await this.db.execute(`ALTER TABLE verification_runs ADD COLUMN override_reason TEXT`);
|
|
644
|
+
}
|
|
645
|
+
catch (e) {
|
|
646
|
+
if (!e.message?.includes('duplicate column name'))
|
|
647
|
+
console.warn('Migration warning:', e.message);
|
|
648
|
+
}
|
|
649
|
+
// ─── H7 Migration: Tenant isolation for verification tables ────────
|
|
650
|
+
try {
|
|
651
|
+
await this.db.execute(`ALTER TABLE verification_harnesses ADD COLUMN user_id TEXT NOT NULL DEFAULT 'default'`);
|
|
652
|
+
}
|
|
653
|
+
catch (e) {
|
|
654
|
+
if (!e.message?.includes('duplicate column name'))
|
|
655
|
+
console.warn('Migration warning:', e.message);
|
|
656
|
+
}
|
|
657
|
+
try {
|
|
658
|
+
await this.db.execute(`ALTER TABLE verification_runs ADD COLUMN user_id TEXT NOT NULL DEFAULT 'default'`);
|
|
659
|
+
}
|
|
660
|
+
catch (e) {
|
|
661
|
+
if (!e.message?.includes('duplicate column name'))
|
|
662
|
+
console.warn('Migration warning:', e.message);
|
|
663
|
+
}
|
|
664
|
+
// H7: Create index after the column exists (post-migration)
|
|
665
|
+
await this.db.execute(`CREATE INDEX IF NOT EXISTS idx_verification_runs_user ON verification_runs(user_id, project)`);
|
|
577
666
|
// ─── v6.1 Migration: Integrity Check ──────────────────────
|
|
578
667
|
//
|
|
579
668
|
// REVIEWER NOTE: PRAGMA integrity_check scans the B-tree structure of
|
|
@@ -2823,16 +2912,19 @@ export class SqliteStorage {
|
|
|
2823
2912
|
}
|
|
2824
2913
|
await this.db.execute({
|
|
2825
2914
|
sql: `
|
|
2826
|
-
INSERT INTO dark_factory_pipelines (id, project, user_id, status, current_step, iteration, started_at, updated_at, spec, error, last_heartbeat)
|
|
2827
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2915
|
+
INSERT INTO dark_factory_pipelines (id, project, user_id, status, current_step, iteration, eval_revisions, started_at, updated_at, spec, error, last_heartbeat, contract_payload, notes)
|
|
2916
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2828
2917
|
ON CONFLICT(id) DO UPDATE SET
|
|
2829
2918
|
status = excluded.status,
|
|
2830
2919
|
current_step = excluded.current_step,
|
|
2831
2920
|
iteration = excluded.iteration,
|
|
2921
|
+
eval_revisions = excluded.eval_revisions,
|
|
2832
2922
|
updated_at = excluded.updated_at,
|
|
2833
2923
|
spec = excluded.spec,
|
|
2834
2924
|
error = excluded.error,
|
|
2835
|
-
last_heartbeat = excluded.last_heartbeat
|
|
2925
|
+
last_heartbeat = excluded.last_heartbeat,
|
|
2926
|
+
contract_payload = excluded.contract_payload,
|
|
2927
|
+
notes = excluded.notes
|
|
2836
2928
|
`,
|
|
2837
2929
|
args: [
|
|
2838
2930
|
updatedState.id,
|
|
@@ -2841,11 +2933,14 @@ export class SqliteStorage {
|
|
|
2841
2933
|
updatedState.status,
|
|
2842
2934
|
updatedState.current_step,
|
|
2843
2935
|
updatedState.iteration,
|
|
2936
|
+
updatedState.eval_revisions ?? 0,
|
|
2844
2937
|
updatedState.started_at,
|
|
2845
2938
|
updatedState.updated_at,
|
|
2846
2939
|
updatedState.spec,
|
|
2847
2940
|
updatedState.error || null,
|
|
2848
|
-
updatedState.last_heartbeat || null
|
|
2941
|
+
updatedState.last_heartbeat || null,
|
|
2942
|
+
updatedState.contract_payload ? JSON.stringify(updatedState.contract_payload) : null,
|
|
2943
|
+
updatedState.notes || null
|
|
2849
2944
|
]
|
|
2850
2945
|
});
|
|
2851
2946
|
}
|
|
@@ -2856,7 +2951,11 @@ export class SqliteStorage {
|
|
|
2856
2951
|
});
|
|
2857
2952
|
if (result.rows.length === 0)
|
|
2858
2953
|
return null;
|
|
2859
|
-
|
|
2954
|
+
const row = result.rows[0];
|
|
2955
|
+
return {
|
|
2956
|
+
...row,
|
|
2957
|
+
contract_payload: row.contract_payload ? JSON.parse(row.contract_payload) : undefined
|
|
2958
|
+
};
|
|
2860
2959
|
}
|
|
2861
2960
|
async listPipelines(project, status, userId) {
|
|
2862
2961
|
const conditions = ['user_id = ?'];
|
|
@@ -2872,6 +2971,99 @@ export class SqliteStorage {
|
|
|
2872
2971
|
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
|
2873
2972
|
const sql = `SELECT * FROM dark_factory_pipelines ${where} ORDER BY updated_at DESC`;
|
|
2874
2973
|
const result = await this.db.execute({ sql, args });
|
|
2875
|
-
return result.rows
|
|
2974
|
+
return result.rows.map((row) => ({
|
|
2975
|
+
...row,
|
|
2976
|
+
contract_payload: row.contract_payload ? JSON.parse(row.contract_payload) : undefined
|
|
2977
|
+
}));
|
|
2978
|
+
}
|
|
2979
|
+
// ─── Verification Harness (v7.2.0) ───────────────────────────
|
|
2980
|
+
async saveVerificationHarness(harness, userId) {
|
|
2981
|
+
await this.db.execute({
|
|
2982
|
+
sql: `
|
|
2983
|
+
INSERT INTO verification_harnesses (rubric_hash, project, conversation_id, created_at, min_pass_rate, tests, metadata, user_id)
|
|
2984
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
2985
|
+
ON CONFLICT(rubric_hash) DO UPDATE SET
|
|
2986
|
+
metadata = excluded.metadata
|
|
2987
|
+
`,
|
|
2988
|
+
args: [
|
|
2989
|
+
harness.rubric_hash,
|
|
2990
|
+
harness.project,
|
|
2991
|
+
harness.conversation_id,
|
|
2992
|
+
harness.created_at,
|
|
2993
|
+
harness.min_pass_rate,
|
|
2994
|
+
JSON.stringify(harness.tests),
|
|
2995
|
+
harness.metadata ? JSON.stringify(harness.metadata) : null,
|
|
2996
|
+
userId
|
|
2997
|
+
]
|
|
2998
|
+
});
|
|
2999
|
+
}
|
|
3000
|
+
async getVerificationHarness(rubric_hash, userId) {
|
|
3001
|
+
const result = await this.db.execute({
|
|
3002
|
+
sql: `SELECT * FROM verification_harnesses WHERE rubric_hash = ? AND user_id = ?`,
|
|
3003
|
+
args: [rubric_hash, userId]
|
|
3004
|
+
});
|
|
3005
|
+
if (result.rows.length === 0)
|
|
3006
|
+
return null;
|
|
3007
|
+
const row = result.rows[0];
|
|
3008
|
+
return {
|
|
3009
|
+
...row,
|
|
3010
|
+
tests: JSON.parse(row.tests),
|
|
3011
|
+
metadata: row.metadata ? JSON.parse(row.metadata) : undefined
|
|
3012
|
+
};
|
|
3013
|
+
}
|
|
3014
|
+
async saveVerificationRun(result, userId) {
|
|
3015
|
+
await this.db.execute({
|
|
3016
|
+
sql: `
|
|
3017
|
+
INSERT INTO verification_runs (
|
|
3018
|
+
id, rubric_hash, project, conversation_id, run_at,
|
|
3019
|
+
passed, pass_rate, critical_failures, coverage_score, result_json, gate_action, gate_override, override_reason, user_id
|
|
3020
|
+
)
|
|
3021
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
3022
|
+
ON CONFLICT(id) DO NOTHING
|
|
3023
|
+
`,
|
|
3024
|
+
args: [
|
|
3025
|
+
result.id,
|
|
3026
|
+
result.rubric_hash,
|
|
3027
|
+
result.project,
|
|
3028
|
+
result.conversation_id,
|
|
3029
|
+
result.run_at,
|
|
3030
|
+
result.passed ? 1 : 0,
|
|
3031
|
+
result.pass_rate,
|
|
3032
|
+
result.critical_failures,
|
|
3033
|
+
result.coverage_score,
|
|
3034
|
+
result.result_json,
|
|
3035
|
+
result.gate_action,
|
|
3036
|
+
result.gate_override ? 1 : 0,
|
|
3037
|
+
result.override_reason || null,
|
|
3038
|
+
userId
|
|
3039
|
+
]
|
|
3040
|
+
});
|
|
3041
|
+
}
|
|
3042
|
+
async listVerificationRuns(project, userId) {
|
|
3043
|
+
const result = await this.db.execute({
|
|
3044
|
+
sql: `SELECT * FROM verification_runs WHERE project = ? AND user_id = ? ORDER BY run_at DESC`,
|
|
3045
|
+
args: [project, userId]
|
|
3046
|
+
});
|
|
3047
|
+
return result.rows.map(row => ({
|
|
3048
|
+
...row,
|
|
3049
|
+
passed: Boolean(row.passed),
|
|
3050
|
+
gate_override: row.gate_override === 1,
|
|
3051
|
+
override_reason: row.override_reason || undefined
|
|
3052
|
+
}));
|
|
3053
|
+
}
|
|
3054
|
+
async getVerificationRun(id, userId) {
|
|
3055
|
+
const result = await this.db.execute({
|
|
3056
|
+
sql: `SELECT * FROM verification_runs WHERE id = ? AND user_id = ?`,
|
|
3057
|
+
args: [id, userId]
|
|
3058
|
+
});
|
|
3059
|
+
if (result.rows.length === 0)
|
|
3060
|
+
return null;
|
|
3061
|
+
const row = result.rows[0];
|
|
3062
|
+
return {
|
|
3063
|
+
...row,
|
|
3064
|
+
passed: Boolean(row.passed),
|
|
3065
|
+
gate_override: row.gate_override === 1,
|
|
3066
|
+
override_reason: row.override_reason || undefined
|
|
3067
|
+
};
|
|
2876
3068
|
}
|
|
2877
3069
|
}
|
package/dist/storage/supabase.js
CHANGED
|
@@ -1222,7 +1222,13 @@ export class SupabaseStorage {
|
|
|
1222
1222
|
updated_at: updatedState.updated_at,
|
|
1223
1223
|
spec: updatedState.spec,
|
|
1224
1224
|
error: updatedState.error || null,
|
|
1225
|
-
last_heartbeat: updatedState.last_heartbeat || null
|
|
1225
|
+
last_heartbeat: updatedState.last_heartbeat || null,
|
|
1226
|
+
// ─── v7.4: Adversarial Evaluation fields ───
|
|
1227
|
+
eval_revisions: updatedState.eval_revisions ?? 0,
|
|
1228
|
+
contract_payload: updatedState.contract_payload
|
|
1229
|
+
? JSON.stringify(updatedState.contract_payload)
|
|
1230
|
+
: null,
|
|
1231
|
+
notes: updatedState.notes || null,
|
|
1226
1232
|
}, { on_conflict: "id" }, { Prefer: "return=minimal,resolution=merge-duplicates" });
|
|
1227
1233
|
}
|
|
1228
1234
|
catch (e) {
|
|
@@ -1244,7 +1250,15 @@ export class SupabaseStorage {
|
|
|
1244
1250
|
const rows = Array.isArray(result) ? result : [];
|
|
1245
1251
|
if (rows.length === 0)
|
|
1246
1252
|
return null;
|
|
1247
|
-
|
|
1253
|
+
const row = rows[0];
|
|
1254
|
+
// ─── v7.4: Deserialize contract_payload from JSON TEXT ───
|
|
1255
|
+
if (row.contract_payload && typeof row.contract_payload === "string") {
|
|
1256
|
+
try {
|
|
1257
|
+
row.contract_payload = JSON.parse(row.contract_payload);
|
|
1258
|
+
}
|
|
1259
|
+
catch { /* leave as-is */ }
|
|
1260
|
+
}
|
|
1261
|
+
return row;
|
|
1248
1262
|
}
|
|
1249
1263
|
catch (e) {
|
|
1250
1264
|
if (e.message?.includes("PGRST202") || e.message?.includes("Could not find the relation"))
|
|
@@ -1263,7 +1277,17 @@ export class SupabaseStorage {
|
|
|
1263
1277
|
if (status)
|
|
1264
1278
|
query.status = `eq.${status}`;
|
|
1265
1279
|
const result = await supabaseGet("dark_factory_pipelines", query);
|
|
1266
|
-
|
|
1280
|
+
const rows = (Array.isArray(result) ? result : []);
|
|
1281
|
+
// ─── v7.4: Deserialize contract_payload from JSON TEXT ───
|
|
1282
|
+
return rows.map(row => {
|
|
1283
|
+
if (row.contract_payload && typeof row.contract_payload === "string") {
|
|
1284
|
+
try {
|
|
1285
|
+
row.contract_payload = JSON.parse(row.contract_payload);
|
|
1286
|
+
}
|
|
1287
|
+
catch { /* leave as-is */ }
|
|
1288
|
+
}
|
|
1289
|
+
return row;
|
|
1290
|
+
});
|
|
1267
1291
|
}
|
|
1268
1292
|
catch (e) {
|
|
1269
1293
|
if (e.message?.includes("PGRST202") || e.message?.includes("Could not find the relation"))
|
|
@@ -1271,4 +1295,120 @@ export class SupabaseStorage {
|
|
|
1271
1295
|
throw e;
|
|
1272
1296
|
}
|
|
1273
1297
|
}
|
|
1298
|
+
// ─── Verification Harness (v7.2.0) ───────────────────────────
|
|
1299
|
+
async saveVerificationHarness(harness, userId) {
|
|
1300
|
+
try {
|
|
1301
|
+
await supabasePost("verification_harnesses", {
|
|
1302
|
+
rubric_hash: harness.rubric_hash,
|
|
1303
|
+
project: harness.project,
|
|
1304
|
+
conversation_id: harness.conversation_id,
|
|
1305
|
+
created_at: harness.created_at,
|
|
1306
|
+
min_pass_rate: harness.min_pass_rate,
|
|
1307
|
+
tests: JSON.stringify(harness.tests),
|
|
1308
|
+
metadata: harness.metadata ? JSON.stringify(harness.metadata) : null,
|
|
1309
|
+
user_id: userId
|
|
1310
|
+
}, { on_conflict: "rubric_hash" }, { Prefer: "return=representation,resolution=merge-duplicates" });
|
|
1311
|
+
}
|
|
1312
|
+
catch (e) {
|
|
1313
|
+
if (e.message?.includes("PGRST116") || e.message?.includes("duplicate key")) {
|
|
1314
|
+
return;
|
|
1315
|
+
}
|
|
1316
|
+
throw e;
|
|
1317
|
+
}
|
|
1318
|
+
}
|
|
1319
|
+
async getVerificationHarness(rubric_hash, userId) {
|
|
1320
|
+
try {
|
|
1321
|
+
const rows = await supabaseGet("verification_harnesses", {
|
|
1322
|
+
"rubric_hash": `eq.${rubric_hash}`,
|
|
1323
|
+
"user_id": `eq.${userId}`
|
|
1324
|
+
});
|
|
1325
|
+
if (!Array.isArray(rows) || rows.length === 0)
|
|
1326
|
+
return null;
|
|
1327
|
+
const row = rows[0];
|
|
1328
|
+
return {
|
|
1329
|
+
...row,
|
|
1330
|
+
tests: JSON.parse(row.tests),
|
|
1331
|
+
metadata: row.metadata ? JSON.parse(row.metadata) : undefined
|
|
1332
|
+
};
|
|
1333
|
+
}
|
|
1334
|
+
catch (e) {
|
|
1335
|
+
if (e.message?.includes("PGRST202") || e.message?.includes("Could not find the relation"))
|
|
1336
|
+
return null;
|
|
1337
|
+
throw e;
|
|
1338
|
+
}
|
|
1339
|
+
}
|
|
1340
|
+
async saveVerificationRun(result, userId) {
|
|
1341
|
+
try {
|
|
1342
|
+
await supabasePost("verification_runs", {
|
|
1343
|
+
id: result.id,
|
|
1344
|
+
rubric_hash: result.rubric_hash,
|
|
1345
|
+
project: result.project,
|
|
1346
|
+
conversation_id: result.conversation_id,
|
|
1347
|
+
run_at: result.run_at,
|
|
1348
|
+
// H2 fix: Use native booleans for Supabase/PostgreSQL (not 0/1 integers)
|
|
1349
|
+
passed: result.passed,
|
|
1350
|
+
pass_rate: result.pass_rate,
|
|
1351
|
+
critical_failures: result.critical_failures,
|
|
1352
|
+
coverage_score: result.coverage_score,
|
|
1353
|
+
result_json: result.result_json,
|
|
1354
|
+
gate_action: result.gate_action,
|
|
1355
|
+
gate_override: result.gate_override ?? false,
|
|
1356
|
+
override_reason: result.override_reason || null,
|
|
1357
|
+
user_id: userId
|
|
1358
|
+
}, { on_conflict: "id" }, { Prefer: "return=representation,resolution=ignore-duplicates" });
|
|
1359
|
+
}
|
|
1360
|
+
catch (e) {
|
|
1361
|
+
if (e.message?.includes("PGRST116") || e.message?.includes("duplicate key")) {
|
|
1362
|
+
return;
|
|
1363
|
+
}
|
|
1364
|
+
throw e;
|
|
1365
|
+
}
|
|
1366
|
+
}
|
|
1367
|
+
async listVerificationRuns(project, userId) {
|
|
1368
|
+
try {
|
|
1369
|
+
const query = {
|
|
1370
|
+
project: `eq.${project}`,
|
|
1371
|
+
user_id: `eq.${userId}`,
|
|
1372
|
+
order: "run_at.desc"
|
|
1373
|
+
};
|
|
1374
|
+
const rows = await supabaseGet("verification_runs", query);
|
|
1375
|
+
if (!Array.isArray(rows))
|
|
1376
|
+
return [];
|
|
1377
|
+
return rows.map((row) => ({
|
|
1378
|
+
...row,
|
|
1379
|
+
passed: Boolean(row.passed),
|
|
1380
|
+
// H2 fix: Use Boolean() consistently (native booleans from Supabase)
|
|
1381
|
+
gate_override: Boolean(row.gate_override),
|
|
1382
|
+
override_reason: row.override_reason || undefined
|
|
1383
|
+
}));
|
|
1384
|
+
}
|
|
1385
|
+
catch (e) {
|
|
1386
|
+
if (e.message?.includes("PGRST202") || e.message?.includes("Could not find the relation"))
|
|
1387
|
+
return [];
|
|
1388
|
+
throw e;
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1391
|
+
async getVerificationRun(id, userId) {
|
|
1392
|
+
try {
|
|
1393
|
+
const rows = await supabaseGet("verification_runs", {
|
|
1394
|
+
id: `eq.${id}`,
|
|
1395
|
+
user_id: `eq.${userId}`
|
|
1396
|
+
});
|
|
1397
|
+
if (!Array.isArray(rows) || rows.length === 0)
|
|
1398
|
+
return null;
|
|
1399
|
+
const row = rows[0];
|
|
1400
|
+
return {
|
|
1401
|
+
...row,
|
|
1402
|
+
passed: Boolean(row.passed),
|
|
1403
|
+
// H2 fix: Use Boolean() consistently (native booleans from Supabase)
|
|
1404
|
+
gate_override: Boolean(row.gate_override),
|
|
1405
|
+
override_reason: row.override_reason || undefined
|
|
1406
|
+
};
|
|
1407
|
+
}
|
|
1408
|
+
catch (e) {
|
|
1409
|
+
if (e.message?.includes("PGRST202") || e.message?.includes("Could not find the relation"))
|
|
1410
|
+
return null;
|
|
1411
|
+
throw e;
|
|
1412
|
+
}
|
|
1413
|
+
}
|
|
1274
1414
|
}
|
|
@@ -54,6 +54,20 @@ export async function getExperienceBias(project, taskKeywords, storageBackend) {
|
|
|
54
54
|
relevantCount++;
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
|
+
// GAP-1 fix: Ingest validation_result events into ML routing bias.
|
|
58
|
+
// The v7.2 spec requires that "Router learning ingests raw verification
|
|
59
|
+
// signals (pass_rate, critical_failures, coverage_score, rubric_hash)."
|
|
60
|
+
// confidence_score >= 80 indicates a passing verification suite.
|
|
61
|
+
if (eventType === "validation_result") {
|
|
62
|
+
const confidence = raw.confidence_score || 50;
|
|
63
|
+
if (confidence >= 80) {
|
|
64
|
+
successCount++;
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
failureCount++;
|
|
68
|
+
}
|
|
69
|
+
relevantCount++;
|
|
70
|
+
}
|
|
57
71
|
}
|
|
58
72
|
if (relevantCount < MIN_SAMPLES) {
|
|
59
73
|
return {
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
* - claw-code-agent MCP server must be available
|
|
18
18
|
* - PRISM_VERIFICATION_HARNESS_ENABLED=true
|
|
19
19
|
*/
|
|
20
|
+
import { createHash } from "crypto";
|
|
20
21
|
import { TestSuiteSchema } from "./schema.js";
|
|
21
22
|
/**
|
|
22
23
|
* Build the prompt for Claw validation.
|
|
@@ -213,7 +214,7 @@ export function mergeSuggestedAssertions(suite, suggestions) {
|
|
|
213
214
|
...suite.tests,
|
|
214
215
|
...suggestions.map((s) => ({
|
|
215
216
|
...s,
|
|
216
|
-
id: s.id || `claw-suggestion-${
|
|
217
|
+
id: s.id || `claw-suggestion-${createHash("sha256").update(JSON.stringify(s)).digest("hex").slice(0, 12)}`,
|
|
217
218
|
severity: s.severity || "warn",
|
|
218
219
|
})),
|
|
219
220
|
],
|