prism-mcp-server 7.3.1 → 7.3.3
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +20 -19
- package/dist/cli.js +50 -0
- package/dist/darkfactory/runner.js +101 -2
- package/dist/dashboard/ui.js +2617 -2051
- package/dist/dashboard/ui.tmp.js +3475 -0
- package/dist/errors.js +29 -0
- package/dist/storage/sqlite.js +155 -0
- package/dist/storage/supabase.js +116 -0
- package/dist/tools/routerExperience.js +14 -0
- package/dist/verification/clawValidator.js +2 -1
- package/dist/verification/cliHandler.js +325 -0
- package/dist/verification/gatekeeper.js +39 -0
- package/dist/verification/renameDetector.js +170 -0
- package/dist/verification/runner.js +27 -5
- package/dist/verification/schema.js +18 -0
- package/dist/verification/severityPolicy.js +5 -1
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -29,7 +29,8 @@ Works with **Claude Desktop · Claude Code · Cursor · Windsurf · Cline · Gem
|
|
|
29
29
|
- [Use Cases](#-use-cases)
|
|
30
30
|
- [What's New](#-whats-new)
|
|
31
31
|
- [v7.3.1 Dark Factory (Fail-Closed Execution)](#v731--dark-factory-fail-closed-execution-)
|
|
32
|
-
- [v7.2.0
|
|
32
|
+
- [v7.2.0 Verification Harness (Planned)](#v720--verification-harness-front-loaded-testing-)
|
|
33
|
+
- [v7.4.0 Adversarial Dev Harness (Planned)](#v740--adversarial-dev-harness-anti-sycophancy-)
|
|
33
34
|
- [How Prism Compares](#-how-prism-compares)
|
|
34
35
|
- [Tool Reference](#-tool-reference)
|
|
35
36
|
- [Environment Variables](#environment-variables)
|
|
@@ -412,7 +413,7 @@ Soft/hard delete (Art. 17), full export in JSON, Markdown, or Obsidian vault `.z
|
|
|
412
413
|
|
|
413
414
|
**Consulting / multi-project** — Switch between client projects with progressive loading: `quick` (~50 tokens), `standard` (~200), or `deep` (~1000+).
|
|
414
415
|
|
|
415
|
-
**Complex refactoring (v7.2 planned)** — Prism’s roadmap adds
|
|
416
|
+
**Complex refactoring (v7.2 planned)** — Prism’s roadmap adds verification-first execution for multi-step changes with contract-frozen assertions and gated finalization.
|
|
416
417
|
|
|
417
418
|
**Team onboarding** — New team member's agent loads the full project history instantly.
|
|
418
419
|
|
|
@@ -503,24 +504,24 @@ Prism v7.3.1 implements exactly this: a **3-gate fail-closed pipeline** where ev
|
|
|
503
504
|
|
|
504
505
|
</details>
|
|
505
506
|
|
|
506
|
-
### v7.2.0 —
|
|
507
|
-
> **Planned roadmap release.** Extends Prism from
|
|
507
|
+
### v7.2.0 — Verification Harness (Front-Loaded Testing) 🔭
|
|
508
|
+
> **Planned roadmap release.** Extends Prism from passive validation to contract-frozen, machine-verifiable execution gates.
|
|
508
509
|
|
|
509
|
-
-
|
|
510
|
-
-
|
|
511
|
-
-
|
|
512
|
-
-
|
|
513
|
-
-
|
|
514
|
-
-
|
|
510
|
+
- 📋 **Spec-Freeze Contract (planned)** — v7.2 formalizes three artifacts with strict responsibilities: `implementation_plan.md` (**how**), `verification_harness.json` (**proof contract**), and `validation_result` (**immutable outcome record**).
|
|
511
|
+
- 🔐 **Rubric Hash Lock (planned)** — `verification_harness.json` is generated before execution and hash-locked (`rubric_hash`) so criteria cannot drift mid-sprint.
|
|
512
|
+
- 🔬 **Multi-Layer Verification (planned)** — Structured checks across **Data Accuracy**, **Agent Behavior**, and **Pipeline Integrity** using machine-parseable assertions.
|
|
513
|
+
- 🤖 **Adversarial Validation Loop (planned)** — A second validation pass evaluates execution outputs against the frozen contract before progression.
|
|
514
|
+
- 🚦 **Finalization Gates (planned)** — Gate policies (`warn` / `gate` / `abort`) evaluate `validation_result` against the frozen rubric before pipeline completion.
|
|
515
|
+
- 🧠 **Routing Feedback Signals (planned)** — Router learning ingests raw verification signals (`pass_rate`, `critical_failures`, `coverage_score`, `rubric_hash`) for downstream confidence adjustment.
|
|
515
516
|
|
|
516
517
|
<details>
|
|
517
518
|
<summary><strong>🔬 Concept Example: Before vs. After v7.2</strong></summary>
|
|
518
519
|
|
|
519
520
|
**Scenario:** "Refactor the Auth module and update the unit tests."
|
|
520
521
|
|
|
521
|
-
**Before
|
|
522
|
+
**Before:** Criteria emerge during or after coding; verification is inconsistent and hard to audit.
|
|
522
523
|
|
|
523
|
-
**After (
|
|
524
|
+
**After (verification-first):** Plan emits a frozen verification contract first, execution runs, validator emits immutable `validation_result`, and finalization gates enforce rubric compliance.
|
|
524
525
|
|
|
525
526
|
</details>
|
|
526
527
|
|
|
@@ -815,13 +816,13 @@ Requires `PRISM_DARK_FACTORY_ENABLED=true`.
|
|
|
815
816
|
</details>
|
|
816
817
|
|
|
817
818
|
<details>
|
|
818
|
-
<summary><strong>
|
|
819
|
+
<summary><strong>Verification Harness (Planned for v7.2)</strong></summary>
|
|
819
820
|
|
|
820
821
|
| Tool | Purpose |
|
|
821
822
|
|------|---------|
|
|
822
|
-
| `session_plan_decompose` | Decompose natural language goals into
|
|
823
|
-
| `session_plan_step_update` | Atomically update
|
|
824
|
-
| `session_plan_get_active` | Retrieve
|
|
823
|
+
| `session_plan_decompose` | Decompose natural language goals into an execution plan that references verification requirements |
|
|
824
|
+
| `session_plan_step_update` | Atomically update step status/result with verification context |
|
|
825
|
+
| `session_plan_get_active` | Retrieve active plan state and current verification gating position |
|
|
825
826
|
|
|
826
827
|
</details>
|
|
827
828
|
|
|
@@ -970,7 +971,7 @@ Prism is evolving from smart session logging toward a **cognitive memory archite
|
|
|
970
971
|
| **v7.0** | Composite Retrieval Scoring — `0.7 × similarity + 0.3 × σ(activation)`; configurable via `PRISM_ACTR_WEIGHT_*` | Hybrid cognitive-neural retrieval models | ✅ Shipped |
|
|
971
972
|
| **v7.0** | AccessLogBuffer — in-memory batch-write buffer with 5s flush; prevents SQLite `SQLITE_BUSY` under parallel agents | Production reliability engineering | ✅ Shipped |
|
|
972
973
|
| **v7.3** | Dark Factory — 3-gate fail-closed EXECUTE pipeline (parse → type → scope) with structured JSON action contract | Industrial safety systems (defense-in-depth, fail-closed valves) | ✅ Shipped |
|
|
973
|
-
| **v7.2** |
|
|
974
|
+
| **v7.2** | Verification-first harness & contract-gated execution | Programmatic verification systems + adversarial validation loops | 🔭 Horizon |
|
|
974
975
|
| **v7.x** | Affect-Tagged Memory — sentiment shapes what gets recalled | Affect-modulated retrieval (neuroscience) | 🔭 Horizon |
|
|
975
976
|
| **v8+** | Zero-Search Retrieval — no index, no ANN, just ask the vector | Holographic Reduced Representations | 🔭 Horizon |
|
|
976
977
|
|
|
@@ -997,8 +998,8 @@ Shipped. Deterministic task routing (`session_task_route`) with optional experie
|
|
|
997
998
|
### v7.0: ACT-R Activation Memory ✅
|
|
998
999
|
Shipped. Scientifically-grounded retrieval re-ranking via ACT-R base-level activation (`B_i = ln(Σ t_j^(-d))`), candidate-scoped spreading activation, parameterized sigmoid normalization, composite scoring, and zero-cold-start access log infrastructure. 49 dedicated unit tests, 705 total passing.
|
|
999
1000
|
|
|
1000
|
-
### v7.2:
|
|
1001
|
-
Planned. Adds
|
|
1001
|
+
### v7.2: Verification Harness 🔭
|
|
1002
|
+
Planned. Adds a spec-frozen verification contract (`implementation_plan.md` + `verification_harness.json` + immutable `validation_result`), multi-layer machine checks, and finalization gates before autonomous completion.
|
|
1002
1003
|
|
|
1003
1004
|
### Future Tracks
|
|
1004
1005
|
- **v7.x: Affect-Tagged Memory** — Recall prioritization improves by weighting memories with affective/contextual valence, making surfaced context more behaviorally useful.
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#!/usr/bin/env node
/**
 * Prism command-line entry point.
 *
 * Registers the `prism verify` command group:
 *   - `prism verify status`   -> handleVerifyStatus
 *   - `prism verify generate` -> handleGenerateHarness
 *
 * Both subcommands accept the same options and open the same local SQLite
 * database, so the shared wiring lives in the two helpers below.
 */
import { Command } from 'commander';
import { SqliteStorage } from './storage/sqlite.js';
import { handleVerifyStatus, handleGenerateHarness } from './verification/cliHandler.js';
import * as path from 'path';

/** Database file opened by every subcommand (relative to the current working directory). */
const LOCAL_DB_PATH = './prism-local.db';

/**
 * Attach the options shared by every `verify` subcommand.
 *
 * @param {Command} command - Subcommand to decorate.
 * @returns {Command} The same command, so calls can be chained.
 */
function addSharedOptions(command) {
    return command
        .option('-p, --project <name>', 'Project name', path.basename(process.cwd()))
        .option('-f, --force', 'Bypass verification failures and drift tracking constraints')
        .option('-u, --user <id>', 'User ID for tenant isolation', 'default')
        .option('--json', 'Emit machine-readable JSON output with stable keys');
}

/**
 * Open the local storage, run a verification handler against it, and always
 * close the storage afterwards.
 *
 * @param {Function} handler - Called as
 *   `handler(storage, project, force, user, json)`; `force` and `json` are
 *   coerced to booleans.
 * @param {object} options - Parsed commander options for the subcommand.
 * @returns {Promise<void>} Settles once the handler has finished and the
 *   storage has been closed; rejects with whatever the handler (or the
 *   storage) throws.
 */
async function runWithStorage(handler, options) {
    const storage = new SqliteStorage();
    await storage.initialize(LOCAL_DB_PATH);
    // H4 fix: close the storage even when the handler throws, so the WAL is
    // flushed on exit and no data is lost.
    try {
        await handler(storage, options.project, !!options.force, options.user, !!options.json);
    }
    finally {
        await storage.close();
    }
}

const program = new Command();
program
    .name('prism')
    .description('Prism Configuration & CLI')
    // Must track the "version" field in package.json; this file first ships in 7.3.3.
    .version('7.3.3');

const verifyCmd = program
    .command('verify')
    .description('Manage the verification harness');

addSharedOptions(verifyCmd
    .command('status')
    .description('Check the current verification state and view config drift'))
    .action((options) => runWithStorage(handleVerifyStatus, options));

addSharedOptions(verifyCmd
    .command('generate')
    .description('Bless the current ./verification_harness.json as the canonical rubric'))
    .action((options) => runWithStorage(handleGenerateHarness, options));

// The action handlers are async, and commander only awaits them through
// parseAsync(). With parse() a rejected handler surfaces as an unhandled
// promise rejection instead of a reported CLI failure.
program.parseAsync(process.argv).catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|
package/dist/darkfactory/runner.js
CHANGED
|
@@ -21,10 +21,15 @@ import { getStorage } from '../storage/index.js';
|
|
|
21
21
|
import { VALID_ACTION_TYPES } from './schema.js';
|
|
22
22
|
import { SafetyController } from './safetyController.js';
|
|
23
23
|
import { invokeClawAgent } from './clawInvocation.js';
|
|
24
|
-
import { PRISM_DARK_FACTORY_POLL_MS, PRISM_DARK_FACTORY_MAX_RUNTIME_MS, PRISM_USER_ID } from '../config.js';
|
|
24
|
+
import { PRISM_DARK_FACTORY_POLL_MS, PRISM_DARK_FACTORY_MAX_RUNTIME_MS, PRISM_USER_ID, PRISM_VERIFICATION_LAYERS, PRISM_VERIFICATION_DEFAULT_SEVERITY } from '../config.js';
|
|
25
25
|
import { debugLog } from '../utils/logger.js';
|
|
26
26
|
import path from 'path';
|
|
27
27
|
import fs from 'fs';
|
|
28
|
+
import * as crypto from 'crypto';
|
|
29
|
+
import { Gatekeeper } from '../verification/gatekeeper.js';
|
|
30
|
+
import { VerificationRunner } from '../verification/runner.js';
|
|
31
|
+
import { computeRubricHash } from '../verification/schema.js';
|
|
32
|
+
import { VerificationGateError } from '../errors.js';
|
|
28
33
|
/** Interval handle for graceful shutdown */
|
|
29
34
|
let runnerInterval = null;
|
|
30
35
|
/** Tracks whether the runner is currently processing a tick (prevents overlap) */
|
|
@@ -482,8 +487,102 @@ async function runnerTick() {
|
|
|
482
487
|
await emitExperienceEvent(pipeline, 'failure', `Scope violation: ${result.scopeViolation}`);
|
|
483
488
|
return;
|
|
484
489
|
}
|
|
485
|
-
// Determine next step based on result
|
|
486
490
|
const currentStep = pipeline.current_step;
|
|
491
|
+
// ── Phase 4: Verification Pipeline Orchestrator ──
|
|
492
|
+
if (currentStep === 'VERIFY' && spec.workingDirectory) {
|
|
493
|
+
const harnessPath = path.join(path.resolve(spec.workingDirectory), 'verification_harness.json');
|
|
494
|
+
if (fs.existsSync(harnessPath)) {
|
|
495
|
+
try {
|
|
496
|
+
const rawHarness = fs.readFileSync(harnessPath, 'utf8');
|
|
497
|
+
const harnessData = JSON.parse(rawHarness);
|
|
498
|
+
// GAP-5 fix: Persist the harness so CLI drift detection works for DarkFactory runs
|
|
499
|
+
const rubricHash = computeRubricHash(harnessData.tests);
|
|
500
|
+
const harness = {
|
|
501
|
+
...harnessData,
|
|
502
|
+
project: pipeline.project,
|
|
503
|
+
conversation_id: `dark-factory-${pipeline.id}`,
|
|
504
|
+
created_at: new Date().toISOString(),
|
|
505
|
+
rubric_hash: rubricHash,
|
|
506
|
+
};
|
|
507
|
+
await storage.saveVerificationHarness(harness, pipeline.user_id);
|
|
508
|
+
// GAP-2 fix: Build VerificationConfig from env vars so PRISM_VERIFICATION_LAYERS
|
|
509
|
+
// and PRISM_VERIFICATION_DEFAULT_SEVERITY are respected in DarkFactory pipelines
|
|
510
|
+
const vConfig = {
|
|
511
|
+
enabled: true,
|
|
512
|
+
layers: PRISM_VERIFICATION_LAYERS,
|
|
513
|
+
default_severity: PRISM_VERIFICATION_DEFAULT_SEVERITY,
|
|
514
|
+
};
|
|
515
|
+
const verificationResult = await VerificationRunner.runSuite(rawHarness, {
|
|
516
|
+
harness,
|
|
517
|
+
layers: PRISM_VERIFICATION_LAYERS,
|
|
518
|
+
config: vConfig,
|
|
519
|
+
});
|
|
520
|
+
const coverageScore = verificationResult.total > 0 ? (verificationResult.total - verificationResult.skipped_count) / verificationResult.total : 0;
|
|
521
|
+
const executedCount = verificationResult.total - verificationResult.skipped_count;
|
|
522
|
+
const passRate = executedCount > 0 ? verificationResult.passed_count / executedCount : 0;
|
|
523
|
+
// GAP-4 fix: Use proper ValidationResult type instead of `any`
|
|
524
|
+
const valResult = {
|
|
525
|
+
id: crypto.randomUUID(),
|
|
526
|
+
rubric_hash: rubricHash,
|
|
527
|
+
project: pipeline.project,
|
|
528
|
+
conversation_id: `dark-factory-${pipeline.id}`,
|
|
529
|
+
run_at: new Date().toISOString(),
|
|
530
|
+
passed: passRate >= harnessData.min_pass_rate && verificationResult.severity_gate.action !== "abort",
|
|
531
|
+
pass_rate: passRate,
|
|
532
|
+
critical_failures: verificationResult.severity_gate.failed_assertions.length,
|
|
533
|
+
coverage_score: coverageScore,
|
|
534
|
+
result_json: JSON.stringify(verificationResult),
|
|
535
|
+
gate_action: verificationResult.severity_gate.action,
|
|
536
|
+
gate_override: false,
|
|
537
|
+
};
|
|
538
|
+
const { canContinue, validatedResult } = Gatekeeper.executeGate(valResult);
|
|
539
|
+
await storage.saveVerificationRun(validatedResult, pipeline.user_id);
|
|
540
|
+
// GAP-3 fix: Emit verification experience event for ML routing feedback
|
|
541
|
+
try {
|
|
542
|
+
const confidenceScore = Math.round(passRate * 100);
|
|
543
|
+
await storage.saveLedger({
|
|
544
|
+
project: pipeline.project,
|
|
545
|
+
conversation_id: `dark-factory-${pipeline.id}`,
|
|
546
|
+
user_id: pipeline.user_id,
|
|
547
|
+
event_type: 'validation_result',
|
|
548
|
+
summary: `[VERIFY] ${verificationResult.passed_count}/${verificationResult.total} passed (gate: ${verificationResult.severity_gate.action})`,
|
|
549
|
+
keywords: ['dark-factory', 'verification', pipeline.project],
|
|
550
|
+
importance: verificationResult.severity_gate.action === 'abort' ? 2 : 0,
|
|
551
|
+
confidence_score: confidenceScore,
|
|
552
|
+
});
|
|
553
|
+
}
|
|
554
|
+
catch { /* experience events are advisory — never block execution */ }
|
|
555
|
+
if (!canContinue) {
|
|
556
|
+
result.success = false;
|
|
557
|
+
result.notes = (result.notes ? result.notes + '\n\n' : '') + `[GATE BLOCKED] Pipeline verification runner failed the security gate.`;
|
|
558
|
+
}
|
|
559
|
+
else {
|
|
560
|
+
result.success = result.success && validatedResult.passed;
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
catch (err) {
|
|
564
|
+
if (err instanceof VerificationGateError) {
|
|
565
|
+
debugLog(`[DarkFactory] Pipeline ${pipeline.id} ABORTED by Verification Gate.`);
|
|
566
|
+
try {
|
|
567
|
+
await storage.savePipeline({
|
|
568
|
+
...pipeline,
|
|
569
|
+
status: 'FAILED',
|
|
570
|
+
error: `[GATE ABORT] ${err.message}`,
|
|
571
|
+
});
|
|
572
|
+
}
|
|
573
|
+
catch { /* Status guard */ }
|
|
574
|
+
await emitExperienceEvent(pipeline, 'failure', `[GATE ABORT] ${err.message}`);
|
|
575
|
+
return;
|
|
576
|
+
}
|
|
577
|
+
else {
|
|
578
|
+
console.error(`[DarkFactory] Verification harness crash: ${err.message}`);
|
|
579
|
+
result.success = false;
|
|
580
|
+
result.notes = `[GATE CRASH] Verification suite failed to execute: ${err.message}`;
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
// Determine next step based on result
|
|
487
586
|
const nextStep = SafetyController.getNextStep(currentStep, pipeline.iteration, spec, result.success // For VERIFY step: success means tests passed
|
|
488
587
|
);
|
|
489
588
|
if (nextStep === null || currentStep === 'FINALIZE') {
|