prism-mcp-server 7.3.1 → 7.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -194
- package/dist/cli.js +50 -0
- package/dist/darkfactory/clawInvocation.js +62 -7
- package/dist/darkfactory/runner.js +288 -24
- package/dist/darkfactory/safetyController.js +48 -22
- package/dist/darkfactory/schema.js +2 -0
- package/dist/dashboard/ui.js +2617 -2051
- package/dist/dashboard/ui.tmp.js +3475 -0
- package/dist/errors.js +29 -0
- package/dist/server.js +19 -0
- package/dist/storage/sqlite.js +199 -7
- package/dist/storage/supabase.js +143 -3
- package/dist/tools/routerExperience.js +14 -0
- package/dist/verification/clawValidator.js +2 -1
- package/dist/verification/cliHandler.js +325 -0
- package/dist/verification/gatekeeper.js +39 -0
- package/dist/verification/renameDetector.js +170 -0
- package/dist/verification/runner.js +27 -5
- package/dist/verification/schema.js +18 -0
- package/dist/verification/severityPolicy.js +5 -1
- package/package.json +5 -2
|
@@ -21,10 +21,15 @@ import { getStorage } from '../storage/index.js';
|
|
|
21
21
|
import { VALID_ACTION_TYPES } from './schema.js';
|
|
22
22
|
import { SafetyController } from './safetyController.js';
|
|
23
23
|
import { invokeClawAgent } from './clawInvocation.js';
|
|
24
|
-
import { PRISM_DARK_FACTORY_POLL_MS, PRISM_DARK_FACTORY_MAX_RUNTIME_MS, PRISM_USER_ID } from '../config.js';
|
|
24
|
+
import { PRISM_DARK_FACTORY_POLL_MS, PRISM_DARK_FACTORY_MAX_RUNTIME_MS, PRISM_USER_ID, PRISM_VERIFICATION_LAYERS, PRISM_VERIFICATION_DEFAULT_SEVERITY } from '../config.js';
|
|
25
25
|
import { debugLog } from '../utils/logger.js';
|
|
26
26
|
import path from 'path';
|
|
27
27
|
import fs from 'fs';
|
|
28
|
+
import * as crypto from 'crypto';
|
|
29
|
+
import { Gatekeeper } from '../verification/gatekeeper.js';
|
|
30
|
+
import { VerificationRunner } from '../verification/runner.js';
|
|
31
|
+
import { computeRubricHash } from '../verification/schema.js';
|
|
32
|
+
import { VerificationGateError } from '../errors.js';
|
|
28
33
|
/** Interval handle for graceful shutdown */
|
|
29
34
|
let runnerInterval = null;
|
|
30
35
|
/** Tracks whether the runner is currently processing a tick (prevents overlap) */
|
|
@@ -186,25 +191,21 @@ async function emitExperienceEvent(pipeline, eventType, outcome) {
|
|
|
186
191
|
*
|
|
187
192
|
* @internal Exported for unit testing only. Not part of the public API.
|
|
188
193
|
*/
|
|
189
|
-
|
|
194
|
+
function extractJsonFromLlmOutput(raw) {
|
|
190
195
|
if (!raw || typeof raw !== 'string' || raw.trim() === '') {
|
|
191
|
-
return {
|
|
196
|
+
return { json: null, error: 'JSON Parse Error: empty or non-string input' };
|
|
192
197
|
}
|
|
193
198
|
const cleaned = raw.trim();
|
|
194
199
|
let jsonCandidate = null;
|
|
195
|
-
// Strategy 1: Try raw trimmed input as-is
|
|
196
200
|
if (cleaned.startsWith('{')) {
|
|
197
201
|
jsonCandidate = cleaned;
|
|
198
202
|
}
|
|
199
|
-
// Strategy 2: Strip markdown code fences
|
|
200
203
|
if (!jsonCandidate) {
|
|
201
|
-
// Match ```json or ``` blocks anywhere in the text (not just start/end of string)
|
|
202
204
|
const fenceMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/);
|
|
203
205
|
if (fenceMatch) {
|
|
204
206
|
jsonCandidate = fenceMatch[1].trim();
|
|
205
207
|
}
|
|
206
208
|
}
|
|
207
|
-
// Strategy 3: Brace extraction — find first { to last }
|
|
208
209
|
if (!jsonCandidate) {
|
|
209
210
|
const firstBrace = cleaned.indexOf('{');
|
|
210
211
|
const lastBrace = cleaned.lastIndexOf('}');
|
|
@@ -213,17 +214,21 @@ export function parseExecuteOutput(raw) {
|
|
|
213
214
|
}
|
|
214
215
|
}
|
|
215
216
|
if (!jsonCandidate) {
|
|
216
|
-
return {
|
|
217
|
+
return { json: null, error: 'JSON Parse Error: no JSON object found in LLM output' };
|
|
217
218
|
}
|
|
218
|
-
|
|
219
|
+
return { json: jsonCandidate, error: null };
|
|
220
|
+
}
|
|
221
|
+
export function parseExecuteOutput(raw) {
|
|
222
|
+
const ext = extractJsonFromLlmOutput(raw);
|
|
223
|
+
if (ext.error || !ext.json)
|
|
224
|
+
return { parsed: null, error: ext.error };
|
|
219
225
|
let parsed;
|
|
220
226
|
try {
|
|
221
|
-
parsed = JSON.parse(
|
|
227
|
+
parsed = JSON.parse(ext.json);
|
|
222
228
|
}
|
|
223
229
|
catch {
|
|
224
230
|
return { parsed: null, error: 'JSON Parse Error: LLM output is not valid JSON' };
|
|
225
231
|
}
|
|
226
|
-
// Shape validation: must be an object with an 'actions' array
|
|
227
232
|
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
228
233
|
return { parsed: null, error: 'Shape Error: output is not a JSON object' };
|
|
229
234
|
}
|
|
@@ -231,7 +236,6 @@ export function parseExecuteOutput(raw) {
|
|
|
231
236
|
return { parsed: null, error: 'Shape Error: output missing required "actions" array' };
|
|
232
237
|
}
|
|
233
238
|
const result = parsed;
|
|
234
|
-
// Validate each action in the array
|
|
235
239
|
for (let i = 0; i < result.actions.length; i++) {
|
|
236
240
|
const action = result.actions[i];
|
|
237
241
|
if (!action || typeof action !== 'object' || Array.isArray(action)) {
|
|
@@ -246,6 +250,62 @@ export function parseExecuteOutput(raw) {
|
|
|
246
250
|
}
|
|
247
251
|
return { parsed: result, error: null };
|
|
248
252
|
}
|
|
253
|
+
export function parseContractOutput(raw) {
|
|
254
|
+
const ext = extractJsonFromLlmOutput(raw);
|
|
255
|
+
if (ext.error || !ext.json)
|
|
256
|
+
return { parsed: null, error: ext.error };
|
|
257
|
+
let parsed;
|
|
258
|
+
try {
|
|
259
|
+
parsed = JSON.parse(ext.json);
|
|
260
|
+
}
|
|
261
|
+
catch {
|
|
262
|
+
return { parsed: null, error: 'JSON Parse Error: LLM output is not valid JSON' };
|
|
263
|
+
}
|
|
264
|
+
if (!parsed || typeof parsed !== 'object' || !Array.isArray(parsed.criteria)) {
|
|
265
|
+
return { parsed: null, error: 'Shape Error: output missing required "criteria" array' };
|
|
266
|
+
}
|
|
267
|
+
// Validate each criterion element has the required string fields
|
|
268
|
+
for (let i = 0; i < parsed.criteria.length; i++) {
|
|
269
|
+
const c = parsed.criteria[i];
|
|
270
|
+
if (!c || typeof c !== 'object' || typeof c.id !== 'string' || typeof c.description !== 'string') {
|
|
271
|
+
return { parsed: null, error: `Shape Error: criteria[${i}] must have string "id" and "description"` };
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
return { parsed: parsed, error: null };
|
|
275
|
+
}
|
|
276
|
+
export function parseEvaluationOutput(raw) {
|
|
277
|
+
const ext = extractJsonFromLlmOutput(raw);
|
|
278
|
+
if (ext.error || !ext.json)
|
|
279
|
+
return { parsed: null, error: ext.error };
|
|
280
|
+
let parsed;
|
|
281
|
+
try {
|
|
282
|
+
parsed = JSON.parse(ext.json);
|
|
283
|
+
}
|
|
284
|
+
catch {
|
|
285
|
+
return { parsed: null, error: 'JSON Parse Error: LLM output is not valid JSON' };
|
|
286
|
+
}
|
|
287
|
+
if (!parsed || typeof parsed !== 'object' || typeof parsed.pass !== 'boolean') {
|
|
288
|
+
return { parsed: null, error: 'Shape Error: output missing required "pass" boolean' };
|
|
289
|
+
}
|
|
290
|
+
const p = parsed;
|
|
291
|
+
if (p.findings !== undefined) {
|
|
292
|
+
if (!Array.isArray(p.findings)) {
|
|
293
|
+
return { parsed: null, error: 'Shape Error: "findings" must be an array when present' };
|
|
294
|
+
}
|
|
295
|
+
// Fix #3: Each failing finding must supply an evidence object so the
|
|
296
|
+
// Evaluator cannot submit bare severity claims without evidence pointers.
|
|
297
|
+
for (let i = 0; i < p.findings.length; i++) {
|
|
298
|
+
const f = p.findings[i];
|
|
299
|
+
if (!f || typeof f !== 'object') {
|
|
300
|
+
return { parsed: null, error: `Shape Error: findings[${i}] must be an object` };
|
|
301
|
+
}
|
|
302
|
+
if (f.pass_fail === false && (!f.evidence || typeof f.evidence !== 'object')) {
|
|
303
|
+
return { parsed: null, error: `Shape Error: findings[${i}] is missing required "evidence" object for failure` };
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
return { parsed: parsed, error: null };
|
|
308
|
+
}
|
|
249
309
|
// ─── Step Execution ────────────────────────────────────────────
|
|
250
310
|
/**
|
|
251
311
|
* Execute a single step of the pipeline.
|
|
@@ -268,8 +328,8 @@ async function executeStep(pipeline, spec) {
|
|
|
268
328
|
// - BYOM model override
|
|
269
329
|
// - Timeout enforcement
|
|
270
330
|
const { success, resultText } = await invokeClawAgent(spec, pipeline);
|
|
271
|
-
// For non-
|
|
272
|
-
if (step !== 'EXECUTE') {
|
|
331
|
+
// For non-JSON steps, return as-is (free-form text)
|
|
332
|
+
if (step !== 'EXECUTE' && step !== 'PLAN_CONTRACT' && step !== 'EVALUATE') {
|
|
273
333
|
return {
|
|
274
334
|
iteration: pipeline.iteration,
|
|
275
335
|
step,
|
|
@@ -279,7 +339,6 @@ async function executeStep(pipeline, spec) {
|
|
|
279
339
|
notes: resultText.slice(0, 2000),
|
|
280
340
|
};
|
|
281
341
|
}
|
|
282
|
-
// ── v7.3.1: EXECUTE step — parse and validate structured output ──
|
|
283
342
|
if (!success) {
|
|
284
343
|
// LLM invocation itself failed (timeout, error, etc.)
|
|
285
344
|
return {
|
|
@@ -291,7 +350,59 @@ async function executeStep(pipeline, spec) {
|
|
|
291
350
|
notes: `LLM invocation failed: ${resultText.slice(0, 500)}`,
|
|
292
351
|
};
|
|
293
352
|
}
|
|
294
|
-
// Parse
|
|
353
|
+
// Parse appropriate JSON output depending on step
|
|
354
|
+
if (step === 'PLAN_CONTRACT') {
|
|
355
|
+
const { parsed, error: parseError } = parseContractOutput(resultText);
|
|
356
|
+
if (parseError || !parsed) {
|
|
357
|
+
debugLog(`[DarkFactory] PLAN_CONTRACT output parse failure: ${parseError}`);
|
|
358
|
+
return {
|
|
359
|
+
iteration: pipeline.iteration,
|
|
360
|
+
step,
|
|
361
|
+
started_at: stepStart,
|
|
362
|
+
completed_at: new Date().toISOString(),
|
|
363
|
+
success: false,
|
|
364
|
+
notes: parseError || 'Unknown parse error',
|
|
365
|
+
};
|
|
366
|
+
}
|
|
367
|
+
return {
|
|
368
|
+
iteration: pipeline.iteration,
|
|
369
|
+
step,
|
|
370
|
+
started_at: stepStart,
|
|
371
|
+
completed_at: new Date().toISOString(),
|
|
372
|
+
success: true,
|
|
373
|
+
notes: `Contract accepted with ${parsed.criteria.length} criteria.`,
|
|
374
|
+
contractPayload: parsed, // Passthrough for runner to write to disk
|
|
375
|
+
};
|
|
376
|
+
}
|
|
377
|
+
if (step === 'EVALUATE') {
|
|
378
|
+
const { parsed, error: parseError } = parseEvaluationOutput(resultText);
|
|
379
|
+
if (parseError || !parsed) {
|
|
380
|
+
debugLog(`[DarkFactory] EVALUATE output parse failure: ${parseError}`);
|
|
381
|
+
return {
|
|
382
|
+
iteration: pipeline.iteration,
|
|
383
|
+
step,
|
|
384
|
+
started_at: stepStart,
|
|
385
|
+
completed_at: new Date().toISOString(),
|
|
386
|
+
success: false,
|
|
387
|
+
notes: parseError || 'Unknown parse error',
|
|
388
|
+
};
|
|
389
|
+
}
|
|
390
|
+
// Fix #2: Serialize findings array into notes so the Generator's retry
|
|
391
|
+
// prompt receives the full line-by-line critique, not just a summary string.
|
|
392
|
+
const findingsText = parsed.findings && parsed.findings.length > 0
|
|
393
|
+
? '\nFindings:\n' + parsed.findings.map((f) => `- [${f.severity}] Criterion ${f.criterion_id}: ${f.evidence?.description || 'Failed'} (${f.evidence?.file || 'unknown'}:${f.evidence?.line ?? '?'})`).join('\n')
|
|
394
|
+
: '';
|
|
395
|
+
return {
|
|
396
|
+
iteration: pipeline.iteration,
|
|
397
|
+
step,
|
|
398
|
+
started_at: stepStart,
|
|
399
|
+
completed_at: new Date().toISOString(),
|
|
400
|
+
success: parsed.pass,
|
|
401
|
+
notes: (parsed.notes || `Evaluation complete: ${parsed.pass ? 'PASS' : 'FAIL'}`) + findingsText,
|
|
402
|
+
evaluationPayload: parsed, // Passthrough for orchestrator logic
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
// EXECUTE
|
|
295
406
|
const { parsed, error: parseError } = parseExecuteOutput(resultText);
|
|
296
407
|
if (parseError || !parsed) {
|
|
297
408
|
debugLog(`[DarkFactory] EXECUTE output parse failure: ${parseError}`);
|
|
@@ -482,11 +593,152 @@ async function runnerTick() {
|
|
|
482
593
|
await emitExperienceEvent(pipeline, 'failure', `Scope violation: ${result.scopeViolation}`);
|
|
483
594
|
return;
|
|
484
595
|
}
|
|
485
|
-
// Determine next step based on result
|
|
486
596
|
const currentStep = pipeline.current_step;
|
|
487
|
-
|
|
488
|
-
)
|
|
489
|
-
|
|
597
|
+
// ── Phase 4: Verification Pipeline Orchestrator ──
|
|
598
|
+
if (currentStep === 'VERIFY' && spec.workingDirectory) {
|
|
599
|
+
const harnessPath = path.join(path.resolve(spec.workingDirectory), 'verification_harness.json');
|
|
600
|
+
if (fs.existsSync(harnessPath)) {
|
|
601
|
+
try {
|
|
602
|
+
const rawHarness = fs.readFileSync(harnessPath, 'utf8');
|
|
603
|
+
const harnessData = JSON.parse(rawHarness);
|
|
604
|
+
// GAP-5 fix: Persist the harness so CLI drift detection works for DarkFactory runs
|
|
605
|
+
const rubricHash = computeRubricHash(harnessData.tests);
|
|
606
|
+
const harness = {
|
|
607
|
+
...harnessData,
|
|
608
|
+
project: pipeline.project,
|
|
609
|
+
conversation_id: `dark-factory-${pipeline.id}`,
|
|
610
|
+
created_at: new Date().toISOString(),
|
|
611
|
+
rubric_hash: rubricHash,
|
|
612
|
+
};
|
|
613
|
+
await storage.saveVerificationHarness(harness, pipeline.user_id);
|
|
614
|
+
// GAP-2 fix: Build VerificationConfig from env vars so PRISM_VERIFICATION_LAYERS
|
|
615
|
+
// and PRISM_VERIFICATION_DEFAULT_SEVERITY are respected in DarkFactory pipelines
|
|
616
|
+
const vConfig = {
|
|
617
|
+
enabled: true,
|
|
618
|
+
layers: PRISM_VERIFICATION_LAYERS,
|
|
619
|
+
default_severity: PRISM_VERIFICATION_DEFAULT_SEVERITY,
|
|
620
|
+
};
|
|
621
|
+
const verificationResult = await VerificationRunner.runSuite(rawHarness, {
|
|
622
|
+
harness,
|
|
623
|
+
layers: PRISM_VERIFICATION_LAYERS,
|
|
624
|
+
config: vConfig,
|
|
625
|
+
});
|
|
626
|
+
const coverageScore = verificationResult.total > 0 ? (verificationResult.total - verificationResult.skipped_count) / verificationResult.total : 0;
|
|
627
|
+
const executedCount = verificationResult.total - verificationResult.skipped_count;
|
|
628
|
+
const passRate = executedCount > 0 ? verificationResult.passed_count / executedCount : 0;
|
|
629
|
+
// GAP-4 fix: Use proper ValidationResult type instead of `any`
|
|
630
|
+
const valResult = {
|
|
631
|
+
id: crypto.randomUUID(),
|
|
632
|
+
rubric_hash: rubricHash,
|
|
633
|
+
project: pipeline.project,
|
|
634
|
+
conversation_id: `dark-factory-${pipeline.id}`,
|
|
635
|
+
run_at: new Date().toISOString(),
|
|
636
|
+
passed: passRate >= harnessData.min_pass_rate && verificationResult.severity_gate.action !== "abort",
|
|
637
|
+
pass_rate: passRate,
|
|
638
|
+
critical_failures: verificationResult.severity_gate.failed_assertions.length,
|
|
639
|
+
coverage_score: coverageScore,
|
|
640
|
+
result_json: JSON.stringify(verificationResult),
|
|
641
|
+
gate_action: verificationResult.severity_gate.action,
|
|
642
|
+
gate_override: false,
|
|
643
|
+
};
|
|
644
|
+
const { canContinue, validatedResult } = Gatekeeper.executeGate(valResult);
|
|
645
|
+
await storage.saveVerificationRun(validatedResult, pipeline.user_id);
|
|
646
|
+
// GAP-3 fix: Emit verification experience event for ML routing feedback
|
|
647
|
+
try {
|
|
648
|
+
const confidenceScore = Math.round(passRate * 100);
|
|
649
|
+
await storage.saveLedger({
|
|
650
|
+
project: pipeline.project,
|
|
651
|
+
conversation_id: `dark-factory-${pipeline.id}`,
|
|
652
|
+
user_id: pipeline.user_id,
|
|
653
|
+
event_type: 'validation_result',
|
|
654
|
+
summary: `[VERIFY] ${verificationResult.passed_count}/${verificationResult.total} passed (gate: ${verificationResult.severity_gate.action})`,
|
|
655
|
+
keywords: ['dark-factory', 'verification', pipeline.project],
|
|
656
|
+
importance: verificationResult.severity_gate.action === 'abort' ? 2 : 0,
|
|
657
|
+
confidence_score: confidenceScore,
|
|
658
|
+
});
|
|
659
|
+
}
|
|
660
|
+
catch { /* experience events are advisory — never block execution */ }
|
|
661
|
+
if (!canContinue) {
|
|
662
|
+
result.success = false;
|
|
663
|
+
result.notes = (result.notes ? result.notes + '\n\n' : '') + `[GATE BLOCKED] Pipeline verification runner failed the security gate.`;
|
|
664
|
+
}
|
|
665
|
+
else {
|
|
666
|
+
result.success = result.success && validatedResult.passed;
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
catch (err) {
|
|
670
|
+
if (err instanceof VerificationGateError) {
|
|
671
|
+
debugLog(`[DarkFactory] Pipeline ${pipeline.id} ABORTED by Verification Gate.`);
|
|
672
|
+
try {
|
|
673
|
+
await storage.savePipeline({
|
|
674
|
+
...pipeline,
|
|
675
|
+
status: 'FAILED',
|
|
676
|
+
error: `[GATE ABORT] ${err.message}`,
|
|
677
|
+
});
|
|
678
|
+
}
|
|
679
|
+
catch { /* Status guard */ }
|
|
680
|
+
await emitExperienceEvent(pipeline, 'failure', `[GATE ABORT] ${err.message}`);
|
|
681
|
+
return;
|
|
682
|
+
}
|
|
683
|
+
else {
|
|
684
|
+
console.error(`[DarkFactory] Verification harness crash: ${err.message}`);
|
|
685
|
+
result.success = false;
|
|
686
|
+
result.notes = `[GATE CRASH] Verification suite failed to execute: ${err.message}`;
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
}
|
|
691
|
+
if (currentStep === 'PLAN_CONTRACT' && spec.workingDirectory && result.success && result.contractPayload) {
|
|
692
|
+
const contractPath = path.join(path.resolve(spec.workingDirectory), 'contract_rubric.json');
|
|
693
|
+
try {
|
|
694
|
+
fs.writeFileSync(contractPath, JSON.stringify(result.contractPayload, null, 2), 'utf8');
|
|
695
|
+
debugLog(`[DarkFactory] contract_rubric.json written to ${contractPath}`);
|
|
696
|
+
}
|
|
697
|
+
catch (writeErr) {
|
|
698
|
+
// Disk/permissions error — fail the pipeline immediately so it doesn't
|
|
699
|
+
// loop on PLAN_CONTRACT forever (each tick would re-attempt the write).
|
|
700
|
+
debugLog(`[DarkFactory] Failed to write contract_rubric.json: ${writeErr.message}`);
|
|
701
|
+
try {
|
|
702
|
+
await storage.savePipeline({
|
|
703
|
+
...pipeline,
|
|
704
|
+
status: 'FAILED',
|
|
705
|
+
error: `PLAN_CONTRACT failed: could not write contract_rubric.json — ${writeErr.message}`,
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
catch { /* status guard */ }
|
|
709
|
+
await emitExperienceEvent(pipeline, 'failure', `contract_rubric.json write failed: ${writeErr.message}`);
|
|
710
|
+
return;
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
if (currentStep === 'EVALUATE' && result.evaluationPayload) {
|
|
714
|
+
// Emit ML learning event for evaluation outcome.
|
|
715
|
+
// Using 'learning' (valid LedgerEntry event type) rather than
|
|
716
|
+
// a non-existent 'evaluation_result' to avoid runtime cast issues.
|
|
717
|
+
try {
|
|
718
|
+
await storage.saveLedger({
|
|
719
|
+
project: pipeline.project,
|
|
720
|
+
conversation_id: `dark-factory-${pipeline.id}`,
|
|
721
|
+
user_id: pipeline.user_id,
|
|
722
|
+
event_type: 'learning',
|
|
723
|
+
summary: `[EVALUATE] ${result.success ? 'PASS' : 'FAIL'} on iter ${pipeline.iteration} rev ${pipeline.eval_revisions ?? 0}`,
|
|
724
|
+
keywords: ['dark-factory', 'evaluation', pipeline.project],
|
|
725
|
+
importance: result.success ? 3 : 1,
|
|
726
|
+
confidence_score: result.success ? 90 : 50,
|
|
727
|
+
});
|
|
728
|
+
}
|
|
729
|
+
catch { /* advisory — never block execution */ }
|
|
730
|
+
}
|
|
731
|
+
// ─── Determine plan_viable from evaluation payload ───
|
|
732
|
+
// Default to false (conservative): a parse failure or missing payload means
|
|
733
|
+
// we don't know if the plan is viable, so escalate to PLAN re-planning
|
|
734
|
+
// rather than burning eval_revisions on more EXECUTE retries.
|
|
735
|
+
let evalPlanViable = false;
|
|
736
|
+
if (currentStep === 'EVALUATE' && result.evaluationPayload) {
|
|
737
|
+
// plan_viable defaults false if null/missing (same conservative principle)
|
|
738
|
+
evalPlanViable = result.evaluationPayload.plan_viable ?? false;
|
|
739
|
+
}
|
|
740
|
+
const nextStepInfo = SafetyController.getNextStep(pipeline, spec, result.success, evalPlanViable);
|
|
741
|
+
if (nextStepInfo === null || currentStep === 'FINALIZE') {
|
|
490
742
|
// Pipeline complete — determine final status
|
|
491
743
|
const finalStatus = result.success ? 'COMPLETED' : 'FAILED';
|
|
492
744
|
const finalError = result.success ? null : `Pipeline ended at step=${currentStep}: ${result.notes?.slice(0, 500)}`;
|
|
@@ -514,13 +766,25 @@ async function runnerTick() {
|
|
|
514
766
|
debugLog(`[DarkFactory] Pipeline ${pipeline.id} finished: ${finalStatus}`);
|
|
515
767
|
}
|
|
516
768
|
else {
|
|
517
|
-
// Advance to next step
|
|
518
769
|
try {
|
|
770
|
+
const updatedPayload = currentStep === 'PLAN_CONTRACT' && result.contractPayload
|
|
771
|
+
? result.contractPayload
|
|
772
|
+
: pipeline.contract_payload;
|
|
773
|
+
// Forward the most informative notes available:
|
|
774
|
+
// EXECUTE notes = what the generator did
|
|
775
|
+
// EVALUATE notes = what the evaluator found
|
|
776
|
+
// Other steps: preserve existing notes
|
|
777
|
+
const updatedNotes = (currentStep === 'EXECUTE' || currentStep === 'EVALUATE') && result.notes
|
|
778
|
+
? result.notes
|
|
779
|
+
: pipeline.notes;
|
|
519
780
|
await storage.savePipeline({
|
|
520
781
|
...pipeline,
|
|
521
|
-
current_step:
|
|
522
|
-
iteration:
|
|
782
|
+
current_step: nextStepInfo.step,
|
|
783
|
+
iteration: nextStepInfo.iteration,
|
|
784
|
+
eval_revisions: nextStepInfo.eval_revisions,
|
|
523
785
|
last_heartbeat: new Date().toISOString(),
|
|
786
|
+
contract_payload: updatedPayload,
|
|
787
|
+
notes: updatedNotes,
|
|
524
788
|
});
|
|
525
789
|
}
|
|
526
790
|
catch (err) {
|
|
@@ -531,7 +795,7 @@ async function runnerTick() {
|
|
|
531
795
|
}
|
|
532
796
|
throw err;
|
|
533
797
|
}
|
|
534
|
-
debugLog(`[DarkFactory] Pipeline ${pipeline.id} advanced: ${currentStep} → ${
|
|
798
|
+
debugLog(`[DarkFactory] Pipeline ${pipeline.id} advanced: ${currentStep} → ${nextStepInfo.step} (iter ${nextStepInfo.iteration}, rev ${nextStepInfo.eval_revisions ?? 0})`);
|
|
535
799
|
}
|
|
536
800
|
}
|
|
537
801
|
catch (err) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { VALID_ACTION_TYPES } from './schema.js';
|
|
1
|
+
import { VALID_ACTION_TYPES, DEFAULT_MAX_REVISIONS } from './schema.js';
|
|
2
2
|
import { PRISM_DARK_FACTORY_MAX_RUNTIME_MS } from '../config.js';
|
|
3
3
|
import { debugLog } from '../utils/logger.js';
|
|
4
4
|
import path from 'path';
|
|
@@ -31,13 +31,6 @@ export class SafetyController {
|
|
|
31
31
|
'COMPLETED': [], // Terminal — no exits
|
|
32
32
|
'FAILED': ['RUNNING'], // Allow retry from failed state
|
|
33
33
|
};
|
|
34
|
-
/**
|
|
35
|
-
* Legal step transitions for the pipeline execution state machine.
|
|
36
|
-
* FINALIZE is entered from VERIFY when iteration == maxIterations or success.
|
|
37
|
-
*/
|
|
38
|
-
static STEP_ORDER = [
|
|
39
|
-
'INIT', 'PLAN', 'EXECUTE', 'VERIFY', 'FINALIZE'
|
|
40
|
-
];
|
|
41
34
|
/**
|
|
42
35
|
* Prevents runaway LLM invocation loops by enforcing the max iteration envelope.
|
|
43
36
|
*/
|
|
@@ -147,8 +140,15 @@ export class SafetyController {
|
|
|
147
140
|
* Used by clawInvocation.ts instead of inline prompt construction.
|
|
148
141
|
*/
|
|
149
142
|
static generateBoundaryPrompt(spec, state) {
|
|
143
|
+
let modeDescription = 'an autonomous code agent';
|
|
144
|
+
if (state.current_step === 'PLAN_CONTRACT' || state.current_step === 'EVALUATE') {
|
|
145
|
+
modeDescription = 'an ADVERSARIAL EVALUATOR enforcing strict quality constraints against a generated output';
|
|
146
|
+
}
|
|
147
|
+
else if (state.current_step === 'EXECUTE') {
|
|
148
|
+
modeDescription = 'a GENERATOR executing code constrained by a strict rubric';
|
|
149
|
+
}
|
|
150
150
|
const lines = [
|
|
151
|
-
`You are Prism Dark Factory, operating in the background as
|
|
151
|
+
`You are Prism Dark Factory, operating in the background as ${modeDescription}.`,
|
|
152
152
|
`You are strictly limited to code actions within the defined scope.`,
|
|
153
153
|
``,
|
|
154
154
|
`── Operational Boundaries ──`,
|
|
@@ -156,6 +156,7 @@ export class SafetyController {
|
|
|
156
156
|
`Project: ${state.project}`,
|
|
157
157
|
`Current Step: ${state.current_step}`,
|
|
158
158
|
`Iteration: ${state.iteration} / ${spec.maxIterations}`,
|
|
159
|
+
`Revision: ${state.eval_revisions ?? 0} / ${spec.maxRevisions ?? DEFAULT_MAX_REVISIONS}`,
|
|
159
160
|
`Restricted Workspace: ${spec.workingDirectory || '(unrestricted)'}`,
|
|
160
161
|
];
|
|
161
162
|
if (spec.contextFiles && spec.contextFiles.length > 0) {
|
|
@@ -164,29 +165,54 @@ export class SafetyController {
|
|
|
164
165
|
lines.push(``, `── Objective ──`, spec.objective, ``, `── Safety Rules ──`, `1. Do NOT modify files outside the Restricted Workspace.`, `2. Do NOT make network requests unless the objective explicitly requires it.`, `3. Do NOT execute destructive operations (rm -rf, DROP TABLE, etc.).`, `4. Respond ONLY with actions relevant to the current step.`, `5. If you cannot complete the step, explain why and stop.`);
|
|
165
166
|
return lines.join('\n');
|
|
166
167
|
}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
static getNextStep(currentStep, iteration, spec, verifyPassed) {
|
|
168
|
+
static getNextStep(state, spec, stepPassed, planViable = true) {
|
|
169
|
+
const currentStep = state.current_step;
|
|
170
|
+
const iteration = state.iteration;
|
|
171
|
+
const eval_revisions = state.eval_revisions ?? 0;
|
|
172
172
|
switch (currentStep) {
|
|
173
173
|
case 'INIT':
|
|
174
|
-
return { step: 'PLAN', iteration };
|
|
174
|
+
return { step: 'PLAN', iteration, eval_revisions };
|
|
175
175
|
case 'PLAN':
|
|
176
|
-
return { step: '
|
|
176
|
+
return { step: 'PLAN_CONTRACT', iteration, eval_revisions };
|
|
177
|
+
case 'PLAN_CONTRACT':
|
|
178
|
+
return { step: 'EXECUTE', iteration, eval_revisions };
|
|
177
179
|
case 'EXECUTE':
|
|
178
|
-
return { step: '
|
|
180
|
+
return { step: 'EVALUATE', iteration, eval_revisions };
|
|
181
|
+
case 'EVALUATE':
|
|
182
|
+
if (stepPassed) {
|
|
183
|
+
// Contract passed, move to VERIFY
|
|
184
|
+
return { step: 'VERIFY', iteration, eval_revisions: 0 };
|
|
185
|
+
}
|
|
186
|
+
// Contract failed.
|
|
187
|
+
if (planViable) {
|
|
188
|
+
// Fall back to EXECUTE but increment revision counter
|
|
189
|
+
const nextRevision = eval_revisions + 1;
|
|
190
|
+
const maxRev = spec.maxRevisions ?? DEFAULT_MAX_REVISIONS;
|
|
191
|
+
if (nextRevision >= maxRev) {
|
|
192
|
+
// Exceeded max revisions — pipeline fails
|
|
193
|
+
return null;
|
|
194
|
+
}
|
|
195
|
+
return { step: 'EXECUTE', iteration, eval_revisions: nextRevision };
|
|
196
|
+
}
|
|
197
|
+
else {
|
|
198
|
+
// Fall back all the way to PLAN
|
|
199
|
+
const nextIteration = iteration + 1;
|
|
200
|
+
if (!SafetyController.validateIterationLimit(nextIteration, spec)) {
|
|
201
|
+
return null;
|
|
202
|
+
}
|
|
203
|
+
return { step: 'PLAN', iteration: nextIteration, eval_revisions: 0 };
|
|
204
|
+
}
|
|
179
205
|
case 'VERIFY':
|
|
180
|
-
if (
|
|
181
|
-
return { step: 'FINALIZE', iteration };
|
|
206
|
+
if (stepPassed) {
|
|
207
|
+
return { step: 'FINALIZE', iteration, eval_revisions };
|
|
182
208
|
}
|
|
183
209
|
// Verification failed — loop back to PLAN with incremented iteration
|
|
184
|
-
const
|
|
185
|
-
if (!SafetyController.validateIterationLimit(
|
|
210
|
+
const nextIterationVerify = iteration + 1;
|
|
211
|
+
if (!SafetyController.validateIterationLimit(nextIterationVerify, spec)) {
|
|
186
212
|
// Exceeded max iterations — force finalize with failure
|
|
187
213
|
return null;
|
|
188
214
|
}
|
|
189
|
-
return { step: 'PLAN', iteration:
|
|
215
|
+
return { step: 'PLAN', iteration: nextIterationVerify, eval_revisions: 0 };
|
|
190
216
|
case 'FINALIZE':
|
|
191
217
|
return null; // Terminal step
|
|
192
218
|
default:
|