npm - prism-mcp-server - Versions diffs - 7.3.1 → 7.4.0 - Mend

prism-mcp-server 7.3.1 → 7.4.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (21)

package/README.md +117 -194
package/dist/cli.js +50 -0
package/dist/darkfactory/clawInvocation.js +62 -7
package/dist/darkfactory/runner.js +288 -24
package/dist/darkfactory/safetyController.js +48 -22
package/dist/darkfactory/schema.js +2 -0
package/dist/dashboard/ui.js +2617 -2051
package/dist/dashboard/ui.tmp.js +3475 -0
package/dist/errors.js +29 -0
package/dist/server.js +19 -0
package/dist/storage/sqlite.js +199 -7
package/dist/storage/supabase.js +143 -3
package/dist/tools/routerExperience.js +14 -0
package/dist/verification/clawValidator.js +2 -1
package/dist/verification/cliHandler.js +325 -0
package/dist/verification/gatekeeper.js +39 -0
package/dist/verification/renameDetector.js +170 -0
package/dist/verification/runner.js +27 -5
package/dist/verification/schema.js +18 -0
package/dist/verification/severityPolicy.js +5 -1
package/package.json +5 -2

package/dist/darkfactory/runner.js CHANGED

@@ -21,10 +21,15 @@ import { getStorage } from '../storage/index.js';
 import { VALID_ACTION_TYPES } from './schema.js';
 import { SafetyController } from './safetyController.js';
 import { invokeClawAgent } from './clawInvocation.js';
-import { PRISM_DARK_FACTORY_POLL_MS, PRISM_DARK_FACTORY_MAX_RUNTIME_MS, PRISM_USER_ID } from '../config.js';
+import { PRISM_DARK_FACTORY_POLL_MS, PRISM_DARK_FACTORY_MAX_RUNTIME_MS, PRISM_USER_ID, PRISM_VERIFICATION_LAYERS, PRISM_VERIFICATION_DEFAULT_SEVERITY } from '../config.js';
 import { debugLog } from '../utils/logger.js';
 import path from 'path';
 import fs from 'fs';
+import * as crypto from 'crypto';
+import { Gatekeeper } from '../verification/gatekeeper.js';
+import { VerificationRunner } from '../verification/runner.js';
+import { computeRubricHash } from '../verification/schema.js';
+import { VerificationGateError } from '../errors.js';
 /** Interval handle for graceful shutdown */
 let runnerInterval = null;
 /** Tracks whether the runner is currently processing a tick (prevents overlap) */
@@ -186,25 +191,21 @@ async function emitExperienceEvent(pipeline, eventType, outcome) {
  *
  * @internal Exported for unit testing only. Not part of the public API.
  */
-export function parseExecuteOutput(raw) {
+function extractJsonFromLlmOutput(raw) {
     if (!raw || typeof raw !== 'string' || raw.trim() === '') {
-        return { parsed: null, error: 'JSON Parse Error: empty or non-string input' };
+        return { json: null, error: 'JSON Parse Error: empty or non-string input' };
     }
     const cleaned = raw.trim();
     let jsonCandidate = null;
-    // Strategy 1: Try raw trimmed input as-is
     if (cleaned.startsWith('{')) {
         jsonCandidate = cleaned;
     }
-    // Strategy 2: Strip markdown code fences
     if (!jsonCandidate) {
-        // Match ```json or ``` blocks anywhere in the text (not just start/end of string)
         const fenceMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/);
         if (fenceMatch) {
             jsonCandidate = fenceMatch[1].trim();
         }
     }
-    // Strategy 3: Brace extraction — find first { to last }
     if (!jsonCandidate) {
         const firstBrace = cleaned.indexOf('{');
         const lastBrace = cleaned.lastIndexOf('}');
@@ -213,17 +214,21 @@ export function parseExecuteOutput(raw) {
         }
     }
     if (!jsonCandidate) {
-        return { parsed: null, error: 'JSON Parse Error: no JSON object found in LLM output' };
+        return { json: null, error: 'JSON Parse Error: no JSON object found in LLM output' };
     }
-    // Attempt JSON parse
+    return { json: jsonCandidate, error: null };
+}
+export function parseExecuteOutput(raw) {
+    const ext = extractJsonFromLlmOutput(raw);
+    if (ext.error || !ext.json)
+        return { parsed: null, error: ext.error };
     let parsed;
     try {
-        parsed = JSON.parse(jsonCandidate);
+        parsed = JSON.parse(ext.json);
     }
     catch {
         return { parsed: null, error: 'JSON Parse Error: LLM output is not valid JSON' };
     }
-    // Shape validation: must be an object with an 'actions' array
     if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
         return { parsed: null, error: 'Shape Error: output is not a JSON object' };
     }
@@ -231,7 +236,6 @@ export function parseExecuteOutput(raw) {
         return { parsed: null, error: 'Shape Error: output missing required "actions" array' };
     }
     const result = parsed;
-    // Validate each action in the array
     for (let i = 0; i < result.actions.length; i++) {
         const action = result.actions[i];
         if (!action || typeof action !== 'object' || Array.isArray(action)) {
@@ -246,6 +250,62 @@ export function parseExecuteOutput(raw) {
     }
     return { parsed: result, error: null };
 }
+export function parseContractOutput(raw) {
+    const ext = extractJsonFromLlmOutput(raw);
+    if (ext.error || !ext.json)
+        return { parsed: null, error: ext.error };
+    let parsed;
+    try {
+        parsed = JSON.parse(ext.json);
+    }
+    catch {
+        return { parsed: null, error: 'JSON Parse Error: LLM output is not valid JSON' };
+    }
+    if (!parsed || typeof parsed !== 'object' || !Array.isArray(parsed.criteria)) {
+        return { parsed: null, error: 'Shape Error: output missing required "criteria" array' };
+    }
+    // Validate each criterion element has the required string fields
+    for (let i = 0; i < parsed.criteria.length; i++) {
+        const c = parsed.criteria[i];
+        if (!c || typeof c !== 'object' || typeof c.id !== 'string' || typeof c.description !== 'string') {
+            return { parsed: null, error: `Shape Error: criteria[${i}] must have string "id" and "description"` };
+        }
+    }
+    return { parsed: parsed, error: null };
+}
+export function parseEvaluationOutput(raw) {
+    const ext = extractJsonFromLlmOutput(raw);
+    if (ext.error || !ext.json)
+        return { parsed: null, error: ext.error };
+    let parsed;
+    try {
+        parsed = JSON.parse(ext.json);
+    }
+    catch {
+        return { parsed: null, error: 'JSON Parse Error: LLM output is not valid JSON' };
+    }
+    if (!parsed || typeof parsed !== 'object' || typeof parsed.pass !== 'boolean') {
+        return { parsed: null, error: 'Shape Error: output missing required "pass" boolean' };
+    }
+    const p = parsed;
+    if (p.findings !== undefined) {
+        if (!Array.isArray(p.findings)) {
+            return { parsed: null, error: 'Shape Error: "findings" must be an array when present' };
+        }
+        // Fix #3: Each failing finding must supply an evidence object so the
+        // Evaluator cannot submit bare severity claims without evidence pointers.
+        for (let i = 0; i < p.findings.length; i++) {
+            const f = p.findings[i];
+            if (!f || typeof f !== 'object') {
+                return { parsed: null, error: `Shape Error: findings[${i}] must be an object` };
+            }
+            if (f.pass_fail === false && (!f.evidence || typeof f.evidence !== 'object')) {
+                return { parsed: null, error: `Shape Error: findings[${i}] is missing required "evidence" object for failure` };
+            }
+        }
+    }
+    return { parsed: parsed, error: null };
+}
 // ─── Step Execution ────────────────────────────────────────────
 /**
  * Execute a single step of the pipeline.
@@ -268,8 +328,8 @@ async function executeStep(pipeline, spec) {
         // - BYOM model override
         // - Timeout enforcement
         const { success, resultText } = await invokeClawAgent(spec, pipeline);
-        // For non-EXECUTE steps, return as-is (free-form text)
-        if (step !== 'EXECUTE') {
+        // For non-JSON steps, return as-is (free-form text)
+        if (step !== 'EXECUTE' && step !== 'PLAN_CONTRACT' && step !== 'EVALUATE') {
             return {
                 iteration: pipeline.iteration,
                 step,
@@ -279,7 +339,6 @@ async function executeStep(pipeline, spec) {
                 notes: resultText.slice(0, 2000),
             };
         }
-        // ── v7.3.1: EXECUTE step — parse and validate structured output ──
         if (!success) {
             // LLM invocation itself failed (timeout, error, etc.)
             return {
@@ -291,7 +350,59 @@ async function executeStep(pipeline, spec) {
                 notes: `LLM invocation failed: ${resultText.slice(0, 500)}`,
             };
         }
-        // Parse the structured JSON output
+        // Parse appropriate JSON output depending on step
+        if (step === 'PLAN_CONTRACT') {
+            const { parsed, error: parseError } = parseContractOutput(resultText);
+            if (parseError || !parsed) {
+                debugLog(`[DarkFactory] PLAN_CONTRACT output parse failure: ${parseError}`);
+                return {
+                    iteration: pipeline.iteration,
+                    step,
+                    started_at: stepStart,
+                    completed_at: new Date().toISOString(),
+                    success: false,
+                    notes: parseError || 'Unknown parse error',
+                };
+            }
+            return {
+                iteration: pipeline.iteration,
+                step,
+                started_at: stepStart,
+                completed_at: new Date().toISOString(),
+                success: true,
+                notes: `Contract accepted with ${parsed.criteria.length} criteria.`,
+                contractPayload: parsed, // Passthrough for runner to write to disk
+            };
+        }
+        if (step === 'EVALUATE') {
+            const { parsed, error: parseError } = parseEvaluationOutput(resultText);
+            if (parseError || !parsed) {
+                debugLog(`[DarkFactory] EVALUATE output parse failure: ${parseError}`);
+                return {
+                    iteration: pipeline.iteration,
+                    step,
+                    started_at: stepStart,
+                    completed_at: new Date().toISOString(),
+                    success: false,
+                    notes: parseError || 'Unknown parse error',
+                };
+            }
+            // Fix #2: Serialize findings array into notes so the Generator's retry
+            // prompt receives the full line-by-line critique, not just a summary string.
+            const findingsText = parsed.findings && parsed.findings.length > 0
+                ? '\nFindings:\n' + parsed.findings.map((f) => `- [${f.severity}] Criterion ${f.criterion_id}: ${f.evidence?.description || 'Failed'} (${f.evidence?.file || 'unknown'}:${f.evidence?.line ?? '?'})`).join('\n')
+                : '';
+            return {
+                iteration: pipeline.iteration,
+                step,
+                started_at: stepStart,
+                completed_at: new Date().toISOString(),
+                success: parsed.pass,
+                notes: (parsed.notes || `Evaluation complete: ${parsed.pass ? 'PASS' : 'FAIL'}`) + findingsText,
+                evaluationPayload: parsed, // Passthrough for orchestrator logic
+            };
+        }
+        // EXECUTE
         const { parsed, error: parseError } = parseExecuteOutput(resultText);
         if (parseError || !parsed) {
             debugLog(`[DarkFactory] EXECUTE output parse failure: ${parseError}`);
@@ -482,11 +593,152 @@ async function runnerTick() {
             await emitExperienceEvent(pipeline, 'failure', `Scope violation: ${result.scopeViolation}`);
             return;
         }
-        // Determine next step based on result
         const currentStep = pipeline.current_step;
-        const nextStep = SafetyController.getNextStep(currentStep, pipeline.iteration, spec, result.success // For VERIFY step: success means tests passed
-        );
-        if (nextStep === null || currentStep === 'FINALIZE') {
+        // ── Phase 4: Verification Pipeline Orchestrator ──
+        if (currentStep === 'VERIFY' && spec.workingDirectory) {
+            const harnessPath = path.join(path.resolve(spec.workingDirectory), 'verification_harness.json');
+            if (fs.existsSync(harnessPath)) {
+                try {
+                    const rawHarness = fs.readFileSync(harnessPath, 'utf8');
+                    const harnessData = JSON.parse(rawHarness);
+                    // GAP-5 fix: Persist the harness so CLI drift detection works for DarkFactory runs
+                    const rubricHash = computeRubricHash(harnessData.tests);
+                    const harness = {
+                        ...harnessData,
+                        project: pipeline.project,
+                        conversation_id: `dark-factory-${pipeline.id}`,
+                        created_at: new Date().toISOString(),
+                        rubric_hash: rubricHash,
+                    };
+                    await storage.saveVerificationHarness(harness, pipeline.user_id);
+                    // GAP-2 fix: Build VerificationConfig from env vars so PRISM_VERIFICATION_LAYERS
+                    // and PRISM_VERIFICATION_DEFAULT_SEVERITY are respected in DarkFactory pipelines
+                    const vConfig = {
+                        enabled: true,
+                        layers: PRISM_VERIFICATION_LAYERS,
+                        default_severity: PRISM_VERIFICATION_DEFAULT_SEVERITY,
+                    };
+                    const verificationResult = await VerificationRunner.runSuite(rawHarness, {
+                        harness,
+                        layers: PRISM_VERIFICATION_LAYERS,
+                        config: vConfig,
+                    });
+                    const coverageScore = verificationResult.total > 0 ? (verificationResult.total - verificationResult.skipped_count) / verificationResult.total : 0;
+                    const executedCount = verificationResult.total - verificationResult.skipped_count;
+                    const passRate = executedCount > 0 ? verificationResult.passed_count / executedCount : 0;
+                    // GAP-4 fix: Use proper ValidationResult type instead of `any`
+                    const valResult = {
+                        id: crypto.randomUUID(),
+                        rubric_hash: rubricHash,
+                        project: pipeline.project,
+                        conversation_id: `dark-factory-${pipeline.id}`,
+                        run_at: new Date().toISOString(),
+                        passed: passRate >= harnessData.min_pass_rate && verificationResult.severity_gate.action !== "abort",
+                        pass_rate: passRate,
+                        critical_failures: verificationResult.severity_gate.failed_assertions.length,
+                        coverage_score: coverageScore,
+                        result_json: JSON.stringify(verificationResult),
+                        gate_action: verificationResult.severity_gate.action,
+                        gate_override: false,
+                    };
+                    const { canContinue, validatedResult } = Gatekeeper.executeGate(valResult);
+                    await storage.saveVerificationRun(validatedResult, pipeline.user_id);
+                    // GAP-3 fix: Emit verification experience event for ML routing feedback
+                    try {
+                        const confidenceScore = Math.round(passRate * 100);
+                        await storage.saveLedger({
+                            project: pipeline.project,
+                            conversation_id: `dark-factory-${pipeline.id}`,
+                            user_id: pipeline.user_id,
+                            event_type: 'validation_result',
+                            summary: `[VERIFY] ${verificationResult.passed_count}/${verificationResult.total} passed (gate: ${verificationResult.severity_gate.action})`,
+                            keywords: ['dark-factory', 'verification', pipeline.project],
+                            importance: verificationResult.severity_gate.action === 'abort' ? 2 : 0,
+                            confidence_score: confidenceScore,
+                        });
+                    }
+                    catch { /* experience events are advisory — never block execution */ }
+                    if (!canContinue) {
+                        result.success = false;
+                        result.notes = (result.notes ? result.notes + '\n\n' : '') + `[GATE BLOCKED] Pipeline verification runner failed the security gate.`;
+                    }
+                    else {
+                        result.success = result.success && validatedResult.passed;
+                    }
+                }
+                catch (err) {
+                    if (err instanceof VerificationGateError) {
+                        debugLog(`[DarkFactory] Pipeline ${pipeline.id} ABORTED by Verification Gate.`);
+                        try {
+                            await storage.savePipeline({
+                                ...pipeline,
+                                status: 'FAILED',
+                                error: `[GATE ABORT] ${err.message}`,
+                            });
+                        }
+                        catch { /* Status guard */ }
+                        await emitExperienceEvent(pipeline, 'failure', `[GATE ABORT] ${err.message}`);
+                        return;
+                    }
+                    else {
+                        console.error(`[DarkFactory] Verification harness crash: ${err.message}`);
+                        result.success = false;
+                        result.notes = `[GATE CRASH] Verification suite failed to execute: ${err.message}`;
+                    }
+                }
+            }
+        }
+        if (currentStep === 'PLAN_CONTRACT' && spec.workingDirectory && result.success && result.contractPayload) {
+            const contractPath = path.join(path.resolve(spec.workingDirectory), 'contract_rubric.json');
+            try {
+                fs.writeFileSync(contractPath, JSON.stringify(result.contractPayload, null, 2), 'utf8');
+                debugLog(`[DarkFactory] contract_rubric.json written to ${contractPath}`);
+            }
+            catch (writeErr) {
+                // Disk/permissions error — fail the pipeline immediately so it doesn't
+                // loop on PLAN_CONTRACT forever (each tick would re-attempt the write).
+                debugLog(`[DarkFactory] Failed to write contract_rubric.json: ${writeErr.message}`);
+                try {
+                    await storage.savePipeline({
+                        ...pipeline,
+                        status: 'FAILED',
+                        error: `PLAN_CONTRACT failed: could not write contract_rubric.json — ${writeErr.message}`,
+                    });
+                }
+                catch { /* status guard */ }
+                await emitExperienceEvent(pipeline, 'failure', `contract_rubric.json write failed: ${writeErr.message}`);
+                return;
+            }
+        }
+        if (currentStep === 'EVALUATE' && result.evaluationPayload) {
+            // Emit ML learning event for evaluation outcome.
+            // Using 'learning' (valid LedgerEntry event type) rather than
+            // a non-existent 'evaluation_result' to avoid runtime cast issues.
+            try {
+                await storage.saveLedger({
+                    project: pipeline.project,
+                    conversation_id: `dark-factory-${pipeline.id}`,
+                    user_id: pipeline.user_id,
+                    event_type: 'learning',
+                    summary: `[EVALUATE] ${result.success ? 'PASS' : 'FAIL'} on iter ${pipeline.iteration} rev ${pipeline.eval_revisions ?? 0}`,
+                    keywords: ['dark-factory', 'evaluation', pipeline.project],
+                    importance: result.success ? 3 : 1,
+                    confidence_score: result.success ? 90 : 50,
+                });
+            }
+            catch { /* advisory — never block execution */ }
+        }
+        // ─── Determine plan_viable from evaluation payload ───
+        // Default to false (conservative): a parse failure or missing payload means
+        // we don't know if the plan is viable, so escalate to PLAN re-planning
+        // rather than burning eval_revisions on more EXECUTE retries.
+        let evalPlanViable = false;
+        if (currentStep === 'EVALUATE' && result.evaluationPayload) {
+            // plan_viable defaults false if null/missing (same conservative principle)
+            evalPlanViable = result.evaluationPayload.plan_viable ?? false;
+        }
+        const nextStepInfo = SafetyController.getNextStep(pipeline, spec, result.success, evalPlanViable);
+        if (nextStepInfo === null || currentStep === 'FINALIZE') {
             // Pipeline complete — determine final status
             const finalStatus = result.success ? 'COMPLETED' : 'FAILED';
             const finalError = result.success ? null : `Pipeline ended at step=${currentStep}: ${result.notes?.slice(0, 500)}`;
@@ -514,13 +766,25 @@ async function runnerTick() {
             debugLog(`[DarkFactory] Pipeline ${pipeline.id} finished: ${finalStatus}`);
         }
         else {
-            // Advance to next step
             try {
+                const updatedPayload = currentStep === 'PLAN_CONTRACT' && result.contractPayload
+                    ? result.contractPayload
+                    : pipeline.contract_payload;
+                // Forward the most informative notes available:
+                // EXECUTE notes = what the generator did
+                // EVALUATE notes = what the evaluator found
+                // Other steps: preserve existing notes
+                const updatedNotes = (currentStep === 'EXECUTE' || currentStep === 'EVALUATE') && result.notes
+                    ? result.notes
+                    : pipeline.notes;
                 await storage.savePipeline({
                     ...pipeline,
-                    current_step: nextStep.step,
-                    iteration: nextStep.iteration,
+                    current_step: nextStepInfo.step,
+                    iteration: nextStepInfo.iteration,
+                    eval_revisions: nextStepInfo.eval_revisions,
                     last_heartbeat: new Date().toISOString(),
+                    contract_payload: updatedPayload,
+                    notes: updatedNotes,
                 });
             }
             catch (err) {
@@ -531,7 +795,7 @@ async function runnerTick() {
                 }
                 throw err;
             }
-            debugLog(`[DarkFactory] Pipeline ${pipeline.id} advanced: ${currentStep} → ${nextStep.step} (iter ${nextStep.iteration})`);
+            debugLog(`[DarkFactory] Pipeline ${pipeline.id} advanced: ${currentStep} → ${nextStepInfo.step} (iter ${nextStepInfo.iteration}, rev ${nextStepInfo.eval_revisions ?? 0})`);
         }
     }
     catch (err) {

package/dist/darkfactory/safetyController.js CHANGED

@@ -1,4 +1,4 @@
-import { VALID_ACTION_TYPES } from './schema.js';
+import { VALID_ACTION_TYPES, DEFAULT_MAX_REVISIONS } from './schema.js';
 import { PRISM_DARK_FACTORY_MAX_RUNTIME_MS } from '../config.js';
 import { debugLog } from '../utils/logger.js';
 import path from 'path';
@@ -31,13 +31,6 @@ export class SafetyController {
         'COMPLETED': [], // Terminal — no exits
         'FAILED': ['RUNNING'], // Allow retry from failed state
     };
-    /**
-     * Legal step transitions for the pipeline execution state machine.
-     * FINALIZE is entered from VERIFY when iteration == maxIterations or success.
-     */
-    static STEP_ORDER = [
-        'INIT', 'PLAN', 'EXECUTE', 'VERIFY', 'FINALIZE'
-    ];
     /**
      * Prevents runaway LLM invocation loops by enforcing the max iteration envelope.
      */
@@ -147,8 +140,15 @@ export class SafetyController {
      * Used by clawInvocation.ts instead of inline prompt construction.
      */
     static generateBoundaryPrompt(spec, state) {
+        let modeDescription = 'an autonomous code agent';
+        if (state.current_step === 'PLAN_CONTRACT' || state.current_step === 'EVALUATE') {
+            modeDescription = 'an ADVERSARIAL EVALUATOR enforcing strict quality constraints against a generated output';
+        }
+        else if (state.current_step === 'EXECUTE') {
+            modeDescription = 'a GENERATOR executing code constrained by a strict rubric';
+        }
         const lines = [
-            `You are Prism Dark Factory, operating in the background as an autonomous code agent.`,
+            `You are Prism Dark Factory, operating in the background as ${modeDescription}.`,
             `You are strictly limited to code actions within the defined scope.`,
             ``,
             `── Operational Boundaries ──`,
@@ -156,6 +156,7 @@ export class SafetyController {
             `Project: ${state.project}`,
             `Current Step: ${state.current_step}`,
             `Iteration: ${state.iteration} / ${spec.maxIterations}`,
+            `Revision: ${state.eval_revisions ?? 0} / ${spec.maxRevisions ?? DEFAULT_MAX_REVISIONS}`,
             `Restricted Workspace: ${spec.workingDirectory || '(unrestricted)'}`,
         ];
         if (spec.contextFiles && spec.contextFiles.length > 0) {
@@ -164,29 +165,54 @@ export class SafetyController {
         lines.push(``, `── Objective ──`, spec.objective, ``, `── Safety Rules ──`, `1. Do NOT modify files outside the Restricted Workspace.`, `2. Do NOT make network requests unless the objective explicitly requires it.`, `3. Do NOT execute destructive operations (rm -rf, DROP TABLE, etc.).`, `4. Respond ONLY with actions relevant to the current step.`, `5. If you cannot complete the step, explain why and stop.`);
         return lines.join('\n');
     }
-    /**
-     * Determine the next step in the pipeline execution sequence.
-     * Returns null if the pipeline should terminate (FINALIZE reached or iteration exceeded).
-     */
-    static getNextStep(currentStep, iteration, spec, verifyPassed) {
+    static getNextStep(state, spec, stepPassed, planViable = true) {
+        const currentStep = state.current_step;
+        const iteration = state.iteration;
+        const eval_revisions = state.eval_revisions ?? 0;
         switch (currentStep) {
             case 'INIT':
-                return { step: 'PLAN', iteration };
+                return { step: 'PLAN', iteration, eval_revisions };
             case 'PLAN':
-                return { step: 'EXECUTE', iteration };
+                return { step: 'PLAN_CONTRACT', iteration, eval_revisions };
+            case 'PLAN_CONTRACT':
+                return { step: 'EXECUTE', iteration, eval_revisions };
             case 'EXECUTE':
-                return { step: 'VERIFY', iteration };
+                return { step: 'EVALUATE', iteration, eval_revisions };
+            case 'EVALUATE':
+                if (stepPassed) {
+                    // Contract passed, move to VERIFY
+                    return { step: 'VERIFY', iteration, eval_revisions: 0 };
+                }
+                // Contract failed.
+                if (planViable) {
+                    // Fall back to EXECUTE but increment revision counter
+                    const nextRevision = eval_revisions + 1;
+                    const maxRev = spec.maxRevisions ?? DEFAULT_MAX_REVISIONS;
+                    if (nextRevision >= maxRev) {
+                        // Exceeded max revisions — pipeline fails
+                        return null;
+                    }
+                    return { step: 'EXECUTE', iteration, eval_revisions: nextRevision };
+                }
+                else {
+                    // Fall back all the way to PLAN
+                    const nextIteration = iteration + 1;
+                    if (!SafetyController.validateIterationLimit(nextIteration, spec)) {
+                        return null;
+                    }
+                    return { step: 'PLAN', iteration: nextIteration, eval_revisions: 0 };
+                }
             case 'VERIFY':
-                if (verifyPassed) {
-                    return { step: 'FINALIZE', iteration };
+                if (stepPassed) {
+                    return { step: 'FINALIZE', iteration, eval_revisions };
                 }
                 // Verification failed — loop back to PLAN with incremented iteration
-                const nextIteration = iteration + 1;
-                if (!SafetyController.validateIterationLimit(nextIteration, spec)) {
+                const nextIterationVerify = iteration + 1;
+                if (!SafetyController.validateIterationLimit(nextIterationVerify, spec)) {
                     // Exceeded max iterations — force finalize with failure
                     return null;
                 }
-                return { step: 'PLAN', iteration: nextIteration };
+                return { step: 'PLAN', iteration: nextIterationVerify, eval_revisions: 0 };
             case 'FINALIZE':
                 return null; // Terminal step
             default:

package/dist/darkfactory/schema.js CHANGED

@@ -2,3 +2,5 @@
 export const VALID_ACTION_TYPES = [
     'READ_FILE', 'WRITE_FILE', 'PATCH_FILE', 'RUN_TEST'
 ];
+/** Default max adversarial revisions per EXECUTE phase (used when spec.maxRevisions is unset). */
+export const DEFAULT_MAX_REVISIONS = 3;