npm - @hongmaple0820/scale-engine - Versions diffs - 0.48.0 → 0.49.0 - Mend

@hongmaple0820/scale-engine 0.48.0 → 0.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

package/README.en.md +2 -2
package/README.md +2 -2
package/dist/agents/evidenceDiscipline.d.ts +7 -0
package/dist/agents/evidenceDiscipline.js +21 -0
package/dist/agents/evidenceDiscipline.js.map +1 -0
package/dist/agents/profiles.js +8 -1
package/dist/agents/profiles.js.map +1 -1
package/dist/agents/types.d.ts +1 -0
package/dist/artifact/types.d.ts +59 -0
package/dist/artifact/types.js.map +1 -1
package/dist/cli/cortexCommands.d.ts +36 -0
package/dist/cli/cortexCommands.js +76 -4
package/dist/cli/cortexCommands.js.map +1 -1
package/dist/cli/evalCommands.js +12 -1
package/dist/cli/evalCommands.js.map +1 -1
package/dist/cli/phaseCommands.d.ts +53 -1
package/dist/cli/phaseCommands.js +317 -22
package/dist/cli/phaseCommands.js.map +1 -1
package/dist/cortex/InstinctStore.d.ts +32 -1
package/dist/cortex/InstinctStore.js +235 -42
package/dist/cortex/InstinctStore.js.map +1 -1
package/dist/cortex/InstinctValidation.d.ts +9 -0
package/dist/cortex/InstinctValidation.js +55 -0
package/dist/cortex/InstinctValidation.js.map +1 -0
package/dist/cortex/SessionInjector.js +13 -6
package/dist/cortex/SessionInjector.js.map +1 -1
package/dist/eval/BenchmarkPublisher.d.ts +2 -0
package/dist/eval/BenchmarkPublisher.js +43 -0
package/dist/eval/BenchmarkPublisher.js.map +1 -1
package/dist/guardrails/ast/confirmers.d.ts +18 -0
package/dist/guardrails/ast/confirmers.js +69 -0
package/dist/guardrails/ast/confirmers.js.map +1 -0
package/dist/guardrails/ast/parse.d.ts +20 -0
package/dist/guardrails/ast/parse.js +51 -0
package/dist/guardrails/ast/parse.js.map +1 -0
package/dist/output/HTMLDocumentRenderer.d.ts +9 -0
package/dist/output/HTMLDocumentRenderer.js +19 -0
package/dist/output/HTMLDocumentRenderer.js.map +1 -1
package/dist/review/FreshContextVerifier.d.ts +35 -0
package/dist/review/FreshContextVerifier.js +120 -0
package/dist/review/FreshContextVerifier.js.map +1 -0
package/dist/review/JsonLlmClient.d.ts +37 -0
package/dist/review/JsonLlmClient.js +94 -0
package/dist/review/JsonLlmClient.js.map +1 -0
package/dist/review/LlmJudge.d.ts +61 -0
package/dist/review/LlmJudge.js +167 -0
package/dist/review/LlmJudge.js.map +1 -0
package/dist/version.d.ts +1 -1
package/dist/version.js +1 -1
package/dist/workflow/BoundaryEnforcement.d.ts +60 -0
package/dist/workflow/BoundaryEnforcement.js +182 -0
package/dist/workflow/BoundaryEnforcement.js.map +1 -0
package/dist/workflow/EngineeringStandards.js +19 -9
package/dist/workflow/EngineeringStandards.js.map +1 -1
package/dist/workflow/GateCatalog.js +12 -2
package/dist/workflow/GateCatalog.js.map +1 -1
package/dist/workflow/ProfileEnforcement.d.ts +7 -0
package/dist/workflow/ProfileEnforcement.js +12 -0
package/dist/workflow/ProfileEnforcement.js.map +1 -0
package/dist/workflow/ReviewStore.d.ts +10 -0
package/dist/workflow/ReviewStore.js.map +1 -1
package/dist/workflow/SurfaceCoverage.d.ts +19 -0
package/dist/workflow/SurfaceCoverage.js +57 -0
package/dist/workflow/SurfaceCoverage.js.map +1 -0
package/dist/workflow/gates/EnhancedGates.js +2 -0
package/dist/workflow/gates/EnhancedGates.js.map +1 -1
package/dist/workflow/gates/TestIntegrityGate.d.ts +51 -0
package/dist/workflow/gates/TestIntegrityGate.js +175 -0
package/dist/workflow/gates/TestIntegrityGate.js.map +1 -0
package/dist/workflow/types.d.ts +1 -1
package/docs/guides/DEVELOPMENT_WORKFLOW.md +28 -0
package/docs/workflow/E2E_EXAMPLE.md +133 -0
package/docs/workflow/README.md +6 -0
package/docs/workflow/TEMPLATE_GUIDE.md +162 -0
package/docs/workflow/templates/plan.md +26 -0
package/docs/workflow/templates/spec.md +28 -0
package/package.json +2 -1

package/dist/cli/phaseCommands.js CHANGED Viewed

@@ -18,6 +18,9 @@ import { WorkflowArtifactWriter } from '../workflow/WorkflowArtifactWriter.js';
 import { resolveVerificationTargets } from '../workflow/VerificationProfile.js';
 import { EvidenceStore } from '../workflow/EvidenceStore.js';
 import { ReviewStore } from '../workflow/ReviewStore.js';
+import { JudgePromptStore, LlmJudge } from '../review/LlmJudge.js';
+import { JsonLlmClient } from '../review/JsonLlmClient.js';
+import { FreshContextVerifier } from '../review/FreshContextVerifier.js';
 import { TaskMetricsStore } from '../workflow/TaskMetricsStore.js';
 import { appendVerificationArtifact, checkTaskArtifactCompleteness, scaffoldTaskArtifacts } from '../workflow/TaskArtifactScaffolder.js';
 import { createWorkflowGuidance, renderWorkflowGuidance } from '../workflow/WorkflowGuidance.js';
@@ -33,6 +36,8 @@ import { loadToolPolicy } from '../tools/ToolPolicy.js';
 import { runSafeCommand } from '../tools/SafeCommandRunner.js';
 import { join } from 'node:path';
 import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from 'node:fs';
+import { computeSurfaceCoverage, formatSurfaceCoverageWarnings } from '../workflow/SurfaceCoverage.js';
+import { evaluateBoundaries, evaluateConstraints, formatBoundaryWarnings, formatConstraintWarnings, isEnforcedBoundaryProfile, countBoundaryBlockers, } from '../workflow/BoundaryEnforcement.js';
 import { HTMLDocumentRenderer } from '../output/HTMLDocumentRenderer.js';
 import { SCALE_ENGINE_VERSION } from '../version.js';
 import { optimizeCodingPrompt } from '../prompts/PromptOptimizer.js';
@@ -322,11 +327,11 @@ async function recordVerificationMetric(options) {
     return record;
 }
 // Helper: Generate spec markdown file
-function generateSpecMarkdown(id, title, payload) {
+function generateSpecMarkdown(id, title, payload, status = 'FROZEN') {
     return `# Spec: ${title}
 **ID**: ${id}
-**Status**: FROZEN
+**Status**: ${status}
 **Ambiguity Score**: ${payload.ambiguityScore ?? 0.15}
 ## What
@@ -343,11 +348,34 @@ ${payload.edgeCases.map(e => `- ${e}`).join('\n') || '(none defined)'}
 ## North Star
 ${payload.northStar || 'User value delivered'}
+${renderSpecContractSections(payload)}
 ---
 *Generated by SCALE Engine DEFINE phase*
 `;
 }
+// Helper: Render the optional P0 six-element contract sections (five optional markdown sections are emitted here).
+// Each section is omitted when its field is unset or empty; with no sections at all the result is a single newline.
+function renderSpecContractSections(payload) {
+    const sections = []; // each entry is one markdown section that begins with its own '\n'
+    if (payload.verificationSurface?.length) { // missing or empty list -> section skipped
+        sections.push(`\n## Verification Surface\n${payload.verificationSurface.map(s => `- ${s}`).join('\n')}`);
+    }
+    if (payload.constraints?.length) { // missing or empty list -> section skipped
+        sections.push(`\n## Constraints\n${payload.constraints.map(c => `- ${c}`).join('\n')}`);
+    }
+    if (payload.boundaries) { // rendered whenever the object exists, even if all three lists are empty
+        const b = payload.boundaries; // NOTE(review): files/tools/forbidden are read via items.length below; a partial object would throw - confirm callers always supply all three arrays
+        const line = (label, items) => `- ${label}: ${items.length ? items.join(', ') : '(none)'}`;
+        sections.push(`\n## Boundaries\n${line('Files', b.files)}\n${line('Tools', b.tools)}\n${line('Forbidden', b.forbidden)}`);
+    }
+    if (payload.iterationStrategy) { // free-form text, emitted verbatim
+        sections.push(`\n## Iteration Strategy\n${payload.iterationStrategy}`);
+    }
+    if (payload.blockedStopCondition) { // free-form text, emitted verbatim
+        sections.push(`\n## Blocked Stop Condition\n${payload.blockedStopCondition}`);
+    }
+    return sections.length ? `\n${sections.join('\n')}\n` : '\n'; // joined sections end up separated by a blank line
+}
 // Helper: Calculate ambiguity score
 function calculateAmbiguityScore(description, successCriteria) {
     let score = 0.2; // Base score (maximum threshold)
@@ -364,9 +392,20 @@ function calculateAmbiguityScore(description, successCriteria) {
 export const phaseDefine = defineCommand({
     meta: { name: 'define', description: 'DEFINE: Create Spec with AmbiguityScorer + SocraticQuestioner (/spec)' },
     args: {
-        title: { type: 'positional', required: true },
+        title: { type: 'positional', required: false },
         description: { type: 'string', alias: 'd' },
         'success-criteria': { type: 'string', alias: 'c', description: 'Comma-separated criteria' },
+        // P0 draft/confirm two-step lifecycle (backward compatible: bare `define` still auto-freezes)
+        draft: { type: 'boolean', default: false, description: 'Stop the new Spec at REVIEWING (requires `define --confirm <id>` to FROZEN)' },
+        confirm: { type: 'string', description: 'Confirm and freeze an existing draft Spec id (REVIEWING -> FROZEN)' },
+        // P0 six-element contract inputs (optional, comma-separated where plural)
+        'verification-surface': { type: 'string', description: 'Comma-separated evidence sources: test names / benchmark commands / artifact paths' },
+        'constraints': { type: 'string', description: 'Comma-separated invariants that must not regress (perf/security/compat)' },
+        'boundary-files': { type: 'string', description: 'Comma-separated files allowed to change' },
+        'boundary-tools': { type: 'string', description: 'Comma-separated tools allowed to use' },
+        'boundary-forbidden': { type: 'string', description: 'Comma-separated scope that must not be touched' },
+        'iteration-strategy': { type: 'string', description: 'How each build iteration decides the next step' },
+        'blocked-stop': { type: 'string', description: 'What to report / what is needed to unblock when no path is viable' },
         // Socratic refinement answers (optional)
         'goal': { type: 'string', description: 'Goal answer for Socratic refinement' },
         'constraint': { type: 'string', description: 'Constraint answer for Socratic refinement' },
@@ -380,6 +419,15 @@ export const phaseDefine = defineCommand({
     },
     async run({ args }) {
         const { store, fsm, workflowEngine } = getEngine();
+        // P0: --confirm freezes an existing draft Spec (REVIEWING -> FROZEN) without re-creating it.
+        if (args.confirm) {
+            await confirmDraftSpec(store, fsm, String(args.confirm), Boolean(args.json));
+            return;
+        }
+        if (!args.title) {
+            console.error('\nMissing required argument: title (or pass --confirm <spec-id> to freeze a draft)\n');
+            process.exit(1);
+        }
         const rawDesc = String(args.description ?? args.title);
         // Parse success criteria
         const successCriteria = args['success-criteria']
@@ -467,6 +515,17 @@ export const phaseDefine = defineCommand({
             initialStatus: 'DRAFT',
             createdBy: { kind: 'human', userId: 'cli' },
         });
+        // P0 six-element contract inputs (optional; omitted fields stay undefined)
+        const csv = (v) => {
+            const items = typeof v === 'string' ? v.split(',').map(s => s.trim()).filter(Boolean) : [];
+            return items.length ? items : undefined;
+        };
+        const boundaryFiles = csv(args['boundary-files']);
+        const boundaryTools = csv(args['boundary-tools']);
+        const boundaryForbidden = csv(args['boundary-forbidden']);
+        const boundaries = (boundaryFiles || boundaryTools || boundaryForbidden)
+            ? { files: boundaryFiles ?? [], tools: boundaryTools ?? [], forbidden: boundaryForbidden ?? [] }
+            : undefined;
         // Create Spec artifact with proper payload (use refined requirement if available)
         const specPayload = {
             what: refinedRequirement,
@@ -475,6 +534,11 @@ export const phaseDefine = defineCommand({
             edgeCases: [],
             northStar: 'Deliver user value',
             ambiguityScore,
+            verificationSurface: csv(args['verification-surface']),
+            constraints: csv(args['constraints']),
+            boundaries,
+            iterationStrategy: typeof args['iteration-strategy'] === 'string' && args['iteration-strategy'] ? String(args['iteration-strategy']) : undefined,
+            blockedStopCondition: typeof args['blocked-stop'] === 'string' && args['blocked-stop'] ? String(args['blocked-stop']) : undefined,
         };
         const spec = await store.create({
             type: 'Spec', title: args.title,
@@ -483,11 +547,14 @@ export const phaseDefine = defineCommand({
             initialStatus: 'DRAFT',
             createdBy: { kind: 'human', userId: 'cli' },
         });
+        // Draft mode stops at REVIEWING; default mode auto-freezes (FROZEN).
+        const isDraft = Boolean(args.draft);
+        const finalStatus = isDraft ? 'REVIEWING' : 'FROZEN';
         // Generate spec markdown file
         const specsDir = join(SCALE_DIR, 'specs');
         ensureDir(specsDir);
         const specPath = join(specsDir, `${spec.id}.md`);
-        writeFileSync(specPath, generateSpecMarkdown(spec.id, args.title, specPayload));
+        writeFileSync(specPath, generateSpecMarkdown(spec.id, args.title, specPayload, finalStatus));
         // Generate spec HTML file (default format: html)
         const outputFormat = args.format ?? 'md';
         let specHtmlPath;
@@ -496,7 +563,7 @@ export const phaseDefine = defineCommand({
                 title: args.title,
                 brand: args.brand,
                 version: SCALE_ENGINE_VERSION,
-                status: 'FROZEN',
+                status: finalStatus,
             });
             const html = renderer.renderSpec({
                 id: spec.id,
@@ -507,11 +574,16 @@ export const phaseDefine = defineCommand({
                 edgeCases: specPayload.edgeCases,
                 northStar: specPayload.northStar,
                 ambiguityScore,
+                verificationSurface: specPayload.verificationSurface,
+                constraints: specPayload.constraints,
+                boundaries: specPayload.boundaries,
+                iterationStrategy: specPayload.iterationStrategy,
+                blockedStopCondition: specPayload.blockedStopCondition,
             });
             specHtmlPath = join(specsDir, `${spec.id}.html`);
             renderer.writeToFile(html, specHtmlPath);
         }
-        // FSM transitions: DRAFT -> REVIEWING -> FROZEN
+        // FSM transitions: DRAFT -> REVIEWING (-> FROZEN unless --draft)
         // Phase 1: refine (DRAFT -> REVIEWING) - no guards
         const refineResult = await fsm.canTransition(spec.id, 'refine');
         if (!refineResult.allowed) {
@@ -522,22 +594,30 @@ export const phaseDefine = defineCommand({
             process.exit(1);
         }
         await fsm.transition(spec.id, 'refine', { actor: { kind: 'system', component: 'phase-define' } });
-        // Phase 2: approve (REVIEWING -> FROZEN) - guards: ambiguityScore <= 0.2, has successCriteria
-        const approveResult = await fsm.canTransition(spec.id, 'approve');
-        if (!approveResult.allowed) {
+        // Phase 2: approve (REVIEWING -> FROZEN) - guards: ambiguityScore <= 0.2, has successCriteria.
+        // Skipped in --draft mode: the draft waits for `scale define --confirm <id>`.
+        if (!isDraft) {
+            const approveResult = await fsm.canTransition(spec.id, 'approve');
+            if (!approveResult.allowed) {
+                if (!args.json) {
+                    console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
+                    console.error('   Spec cannot be frozen due to:');
+                    approveResult.blockedBy?.forEach(b => console.error(`   [GUARD] ${b.guard}: ${b.message}`));
+                    console.error('\n   Resolve issues before proceeding.');
+                }
+                process.exit(1);
+            }
+            await fsm.transition(spec.id, 'approve', { actor: { kind: 'system', component: 'phase-define' } });
             if (!args.json) {
-                console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
-                console.error('   Spec cannot be frozen due to:');
-                approveResult.blockedBy?.forEach(b => console.error(`   [GUARD] ${b.guard}: ${b.message}`));
-                console.error('\n   Resolve issues before proceeding.');
+                console.log('   FSM: DRAFT -> REVIEWING -> FROZEN ✓');
             }
-            process.exit(1);
         }
-        await fsm.transition(spec.id, 'approve', { actor: { kind: 'system', component: 'phase-define' } });
-        if (!args.json) {
-            console.log('   FSM: DRAFT -> REVIEWING -> FROZEN ✓');
+        else if (!args.json) {
+            console.log('   FSM: DRAFT -> REVIEWING (draft; not yet FROZEN)');
         }
-        const result = { phase: 'DEFINE', spec, specPath, specHtmlPath, ambiguityScore, successCriteria, format: outputFormat, promptOptimization };
+        // Refresh the spec so the reported status reflects the post-transition state.
+        const finalSpec = (await store.get(spec.id)) ?? spec;
+        const result = { phase: 'DEFINE', spec: finalSpec, specPath, specHtmlPath, ambiguityScore, successCriteria, format: outputFormat, promptOptimization, status: finalStatus, draft: isDraft };
         // Write explore artifact for Gate G1 verification
         const artifactWriter = new WorkflowArtifactWriter(SCALE_DIR);
         artifactWriter.writeExploreResult({
@@ -557,10 +637,94 @@ export const phaseDefine = defineCommand({
                 console.log(`   HTML file: ${specHtmlPath}`);
             console.log(`   Ambiguity score: ${ambiguityScore.toFixed(2)}`);
             console.log(`   Success criteria: ${successCriteria.length}`);
-            console.log(`\n   Next: scale plan ${spec.id}\n`);
+            if (isDraft) {
+                console.log(`\n   Draft created (REVIEWING). Review, then confirm:`);
+                console.log(`   Next: scale define --confirm ${spec.id}\n`);
+            }
+            else {
+                console.log(`\n   Next: scale plan ${spec.id}\n`);
+            }
         }
     },
 });
+// Helper: Confirm a draft Spec (REVIEWING -> FROZEN) for the `define --confirm <id>` flow. Prints the outcome (JSON when `json` is true) and exits the process with code 1 on failure.
+async function confirmDraftSpec(store, fsm, specId, json) {
+    const spec = await store.get(specId);
+    if (!spec || spec.type !== 'Spec') { // an id that resolves to a non-Spec artifact is reported as not found too
+        console.error(`\nSpec not found: ${specId}\n`);
+        process.exit(1);
+    }
+    if (spec.status === 'FROZEN') { // already frozen: report it and return without transitioning or rewriting files
+        if (!json)
+            console.log(`\nSpec ${specId} is already FROZEN.\n`);
+        else
+            console.log(JSON.stringify({ phase: 'DEFINE', confirm: true, spec, status: 'FROZEN', alreadyFrozen: true }, null, 2));
+        return;
+    }
+    const approveResult = await fsm.canTransition(specId, 'approve'); // every non-FROZEN status reaches this check; the FSM decides whether 'approve' is allowed
+    if (!approveResult.allowed) {
+        if (!json) { // NOTE(review): in JSON mode a blocked transition exits 1 without printing anything - confirm this is intended
+            console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
+            console.error('   Spec cannot be confirmed due to:');
+            approveResult.blockedBy?.forEach(b => console.error(`   [GUARD] ${b.guard}: ${b.message}`));
+            console.error('\n   Resolve issues before confirming.');
+        }
+        process.exit(1);
+    }
+    await fsm.transition(specId, 'approve', { actor: { kind: 'human', userId: 'cli' } }); // recorded with a human/cli actor
+    // Refresh persisted markdown status (draft was written as REVIEWING).
+    const specPath = join(SCALE_DIR, 'specs', `${specId}.md`);
+    if (existsSync(specPath)) { // only an existing .md file is rewritten; NOTE(review): no .html counterpart is refreshed in this function - confirm
+        writeFileSync(specPath, generateSpecMarkdown(specId, spec.title, spec.payload, 'FROZEN')); // rendered from the title/payload read before the transition
+    }
+    const confirmed = await store.get(specId); // re-read so the JSON output carries the post-transition artifact
+    if (json) {
+        console.log(JSON.stringify({ phase: 'DEFINE', confirm: true, spec: confirmed, status: 'FROZEN' }, null, 2));
+    }
+    else {
+        console.log(`\nCONFIRM: ${specId}`);
+        console.log('   FSM: REVIEWING -> FROZEN ✓');
+        console.log(`\n   Next: scale plan ${specId}\n`);
+    }
+}
+// Helper: Resolve the originating Spec for a Task by walking Task -> Plan -> Spec. Resolves to { id, payload } or undefined when any link is missing.
+async function resolveSpecForTask(store, task) {
+    const planId = task?.parents?.[0]; // only the task's first parent is followed
+    if (!planId)
+        return undefined;
+    const plan = await store.get(planId); // NOTE(review): the fetched artifact's type is not checked here - presumably always a Plan; confirm
+    const specId = plan?.parents?.[0]; // again only the first parent is followed
+    if (!specId)
+        return undefined;
+    const spec = await store.get(specId);
+    if (!spec || spec.type !== 'Spec') // the end of the chain must actually be a Spec
+        return undefined;
+    return { id: spec.id, payload: spec.payload }; // trimmed view: id and payload only
+}
+// Helper: Collect free-form evidence signals (commands run, files, evidence refs/artifacts)
+// used to soft-map a Spec's verificationSurface during verify/ship (P0 Decision C1). Resolves to a flat array.
+async function gatherVerificationSignals(store, options) {
+    const signals = []; // order: commands, then files, then per-evidence entries
+    for (const command of options.commands ?? [])
+        if (command) // falsy entries (e.g. an unset command) are skipped
+            signals.push(command);
+    for (const file of options.files ?? [])
+        if (file) // falsy entries are skipped
+            signals.push(file);
+    for (const id of options.evidenceIds ?? []) { // records are fetched one at a time, in the given order
+        const record = await store.get(id);
+        if (!record || record.type !== 'Evidence') // missing ids and non-Evidence artifacts are ignored
+            continue;
+        const payload = record.payload; // NOTE(review): assumes every Evidence record carries a payload object - confirm
+        if (payload.verificationSurfaceRef)
+            signals.push(payload.verificationSurfaceRef);
+        if (payload.toolUsed)
+            signals.push(payload.toolUsed);
+        if (payload.artifacts?.length)
+            signals.push(...payload.artifacts);
+    }
+    return signals; // duplicates are not removed
+}
 // Helper: Generate plan markdown file
 function generatePlanMarkdown(id, specId, payload) {
     return `# Plan: ${id}
@@ -906,10 +1070,15 @@ export const phaseVerify = defineCommand({
         'skip-build': { type: 'boolean', default: false },
         'skip-lint': { type: 'boolean', default: false },
         'skip-test': { type: 'boolean', default: false },
+        progress: { type: 'boolean', default: false, description: 'Emit coarse verify progress events to stderr without changing JSON output' },
         json: { type: 'boolean', default: false },
     },
     async run({ args }) {
         const { store, fsm, workflowEngine } = getEngine();
+        const emitProgress = (event, detail) => {
+            if (isTruthyFlag(args.progress))
+                console.error(`[progress] ${event}: ${detail}`);
+        };
         // Validate task exists
         const task = await store.get(args['task-id']);
         if (!task || task.type !== 'Task') {
@@ -932,6 +1101,7 @@ export const phaseVerify = defineCommand({
             service: args.service,
             services: args.service ? undefined : taskServices,
         });
+        emitProgress('verify:start', `task=${args['task-id']} profile=${resolvedVerification.profileName} targets=${resolvedVerification.targets.length}`);
         if (!args.json) {
             for (const warning of resolvedVerification.warnings)
                 console.log(`   [WARN] ${warning}`);
@@ -947,6 +1117,7 @@ export const phaseVerify = defineCommand({
             if (!args.json && resolvedVerification.targets.length > 1) {
                 console.log(`\n   Target: ${target.service?.name ?? 'root'}`);
             }
+            emitProgress('target:start', target.service?.name ?? 'root');
             const targetResults = await workflowEngine.verify({
                 cwd: target.config.cwd,
                 build: args['build-cmd'] ?? target.config.build,
@@ -964,7 +1135,9 @@ export const phaseVerify = defineCommand({
                 tddStrict: isTruthyFlag(args['tdd-strict']),
             });
             gateResults.push(...targetResults);
+            emitProgress('target:done', `${target.service?.name ?? 'root'} gates=${targetResults.length}`);
         }
+        emitProgress('verify:gates-complete', `gates=${gateResults.length}`);
         // Step 2: Display gate results
         if (!args.json) {
             console.log('\nGate Results:');
@@ -1095,6 +1268,16 @@ export const phaseVerify = defineCommand({
         });
         const workflowOpenTaskBlockers = blockingWorkflowOpenTasks(workflowState.openTasks, args['task-id']);
         const workflowOpenTasksBlocked = workflowOpenTaskBlockers.length > 0;
+        // P0+ (decision E1): resolve the originating Spec up-front so the executional
+        // boundary / constraint checks can gate Task completion. Both reports are
+        // advisory under default/auto and blocking under full/ci/strict; the
+        // detection logic is identical, only the report mode and gating differ.
+        const spec = await resolveSpecForTask(store, task);
+        const boundaryEnforced = isEnforcedBoundaryProfile(resolvedVerification.profileName);
+        const boundaryEnforcement = evaluateBoundaries(taskFiles, spec?.payload.boundaries, boundaryEnforced);
+        const constraintCoverage = evaluateConstraints(spec?.payload.constraints, spec?.payload.verificationSurface, boundaryEnforced);
+        const boundaryBlocked = boundaryEnforced &&
+            countBoundaryBlockers(boundaryEnforcement, constraintCoverage) > 0;
         // Attempt FSM transition to COMPLETED
         // Guards: build_passed, lint_passed, tests_passed, open workflow tasks, and optional artifact policy.
         const codePassed = results.buildStatus === 'success' &&
@@ -1109,6 +1292,7 @@ export const phaseVerify = defineCommand({
             !skillInstallationBlocked &&
             !engineeringStandards.blocked &&
             !(toolEvidenceGate?.blocked ?? false) &&
+            !boundaryBlocked &&
             !workflowOpenTasksBlocked;
         let transitionResult = null;
         if (completionEligible) {
@@ -1147,6 +1331,9 @@ export const phaseVerify = defineCommand({
         else if (!args.json && toolEvidenceGate?.blocked) {
             console.log('\n   Tool evidence gate blocked completion - required tools need passed execution evidence');
         }
+        else if (!args.json && boundaryBlocked) {
+            console.log('\n   Boundary enforcement blocked completion - keep edits inside Spec boundaries and guard every constraint (enforced profile)');
+        }
         else if (!args.json && workflowOpenTasksBlocked) {
             console.log('\n   Workflow open tasks blocked completion - finish required workflow commands first');
         }
@@ -1203,7 +1390,7 @@ export const phaseVerify = defineCommand({
             toolEvidenceGatePassed: finalToolEvidenceGate ? !finalToolEvidenceGate.blocked : true,
         };
         await store.update(args['task-id'], { payload: finalPayload });
-        const metricGateStatus = (finalArtifactGate.blocked || finalSkillGate?.blocked || skillInstallationBlocked || engineeringStandards.blocked || finalToolEvidenceGate?.blocked || workflowOpenTasksBlocked)
+        const metricGateStatus = (finalArtifactGate.blocked || finalSkillGate?.blocked || skillInstallationBlocked || engineeringStandards.blocked || finalToolEvidenceGate?.blocked || boundaryBlocked || workflowOpenTasksBlocked)
             ? 'blocked'
             : undefined;
         const metricRecord = await recordVerificationMetric({
@@ -1215,6 +1402,24 @@ export const phaseVerify = defineCommand({
             artifactCheck,
             finalGateStatus: metricGateStatus,
         });
+        // P0 (Decision C1): soft-map the Spec's verificationSurface against evidence.
+        // Unmapped items are reported as warnings only — never blocking in P0.
+        // (`spec`, `boundaryEnforcement` and `constraintCoverage` were resolved
+        // above so the boundary checks could gate completion under enforced profiles.)
+        const verificationCommands = resolvedVerification.targets.flatMap(target => [
+            target.config.build, target.config.lint, target.config.test, target.config.coverage,
+        ]);
+        const surfaceSignals = await gatherVerificationSignals(store, {
+            evidenceIds: verificationEvidenceIds,
+            commands: [
+                ...verificationCommands,
+                args['build-cmd'], args['lint-cmd'], args['test-cmd'], args['coverage-cmd'],
+            ],
+            files: taskFiles,
+        });
+        const surfaceCoverage = spec?.payload.verificationSurface?.length
+            ? computeSurfaceCoverage(spec.payload.verificationSurface, surfaceSignals)
+            : undefined;
         const result = {
             phase: 'VERIFY',
             taskId: args['task-id'],
@@ -1240,12 +1445,23 @@ export const phaseVerify = defineCommand({
                 blocked: skillInstallationBlocked,
             },
             metric: metricRecord,
+            verificationSurfaceCoverage: surfaceCoverage,
+            boundaryEnforcement,
+            constraintCoverage,
             passed
         };
         if (args.json)
             console.log(JSON.stringify(result, null, 2));
         else {
             console.log(`\nVERIFY: ${passed ? 'PASSED' : 'FAILED'}`);
+            if (surfaceCoverage) {
+                for (const line of formatSurfaceCoverageWarnings(surfaceCoverage))
+                    console.log(`   ${line}`);
+            }
+            for (const line of formatBoundaryWarnings(boundaryEnforcement))
+                console.log(`   ${line}`);
+            for (const line of formatConstraintWarnings(constraintCoverage))
+                console.log(`   ${line}`);
             if (metricRecord)
                 console.log(`   Metrics: ${metricRecord.taskId} ${metricRecord.finalGateStatus} (fix iterations: ${metricRecord.fixIterations})`);
             if (artifactCheck && !artifactCheck.complete) {
@@ -1341,7 +1557,23 @@ async function reviewGitChanges(taskPayload) {
             diffs.push({ file: file.path, text: diff.stdout });
         }
     }
-    return analyzeReview({ statusOutput, diffs, taskPayload, verificationEvidence });
+    return { ...analyzeReview({ statusOutput, diffs, taskPayload, verificationEvidence }), diffs };
+}
+function normalizeReviewMode(value) { // maps a review-mode value onto one of: 'ai-self', 'fresh-subagent', 'hybrid'
+    return value === 'fresh-subagent' || value === 'hybrid' ? value : 'ai-self'; // anything else (including undefined) falls back to 'ai-self'
+}
+// Build a compact diff summary (one '# <file>' header per diff + that diff's added lines) for the advisory
+// LLM-as-Judge (P1.4). Capped so it never blows the model/context budget. Expects an array of { file, text } entries.
+function buildJudgeDiffSummary(diffs) {
+    const parts = [];
+    for (const diff of diffs) {
+        const added = diff.text
+            .split('\n')
+            .filter(line => line.startsWith('+') && !line.startsWith('+++')) // keep added lines, drop '+++' file headers; NOTE(review): an added line whose content begins with '++' is dropped as well
+            .map(line => line.slice(1)); // strip the leading '+' marker
+        parts.push(`# ${diff.file}\n${added.join('\n')}`); // a diff with no added lines still contributes its header
+    }
+    return parts.join('\n\n').slice(0, 6000); // hard cap of 6000 UTF-16 code units; the cut may land mid-line
 }
 function collectReviewedFiles(records) {
     const reviewed = new Set();
@@ -1553,6 +1785,8 @@ export const phaseReview = defineCommand({
         'check-style': { type: 'boolean', default: true },
         format: { type: 'string', alias: 'f', description: 'Output format: html or md (default: html)' },
         brand: { type: 'string', description: 'Brand theme for HTML output (vercel/stripe/notion/linear/github)' },
+        judge: { type: 'boolean', default: true, description: 'Run the advisory LLM-as-Judge spec-conformance check (P1.4)' },
+        mode: { type: 'string', default: 'ai-self', description: 'Review mode: ai-self (default) | fresh-subagent | hybrid (P2.2)' },
         json: { type: 'boolean', default: false },
     },
     async run({ args }) {
@@ -1586,12 +1820,44 @@ export const phaseReview = defineCommand({
         const findings = review.findings;
         const summary = summarizeFindings(findings);
         const passed = summary.critical === 0 && summary.high === 0;
+        const reviewMode = normalizeReviewMode(args.mode);
+        // Resolve the originating Spec once; both the advisory judge (P1.4) and the
+        // fresh-context verifier (P2.2) read its outcome / verificationSurface.
+        const needsSpec = args.judge || reviewMode !== 'ai-self';
+        const spec = needsSpec ? await resolveSpecForTask(store, task) : undefined;
+        const diffSummary = needsSpec ? buildJudgeDiffSummary(review.diffs) : '';
+        // P1.4 (decision K1): advisory LLM-as-Judge. Never part of `passed`.
+        let judgeVerdict;
+        if (args.judge) {
+            const judge = new LlmJudge(new JsonLlmClient(), new JudgePromptStore(SCALE_DIR));
+            judgeVerdict = await judge.judge({
+                outcome: spec?.payload.what,
+                verificationSurface: spec?.payload.verificationSurface ?? [],
+                diffSummary,
+                reviewFindings: summary,
+            });
+        }
+        // P2.2 (decisions M1/N1/O1): fresh-context verifier runs only for
+        // fresh-subagent / hybrid modes, on isolated input (surface + diff + gate
+        // summary, no build-agent history). Advisory only — never blocks ship.
+        let freshVerifyVerdict;
+        if (reviewMode !== 'ai-self') {
+            freshVerifyVerdict = await new FreshContextVerifier(new JsonLlmClient()).verify({
+                outcome: spec?.payload.what,
+                verificationSurface: spec?.payload.verificationSurface ?? [],
+                diffSummary,
+                gateSummary: `critical=${summary.critical} high=${summary.high} medium=${summary.medium} low=${summary.low}`,
+            });
+        }
         const record = reviewStore.saveReview({
             taskId: args['task-id'],
             passed,
             findings,
             changedFiles: review.changedFiles.map(file => normalizeGitPath(file.path)),
             summary,
+            judge: judgeVerdict,
+            reviewMode,
+            freshVerify: freshVerifyVerdict,
         });
         if (task && taskPayload) {
             const updatedPayload = {
@@ -1638,6 +1904,9 @@ export const phaseReview = defineCommand({
             findings,
             changedFiles: review.changedFiles.map(file => normalizeGitPath(file.path)),
             summary,
+            judge: judgeVerdict,
+            reviewMode,
+            freshVerify: freshVerifyVerdict,
             karpathy: karpathyReport,
             passed,
             format: reviewOutputFormat,
@@ -1660,6 +1929,18 @@ export const phaseReview = defineCommand({
             console.log(`LOW:      ${summary.low} issues`);
             console.log('----------------------------------------');
             findings.slice(0, 10).forEach(f => console.log(`  [${f.severity}] ${f.file ? `${f.file}: ` : ''}${f.description}`));
+            if (judgeVerdict) {
+                console.log(`\nJudge (advisory, ${judgeVerdict.modelUsed}): ${judgeVerdict.decision.toUpperCase()} (confidence ${judgeVerdict.confidence.toFixed(2)})`);
+                console.log(`  ${judgeVerdict.rationale}`);
+                if (judgeVerdict.unmetSurfaces.length)
+                    console.log(`  Unmet surfaces: ${judgeVerdict.unmetSurfaces.join('; ')}`);
+            }
+            if (freshVerifyVerdict) {
+                console.log(`\nFresh-context verifier (advisory, ${freshVerifyVerdict.modelUsed}): ${freshVerifyVerdict.decision.toUpperCase()} (confidence ${freshVerifyVerdict.confidence.toFixed(2)})`);
+                console.log(`  ${freshVerifyVerdict.rationale}`);
+                if (freshVerifyVerdict.unmetSurfaces.length)
+                    console.log(`  Unmet surfaces: ${freshVerifyVerdict.unmetSurfaces.join('; ')}`);
+            }
             if (passed) {
                 console.log('\nReview passed (no CRITICAL issues)');
                 console.log('\n   Next: scale ship ' + (args['task-id'] ?? '<task-id>') + '\n');
@@ -1808,6 +2089,15 @@ export const phaseShip = defineCommand({
                 console.error("Warning: Plan completion transition failed:", e.message);
             }
         }
+        // P0 (Decision C1): soft-map the Spec's verificationSurface at ship time too.
+        const shipSpec = await resolveSpecForTask(store, task);
+        const shipSignals = await gatherVerificationSignals(store, {
+            evidenceIds: payload.verificationEvidenceIds,
+            files: payload.filesInvolved,
+        });
+        const shipSurfaceCoverage = shipSpec?.payload.verificationSurface?.length
+            ? computeSurfaceCoverage(shipSpec.payload.verificationSurface, shipSignals)
+            : undefined;
         // === WorkflowEngine Integration ===
         // Generate HonestDelivery report
         if (!args.json) {
@@ -1844,6 +2134,10 @@ export const phaseShip = defineCommand({
                 unverifiedItems.forEach(item => console.log(`  [UNVERIFIED] ${item}`));
                 console.log('');
             }
+            if (shipSurfaceCoverage) {
+                for (const line of formatSurfaceCoverageWarnings(shipSurfaceCoverage))
+                    console.log(line);
+            }
         }
         const result = {
             phase: 'SHIP',
@@ -1863,6 +2157,7 @@ export const phaseShip = defineCommand({
                 blockers: workspaceBoundary.blockers,
                 warnings: workspaceBoundary.warnings,
             } : null,
+            verificationSurfaceCoverage: shipSurfaceCoverage,
         };
         if (args.json)
             console.log(JSON.stringify(result, null, 2));