@hongmaple0820/scale-engine 0.47.0 → 0.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128):
  1. package/README.en.md +8 -2
  2. package/README.md +9 -3
  3. package/dist/agents/evidenceDiscipline.d.ts +7 -0
  4. package/dist/agents/evidenceDiscipline.js +21 -0
  5. package/dist/agents/evidenceDiscipline.js.map +1 -0
  6. package/dist/agents/profiles.js +8 -1
  7. package/dist/agents/profiles.js.map +1 -1
  8. package/dist/agents/types.d.ts +1 -0
  9. package/dist/api/cli.js +975 -6222
  10. package/dist/api/cli.js.map +1 -1
  11. package/dist/artifact/types.d.ts +59 -0
  12. package/dist/artifact/types.js.map +1 -1
  13. package/dist/cli/artifactCrudCommands.d.ts +67 -0
  14. package/dist/cli/artifactCrudCommands.js +182 -0
  15. package/dist/cli/artifactCrudCommands.js.map +1 -0
  16. package/dist/cli/codegraphCommands.d.ts +1 -0
  17. package/dist/cli/codegraphCommands.js +241 -0
  18. package/dist/cli/codegraphCommands.js.map +1 -0
  19. package/dist/cli/contextCommands.d.ts +1 -0
  20. package/dist/cli/contextCommands.js +415 -0
  21. package/dist/cli/contextCommands.js.map +1 -0
  22. package/dist/cli/cortexCommands.d.ts +36 -0
  23. package/dist/cli/cortexCommands.js +76 -4
  24. package/dist/cli/cortexCommands.js.map +1 -1
  25. package/dist/cli/dependencyTddCommands.d.ts +92 -0
  26. package/dist/cli/dependencyTddCommands.js +174 -0
  27. package/dist/cli/dependencyTddCommands.js.map +1 -0
  28. package/dist/cli/diagnoseHuntCommands.d.ts +135 -0
  29. package/dist/cli/diagnoseHuntCommands.js +224 -0
  30. package/dist/cli/diagnoseHuntCommands.js.map +1 -0
  31. package/dist/cli/engineBootstrap.d.ts +39 -0
  32. package/dist/cli/engineBootstrap.js +129 -0
  33. package/dist/cli/engineBootstrap.js.map +1 -0
  34. package/dist/cli/evalCommands.d.ts +1 -0
  35. package/dist/cli/evalCommands.js +273 -0
  36. package/dist/cli/evalCommands.js.map +1 -0
  37. package/dist/cli/evolveDoctorCommands.d.ts +18 -0
  38. package/dist/cli/evolveDoctorCommands.js +59 -0
  39. package/dist/cli/evolveDoctorCommands.js.map +1 -0
  40. package/dist/cli/gateInlineCommands.d.ts +43 -0
  41. package/dist/cli/gateInlineCommands.js +74 -0
  42. package/dist/cli/gateInlineCommands.js.map +1 -0
  43. package/dist/cli/initConfigCommands.d.ts +138 -0
  44. package/dist/cli/initConfigCommands.js +602 -0
  45. package/dist/cli/initConfigCommands.js.map +1 -0
  46. package/dist/cli/metaGovernanceCommands.d.ts +11 -0
  47. package/dist/cli/metaGovernanceCommands.js +55 -0
  48. package/dist/cli/metaGovernanceCommands.js.map +1 -0
  49. package/dist/cli/phaseCommands.d.ts +53 -1
  50. package/dist/cli/phaseCommands.js +317 -22
  51. package/dist/cli/phaseCommands.js.map +1 -1
  52. package/dist/cli/runtimeSkillCommands.d.ts +6 -0
  53. package/dist/cli/runtimeSkillCommands.js +1515 -0
  54. package/dist/cli/runtimeSkillCommands.js.map +1 -0
  55. package/dist/cli/sessionCommands.d.ts +17 -0
  56. package/dist/cli/sessionCommands.js +38 -0
  57. package/dist/cli/sessionCommands.js.map +1 -0
  58. package/dist/cli/toolAgentCommands.d.ts +3 -0
  59. package/dist/cli/toolAgentCommands.js +441 -0
  60. package/dist/cli/toolAgentCommands.js.map +1 -0
  61. package/dist/cli/transitionCommands.d.ts +62 -0
  62. package/dist/cli/transitionCommands.js +174 -0
  63. package/dist/cli/transitionCommands.js.map +1 -0
  64. package/dist/cli/upgradeAssetsCommands.d.ts +44 -0
  65. package/dist/cli/upgradeAssetsCommands.js +933 -0
  66. package/dist/cli/upgradeAssetsCommands.js.map +1 -0
  67. package/dist/cli/workflowEvidenceCommands.d.ts +34 -0
  68. package/dist/cli/workflowEvidenceCommands.js +130 -0
  69. package/dist/cli/workflowEvidenceCommands.js.map +1 -0
  70. package/dist/cortex/InstinctStore.d.ts +32 -1
  71. package/dist/cortex/InstinctStore.js +235 -42
  72. package/dist/cortex/InstinctStore.js.map +1 -1
  73. package/dist/cortex/InstinctValidation.d.ts +9 -0
  74. package/dist/cortex/InstinctValidation.js +55 -0
  75. package/dist/cortex/InstinctValidation.js.map +1 -0
  76. package/dist/cortex/SessionInjector.js +13 -6
  77. package/dist/cortex/SessionInjector.js.map +1 -1
  78. package/dist/eval/BenchmarkPublisher.d.ts +2 -0
  79. package/dist/eval/BenchmarkPublisher.js +43 -0
  80. package/dist/eval/BenchmarkPublisher.js.map +1 -1
  81. package/dist/guardrails/ast/confirmers.d.ts +18 -0
  82. package/dist/guardrails/ast/confirmers.js +69 -0
  83. package/dist/guardrails/ast/confirmers.js.map +1 -0
  84. package/dist/guardrails/ast/parse.d.ts +20 -0
  85. package/dist/guardrails/ast/parse.js +51 -0
  86. package/dist/guardrails/ast/parse.js.map +1 -0
  87. package/dist/output/HTMLDocumentRenderer.d.ts +9 -0
  88. package/dist/output/HTMLDocumentRenderer.js +19 -0
  89. package/dist/output/HTMLDocumentRenderer.js.map +1 -1
  90. package/dist/review/FreshContextVerifier.d.ts +35 -0
  91. package/dist/review/FreshContextVerifier.js +120 -0
  92. package/dist/review/FreshContextVerifier.js.map +1 -0
  93. package/dist/review/JsonLlmClient.d.ts +37 -0
  94. package/dist/review/JsonLlmClient.js +94 -0
  95. package/dist/review/JsonLlmClient.js.map +1 -0
  96. package/dist/review/LlmJudge.d.ts +61 -0
  97. package/dist/review/LlmJudge.js +167 -0
  98. package/dist/review/LlmJudge.js.map +1 -0
  99. package/dist/version.d.ts +1 -1
  100. package/dist/version.js +1 -1
  101. package/dist/workflow/BoundaryEnforcement.d.ts +60 -0
  102. package/dist/workflow/BoundaryEnforcement.js +182 -0
  103. package/dist/workflow/BoundaryEnforcement.js.map +1 -0
  104. package/dist/workflow/EngineeringStandards.js +19 -9
  105. package/dist/workflow/EngineeringStandards.js.map +1 -1
  106. package/dist/workflow/GateCatalog.js +12 -2
  107. package/dist/workflow/GateCatalog.js.map +1 -1
  108. package/dist/workflow/ProfileEnforcement.d.ts +7 -0
  109. package/dist/workflow/ProfileEnforcement.js +12 -0
  110. package/dist/workflow/ProfileEnforcement.js.map +1 -0
  111. package/dist/workflow/ReviewStore.d.ts +10 -0
  112. package/dist/workflow/ReviewStore.js.map +1 -1
  113. package/dist/workflow/SurfaceCoverage.d.ts +19 -0
  114. package/dist/workflow/SurfaceCoverage.js +57 -0
  115. package/dist/workflow/SurfaceCoverage.js.map +1 -0
  116. package/dist/workflow/gates/EnhancedGates.js +2 -0
  117. package/dist/workflow/gates/EnhancedGates.js.map +1 -1
  118. package/dist/workflow/gates/TestIntegrityGate.d.ts +51 -0
  119. package/dist/workflow/gates/TestIntegrityGate.js +175 -0
  120. package/dist/workflow/gates/TestIntegrityGate.js.map +1 -0
  121. package/dist/workflow/types.d.ts +1 -1
  122. package/docs/guides/DEVELOPMENT_WORKFLOW.md +28 -0
  123. package/docs/workflow/E2E_EXAMPLE.md +133 -0
  124. package/docs/workflow/README.md +6 -0
  125. package/docs/workflow/TEMPLATE_GUIDE.md +162 -0
  126. package/docs/workflow/templates/plan.md +26 -0
  127. package/docs/workflow/templates/spec.md +28 -0
  128. package/package.json +3 -1
@@ -18,6 +18,9 @@ import { WorkflowArtifactWriter } from '../workflow/WorkflowArtifactWriter.js';
18
18
  import { resolveVerificationTargets } from '../workflow/VerificationProfile.js';
19
19
  import { EvidenceStore } from '../workflow/EvidenceStore.js';
20
20
  import { ReviewStore } from '../workflow/ReviewStore.js';
21
+ import { JudgePromptStore, LlmJudge } from '../review/LlmJudge.js';
22
+ import { JsonLlmClient } from '../review/JsonLlmClient.js';
23
+ import { FreshContextVerifier } from '../review/FreshContextVerifier.js';
21
24
  import { TaskMetricsStore } from '../workflow/TaskMetricsStore.js';
22
25
  import { appendVerificationArtifact, checkTaskArtifactCompleteness, scaffoldTaskArtifacts } from '../workflow/TaskArtifactScaffolder.js';
23
26
  import { createWorkflowGuidance, renderWorkflowGuidance } from '../workflow/WorkflowGuidance.js';
@@ -33,6 +36,8 @@ import { loadToolPolicy } from '../tools/ToolPolicy.js';
33
36
  import { runSafeCommand } from '../tools/SafeCommandRunner.js';
34
37
  import { join } from 'node:path';
35
38
  import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from 'node:fs';
39
+ import { computeSurfaceCoverage, formatSurfaceCoverageWarnings } from '../workflow/SurfaceCoverage.js';
40
+ import { evaluateBoundaries, evaluateConstraints, formatBoundaryWarnings, formatConstraintWarnings, isEnforcedBoundaryProfile, countBoundaryBlockers, } from '../workflow/BoundaryEnforcement.js';
36
41
  import { HTMLDocumentRenderer } from '../output/HTMLDocumentRenderer.js';
37
42
  import { SCALE_ENGINE_VERSION } from '../version.js';
38
43
  import { optimizeCodingPrompt } from '../prompts/PromptOptimizer.js';
@@ -322,11 +327,11 @@ async function recordVerificationMetric(options) {
322
327
  return record;
323
328
  }
324
329
  // Helper: Generate spec markdown file
325
- function generateSpecMarkdown(id, title, payload) {
330
+ function generateSpecMarkdown(id, title, payload, status = 'FROZEN') {
326
331
  return `# Spec: ${title}
327
332
 
328
333
  **ID**: ${id}
329
- **Status**: FROZEN
334
+ **Status**: ${status}
330
335
  **Ambiguity Score**: ${payload.ambiguityScore ?? 0.15}
331
336
 
332
337
  ## What
@@ -343,11 +348,34 @@ ${payload.edgeCases.map(e => `- ${e}`).join('\n') || '(none defined)'}
343
348
 
344
349
  ## North Star
345
350
  ${payload.northStar || 'User value delivered'}
346
-
351
+ ${renderSpecContractSections(payload)}
347
352
  ---
348
353
  *Generated by SCALE Engine DEFINE phase*
349
354
  `;
350
355
  }
356
// Helper: Render the optional P0 six-element contract sections as markdown.
// Each section is emitted only when its payload field is set; when none are set
// the function returns a single newline.
//
// payload: Spec payload. Reads verificationSurface, constraints, boundaries,
//          iterationStrategy and blockedStopCondition.
// returns: '\n' when no section applies, otherwise the rendered sections joined
//          by newlines and wrapped in a leading and trailing newline.
function renderSpecContractSections(payload) {
    const sections = [];
    const bullets = (items) => items.map(item => `- ${item}`).join('\n');
    if (payload.verificationSurface?.length) {
        sections.push(`\n## Verification Surface\n${bullets(payload.verificationSurface)}`);
    }
    if (payload.constraints?.length) {
        sections.push(`\n## Constraints\n${bullets(payload.constraints)}`);
    }
    if (payload.boundaries) {
        const b = payload.boundaries;
        // A boundaries object may carry only some of its lists; a missing or
        // non-array list is rendered as "(none)" instead of throwing.
        const line = (label, items) => {
            const shown = Array.isArray(items) && items.length ? items.join(', ') : '(none)';
            return `- ${label}: ${shown}`;
        };
        sections.push(`\n## Boundaries\n${line('Files', b.files)}\n${line('Tools', b.tools)}\n${line('Forbidden', b.forbidden)}`);
    }
    if (payload.iterationStrategy) {
        sections.push(`\n## Iteration Strategy\n${payload.iterationStrategy}`);
    }
    if (payload.blockedStopCondition) {
        sections.push(`\n## Blocked Stop Condition\n${payload.blockedStopCondition}`);
    }
    return sections.length ? `\n${sections.join('\n')}\n` : '\n';
}
351
379
  // Helper: Calculate ambiguity score
352
380
  function calculateAmbiguityScore(description, successCriteria) {
353
381
  let score = 0.2; // Base score (maximum threshold)
@@ -364,9 +392,20 @@ function calculateAmbiguityScore(description, successCriteria) {
364
392
  export const phaseDefine = defineCommand({
365
393
  meta: { name: 'define', description: 'DEFINE: Create Spec with AmbiguityScorer + SocraticQuestioner (/spec)' },
366
394
  args: {
367
- title: { type: 'positional', required: true },
395
+ title: { type: 'positional', required: false },
368
396
  description: { type: 'string', alias: 'd' },
369
397
  'success-criteria': { type: 'string', alias: 'c', description: 'Comma-separated criteria' },
398
+ // P0 draft/confirm two-step lifecycle (backward compatible: bare `define` still auto-freezes)
399
+ draft: { type: 'boolean', default: false, description: 'Stop the new Spec at REVIEWING (requires `define --confirm <id>` to FROZEN)' },
400
+ confirm: { type: 'string', description: 'Confirm and freeze an existing draft Spec id (REVIEWING -> FROZEN)' },
401
+ // P0 six-element contract inputs (optional, comma-separated where plural)
402
+ 'verification-surface': { type: 'string', description: 'Comma-separated evidence sources: test names / benchmark commands / artifact paths' },
403
+ 'constraints': { type: 'string', description: 'Comma-separated invariants that must not regress (perf/security/compat)' },
404
+ 'boundary-files': { type: 'string', description: 'Comma-separated files allowed to change' },
405
+ 'boundary-tools': { type: 'string', description: 'Comma-separated tools allowed to use' },
406
+ 'boundary-forbidden': { type: 'string', description: 'Comma-separated scope that must not be touched' },
407
+ 'iteration-strategy': { type: 'string', description: 'How each build iteration decides the next step' },
408
+ 'blocked-stop': { type: 'string', description: 'What to report / what is needed to unblock when no path is viable' },
370
409
  // Socratic refinement answers (optional)
371
410
  'goal': { type: 'string', description: 'Goal answer for Socratic refinement' },
372
411
  'constraint': { type: 'string', description: 'Constraint answer for Socratic refinement' },
@@ -380,6 +419,15 @@ export const phaseDefine = defineCommand({
380
419
  },
381
420
  async run({ args }) {
382
421
  const { store, fsm, workflowEngine } = getEngine();
422
+ // P0: --confirm freezes an existing draft Spec (REVIEWING -> FROZEN) without re-creating it.
423
+ if (args.confirm) {
424
+ await confirmDraftSpec(store, fsm, String(args.confirm), Boolean(args.json));
425
+ return;
426
+ }
427
+ if (!args.title) {
428
+ console.error('\nMissing required argument: title (or pass --confirm <spec-id> to freeze a draft)\n');
429
+ process.exit(1);
430
+ }
383
431
  const rawDesc = String(args.description ?? args.title);
384
432
  // Parse success criteria
385
433
  const successCriteria = args['success-criteria']
@@ -467,6 +515,17 @@ export const phaseDefine = defineCommand({
467
515
  initialStatus: 'DRAFT',
468
516
  createdBy: { kind: 'human', userId: 'cli' },
469
517
  });
518
+ // P0 six-element contract inputs (optional; omitted fields stay undefined)
519
+ const csv = (v) => {
520
+ const items = typeof v === 'string' ? v.split(',').map(s => s.trim()).filter(Boolean) : [];
521
+ return items.length ? items : undefined;
522
+ };
523
+ const boundaryFiles = csv(args['boundary-files']);
524
+ const boundaryTools = csv(args['boundary-tools']);
525
+ const boundaryForbidden = csv(args['boundary-forbidden']);
526
+ const boundaries = (boundaryFiles || boundaryTools || boundaryForbidden)
527
+ ? { files: boundaryFiles ?? [], tools: boundaryTools ?? [], forbidden: boundaryForbidden ?? [] }
528
+ : undefined;
470
529
  // Create Spec artifact with proper payload (use refined requirement if available)
471
530
  const specPayload = {
472
531
  what: refinedRequirement,
@@ -475,6 +534,11 @@ export const phaseDefine = defineCommand({
475
534
  edgeCases: [],
476
535
  northStar: 'Deliver user value',
477
536
  ambiguityScore,
537
+ verificationSurface: csv(args['verification-surface']),
538
+ constraints: csv(args['constraints']),
539
+ boundaries,
540
+ iterationStrategy: typeof args['iteration-strategy'] === 'string' && args['iteration-strategy'] ? String(args['iteration-strategy']) : undefined,
541
+ blockedStopCondition: typeof args['blocked-stop'] === 'string' && args['blocked-stop'] ? String(args['blocked-stop']) : undefined,
478
542
  };
479
543
  const spec = await store.create({
480
544
  type: 'Spec', title: args.title,
@@ -483,11 +547,14 @@ export const phaseDefine = defineCommand({
483
547
  initialStatus: 'DRAFT',
484
548
  createdBy: { kind: 'human', userId: 'cli' },
485
549
  });
550
+ // Draft mode stops at REVIEWING; default mode auto-freezes (FROZEN).
551
+ const isDraft = Boolean(args.draft);
552
+ const finalStatus = isDraft ? 'REVIEWING' : 'FROZEN';
486
553
  // Generate spec markdown file
487
554
  const specsDir = join(SCALE_DIR, 'specs');
488
555
  ensureDir(specsDir);
489
556
  const specPath = join(specsDir, `${spec.id}.md`);
490
- writeFileSync(specPath, generateSpecMarkdown(spec.id, args.title, specPayload));
557
+ writeFileSync(specPath, generateSpecMarkdown(spec.id, args.title, specPayload, finalStatus));
491
558
  // Generate spec HTML file (default format: html)
492
559
  const outputFormat = args.format ?? 'md';
493
560
  let specHtmlPath;
@@ -496,7 +563,7 @@ export const phaseDefine = defineCommand({
496
563
  title: args.title,
497
564
  brand: args.brand,
498
565
  version: SCALE_ENGINE_VERSION,
499
- status: 'FROZEN',
566
+ status: finalStatus,
500
567
  });
501
568
  const html = renderer.renderSpec({
502
569
  id: spec.id,
@@ -507,11 +574,16 @@ export const phaseDefine = defineCommand({
507
574
  edgeCases: specPayload.edgeCases,
508
575
  northStar: specPayload.northStar,
509
576
  ambiguityScore,
577
+ verificationSurface: specPayload.verificationSurface,
578
+ constraints: specPayload.constraints,
579
+ boundaries: specPayload.boundaries,
580
+ iterationStrategy: specPayload.iterationStrategy,
581
+ blockedStopCondition: specPayload.blockedStopCondition,
510
582
  });
511
583
  specHtmlPath = join(specsDir, `${spec.id}.html`);
512
584
  renderer.writeToFile(html, specHtmlPath);
513
585
  }
514
- // FSM transitions: DRAFT -> REVIEWING -> FROZEN
586
+ // FSM transitions: DRAFT -> REVIEWING (-> FROZEN unless --draft)
515
587
  // Phase 1: refine (DRAFT -> REVIEWING) - no guards
516
588
  const refineResult = await fsm.canTransition(spec.id, 'refine');
517
589
  if (!refineResult.allowed) {
@@ -522,22 +594,30 @@ export const phaseDefine = defineCommand({
522
594
  process.exit(1);
523
595
  }
524
596
  await fsm.transition(spec.id, 'refine', { actor: { kind: 'system', component: 'phase-define' } });
525
- // Phase 2: approve (REVIEWING -> FROZEN) - guards: ambiguityScore <= 0.2, has successCriteria
526
- const approveResult = await fsm.canTransition(spec.id, 'approve');
527
- if (!approveResult.allowed) {
597
+ // Phase 2: approve (REVIEWING -> FROZEN) - guards: ambiguityScore <= 0.2, has successCriteria.
598
+ // Skipped in --draft mode: the draft waits for `scale define --confirm <id>`.
599
+ if (!isDraft) {
600
+ const approveResult = await fsm.canTransition(spec.id, 'approve');
601
+ if (!approveResult.allowed) {
602
+ if (!args.json) {
603
+ console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
604
+ console.error(' Spec cannot be frozen due to:');
605
+ approveResult.blockedBy?.forEach(b => console.error(` [GUARD] ${b.guard}: ${b.message}`));
606
+ console.error('\n Resolve issues before proceeding.');
607
+ }
608
+ process.exit(1);
609
+ }
610
+ await fsm.transition(spec.id, 'approve', { actor: { kind: 'system', component: 'phase-define' } });
528
611
  if (!args.json) {
529
- console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
530
- console.error(' Spec cannot be frozen due to:');
531
- approveResult.blockedBy?.forEach(b => console.error(` [GUARD] ${b.guard}: ${b.message}`));
532
- console.error('\n Resolve issues before proceeding.');
612
+ console.log(' FSM: DRAFT -> REVIEWING -> FROZEN');
533
613
  }
534
- process.exit(1);
535
614
  }
536
- await fsm.transition(spec.id, 'approve', { actor: { kind: 'system', component: 'phase-define' } });
537
- if (!args.json) {
538
- console.log(' FSM: DRAFT -> REVIEWING -> FROZEN ✓');
615
+ else if (!args.json) {
616
+ console.log(' FSM: DRAFT -> REVIEWING (draft; not yet FROZEN)');
539
617
  }
540
- const result = { phase: 'DEFINE', spec, specPath, specHtmlPath, ambiguityScore, successCriteria, format: outputFormat, promptOptimization };
618
+ // Refresh the spec so the reported status reflects the post-transition state.
619
+ const finalSpec = (await store.get(spec.id)) ?? spec;
620
+ const result = { phase: 'DEFINE', spec: finalSpec, specPath, specHtmlPath, ambiguityScore, successCriteria, format: outputFormat, promptOptimization, status: finalStatus, draft: isDraft };
541
621
  // Write explore artifact for Gate G1 verification
542
622
  const artifactWriter = new WorkflowArtifactWriter(SCALE_DIR);
543
623
  artifactWriter.writeExploreResult({
@@ -557,10 +637,94 @@ export const phaseDefine = defineCommand({
557
637
  console.log(` HTML file: ${specHtmlPath}`);
558
638
  console.log(` Ambiguity score: ${ambiguityScore.toFixed(2)}`);
559
639
  console.log(` Success criteria: ${successCriteria.length}`);
560
- console.log(`\n Next: scale plan ${spec.id}\n`);
640
+ if (isDraft) {
641
+ console.log(`\n Draft created (REVIEWING). Review, then confirm:`);
642
+ console.log(` Next: scale define --confirm ${spec.id}\n`);
643
+ }
644
+ else {
645
+ console.log(`\n Next: scale plan ${spec.id}\n`);
646
+ }
561
647
  }
562
648
  },
563
649
  });
650
// Helper: Confirm a draft Spec (REVIEWING -> FROZEN) for the `define --confirm <id>` flow.
//
// store:  artifact store; `get(id)` is awaited to load the Spec.
// fsm:    state machine; `canTransition` / `transition` are awaited with the 'approve' event.
// specId: id of the Spec to freeze.
// json:   when truthy, results are printed as JSON and guard details are not printed.
//
// Exits the process with code 1 when the Spec is missing or the transition is blocked.
async function confirmDraftSpec(store, fsm, specId, json) {
    const draft = await store.get(specId);
    const isSpec = Boolean(draft) && draft.type === 'Spec';
    if (!isSpec) {
        console.error(`\nSpec not found: ${specId}\n`);
        process.exit(1);
    }
    // Already frozen: report and stop without touching the FSM.
    if (draft.status === 'FROZEN') {
        const report = json
            ? JSON.stringify({ phase: 'DEFINE', confirm: true, spec: draft, status: 'FROZEN', alreadyFrozen: true }, null, 2)
            : `\nSpec ${specId} is already FROZEN.\n`;
        console.log(report);
        return;
    }
    const verdict = await fsm.canTransition(specId, 'approve');
    if (!verdict.allowed) {
        if (!json) {
            console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
            console.error(' Spec cannot be confirmed due to:');
            for (const blocker of verdict.blockedBy ?? []) {
                console.error(` [GUARD] ${blocker.guard}: ${blocker.message}`);
            }
            console.error('\n Resolve issues before confirming.');
        }
        process.exit(1);
    }
    await fsm.transition(specId, 'approve', { actor: { kind: 'human', userId: 'cli' } });
    // The draft markdown was written with status REVIEWING; rewrite it as FROZEN if it exists.
    const markdownPath = join(SCALE_DIR, 'specs', `${specId}.md`);
    if (existsSync(markdownPath)) {
        const markdown = generateSpecMarkdown(specId, draft.title, draft.payload, 'FROZEN');
        writeFileSync(markdownPath, markdown);
    }
    // Re-read so the reported Spec reflects the post-transition state.
    const confirmed = await store.get(specId);
    if (json) {
        const body = { phase: 'DEFINE', confirm: true, spec: confirmed, status: 'FROZEN' };
        console.log(JSON.stringify(body, null, 2));
        return;
    }
    console.log(`\nCONFIRM: ${specId}`);
    console.log(' FSM: REVIEWING -> FROZEN ✓');
    console.log(`\n Next: scale plan ${specId}\n`);
}
690
// Helper: Resolve the originating Spec for a Task by walking Task -> Plan -> Spec.
// Only the first entry of each `parents` list is followed.
//
// returns: `{ id, payload }` of the Spec, or undefined when any link in the
//          chain is missing or the final artifact is not of type 'Spec'.
async function resolveSpecForTask(store, task) {
    const firstParent = (artifact) => artifact?.parents?.[0];
    const planId = firstParent(task);
    const plan = planId ? await store.get(planId) : undefined;
    const specId = firstParent(plan);
    const candidate = specId ? await store.get(specId) : undefined;
    if (candidate?.type !== 'Spec') {
        return undefined;
    }
    return { id: candidate.id, payload: candidate.payload };
}
704
// Helper: Collect free-form evidence signals (commands run, files, evidence refs/artifacts)
// used to soft-map a Spec's verificationSurface during verify/ship (P0 Decision C1).
//
// store:   artifact store; `get(id)` is awaited once per evidence id, in order.
// options: { commands?, files?, evidenceIds? } - each list is optional.
// returns: flat array of truthy signals in the order commands, files, then per
//          evidence record: verificationSurfaceRef, toolUsed, artifacts.
async function gatherVerificationSignals(store, options) {
    const signals = [];
    // Falsy entries (unset commands, empty strings) are dropped.
    for (const command of options.commands ?? []) {
        if (command) {
            signals.push(command);
        }
    }
    for (const file of options.files ?? []) {
        if (file) {
            signals.push(file);
        }
    }
    for (const id of options.evidenceIds ?? []) {
        const record = await store.get(id);
        if (!record || record.type !== 'Evidence') {
            continue;
        }
        // An Evidence record without a payload contributes no signals rather than throwing.
        const payload = record.payload ?? {};
        if (payload.verificationSurfaceRef) {
            signals.push(payload.verificationSurfaceRef);
        }
        if (payload.toolUsed) {
            signals.push(payload.toolUsed);
        }
        // Only spread real arrays, and apply the same falsy filter as commands/files.
        if (Array.isArray(payload.artifacts)) {
            signals.push(...payload.artifacts.filter(Boolean));
        }
    }
    return signals;
}
564
728
  // Helper: Generate plan markdown file
565
729
  function generatePlanMarkdown(id, specId, payload) {
566
730
  return `# Plan: ${id}
@@ -906,10 +1070,15 @@ export const phaseVerify = defineCommand({
906
1070
  'skip-build': { type: 'boolean', default: false },
907
1071
  'skip-lint': { type: 'boolean', default: false },
908
1072
  'skip-test': { type: 'boolean', default: false },
1073
+ progress: { type: 'boolean', default: false, description: 'Emit coarse verify progress events to stderr without changing JSON output' },
909
1074
  json: { type: 'boolean', default: false },
910
1075
  },
911
1076
  async run({ args }) {
912
1077
  const { store, fsm, workflowEngine } = getEngine();
1078
+ const emitProgress = (event, detail) => {
1079
+ if (isTruthyFlag(args.progress))
1080
+ console.error(`[progress] ${event}: ${detail}`);
1081
+ };
913
1082
  // Validate task exists
914
1083
  const task = await store.get(args['task-id']);
915
1084
  if (!task || task.type !== 'Task') {
@@ -932,6 +1101,7 @@ export const phaseVerify = defineCommand({
932
1101
  service: args.service,
933
1102
  services: args.service ? undefined : taskServices,
934
1103
  });
1104
+ emitProgress('verify:start', `task=${args['task-id']} profile=${resolvedVerification.profileName} targets=${resolvedVerification.targets.length}`);
935
1105
  if (!args.json) {
936
1106
  for (const warning of resolvedVerification.warnings)
937
1107
  console.log(` [WARN] ${warning}`);
@@ -947,6 +1117,7 @@ export const phaseVerify = defineCommand({
947
1117
  if (!args.json && resolvedVerification.targets.length > 1) {
948
1118
  console.log(`\n Target: ${target.service?.name ?? 'root'}`);
949
1119
  }
1120
+ emitProgress('target:start', target.service?.name ?? 'root');
950
1121
  const targetResults = await workflowEngine.verify({
951
1122
  cwd: target.config.cwd,
952
1123
  build: args['build-cmd'] ?? target.config.build,
@@ -964,7 +1135,9 @@ export const phaseVerify = defineCommand({
964
1135
  tddStrict: isTruthyFlag(args['tdd-strict']),
965
1136
  });
966
1137
  gateResults.push(...targetResults);
1138
+ emitProgress('target:done', `${target.service?.name ?? 'root'} gates=${targetResults.length}`);
967
1139
  }
1140
+ emitProgress('verify:gates-complete', `gates=${gateResults.length}`);
968
1141
  // Step 2: Display gate results
969
1142
  if (!args.json) {
970
1143
  console.log('\nGate Results:');
@@ -1095,6 +1268,16 @@ export const phaseVerify = defineCommand({
1095
1268
  });
1096
1269
  const workflowOpenTaskBlockers = blockingWorkflowOpenTasks(workflowState.openTasks, args['task-id']);
1097
1270
  const workflowOpenTasksBlocked = workflowOpenTaskBlockers.length > 0;
1271
+ // P0+ (decision E1): resolve the originating Spec up-front so the executional
1272
+ // boundary / constraint checks can gate Task completion. Both reports are
1273
+ // advisory under default/auto and blocking under full/ci/strict; the
1274
+ // detection logic is identical, only the report mode and gating differ.
1275
+ const spec = await resolveSpecForTask(store, task);
1276
+ const boundaryEnforced = isEnforcedBoundaryProfile(resolvedVerification.profileName);
1277
+ const boundaryEnforcement = evaluateBoundaries(taskFiles, spec?.payload.boundaries, boundaryEnforced);
1278
+ const constraintCoverage = evaluateConstraints(spec?.payload.constraints, spec?.payload.verificationSurface, boundaryEnforced);
1279
+ const boundaryBlocked = boundaryEnforced &&
1280
+ countBoundaryBlockers(boundaryEnforcement, constraintCoverage) > 0;
1098
1281
  // Attempt FSM transition to COMPLETED
1099
1282
  // Guards: build_passed, lint_passed, tests_passed, open workflow tasks, and optional artifact policy.
1100
1283
  const codePassed = results.buildStatus === 'success' &&
@@ -1109,6 +1292,7 @@ export const phaseVerify = defineCommand({
1109
1292
  !skillInstallationBlocked &&
1110
1293
  !engineeringStandards.blocked &&
1111
1294
  !(toolEvidenceGate?.blocked ?? false) &&
1295
+ !boundaryBlocked &&
1112
1296
  !workflowOpenTasksBlocked;
1113
1297
  let transitionResult = null;
1114
1298
  if (completionEligible) {
@@ -1147,6 +1331,9 @@ export const phaseVerify = defineCommand({
1147
1331
  else if (!args.json && toolEvidenceGate?.blocked) {
1148
1332
  console.log('\n Tool evidence gate blocked completion - required tools need passed execution evidence');
1149
1333
  }
1334
+ else if (!args.json && boundaryBlocked) {
1335
+ console.log('\n Boundary enforcement blocked completion - keep edits inside Spec boundaries and guard every constraint (enforced profile)');
1336
+ }
1150
1337
  else if (!args.json && workflowOpenTasksBlocked) {
1151
1338
  console.log('\n Workflow open tasks blocked completion - finish required workflow commands first');
1152
1339
  }
@@ -1203,7 +1390,7 @@ export const phaseVerify = defineCommand({
1203
1390
  toolEvidenceGatePassed: finalToolEvidenceGate ? !finalToolEvidenceGate.blocked : true,
1204
1391
  };
1205
1392
  await store.update(args['task-id'], { payload: finalPayload });
1206
- const metricGateStatus = (finalArtifactGate.blocked || finalSkillGate?.blocked || skillInstallationBlocked || engineeringStandards.blocked || finalToolEvidenceGate?.blocked || workflowOpenTasksBlocked)
1393
+ const metricGateStatus = (finalArtifactGate.blocked || finalSkillGate?.blocked || skillInstallationBlocked || engineeringStandards.blocked || finalToolEvidenceGate?.blocked || boundaryBlocked || workflowOpenTasksBlocked)
1207
1394
  ? 'blocked'
1208
1395
  : undefined;
1209
1396
  const metricRecord = await recordVerificationMetric({
@@ -1215,6 +1402,24 @@ export const phaseVerify = defineCommand({
1215
1402
  artifactCheck,
1216
1403
  finalGateStatus: metricGateStatus,
1217
1404
  });
1405
+ // P0 (Decision C1): soft-map the Spec's verificationSurface against evidence.
1406
+ // Unmapped items are reported as warnings only — never blocking in P0.
1407
+ // (`spec`, `boundaryEnforcement` and `constraintCoverage` were resolved
1408
+ // above so the boundary checks could gate completion under enforced profiles.)
1409
+ const verificationCommands = resolvedVerification.targets.flatMap(target => [
1410
+ target.config.build, target.config.lint, target.config.test, target.config.coverage,
1411
+ ]);
1412
+ const surfaceSignals = await gatherVerificationSignals(store, {
1413
+ evidenceIds: verificationEvidenceIds,
1414
+ commands: [
1415
+ ...verificationCommands,
1416
+ args['build-cmd'], args['lint-cmd'], args['test-cmd'], args['coverage-cmd'],
1417
+ ],
1418
+ files: taskFiles,
1419
+ });
1420
+ const surfaceCoverage = spec?.payload.verificationSurface?.length
1421
+ ? computeSurfaceCoverage(spec.payload.verificationSurface, surfaceSignals)
1422
+ : undefined;
1218
1423
  const result = {
1219
1424
  phase: 'VERIFY',
1220
1425
  taskId: args['task-id'],
@@ -1240,12 +1445,23 @@ export const phaseVerify = defineCommand({
1240
1445
  blocked: skillInstallationBlocked,
1241
1446
  },
1242
1447
  metric: metricRecord,
1448
+ verificationSurfaceCoverage: surfaceCoverage,
1449
+ boundaryEnforcement,
1450
+ constraintCoverage,
1243
1451
  passed
1244
1452
  };
1245
1453
  if (args.json)
1246
1454
  console.log(JSON.stringify(result, null, 2));
1247
1455
  else {
1248
1456
  console.log(`\nVERIFY: ${passed ? 'PASSED' : 'FAILED'}`);
1457
+ if (surfaceCoverage) {
1458
+ for (const line of formatSurfaceCoverageWarnings(surfaceCoverage))
1459
+ console.log(` ${line}`);
1460
+ }
1461
+ for (const line of formatBoundaryWarnings(boundaryEnforcement))
1462
+ console.log(` ${line}`);
1463
+ for (const line of formatConstraintWarnings(constraintCoverage))
1464
+ console.log(` ${line}`);
1249
1465
  if (metricRecord)
1250
1466
  console.log(` Metrics: ${metricRecord.taskId} ${metricRecord.finalGateStatus} (fix iterations: ${metricRecord.fixIterations})`);
1251
1467
  if (artifactCheck && !artifactCheck.complete) {
@@ -1341,7 +1557,23 @@ async function reviewGitChanges(taskPayload) {
1341
1557
  diffs.push({ file: file.path, text: diff.stdout });
1342
1558
  }
1343
1559
  }
1344
- return analyzeReview({ statusOutput, diffs, taskPayload, verificationEvidence });
1560
+ return { ...analyzeReview({ statusOutput, diffs, taskPayload, verificationEvidence }), diffs };
1561
+ }
1562
/**
 * Normalize the review `--mode` value to one of the supported modes.
 *
 * String input is trimmed and lower-cased before matching, so values such as
 * `Hybrid` or ` fresh-subagent ` resolve to the intended mode instead of
 * silently falling back to the default.
 *
 * @param {unknown} value - Raw mode value (typically the `--mode` CLI argument).
 * @returns {'ai-self' | 'fresh-subagent' | 'hybrid'} The recognised mode, or
 *   `'ai-self'` for anything that is missing or unrecognised.
 */
function normalizeReviewMode(value) {
    // Non-string input can never match a mode name; it falls through to the default.
    const mode = typeof value === 'string' ? value.trim().toLowerCase() : value;
    return mode === 'fresh-subagent' || mode === 'hybrid' ? mode : 'ai-self';
}
1565
/**
 * Build a compact diff summary (one `# <file>` header per file followed by its
 * added lines, with the leading `+` stripped) for the advisory LLM-as-Judge
 * (P1.4). The result is capped so it never blows the model/context budget.
 *
 * @param {Array<{file: string, text: string}> | null | undefined} diffs
 *   Per-file unified diffs; a missing list yields an empty summary.
 * @param {number} [maxChars=6000] - Maximum length of the returned summary.
 * @returns {string} File sections joined by blank lines, truncated to `maxChars`.
 */
function buildJudgeDiffSummary(diffs, maxChars = 6000) {
    const parts = [];
    for (const diff of diffs ?? []) {
        const lines = String(diff.text ?? '').split('\n');
        // In a unified diff the `+++ <path>` file header always precedes the
        // first `@@` hunk header, so once inside a hunk every `+` line is real
        // content, including lines such as `+++i;` (an added `++i;`).
        const hasHunkHeaders = lines.some(line => line.startsWith('@@'));
        let inHunk = false;
        const added = [];
        for (const line of lines) {
            if (line.startsWith('@@')) {
                inHunk = true;
                continue;
            }
            if (line.startsWith('diff --git ')) {
                // A new file section starts; its header lines come before any hunk.
                inHunk = false;
                continue;
            }
            if (!line.startsWith('+')) {
                continue;
            }
            // Without hunk headers there is no position information, so fall
            // back to treating any `+++` line as a file header.
            const isFileHeader = hasHunkHeaders ? !inHunk : line.startsWith('+++');
            if (!isFileHeader) {
                added.push(line.slice(1));
            }
        }
        parts.push(`# ${diff.file}\n${added.join('\n')}`);
    }
    return parts.join('\n\n').slice(0, maxChars);
}
1346
1578
  function collectReviewedFiles(records) {
1347
1579
  const reviewed = new Set();
@@ -1553,6 +1785,8 @@ export const phaseReview = defineCommand({
1553
1785
  'check-style': { type: 'boolean', default: true },
1554
1786
  format: { type: 'string', alias: 'f', description: 'Output format: html or md (default: html)' },
1555
1787
  brand: { type: 'string', description: 'Brand theme for HTML output (vercel/stripe/notion/linear/github)' },
1788
+ judge: { type: 'boolean', default: true, description: 'Run the advisory LLM-as-Judge spec-conformance check (P1.4)' },
1789
+ mode: { type: 'string', default: 'ai-self', description: 'Review mode: ai-self (default) | fresh-subagent | hybrid (P2.2)' },
1556
1790
  json: { type: 'boolean', default: false },
1557
1791
  },
1558
1792
  async run({ args }) {
@@ -1586,12 +1820,44 @@ export const phaseReview = defineCommand({
1586
1820
  const findings = review.findings;
1587
1821
  const summary = summarizeFindings(findings);
1588
1822
  const passed = summary.critical === 0 && summary.high === 0;
1823
+ const reviewMode = normalizeReviewMode(args.mode);
1824
+ // Resolve the originating Spec once; both the advisory judge (P1.4) and the
1825
+ // fresh-context verifier (P2.2) read its outcome / verificationSurface.
1826
+ const needsSpec = args.judge || reviewMode !== 'ai-self';
1827
+ const spec = needsSpec ? await resolveSpecForTask(store, task) : undefined;
1828
+ const diffSummary = needsSpec ? buildJudgeDiffSummary(review.diffs) : '';
1829
+ // P1.4 (decision K1): advisory LLM-as-Judge. Never part of `passed`.
1830
+ let judgeVerdict;
1831
+ if (args.judge) {
1832
+ const judge = new LlmJudge(new JsonLlmClient(), new JudgePromptStore(SCALE_DIR));
1833
+ judgeVerdict = await judge.judge({
1834
+ outcome: spec?.payload.what,
1835
+ verificationSurface: spec?.payload.verificationSurface ?? [],
1836
+ diffSummary,
1837
+ reviewFindings: summary,
1838
+ });
1839
+ }
1840
+ // P2.2 (decisions M1/N1/O1): fresh-context verifier runs only for
1841
+ // fresh-subagent / hybrid modes, on isolated input (surface + diff + gate
1842
+ // summary, no build-agent history). Advisory only — never blocks ship.
1843
+ let freshVerifyVerdict;
1844
+ if (reviewMode !== 'ai-self') {
1845
+ freshVerifyVerdict = await new FreshContextVerifier(new JsonLlmClient()).verify({
1846
+ outcome: spec?.payload.what,
1847
+ verificationSurface: spec?.payload.verificationSurface ?? [],
1848
+ diffSummary,
1849
+ gateSummary: `critical=${summary.critical} high=${summary.high} medium=${summary.medium} low=${summary.low}`,
1850
+ });
1851
+ }
1589
1852
  const record = reviewStore.saveReview({
1590
1853
  taskId: args['task-id'],
1591
1854
  passed,
1592
1855
  findings,
1593
1856
  changedFiles: review.changedFiles.map(file => normalizeGitPath(file.path)),
1594
1857
  summary,
1858
+ judge: judgeVerdict,
1859
+ reviewMode,
1860
+ freshVerify: freshVerifyVerdict,
1595
1861
  });
1596
1862
  if (task && taskPayload) {
1597
1863
  const updatedPayload = {
@@ -1638,6 +1904,9 @@ export const phaseReview = defineCommand({
1638
1904
  findings,
1639
1905
  changedFiles: review.changedFiles.map(file => normalizeGitPath(file.path)),
1640
1906
  summary,
1907
+ judge: judgeVerdict,
1908
+ reviewMode,
1909
+ freshVerify: freshVerifyVerdict,
1641
1910
  karpathy: karpathyReport,
1642
1911
  passed,
1643
1912
  format: reviewOutputFormat,
@@ -1660,6 +1929,18 @@ export const phaseReview = defineCommand({
1660
1929
  console.log(`LOW: ${summary.low} issues`);
1661
1930
  console.log('----------------------------------------');
1662
1931
  findings.slice(0, 10).forEach(f => console.log(` [${f.severity}] ${f.file ? `${f.file}: ` : ''}${f.description}`));
1932
+ if (judgeVerdict) {
1933
+ console.log(`\nJudge (advisory, ${judgeVerdict.modelUsed}): ${judgeVerdict.decision.toUpperCase()} (confidence ${judgeVerdict.confidence.toFixed(2)})`);
1934
+ console.log(` ${judgeVerdict.rationale}`);
1935
+ if (judgeVerdict.unmetSurfaces.length)
1936
+ console.log(` Unmet surfaces: ${judgeVerdict.unmetSurfaces.join('; ')}`);
1937
+ }
1938
+ if (freshVerifyVerdict) {
1939
+ console.log(`\nFresh-context verifier (advisory, ${freshVerifyVerdict.modelUsed}): ${freshVerifyVerdict.decision.toUpperCase()} (confidence ${freshVerifyVerdict.confidence.toFixed(2)})`);
1940
+ console.log(` ${freshVerifyVerdict.rationale}`);
1941
+ if (freshVerifyVerdict.unmetSurfaces.length)
1942
+ console.log(` Unmet surfaces: ${freshVerifyVerdict.unmetSurfaces.join('; ')}`);
1943
+ }
1663
1944
  if (passed) {
1664
1945
  console.log('\nReview passed (no CRITICAL issues)');
1665
1946
  console.log('\n Next: scale ship ' + (args['task-id'] ?? '<task-id>') + '\n');
@@ -1808,6 +2089,15 @@ export const phaseShip = defineCommand({
1808
2089
  console.error("Warning: Plan completion transition failed:", e.message);
1809
2090
  }
1810
2091
  }
2092
+ // P0 (Decision C1): soft-map the Spec's verificationSurface at ship time too.
2093
+ const shipSpec = await resolveSpecForTask(store, task);
2094
+ const shipSignals = await gatherVerificationSignals(store, {
2095
+ evidenceIds: payload.verificationEvidenceIds,
2096
+ files: payload.filesInvolved,
2097
+ });
2098
+ const shipSurfaceCoverage = shipSpec?.payload.verificationSurface?.length
2099
+ ? computeSurfaceCoverage(shipSpec.payload.verificationSurface, shipSignals)
2100
+ : undefined;
1811
2101
  // === WorkflowEngine Integration ===
1812
2102
  // Generate HonestDelivery report
1813
2103
  if (!args.json) {
@@ -1844,6 +2134,10 @@ export const phaseShip = defineCommand({
1844
2134
  unverifiedItems.forEach(item => console.log(` [UNVERIFIED] ${item}`));
1845
2135
  console.log('');
1846
2136
  }
2137
+ if (shipSurfaceCoverage) {
2138
+ for (const line of formatSurfaceCoverageWarnings(shipSurfaceCoverage))
2139
+ console.log(line);
2140
+ }
1847
2141
  }
1848
2142
  const result = {
1849
2143
  phase: 'SHIP',
@@ -1863,6 +2157,7 @@ export const phaseShip = defineCommand({
1863
2157
  blockers: workspaceBoundary.blockers,
1864
2158
  warnings: workspaceBoundary.warnings,
1865
2159
  } : null,
2160
+ verificationSurfaceCoverage: shipSurfaceCoverage,
1866
2161
  };
1867
2162
  if (args.json)
1868
2163
  console.log(JSON.stringify(result, null, 2));