@hongmaple0820/scale-engine 0.48.0 → 0.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +2 -2
- package/README.md +2 -2
- package/dist/agents/evidenceDiscipline.d.ts +7 -0
- package/dist/agents/evidenceDiscipline.js +21 -0
- package/dist/agents/evidenceDiscipline.js.map +1 -0
- package/dist/agents/profiles.js +8 -1
- package/dist/agents/profiles.js.map +1 -1
- package/dist/agents/types.d.ts +1 -0
- package/dist/artifact/types.d.ts +59 -0
- package/dist/artifact/types.js.map +1 -1
- package/dist/cli/cortexCommands.d.ts +36 -0
- package/dist/cli/cortexCommands.js +76 -4
- package/dist/cli/cortexCommands.js.map +1 -1
- package/dist/cli/evalCommands.js +12 -1
- package/dist/cli/evalCommands.js.map +1 -1
- package/dist/cli/phaseCommands.d.ts +53 -1
- package/dist/cli/phaseCommands.js +317 -22
- package/dist/cli/phaseCommands.js.map +1 -1
- package/dist/cortex/InstinctStore.d.ts +32 -1
- package/dist/cortex/InstinctStore.js +235 -42
- package/dist/cortex/InstinctStore.js.map +1 -1
- package/dist/cortex/InstinctValidation.d.ts +9 -0
- package/dist/cortex/InstinctValidation.js +55 -0
- package/dist/cortex/InstinctValidation.js.map +1 -0
- package/dist/cortex/SessionInjector.js +13 -6
- package/dist/cortex/SessionInjector.js.map +1 -1
- package/dist/eval/BenchmarkPublisher.d.ts +2 -0
- package/dist/eval/BenchmarkPublisher.js +43 -0
- package/dist/eval/BenchmarkPublisher.js.map +1 -1
- package/dist/guardrails/ast/confirmers.d.ts +18 -0
- package/dist/guardrails/ast/confirmers.js +69 -0
- package/dist/guardrails/ast/confirmers.js.map +1 -0
- package/dist/guardrails/ast/parse.d.ts +20 -0
- package/dist/guardrails/ast/parse.js +51 -0
- package/dist/guardrails/ast/parse.js.map +1 -0
- package/dist/output/HTMLDocumentRenderer.d.ts +9 -0
- package/dist/output/HTMLDocumentRenderer.js +19 -0
- package/dist/output/HTMLDocumentRenderer.js.map +1 -1
- package/dist/review/FreshContextVerifier.d.ts +35 -0
- package/dist/review/FreshContextVerifier.js +120 -0
- package/dist/review/FreshContextVerifier.js.map +1 -0
- package/dist/review/JsonLlmClient.d.ts +37 -0
- package/dist/review/JsonLlmClient.js +94 -0
- package/dist/review/JsonLlmClient.js.map +1 -0
- package/dist/review/LlmJudge.d.ts +61 -0
- package/dist/review/LlmJudge.js +167 -0
- package/dist/review/LlmJudge.js.map +1 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workflow/BoundaryEnforcement.d.ts +60 -0
- package/dist/workflow/BoundaryEnforcement.js +182 -0
- package/dist/workflow/BoundaryEnforcement.js.map +1 -0
- package/dist/workflow/EngineeringStandards.js +19 -9
- package/dist/workflow/EngineeringStandards.js.map +1 -1
- package/dist/workflow/GateCatalog.js +12 -2
- package/dist/workflow/GateCatalog.js.map +1 -1
- package/dist/workflow/ProfileEnforcement.d.ts +7 -0
- package/dist/workflow/ProfileEnforcement.js +12 -0
- package/dist/workflow/ProfileEnforcement.js.map +1 -0
- package/dist/workflow/ReviewStore.d.ts +10 -0
- package/dist/workflow/ReviewStore.js.map +1 -1
- package/dist/workflow/SurfaceCoverage.d.ts +19 -0
- package/dist/workflow/SurfaceCoverage.js +57 -0
- package/dist/workflow/SurfaceCoverage.js.map +1 -0
- package/dist/workflow/gates/EnhancedGates.js +2 -0
- package/dist/workflow/gates/EnhancedGates.js.map +1 -1
- package/dist/workflow/gates/TestIntegrityGate.d.ts +51 -0
- package/dist/workflow/gates/TestIntegrityGate.js +175 -0
- package/dist/workflow/gates/TestIntegrityGate.js.map +1 -0
- package/dist/workflow/types.d.ts +1 -1
- package/docs/guides/DEVELOPMENT_WORKFLOW.md +28 -0
- package/docs/workflow/E2E_EXAMPLE.md +133 -0
- package/docs/workflow/README.md +6 -0
- package/docs/workflow/TEMPLATE_GUIDE.md +162 -0
- package/docs/workflow/templates/plan.md +26 -0
- package/docs/workflow/templates/spec.md +28 -0
- package/package.json +2 -1
|
@@ -18,6 +18,9 @@ import { WorkflowArtifactWriter } from '../workflow/WorkflowArtifactWriter.js';
|
|
|
18
18
|
import { resolveVerificationTargets } from '../workflow/VerificationProfile.js';
|
|
19
19
|
import { EvidenceStore } from '../workflow/EvidenceStore.js';
|
|
20
20
|
import { ReviewStore } from '../workflow/ReviewStore.js';
|
|
21
|
+
import { JudgePromptStore, LlmJudge } from '../review/LlmJudge.js';
|
|
22
|
+
import { JsonLlmClient } from '../review/JsonLlmClient.js';
|
|
23
|
+
import { FreshContextVerifier } from '../review/FreshContextVerifier.js';
|
|
21
24
|
import { TaskMetricsStore } from '../workflow/TaskMetricsStore.js';
|
|
22
25
|
import { appendVerificationArtifact, checkTaskArtifactCompleteness, scaffoldTaskArtifacts } from '../workflow/TaskArtifactScaffolder.js';
|
|
23
26
|
import { createWorkflowGuidance, renderWorkflowGuidance } from '../workflow/WorkflowGuidance.js';
|
|
@@ -33,6 +36,8 @@ import { loadToolPolicy } from '../tools/ToolPolicy.js';
|
|
|
33
36
|
import { runSafeCommand } from '../tools/SafeCommandRunner.js';
|
|
34
37
|
import { join } from 'node:path';
|
|
35
38
|
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from 'node:fs';
|
|
39
|
+
import { computeSurfaceCoverage, formatSurfaceCoverageWarnings } from '../workflow/SurfaceCoverage.js';
|
|
40
|
+
import { evaluateBoundaries, evaluateConstraints, formatBoundaryWarnings, formatConstraintWarnings, isEnforcedBoundaryProfile, countBoundaryBlockers, } from '../workflow/BoundaryEnforcement.js';
|
|
36
41
|
import { HTMLDocumentRenderer } from '../output/HTMLDocumentRenderer.js';
|
|
37
42
|
import { SCALE_ENGINE_VERSION } from '../version.js';
|
|
38
43
|
import { optimizeCodingPrompt } from '../prompts/PromptOptimizer.js';
|
|
@@ -322,11 +327,11 @@ async function recordVerificationMetric(options) {
|
|
|
322
327
|
return record;
|
|
323
328
|
}
|
|
324
329
|
// Helper: Generate spec markdown file
|
|
325
|
-
function generateSpecMarkdown(id, title, payload) {
|
|
330
|
+
function generateSpecMarkdown(id, title, payload, status = 'FROZEN') {
|
|
326
331
|
return `# Spec: ${title}
|
|
327
332
|
|
|
328
333
|
**ID**: ${id}
|
|
329
|
-
**Status**:
|
|
334
|
+
**Status**: ${status}
|
|
330
335
|
**Ambiguity Score**: ${payload.ambiguityScore ?? 0.15}
|
|
331
336
|
|
|
332
337
|
## What
|
|
@@ -343,11 +348,34 @@ ${payload.edgeCases.map(e => `- ${e}`).join('\n') || '(none defined)'}
|
|
|
343
348
|
|
|
344
349
|
## North Star
|
|
345
350
|
${payload.northStar || 'User value delivered'}
|
|
346
|
-
|
|
351
|
+
${renderSpecContractSections(payload)}
|
|
347
352
|
---
|
|
348
353
|
*Generated by SCALE Engine DEFINE phase*
|
|
349
354
|
`;
|
|
350
355
|
}
|
|
356
|
+
// Helper: Render the optional P0 six-element contract sections as markdown.
// Each section is omitted when its field is unset, keeping legacy specs unchanged.
//
// @param payload  Spec payload; reads verificationSurface, constraints,
//                 boundaries ({ files, tools, forbidden }), iterationStrategy
//                 and blockedStopCondition.
// @returns        '\n' when no contract field is set; otherwise the rendered
//                 sections wrapped in a leading and a trailing newline.
function renderSpecContractSections(payload) {
    const bulletList = (items) => items.map(item => `- ${item}`).join('\n');
    // A boundary list can be missing on a payload that was not built by the
    // `define` command (e.g. one re-read from the store), so treat anything
    // that is not an array as an empty list instead of throwing on `.length`.
    const boundaryLine = (label, items) => {
        const list = Array.isArray(items) ? items : [];
        return `- ${label}: ${list.length ? list.join(', ') : '(none)'}`;
    };
    const sections = [];
    if (payload.verificationSurface?.length) {
        sections.push(`\n## Verification Surface\n${bulletList(payload.verificationSurface)}`);
    }
    if (payload.constraints?.length) {
        sections.push(`\n## Constraints\n${bulletList(payload.constraints)}`);
    }
    if (payload.boundaries) {
        const { files, tools, forbidden } = payload.boundaries;
        sections.push(`\n## Boundaries\n${boundaryLine('Files', files)}\n${boundaryLine('Tools', tools)}\n${boundaryLine('Forbidden', forbidden)}`);
    }
    if (payload.iterationStrategy) {
        sections.push(`\n## Iteration Strategy\n${payload.iterationStrategy}`);
    }
    if (payload.blockedStopCondition) {
        sections.push(`\n## Blocked Stop Condition\n${payload.blockedStopCondition}`);
    }
    return sections.length ? `\n${sections.join('\n')}\n` : '\n';
}
|
|
351
379
|
// Helper: Calculate ambiguity score
|
|
352
380
|
function calculateAmbiguityScore(description, successCriteria) {
|
|
353
381
|
let score = 0.2; // Base score (maximum threshold)
|
|
@@ -364,9 +392,20 @@ function calculateAmbiguityScore(description, successCriteria) {
|
|
|
364
392
|
export const phaseDefine = defineCommand({
|
|
365
393
|
meta: { name: 'define', description: 'DEFINE: Create Spec with AmbiguityScorer + SocraticQuestioner (/spec)' },
|
|
366
394
|
args: {
|
|
367
|
-
title: { type: 'positional', required:
|
|
395
|
+
title: { type: 'positional', required: false },
|
|
368
396
|
description: { type: 'string', alias: 'd' },
|
|
369
397
|
'success-criteria': { type: 'string', alias: 'c', description: 'Comma-separated criteria' },
|
|
398
|
+
// P0 draft/confirm two-step lifecycle (backward compatible: bare `define` still auto-freezes)
|
|
399
|
+
draft: { type: 'boolean', default: false, description: 'Stop the new Spec at REVIEWING (requires `define --confirm <id>` to FROZEN)' },
|
|
400
|
+
confirm: { type: 'string', description: 'Confirm and freeze an existing draft Spec id (REVIEWING -> FROZEN)' },
|
|
401
|
+
// P0 six-element contract inputs (optional, comma-separated where plural)
|
|
402
|
+
'verification-surface': { type: 'string', description: 'Comma-separated evidence sources: test names / benchmark commands / artifact paths' },
|
|
403
|
+
'constraints': { type: 'string', description: 'Comma-separated invariants that must not regress (perf/security/compat)' },
|
|
404
|
+
'boundary-files': { type: 'string', description: 'Comma-separated files allowed to change' },
|
|
405
|
+
'boundary-tools': { type: 'string', description: 'Comma-separated tools allowed to use' },
|
|
406
|
+
'boundary-forbidden': { type: 'string', description: 'Comma-separated scope that must not be touched' },
|
|
407
|
+
'iteration-strategy': { type: 'string', description: 'How each build iteration decides the next step' },
|
|
408
|
+
'blocked-stop': { type: 'string', description: 'What to report / what is needed to unblock when no path is viable' },
|
|
370
409
|
// Socratic refinement answers (optional)
|
|
371
410
|
'goal': { type: 'string', description: 'Goal answer for Socratic refinement' },
|
|
372
411
|
'constraint': { type: 'string', description: 'Constraint answer for Socratic refinement' },
|
|
@@ -380,6 +419,15 @@ export const phaseDefine = defineCommand({
|
|
|
380
419
|
},
|
|
381
420
|
async run({ args }) {
|
|
382
421
|
const { store, fsm, workflowEngine } = getEngine();
|
|
422
|
+
// P0: --confirm freezes an existing draft Spec (REVIEWING -> FROZEN) without re-creating it.
|
|
423
|
+
if (args.confirm) {
|
|
424
|
+
await confirmDraftSpec(store, fsm, String(args.confirm), Boolean(args.json));
|
|
425
|
+
return;
|
|
426
|
+
}
|
|
427
|
+
if (!args.title) {
|
|
428
|
+
console.error('\nMissing required argument: title (or pass --confirm <spec-id> to freeze a draft)\n');
|
|
429
|
+
process.exit(1);
|
|
430
|
+
}
|
|
383
431
|
const rawDesc = String(args.description ?? args.title);
|
|
384
432
|
// Parse success criteria
|
|
385
433
|
const successCriteria = args['success-criteria']
|
|
@@ -467,6 +515,17 @@ export const phaseDefine = defineCommand({
|
|
|
467
515
|
initialStatus: 'DRAFT',
|
|
468
516
|
createdBy: { kind: 'human', userId: 'cli' },
|
|
469
517
|
});
|
|
518
|
+
// P0 six-element contract inputs (optional; omitted fields stay undefined)
|
|
519
|
+
const csv = (v) => {
|
|
520
|
+
const items = typeof v === 'string' ? v.split(',').map(s => s.trim()).filter(Boolean) : [];
|
|
521
|
+
return items.length ? items : undefined;
|
|
522
|
+
};
|
|
523
|
+
const boundaryFiles = csv(args['boundary-files']);
|
|
524
|
+
const boundaryTools = csv(args['boundary-tools']);
|
|
525
|
+
const boundaryForbidden = csv(args['boundary-forbidden']);
|
|
526
|
+
const boundaries = (boundaryFiles || boundaryTools || boundaryForbidden)
|
|
527
|
+
? { files: boundaryFiles ?? [], tools: boundaryTools ?? [], forbidden: boundaryForbidden ?? [] }
|
|
528
|
+
: undefined;
|
|
470
529
|
// Create Spec artifact with proper payload (use refined requirement if available)
|
|
471
530
|
const specPayload = {
|
|
472
531
|
what: refinedRequirement,
|
|
@@ -475,6 +534,11 @@ export const phaseDefine = defineCommand({
|
|
|
475
534
|
edgeCases: [],
|
|
476
535
|
northStar: 'Deliver user value',
|
|
477
536
|
ambiguityScore,
|
|
537
|
+
verificationSurface: csv(args['verification-surface']),
|
|
538
|
+
constraints: csv(args['constraints']),
|
|
539
|
+
boundaries,
|
|
540
|
+
iterationStrategy: typeof args['iteration-strategy'] === 'string' && args['iteration-strategy'] ? String(args['iteration-strategy']) : undefined,
|
|
541
|
+
blockedStopCondition: typeof args['blocked-stop'] === 'string' && args['blocked-stop'] ? String(args['blocked-stop']) : undefined,
|
|
478
542
|
};
|
|
479
543
|
const spec = await store.create({
|
|
480
544
|
type: 'Spec', title: args.title,
|
|
@@ -483,11 +547,14 @@ export const phaseDefine = defineCommand({
|
|
|
483
547
|
initialStatus: 'DRAFT',
|
|
484
548
|
createdBy: { kind: 'human', userId: 'cli' },
|
|
485
549
|
});
|
|
550
|
+
// Draft mode stops at REVIEWING; default mode auto-freezes (FROZEN).
|
|
551
|
+
const isDraft = Boolean(args.draft);
|
|
552
|
+
const finalStatus = isDraft ? 'REVIEWING' : 'FROZEN';
|
|
486
553
|
// Generate spec markdown file
|
|
487
554
|
const specsDir = join(SCALE_DIR, 'specs');
|
|
488
555
|
ensureDir(specsDir);
|
|
489
556
|
const specPath = join(specsDir, `${spec.id}.md`);
|
|
490
|
-
writeFileSync(specPath, generateSpecMarkdown(spec.id, args.title, specPayload));
|
|
557
|
+
writeFileSync(specPath, generateSpecMarkdown(spec.id, args.title, specPayload, finalStatus));
|
|
491
558
|
// Generate spec HTML file (default format: html)
|
|
492
559
|
const outputFormat = args.format ?? 'md';
|
|
493
560
|
let specHtmlPath;
|
|
@@ -496,7 +563,7 @@ export const phaseDefine = defineCommand({
|
|
|
496
563
|
title: args.title,
|
|
497
564
|
brand: args.brand,
|
|
498
565
|
version: SCALE_ENGINE_VERSION,
|
|
499
|
-
status:
|
|
566
|
+
status: finalStatus,
|
|
500
567
|
});
|
|
501
568
|
const html = renderer.renderSpec({
|
|
502
569
|
id: spec.id,
|
|
@@ -507,11 +574,16 @@ export const phaseDefine = defineCommand({
|
|
|
507
574
|
edgeCases: specPayload.edgeCases,
|
|
508
575
|
northStar: specPayload.northStar,
|
|
509
576
|
ambiguityScore,
|
|
577
|
+
verificationSurface: specPayload.verificationSurface,
|
|
578
|
+
constraints: specPayload.constraints,
|
|
579
|
+
boundaries: specPayload.boundaries,
|
|
580
|
+
iterationStrategy: specPayload.iterationStrategy,
|
|
581
|
+
blockedStopCondition: specPayload.blockedStopCondition,
|
|
510
582
|
});
|
|
511
583
|
specHtmlPath = join(specsDir, `${spec.id}.html`);
|
|
512
584
|
renderer.writeToFile(html, specHtmlPath);
|
|
513
585
|
}
|
|
514
|
-
// FSM transitions: DRAFT -> REVIEWING -> FROZEN
|
|
586
|
+
// FSM transitions: DRAFT -> REVIEWING (-> FROZEN unless --draft)
|
|
515
587
|
// Phase 1: refine (DRAFT -> REVIEWING) - no guards
|
|
516
588
|
const refineResult = await fsm.canTransition(spec.id, 'refine');
|
|
517
589
|
if (!refineResult.allowed) {
|
|
@@ -522,22 +594,30 @@ export const phaseDefine = defineCommand({
|
|
|
522
594
|
process.exit(1);
|
|
523
595
|
}
|
|
524
596
|
await fsm.transition(spec.id, 'refine', { actor: { kind: 'system', component: 'phase-define' } });
|
|
525
|
-
// Phase 2: approve (REVIEWING -> FROZEN) - guards: ambiguityScore <= 0.2, has successCriteria
|
|
526
|
-
|
|
527
|
-
if (!
|
|
597
|
+
// Phase 2: approve (REVIEWING -> FROZEN) - guards: ambiguityScore <= 0.2, has successCriteria.
|
|
598
|
+
// Skipped in --draft mode: the draft waits for `scale define --confirm <id>`.
|
|
599
|
+
if (!isDraft) {
|
|
600
|
+
const approveResult = await fsm.canTransition(spec.id, 'approve');
|
|
601
|
+
if (!approveResult.allowed) {
|
|
602
|
+
if (!args.json) {
|
|
603
|
+
console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
|
|
604
|
+
console.error(' Spec cannot be frozen due to:');
|
|
605
|
+
approveResult.blockedBy?.forEach(b => console.error(` [GUARD] ${b.guard}: ${b.message}`));
|
|
606
|
+
console.error('\n Resolve issues before proceeding.');
|
|
607
|
+
}
|
|
608
|
+
process.exit(1);
|
|
609
|
+
}
|
|
610
|
+
await fsm.transition(spec.id, 'approve', { actor: { kind: 'system', component: 'phase-define' } });
|
|
528
611
|
if (!args.json) {
|
|
529
|
-
console.
|
|
530
|
-
console.error(' Spec cannot be frozen due to:');
|
|
531
|
-
approveResult.blockedBy?.forEach(b => console.error(` [GUARD] ${b.guard}: ${b.message}`));
|
|
532
|
-
console.error('\n Resolve issues before proceeding.');
|
|
612
|
+
console.log(' FSM: DRAFT -> REVIEWING -> FROZEN ✓');
|
|
533
613
|
}
|
|
534
|
-
process.exit(1);
|
|
535
614
|
}
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
console.log(' FSM: DRAFT -> REVIEWING -> FROZEN ✓');
|
|
615
|
+
else if (!args.json) {
|
|
616
|
+
console.log(' FSM: DRAFT -> REVIEWING (draft; not yet FROZEN)');
|
|
539
617
|
}
|
|
540
|
-
|
|
618
|
+
// Refresh the spec so the reported status reflects the post-transition state.
|
|
619
|
+
const finalSpec = (await store.get(spec.id)) ?? spec;
|
|
620
|
+
const result = { phase: 'DEFINE', spec: finalSpec, specPath, specHtmlPath, ambiguityScore, successCriteria, format: outputFormat, promptOptimization, status: finalStatus, draft: isDraft };
|
|
541
621
|
// Write explore artifact for Gate G1 verification
|
|
542
622
|
const artifactWriter = new WorkflowArtifactWriter(SCALE_DIR);
|
|
543
623
|
artifactWriter.writeExploreResult({
|
|
@@ -557,10 +637,94 @@ export const phaseDefine = defineCommand({
|
|
|
557
637
|
console.log(` HTML file: ${specHtmlPath}`);
|
|
558
638
|
console.log(` Ambiguity score: ${ambiguityScore.toFixed(2)}`);
|
|
559
639
|
console.log(` Success criteria: ${successCriteria.length}`);
|
|
560
|
-
|
|
640
|
+
if (isDraft) {
|
|
641
|
+
console.log(`\n Draft created (REVIEWING). Review, then confirm:`);
|
|
642
|
+
console.log(` Next: scale define --confirm ${spec.id}\n`);
|
|
643
|
+
}
|
|
644
|
+
else {
|
|
645
|
+
console.log(`\n Next: scale plan ${spec.id}\n`);
|
|
646
|
+
}
|
|
561
647
|
}
|
|
562
648
|
},
|
|
563
649
|
});
|
|
650
|
+
// Helper: Freeze a draft Spec for the `define --confirm <id>` flow.
//
// Looks the Spec up in the store, applies the FSM `approve` transition
// (REVIEWING -> FROZEN) and, when the Spec's markdown file exists, rewrites it
// with FROZEN status (the draft was written as REVIEWING).
//
// @param store   Artifact store; only `get(id)` is used here.
// @param fsm     State machine; `canTransition` and `transition` are used.
// @param specId  Id of the Spec to confirm.
// @param json    When true, results are printed as JSON instead of text.
//
// Exits the process with code 1 when the Spec is missing / not a Spec, or when
// the `approve` transition is not allowed. An already FROZEN Spec is reported
// and left untouched.
async function confirmDraftSpec(store, fsm, specId, json) {
    const draft = await store.get(specId);
    const isSpecArtifact = Boolean(draft) && draft.type === 'Spec';
    if (!isSpecArtifact) {
        console.error(`\nSpec not found: ${specId}\n`);
        process.exit(1);
    }
    if (draft.status === 'FROZEN') {
        // Nothing to transition; just report the current state.
        const notice = json
            ? JSON.stringify({ phase: 'DEFINE', confirm: true, spec: draft, status: 'FROZEN', alreadyFrozen: true }, null, 2)
            : `\nSpec ${specId} is already FROZEN.\n`;
        console.log(notice);
        return;
    }
    const approval = await fsm.canTransition(specId, 'approve');
    if (!approval.allowed) {
        // Guard details are only printed in text mode; JSON mode exits silently.
        if (!json) {
            console.error('\nFSM transition blocked: REVIEWING -> FROZEN');
            console.error('  Spec cannot be confirmed due to:');
            approval.blockedBy?.forEach(blocker => console.error(`    [GUARD] ${blocker.guard}: ${blocker.message}`));
            console.error('\n  Resolve issues before confirming.');
        }
        process.exit(1);
    }
    const humanActor = { kind: 'human', userId: 'cli' };
    await fsm.transition(specId, 'approve', { actor: humanActor });
    // Refresh persisted markdown status (draft was written as REVIEWING).
    const markdownPath = join(SCALE_DIR, 'specs', `${specId}.md`);
    if (existsSync(markdownPath)) {
        const frozenMarkdown = generateSpecMarkdown(specId, draft.title, draft.payload, 'FROZEN');
        writeFileSync(markdownPath, frozenMarkdown);
    }
    // Re-read so the reported artifact reflects the post-transition state.
    const confirmed = await store.get(specId);
    if (!json) {
        console.log(`\nCONFIRM: ${specId}`);
        console.log('  FSM: REVIEWING -> FROZEN ✓');
        console.log(`\n  Next: scale plan ${specId}\n`);
        return;
    }
    console.log(JSON.stringify({ phase: 'DEFINE', confirm: true, spec: confirmed, status: 'FROZEN' }, null, 2));
}
|
|
690
|
+
// Helper: Find the Spec a Task originates from by following first parents:
// Task -> Plan -> Spec.
//
// @param store  Artifact store; only `get(id)` is used.
// @param task   Task artifact (may be null/undefined or have no parents).
// @returns      `{ id, payload }` of the Spec, or undefined when any link in
//               the chain is missing or the final artifact is not a Spec.
async function resolveSpecForTask(store, task) {
    const parentPlanId = task?.parents?.[0];
    if (parentPlanId) {
        const parentPlan = await store.get(parentPlanId);
        const parentSpecId = parentPlan?.parents?.[0];
        if (parentSpecId) {
            const candidate = await store.get(parentSpecId);
            if (candidate?.type === 'Spec') {
                const { id, payload } = candidate;
                return { id, payload };
            }
        }
    }
    return undefined;
}
|
|
704
|
+
// Helper: Collect free-form evidence signals (commands run, files, evidence refs/artifacts)
// used to soft-map a Spec's verificationSurface during verify/ship (P0 Decision C1).
//
// @param store    Artifact store; only `get(id)` is used.
// @param options  `{ commands?, files?, evidenceIds? }`. Falsy commands/files
//                 are dropped. Each evidence id is looked up in the store and
//                 ignored unless the record's type is 'Evidence'.
// @returns        Signals in a fixed order: commands, then files, then for
//                 each Evidence record its verificationSurfaceRef, toolUsed
//                 and artifacts.
async function gatherVerificationSignals(store, options) {
    const signals = [];
    for (const command of options.commands ?? []) {
        if (command) {
            signals.push(command);
        }
    }
    for (const file of options.files ?? []) {
        if (file) {
            signals.push(file);
        }
    }
    for (const id of options.evidenceIds ?? []) {
        const record = await store.get(id);
        if (!record || record.type !== 'Evidence') {
            continue;
        }
        // An Evidence record without a payload contributes no signals; skip it
        // rather than throwing and aborting the whole verification report.
        const payload = record.payload;
        if (!payload) {
            continue;
        }
        if (payload.verificationSurfaceRef) {
            signals.push(payload.verificationSurfaceRef);
        }
        if (payload.toolUsed) {
            signals.push(payload.toolUsed);
        }
        if (payload.artifacts?.length) {
            signals.push(...payload.artifacts);
        }
    }
    return signals;
}
|
|
564
728
|
// Helper: Generate plan markdown file
|
|
565
729
|
function generatePlanMarkdown(id, specId, payload) {
|
|
566
730
|
return `# Plan: ${id}
|
|
@@ -906,10 +1070,15 @@ export const phaseVerify = defineCommand({
|
|
|
906
1070
|
'skip-build': { type: 'boolean', default: false },
|
|
907
1071
|
'skip-lint': { type: 'boolean', default: false },
|
|
908
1072
|
'skip-test': { type: 'boolean', default: false },
|
|
1073
|
+
progress: { type: 'boolean', default: false, description: 'Emit coarse verify progress events to stderr without changing JSON output' },
|
|
909
1074
|
json: { type: 'boolean', default: false },
|
|
910
1075
|
},
|
|
911
1076
|
async run({ args }) {
|
|
912
1077
|
const { store, fsm, workflowEngine } = getEngine();
|
|
1078
|
+
const emitProgress = (event, detail) => {
|
|
1079
|
+
if (isTruthyFlag(args.progress))
|
|
1080
|
+
console.error(`[progress] ${event}: ${detail}`);
|
|
1081
|
+
};
|
|
913
1082
|
// Validate task exists
|
|
914
1083
|
const task = await store.get(args['task-id']);
|
|
915
1084
|
if (!task || task.type !== 'Task') {
|
|
@@ -932,6 +1101,7 @@ export const phaseVerify = defineCommand({
|
|
|
932
1101
|
service: args.service,
|
|
933
1102
|
services: args.service ? undefined : taskServices,
|
|
934
1103
|
});
|
|
1104
|
+
emitProgress('verify:start', `task=${args['task-id']} profile=${resolvedVerification.profileName} targets=${resolvedVerification.targets.length}`);
|
|
935
1105
|
if (!args.json) {
|
|
936
1106
|
for (const warning of resolvedVerification.warnings)
|
|
937
1107
|
console.log(` [WARN] ${warning}`);
|
|
@@ -947,6 +1117,7 @@ export const phaseVerify = defineCommand({
|
|
|
947
1117
|
if (!args.json && resolvedVerification.targets.length > 1) {
|
|
948
1118
|
console.log(`\n Target: ${target.service?.name ?? 'root'}`);
|
|
949
1119
|
}
|
|
1120
|
+
emitProgress('target:start', target.service?.name ?? 'root');
|
|
950
1121
|
const targetResults = await workflowEngine.verify({
|
|
951
1122
|
cwd: target.config.cwd,
|
|
952
1123
|
build: args['build-cmd'] ?? target.config.build,
|
|
@@ -964,7 +1135,9 @@ export const phaseVerify = defineCommand({
|
|
|
964
1135
|
tddStrict: isTruthyFlag(args['tdd-strict']),
|
|
965
1136
|
});
|
|
966
1137
|
gateResults.push(...targetResults);
|
|
1138
|
+
emitProgress('target:done', `${target.service?.name ?? 'root'} gates=${targetResults.length}`);
|
|
967
1139
|
}
|
|
1140
|
+
emitProgress('verify:gates-complete', `gates=${gateResults.length}`);
|
|
968
1141
|
// Step 2: Display gate results
|
|
969
1142
|
if (!args.json) {
|
|
970
1143
|
console.log('\nGate Results:');
|
|
@@ -1095,6 +1268,16 @@ export const phaseVerify = defineCommand({
|
|
|
1095
1268
|
});
|
|
1096
1269
|
const workflowOpenTaskBlockers = blockingWorkflowOpenTasks(workflowState.openTasks, args['task-id']);
|
|
1097
1270
|
const workflowOpenTasksBlocked = workflowOpenTaskBlockers.length > 0;
|
|
1271
|
+
// P0+ (decision E1): resolve the originating Spec up-front so the executional
|
|
1272
|
+
// boundary / constraint checks can gate Task completion. Both reports are
|
|
1273
|
+
// advisory under default/auto and blocking under full/ci/strict; the
|
|
1274
|
+
// detection logic is identical, only the report mode and gating differ.
|
|
1275
|
+
const spec = await resolveSpecForTask(store, task);
|
|
1276
|
+
const boundaryEnforced = isEnforcedBoundaryProfile(resolvedVerification.profileName);
|
|
1277
|
+
const boundaryEnforcement = evaluateBoundaries(taskFiles, spec?.payload.boundaries, boundaryEnforced);
|
|
1278
|
+
const constraintCoverage = evaluateConstraints(spec?.payload.constraints, spec?.payload.verificationSurface, boundaryEnforced);
|
|
1279
|
+
const boundaryBlocked = boundaryEnforced &&
|
|
1280
|
+
countBoundaryBlockers(boundaryEnforcement, constraintCoverage) > 0;
|
|
1098
1281
|
// Attempt FSM transition to COMPLETED
|
|
1099
1282
|
// Guards: build_passed, lint_passed, tests_passed, open workflow tasks, and optional artifact policy.
|
|
1100
1283
|
const codePassed = results.buildStatus === 'success' &&
|
|
@@ -1109,6 +1292,7 @@ export const phaseVerify = defineCommand({
|
|
|
1109
1292
|
!skillInstallationBlocked &&
|
|
1110
1293
|
!engineeringStandards.blocked &&
|
|
1111
1294
|
!(toolEvidenceGate?.blocked ?? false) &&
|
|
1295
|
+
!boundaryBlocked &&
|
|
1112
1296
|
!workflowOpenTasksBlocked;
|
|
1113
1297
|
let transitionResult = null;
|
|
1114
1298
|
if (completionEligible) {
|
|
@@ -1147,6 +1331,9 @@ export const phaseVerify = defineCommand({
|
|
|
1147
1331
|
else if (!args.json && toolEvidenceGate?.blocked) {
|
|
1148
1332
|
console.log('\n Tool evidence gate blocked completion - required tools need passed execution evidence');
|
|
1149
1333
|
}
|
|
1334
|
+
else if (!args.json && boundaryBlocked) {
|
|
1335
|
+
console.log('\n Boundary enforcement blocked completion - keep edits inside Spec boundaries and guard every constraint (enforced profile)');
|
|
1336
|
+
}
|
|
1150
1337
|
else if (!args.json && workflowOpenTasksBlocked) {
|
|
1151
1338
|
console.log('\n Workflow open tasks blocked completion - finish required workflow commands first');
|
|
1152
1339
|
}
|
|
@@ -1203,7 +1390,7 @@ export const phaseVerify = defineCommand({
|
|
|
1203
1390
|
toolEvidenceGatePassed: finalToolEvidenceGate ? !finalToolEvidenceGate.blocked : true,
|
|
1204
1391
|
};
|
|
1205
1392
|
await store.update(args['task-id'], { payload: finalPayload });
|
|
1206
|
-
const metricGateStatus = (finalArtifactGate.blocked || finalSkillGate?.blocked || skillInstallationBlocked || engineeringStandards.blocked || finalToolEvidenceGate?.blocked || workflowOpenTasksBlocked)
|
|
1393
|
+
const metricGateStatus = (finalArtifactGate.blocked || finalSkillGate?.blocked || skillInstallationBlocked || engineeringStandards.blocked || finalToolEvidenceGate?.blocked || boundaryBlocked || workflowOpenTasksBlocked)
|
|
1207
1394
|
? 'blocked'
|
|
1208
1395
|
: undefined;
|
|
1209
1396
|
const metricRecord = await recordVerificationMetric({
|
|
@@ -1215,6 +1402,24 @@ export const phaseVerify = defineCommand({
|
|
|
1215
1402
|
artifactCheck,
|
|
1216
1403
|
finalGateStatus: metricGateStatus,
|
|
1217
1404
|
});
|
|
1405
|
+
// P0 (Decision C1): soft-map the Spec's verificationSurface against evidence.
|
|
1406
|
+
// Unmapped items are reported as warnings only — never blocking in P0.
|
|
1407
|
+
// (`spec`, `boundaryEnforcement` and `constraintCoverage` were resolved
|
|
1408
|
+
// above so the boundary checks could gate completion under enforced profiles.)
|
|
1409
|
+
const verificationCommands = resolvedVerification.targets.flatMap(target => [
|
|
1410
|
+
target.config.build, target.config.lint, target.config.test, target.config.coverage,
|
|
1411
|
+
]);
|
|
1412
|
+
const surfaceSignals = await gatherVerificationSignals(store, {
|
|
1413
|
+
evidenceIds: verificationEvidenceIds,
|
|
1414
|
+
commands: [
|
|
1415
|
+
...verificationCommands,
|
|
1416
|
+
args['build-cmd'], args['lint-cmd'], args['test-cmd'], args['coverage-cmd'],
|
|
1417
|
+
],
|
|
1418
|
+
files: taskFiles,
|
|
1419
|
+
});
|
|
1420
|
+
const surfaceCoverage = spec?.payload.verificationSurface?.length
|
|
1421
|
+
? computeSurfaceCoverage(spec.payload.verificationSurface, surfaceSignals)
|
|
1422
|
+
: undefined;
|
|
1218
1423
|
const result = {
|
|
1219
1424
|
phase: 'VERIFY',
|
|
1220
1425
|
taskId: args['task-id'],
|
|
@@ -1240,12 +1445,23 @@ export const phaseVerify = defineCommand({
|
|
|
1240
1445
|
blocked: skillInstallationBlocked,
|
|
1241
1446
|
},
|
|
1242
1447
|
metric: metricRecord,
|
|
1448
|
+
verificationSurfaceCoverage: surfaceCoverage,
|
|
1449
|
+
boundaryEnforcement,
|
|
1450
|
+
constraintCoverage,
|
|
1243
1451
|
passed
|
|
1244
1452
|
};
|
|
1245
1453
|
if (args.json)
|
|
1246
1454
|
console.log(JSON.stringify(result, null, 2));
|
|
1247
1455
|
else {
|
|
1248
1456
|
console.log(`\nVERIFY: ${passed ? 'PASSED' : 'FAILED'}`);
|
|
1457
|
+
if (surfaceCoverage) {
|
|
1458
|
+
for (const line of formatSurfaceCoverageWarnings(surfaceCoverage))
|
|
1459
|
+
console.log(` ${line}`);
|
|
1460
|
+
}
|
|
1461
|
+
for (const line of formatBoundaryWarnings(boundaryEnforcement))
|
|
1462
|
+
console.log(` ${line}`);
|
|
1463
|
+
for (const line of formatConstraintWarnings(constraintCoverage))
|
|
1464
|
+
console.log(` ${line}`);
|
|
1249
1465
|
if (metricRecord)
|
|
1250
1466
|
console.log(` Metrics: ${metricRecord.taskId} ${metricRecord.finalGateStatus} (fix iterations: ${metricRecord.fixIterations})`);
|
|
1251
1467
|
if (artifactCheck && !artifactCheck.complete) {
|
|
@@ -1341,7 +1557,23 @@ async function reviewGitChanges(taskPayload) {
|
|
|
1341
1557
|
diffs.push({ file: file.path, text: diff.stdout });
|
|
1342
1558
|
}
|
|
1343
1559
|
}
|
|
1344
|
-
return analyzeReview({ statusOutput, diffs, taskPayload, verificationEvidence });
|
|
1560
|
+
return { ...analyzeReview({ statusOutput, diffs, taskPayload, verificationEvidence }), diffs };
|
|
1561
|
+
}
|
|
1562
|
+
// Map the `--mode` argument onto a supported review mode.
// 'fresh-subagent' and 'hybrid' pass through unchanged; every other value
// (including undefined) yields the default 'ai-self'.
function normalizeReviewMode(value) {
    switch (value) {
        case 'fresh-subagent':
        case 'hybrid':
            return value;
        default:
            return 'ai-self';
    }
}
|
|
1565
|
+
// Build a compact diff summary (file headers + added lines) for the advisory
// LLM-as-Judge (P1.4). Capped so it never blows the model/context budget.
//
// @param diffs  Array of `{ file, text }`, where `text` is the diff text for
//               that file.
// @returns      One `# <file>` section per diff followed by its added lines
//               (leading '+' stripped), sections separated by a blank line,
//               truncated to 6000 characters.
function buildJudgeDiffSummary(diffs) {
    const MAX_SUMMARY_CHARS = 6000;
    // Extract added lines from one diff text.
    // When the text has `@@` hunk headers, only '+' lines inside a hunk count.
    // That skips the `+++ b/file` header (it precedes the first hunk) without
    // also discarding real additions whose content begins with "++" — an
    // added `++i;` shows up in the diff as `+++i;`.
    // Text without hunk headers keeps the simpler prefix filter.
    const addedLinesOf = (text) => {
        const lines = String(text ?? '').split('\n');
        if (!lines.some(line => line.startsWith('@@'))) {
            return lines
                .filter(line => line.startsWith('+') && !line.startsWith('+++'))
                .map(line => line.slice(1));
        }
        const added = [];
        let inHunk = false;
        for (const line of lines) {
            if (line.startsWith('@@')) {
                inHunk = true;
            }
            else if (line.startsWith('diff --git ')) {
                // Next file header inside a combined diff: leave the hunk.
                inHunk = false;
            }
            else if (inHunk && line.startsWith('+')) {
                added.push(line.slice(1));
            }
        }
        return added;
    };
    const sections = diffs.map(diff => `# ${diff.file}\n${addedLinesOf(diff.text).join('\n')}`);
    return sections.join('\n\n').slice(0, MAX_SUMMARY_CHARS);
}
|
|
1346
1578
|
function collectReviewedFiles(records) {
|
|
1347
1579
|
const reviewed = new Set();
|
|
@@ -1553,6 +1785,8 @@ export const phaseReview = defineCommand({
|
|
|
1553
1785
|
'check-style': { type: 'boolean', default: true },
|
|
1554
1786
|
format: { type: 'string', alias: 'f', description: 'Output format: html or md (default: html)' },
|
|
1555
1787
|
brand: { type: 'string', description: 'Brand theme for HTML output (vercel/stripe/notion/linear/github)' },
|
|
1788
|
+
judge: { type: 'boolean', default: true, description: 'Run the advisory LLM-as-Judge spec-conformance check (P1.4)' },
|
|
1789
|
+
mode: { type: 'string', default: 'ai-self', description: 'Review mode: ai-self (default) | fresh-subagent | hybrid (P2.2)' },
|
|
1556
1790
|
json: { type: 'boolean', default: false },
|
|
1557
1791
|
},
|
|
1558
1792
|
async run({ args }) {
|
|
@@ -1586,12 +1820,44 @@ export const phaseReview = defineCommand({
|
|
|
1586
1820
|
const findings = review.findings;
|
|
1587
1821
|
const summary = summarizeFindings(findings);
|
|
1588
1822
|
const passed = summary.critical === 0 && summary.high === 0;
|
|
1823
|
+
const reviewMode = normalizeReviewMode(args.mode);
|
|
1824
|
+
// Resolve the originating Spec once; both the advisory judge (P1.4) and the
|
|
1825
|
+
// fresh-context verifier (P2.2) read its outcome / verificationSurface.
|
|
1826
|
+
const needsSpec = args.judge || reviewMode !== 'ai-self';
|
|
1827
|
+
const spec = needsSpec ? await resolveSpecForTask(store, task) : undefined;
|
|
1828
|
+
const diffSummary = needsSpec ? buildJudgeDiffSummary(review.diffs) : '';
|
|
1829
|
+
// P1.4 (decision K1): advisory LLM-as-Judge. Never part of `passed`.
|
|
1830
|
+
let judgeVerdict;
|
|
1831
|
+
if (args.judge) {
|
|
1832
|
+
const judge = new LlmJudge(new JsonLlmClient(), new JudgePromptStore(SCALE_DIR));
|
|
1833
|
+
judgeVerdict = await judge.judge({
|
|
1834
|
+
outcome: spec?.payload.what,
|
|
1835
|
+
verificationSurface: spec?.payload.verificationSurface ?? [],
|
|
1836
|
+
diffSummary,
|
|
1837
|
+
reviewFindings: summary,
|
|
1838
|
+
});
|
|
1839
|
+
}
|
|
1840
|
+
// P2.2 (decisions M1/N1/O1): fresh-context verifier runs only for
|
|
1841
|
+
// fresh-subagent / hybrid modes, on isolated input (surface + diff + gate
|
|
1842
|
+
// summary, no build-agent history). Advisory only — never blocks ship.
|
|
1843
|
+
let freshVerifyVerdict;
|
|
1844
|
+
if (reviewMode !== 'ai-self') {
|
|
1845
|
+
freshVerifyVerdict = await new FreshContextVerifier(new JsonLlmClient()).verify({
|
|
1846
|
+
outcome: spec?.payload.what,
|
|
1847
|
+
verificationSurface: spec?.payload.verificationSurface ?? [],
|
|
1848
|
+
diffSummary,
|
|
1849
|
+
gateSummary: `critical=${summary.critical} high=${summary.high} medium=${summary.medium} low=${summary.low}`,
|
|
1850
|
+
});
|
|
1851
|
+
}
|
|
1589
1852
|
const record = reviewStore.saveReview({
|
|
1590
1853
|
taskId: args['task-id'],
|
|
1591
1854
|
passed,
|
|
1592
1855
|
findings,
|
|
1593
1856
|
changedFiles: review.changedFiles.map(file => normalizeGitPath(file.path)),
|
|
1594
1857
|
summary,
|
|
1858
|
+
judge: judgeVerdict,
|
|
1859
|
+
reviewMode,
|
|
1860
|
+
freshVerify: freshVerifyVerdict,
|
|
1595
1861
|
});
|
|
1596
1862
|
if (task && taskPayload) {
|
|
1597
1863
|
const updatedPayload = {
|
|
@@ -1638,6 +1904,9 @@ export const phaseReview = defineCommand({
|
|
|
1638
1904
|
findings,
|
|
1639
1905
|
changedFiles: review.changedFiles.map(file => normalizeGitPath(file.path)),
|
|
1640
1906
|
summary,
|
|
1907
|
+
judge: judgeVerdict,
|
|
1908
|
+
reviewMode,
|
|
1909
|
+
freshVerify: freshVerifyVerdict,
|
|
1641
1910
|
karpathy: karpathyReport,
|
|
1642
1911
|
passed,
|
|
1643
1912
|
format: reviewOutputFormat,
|
|
@@ -1660,6 +1929,18 @@ export const phaseReview = defineCommand({
|
|
|
1660
1929
|
console.log(`LOW: ${summary.low} issues`);
|
|
1661
1930
|
console.log('----------------------------------------');
|
|
1662
1931
|
findings.slice(0, 10).forEach(f => console.log(` [${f.severity}] ${f.file ? `${f.file}: ` : ''}${f.description}`));
|
|
1932
|
+
if (judgeVerdict) {
|
|
1933
|
+
console.log(`\nJudge (advisory, ${judgeVerdict.modelUsed}): ${judgeVerdict.decision.toUpperCase()} (confidence ${judgeVerdict.confidence.toFixed(2)})`);
|
|
1934
|
+
console.log(` ${judgeVerdict.rationale}`);
|
|
1935
|
+
if (judgeVerdict.unmetSurfaces.length)
|
|
1936
|
+
console.log(` Unmet surfaces: ${judgeVerdict.unmetSurfaces.join('; ')}`);
|
|
1937
|
+
}
|
|
1938
|
+
if (freshVerifyVerdict) {
|
|
1939
|
+
console.log(`\nFresh-context verifier (advisory, ${freshVerifyVerdict.modelUsed}): ${freshVerifyVerdict.decision.toUpperCase()} (confidence ${freshVerifyVerdict.confidence.toFixed(2)})`);
|
|
1940
|
+
console.log(` ${freshVerifyVerdict.rationale}`);
|
|
1941
|
+
if (freshVerifyVerdict.unmetSurfaces.length)
|
|
1942
|
+
console.log(` Unmet surfaces: ${freshVerifyVerdict.unmetSurfaces.join('; ')}`);
|
|
1943
|
+
}
|
|
1663
1944
|
if (passed) {
|
|
1664
1945
|
console.log('\nReview passed (no CRITICAL issues)');
|
|
1665
1946
|
console.log('\n Next: scale ship ' + (args['task-id'] ?? '<task-id>') + '\n');
|
|
@@ -1808,6 +2089,15 @@ export const phaseShip = defineCommand({
|
|
|
1808
2089
|
console.error("Warning: Plan completion transition failed:", e.message);
|
|
1809
2090
|
}
|
|
1810
2091
|
}
|
|
2092
|
+
// P0 (Decision C1): soft-map the Spec's verificationSurface at ship time too.
|
|
2093
|
+
const shipSpec = await resolveSpecForTask(store, task);
|
|
2094
|
+
const shipSignals = await gatherVerificationSignals(store, {
|
|
2095
|
+
evidenceIds: payload.verificationEvidenceIds,
|
|
2096
|
+
files: payload.filesInvolved,
|
|
2097
|
+
});
|
|
2098
|
+
const shipSurfaceCoverage = shipSpec?.payload.verificationSurface?.length
|
|
2099
|
+
? computeSurfaceCoverage(shipSpec.payload.verificationSurface, shipSignals)
|
|
2100
|
+
: undefined;
|
|
1811
2101
|
// === WorkflowEngine Integration ===
|
|
1812
2102
|
// Generate HonestDelivery report
|
|
1813
2103
|
if (!args.json) {
|
|
@@ -1844,6 +2134,10 @@ export const phaseShip = defineCommand({
|
|
|
1844
2134
|
unverifiedItems.forEach(item => console.log(` [UNVERIFIED] ${item}`));
|
|
1845
2135
|
console.log('');
|
|
1846
2136
|
}
|
|
2137
|
+
if (shipSurfaceCoverage) {
|
|
2138
|
+
for (const line of formatSurfaceCoverageWarnings(shipSurfaceCoverage))
|
|
2139
|
+
console.log(line);
|
|
2140
|
+
}
|
|
1847
2141
|
}
|
|
1848
2142
|
const result = {
|
|
1849
2143
|
phase: 'SHIP',
|
|
@@ -1863,6 +2157,7 @@ export const phaseShip = defineCommand({
|
|
|
1863
2157
|
blockers: workspaceBoundary.blockers,
|
|
1864
2158
|
warnings: workspaceBoundary.warnings,
|
|
1865
2159
|
} : null,
|
|
2160
|
+
verificationSurfaceCoverage: shipSurfaceCoverage,
|
|
1866
2161
|
};
|
|
1867
2162
|
if (args.json)
|
|
1868
2163
|
console.log(JSON.stringify(result, null, 2));
|