cclaw-cli 0.51.23 → 0.51.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +135 -414
  2. package/dist/artifact-linter.js +10 -6
  3. package/dist/config.d.ts +1 -1
  4. package/dist/config.js +28 -3
  5. package/dist/content/core-agents.d.ts +128 -2
  6. package/dist/content/core-agents.js +291 -13
  7. package/dist/content/examples.js +21 -10
  8. package/dist/content/next-command.js +10 -6
  9. package/dist/content/reference-patterns.d.ts +18 -0
  10. package/dist/content/reference-patterns.js +391 -0
  11. package/dist/content/seed-shelf.js +73 -8
  12. package/dist/content/skills.js +39 -34
  13. package/dist/content/stage-common-guidance.js +19 -3
  14. package/dist/content/stage-schema.d.ts +12 -0
  15. package/dist/content/stage-schema.js +224 -24
  16. package/dist/content/stages/_lint-metadata/index.js +3 -2
  17. package/dist/content/stages/brainstorm.js +27 -18
  18. package/dist/content/stages/design.js +27 -18
  19. package/dist/content/stages/review.js +20 -9
  20. package/dist/content/stages/schema-types.d.ts +9 -2
  21. package/dist/content/stages/scope.js +21 -10
  22. package/dist/content/stages/ship.js +3 -2
  23. package/dist/content/stages/tdd.js +18 -13
  24. package/dist/content/start-command.js +3 -2
  25. package/dist/content/status-command.js +9 -4
  26. package/dist/content/subagents.js +336 -38
  27. package/dist/content/templates.js +182 -25
  28. package/dist/delegation.d.ts +2 -0
  29. package/dist/delegation.js +27 -6
  30. package/dist/doctor.js +167 -25
  31. package/dist/flow-state.d.ts +1 -0
  32. package/dist/flow-state.js +1 -0
  33. package/dist/gate-evidence.js +25 -2
  34. package/dist/install.js +72 -8
  35. package/dist/internal/advance-stage.js +179 -26
  36. package/dist/knowledge-store.js +30 -6
  37. package/dist/run-archive.js +11 -0
  38. package/dist/run-persistence.js +35 -10
  39. package/dist/tdd-verification-evidence.d.ts +17 -0
  40. package/dist/tdd-verification-evidence.js +43 -0
  41. package/dist/types.d.ts +10 -0
  42. package/package.json +1 -1
@@ -9,11 +9,11 @@ export const DESIGN = {
9
9
  complexityTier: "standard",
10
10
  skillFolder: "engineering-design-lock",
11
11
  skillName: "engineering-design-lock",
12
- skillDescription: "Engineering lock-in stage. Build a concrete technical spine before spec and planning, with section-by-section interactive review.",
12
+ skillDescription: "Engineering lock stage. Convert the approved scope contract into a buildable architecture with adversarial alternatives, failure/rescue paths, and spec handoff.",
13
13
  philosophy: {
14
14
  hardGate: "Do NOT write implementation code. This stage produces design decisions and architecture documents only. No code changes, no scaffolding, no test files.",
15
15
  ironLaw: "NO DESIGN DECISION WITHOUT A LABELED DIAGRAM, A REJECTED ALTERNATIVE, AND A NAMED FAILURE MODE.",
16
- purpose: "Lock architecture, data flow, failure modes, and test/performance expectations through rigorous interactive review.",
16
+ purpose: "Lock how the scoped slice works: architecture boundary, existing fit, data/state flow, critical path, trust boundaries, failure/rescue behavior, verification, rollout, and spec handoff.",
17
17
  whenToUse: [
18
18
  "After scope agreement approval",
19
19
  "Before writing final spec and execution plan",
@@ -40,14 +40,15 @@ export const DESIGN = {
40
40
  },
41
41
  executionModel: {
42
42
  checklist: [
43
- "Compact design lock — for simple greenfield/product slices, produce a tight but complete design spine: codebase investigation, architecture boundary, one labeled diagram, data flow, failure/rescue table, test/perf expectations, and handoff. Do not run a sprawling workshop when a strong engineering lock fits on one page.",
43
+ "Compact design lock — design does not decide what to build; it decides how the approved scope works. For simple slices, produce a tight lock: upstream handoff, existing fit, architecture boundary, one labeled diagram, data/state flow, critical path, failure/rescue, trust boundaries, test/perf expectations, rollout/rollback, rejected alternative, and spec handoff.",
44
44
  "Trivial-Change Escape Hatch — for <=3 files, no new interfaces, and no cross-module data flow, produce a mini-design (rationale, changed files, one risk) and proceed to spec.",
45
45
  "Tiered Research — for simple/medium work, do compact inline codebase/research synthesis in `Research Fleet Synthesis`; write `.cclaw/artifacts/02a-research.md` and run the full fleet only for deep/high-risk work or when external framework/architecture uncertainty exists.",
46
46
  "Design Doc Check — read upstream artifacts and current design docs; latest superseding doc wins.",
47
47
  "Investigator pass — before design decisions, read blast-radius code and record touched files, responsibilities, reuse candidates, and existing patterns.",
48
- "Scope Challenge + Search Before Building — find existing solutions, minimum change set, and complexity smells before custom architecture.",
49
- "Architecture Review — lock boundaries, one realistic failure scenario per new codepath, and high-risk choices with chosen path, one shadow alternative, switch trigger, and verification evidence; include tier-required diagrams.",
50
- "Review core risk areas — security/threat model, code quality, tests, performance, observability/debuggability, deployment/rollout, and parallelization when modules are independent.",
48
+ "Scope Challenge + Search Before Building — find existing solutions, minimum change set, reference-grade contracts to mirror, and complexity smells before custom architecture.",
49
+ "Architecture Review — lock boundaries, chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence for every high-risk choice; include tier-required diagrams.",
50
+ "Review core risk areas — existing system fit, data/state flow, critical path, security/trust boundaries, tests, performance budget, observability/debuggability, rollout/rollback, rejected alternatives, and spec handoff.",
51
+ "**ADR + pre-mortem contract** — capture ADR-style decision rows (context, decision, alternatives, consequences), run a pre-mortem on likely failures, and map each critical flow to a validating test and diagram anchor before lock.",
51
52
  `Critic pass — run/reconcile adversarial second opinion on architecture, coupling, failure modes, and cheaper alternatives. ${reviewLoopPolicySummary("design")} ${reviewLoopSecondOpinionSummary("design")}`,
52
53
  "Run optional stale-diagram audit only when configured.",
53
54
  "Capture leftovers — seed high-upside deferred ideas, list unresolved decisions with defaults, document distribution for new artifact types, and cross-reference deferred items to scope or unresolved decisions."
@@ -73,7 +74,7 @@ export const DESIGN = {
73
74
  "Run configured stale-diagram audit when enabled.",
74
75
  "Produce required outputs: NOT-in-scope, What-already-exists, tier diagrams, failure table, completion dashboard.",
75
76
  "Plant high-upside deferred ideas when useful and reconcile critic/outside-voice findings.",
76
- "Write design lock artifact for downstream spec/plan."
77
+ "Write design lock artifact for downstream spec/plan with design decisions, rejected alternatives, verification evidence, and exact spec handoff."
77
78
  ],
78
79
  requiredGates: [
79
80
  { id: "design_research_complete", description: "Research is complete: compact inline synthesis by default, or a separate research artifact for deep/high-risk work, and findings are mapped to design decisions." },
@@ -83,7 +84,7 @@ export const DESIGN = {
83
84
  { id: "design_test_and_perf_defined", description: "Test strategy and performance budget are defined." }
84
85
  ],
85
86
  requiredEvidence: [
86
- "Research Fleet Synthesis is filled in `03-design.md`; for deep/high-risk work, `.cclaw/artifacts/02a-research.md` is also written with stack/features/architecture/pitfalls sections plus synthesis.",
87
+ "Research Fleet Synthesis is filled in `.cclaw/artifacts/03-design-<slug>.md`; for deep/high-risk work, `.cclaw/artifacts/02a-research.md` is also written with stack/features/architecture/pitfalls sections plus synthesis.",
87
88
  "Artifact written to `.cclaw/artifacts/03-design-<slug>.md`.",
88
89
  "Failure-mode table exists in Method/Exception/Rescue/UserSees format.",
89
90
  "Tier-required diagram markers are present: architecture (all tiers). Standard/Deep add-ons (shadow/error) and Deep add-ons (state-machine/rollback/deployment-sequence) are included only when risk warrants them.",
@@ -93,6 +94,10 @@ export const DESIGN = {
93
94
  "Outside-voice findings and dispositions are recorded (accept/reject/defer).",
94
95
  `Spec review loop summary includes iteration count and quality score trajectory per ${reviewLoopPolicySummary("design")}`,
95
96
  reviewLoopSecondOpinionSummary("design"),
97
+ "Adversarial lock table includes chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence, with reference-grade contracts for mirrored patterns when applicable.",
98
+ "Architecture Decision Record (ADR) section captures context, decision, alternatives, consequences, and reversal trigger for major choices.",
99
+ "Pre-mortem section lists top failure scenarios, early signals, mitigations, and owner before implementation begins.",
100
+ "Test-Diagram Mapping links critical flows to both validating tests and diagram anchors.",
96
101
  "Test strategy includes unit/integration/e2e expectations.",
97
102
  "When a high-upside idea is deferred, a seed file is created under `.cclaw/seeds/` and referenced in the artifact.",
98
103
  "NOT-in-scope section produced.",
@@ -144,30 +149,33 @@ export const DESIGN = {
144
149
  artifactValidation: [
145
150
  { section: "Upstream Handoff", required: false, validationRule: "Summarizes scope/research decisions, constraints, open questions, and explicit drift before design choices." },
146
151
  { section: "Research Fleet Synthesis", required: true, validationRule: "Must summarize the tiered lenses actually run and map findings to concrete design decisions. Default may be compact inline synthesis; full separate research pack is Deep/high-risk only." },
147
- { section: "Codebase Investigation", required: false, validationRule: "Investigator pass: list blast-radius files with current responsibilities, discovered patterns, and reuse candidates." },
152
+ { section: "Codebase Investigation", required: false, validationRule: "Investigator pass: list blast-radius files with current responsibilities, discovered patterns, reuse candidates, and existing system fit." },
153
+ { section: "Engineering Lock", required: true, validationRule: "Canonical lock: chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, verification evidence, critical path, rollout/rollback, and confidence." },
154
+ { section: "Architecture Decision Record (ADR)", required: false, validationRule: "Recommended: rows for context, decision, alternatives, consequences, and reversal trigger for each major architecture choice." },
148
155
  { section: "Search Before Building", required: false, validationRule: "For each technical choice: Layer 1 (exact match), Layer 2 (partial match), Layer 3 (inspiration), EUREKA labels with reuse-first default." },
149
156
  { section: "Architecture Boundaries", required: true, validationRule: "Must list component boundaries with ownership." },
150
157
  { section: "Architecture Diagram", required: true, validationRule: "Must include `<!-- diagram: architecture -->` marker. Diagram must label concrete nodes, label arrows, mark direction, distinguish sync/async edges, and include at least one failure/degraded edge." },
151
- { section: "Data-Flow Shadow Paths", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: data-flow-shadow-paths -->` marker plus a table for high-risk choices: chosen path, shadow alternative, switch trigger, fallback/degrade behavior, and verification evidence." },
158
+ { section: "Data-Flow Shadow Paths", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: data-flow-shadow-paths -->` marker plus a table for high-risk choices: chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence." },
152
159
  { section: "Error Flow Diagram", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: error-flow -->` marker and failure-detection -> rescue -> user-visible outcome flow." },
153
- { section: "State Machine Diagram", required: false, validationRule: "Deep add-on: include `<!-- diagram: state-machine -->` marker and state transitions for critical flow lifecycle." },
154
- { section: "Rollback Flowchart", required: false, validationRule: "Deep add-on: include `<!-- diagram: rollback-flowchart -->` marker with trigger -> rollback actions -> verification." },
155
- { section: "Deployment Sequence Diagram", required: false, validationRule: "Deep add-on: include `<!-- diagram: deployment-sequence -->` marker with rollout order and guard checks." },
156
- { section: "Data Flow", required: false, validationRule: "Must include happy path, nil input, empty input, upstream error paths, plus Interaction Edge Case matrix rows for: double-click, nav-away-mid-request, 10K-result dataset, background-job abandonment, zombie connection. Each row must declare handled yes/no and deferred item when not handled." },
160
+ { section: "Data Flow", required: false, validationRule: "Must include data/state flow, happy path, nil input, empty input, upstream error paths, plus Interaction Edge Case matrix rows for double-click, nav-away-mid-request, 10K-result dataset, background-job abandonment, zombie connection. Each row declares handled yes/no and deferred item when not handled." },
157
161
  { section: "Stale Diagram Audit", required: false, validationRule: "When `.cclaw/config.yaml::optInAudits.staleDiagramAudit` is true: blast-radius files from Codebase Investigation must not be newer than the current design diagram-marker baseline unless explicitly refreshed." },
158
162
  { section: "Failure Mode Table", required: true, validationRule: "Use Method/Exception/Rescue/UserSees columns and treat silent user impact without rescue as critical." },
163
+ { section: "Pre-mortem", required: false, validationRule: "Recommended: list top failure scenarios, early warning signal, mitigation owner, and containment action before implementation." },
159
164
  { section: "Security & Threat Model", required: true, validationRule: "Must list trust boundaries, abuse/failure scenarios, mitigations, and residual risks." },
160
165
  { section: "Test Strategy", required: false, validationRule: "Must define unit/integration/e2e expectations with coverage targets." },
166
+ { section: "Test-Diagram Mapping", required: false, validationRule: "Recommended: map each critical flow to at least one validating test ID and one diagram marker/anchor." },
167
+ { section: "Test Strategy", required: false, validationRule: "Must define unit/integration/e2e expectations with coverage targets." },
161
168
  { section: "Performance Budget", required: false, validationRule: "For each critical path: metric name, target threshold, and measurement method." },
162
169
  { section: "Observability & Debuggability", required: true, validationRule: "Must define logs/metrics/traces plus alerting/debug path for critical failure modes." },
163
- { section: "Deployment & Rollout", required: true, validationRule: "Must define migration/flag strategy, rollback plan, and post-deploy verification steps." },
170
+ { section: "Deployment & Rollout", required: true, validationRule: "Must define migration/flag strategy, rollout/rollback plan, switch trigger, and post-deploy verification steps." },
164
171
  { section: "What Already Exists", required: false, validationRule: "For each sub-problem: existing code/library found (Layer 1-3/EUREKA label), reuse decision, and adaptation needed." },
172
+ { section: "Reference-Grade Contracts", required: false, validationRule: "For every mirrored pattern: source, reusable invariant, local adaptation, rejection boundary, and verification signal. Omit with `None - no external or in-repo pattern mirrored` for compact local changes." },
173
+ { section: "Rejected Alternatives", required: false, validationRule: "List alternatives considered, why rejected, and what signal would revive them." },
174
+ { section: "Design Decisions", required: false, validationRule: "Stable design decisions with requirement/locked-decision refs and downstream spec impact." },
175
+ { section: "Spec Handoff", required: true, validationRule: "Exact requirements, design decisions, risks, test/perf expectations, and unresolved questions that spec must carry forward." },
165
176
  { section: "Outside Voice Findings", required: false, validationRule: "Critic pass: list adversarial findings and disposition (accept/reject/defer) with rationale per material finding." },
166
177
  { section: "Design Outside Voice Loop", required: false, validationRule: `Record iteration table with quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("design")}` },
167
178
  { section: "NOT in scope", required: false, validationRule: "Work considered and explicitly deferred with one-line rationale." },
168
- { section: "Parallelization Strategy", required: false, validationRule: "Standard/Deep add-on when multi-module: dependency table, parallel lanes, conflict flags." },
169
- { section: "Interface Contracts", required: false, validationRule: "Standard/Deep add-on when module boundaries or APIs change: producers, consumers, and payload/interface expectations." },
170
- { section: "Unresolved Decisions", required: false, validationRule: "Standard/Deep add-on if any: what info is missing, who provides it, default if unanswered." },
171
179
  { section: "Completion Dashboard", required: true, validationRule: "Lists every review section with status (clear / issues-found-resolved / issues-open), critical/open gap counts, decision count, and unresolved items (or 'None')." }
172
180
  ],
173
181
  trivialOverrideSections: ["Architecture Boundaries", "NOT in scope", "Completion Dashboard"]
@@ -180,6 +188,7 @@ export const DESIGN = {
180
188
  "test and performance baseline",
181
189
  "NOT-in-scope section",
182
190
  "What-already-exists section",
191
+ "design decisions and spec handoff",
183
192
  "design completion dashboard"
184
193
  ],
185
194
  reviewLoop: {
@@ -38,14 +38,16 @@ export const REVIEW = {
38
38
  "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and the active track's upstream source items.",
39
39
  "Run traceability matrix when the active track enforces it; otherwise confirm spec acceptance/reproduction slices are covered directly.",
40
40
  "Layer 1: Spec Compliance — check every acceptance criterion against implementation. Verdict: pass/fail per criterion.",
41
- "Layer 2: Integrated findings one structured pass tagged by category: correctness, security, performance, architecture, external-safety.",
42
- "Security sweepmandatory dedicated security-reviewer pass across diff + touched modules. A zero-finding pass must include `NO_CHANGE_ATTESTATION` with rationale.",
41
+ "Review Evidence Scope — record base/head, files inspected, changed-file coverage, diagnostics run, dependency/version audit when relevant, and any files intentionally not inspected with explicit reason.",
42
+ "Layer 2: Integrated findings — one structured pass tagged by category: correctness, security, performance, architecture, external-safety. Every finding uses file:line; if impossible, include an explicit no-line reason.",
43
+ "Security sweep — mandatory dedicated security-reviewer pass across diff + touched modules. A zero-finding pass must include `NO_CHANGE_ATTESTATION` or `NO_SECURITY_IMPACT` with rationale and inspected surfaces.",
43
44
  "Incoming Feedback Intake — when human reviewer comments, bot findings, or CI annotations exist, keep a per-comment disposition queue and mirror outcomes into `07-review.md` + `07-review-army.json` before final verdict.",
44
45
  "Structured Review reconciliation — normalize findings into `07-review-army.json`, dedup by fingerprint, and mark multi-specialist confirmations when multiple lenses agree.",
45
- "Meta-Review — Were tests actually run? Do test names match what they test? Are there real assertions?",
46
+ "Meta-Review — Were tests/diagnostics actually run? Do test names match what they test? Are there real assertions? Is the dependency/version surface unchanged or audited?",
46
47
  "Classify findings — Critical (blocks ship), Important (should fix), Suggestion (optional improvement).",
48
+ "Victory Detector — before verdict, confirm Layer 1, Layer 2, security sweep, structured findings, trace evidence, and unresolved-critical status are complete; otherwise iterate findings or route back to TDD.",
47
49
  "Produce verdict — APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
48
- "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD`, include `cclaw internal rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs, and satisfy the special transition guard `review_verdict_blocked` instead of `review_criticals_resolved`."
50
+ "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD`, include the managed command `cclaw internal rewind tdd \"review_blocked_by_critical <finding-ids>\"`, and satisfy the special transition guard `review_verdict_blocked` instead of `review_criticals_resolved`. After TDD rework, clear the stale marker with `cclaw internal rewind --ack tdd` before `/cc-next`."
49
51
  ],
50
52
  interactionProtocol: [
51
53
  "Run Layer 1 (spec compliance) completely before starting Layer 2.",
@@ -53,7 +55,7 @@ export const REVIEW = {
53
55
  "Classify every finding as Critical, Important, or Suggestion.",
54
56
  decisionProtocolInstruction("each Critical finding", "present resolution options (A/B/C) with trade-offs, and mark one as (recommended)", "recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius", STRUCTURED_ASK_TOOL_LIST_REVIEW),
55
57
  "Resolve all critical blockers before ship. If verdict is BLOCKED, do not pass `review_criticals_resolved`; pass only the remediation route gate `review_verdict_blocked` when routing back to TDD.",
56
- "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD` and point to `cclaw internal rewind tdd` with the blocking IDs.",
58
+ "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD`, point to `cclaw internal rewind tdd` with the blocking IDs, and tell the operator to ack the stale TDD marker only after rework is complete.",
57
59
  structuredAskSingleChoiceInstruction("final verdict", "verdict (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED)"),
58
60
  "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict."
59
61
  ],
@@ -64,7 +66,7 @@ export const REVIEW = {
64
66
  "Reconcile structured findings into `.cclaw/artifacts/07-review-army.json` (dedup + confidence + conflict notes + source tags from spec/correctness/security/performance/architecture/external-safety passes).",
65
67
  "Classify and prioritize all findings.",
66
68
  "Write review report artifact with explicit verdict.",
67
- "If verdict is BLOCKED, include the remediation route token `ROUTE_BACK_TO_TDD` and the rewind command payload."
69
+ "If verdict is BLOCKED, include the remediation route token `ROUTE_BACK_TO_TDD`, the managed rewind command payload, and the follow-up ack command after TDD rework."
68
70
  ],
69
71
  requiredGates: [
70
72
  { id: "review_layer1_spec_compliance", description: "Spec compliance check completed with per-criterion verdict." },
@@ -79,11 +81,16 @@ export const REVIEW = {
79
81
  "Artifact written to `.cclaw/artifacts/07-review-army.json`.",
80
82
  "Traceability matrix run recorded (no orphaned source items or tests for enforced tracks).",
81
83
  "Layer 1 verdict captured with per-criterion pass/fail.",
84
+ "Review Evidence Scope lists files inspected, changed-file coverage, diagnostics run, and omitted files with explicit reason.",
82
85
  "Layer 2 sections completed across correctness, security, performance, architecture, and external-safety findings.",
86
+ "Every finding cites `file:line`, or an explicit no-line reason is recorded.",
87
+ "No-finding attestation is explicit when no issues are found.",
88
+ "Dependency/version audit is recorded when manifests, lockfiles, generated clients, CI, runtime config, or external APIs are relevant.",
83
89
  "Severity log includes critical/important/suggestion buckets.",
90
+ "Victory Detector recorded: Layer 1, Layer 2, security sweep, structured findings, trace evidence, and unresolved-critical status are complete, or BLOCKED route is explicit.",
84
91
  "Explicit final verdict: APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
85
92
  "Fresh verification command discovery recorded, and the command cited in `review_trace_matrix_clean` evidence before ship handoff.",
86
- "If BLOCKED: include explicit remediation route (`ROUTE_BACK_TO_TDD`) with blocking finding IDs."
93
+ "If BLOCKED: include explicit remediation route (`ROUTE_BACK_TO_TDD`) with blocking finding IDs, managed rewind command, and post-rework ack instruction."
87
94
  ],
88
95
  inputs: ["implementation diff", "upstream artifacts", "test/build evidence"],
89
96
  requiredContext: ["spec criteria", "tdd artifact", "rulebook constraints"],
@@ -114,10 +121,14 @@ export const REVIEW = {
114
121
  },
115
122
  artifactValidation: [
116
123
  { section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/plan/tdd decisions, constraints, open questions, and explicit drift before review verdicts." },
124
+ { section: "Review Evidence Scope", required: true, validationRule: "Base/head, files inspected, changed-file coverage, diagnostics run, omitted files with reason, and reviewer/security-reviewer delegation evidence." },
125
+ { section: "Changed-File Coverage", required: true, validationRule: "Each changed file is covered, intentionally omitted with no-impact reason, or linked to a broader inspected module." },
117
126
  { section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
118
- { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status across correctness, security, performance, architecture, and external-safety. Security coverage must include either explicit security findings or `NO_CHANGE_ATTESTATION: <reason>` when no security-relevant changes were found." },
127
+ { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, category, file:line or explicit no-line reason, description, and resolution status across correctness/security/performance/architecture/external-safety. If there are no findings, include a no-finding attestation." },
128
+ { section: "Security Sweep Attestation", required: false, validationRule: "Dedicated security-reviewer result: findings or `NO_CHANGE_ATTESTATION` / `NO_SECURITY_IMPACT` with inspected surfaces and rationale." },
129
+ { section: "Dependency & Version Audit", required: false, validationRule: "Required when manifests, lockfiles, generated clients, CI, runtime config, or external APIs changed; otherwise record no-impact rationale." },
119
130
  { section: "Review Findings Contract", required: true, validationRule: "Structured findings in 07-review-army.json include id/severity/confidence/fingerprint/reportedBy/status and source tags from {spec, correctness, security, performance, architecture, external-safety} with dedup reconciliation summary." },
120
- { section: "Review Readiness Snapshot", required: false, validationRule: "Optional compact summary: completed checks, delegation-log status, staleness signal, open critical blockers, and ship recommendation." },
131
+ { section: "Review Readiness Snapshot", required: false, validationRule: "Optional compact summary: completed checks, delegation-log status, staleness signal, open critical blockers, ship recommendation, and Victory Detector pass/fail." },
121
132
  { section: "Completeness Snapshot", required: false, validationRule: "Optional compact coverage summary for AC coverage, source item coverage, test-slice coverage, and adversarial-review status when triggered." },
122
133
  { section: "Incoming Feedback Queue", required: false, validationRule: "When external review feedback exists, include a queue summary with per-item disposition (resolved / accepted-risk / rejected-with-evidence) and evidence refs." },
123
134
  { section: "Trace Matrix Check", required: false, validationRule: "Records source-item/test orphan counts (all zero on enforced tracks) with command output reference." },
@@ -20,8 +20,11 @@ export interface ArtifactValidation {
20
20
  tier?: "required" | "recommended";
21
21
  validationRule: string;
22
22
  }
23
+ export type StageSubagentName = "researcher" | "architect" | "spec-validator" | "slice-implementer" | "performance-reviewer" | "compatibility-reviewer" | "observability-reviewer" | "release-reviewer" | "planner" | "product-manager" | "critic" | "reviewer" | "security-reviewer" | "test-author" | "doc-updater" | "implementer" | "fixer";
24
+ export type StageSubagentDispatchClass = "stage-specialist" | "worker" | "review-lens";
25
+ export type StageSubagentReturnSchema = "planning-return" | "product-return" | "critic-return" | "review-return" | "security-return" | "tdd-return" | "docs-return" | "worker-return" | "fixer-return" | "research-return" | "architecture-return" | "spec-validation-return" | "performance-return" | "compatibility-return" | "observability-return" | "release-return";
23
26
  export interface StageAutoSubagentDispatch {
24
- agent: "planner" | "reviewer" | "security-reviewer" | "test-author" | "doc-updater";
27
+ agent: StageSubagentName;
25
28
  /**
26
29
  * - `mandatory` — must be dispatched (or explicitly waived) before stage transition.
27
30
  * - `proactive` — should be dispatched automatically when context matches `when`.
@@ -29,12 +32,16 @@ export interface StageAutoSubagentDispatch {
29
32
  mode: "mandatory" | "proactive";
30
33
  /**
31
34
  * Minimum complexity tier where this dispatch policy applies.
32
- * Defaults to `standard` for mandatory dispatches when omitted.
35
+ * Defaults to `standard` for mandatory/proactive dispatches when omitted.
33
36
  */
34
37
  requiredAtTier?: StageComplexityTier;
35
38
  when: string;
36
39
  purpose: string;
37
40
  requiresUserGate: boolean;
41
+ /** Role category used by generated routing tables and lifecycle checks. */
42
+ dispatchClass?: StageSubagentDispatchClass;
43
+ /** Strict status/evidence contract the dispatched agent must return. */
44
+ returnSchema?: StageSubagentReturnSchema;
38
45
  /** Optional skill folder the dispatched agent should load as additional context. */
39
46
  skill?: string;
40
47
  }
@@ -9,7 +9,7 @@ export const SCOPE = {
9
9
  complexityTier: "standard",
10
10
  skillFolder: "scope-shaping",
11
11
  skillName: "scope-shaping",
12
- skillDescription: "Strategic scope stage. Challenge premise and lock explicit in-scope/out-of-scope boundaries using CEO-level thinking.",
12
+ skillDescription: "Strategic contract stage. Select HOLD/SELECTIVE/EXPAND/REDUCE mode, lock the slice and boundaries, and hand stable discretion zones to design.",
13
13
  philosophy: {
14
14
  hardGate: "Do NOT begin architecture, design, or code. This stage produces scope decisions only. Do not silently add or remove scope — every change is an explicit user opt-in.",
15
15
  ironLaw: "EVERY SCOPE CHANGE IS AN EXPLICIT USER OPT-IN — NEVER A SILENT ENLARGEMENT OR TRIM.",
@@ -45,19 +45,20 @@ export const SCOPE = {
45
45
  },
46
46
  executionModel: {
47
47
  checklist: [
48
- "**Scope contract first** — read brainstorm, name the job-to-be-done, draft the explicit in-scope/out-of-scope/deferred contract, select one mode, and write the rationale. This is the default path; use dream/10-star/temporal/deep strategy sections only when risk, novelty, or user ambition justifies them.",
48
+ "**Scope contract first** — read brainstorm handoff, name upstream decisions used, explicit drift, confidence, unresolved questions, and next-stage risk hints; draft the in-scope/out-of-scope/deferred/discretion contract before any design choice.",
49
49
  "**Premise and leverage check** — answer in the artifact: *Right problem? Direct path? What if nothing? Where can we leverage existing code? What is the reversibility cost?* Take a position; do not hedge.",
50
50
  "**Conditional 10-star boundary** — for deep/high-risk/product-strategy work, show what would make the product meaningfully better, then explicitly choose what ships now, what is deferred, and what is excluded without vague `later/for now` placeholders. Skip this for straightforward repair work and record `not needed: compact scope`.",
51
- "**Pick one of four gstack modes with the user** — SCOPE EXPANSION, SELECTIVE EXPANSION, HOLD SCOPE, or SCOPE REDUCTION. Recommend one, state why and what signal would change it, then STOP for the user's mode/scope approval before writing the final artifact.",
52
- "**Run mode-specific analysis only to needed depth** — ordinary path is a selected-mode row plus rationale tied to the scope contract. For deep/high-risk work, expand the analysis to match the chosen mode: SCOPE EXPANSION enumerates 10x opportunities + delight features; SELECTIVE EXPANSION lists baseline + cherry-picked additions; HOLD SCOPE proves rigor on the current slice; SCOPE REDUCTION names the smallest useful wedge.",
51
+ "**Pick one operational mode with the user** — HOLD SCOPE preserves focus; SELECTIVE EXPANSION cherry-picks high-leverage reference ideas; SCOPE EXPANSION explores ambitious alternatives; SCOPE REDUCTION cuts to the essential wedge. Recommend one, state why and what signal would change it, then STOP for approval.",
52
+ "**Run mode-specific analysis only to needed depth** — lite keeps the selected-mode row compact; standard adds requirements/locked decisions/discretion; deep may add Landscape Check, Taste Calibration, Reference Pattern Registry, Reference Pull, Ambitious Alternatives, and Ruthless Minimum Slice evidence when mode/risk warrants it.",
53
+ "**Decision-driver contract** — list weighted decision drivers (value, risk, reversibility, effort, timeline) and score candidate scope moves so the selected mode and boundaries are evidence-backed, not preference-led.",
53
54
  "**Compare implementation alternatives** — include minimum viable, product-grade, and ideal architecture options with effort (S/M/L/XL), risk (Low/Med/High), pros, cons, and reuses. Recommend one and tie it to mode.",
54
55
  "**Run outside voice before final approval** — for simple/low-risk scope, record one concise adversarial self-check row; for complex/high-risk/configured scope, iterate until threshold. Record the loop summary in `## Scope Outside Voice Loop`, but do not treat it as user approval.",
55
56
  "**Ask only one decision-changing question** — if the user rejects the contract but is unsure, offer 3-4 concrete scope moves instead of open-ended interrogation.",
56
- "**Write the scope contract after approval** — include in-scope/out-of-scope, discretion areas, deferred items, locked decisions, error/rescue notes, completion dashboard, scope summary (with canonical mode token + next-stage handoff), and explicit approval evidence."
57
+ "**Write the scope contract after approval** — include selected mode, in scope, out of scope, requirements, locked decisions, discretion areas, deferred ideas, accepted/rejected reference ideas, success definition, design handoff, completion dashboard, and explicit approval evidence."
57
58
  ],
58
59
  interactionProtocol: [
59
60
  decisionProtocolInstruction("scope mode selection", "present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended)", "recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce"),
60
- "Do not walk the full checklist by default. Lead with a proposed scope contract and the one decision that matters most; label the mode as recommended, not selected, until the user answers.",
61
+ "Do not walk the full checklist by default. Lead with a proposed scope contract, selected depth (`lite`/`standard`/`deep`), and the one decision that matters most; label the mode as recommended, not selected, until the user answers.",
61
62
  "For simple web-app flows, default to HOLD SCOPE or SELECTIVE EXPANSION, show the exact in/out/deferred contract as a proposal, and STOP for one explicit approval before writing the final scope artifact or completing the stage.",
62
63
  "Challenge premise first, take a firm position, and name one concrete condition that would change it.",
63
64
  "Push back on weak framing: vague scope needs a specific user/problem, platform vision needs a narrow wedge, social proof needs behavioral evidence.",
@@ -86,7 +87,10 @@ export const SCOPE = {
86
87
  "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true, Pre-Scope System Audit findings are captured (git log/diff/stash/debt markers).",
87
88
  "In-scope and out-of-scope lists are explicit.",
88
89
  "Discretion areas are explicit (or marked as `None`).",
89
- "Selected mode and rationale are documented.",
90
+ "Selected mode and rationale are documented using HOLD SCOPE, SELECTIVE EXPANSION, SCOPE EXPANSION, or SCOPE REDUCTION.",
91
+ "Scope Contract captures requirements, locked decisions, discretion areas, accepted/rejected/deferred reference ideas from the Reference Pattern Registry, success definition, and design handoff.",
92
+ "Decision Drivers section records weighted criteria and per-option scores used to choose mode and boundary moves.",
93
+ "Scope Completeness Score is recorded (0.00-1.00) with the explicit blocker list for any remaining uncertainty.",
90
94
  "Locked Decisions section lists stable LD#hash anchors for non-negotiable boundaries.",
91
95
  "Premise challenge findings documented.",
92
96
  "Outside Voice findings and dispositions are recorded (accept/reject/defer with rationale) before final approval.",
@@ -140,8 +144,15 @@ export const SCOPE = {
140
144
  { section: "Pre-Scope System Audit", required: false, validationRule: "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true: must capture git log -30, git diff --stat, git stash list, and debt-marker scan (TODO/FIXME/XXX/HACK) before premise challenge." },
141
145
  { section: "Prime Directives", required: false, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
142
146
  { section: "Premise Challenge", required: false, validationRule: "Must list at least 3 question/answer rows in a markdown table or bullet list (gstack default trio: right problem? direct path? what if we do nothing? — extend with leverage and reversibility for richer scope). The linter enforces structure, not English wording — answers may be in any language." },
143
- { section: "Landscape Check", required: false, validationRule: "When mode is EXPAND/SELECTIVE, include at least one external reference insight and its impact on scope." },
144
- { section: "Taste Calibration", required: false, validationRule: "Must reference 2-3 strong in-repo modules/files that define the quality bar or explicitly justify omission." },
147
+ { section: "Scope Contract", required: true, validationRule: "Canonical contract: selected mode, in scope, out of scope, requirements, locked decisions, discretion areas, deferred ideas, accepted/rejected reference ideas, success definition, and design handoff." },
148
+ { section: "Decision Drivers", required: false, validationRule: "Recommended: weighted decision drivers (value, risk, reversibility, effort, timeline) with scored options and the selected boundary rationale." },
149
+ { section: "Scope Completeness Score", required: false, validationRule: "Recommended: score 0.00-1.00 plus unresolved blockers and the escalation trigger when confidence is low." },
150
+ { section: "Landscape Check", required: false, validationRule: "Optional evidence heading for EXPAND/SELECTIVE/deep modes: include reference insight and impact on scope, or omit for compact HOLD SCOPE." },
151
+ { section: "Taste Calibration", required: false, validationRule: "Optional evidence heading: reference 2-3 strong in-repo modules/files that define the quality bar or justify omission." },
152
+ { section: "Reference Pattern Registry", required: false, validationRule: "Recommended for SELECTIVE/EXPAND/deep scope: table of pattern/source, accepted/rejected/deferred disposition, invariant to preserve, and boundary impact. Compact HOLD SCOPE may state `Not needed - compact scope`." },
153
+ { section: "Reference Pull", required: false, validationRule: "Optional evidence heading: cite ideas pulled from the project's reference material (name the source path or URL for each) or state no reference pull was needed for compact HOLD SCOPE." },
154
+ { section: "Ambitious Alternatives", required: false, validationRule: "Optional evidence heading for SCOPE EXPANSION/SELECTIVE EXPANSION: list larger alternatives considered and their disposition." },
155
+ { section: "Ruthless Minimum Slice", required: false, validationRule: "Optional evidence heading for SCOPE REDUCTION or high-risk scope: define the smallest useful wedge and what it proves." },
145
156
  { section: "Requirements", required: false, validationRule: "Table of stable requirement IDs (R1, R2, R3…) one per row with observable outcome, priority, and source. IDs are assigned once and never renumbered across scope/design/spec/plan/review; dropped requirements stay with Priority `DROPPED`." },
146
157
  { section: "Locked Decisions (LD#hash)", required: false, validationRule: "List of stable locked decisions with unique `LD#<sha8>` anchors. Each anchor is derived from the normalized Decision cell and is referenced downstream for cross-stage traceability." },
147
158
  { section: "Implementation Alternatives", required: false, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
@@ -154,7 +165,7 @@ export const SCOPE = {
154
165
  { section: "Outside Voice Findings", required: false, validationRule: "Must list external/adversarial findings and disposition (accept/reject/defer) with rationale." },
155
166
  { section: "Scope Outside Voice Loop", required: false, validationRule: `Must record iterations, quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("scope")}` },
156
167
  { section: "Completion Dashboard", required: true, validationRule: "Lists per-review-section status, count of critical/open gaps, resolved decisions, and unresolved decisions (or 'None')." },
157
- { section: "Scope Summary", required: true, validationRule: "Compact recap of the locked scope. Must name the selected mode using one of the canonical tokens (`SCOPE EXPANSION`, `SELECTIVE EXPANSION`, `HOLD SCOPE`, `SCOPE REDUCTION`) and record the track-aware next-stage handoff (`design` for standard, `spec` for medium); the linter checks structure, not English wording." },
168
+ { section: "Scope Summary", required: true, validationRule: "Compact recap of the locked scope. Must name the selected mode using one canonical token, confidence, explicit drift from brainstorm, unresolved questions, and the track-aware next-stage handoff (`design` for standard, `spec` for medium); the linter checks structure, not English wording." },
158
169
  { section: "Dream State Mapping", required: false, validationRule: "Deep/optional only: CURRENT STATE, THIS PLAN, 12-MONTH IDEAL, and alignment verdict. Omit for compact scope." },
159
170
  { section: "Temporal Interrogation", required: false, validationRule: "Deep/optional only: timeline simulation table with decision pressures and lock-now vs defer verdicts. Omit for compact scope." }
160
171
  ]
@@ -49,6 +49,7 @@ export const SHIP = {
49
49
  "Load utility skills — `verification-before-completion` for fresh evidence and `finishing-a-development-branch` for finalization workflow.",
50
50
  "Monitoring checklist — what should be watched after deploy? Error rates, latency, key business metrics. If no monitoring exists, flag it as a risk.",
51
51
  "Detect repository mode — if `.git/` is absent or inaccessible, lock finalization choices to FINALIZE_NO_VCS only and document manual handoff + rollback.",
52
+ "Victory Detector — valid review verdict, fresh preflight, rollback trigger/steps, exactly one finalization enum, and execution target are present; if any field is stale or missing, keep status BLOCKED and iterate.",
52
53
  "Select finalization mode — exactly ONE enum: (A) FINALIZE_MERGE_LOCAL, (B) FINALIZE_OPEN_PR, (C) FINALIZE_KEEP_BRANCH, (D) FINALIZE_DISCARD_BRANCH, (E) FINALIZE_NO_VCS. For discard: list what will be deleted, require typed confirmation.",
53
54
  "Execute finalization — perform the selected action. For merge: verify clean merge. For PR: include structured body (summary, test plan, rollback). For discard: verify deletion. For NO_VCS: record handoff target, artifact bundle path, and manual rollback owner.",
54
55
  "Branch cleanup — after merge/discard, remove only branches or temporary files the user explicitly approved. Skip for FINALIZE_NO_VCS."
@@ -79,7 +80,7 @@ export const SHIP = {
79
80
  "Release notes section is complete.",
80
81
  "Rollback section includes trigger conditions, steps, and verification.",
81
82
  "Finalization section shows exactly one selected enum token.",
82
- "Execution result documented."
83
+ "Victory Detector result documented: review verdict valid, preflight fresh, rollback ready, finalization enum selected, and execution result present."
83
84
  ],
84
85
  inputs: ["review verdict", "test/build outputs", "release context"],
85
86
  requiredContext: ["review artifact", "changelog scope", "deployment constraints"],
@@ -116,7 +117,7 @@ export const SHIP = {
116
117
  { section: "Rollback Plan", required: true, validationRule: "Trigger conditions, rollback steps (exact commands), verification steps." },
117
118
  { section: "Monitoring", required: false, validationRule: "If applicable: what metrics/logs to watch post-deploy. Risk note if no monitoring." },
118
119
  { section: "Finalization", required: true, validationRule: "Exactly one finalization enum token selected (FINALIZE_MERGE_LOCAL | FINALIZE_OPEN_PR | FINALIZE_KEEP_BRANCH | FINALIZE_DISCARD_BRANCH | FINALIZE_NO_VCS). Execution result documented. Worktree cleaned if applicable." },
119
- { section: "Completion Status", required: false, validationRule: "If present: exactly one of SHIPPED, SHIPPED_WITH_EXCEPTIONS, BLOCKED. Exceptions documented when applicable." },
120
+ { section: "Completion Status", required: false, validationRule: "If present: exactly one of SHIPPED, SHIPPED_WITH_EXCEPTIONS, BLOCKED. Exceptions documented when applicable. BLOCKED is required when the Victory Detector has stale or missing evidence." },
120
121
  { section: "Compound Step", required: false, validationRule: "Optional retrospective: include overlap assessment before appending duplicate knowledge; distinguish bug-track fixes/tests from knowledge-track process/project guidance; use supersedes/superseded_by only for clear refreshes; or include an explicit 'No compound insight this run.' line." }
121
122
  ]
122
123
  },
@@ -8,7 +8,7 @@ export const TDD = {
8
8
  complexityTier: "standard",
9
9
  skillFolder: "test-driven-development",
10
10
  skillName: "test-driven-development",
11
- skillDescription: "Full TDD cycle: discover existing tests and system impact, then RED (failing tests), GREEN (minimal implementation), REFACTOR (cleanup). One plan slice at a time with strict traceability.",
11
+ skillDescription: "Full vertical-slice TDD cycle: discover existing tests and system impact, then RED (failing tests), GREEN (minimal implementation), REFACTOR (cleanup). One source item at a time with strict traceability.",
12
12
  philosophy: {
13
13
  hardGate: "Do NOT merge, ship, or skip review. Follow RED → GREEN → REFACTOR strictly for each plan slice. Do NOT write implementation code before RED tests exist. Do NOT write RED tests before discovering relevant existing tests and impacted contracts. Do NOT skip the REFACTOR step.",
14
14
  ironLaw: "NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST — THE RED FAILURE IS THE SPEC.",
@@ -37,17 +37,18 @@ export const TDD = {
37
37
  },
38
38
  executionModel: {
39
39
  checklist: [
40
- "Select plan slice — pick one task from the plan. Do not batch multiple tasks. Before starting, read `.cclaw/state/ralph-loop.json` (`loopIteration`, `acClosed[]`, `redOpenSlices[]`) so you skip cycles already closed. If `redOpenSlices[]` is non-empty, repair or explicitly park those slices before opening a new RED.",
40
+ "Select vertical slice — pick one source item from the active track (plan task on standard/medium, spec AC or bug reproduction slice on quick). Do not batch multiple tasks. Before starting, read `.cclaw/state/ralph-loop.json` (`loopIteration`, `acClosed[]`, `redOpenSlices[]`) so you skip cycles already closed. If `redOpenSlices[]` is non-empty, repair or explicitly park those slices before opening a new RED.",
41
41
  "Map to acceptance criterion — identify the specific spec criterion this test proves.",
42
42
  "Discover the test surface — inspect existing tests, fixtures, helpers, test commands, and nearby assertions before authoring RED. Reuse the local test style unless the slice genuinely needs a new pattern.",
43
43
  "Run a system-wide impact check — name callbacks, state transitions, interfaces, schemas, CLI/config/API contracts, persistence, or event boundaries that this slice can affect. Add RED coverage for each affected public contract or record why it is out of scope.",
44
+ "Source/test preflight — before production edits, classify planned paths using `.cclaw/config.yaml::tdd.testPathPatterns` and `tdd.productionPathPatterns` when present; verify the RED touches a test path and the GREEN touches only source paths needed for the failing behavior.",
44
45
  "Set execution posture — record whether this slice is sequential, batch-safe, or blocked; when the existing git workflow permits small commits, checkpoint after RED, GREEN, and REFACTOR (or record why commits are deferred).",
45
46
  "Use the mandatory `test-author` delegation for RED — after discovery and impact check, produce failing behavior tests and RED evidence only (no production edits). Set `CCLAW_ACTIVE_AGENT=tdd-red` when the harness supports phase labels.",
46
47
  "RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
47
48
  "Continue the same `test-author` delegation intent for GREEN — minimal implementation plus full-suite GREEN evidence. Set `CCLAW_ACTIVE_AGENT=tdd-green` when the harness supports phase labels.",
48
49
  "GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
49
50
  "GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
50
- "Run verification-before-completion discipline for the slice — capture a fresh test command, commit SHA, and explicit PASS/FAIL status before completion claims.",
51
+ "Run verification-before-completion discipline for the slice — capture a fresh test command, explicit PASS/FAIL status, and a config-aware ref (commit SHA when VCS is present/required, or no-vcs attestation when allowed).",
51
52
  "REFACTOR: continue the `test-author` evidence cycle (or a dedicated refactor mode when available) to improve code quality without behavior changes. Set `CCLAW_ACTIVE_AGENT=tdd-refactor` when the harness supports phase labels.",
52
53
  "Record evidence — capture test discovery, system-wide impact check, RED failure, GREEN output, and REFACTOR notes in the TDD artifact. When logging a `green` row, attach the closed acceptance-criterion IDs in `acIds` so Ralph Loop status counts them.",
53
54
  "Annotate traceability — link to the active track's source: plan task ID + spec criterion on standard/medium, or spec acceptance item / bug reproduction slice on quick.",
@@ -55,15 +56,16 @@ export const TDD = {
55
56
  "Repeat for each slice — return to step 1 for the next plan slice."
56
57
  ],
57
58
  interactionProtocol: [
58
- "Pick one planned slice at a time.",
59
+ "Pick one vertical slice at a time: source item, RED test, GREEN implementation, REFACTOR, and verification evidence move together.",
59
60
  "Controller owns orchestration; one mandatory `test-author` delegation carries phase-specific RED -> GREEN -> REFACTOR evidence instead of spawning separate workers by default.",
60
61
  "Before writing RED tests, discover relevant existing tests and commands so the new test extends the suite instead of fighting it.",
61
62
  "Before implementation, perform a system-wide impact check across callbacks, state, interfaces, schemas, and external contracts touched by the slice.",
63
+ "Run source/test preflight using configured TDD path patterns where feasible; if path classification is impossible (generated files, non-file side effect), record why.",
62
64
  "Write behavior-focused tests before changing implementation (RED).",
63
65
  "Capture and store failing output as RED evidence.",
64
66
  "Apply minimal change to satisfy RED tests (GREEN).",
65
67
  "Run full suite, not partial checks, for GREEN validation.",
66
- "Before declaring the slice complete, run a fresh verification check and record command + commit SHA + PASS/FAIL.",
68
+ "Before declaring the slice complete, run a fresh verification check and record command + PASS/FAIL plus commit SHA or no-vcs/config override evidence.",
67
69
  "Refactor without changing behavior and document rationale (REFACTOR).",
68
70
  "Use incremental RED/GREEN/REFACTOR commits when the repository workflow and working tree make that appropriate; otherwise record the checkpoint boundaries in the artifact.",
69
71
  "Stop if regressions appear and fix before proceeding.",
@@ -71,7 +73,7 @@ export const TDD = {
71
73
  "**Per-Slice Review point (conditional, opt-in).** When `.cclaw/config.yaml::sliceReview.enabled` is true, check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory. Tracks outside `sliceReview.enforceOnTracks` still emit the section; doctor only escalates missed reviews on enforced tracks."
72
74
  ],
73
75
  process: [
74
- "Select slice and map to acceptance criterion.",
76
+ "Select one vertical slice and map it to acceptance criterion(s).",
75
77
  "Discover existing tests, fixtures, helpers, and exact test commands for the affected area.",
76
78
  "Check system-wide impact across callbacks, state transitions, interfaces, schemas, and external contracts.",
77
79
  "Record execution posture and checkpoint plan for RED/GREEN/REFACTOR commits or deferred commits.",
@@ -79,7 +81,7 @@ export const TDD = {
79
81
  "Run tests and capture failure output.",
80
82
  "Use `test-author` in GREEN intent and implement the smallest change needed for GREEN.",
81
83
  "Run full tests and build checks.",
82
- "Run a fresh verification-before-completion check and capture command + commit SHA + PASS/FAIL in guard evidence.",
84
+ "Run a fresh verification-before-completion check and capture command + PASS/FAIL plus a commit SHA when VCS is present; for `vcs: none`, record explicit no-vcs reason plus content/artifact hash unless `tdd.verificationRef: disabled` is configured.",
83
85
  "Run the REFACTOR intent preserving behavior.",
84
86
  "Record RED, GREEN, and REFACTOR evidence in artifact.",
85
87
  "Annotate traceability to plan task and spec criterion; on `sliceReview` triggers, append a Per-Slice Review entry before closing the slice."
@@ -90,7 +92,7 @@ export const TDD = {
90
92
  { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
91
93
  { id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
92
94
  { id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
93
- { id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, commit SHA, and explicit pass/fail status." },
95
+ { id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, explicit pass/fail status, and a config-aware ref: commit SHA when VCS is present/required or an explicit no-VCS attestation when allowed." },
94
96
  { id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." },
95
97
  { id: "tdd_docs_drift_check", description: "When public API/config/CLI surfaces change, docs drift is addressed via a completed doc-updater pass." }
96
98
  ],
@@ -98,10 +100,10 @@ export const TDD = {
98
100
  "Artifact updated at `.cclaw/artifacts/06-tdd.md` with Test Discovery, System-Wide Impact Check, RED, GREEN, and REFACTOR sections.",
99
101
  "Relevant existing test files, helpers, fixtures, and exact commands identified before RED.",
100
102
  "Callbacks, state transitions, interfaces, schemas, and contracts checked for impact before implementation.",
101
- "Execution posture and RED/GREEN/REFACTOR checkpoint plan recorded, including commit boundaries when the repo workflow supports them.",
103
+ "Execution posture and vertical-slice RED/GREEN/REFACTOR checkpoint plan recorded, including commit boundaries when the repo workflow supports them.",
102
104
  "Failing command output captured (RED).",
103
105
  "Full test/build output recorded (GREEN).",
104
- "Fresh verification evidence recorded with command, commit SHA, and PASS/FAIL status before completion.",
106
+ "Fresh verification evidence recorded with command, PASS/FAIL status, and config-aware commit SHA or no-VCS reason plus content/artifact hash before completion.",
105
107
  "Acceptance mapping documented.",
106
108
  "Failure reason analysis recorded.",
107
109
  "Refactor rationale captured.",
@@ -115,7 +117,9 @@ export const TDD = {
115
117
  "tests pass before behavior change (RED failure missing)",
116
118
  "full suite not green",
117
119
  "behavior changed during refactor",
118
- "no evidence recorded"
120
+ "no evidence recorded",
121
+ "RED/GREEN blocked — classify with the managed taxonomy `NO_SOURCE_CONTEXT`, `NO_TEST_SURFACE`, `NO_IMPLEMENTABLE_SLICE`, `RED_NOT_EXPRESSIBLE`, or `NO_VCS_MODE` and capture blockedBecause, missingInputs, recommendedRoute, nextCommand, and resumeCriteria.",
122
+ "no-VCS workspace without explicit `vcs: none`, no-vcs reason, content/artifact hash, or `tdd.verificationRef: disabled`"
119
123
  ],
120
124
  exitCriteria: [
121
125
  "test discovery and system-wide impact check are recorded",
@@ -144,14 +148,15 @@ export const TDD = {
144
148
  { section: "Upstream Handoff", required: false, validationRule: "Summarizes plan/spec/design decisions, constraints, open questions, and explicit drift before RED work." },
145
149
  { section: "Test Discovery", required: true, validationRule: "Before RED: lists existing tests, fixtures/helpers, exact commands, and the chosen local pattern to extend." },
146
150
  { section: "System-Wide Impact Check", required: true, validationRule: "Before implementation: names affected callbacks, state transitions, interfaces, schemas, public APIs/config/CLI, persistence, or event contracts, with coverage or explicit out-of-scope notes." },
147
- { section: "Execution Posture", required: false, validationRule: "Records sequential/batch/blocked posture and RED/GREEN/REFACTOR checkpoint plan, including incremental commit boundaries when consistent with the repository git workflow." },
151
+ { section: "Execution Posture", required: false, validationRule: "Records sequential/batch/blocked posture and vertical-slice RED/GREEN/REFACTOR checkpoint plan, including incremental commit boundaries when consistent with the repository git workflow." },
148
152
  { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
149
153
  { section: "Acceptance Mapping", required: false, validationRule: "Each RED test links to a plan task and spec criterion." },
150
154
  { section: "Failure Analysis", required: false, validationRule: "Failure reason matches expected missing behavior." },
151
155
  { section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
152
156
  { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
153
157
  { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
154
- { section: "Verification Ladder", required: true, validationRule: "Per-slice verification tier (static, command, behavioral, human) with evidence captured for the highest tier reached this turn." },
158
+ { section: "Verification Ladder", required: true, validationRule: "Per-slice verification tier (static, command, behavioral, human) with evidence captured for the highest tier reached this turn. Must include command + PASS/FAIL + commit SHA when VCS is present, or explicit no-vcs reason plus content/artifact hash/config override." },
159
+ { section: "TDD Blocker Taxonomy", required: false, validationRule: "When blocked, classify as NO_SOURCE_CONTEXT, NO_TEST_SURFACE, NO_IMPLEMENTABLE_SLICE, RED_NOT_EXPRESSIBLE, or NO_VCS_MODE; include blockedBecause, missingInputs, recommendedRoute, nextCommand, and resumeCriteria." },
155
160
  { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
156
161
  { section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
157
162
  { section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." },
@@ -81,7 +81,8 @@ ${conversationLanguagePolicyMarkdown()}
81
81
  - **standard** (full 8 stages — default fallback) — anything that introduces a new capability with architecture uncertainty, touches many modules, or has unclear scope.
82
82
  Triggers: \`new feature\`, \`refactor\`, \`migration\`, \`platform\`, \`architecture\`, \`schema\`, \`integrate\`, \`workflow\`, \`onboarding\`, or any prompt that does not match quick/medium confidently.
83
83
  - When triggers conflict, prefer **standard** over **medium**, and **medium** over **quick**.
84
- 8. Present one compact **Start framing** summary: class, recommended track, stack, origin docs, seed recalls, and the recommended next action. Ask a single confirmation question only when there is a destructive reset, a real contradiction, or ambiguous software/non-software classification.
84
+ - Report **track selection confidence** as high/medium/low with the matched trigger or fallback reason. Be explicit that this recommendation is advisory until the user accepts and the managed helper writes state; after that, \`/cc-next\` follows the configured track.
85
+ 8. Present one compact **Start framing** summary: class, recommended track, track selection confidence, stack, origin docs, seed recalls, and the recommended next action. Ask a single confirmation question only when there is a destructive reset, a real contradiction, or ambiguous software/non-software classification.
85
86
  9. Present the recommendation as a single decision with explicit options:
86
87
  > \`Recommended track: <quick|medium|standard>\` because \`<one-line reason citing matched triggers>\`.
87
88
  > Override? (A) keep \`<recommended>\` (B) switch track (C) cancel.
@@ -183,7 +184,7 @@ ${conversationLanguagePolicyMarkdown()}
183
184
  | \`standard\` | \`new feature\`, \`refactor\`, \`migration\`, \`platform\`, \`architecture\`, \`schema\`, \`integrate\`, \`workflow\`, \`onboarding\` (or no confident quick/medium match) | New or uncertain multi-module work |
184
185
 
185
186
  - On conflict, prefer \`standard\` over \`medium\`, and \`medium\` over \`quick\`.
186
- - Always state the recommendation as a one-line reason citing matched triggers.
187
+ - Always state the recommendation with a one-line reason citing the matched triggers, plus a high/medium/low track selection confidence. Clarify that the heuristic is advisory until the managed helper writes state; after that, \`/cc-next\` follows the selected track.
187
188
  8. Run the managed start helper: \`node .cclaw/hooks/start-flow.mjs --track=<quick|medium|standard> --class=<class> --prompt=<prompt> --stack=<stack> --reason=<matched heuristic>\`. The helper writes \`${flowPath}\`, computes \`skippedStages\`, resets the gate catalog, and writes \`${RUNTIME_ROOT}/artifacts/00-idea.md\`. If it fails, STOP and report the exact command/output; do not manually edit flow state.
188
189
  9. Load and execute the **first stage skill of the chosen track** (\`brainstorming\` for medium/standard, \`specification-authoring\` for quick) plus its matching command file.
189
190
 
@@ -71,15 +71,20 @@ a read-only command.
71
71
  - harness row
72
72
  - stale stage row
73
73
  11. Suggest the next action:
74
- - If current stage has unmet gates \`/cc-next\` to resume.
75
- - If closeout substate is non-idle \`/cc-next\` to continue the chain.
76
- - If current stage is complete \`/cc-next\` to advance (or report "Flow complete" if terminal).
74
+ - If current stage has unmet gates -> \`/cc-next\` to resume.
75
+ - If a mandatory delegation is missing evidence -> dispatch the worker/reviewer or waive with rationale; do not advance silently.
76
+ - If a TDD blocker taxonomy code is present (\`NO_SOURCE_CONTEXT\`, \`NO_TEST_SURFACE\`, \`NO_IMPLEMENTABLE_SLICE\`, \`RED_NOT_EXPRESSIBLE\`, \`NO_VCS_MODE\`) -> name the blocker and the rewind/config route.
77
+ - If review is blocked by critical findings -> show \`cclaw internal rewind tdd "review_blocked_by_critical <finding-ids>"\` plus the later \`cclaw internal rewind --ack tdd\`.
78
+ - If closeout substate is non-idle -> \`/cc-next\` to continue the chain.
79
+ - If current stage is complete -> \`/cc-next\` to advance (or report "Flow complete" if terminal).
77
80
 
78
81
  ## Output Guidelines
79
82
 
80
83
  - Keep output compact (≤ 40 lines) — status, not narrative.
81
84
  - Start with the same operator rows as \`/cc-next\` when possible:
82
- \`Stage\`, \`Gates\`, \`Delegations\`, \`Blockers\`, \`Next\`.
85
+ \`Current\`, \`Stage\`, \`Gates\`, \`Delegations\`, \`Blocked by\`, \`Next\`, \`Evidence needed\`.
86
+ - When blocked, include a plain-English action block:
87
+ \`Current: <stage or closeout substate>\`; \`Blocked by: <gate/delegation/blocker code>\`; \`Next: <exact command or managed remediation>\`; \`Evidence needed: <artifact/test/review/delegation evidence>\`.
83
88
  - Report counts, not full artifact contents.
84
89
  - If any data source is missing or corrupt, say so explicitly rather than guessing.
85
90
  - In the final line, point to \`/cc-view tree\` for deep structure and \`/cc-view diff\` for the before/after map.