cclaw-cli 0.48.29 → 0.48.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,189 +2,199 @@
2
2
  // TDD — RED → GREEN → REFACTOR cycle (merged test + build)
3
3
  // ---------------------------------------------------------------------------
4
4
  export const TDD = {
5
+ schemaShape: "v2",
5
6
  stage: "tdd",
7
+ complexityTier: "standard",
6
8
  skillFolder: "test-driven-development",
7
9
  skillName: "test-driven-development",
8
10
  skillDescription: "Full TDD cycle: RED (failing tests), GREEN (minimal implementation), REFACTOR (cleanup). One plan slice at a time with strict traceability.",
9
- hardGate: "Do NOT merge, ship, or skip review. Follow RED → GREEN → REFACTOR strictly for each plan slice. Do NOT write implementation code before RED tests exist. Do NOT skip the REFACTOR step.",
10
- ironLaw: "NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST — THE RED FAILURE IS THE SPEC.",
11
- purpose: "Implement features through the TDD cycle: write failing tests, make them pass with minimal code, then refactor.",
12
- whenToUse: [
13
- "After plan confirmation",
14
- "For every behavior change in scope",
15
- "Before review stage"
16
- ],
17
- whenNotToUse: [
18
- "Plan approval is still pending WAIT_FOR_CONFIRM",
19
- "The change is docs-only and does not alter behavior",
20
- "The stage intent is review/ship sign-off rather than implementation"
21
- ],
22
- checklist: [
23
- "Select plan slice — pick one task from the plan. Do not batch multiple tasks. Before starting, read `.cclaw/state/ralph-loop.json` (`loopIteration`, `acClosed[]`, `redOpenSlices[]`) so you skip cycles already closed.",
24
- "Map to acceptance criterion — identify the specific spec criterion this test proves.",
25
- "Dispatch mandatory `tdd-red` execution (or `test-author` in TEST_RED_ONLY mode) — produce failing behavior tests and RED evidence only (no production edits). Set `CCLAW_ACTIVE_AGENT=tdd-red` when supported.",
26
- "RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
27
- "Dispatch `tdd-green` execution (or `test-author` in BUILD_GREEN_REFACTOR mode) — minimal implementation + full-suite GREEN. Set `CCLAW_ACTIVE_AGENT=tdd-green` when supported.",
28
- "GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
29
- "GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
30
- "Run verification-before-completion discipline for the slice — capture a fresh test command, commit SHA, and explicit PASS/FAIL status before completion claims.",
31
- "REFACTOR: Dispatch `tdd-refactor` execution (or dedicated refactor mode) to improve code quality without behavior changes. Set `CCLAW_ACTIVE_AGENT=tdd-refactor` when supported.",
32
- "Record evidence — capture RED failure, GREEN output, and REFACTOR notes in the TDD artifact. When logging the `green` row via `/cc-ops tdd-log green`, attach the closed acceptance-criterion IDs in `acIds` so Ralph Loop status counts them.",
33
- "Annotate traceability — link to plan task ID and spec criterion.",
34
- "Per-Slice Review (conditional) — if `.cclaw/config.yaml::sliceReview.enabled` is true and the slice meets any trigger (touchCount >= filesChangedThreshold, touchPaths match touchTriggers, or highRisk=true), append a `## Per-Slice Review` entry for this slice before moving on (see the dedicated section below).",
35
- "Repeat for each slice — return to step 1 for the next plan slice."
36
- ],
37
- interactionProtocol: [
38
- "Pick one planned slice at a time.",
39
- "Controller owns orchestration; execution runs through phase-specific delegation (`tdd-red` -> `tdd-green` -> `tdd-refactor`) or equivalent `test-author` modes.",
40
- "Write behavior-focused tests before changing implementation (RED).",
41
- "Capture and store failing output as RED evidence.",
42
- "Apply minimal change to satisfy RED tests (GREEN).",
43
- "Run full suite, not partial checks, for GREEN validation.",
44
- "Before declaring the slice complete, run a fresh verification check and record command + commit SHA + PASS/FAIL.",
45
- "Refactor without changing behavior and document rationale (REFACTOR).",
46
- "Stop if regressions appear and fix before proceeding.",
47
- "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?",
48
- "**Per-Slice Review checkpoint (conditional, opt-in).** When `.cclaw/config.yaml::sliceReview.enabled` is true, check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory. Tracks outside `sliceReview.enforceOnTracks` still emit the section; doctor only escalates missed reviews on enforced tracks."
49
- ],
50
- process: [
51
- "Select slice and map to acceptance criterion.",
52
- "Dispatch `tdd-red` (or `test-author` TEST_RED_ONLY mode) and produce failing test(s) for expected reason (RED).",
53
- "Run tests and capture failure output.",
54
- "Dispatch `tdd-green` (or `test-author` BUILD_GREEN_REFACTOR mode) and implement smallest change needed for GREEN.",
55
- "Run full tests and build checks.",
56
- "Run a fresh verification-before-completion check and capture command + commit SHA + PASS/FAIL in guard evidence.",
57
- "Dispatch `tdd-refactor` pass preserving behavior.",
58
- "Record RED, GREEN, and REFACTOR evidence in artifact.",
59
- "Annotate traceability to plan task and spec criterion; on `sliceReview` triggers, append a Per-Slice Review entry before closing the slice."
60
- ],
61
- requiredGates: [
62
- { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
63
- { id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
64
- { id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
65
- { id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, commit SHA, and explicit pass/fail status." },
66
- { id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." },
67
- { id: "tdd_docs_drift_check", description: "When public API/config/CLI surfaces change, docs drift is addressed via a completed doc-updater pass." }
68
- ],
69
- requiredEvidence: [
70
- "Artifact updated at `.cclaw/artifacts/06-tdd.md` with RED, GREEN, and REFACTOR sections.",
71
- "Failing command output captured (RED).",
72
- "Full test/build output recorded (GREEN).",
73
- "Fresh verification evidence recorded with command, commit SHA, and PASS/FAIL status before completion.",
74
- "Acceptance mapping documented.",
75
- "Failure reason analysis recorded.",
76
- "Refactor rationale captured.",
77
- "Traceability to task identifier is documented."
78
- ],
79
- inputs: ["approved plan slice", "spec acceptance criterion", "test harness configuration", "coding standards and constraints"],
80
- requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
81
- outputs: ["failing test set", "passing implementation", "refactor evidence", "review-ready change set"],
82
- blockers: [
83
- "tests pass before behavior change (RED failure missing)",
84
- "full suite not green",
85
- "behavior changed during refactor",
86
- "no evidence recorded"
87
- ],
88
- exitCriteria: [
89
- "RED evidence exists and is traceable",
90
- "GREEN evidence captured with full suite pass",
91
- "REFACTOR evidence captured",
92
- "required gates marked satisfied",
93
- "traceability annotated"
94
- ],
95
- commonRationalizations: [
96
- "Writing code before failing test",
97
- "Partial test runs presented as GREEN",
98
- "Skipping evidence capture",
99
- "Undocumented refactor changes",
100
- "No full-suite GREEN evidence",
101
- "Multiple tasks implemented in one pass without justification"
102
- ],
103
- artifactFile: "06-tdd.md",
104
- next: "review",
105
- reviewSections: [
106
- {
107
- title: "RED Evidence Audit",
108
- evaluationPoints: [
109
- "Does every slice have a captured failing test output?",
110
- "Does each failure reason match the expected missing behavior (not a typo or config error)?",
111
- "Were tests written BEFORE any production code for that slice?",
112
- "Does each RED test assert observable behavior, not implementation details?",
113
- "Is there a test for each acceptance criterion mapped in the plan?"
114
- ],
115
- stopGate: true
116
- },
117
- {
118
- title: "GREEN/REFACTOR Audit",
119
- evaluationPoints: [
120
- "Does GREEN evidence show a FULL suite pass (not partial)?",
121
- "Is the GREEN implementation minimal — no features beyond what RED tests require?",
122
- "Does the REFACTOR step preserve all existing behavior (no new failures)?",
123
- "Are REFACTOR notes documented with rationale?",
124
- "Is traceability complete: every change links to plan task ID and spec criterion?"
125
- ],
126
- stopGate: true
127
- },
128
- {
129
- title: "Test Pyramid + Size Audit",
130
- evaluationPoints: [
131
- "Is the tests-added count skewed toward Small (unit) tests, with Medium and Large used only when a real boundary justifies the cost?",
132
- "Does every newly added test declare a size class (Small / Medium / Large) — either inline in the test file or in the TDD artifact table?",
133
- "Are Large tests reserved for genuine end-to-end user journeys (not substitutes for unit coverage)?",
134
- "Has the slice avoided using Medium/Large tests to paper over testability problems that should be fixed at the design layer?"
135
- ],
136
- stopGate: false
137
- },
138
- {
139
- title: "Prove-It Reproduction (bug-fix slices)",
140
- evaluationPoints: [
141
- "Does the artifact identify this slice as a bug fix, and if so, include a reproduction test checked in alongside the fix?",
142
- "Is there captured RED evidence from running the reproduction WITHOUT the fix applied?",
143
- "Is there captured GREEN evidence from the same reproduction AFTER the fix was applied?",
144
- "Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?"
145
- ],
146
- stopGate: false
147
- },
148
- {
149
- title: "Per-Slice Review Audit (conditional)",
150
- evaluationPoints: [
151
- "When `.cclaw/config.yaml::sliceReview.enabled` is true: does every triggered slice (touchCount >= threshold, touchPaths match, or highRisk=true) carry a Per-Slice Review entry with BOTH a Spec-Compliance pass (plan task <-> spec criterion + edge-case notes) AND a Quality pass (diff-level naming/errors/dead code/simpler alternatives)?",
152
- "Is the delegation `fulfillmentMode` recorded (`isolated` for a dispatched reviewer subagent, `role-switch` for an in-session pass) and does it match an entry in `.cclaw/state/delegation-log.json`?",
153
- "On tracks listed in `sliceReview.enforceOnTracks`, are there zero missed triggered slices (doctor also surfaces this as a warning)?"
154
- ],
155
- stopGate: false
11
+ philosophy: {
12
+ hardGate: "Do NOT merge, ship, or skip review. Follow RED → GREEN → REFACTOR strictly for each plan slice. Do NOT write implementation code before RED tests exist. Do NOT skip the REFACTOR step.",
13
+ ironLaw: "NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST — THE RED FAILURE IS THE SPEC.",
14
+ purpose: "Implement features through the TDD cycle: write failing tests, make them pass with minimal code, then refactor.",
15
+ whenToUse: [
16
+ "After plan confirmation",
17
+ "For every behavior change in scope",
18
+ "Before review stage"
19
+ ],
20
+ whenNotToUse: [
21
+ "Plan approval is still pending WAIT_FOR_CONFIRM",
22
+ "The change is docs-only and does not alter behavior",
23
+ "The stage intent is review/ship sign-off rather than implementation"
24
+ ],
25
+ commonRationalizations: [
26
+ "Writing code before failing test",
27
+ "Partial test runs presented as GREEN",
28
+ "Skipping evidence capture",
29
+ "Undocumented refactor changes",
30
+ "No full-suite GREEN evidence",
31
+ "Multiple tasks implemented in one pass without justification"
32
+ ]
33
+ },
34
+ executionModel: {
35
+ checklist: [
36
+ "Select plan slice — pick one task from the plan. Do not batch multiple tasks. Before starting, read `.cclaw/state/ralph-loop.json` (`loopIteration`, `acClosed[]`, `redOpenSlices[]`) so you skip cycles already closed.",
37
+ "Map to acceptance criterion — identify the specific spec criterion this test proves.",
38
+ "Dispatch mandatory `tdd-red` execution (or `test-author` in TEST_RED_ONLY mode) — produce failing behavior tests and RED evidence only (no production edits). Set `CCLAW_ACTIVE_AGENT=tdd-red` when supported.",
39
+ "RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
40
+ "Dispatch `tdd-green` execution (or `test-author` in BUILD_GREEN_REFACTOR mode) — minimal implementation + full-suite GREEN. Set `CCLAW_ACTIVE_AGENT=tdd-green` when supported.",
41
+ "GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
42
+ "GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
43
+ "Run verification-before-completion discipline for the slice — capture a fresh test command, commit SHA, and explicit PASS/FAIL status before completion claims.",
44
+ "REFACTOR: Dispatch `tdd-refactor` execution (or dedicated refactor mode) to improve code quality without behavior changes. Set `CCLAW_ACTIVE_AGENT=tdd-refactor` when supported.",
45
+ "Record evidence — capture RED failure, GREEN output, and REFACTOR notes in the TDD artifact. When logging the `green` row via `/cc-ops tdd-log green`, attach the closed acceptance-criterion IDs in `acIds` so Ralph Loop status counts them.",
46
+ "Annotate traceability — link to plan task ID and spec criterion.",
47
+ "Per-Slice Review (conditional) — if `.cclaw/config.yaml::sliceReview.enabled` is true and the slice meets any trigger (touchCount >= filesChangedThreshold, touchPaths match touchTriggers, or highRisk=true), append a `## Per-Slice Review` entry for this slice before moving on (see the dedicated section below).",
48
+ "Repeat for each slice — return to step 1 for the next plan slice."
49
+ ],
50
+ interactionProtocol: [
51
+ "Pick one planned slice at a time.",
52
+ "Controller owns orchestration; execution runs through phase-specific delegation (`tdd-red` -> `tdd-green` -> `tdd-refactor`) or equivalent `test-author` modes.",
53
+ "Write behavior-focused tests before changing implementation (RED).",
54
+ "Capture and store failing output as RED evidence.",
55
+ "Apply minimal change to satisfy RED tests (GREEN).",
56
+ "Run full suite, not partial checks, for GREEN validation.",
57
+ "Before declaring the slice complete, run a fresh verification check and record command + commit SHA + PASS/FAIL.",
58
+ "Refactor without changing behavior and document rationale (REFACTOR).",
59
+ "Stop if regressions appear and fix before proceeding.",
60
+ "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?",
61
+ "**Per-Slice Review checkpoint (conditional, opt-in).** When `.cclaw/config.yaml::sliceReview.enabled` is true, check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory. Tracks outside `sliceReview.enforceOnTracks` still emit the section; doctor only escalates missed reviews on enforced tracks."
62
+ ],
63
+ process: [
64
+ "Select slice and map to acceptance criterion.",
65
+ "Dispatch `tdd-red` (or `test-author` TEST_RED_ONLY mode) and produce failing test(s) for expected reason (RED).",
66
+ "Run tests and capture failure output.",
67
+ "Dispatch `tdd-green` (or `test-author` BUILD_GREEN_REFACTOR mode) and implement smallest change needed for GREEN.",
68
+ "Run full tests and build checks.",
69
+ "Run a fresh verification-before-completion check and capture command + commit SHA + PASS/FAIL in guard evidence.",
70
+ "Dispatch `tdd-refactor` pass preserving behavior.",
71
+ "Record RED, GREEN, and REFACTOR evidence in artifact.",
72
+ "Annotate traceability to plan task and spec criterion; on `sliceReview` triggers, append a Per-Slice Review entry before closing the slice."
73
+ ],
74
+ requiredGates: [
75
+ { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
76
+ { id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
77
+ { id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
78
+ { id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, commit SHA, and explicit pass/fail status." },
79
+ { id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." },
80
+ { id: "tdd_docs_drift_check", description: "When public API/config/CLI surfaces change, docs drift is addressed via a completed doc-updater pass." }
81
+ ],
82
+ requiredEvidence: [
83
+ "Artifact updated at `.cclaw/artifacts/06-tdd.md` with RED, GREEN, and REFACTOR sections.",
84
+ "Failing command output captured (RED).",
85
+ "Full test/build output recorded (GREEN).",
86
+ "Fresh verification evidence recorded with command, commit SHA, and PASS/FAIL status before completion.",
87
+ "Acceptance mapping documented.",
88
+ "Failure reason analysis recorded.",
89
+ "Refactor rationale captured.",
90
+ "Traceability to task identifier is documented."
91
+ ],
92
+ inputs: ["approved plan slice", "spec acceptance criterion", "test harness configuration", "coding standards and constraints"],
93
+ requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
94
+ blockers: [
95
+ "tests pass before behavior change (RED failure missing)",
96
+ "full suite not green",
97
+ "behavior changed during refactor",
98
+ "no evidence recorded"
99
+ ],
100
+ exitCriteria: [
101
+ "RED evidence exists and is traceable",
102
+ "GREEN evidence captured with full suite pass",
103
+ "REFACTOR evidence captured",
104
+ "required gates marked satisfied",
105
+ "traceability annotated"
106
+ ]
107
+ },
108
+ artifactRules: {
109
+ artifactFile: "06-tdd.md",
110
+ completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
111
+ crossStageTrace: {
112
+ readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/03-design-<slug>.md"],
113
+ writesTo: [".cclaw/artifacts/06-tdd.md"],
114
+ traceabilityRule: "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Design decisions inform test strategy. Evidence chain must be unbroken."
156
115
  },
157
- {
158
- title: "State-over-Interaction + Beyoncé Coverage",
159
- evaluationPoints: [
160
- "Do assertions target observable state (return values, persisted data, HTTP responses, logs) rather than which internal helpers were called?",
161
- "Are mocks/spies used only at true trust boundaries (network, filesystem, time, external services), not for module-internal collaborators?",
162
- "For every public surface touched in this slice (exported API, CLI flag, config key, env var, exit code, schema field) — does at least one test observe it?",
163
- "If a bug or review finding revealed an uncovered surface, was a test added alongside the fix, not just the code change?",
164
- "Are interaction-style assertions (e.g. `toHaveBeenCalledWith` without a state assertion) justified by an explicit boundary comment, or flagged for follow-up?"
165
- ],
166
- stopGate: false
167
- }
168
- ],
169
- completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
170
- crossStageTrace: {
171
- readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/03-design-<slug>.md"],
172
- writesTo: [".cclaw/artifacts/06-tdd.md"],
173
- traceabilityRule: "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Design decisions inform test strategy. Evidence chain must be unbroken."
116
+ artifactValidation: [
117
+ { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
118
+ { section: "Acceptance Mapping", required: false, validationRule: "Each RED test links to a plan task and spec criterion." },
119
+ { section: "Failure Analysis", required: false, validationRule: "Failure reason matches expected missing behavior." },
120
+ { section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
121
+ { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
122
+ { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
123
+ { section: "Verification Ladder", required: true, validationRule: "Per-slice verification tier (static, command, behavioral, human) with evidence captured for the highest tier reached this turn." },
124
+ { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
125
+ { section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
126
+ { section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." },
127
+ { section: "Per-Slice Review", required: false, validationRule: "When `.cclaw/config.yaml::sliceReview.enabled` is true: per triggered slice, a two-part record — Spec-Compliance (slice <-> plan task <-> spec criterion trace plus edge-case notes) and Quality (diff-focused review of naming, error handling, dead code, simpler alternatives). Each entry names the trigger (touchCount, touchPaths glob, or highRisk) and the delegation fulfillmentMode (`isolated` when a reviewer subagent was dispatched natively; `role-switch` when fulfilled in-session). Slices that did not meet any trigger may list `not triggered` instead of a full pass." }
128
+ ]
129
+ },
130
+ reviewLens: {
131
+ outputs: ["failing test set", "passing implementation", "refactor evidence", "review-ready change set"],
132
+ reviewSections: [
133
+ {
134
+ title: "RED Evidence Audit",
135
+ evaluationPoints: [
136
+ "Does every slice have a captured failing test output?",
137
+ "Does each failure reason match the expected missing behavior (not a typo or config error)?",
138
+ "Were tests written BEFORE any production code for that slice?",
139
+ "Does each RED test assert observable behavior, not implementation details?",
140
+ "Is there a test for each acceptance criterion mapped in the plan?"
141
+ ],
142
+ stopGate: true
143
+ },
144
+ {
145
+ title: "GREEN/REFACTOR Audit",
146
+ evaluationPoints: [
147
+ "Does GREEN evidence show a FULL suite pass (not partial)?",
148
+ "Is the GREEN implementation minimal — no features beyond what RED tests require?",
149
+ "Does the REFACTOR step preserve all existing behavior (no new failures)?",
150
+ "Are REFACTOR notes documented with rationale?",
151
+ "Is traceability complete: every change links to plan task ID and spec criterion?"
152
+ ],
153
+ stopGate: true
154
+ },
155
+ {
156
+ title: "Test Pyramid + Size Audit",
157
+ evaluationPoints: [
158
+ "Is the tests-added count skewed toward Small (unit) tests, with Medium and Large used only when a real boundary justifies the cost?",
159
+ "Does every newly added test declare a size class (Small / Medium / Large) — either inline in the test file or in the TDD artifact table?",
160
+ "Are Large tests reserved for genuine end-to-end user journeys (not substitutes for unit coverage)?",
161
+ "Has the slice avoided using Medium/Large tests to paper over testability problems that should be fixed at the design layer?"
162
+ ],
163
+ stopGate: false
164
+ },
165
+ {
166
+ title: "Prove-It Reproduction (bug-fix slices)",
167
+ evaluationPoints: [
168
+ "Does the artifact identify this slice as a bug fix, and if so, include a reproduction test checked in alongside the fix?",
169
+ "Is there captured RED evidence from running the reproduction WITHOUT the fix applied?",
170
+ "Is there captured GREEN evidence from the same reproduction AFTER the fix was applied?",
171
+ "Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?"
172
+ ],
173
+ stopGate: false
174
+ },
175
+ {
176
+ title: "Per-Slice Review Audit (conditional)",
177
+ evaluationPoints: [
178
+ "When `.cclaw/config.yaml::sliceReview.enabled` is true: does every triggered slice (touchCount >= threshold, touchPaths match, or highRisk=true) carry a Per-Slice Review entry with BOTH a Spec-Compliance pass (plan task <-> spec criterion + edge-case notes) AND a Quality pass (diff-level naming/errors/dead code/simpler alternatives)?",
179
+ "Is the delegation `fulfillmentMode` recorded (`isolated` for a dispatched reviewer subagent, `role-switch` for an in-session pass) and does it match an entry in `.cclaw/state/delegation-log.json`?",
180
+ "On tracks listed in `sliceReview.enforceOnTracks`, are there zero missed triggered slices (doctor also surfaces this as a warning)?"
181
+ ],
182
+ stopGate: false
183
+ },
184
+ {
185
+ title: "State-over-Interaction + Beyoncé Coverage",
186
+ evaluationPoints: [
187
+ "Do assertions target observable state (return values, persisted data, HTTP responses, logs) rather than which internal helpers were called?",
188
+ "Are mocks/spies used only at true trust boundaries (network, filesystem, time, external services), not for module-internal collaborators?",
189
+ "For every public surface touched in this slice (exported API, CLI flag, config key, env var, exit code, schema field) — does at least one test observe it?",
190
+ "If a bug or review finding revealed an uncovered surface, was a test added alongside the fix, not just the code change?",
191
+ "Are interaction-style assertions (e.g. `toHaveBeenCalledWith` without a state assertion) justified by an explicit boundary comment, or flagged for follow-up?"
192
+ ],
193
+ stopGate: false
194
+ }
195
+ ]
174
196
  },
175
- artifactValidation: [
176
- { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
177
- { section: "Acceptance Mapping", required: false, validationRule: "Each RED test links to a plan task and spec criterion." },
178
- { section: "Failure Analysis", required: false, validationRule: "Failure reason matches expected missing behavior." },
179
- { section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
180
- { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
181
- { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
182
- { section: "Verification Ladder", required: true, validationRule: "Per-slice verification tier (static, command, behavioral, human) with evidence captured for the highest tier reached this turn." },
183
- { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
184
- { section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
185
- { section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." },
186
- { section: "Per-Slice Review", required: false, validationRule: "When `.cclaw/config.yaml::sliceReview.enabled` is true: per triggered slice, a two-part record — Spec-Compliance (slice <-> plan task <-> spec criterion trace plus edge-case notes) and Quality (diff-focused review of naming, error handling, dead code, simpler alternatives). Each entry names the trigger (touchCount, touchPaths glob, or highRisk) and the delegation fulfillmentMode (`isolated` when a reviewer subagent was dispatched natively; `role-switch` when fulfilled in-session). Slices that did not meet any trigger may list `not triggered` instead of a full pass." }
187
- ],
197
+ next: "review",
188
198
  batchExecutionAllowed: true
189
199
  };
190
200
  function quickTrackText(value) {
@@ -201,48 +211,61 @@ function quickTrackText(value) {
201
211
  function tddQuickTrackVariant() {
202
212
  return {
203
213
  ...TDD,
214
+ // Quick track keeps the same stage intent but rewrites plan-centric language to spec-centric language.
204
215
  skillDescription: quickTrackText(TDD.skillDescription),
205
- hardGate: quickTrackText(TDD.hardGate),
206
- checklist: TDD.checklist.map(quickTrackText),
207
- interactionProtocol: TDD.interactionProtocol.map(quickTrackText),
208
- process: TDD.process.map(quickTrackText),
209
- requiredGates: TDD.requiredGates
210
- .filter((gate) => gate.id !== "tdd_traceable_to_plan")
211
- .map((gate) => ({
212
- ...gate,
213
- description: quickTrackText(gate.description)
214
- })),
215
- requiredEvidence: TDD.requiredEvidence.map(quickTrackText),
216
- inputs: TDD.inputs.map(quickTrackText),
217
- requiredContext: ["spec artifact", "existing test patterns"],
218
- reviewSections: TDD.reviewSections.map((section) => ({
219
- ...section,
220
- evaluationPoints: section.evaluationPoints.map(quickTrackText)
221
- })),
222
- crossStageTrace: {
223
- ...TDD.crossStageTrace,
224
- readsFrom: [".cclaw/artifacts/04-spec.md"],
225
- traceabilityRule: "Every RED test traces to an acceptance criterion. Every GREEN change traces to a RED test. Evidence chain must be unbroken."
216
+ philosophy: {
217
+ ...TDD.philosophy,
218
+ hardGate: quickTrackText(TDD.philosophy.hardGate)
226
219
  },
227
- artifactValidation: TDD.artifactValidation.map((row) => {
228
- if (row.section === "Acceptance Mapping") {
229
- return {
230
- ...row,
231
- required: true,
232
- validationRule: "Each RED test links to a spec acceptance criterion ID (for example AC-1)."
233
- };
234
- }
235
- if (row.section === "Traceability") {
220
+ executionModel: {
221
+ ...TDD.executionModel,
222
+ checklist: TDD.executionModel.checklist.map(quickTrackText),
223
+ interactionProtocol: TDD.executionModel.interactionProtocol.map(quickTrackText),
224
+ process: TDD.executionModel.process.map(quickTrackText),
225
+ requiredGates: TDD.executionModel.requiredGates
226
+ .filter((gate) => gate.id !== "tdd_traceable_to_plan")
227
+ .map((gate) => ({
228
+ ...gate,
229
+ description: quickTrackText(gate.description)
230
+ })),
231
+ requiredEvidence: TDD.executionModel.requiredEvidence.map(quickTrackText),
232
+ inputs: TDD.executionModel.inputs.map(quickTrackText),
233
+ requiredContext: ["spec artifact", "existing test patterns"]
234
+ },
235
+ reviewLens: {
236
+ ...TDD.reviewLens,
237
+ reviewSections: TDD.reviewLens.reviewSections.map((section) => ({
238
+ ...section,
239
+ evaluationPoints: section.evaluationPoints.map(quickTrackText)
240
+ }))
241
+ },
242
+ artifactRules: {
243
+ ...TDD.artifactRules,
244
+ crossStageTrace: {
245
+ ...TDD.artifactRules.crossStageTrace,
246
+ readsFrom: [".cclaw/artifacts/04-spec.md"],
247
+ traceabilityRule: "Every RED test traces to an acceptance criterion. Every GREEN change traces to a RED test. Evidence chain must be unbroken."
248
+ },
249
+ artifactValidation: TDD.artifactRules.artifactValidation.map((row) => {
250
+ if (row.section === "Acceptance Mapping") {
251
+ return {
252
+ ...row,
253
+ required: true,
254
+ validationRule: "Each RED test links to a spec acceptance criterion ID (for example AC-1)."
255
+ };
256
+ }
257
+ if (row.section === "Traceability") {
258
+ return {
259
+ ...row,
260
+ validationRule: "Acceptance criterion IDs are linked to RED/GREEN evidence."
261
+ };
262
+ }
236
263
  return {
237
264
  ...row,
238
- validationRule: "Acceptance criterion IDs are linked to RED/GREEN evidence."
265
+ validationRule: quickTrackText(row.validationRule)
239
266
  };
240
- }
241
- return {
242
- ...row,
243
- validationRule: quickTrackText(row.validationRule)
244
- };
245
- })
267
+ })
268
+ }
246
269
  };
247
270
  }
248
271
  export function tddStageForTrack(track) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cclaw-cli",
3
- "version": "0.48.29",
3
+ "version": "0.48.31",
4
4
  "description": "Installer-first flow toolkit for coding agents",
5
5
  "type": "module",
6
6
  "bin": {