cclaw-cli 0.48.28 → 0.48.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/artifact-linter.js +16 -5
- package/dist/artifact-paths.d.ts +28 -0
- package/dist/artifact-paths.js +261 -0
- package/dist/content/stage-schema.js +26 -11
- package/dist/content/stages/brainstorm.js +5 -5
- package/dist/content/stages/design.js +5 -5
- package/dist/content/stages/plan.js +153 -143
- package/dist/content/stages/review.js +212 -202
- package/dist/content/stages/scope.js +4 -4
- package/dist/content/stages/ship.js +132 -122
- package/dist/content/stages/spec.js +131 -121
- package/dist/content/stages/tdd.d.ts +2 -2
- package/dist/content/stages/tdd.js +237 -214
- package/dist/gate-evidence.js +7 -17
- package/dist/internal/advance-stage.js +9 -3
- package/package.json +1 -1
|
@@ -2,189 +2,199 @@
|
|
|
2
2
|
// TDD — RED → GREEN → REFACTOR cycle (merged test + build)
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
4
|
export const TDD = {
|
|
5
|
+
schemaShape: "v2",
|
|
5
6
|
stage: "tdd",
|
|
7
|
+
complexityTier: "standard",
|
|
6
8
|
skillFolder: "test-driven-development",
|
|
7
9
|
skillName: "test-driven-development",
|
|
8
10
|
skillDescription: "Full TDD cycle: RED (failing tests), GREEN (minimal implementation), REFACTOR (cleanup). One plan slice at a time with strict traceability.",
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"
|
|
91
|
-
"
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
"Is there a test for each acceptance criterion mapped in the plan?"
|
|
114
|
-
],
|
|
115
|
-
stopGate: true
|
|
116
|
-
},
|
|
117
|
-
{
|
|
118
|
-
title: "GREEN/REFACTOR Audit",
|
|
119
|
-
evaluationPoints: [
|
|
120
|
-
"Does GREEN evidence show a FULL suite pass (not partial)?",
|
|
121
|
-
"Is the GREEN implementation minimal — no features beyond what RED tests require?",
|
|
122
|
-
"Does the REFACTOR step preserve all existing behavior (no new failures)?",
|
|
123
|
-
"Are REFACTOR notes documented with rationale?",
|
|
124
|
-
"Is traceability complete: every change links to plan task ID and spec criterion?"
|
|
125
|
-
],
|
|
126
|
-
stopGate: true
|
|
127
|
-
},
|
|
128
|
-
{
|
|
129
|
-
title: "Test Pyramid + Size Audit",
|
|
130
|
-
evaluationPoints: [
|
|
131
|
-
"Is the tests-added count skewed toward Small (unit) tests, with Medium and Large used only when a real boundary justifies the cost?",
|
|
132
|
-
"Does every newly added test declare a size class (Small / Medium / Large) — either inline in the test file or in the TDD artifact table?",
|
|
133
|
-
"Are Large tests reserved for genuine end-to-end user journeys (not substitutes for unit coverage)?",
|
|
134
|
-
"Has the slice avoided using Medium/Large tests to paper over testability problems that should be fixed at the design layer?"
|
|
135
|
-
],
|
|
136
|
-
stopGate: false
|
|
137
|
-
},
|
|
138
|
-
{
|
|
139
|
-
title: "Prove-It Reproduction (bug-fix slices)",
|
|
140
|
-
evaluationPoints: [
|
|
141
|
-
"Does the artifact identify this slice as a bug fix, and if so, include a reproduction test checked in alongside the fix?",
|
|
142
|
-
"Is there captured RED evidence from running the reproduction WITHOUT the fix applied?",
|
|
143
|
-
"Is there captured GREEN evidence from the same reproduction AFTER the fix was applied?",
|
|
144
|
-
"Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?"
|
|
145
|
-
],
|
|
146
|
-
stopGate: false
|
|
147
|
-
},
|
|
148
|
-
{
|
|
149
|
-
title: "Per-Slice Review Audit (conditional)",
|
|
150
|
-
evaluationPoints: [
|
|
151
|
-
"When `.cclaw/config.yaml::sliceReview.enabled` is true: does every triggered slice (touchCount >= threshold, touchPaths match, or highRisk=true) carry a Per-Slice Review entry with BOTH a Spec-Compliance pass (plan task <-> spec criterion + edge-case notes) AND a Quality pass (diff-level naming/errors/dead code/simpler alternatives)?",
|
|
152
|
-
"Is the delegation `fulfillmentMode` recorded (`isolated` for a dispatched reviewer subagent, `role-switch` for an in-session pass) and does it match an entry in `.cclaw/state/delegation-log.json`?",
|
|
153
|
-
"On tracks listed in `sliceReview.enforceOnTracks`, are there zero missed triggered slices (doctor also surfaces this as a warning)?"
|
|
154
|
-
],
|
|
155
|
-
stopGate: false
|
|
11
|
+
philosophy: {
|
|
12
|
+
hardGate: "Do NOT merge, ship, or skip review. Follow RED → GREEN → REFACTOR strictly for each plan slice. Do NOT write implementation code before RED tests exist. Do NOT skip the REFACTOR step.",
|
|
13
|
+
ironLaw: "NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST — THE RED FAILURE IS THE SPEC.",
|
|
14
|
+
purpose: "Implement features through the TDD cycle: write failing tests, make them pass with minimal code, then refactor.",
|
|
15
|
+
whenToUse: [
|
|
16
|
+
"After plan confirmation",
|
|
17
|
+
"For every behavior change in scope",
|
|
18
|
+
"Before review stage"
|
|
19
|
+
],
|
|
20
|
+
whenNotToUse: [
|
|
21
|
+
"Plan approval is still pending WAIT_FOR_CONFIRM",
|
|
22
|
+
"The change is docs-only and does not alter behavior",
|
|
23
|
+
"The stage intent is review/ship sign-off rather than implementation"
|
|
24
|
+
],
|
|
25
|
+
commonRationalizations: [
|
|
26
|
+
"Writing code before failing test",
|
|
27
|
+
"Partial test runs presented as GREEN",
|
|
28
|
+
"Skipping evidence capture",
|
|
29
|
+
"Undocumented refactor changes",
|
|
30
|
+
"No full-suite GREEN evidence",
|
|
31
|
+
"Multiple tasks implemented in one pass without justification"
|
|
32
|
+
]
|
|
33
|
+
},
|
|
34
|
+
executionModel: {
|
|
35
|
+
checklist: [
|
|
36
|
+
"Select plan slice — pick one task from the plan. Do not batch multiple tasks. Before starting, read `.cclaw/state/ralph-loop.json` (`loopIteration`, `acClosed[]`, `redOpenSlices[]`) so you skip cycles already closed.",
|
|
37
|
+
"Map to acceptance criterion — identify the specific spec criterion this test proves.",
|
|
38
|
+
"Dispatch mandatory `tdd-red` execution (or `test-author` in TEST_RED_ONLY mode) — produce failing behavior tests and RED evidence only (no production edits). Set `CCLAW_ACTIVE_AGENT=tdd-red` when supported.",
|
|
39
|
+
"RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
|
|
40
|
+
"Dispatch `tdd-green` execution (or `test-author` in BUILD_GREEN_REFACTOR mode) — minimal implementation + full-suite GREEN. Set `CCLAW_ACTIVE_AGENT=tdd-green` when supported.",
|
|
41
|
+
"GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
|
|
42
|
+
"GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
|
|
43
|
+
"Run verification-before-completion discipline for the slice — capture a fresh test command, commit SHA, and explicit PASS/FAIL status before completion claims.",
|
|
44
|
+
"REFACTOR: Dispatch `tdd-refactor` execution (or dedicated refactor mode) to improve code quality without behavior changes. Set `CCLAW_ACTIVE_AGENT=tdd-refactor` when supported.",
|
|
45
|
+
"Record evidence — capture RED failure, GREEN output, and REFACTOR notes in the TDD artifact. When logging the `green` row via `/cc-ops tdd-log green`, attach the closed acceptance-criterion IDs in `acIds` so Ralph Loop status counts them.",
|
|
46
|
+
"Annotate traceability — link to plan task ID and spec criterion.",
|
|
47
|
+
"Per-Slice Review (conditional) — if `.cclaw/config.yaml::sliceReview.enabled` is true and the slice meets any trigger (touchCount >= filesChangedThreshold, touchPaths match touchTriggers, or highRisk=true), append a `## Per-Slice Review` entry for this slice before moving on (see the dedicated section below).",
|
|
48
|
+
"Repeat for each slice — return to step 1 for the next plan slice."
|
|
49
|
+
],
|
|
50
|
+
interactionProtocol: [
|
|
51
|
+
"Pick one planned slice at a time.",
|
|
52
|
+
"Controller owns orchestration; execution runs through phase-specific delegation (`tdd-red` -> `tdd-green` -> `tdd-refactor`) or equivalent `test-author` modes.",
|
|
53
|
+
"Write behavior-focused tests before changing implementation (RED).",
|
|
54
|
+
"Capture and store failing output as RED evidence.",
|
|
55
|
+
"Apply minimal change to satisfy RED tests (GREEN).",
|
|
56
|
+
"Run full suite, not partial checks, for GREEN validation.",
|
|
57
|
+
"Before declaring the slice complete, run a fresh verification check and record command + commit SHA + PASS/FAIL.",
|
|
58
|
+
"Refactor without changing behavior and document rationale (REFACTOR).",
|
|
59
|
+
"Stop if regressions appear and fix before proceeding.",
|
|
60
|
+
"If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?",
|
|
61
|
+
"**Per-Slice Review checkpoint (conditional, opt-in).** When `.cclaw/config.yaml::sliceReview.enabled` is true, check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory. Tracks outside `sliceReview.enforceOnTracks` still emit the section; doctor only escalates missed reviews on enforced tracks."
|
|
62
|
+
],
|
|
63
|
+
process: [
|
|
64
|
+
"Select slice and map to acceptance criterion.",
|
|
65
|
+
"Dispatch `tdd-red` (or `test-author` TEST_RED_ONLY mode) and produce failing test(s) for expected reason (RED).",
|
|
66
|
+
"Run tests and capture failure output.",
|
|
67
|
+
"Dispatch `tdd-green` (or `test-author` BUILD_GREEN_REFACTOR mode) and implement smallest change needed for GREEN.",
|
|
68
|
+
"Run full tests and build checks.",
|
|
69
|
+
"Run a fresh verification-before-completion check and capture command + commit SHA + PASS/FAIL in guard evidence.",
|
|
70
|
+
"Dispatch `tdd-refactor` pass preserving behavior.",
|
|
71
|
+
"Record RED, GREEN, and REFACTOR evidence in artifact.",
|
|
72
|
+
"Annotate traceability to plan task and spec criterion; on `sliceReview` triggers, append a Per-Slice Review entry before closing the slice."
|
|
73
|
+
],
|
|
74
|
+
requiredGates: [
|
|
75
|
+
{ id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
|
|
76
|
+
{ id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
|
|
77
|
+
{ id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
|
|
78
|
+
{ id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, commit SHA, and explicit pass/fail status." },
|
|
79
|
+
{ id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." },
|
|
80
|
+
{ id: "tdd_docs_drift_check", description: "When public API/config/CLI surfaces change, docs drift is addressed via a completed doc-updater pass." }
|
|
81
|
+
],
|
|
82
|
+
requiredEvidence: [
|
|
83
|
+
"Artifact updated at `.cclaw/artifacts/06-tdd.md` with RED, GREEN, and REFACTOR sections.",
|
|
84
|
+
"Failing command output captured (RED).",
|
|
85
|
+
"Full test/build output recorded (GREEN).",
|
|
86
|
+
"Fresh verification evidence recorded with command, commit SHA, and PASS/FAIL status before completion.",
|
|
87
|
+
"Acceptance mapping documented.",
|
|
88
|
+
"Failure reason analysis recorded.",
|
|
89
|
+
"Refactor rationale captured.",
|
|
90
|
+
"Traceability to task identifier is documented."
|
|
91
|
+
],
|
|
92
|
+
inputs: ["approved plan slice", "spec acceptance criterion", "test harness configuration", "coding standards and constraints"],
|
|
93
|
+
requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
|
|
94
|
+
blockers: [
|
|
95
|
+
"tests pass before behavior change (RED failure missing)",
|
|
96
|
+
"full suite not green",
|
|
97
|
+
"behavior changed during refactor",
|
|
98
|
+
"no evidence recorded"
|
|
99
|
+
],
|
|
100
|
+
exitCriteria: [
|
|
101
|
+
"RED evidence exists and is traceable",
|
|
102
|
+
"GREEN evidence captured with full suite pass",
|
|
103
|
+
"REFACTOR evidence captured",
|
|
104
|
+
"required gates marked satisfied",
|
|
105
|
+
"traceability annotated"
|
|
106
|
+
]
|
|
107
|
+
},
|
|
108
|
+
artifactRules: {
|
|
109
|
+
artifactFile: "06-tdd.md",
|
|
110
|
+
completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
|
|
111
|
+
crossStageTrace: {
|
|
112
|
+
readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/03-design-<slug>.md"],
|
|
113
|
+
writesTo: [".cclaw/artifacts/06-tdd.md"],
|
|
114
|
+
traceabilityRule: "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Design decisions inform test strategy. Evidence chain must be unbroken."
|
|
156
115
|
},
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
116
|
+
artifactValidation: [
|
|
117
|
+
{ section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
|
|
118
|
+
{ section: "Acceptance Mapping", required: false, validationRule: "Each RED test links to a plan task and spec criterion." },
|
|
119
|
+
{ section: "Failure Analysis", required: false, validationRule: "Failure reason matches expected missing behavior." },
|
|
120
|
+
{ section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
|
|
121
|
+
{ section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
|
|
122
|
+
{ section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
|
|
123
|
+
{ section: "Verification Ladder", required: true, validationRule: "Per-slice verification tier (static, command, behavioral, human) with evidence captured for the highest tier reached this turn." },
|
|
124
|
+
{ section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
|
|
125
|
+
{ section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
|
|
126
|
+
{ section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." },
|
|
127
|
+
{ section: "Per-Slice Review", required: false, validationRule: "When `.cclaw/config.yaml::sliceReview.enabled` is true: per triggered slice, a two-part record — Spec-Compliance (slice <-> plan task <-> spec criterion trace plus edge-case notes) and Quality (diff-focused review of naming, error handling, dead code, simpler alternatives). Each entry names the trigger (touchCount, touchPaths glob, or highRisk) and the delegation fulfillmentMode (`isolated` when a reviewer subagent was dispatched natively; `role-switch` when fulfilled in-session). Slices that did not meet any trigger may list `not triggered` instead of a full pass." }
|
|
128
|
+
]
|
|
129
|
+
},
|
|
130
|
+
reviewLens: {
|
|
131
|
+
outputs: ["failing test set", "passing implementation", "refactor evidence", "review-ready change set"],
|
|
132
|
+
reviewSections: [
|
|
133
|
+
{
|
|
134
|
+
title: "RED Evidence Audit",
|
|
135
|
+
evaluationPoints: [
|
|
136
|
+
"Does every slice have a captured failing test output?",
|
|
137
|
+
"Does each failure reason match the expected missing behavior (not a typo or config error)?",
|
|
138
|
+
"Were tests written BEFORE any production code for that slice?",
|
|
139
|
+
"Does each RED test assert observable behavior, not implementation details?",
|
|
140
|
+
"Is there a test for each acceptance criterion mapped in the plan?"
|
|
141
|
+
],
|
|
142
|
+
stopGate: true
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
title: "GREEN/REFACTOR Audit",
|
|
146
|
+
evaluationPoints: [
|
|
147
|
+
"Does GREEN evidence show a FULL suite pass (not partial)?",
|
|
148
|
+
"Is the GREEN implementation minimal — no features beyond what RED tests require?",
|
|
149
|
+
"Does the REFACTOR step preserve all existing behavior (no new failures)?",
|
|
150
|
+
"Are REFACTOR notes documented with rationale?",
|
|
151
|
+
"Is traceability complete: every change links to plan task ID and spec criterion?"
|
|
152
|
+
],
|
|
153
|
+
stopGate: true
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
title: "Test Pyramid + Size Audit",
|
|
157
|
+
evaluationPoints: [
|
|
158
|
+
"Is the tests-added count skewed toward Small (unit) tests, with Medium and Large used only when a real boundary justifies the cost?",
|
|
159
|
+
"Does every newly added test declare a size class (Small / Medium / Large) — either inline in the test file or in the TDD artifact table?",
|
|
160
|
+
"Are Large tests reserved for genuine end-to-end user journeys (not substitutes for unit coverage)?",
|
|
161
|
+
"Has the slice avoided using Medium/Large tests to paper over testability problems that should be fixed at the design layer?"
|
|
162
|
+
],
|
|
163
|
+
stopGate: false
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
title: "Prove-It Reproduction (bug-fix slices)",
|
|
167
|
+
evaluationPoints: [
|
|
168
|
+
"Does the artifact identify this slice as a bug fix, and if so, include a reproduction test checked in alongside the fix?",
|
|
169
|
+
"Is there captured RED evidence from running the reproduction WITHOUT the fix applied?",
|
|
170
|
+
"Is there captured GREEN evidence from the same reproduction AFTER the fix was applied?",
|
|
171
|
+
"Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?"
|
|
172
|
+
],
|
|
173
|
+
stopGate: false
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
title: "Per-Slice Review Audit (conditional)",
|
|
177
|
+
evaluationPoints: [
|
|
178
|
+
"When `.cclaw/config.yaml::sliceReview.enabled` is true: does every triggered slice (touchCount >= threshold, touchPaths match, or highRisk=true) carry a Per-Slice Review entry with BOTH a Spec-Compliance pass (plan task <-> spec criterion + edge-case notes) AND a Quality pass (diff-level naming/errors/dead code/simpler alternatives)?",
|
|
179
|
+
"Is the delegation `fulfillmentMode` recorded (`isolated` for a dispatched reviewer subagent, `role-switch` for an in-session pass) and does it match an entry in `.cclaw/state/delegation-log.json`?",
|
|
180
|
+
"On tracks listed in `sliceReview.enforceOnTracks`, are there zero missed triggered slices (doctor also surfaces this as a warning)?"
|
|
181
|
+
],
|
|
182
|
+
stopGate: false
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
title: "State-over-Interaction + Beyoncé Coverage",
|
|
186
|
+
evaluationPoints: [
|
|
187
|
+
"Do assertions target observable state (return values, persisted data, HTTP responses, logs) rather than which internal helpers were called?",
|
|
188
|
+
"Are mocks/spies used only at true trust boundaries (network, filesystem, time, external services), not for module-internal collaborators?",
|
|
189
|
+
"For every public surface touched in this slice (exported API, CLI flag, config key, env var, exit code, schema field) — does at least one test observe it?",
|
|
190
|
+
"If a bug or review finding revealed an uncovered surface, was a test added alongside the fix, not just the code change?",
|
|
191
|
+
"Are interaction-style assertions (e.g. `toHaveBeenCalledWith` without a state assertion) justified by an explicit boundary comment, or flagged for follow-up?"
|
|
192
|
+
],
|
|
193
|
+
stopGate: false
|
|
194
|
+
}
|
|
195
|
+
]
|
|
174
196
|
},
|
|
175
|
-
|
|
176
|
-
{ section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
|
|
177
|
-
{ section: "Acceptance Mapping", required: false, validationRule: "Each RED test links to a plan task and spec criterion." },
|
|
178
|
-
{ section: "Failure Analysis", required: false, validationRule: "Failure reason matches expected missing behavior." },
|
|
179
|
-
{ section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
|
|
180
|
-
{ section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
|
|
181
|
-
{ section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
|
|
182
|
-
{ section: "Verification Ladder", required: true, validationRule: "Per-slice verification tier (static, command, behavioral, human) with evidence captured for the highest tier reached this turn." },
|
|
183
|
-
{ section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
|
|
184
|
-
{ section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
|
|
185
|
-
{ section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." },
|
|
186
|
-
{ section: "Per-Slice Review", required: false, validationRule: "When `.cclaw/config.yaml::sliceReview.enabled` is true: per triggered slice, a two-part record — Spec-Compliance (slice <-> plan task <-> spec criterion trace plus edge-case notes) and Quality (diff-focused review of naming, error handling, dead code, simpler alternatives). Each entry names the trigger (touchCount, touchPaths glob, or highRisk) and the delegation fulfillmentMode (`isolated` when a reviewer subagent was dispatched natively; `role-switch` when fulfilled in-session). Slices that did not meet any trigger may list `not triggered` instead of a full pass." }
|
|
187
|
-
],
|
|
197
|
+
next: "review",
|
|
188
198
|
batchExecutionAllowed: true
|
|
189
199
|
};
|
|
190
200
|
function quickTrackText(value) {
|
|
@@ -201,48 +211,61 @@ function quickTrackText(value) {
|
|
|
201
211
|
function tddQuickTrackVariant() {
|
|
202
212
|
return {
|
|
203
213
|
...TDD,
|
|
214
|
+
// Quick track keeps the same stage intent but rewrites plan-centric language to spec-centric language.
|
|
204
215
|
skillDescription: quickTrackText(TDD.skillDescription),
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
process: TDD.process.map(quickTrackText),
|
|
209
|
-
requiredGates: TDD.requiredGates
|
|
210
|
-
.filter((gate) => gate.id !== "tdd_traceable_to_plan")
|
|
211
|
-
.map((gate) => ({
|
|
212
|
-
...gate,
|
|
213
|
-
description: quickTrackText(gate.description)
|
|
214
|
-
})),
|
|
215
|
-
requiredEvidence: TDD.requiredEvidence.map(quickTrackText),
|
|
216
|
-
inputs: TDD.inputs.map(quickTrackText),
|
|
217
|
-
requiredContext: ["spec artifact", "existing test patterns"],
|
|
218
|
-
reviewSections: TDD.reviewSections.map((section) => ({
|
|
219
|
-
...section,
|
|
220
|
-
evaluationPoints: section.evaluationPoints.map(quickTrackText)
|
|
221
|
-
})),
|
|
222
|
-
crossStageTrace: {
|
|
223
|
-
...TDD.crossStageTrace,
|
|
224
|
-
readsFrom: [".cclaw/artifacts/04-spec.md"],
|
|
225
|
-
traceabilityRule: "Every RED test traces to an acceptance criterion. Every GREEN change traces to a RED test. Evidence chain must be unbroken."
|
|
216
|
+
philosophy: {
|
|
217
|
+
...TDD.philosophy,
|
|
218
|
+
hardGate: quickTrackText(TDD.philosophy.hardGate)
|
|
226
219
|
},
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
220
|
+
executionModel: {
|
|
221
|
+
...TDD.executionModel,
|
|
222
|
+
checklist: TDD.executionModel.checklist.map(quickTrackText),
|
|
223
|
+
interactionProtocol: TDD.executionModel.interactionProtocol.map(quickTrackText),
|
|
224
|
+
process: TDD.executionModel.process.map(quickTrackText),
|
|
225
|
+
requiredGates: TDD.executionModel.requiredGates
|
|
226
|
+
.filter((gate) => gate.id !== "tdd_traceable_to_plan")
|
|
227
|
+
.map((gate) => ({
|
|
228
|
+
...gate,
|
|
229
|
+
description: quickTrackText(gate.description)
|
|
230
|
+
})),
|
|
231
|
+
requiredEvidence: TDD.executionModel.requiredEvidence.map(quickTrackText),
|
|
232
|
+
inputs: TDD.executionModel.inputs.map(quickTrackText),
|
|
233
|
+
requiredContext: ["spec artifact", "existing test patterns"]
|
|
234
|
+
},
|
|
235
|
+
reviewLens: {
|
|
236
|
+
...TDD.reviewLens,
|
|
237
|
+
reviewSections: TDD.reviewLens.reviewSections.map((section) => ({
|
|
238
|
+
...section,
|
|
239
|
+
evaluationPoints: section.evaluationPoints.map(quickTrackText)
|
|
240
|
+
}))
|
|
241
|
+
},
|
|
242
|
+
artifactRules: {
|
|
243
|
+
...TDD.artifactRules,
|
|
244
|
+
crossStageTrace: {
|
|
245
|
+
...TDD.artifactRules.crossStageTrace,
|
|
246
|
+
readsFrom: [".cclaw/artifacts/04-spec.md"],
|
|
247
|
+
traceabilityRule: "Every RED test traces to an acceptance criterion. Every GREEN change traces to a RED test. Evidence chain must be unbroken."
|
|
248
|
+
},
|
|
249
|
+
artifactValidation: TDD.artifactRules.artifactValidation.map((row) => {
|
|
250
|
+
if (row.section === "Acceptance Mapping") {
|
|
251
|
+
return {
|
|
252
|
+
...row,
|
|
253
|
+
required: true,
|
|
254
|
+
validationRule: "Each RED test links to a spec acceptance criterion ID (for example AC-1)."
|
|
255
|
+
};
|
|
256
|
+
}
|
|
257
|
+
if (row.section === "Traceability") {
|
|
258
|
+
return {
|
|
259
|
+
...row,
|
|
260
|
+
validationRule: "Acceptance criterion IDs are linked to RED/GREEN evidence."
|
|
261
|
+
};
|
|
262
|
+
}
|
|
236
263
|
return {
|
|
237
264
|
...row,
|
|
238
|
-
validationRule:
|
|
265
|
+
validationRule: quickTrackText(row.validationRule)
|
|
239
266
|
};
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
...row,
|
|
243
|
-
validationRule: quickTrackText(row.validationRule)
|
|
244
|
-
};
|
|
245
|
-
})
|
|
267
|
+
})
|
|
268
|
+
}
|
|
246
269
|
};
|
|
247
270
|
}
|
|
248
271
|
export function tddStageForTrack(track) {
|
package/dist/gate-evidence.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from "node:fs/promises";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { checkReviewSecurityNoChangeAttestation, checkReviewVerdictConsistency, extractMarkdownSectionBody, lintArtifact, validateReviewArmy } from "./artifact-linter.js";
|
|
4
|
+
import { resolveArtifactPath } from "./artifact-paths.js";
|
|
4
5
|
import { RUNTIME_ROOT } from "./constants.js";
|
|
5
6
|
import { stageSchema } from "./content/stage-schema.js";
|
|
6
7
|
import { readDelegationLedger } from "./delegation.js";
|
|
@@ -11,23 +12,12 @@ import { parseTddCycleLog, validateTddCycleOrder } from "./tdd-cycle.js";
|
|
|
11
12
|
import { buildTraceMatrix } from "./trace-matrix.js";
|
|
12
13
|
import { FLOW_STAGES } from "./types.js";
|
|
13
14
|
async function currentStageArtifactExists(projectRoot, stage, track) {
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
if (await exists(candidate))
|
|
21
|
-
return true;
|
|
22
|
-
}
|
|
23
|
-
// Artifact-linter also accepts the file under current working directory fallback; stat once more.
|
|
24
|
-
try {
|
|
25
|
-
await fs.access(path.join(projectRoot, artifactFile));
|
|
26
|
-
return true;
|
|
27
|
-
}
|
|
28
|
-
catch {
|
|
29
|
-
return false;
|
|
30
|
-
}
|
|
15
|
+
const resolved = await resolveArtifactPath(stage, {
|
|
16
|
+
projectRoot,
|
|
17
|
+
track,
|
|
18
|
+
intent: "read"
|
|
19
|
+
});
|
|
20
|
+
return exists(resolved.absPath);
|
|
31
21
|
}
|
|
32
22
|
async function readArtifactMarkdown(projectRoot, artifactFile) {
|
|
33
23
|
const candidates = [
|
|
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { spawn } from "node:child_process";
|
|
4
4
|
import process from "node:process";
|
|
5
|
+
import { resolveArtifactPath } from "../artifact-paths.js";
|
|
5
6
|
import { RUNTIME_ROOT, SHIP_FINALIZATION_MODES } from "../constants.js";
|
|
6
7
|
import { stageSchema } from "../content/stage-schema.js";
|
|
7
8
|
import { appendDelegation, checkMandatoryDelegations } from "../delegation.js";
|
|
@@ -355,8 +356,13 @@ function withLearningsHarvestMarker(artifactMarkdown, appendedEntries, skippedDu
|
|
|
355
356
|
const suffix = artifactMarkdown.endsWith("\n") ? "" : "\n";
|
|
356
357
|
return `${artifactMarkdown}${suffix}${LEARNINGS_HARVEST_MARKER_PREFIX}${new Date().toISOString()} appended=${appendedEntries} skipped=${skippedDuplicates} -->\n`;
|
|
357
358
|
}
|
|
358
|
-
async function harvestStageLearnings(projectRoot, stage,
|
|
359
|
-
const
|
|
359
|
+
async function harvestStageLearnings(projectRoot, stage, track) {
|
|
360
|
+
const resolvedArtifact = await resolveArtifactPath(stage, {
|
|
361
|
+
projectRoot,
|
|
362
|
+
track,
|
|
363
|
+
intent: "read"
|
|
364
|
+
});
|
|
365
|
+
const artifactPath = resolvedArtifact.absPath;
|
|
360
366
|
let raw = "";
|
|
361
367
|
try {
|
|
362
368
|
raw = await fs.readFile(artifactPath, "utf8");
|
|
@@ -560,7 +566,7 @@ async function runAdvanceStage(projectRoot, args, io) {
|
|
|
560
566
|
}
|
|
561
567
|
return 1;
|
|
562
568
|
}
|
|
563
|
-
const learningsHarvest = await harvestStageLearnings(projectRoot, args.stage,
|
|
569
|
+
const learningsHarvest = await harvestStageLearnings(projectRoot, args.stage, flowState.track);
|
|
564
570
|
if (!learningsHarvest.ok) {
|
|
565
571
|
io.stderr.write(`cclaw internal advance-stage: learnings harvest failed for "${schema.artifactFile}". ${learningsHarvest.details}\n`);
|
|
566
572
|
return 1;
|