cclaw-cli 0.15.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
// ---------------------------------------------------------------------------
// SPEC
// ---------------------------------------------------------------------------

/** Builds one `requiredGates` entry as `{ id, description }`. */
const specGate = (id, description) => ({ id, description });

/** Builds one `reviewSections` entry; both audits of this stage are stop gates. */
const specAudit = (title, evaluationPoints) => ({ title, evaluationPoints, stopGate: true });

/** Builds one `artifactValidation` entry as `{ section, required, validationRule }`. */
const specArtifactSection = (section, required, validationRule) => ({
  section,
  required,
  validationRule,
});

// Gates listed under `SPEC.requiredGates`.
const specRequiredGates = [
  specGate("spec_acceptance_measurable", "Acceptance criteria are measurable and observable."),
  specGate("spec_edge_cases_documented", "Boundary and error conditions are defined for each criterion."),
  specGate("spec_constraints_documented", "Constraints and assumptions are explicit."),
  specGate("spec_testability_confirmed", "Each criterion has a described test method."),
  specGate("spec_user_approved", "User approved the final written spec."),
];

// Review audits listed under `SPEC.reviewSections`.
const specReviewSections = [
  specAudit("Acceptance Criteria Audit", [
    "Is every criterion observable (can you point to evidence of pass/fail)?",
    "Is every criterion measurable (numeric threshold or boolean outcome)?",
    "Is every criterion falsifiable (can you describe what failure looks like)?",
    "Does every criterion trace to a design decision (Design Decision Ref)?",
    "Are there any vague adjectives (fast, intuitive, robust) without thresholds?",
  ]),
  specAudit("Testability Audit", [
    "Does every criterion have a concrete test description in the Testability Map?",
    "Does every test specify a verification approach (unit, integration, e2e, manual)?",
    "Does every test include a runnable command or manual steps?",
    "Are edge cases (boundary + error) defined for every criterion?",
    "Can you run every verification command right now and get a meaningful result?",
  ]),
];

// Per-section rules listed under `SPEC.artifactValidation`; the boolean is the `required` flag.
const specArtifactValidation = [
  specArtifactSection(
    "Acceptance Criteria",
    true,
    "Each criterion is observable, measurable, and falsifiable. Table must include a Requirement Ref column linking to R# IDs in 02-scope.md and a Design Decision Ref column tracing back to design artifact. AC IDs (AC-1, AC-2…) are stable across revisions — dropped ACs stay with Priority `DROPPED`.",
  ),
  specArtifactSection(
    "Edge Cases",
    true,
    "At least one boundary and one error condition per criterion.",
  ),
  specArtifactSection(
    "Constraints and Assumptions",
    true,
    "All implicit assumptions surfaced. Constraints have sources.",
  ),
  specArtifactSection(
    "Testability Map",
    true,
    "Each criterion maps to a concrete test description with verification approach (unit, integration, e2e, manual) and command or manual steps.",
  ),
  specArtifactSection(
    "Vague to Fixed",
    false,
    "If present: table with original vague wording and rewritten observable/testable version for each ambiguous requirement.",
  ),
  specArtifactSection(
    "Non-Functional Requirements",
    false,
    "If present: performance thresholds, security constraints, scalability limits, reliability targets with measurable values.",
  ),
  specArtifactSection(
    "Interface Contracts",
    false,
    "If present: for each module boundary list produces (outputs) and consumes (inputs) with data types.",
  ),
  specArtifactSection("Approval", true, "Explicit user approval marker present."),
];

/**
 * Definition of the "spec" stage: identity and skill metadata, guidance text
 * (checklist, protocol, process), gates and evidence, review audits, and the
 * validation rules for the `04-spec.md` artifact. The stage hands over to
 * "plan" via `next`.
 *
 * NOTE(review): the consuming schema type is not visible here; presumably it
 * matches the `StageSchemaInput` shape used by sibling stages — confirm.
 */
export const SPEC = {
  // Identity and skill metadata.
  stage: "spec",
  skillFolder: "specification-authoring",
  skillName: "specification-authoring",
  skillDescription:
    "Specification stage. Produce measurable, testable requirements without ambiguity.",
  hardGate:
    "Do NOT plan tasks or write implementation code. This stage produces a specification document only. Every requirement must be expressed in observable, testable terms.",
  ironLaw: "EVERY ACCEPTANCE CRITERION MUST BE OBSERVABLE AND TESTABLE — OR IT DOES NOT EXIST.",
  purpose: "Create a testable specification aligned with approved design and constraints.",

  // Applicability.
  whenToUse: [
    "After design lock",
    "Before planning and implementation",
    "When acceptance criteria must be measurable",
  ],
  whenNotToUse: [
    "Design decisions are still unresolved or disputed",
    "The task is implementation-only cleanup with unchanged behavior",
    "You still need to challenge scope rather than author requirements",
  ],

  // Working guidance.
  checklist: [
    "Read upstream — load design artifact and scope contract. Cross-reference architecture decisions.",
    "Define measurable acceptance criteria — each criterion must be observable and falsifiable. No vague adjectives.",
    "Capture edge cases — for each criterion, define at least one boundary condition and one error condition.",
    "Document constraints and assumptions — regulatory, system, integration, and performance boundaries. Surface implicit assumptions explicitly.",
    "Confirm testability — for each acceptance criterion, describe the test that would prove it. If untestable, rewrite the criterion.",
    "Write spec artifact and request user approval — wait for explicit confirmation before proceeding.",
  ],
  interactionProtocol: [
    "Express each requirement in observable terms.",
    "Resolve ambiguity before moving to plan. Challenge vague language.",
    "Capture assumptions explicitly, not implicitly.",
    "Require user confirmation on the written spec. **STOP.** Do NOT proceed to plan until user approves.",
    "For each criterion, ask: how would you test this? If the answer is unclear, rewrite.",
    "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity.",
  ],
  process: [
    "Define measurable acceptance criteria.",
    "Capture constraints, assumptions, and edge cases.",
    "Build testability map: criterion -> test description.",
    "Confirm testability for each criterion.",
    "Write spec artifact and request approval.",
  ],

  // Gates, evidence, and stage I/O.
  requiredGates: specRequiredGates,
  requiredEvidence: [
    "Artifact written to `.cclaw/artifacts/04-spec.md`.",
    "Each acceptance criterion maps to a testable outcome.",
    "Edge cases documented per criterion.",
    "Approval marker captured in artifact.",
  ],
  inputs: ["design artifact", "business constraints", "quality requirements"],
  requiredContext: [
    "design lock baseline",
    "regulatory or system boundaries",
    "integration constraints",
  ],
  outputs: [
    "measurable specification",
    "acceptance-to-testability map",
    "approved spec artifact",
  ],
  blockers: [
    "non-measurable criteria",
    "constraints missing",
    "open ambiguities remain",
  ],
  exitCriteria: [
    "spec approved by user",
    "required gates marked satisfied",
    "plan-ready acceptance mapping exists",
    "testability confirmed for all criteria",
  ],

  // Failure modes to watch for.
  antiPatterns: [
    "High-level goals without measurable outcomes",
    "Implicit assumptions",
    "Proceeding to plan before approval",
    "Using vague adjectives (fast, intuitive, robust) without thresholds",
  ],
  redFlags: [
    "Criteria use vague language (fast, intuitive, robust) without thresholds",
    "No explicit assumptions section",
    "No approval record",
    "No testability mapping",
    "Edge cases missing or deferred",
  ],
  policyNeedles: ["Acceptance Criteria", "Constraints", "Testability", "approved spec", "Edge Cases"],

  // Artifact, hand-off, and review.
  artifactFile: "04-spec.md",
  next: "plan",
  reviewSections: specReviewSections,
  completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
  crossStageTrace: {
    readsFrom: [".cclaw/artifacts/03-design.md", ".cclaw/artifacts/02-scope.md"],
    writesTo: [".cclaw/artifacts/04-spec.md"],
    traceabilityRule:
      "Every acceptance criterion must trace to a design decision. Every downstream plan task must trace to a spec criterion.",
  },
  artifactValidation: specArtifactValidation,
};
@@ -0,0 +1,2 @@
1
+ import type { StageSchemaInput } from "./schema-types.js";
2
/**
 * Type-only declaration of the exported `TDD` constant, typed as the
 * `StageSchemaInput` imported from "./schema-types.js".
 */
+ export declare const TDD: StageSchemaInput;
@@ -0,0 +1,184 @@
1
// ---------------------------------------------------------------------------
// TDD — RED → GREEN → REFACTOR cycle (merged test + build)
// ---------------------------------------------------------------------------

/** Builds one `requiredGates` entry as `{ id, description }`. */
const tddGate = (id, description) => ({ id, description });

/** Builds one `reviewSections` entry; `stopGate` is stored after the evaluation points. */
const tddAudit = (title, stopGate, evaluationPoints) => ({ title, evaluationPoints, stopGate });

/** Builds one `artifactValidation` entry as `{ section, required, validationRule }`. */
const tddArtifactSection = (section, required, validationRule) => ({
  section,
  required,
  validationRule,
});

// Gates listed under `TDD.requiredGates`.
const tddRequiredGates = [
  tddGate("tdd_red_test_written", "Failing tests exist before implementation changes."),
  tddGate("tdd_red_failure_captured", "Failure output is captured as evidence."),
  tddGate("tdd_trace_to_acceptance", "RED tests trace to explicit acceptance criteria."),
  tddGate("tdd_red_failure_reason_verified", "Failure is for the expected reason, not an unrelated error."),
  tddGate("tdd_green_full_suite", "Full relevant suite passes in GREEN state."),
  tddGate("tdd_refactor_completed", "Refactor pass completed with behavior preservation verified."),
  tddGate("tdd_refactor_notes_written", "Refactor decisions and outcomes are documented."),
  tddGate("tdd_traceable_to_plan", "Change traceability to plan slice is explicit."),
];

// Review audits listed under `TDD.reviewSections`; the first two are stop gates, the rest are not.
const tddReviewSections = [
  tddAudit("RED Evidence Audit", true, [
    "Does every slice have a captured failing test output?",
    "Does each failure reason match the expected missing behavior (not a typo or config error)?",
    "Were tests written BEFORE any production code for that slice?",
    "Does each RED test assert observable behavior, not implementation details?",
    "Is there a test for each acceptance criterion mapped in the plan?",
  ]),
  tddAudit("GREEN/REFACTOR Audit", true, [
    "Does GREEN evidence show a FULL suite pass (not partial)?",
    "Is the GREEN implementation minimal — no features beyond what RED tests require?",
    "Does the REFACTOR step preserve all existing behavior (no new failures)?",
    "Are REFACTOR notes documented with rationale?",
    "Is traceability complete: every change links to plan task ID and spec criterion?",
  ]),
  tddAudit("Test Pyramid + Size Audit", false, [
    "Is the tests-added count skewed toward Small (unit) tests, with Medium and Large used only when a real boundary justifies the cost?",
    "Does every newly added test declare a size class (Small / Medium / Large) — either inline in the test file or in the TDD artifact table?",
    "Are Large tests reserved for genuine end-to-end user journeys (not substitutes for unit coverage)?",
    "Has the slice avoided using Medium/Large tests to paper over testability problems that should be fixed at the design layer?",
  ]),
  tddAudit("Prove-It Reproduction (bug-fix slices)", false, [
    "Does the artifact identify this slice as a bug fix, and if so, include a reproduction test checked in alongside the fix?",
    "Is there captured RED evidence from running the reproduction WITHOUT the fix applied?",
    "Is there captured GREEN evidence from the same reproduction AFTER the fix was applied?",
    "Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?",
  ]),
  tddAudit("State-over-Interaction + Beyoncé Coverage", false, [
    "Do assertions target observable state (return values, persisted data, HTTP responses, logs) rather than which internal helpers were called?",
    "Are mocks/spies used only at true trust boundaries (network, filesystem, time, external services), not for module-internal collaborators?",
    "For every public surface touched in this slice (exported API, CLI flag, config key, env var, exit code, schema field) — does at least one test observe it?",
    "If a bug or review finding revealed an uncovered surface, was a test added alongside the fix, not just the code change?",
    "Are interaction-style assertions (e.g. `toHaveBeenCalledWith` without a state assertion) justified by an explicit boundary comment, or flagged for follow-up?",
  ]),
];

// Per-section rules listed under `TDD.artifactValidation`; the boolean is the `required` flag.
const tddArtifactValidation = [
  tddArtifactSection("RED Evidence", true, "Failing test output captured per slice."),
  tddArtifactSection(
    "Acceptance Mapping",
    true,
    "Each RED test links to a plan task and spec criterion.",
  ),
  tddArtifactSection(
    "Failure Analysis",
    true,
    "Failure reason matches expected missing behavior.",
  ),
  tddArtifactSection("GREEN Evidence", true, "Full suite pass output captured."),
  tddArtifactSection(
    "REFACTOR Notes",
    true,
    "What changed, why, behavior preservation confirmed.",
  ),
  tddArtifactSection("Traceability", true, "Plan task ID and spec criterion linked."),
  tddArtifactSection(
    "Verification Ladder",
    false,
    "If present: per-slice verification tier (static, command, behavioral, human) with evidence for highest tier reached.",
  ),
  tddArtifactSection(
    "Coverage Targets",
    false,
    "If present: per-module or per-code-type coverage thresholds with current values and measurement commands.",
  ),
  tddArtifactSection(
    "Test Pyramid Shape",
    false,
    "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy.",
  ),
  tddArtifactSection(
    "Prove-It Reproduction",
    false,
    "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted.",
  ),
];

/**
 * Definition of the "tdd" stage: identity and skill metadata, guidance text
 * (checklist, protocol, process), gates and evidence, review audits, and the
 * validation rules for the `06-tdd.md` artifact. The stage hands over to
 * "review" via `next` and sets `waveExecutionAllowed` to true.
 */
export const TDD = {
  // Identity and skill metadata.
  stage: "tdd",
  skillFolder: "test-driven-development",
  skillName: "test-driven-development",
  skillDescription:
    "Full TDD cycle: RED (failing tests), GREEN (minimal implementation), REFACTOR (cleanup). One plan slice at a time with strict traceability.",
  hardGate:
    "Do NOT merge, ship, or skip review. Follow RED → GREEN → REFACTOR strictly for each plan slice. Do NOT write implementation code before RED tests exist. Do NOT skip the REFACTOR step.",
  ironLaw: "NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST — THE RED FAILURE IS THE SPEC.",
  purpose:
    "Implement features through the TDD cycle: write failing tests, make them pass with minimal code, then refactor.",

  // Applicability.
  whenToUse: [
    "After plan confirmation",
    "For every behavior change in scope",
    "Before review stage",
  ],
  whenNotToUse: [
    "Plan approval is still pending WAIT_FOR_CONFIRM",
    "The change is docs-only and does not alter behavior",
    "The stage intent is review/ship sign-off rather than implementation",
  ],

  // Working guidance.
  checklist: [
    "Select plan slice — pick one task from the plan. Do not batch multiple tasks.",
    "Map to acceptance criterion — identify the specific spec criterion this test proves.",
    "RED: Write behavior-focused test — test the expected behavior, not implementation details. Tests MUST fail.",
    "RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
    "GREEN: Minimal implementation — write the smallest code change that makes the RED tests pass. No extra features.",
    "GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
    "GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
    "REFACTOR: Improve code quality — without changing behavior. Document what you changed and why.",
    "Record evidence — capture RED failure, GREEN output, and REFACTOR notes in the TDD artifact.",
    "Annotate traceability — link to plan task ID and spec criterion.",
    "Repeat for each slice — return to step 1 for the next plan slice.",
  ],
  interactionProtocol: [
    "Pick one planned slice at a time.",
    "Write behavior-focused tests before changing implementation (RED).",
    "Capture and store failing output as RED evidence.",
    "Apply minimal change to satisfy RED tests (GREEN).",
    "Run full suite, not partial checks, for GREEN validation.",
    "Refactor without changing behavior and document rationale (REFACTOR).",
    "Stop if regressions appear and fix before proceeding.",
    "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?",
  ],
  process: [
    "Select slice and map to acceptance criterion.",
    "Write test(s) that fail for expected reason (RED).",
    "Run tests and capture failure output.",
    "Implement smallest change needed for GREEN.",
    "Run full tests and build checks.",
    "Perform refactor pass preserving behavior.",
    "Record RED, GREEN, and REFACTOR evidence in artifact.",
    "Annotate traceability to plan task and spec criterion.",
  ],

  // Gates, evidence, and stage I/O.
  requiredGates: tddRequiredGates,
  requiredEvidence: [
    "Artifact updated at `.cclaw/artifacts/06-tdd.md` with RED, GREEN, and REFACTOR sections.",
    "Failing command output captured (RED).",
    "Full test/build output recorded (GREEN).",
    "Acceptance mapping documented.",
    "Failure reason analysis recorded.",
    "Refactor rationale captured.",
    "Traceability to task identifier is documented.",
  ],
  inputs: [
    "approved plan slice",
    "spec acceptance criterion",
    "test harness configuration",
    "coding standards and constraints",
  ],
  requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
  outputs: [
    "failing test set",
    "passing implementation",
    "refactor evidence",
    "review-ready change set",
  ],
  blockers: [
    "tests pass before behavior change (RED failure missing)",
    "full suite not green",
    "behavior changed during refactor",
    "no evidence recorded",
  ],
  exitCriteria: [
    "RED evidence exists and is traceable",
    "GREEN evidence captured with full suite pass",
    "REFACTOR evidence captured",
    "required gates marked satisfied",
    "traceability annotated",
  ],

  // Failure modes to watch for.
  antiPatterns: [
    "Writing code before failing test",
    "Asserting implementation details instead of behavior",
    "Big-bang implementation across multiple slices",
    "Partial test runs presented as GREEN",
    "Skipping evidence capture",
    "Undocumented refactor changes",
    "Adding features beyond what RED tests require",
  ],
  redFlags: [
    "No failing test output (RED missing)",
    "Implementation edits appear before RED evidence",
    "No full-suite GREEN evidence",
    "No refactor notes",
    "Multiple tasks implemented in one pass without justification",
    "Files changed outside current slice scope",
  ],
  policyNeedles: [
    "RED",
    "GREEN",
    "REFACTOR",
    "failing test",
    "full test suite",
    "acceptance criteria",
    "traceable to plan slice",
  ],

  // Artifact, hand-off, and review.
  artifactFile: "06-tdd.md",
  next: "review",
  reviewSections: tddReviewSections,
  completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
  crossStageTrace: {
    readsFrom: [
      ".cclaw/artifacts/05-plan.md",
      ".cclaw/artifacts/04-spec.md",
      ".cclaw/artifacts/03-design.md",
    ],
    writesTo: [".cclaw/artifacts/06-tdd.md"],
    traceabilityRule:
      "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Design decisions inform test strategy. Evidence chain must be unbroken.",
  },
  artifactValidation: tddArtifactValidation,
  waveExecutionAllowed: true,
};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cclaw-cli",
3
- "version": "0.15.0",
3
+ "version": "0.15.1",
4
4
  "description": "Installer-first flow toolkit for coding agents",
5
5
  "type": "module",
6
6
  "bin": {