cclaw-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +100 -0
  3. package/dist/cli.d.ts +10 -0
  4. package/dist/cli.js +101 -0
  5. package/dist/config.d.ts +5 -0
  6. package/dist/config.js +70 -0
  7. package/dist/constants.d.ts +12 -0
  8. package/dist/constants.js +50 -0
  9. package/dist/content/agents.d.ts +39 -0
  10. package/dist/content/agents.js +244 -0
  11. package/dist/content/autoplan.d.ts +7 -0
  12. package/dist/content/autoplan.js +297 -0
  13. package/dist/content/contracts.d.ts +2 -0
  14. package/dist/content/contracts.js +50 -0
  15. package/dist/content/examples.d.ts +2 -0
  16. package/dist/content/examples.js +327 -0
  17. package/dist/content/hooks.d.ts +16 -0
  18. package/dist/content/hooks.js +753 -0
  19. package/dist/content/learnings.d.ts +5 -0
  20. package/dist/content/learnings.js +265 -0
  21. package/dist/content/meta-skill.d.ts +10 -0
  22. package/dist/content/meta-skill.js +137 -0
  23. package/dist/content/observe.d.ts +21 -0
  24. package/dist/content/observe.js +1110 -0
  25. package/dist/content/session-hooks.d.ts +7 -0
  26. package/dist/content/session-hooks.js +137 -0
  27. package/dist/content/skills.d.ts +3 -0
  28. package/dist/content/skills.js +257 -0
  29. package/dist/content/stage-schema.d.ts +78 -0
  30. package/dist/content/stage-schema.js +1453 -0
  31. package/dist/content/subagents.d.ts +13 -0
  32. package/dist/content/subagents.js +616 -0
  33. package/dist/content/templates.d.ts +3 -0
  34. package/dist/content/templates.js +272 -0
  35. package/dist/content/utility-skills.d.ts +12 -0
  36. package/dist/content/utility-skills.js +467 -0
  37. package/dist/doctor.d.ts +7 -0
  38. package/dist/doctor.js +610 -0
  39. package/dist/flow-state.d.ts +19 -0
  40. package/dist/flow-state.js +41 -0
  41. package/dist/fs-utils.d.ts +5 -0
  42. package/dist/fs-utils.js +28 -0
  43. package/dist/gitignore.d.ts +3 -0
  44. package/dist/gitignore.js +43 -0
  45. package/dist/harness-adapters.d.ts +12 -0
  46. package/dist/harness-adapters.js +175 -0
  47. package/dist/install.d.ts +9 -0
  48. package/dist/install.js +562 -0
  49. package/dist/learnings-summarizer.d.ts +25 -0
  50. package/dist/learnings-summarizer.js +201 -0
  51. package/dist/logger.d.ts +3 -0
  52. package/dist/logger.js +6 -0
  53. package/dist/policy.d.ts +6 -0
  54. package/dist/policy.js +179 -0
  55. package/dist/runs.d.ts +18 -0
  56. package/dist/runs.js +446 -0
  57. package/dist/types.d.ts +19 -0
  58. package/dist/types.js +12 -0
  59. package/package.json +47 -0
@@ -0,0 +1,1453 @@
1
+ import { COMMAND_FILE_ORDER } from "../constants.js";
2
+ // ---------------------------------------------------------------------------
3
+ // BRAINSTORM — reference: superpowers brainstorming
4
+ // ---------------------------------------------------------------------------
5
/**
 * Static definition of the "brainstorm" stage (skill folder/name: `brainstorming`).
 *
 * This is plain data: strings, arrays of strings, and small records. The object
 * names `.cclaw/artifacts/01-brainstorm.md` as its artifact and `scope` as the
 * following stage. How each field is consumed is decided by code outside this
 * block, so nothing here should be read as executable policy on its own.
 *
 * The value is built inside an immediately-invoked function only so the tiny
 * record builders below stay private to this definition.
 */
const BRAINSTORM = (() => {
  // Record builders. Each one emits its keys in a fixed order so the resulting
  // objects are indistinguishable from hand-written literals.
  const gate = (id, description) => ({ id, description });
  const rationalization = (claim, reality) => ({ claim, reality });
  const pattern = (name, description) => ({ name, description });
  const requiredSection = (section, validationRule) => ({
    section,
    required: true,
    validationRule,
  });

  return {
    // --- Identity and framing ------------------------------------------------
    stage: "brainstorm",
    skillFolder: "brainstorming",
    skillName: "brainstorming",
    skillDescription: "Design-first stage. Clarify intent, compare options, and get explicit approval before implementation planning.",
    hardGate: "Do NOT invoke any implementation skill, write any code, scaffold any project, or take any implementation action until you have presented a design and the user has approved it. This applies to EVERY project regardless of perceived simplicity.",
    purpose: "Turn a rough request into an approved design direction with clear assumptions and boundaries.",
    whenToUse: [
      "Starting a new feature or behavior change",
      "Requirements are ambiguous or solution path is unclear",
      "Before any implementation-stage command",
    ],

    // --- Step-by-step guidance -----------------------------------------------
    checklist: [
      "Explore project context — check files, docs, recent commits, existing behavior.",
      "Assess scope — if the request describes multiple independent subsystems, flag for decomposition before detailed questions.",
      "Ask clarifying questions — one at a time, understand purpose, constraints, success criteria. Prefer multiple choice.",
      "Propose 2-3 approaches — with trade-offs and your explicit recommendation with reasoning.",
      "Present design — in sections scaled to their complexity (few sentences if simple, up to 300 words if nuanced). Get approval after each section.",
      "Write design doc — save to `.cclaw/artifacts/01-brainstorm.md`.",
      "Self-review — scan for placeholders, TBDs, contradictions, ambiguity, scope creep. Fix inline.",
      "User reviews written artifact — ask user to review before proceeding. Wait for response.",
      "Transition — invoke /cc-scope only after explicit user approval.",
    ],
    interactionProtocol: [
      "Explore context first (files, docs, existing behavior).",
      "Ask one clarifying question per message. Do NOT combine questions.",
      "For approach selection: use the Decision Protocol — present labeled options (A/B/C) with trade-offs, mark one as (recommended), use AskQuestion/AskUserQuestion tool when available.",
      "Get section-by-section approval before finalizing the design direction.",
      "Run a self-review pass (ambiguity, placeholders, contradictions) before handoff.",
      "Wait for explicit user approval after writing the artifact. Do NOT auto-advance.",
    ],
    process: [
      "Capture problem statement, users, constraints, and success criteria.",
      "Identify whether request should be decomposed into smaller sub-problems.",
      "Offer alternatives and recommendation with rationale.",
      "Present design in sections, ask after each section whether it looks right.",
      "Write artifact with validated design.",
      "Run self-review: placeholder scan, internal consistency, scope check, ambiguity check.",
      "Ask user to review the written spec. Wait for changes or approval.",
      "Handoff to scope stage only after approval is explicit.",
    ],

    // --- Gates and evidence --------------------------------------------------
    requiredGates: [
      gate("brainstorm_context_explored", "Project context and constraints have been reviewed."),
      gate("brainstorm_options_compared", "At least two alternatives were compared with trade-offs."),
      gate("brainstorm_design_approved", "User approved a concrete design direction."),
      gate("brainstorm_self_review_passed", "Design doc passed placeholder/ambiguity/consistency checks."),
      gate("brainstorm_user_reviewed_artifact", "User reviewed the written artifact and confirmed readiness."),
    ],
    requiredEvidence: [
      "Artifact written to `.cclaw/artifacts/01-brainstorm.md`.",
      "Approved direction captured in artifact.",
      "Open questions explicitly listed (if any).",
      "Self-review pass completed with no unresolved issues.",
    ],

    // --- Inputs, outputs, and exit conditions --------------------------------
    inputs: ["problem statement", "constraints", "success criteria"],
    requiredContext: [
      "existing project docs and patterns",
      "current behavior of affected area",
      "business and delivery constraints",
    ],
    outputs: [
      "approved design direction",
      "alternatives and trade-off table",
      "brainstorm artifact",
    ],
    blockers: [
      "no explicit approval",
      "critical ambiguity unresolved",
      "scope too broad and not decomposed",
    ],
    exitCriteria: [
      "approved design direction documented",
      "required gates marked satisfied",
      "no implementation action taken",
      "self-review completed with fixes applied",
    ],

    // --- Failure patterns to watch for ---------------------------------------
    antiPatterns: [
      "Skipping design because task seems simple",
      "Asking many questions in one message",
      "Jumping directly into implementation",
      "Combining visual companion offer with a clarifying question",
      "Invoking implementation skills before writing plans",
    ],
    rationalizations: [
      rationalization("This is too simple for design.", "Simple tasks fail fast when assumptions are wrong; a short design pass prevents rework."),
      rationalization("We can figure it out while coding.", "Coding before alignment creates churn and hidden scope growth."),
      rationalization("There is only one obvious approach.", "Without alternatives, trade-offs stay implicit and risk goes unexamined."),
      rationalization("The user already knows what they want.", "Unstated assumptions diverge during implementation; explicit design surfaces them early."),
    ],
    redFlags: [
      "No alternatives documented",
      "No explicit approval checkpoint",
      "Implementation-related actions before approval",
      "Self-review skipped or glossed over",
      "Artifact has TBD or placeholder sections",
    ],
    policyNeedles: [
      "One clarifying question per message",
      "2-3 approaches with trade-offs",
      "Do NOT implement, scaffold, or modify behavior",
    ],

    // --- Artifact and stage linkage ------------------------------------------
    artifactFile: "01-brainstorm.md",
    next: "scope",
    cognitivePatterns: [
      pattern("Divergent-Convergent Thinking", "First expand the solution space widely, then converge on the strongest option. Do not skip the divergent phase."),
      pattern("YAGNI Ruthlessness", "Remove unnecessary features from all designs. Every feature must earn its place against the cost of complexity."),
      pattern("Decomposition Instinct", "When a request describes multiple independent subsystems, decompose before refining. Each sub-project gets its own cycle."),
      pattern("Isolation Preference", "Break the system into units that each have one clear purpose, communicate through well-defined interfaces, and can be understood and tested independently."),
    ],
    // This stage declares no review sections.
    reviewSections: [],
    completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
    crossStageTrace: {
      // First stage in the visible chain: reads no upstream artifact.
      readsFrom: [],
      writesTo: [".cclaw/artifacts/01-brainstorm.md"],
      traceabilityRule: "Every approved direction must be traceable forward through scope and design. Downstream stages must reference brainstorm decisions.",
    },
    artifactValidation: [
      requiredSection("Problem Statement", "Must describe the user problem, not the solution."),
      requiredSection("Alternatives Table", "At least 2 approaches with trade-offs and recommendation."),
      requiredSection("Approved Direction", "Must contain explicit approval marker from user."),
      requiredSection("Open Questions", "If empty, state 'None' explicitly. Do not omit."),
    ],
    namedAntiPattern: {
      title: "This Is Too Simple To Need A Design",
      description: "Every project goes through this process. A todo list, a single-function utility, a config change — all of them. 'Simple' projects are where unexamined assumptions cause the most wasted work. The design can be short (a few sentences for truly simple projects), but you MUST present it and get approval.",
    },
  };
})();
132
+ // ---------------------------------------------------------------------------
133
+ // SCOPE — reference: gstack CEO review
134
+ // ---------------------------------------------------------------------------
135
/**
 * Static definition of the "scope" stage (skill folder/name: `scope-shaping`).
 *
 * This is plain data: strings, arrays of strings, and small records. The object
 * lists `.cclaw/artifacts/01-brainstorm.md` as its upstream artifact, writes
 * `.cclaw/artifacts/02-scope.md`, and names `design` as the following stage.
 * How each field is consumed is decided by code outside this block.
 *
 * The value is built inside an immediately-invoked function only so the tiny
 * record builders below stay private to this definition.
 */
const SCOPE = (() => {
  // Record builders. Each one emits its keys in a fixed order so the resulting
  // objects are indistinguishable from hand-written literals.
  const gate = (id, description) => ({ id, description });
  const rationalization = (claim, reality) => ({ claim, reality });
  const pattern = (name, description) => ({ name, description });
  const requiredSection = (section, validationRule) => ({
    section,
    required: true,
    validationRule,
  });
  // Every review section of this stage carries `stopGate: true`.
  const stopGatedReview = (title, evaluationPoints) => ({
    title,
    evaluationPoints,
    stopGate: true,
  });

  return {
    // --- Identity and framing ------------------------------------------------
    stage: "scope",
    skillFolder: "scope-shaping",
    skillName: "scope-shaping",
    skillDescription: "Strategic scope stage. Challenge premise and lock explicit in-scope/out-of-scope boundaries using CEO-level thinking.",
    hardGate: "Do NOT begin architecture, design, or code. This stage produces scope decisions only. Do not silently add or remove scope — every change is an explicit user opt-in.",
    purpose: "Decide the right scope before technical lock-in using explicit mode selection and rigorous premise challenge.",
    whenToUse: [
      "After brainstorm approval",
      "Before architecture/design lock-in",
      "When ambition vs feasibility trade-off is unclear",
    ],

    // --- Step-by-step guidance -----------------------------------------------
    checklist: [
      "Prime Directives — Zero silent failures (every failure mode visible). Every error has a name (not 'handle errors' — name the exception). Every data flow has four paths: happy, nil input, upstream error, downstream timeout. Observability is a scope deliverable, not a post-launch add-on.",
      "Premise Challenge — Is this the right problem? Could a different framing yield a simpler or more impactful solution? What happens if we do nothing? What are we optimizing for — speed, quality, cost, user experience?",
      "Existing Code Leverage — Map every sub-problem to existing code. Run searches (grep, codebase exploration) BEFORE deciding to build new. If built-in or library solutions exist, default to them.",
      "Dream State Mapping — Describe the ideal end state 12 months from now. Does this plan move toward that state or away from it?",
      "Implementation Alternatives (MANDATORY) — Produce 2-3 distinct approaches. One must be 'minimal viable', one must be 'ideal architecture'. Include effort/risk/reversibility for each.",
      "Temporal Interrogation — Think in time slices: HOUR 1 (foundation, what must exist first), HOURS 2-3 (core logic, what builds on foundation), HOURS 4-5 (integration, what connects the pieces), HOUR 6+ (polish, what can wait). What decisions must be locked NOW vs deferred to implementation?",
      "Mode Selection with Default Heuristic — Present four options: SCOPE EXPANSION (dream big), SELECTIVE EXPANSION (hold scope + cherry-pick), HOLD SCOPE (maximum rigor), SCOPE REDUCTION (strip to essentials). Suggest default: greenfield → EXPANSION, bug/hotfix → HOLD, >15 files touched → suggest REDUCTION. Once selected, commit fully.",
      "Error & Rescue Registry — For every new capability in scope: what breaks if it fails? How is the failure detected? What is the fallback? This is scope, not design — decide WHAT to protect, not HOW.",
    ],
    interactionProtocol: [
      "For scope mode selection: use the Decision Protocol — present expand/selective/hold/reduce as labeled options with trade-offs, mark one as (recommended), use AskQuestion/AskUserQuestion tool when available.",
      "Challenge premise and verify the problem framing before anything else.",
      "Present one structural scope issue at a time for decision. Do NOT batch. Use structured options for each scope boundary question.",
      "Record explicit in-scope and out-of-scope contract.",
      "Once the user accepts or rejects a recommendation, commit fully. Do not re-argue.",
      "Produce a clean scope summary after all issues are resolved.",
    ],
    process: [
      "Run premise challenge and existing-solution leverage check.",
      "Produce 2-3 scope alternatives (minimum viable + ideal included).",
      "Choose scope mode with user approval.",
      "Walk through scope review sections one at a time.",
      "Write explicit scope contract and deferred items.",
      "Produce scope summary with mode, in-scope, out-of-scope, and deferred.",
    ],

    // --- Gates and evidence --------------------------------------------------
    requiredGates: [
      gate("scope_premise_challenged", "Problem framing and assumptions were challenged."),
      gate("scope_alternatives_produced", "At least 2 implementation alternatives with effort/risk evaluated."),
      gate("scope_mode_selected", "One scope mode was explicitly selected."),
      gate("scope_contract_written", "In-scope/out-of-scope contract is documented."),
      gate("scope_user_approved", "User approved the final scope direction."),
    ],
    requiredEvidence: [
      "Artifact written to `.cclaw/artifacts/02-scope.md`.",
      "In-scope and out-of-scope lists are explicit.",
      "Selected mode and rationale are documented.",
      "Premise challenge findings documented.",
      "Deferred items list with one-line rationale for each.",
    ],

    // --- Inputs, outputs, and exit conditions --------------------------------
    inputs: ["brainstorm artifact", "timeline constraints", "product priorities"],
    requiredContext: [
      "approved brainstorm direction",
      "existing capabilities and reusable components",
      "delivery deadlines and risk tolerance",
    ],
    outputs: ["scope mode decision", "scope contract", "deferred scope list", "scope summary"],
    blockers: [
      "scope mode not selected",
      "in/out boundaries ambiguous",
      "critical premise disagreement unresolved",
    ],
    exitCriteria: [
      "scope contract approved by user",
      "required gates marked satisfied",
      "deferred list recorded explicitly",
      "scope summary produced",
    ],

    // --- Failure patterns to watch for ---------------------------------------
    antiPatterns: [
      "Scope silently expanded during discussion",
      "No explicit out-of-scope section",
      "Premise accepted without challenge",
      "Batching multiple scope issues into one question",
      "Re-arguing for smaller scope after user rejects reduction",
    ],
    rationalizations: [
      rationalization("Scope can be finalized during implementation.", "Late scope decisions create architecture churn and missed deadlines."),
      rationalization("Mode selection is unnecessary overhead.", "Mode selection makes trade-offs explicit and prevents silent drift."),
      rationalization("Out-of-scope is obvious.", "Unwritten exclusions return later as hidden requirements."),
      rationalization("We do not need alternatives for a clear request.", "Even clear requests benefit from a minimal-viable vs ideal comparison."),
    ],
    redFlags: [
      "No selected mode in artifact",
      "No deferred/not-in-scope section",
      "No user approval marker",
      "Premise challenge missing or superficial",
      "No implementation alternatives evaluated",
    ],
    policyNeedles: ["Scope mode", "In Scope", "Out of Scope", "NOT in scope", "Premise Challenge"],

    // --- Artifact and stage linkage ------------------------------------------
    artifactFile: "02-scope.md",
    next: "design",
    cognitivePatterns: [
      pattern("Classification Instinct", "Categorize every decision by reversibility x magnitude. Most things are two-way doors — move fast. Only slow down for irreversible + high-magnitude decisions."),
      pattern("Inversion Reflex", "For every 'how do we win?' also ask 'what would make us fail?' Map failure modes before committing to scope."),
      pattern("Focus as Subtraction", "Primary value-add is what to NOT do. Default: do fewer things, better. Every feature must earn its place."),
      pattern("Speed Calibration", "Fast is default. Only slow down for irreversible + high-magnitude decisions. 70% information is enough to decide."),
      pattern("Leverage Obsession", "Find inputs where small effort creates massive output. Reuse existing code aggressively. Build new only when nothing exists."),
      pattern("Proxy Skepticism", "Is this metric/feature solving the actual problem or a proxy for it? Ask: if this succeeds perfectly, does the user's real problem go away?"),
      pattern("Narrative Coherence", "The scope should tell a story: problem → insight → solution → impact. If you cannot tell that story in two sentences, scope is too broad or misframed."),
      pattern("Blast Radius Awareness", "For every scope item, count how many systems/files/teams it touches. High blast radius = high risk = needs explicit justification."),
    ],
    reviewSections: [
      stopGatedReview("Scope Boundary Audit", [
        "Are all in-scope items justified by the problem statement?",
        "Are any in-scope items actually solving a proxy problem instead of the real one?",
        "Could any in-scope item be deferred without blocking the core objective?",
      ]),
      stopGatedReview("Deferred Items Review", [
        "Does each deferred item have a one-line rationale?",
        "Are any deferred items actually blockers for the core scope?",
        "Will deferring these items create technical debt that is expensive to unwind?",
      ]),
      stopGatedReview("Risk and Reversibility Check", [
        "For each major scope decision: is it reversible?",
        "What is the blast radius if this decision is wrong?",
        "Are there hidden dependencies between in-scope and out-of-scope items?",
      ]),
      stopGatedReview("Existing-Code Reuse Check", [
        "Has every sub-problem been mapped to existing code?",
        "Is the plan rebuilding anything that already exists?",
        "Are there integration opportunities that reduce new code?",
        "Have you searched for built-in or library solutions before scoping custom work?",
      ]),
      stopGatedReview("Error & Rescue Scope Check", [
        "For every new capability: what breaks if it fails?",
        "Is failure detection in scope or deferred? If deferred, is that acceptable?",
        "Are there rescue/fallback paths for critical user journeys?",
        "Is observability (logging, metrics, alerts) explicitly in or out of scope?",
      ]),
    ],
    completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
    crossStageTrace: {
      readsFrom: [".cclaw/artifacts/01-brainstorm.md"],
      writesTo: [".cclaw/artifacts/02-scope.md"],
      traceabilityRule: "Every scope boundary must be traceable to a brainstorm decision. Every downstream design choice must stay within the scope contract.",
    },
    artifactValidation: [
      requiredSection("Prime Directives", "Named error modes for each capability. Four paths per data flow."),
      requiredSection("Premise Challenge", "Must contain explicit answers to: right problem? direct path? what if nothing?"),
      requiredSection("Scope Mode", "Must state selected mode and rationale with default heuristic justification."),
      requiredSection("In Scope / Out of Scope", "Two separate explicit lists. Out-of-scope must not be empty."),
      requiredSection("Deferred Items", "Each item has one-line rationale. If empty, state 'None' explicitly."),
      requiredSection("Error & Rescue Registry", "Each scoped capability has: failure mode, detection method, fallback decision."),
      requiredSection("Scope Summary", "Clean summary: mode, strongest challenges, recommended path, accepted scope, deferred, excluded."),
    ],
    namedAntiPattern: {
      title: "Scope Is Obvious From Context",
      description: "Scope is never obvious. Unstated boundaries return as hidden requirements during implementation. Even when a request seems perfectly clear, the act of writing explicit in-scope and out-of-scope lists reveals assumptions that would otherwise surface as late surprises.",
    },
  };
})();
307
+ // ---------------------------------------------------------------------------
308
+ // DESIGN — reference: gstack Eng review
309
+ // ---------------------------------------------------------------------------
310
+ const DESIGN = {
311
+ stage: "design",
312
+ skillFolder: "engineering-design-lock",
313
+ skillName: "engineering-design-lock",
314
+ skillDescription: "Engineering lock-in stage. Build a concrete technical spine before spec and planning, with section-by-section interactive review.",
315
+ hardGate: "Do NOT write implementation code. This stage produces design decisions and architecture documents only. No code changes, no scaffolding, no test files.",
316
+ purpose: "Lock architecture, data flow, failure modes, and test/performance expectations through rigorous interactive review.",
317
+ whenToUse: [
318
+ "After scope contract approval",
319
+ "Before writing final spec and execution plan",
320
+ "When architecture risks need explicit treatment"
321
+ ],
322
+ checklist: [
323
+ "Design Doc Check — read existing design docs, scope artifact, brainstorm artifact. If a design doc exists that covers this area, check for 'Supersedes:' and use the latest. Use upstream artifacts as source of truth.",
324
+ "Step 0: Scope Challenge — what existing code solves sub-problems? Minimum change set? Complexity check: 8+ files or 2+ new services = complexity smell → flag for possible scope reduction.",
325
+ "Search Before Building — For each technical choice (library, pattern, architecture), search for existing solutions. Label findings: Layer 1 (exact match), Layer 2 (partial match, needs adaptation), Layer 3 (inspiration only), EUREKA (unexpected perfect solution). Default to existing before custom.",
326
+ "Architecture Review — system design, component boundaries, data flow, scaling, security architecture. For each new codepath: one realistic production failure scenario.",
327
+ "Code Quality Review — code organization, DRY violations, error handling patterns, over/under-engineering assessment.",
328
+ "Test Review — diagram every new flow, data path, error path. For each: what test type covers it? Does one exist? What is the gap? Produce test plan artifact.",
329
+ "Performance Review — N+1 queries, memory concerns, caching opportunities, slow code paths. What breaks at 10x load? At 100x?",
330
+ "Parallelization Strategy — If multiple independent modules, produce dependency table: which can be built in parallel? Where are conflict risks? Flag shared-state modules.",
331
+ "Unresolved Decisions — List any design decisions that could not be resolved in this session. For each: what information is missing? Who can provide it? What is the default if no answer comes?",
332
+ "Distribution Check — If the plan creates new artifact types (packages, CLI tools, configs), document the build/publish story. How does it reach the user?"
333
+ ],
334
+ interactionProtocol: [
335
+ "Review architecture decisions section-by-section.",
336
+ "For EACH issue found in a review section, present it ONE AT A TIME. Do NOT batch multiple issues.",
337
+ "For each issue: use the Decision Protocol — describe concretely with file/line references, present labeled options (A/B/C) with trade-offs, mark one as (recommended), use AskQuestion/AskUserQuestion tool when available.",
338
+ "Only proceed to the next review section after ALL issues in the current section are resolved.",
339
+ "If a section has no issues, say 'No issues found' and move on.",
340
+ "Do not skip failure-mode mapping.",
341
+ "For design baseline approval: present the full baseline and wait for explicit user approval."
342
+ ],
343
+ process: [
344
+ "Read upstream artifacts (brainstorm, scope).",
345
+ "Run Step 0 scope challenge: existing code leverage, minimum change set, complexity check.",
346
+ "Walk through each review section interactively.",
347
+ "Define architecture boundaries and ownership.",
348
+ "Describe data flow and state transitions with edge paths.",
349
+ "Map failure modes and recovery strategy.",
350
+ "Define test coverage strategy and performance budget.",
351
+ "Produce required outputs: NOT-in-scope section, What-already-exists section, diagrams, failure mode table.",
352
+ "Write design lock artifact for downstream spec/plan."
353
+ ],
354
+ requiredGates: [
355
+ { id: "design_scope_challenge_done", description: "Step 0 scope challenge completed with existing-code mapping." },
356
+ { id: "design_architecture_locked", description: "Architecture boundaries are explicit and approved." },
357
+ { id: "design_data_flow_mapped", description: "Data/state flow includes edge-case paths." },
358
+ { id: "design_failure_modes_mapped", description: "Failure modes and mitigations are documented." },
359
+ { id: "design_test_and_perf_defined", description: "Test strategy and performance budget are defined." }
360
+ ],
361
+ requiredEvidence: [
362
+ "Artifact written to `.cclaw/artifacts/03-design.md`.",
363
+ "Failure-mode table exists with mitigations.",
364
+ "Test strategy includes unit/integration/e2e expectations.",
365
+ "NOT-in-scope section produced.",
366
+ "What-already-exists section produced."
367
+ ],
368
+ inputs: ["scope contract", "system constraints", "non-functional requirements"],
369
+ requiredContext: [
370
+ "existing architecture and boundaries",
371
+ "operational constraints",
372
+ "security and reliability expectations"
373
+ ],
374
+ outputs: [
375
+ "architecture lock",
376
+ "risk and failure map",
377
+ "test and performance baseline",
378
+ "NOT-in-scope section",
379
+ "What-already-exists section"
380
+ ],
381
+ blockers: [
382
+ "architecture ambiguity remains",
383
+ "failure modes not mapped",
384
+ "test/performance targets missing"
385
+ ],
386
+ exitCriteria: [
387
+ "design baseline approved",
388
+ "all review sections completed",
389
+ "required gates marked satisfied",
390
+ "artifact complete for spec handoff"
391
+ ],
392
+ antiPatterns: [
393
+ "Architecture deferred to implementation phase",
394
+ "Missing data-flow edge cases",
395
+ "No performance budget for critical path",
396
+ "Batching multiple design issues into one question",
397
+ "Skipping review sections because plan seems simple"
398
+ ],
399
+ rationalizations: [
400
+ { claim: "Architecture can emerge incrementally while coding.", reality: "Unplanned architecture decisions cause incompatible module boundaries." },
401
+ { claim: "Failure modes are edge cases we can ignore for now.", reality: "Production incidents usually come from unplanned edge paths." },
402
+ { claim: "Performance can be optimized after launch.", reality: "Missing performance budgets make regressions invisible until late." },
403
+ { claim: "This is a strategy doc so implementation sections do not apply.", reality: "Implementation details are where strategy breaks down. Every section must be evaluated." }
404
+ ],
405
+ redFlags: [
406
+ "No explicit architecture boundary section",
407
+ "No failure recovery strategy",
408
+ "No defined test/perf baseline",
409
+ "Review sections skipped or condensed",
410
+ "No NOT-in-scope output section"
411
+ ],
412
+ policyNeedles: [
413
+ "Architecture",
414
+ "Data Flow",
415
+ "Failure Modes and Mitigation",
416
+ "Performance Budget",
417
+ "One issue at a time"
418
+ ],
419
+ artifactFile: "03-design.md",
420
+ next: "spec",
421
+ cognitivePatterns: [
422
+ { name: "Boring By Default", description: "Every company gets about three innovation tokens. Everything else should be proven technology. If the plan rolls a custom solution where a built-in exists, flag it." },
423
+ { name: "Incremental Over Revolutionary", description: "Strangler fig, not big bang. Canary, not global rollout. Refactor, not rewrite." },
424
+ { name: "Systems Over Heroes", description: "Design for tired humans at 3am, not your best engineer on their best day. If it requires heroics to operate, the design is wrong." },
425
+ { name: "Essential vs Accidental Complexity", description: "Before adding anything: is this solving a real problem or one we created? Distinguish essential complexity from accidental." },
426
+ { name: "Blast Radius Instinct", description: "Every decision evaluated through: what is the worst case and how many systems/people does it affect?" },
427
+ { name: "Completeness Push", description: "AI effort is cheap. Push for completeness in plans: cover all files in blast radius, all edge cases in touched code, all affected tests. Favor doing it now over creating a TODO." },
428
+ { name: "Owner Preference Alignment", description: "Every recommendation must align with project conventions (DRY, test style, minimal diff, edge-case rigor). Read existing patterns before recommending new ones." }
429
+ ],
430
+ reviewSections: [
431
+ {
432
+ title: "Architecture Review",
433
+ evaluationPoints: [
434
+ "System design and component boundaries",
435
+ "Dependency graph and coupling concerns",
436
+ "Data flow patterns and potential bottlenecks",
437
+ "Scaling characteristics and single points of failure",
438
+ "Security architecture (auth, data access, API boundaries)",
439
+ "For each new codepath: one realistic production failure scenario"
440
+ ],
441
+ stopGate: true
442
+ },
443
+ {
444
+ title: "Code Quality Review",
445
+ evaluationPoints: [
446
+ "Code organization and module structure",
447
+ "DRY violations — flag aggressively",
448
+ "Error handling patterns and missing edge cases",
449
+ "Over-engineered or under-engineered areas",
450
+ "Existing diagrams in touched files — still accurate?"
451
+ ],
452
+ stopGate: true
453
+ },
454
+ {
455
+ title: "Test Review",
456
+ evaluationPoints: [
457
+ "Diagram every new UX flow, data flow, codepath, background job, integration, error path",
458
+ "For each: what type of test covers it? Does one exist? What is the gap?",
459
+ "Coverage expectations: unit, integration, e2e split"
460
+ ],
461
+ stopGate: true
462
+ },
463
+ {
464
+ title: "Performance Review",
465
+ evaluationPoints: [
466
+ "N+1 queries and database access patterns",
467
+ "Memory-usage concerns",
468
+ "Caching opportunities",
469
+ "Slow or high-complexity code paths",
470
+ "What breaks at 10x load? At 100x?"
471
+ ],
472
+ stopGate: true
473
+ },
474
+ {
475
+ title: "Distribution & Delivery Review",
476
+ evaluationPoints: [
477
+ "If new artifact types are created (packages, CLI, configs): is the build/publish story documented?",
478
+ "Are there new dependencies that need version pinning?",
479
+ "Does the change affect existing consumers (APIs, shared modules)?",
480
+ "Is backwards compatibility maintained or is a migration needed?"
481
+ ],
482
+ stopGate: false
483
+ }
484
+ ],
485
+ completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
486
+ crossStageTrace: {
487
+ readsFrom: [".cclaw/artifacts/01-brainstorm.md", ".cclaw/artifacts/02-scope.md"],
488
+ writesTo: [".cclaw/artifacts/03-design.md"],
489
+ traceabilityRule: "Every architecture decision must trace to a scope boundary. Every downstream spec requirement must trace to a design decision."
490
+ },
491
+ artifactValidation: [
492
+ { section: "Architecture Boundaries", required: true, validationRule: "Must list component boundaries with ownership." },
493
+ { section: "Data Flow", required: true, validationRule: "Must include happy path, nil input, empty input, upstream error paths." },
494
+ { section: "Failure Mode Table", required: true, validationRule: "Each failure mode has: trigger, detection, mitigation, user impact." },
495
+ { section: "Test Strategy", required: true, validationRule: "Must define unit/integration/e2e expectations with coverage targets." },
496
+ { section: "NOT in scope", required: true, validationRule: "Work considered and explicitly deferred with one-line rationale." },
497
+ { section: "Parallelization Strategy", required: false, validationRule: "If multi-module: dependency table, parallel lanes, conflict flags." },
498
+ { section: "Unresolved Decisions", required: false, validationRule: "If any: what info is missing, who provides it, default if unanswered." }
499
+ ],
500
+ namedAntiPattern: {
501
+ title: "Architecture Will Emerge While Coding",
502
+ description: "Emergent architecture is a myth for non-trivial systems. What actually emerges is accidental complexity, incompatible module boundaries, and tech debt that costs 10x to fix later. Lock architecture explicitly before writing code."
503
+ },
504
+ decisionRecordFormat: `### Decision: [TITLE]
505
+ **Status:** Proposed | Accepted | Rejected
506
+ **Context:** [What is the situation?]
507
+ **Options:**
508
+ - A: [option] — effort: [S/M/L], risk: [low/med/high]
509
+ - B: [option] — effort: [S/M/L], risk: [low/med/high]
510
+ **Decision:** [chosen option]
511
+ **Rationale:** [why this option over others]
512
+ **Consequences:** [what changes as a result]`
513
+ };
514
+ // ---------------------------------------------------------------------------
514
+ // SPEC — specification stage: turns the approved design into measurable, testable acceptance criteria; writes 04-spec.md and hands off to "plan".
515
+ // ---------------------------------------------------------------------------
516
+ const SPEC = { // Plain data literal describing the "spec" stage; it holds only strings, booleans, arrays and nested objects.
517
+ stage: "spec",
518
+ skillFolder: "specification-authoring", // same string as skillName on the next line
519
+ skillName: "specification-authoring",
520
+ skillDescription: "Specification stage. Produce measurable, testable requirements without ambiguity.",
521
+ hardGate: "Do NOT plan tasks or write implementation code. This stage produces a specification document only. Every requirement must be expressed in observable, testable terms.",
522
+ purpose: "Create a testable specification aligned with approved design and constraints.",
523
+ whenToUse: [
524
+ "After design lock",
525
+ "Before planning and implementation",
526
+ "When acceptance criteria must be measurable"
527
+ ],
528
+ checklist: [
529
+ "Read upstream — load design artifact and scope contract. Cross-reference architecture decisions.",
530
+ "Define measurable acceptance criteria — each criterion must be observable and falsifiable. No vague adjectives.",
531
+ "Capture edge cases — for each criterion, define at least one boundary condition and one error condition.",
532
+ "Document constraints and assumptions — regulatory, system, integration, and performance boundaries. Surface implicit assumptions explicitly.",
533
+ "Confirm testability — for each acceptance criterion, describe the test that would prove it. If untestable, rewrite the criterion.",
534
+ "Write spec artifact and request user approval — wait for explicit confirmation before proceeding."
535
+ ],
536
+ interactionProtocol: [
537
+ "Express each requirement in observable terms.",
538
+ "Resolve ambiguity before moving to plan. Challenge vague language.",
539
+ "Capture assumptions explicitly, not implicitly.",
540
+ "Require user confirmation on the written spec.",
541
+ "For each criterion, ask: how would you test this? If the answer is unclear, rewrite."
542
+ ],
543
+ process: [
544
+ "Define measurable acceptance criteria.",
545
+ "Capture constraints, assumptions, and edge cases.",
546
+ "Build testability map: criterion -> test description.",
547
+ "Confirm testability for each criterion.",
548
+ "Write spec artifact and request approval."
549
+ ],
550
+ requiredGates: [ // each gate is { id, description }; every id in this stage carries the "spec_" prefix
551
+ { id: "spec_acceptance_measurable", description: "Acceptance criteria are measurable and observable." },
552
+ { id: "spec_edge_cases_documented", description: "Boundary and error conditions are defined for each criterion." },
553
+ { id: "spec_constraints_documented", description: "Constraints and assumptions are explicit." },
554
+ { id: "spec_testability_confirmed", description: "Each criterion has a described test method." },
555
+ { id: "spec_user_approved", description: "User approved the final written spec." }
556
+ ],
557
+ requiredEvidence: [
558
+ "Artifact written to `.cclaw/artifacts/04-spec.md`.",
559
+ "Each acceptance criterion maps to a testable outcome.",
560
+ "Edge cases documented per criterion.",
561
+ "Approval marker captured in artifact."
562
+ ],
563
+ inputs: ["design artifact", "business constraints", "quality requirements"],
564
+ requiredContext: [
565
+ "design lock baseline",
566
+ "regulatory or system boundaries",
567
+ "integration constraints"
568
+ ],
569
+ outputs: [
570
+ "measurable specification",
571
+ "acceptance-to-testability map",
572
+ "approved spec artifact"
573
+ ],
574
+ blockers: [
575
+ "non-measurable criteria",
576
+ "constraints missing",
577
+ "open ambiguities remain"
578
+ ],
579
+ exitCriteria: [
580
+ "spec approved by user",
581
+ "required gates marked satisfied",
582
+ "plan-ready acceptance mapping exists",
583
+ "testability confirmed for all criteria"
584
+ ],
585
+ antiPatterns: [
586
+ "High-level goals without measurable outcomes",
587
+ "Implicit assumptions",
588
+ "Proceeding to plan before approval",
589
+ "Using vague adjectives (fast, intuitive, robust) without thresholds"
590
+ ],
591
+ rationalizations: [ // entries are { claim, reality } pairs
592
+ { claim: "The implementation will clarify this requirement.", reality: "Unclear specs create rework and contradictory implementations." },
593
+ { claim: "Acceptance criteria do not need to be measurable.", reality: "Without measurability, verification becomes subjective." },
594
+ { claim: "We can skip explicit approval to save time.", reality: "Skipping approval shifts uncertainty into later, costlier stages." },
595
+ { claim: "Edge cases are implementation details.", reality: "Edge cases determine acceptance boundaries; specifying them prevents scope creep." }
596
+ ],
597
+ redFlags: [
598
+ "Criteria use vague language (fast, intuitive, robust) without thresholds",
599
+ "No explicit assumptions section",
600
+ "No approval record",
601
+ "No testability mapping",
602
+ "Edge cases missing or deferred"
603
+ ],
604
+ policyNeedles: ["Acceptance Criteria", "Constraints", "Testability", "approved spec", "Edge Cases"], // NOTE(review): presumably phrases searched for in generated output; the consumer is not visible here — confirm
605
+ artifactFile: "04-spec.md", // basename of the path listed in crossStageTrace.writesTo below
606
+ next: "plan", // equals the `stage` value of the PLAN object
607
+ cognitivePatterns: [
608
+ { name: "Observable Over Descriptive", description: "Requirements describe what can be observed, not what should feel like. Replace every adjective with a measurement." },
609
+ { name: "Boundary Precision", description: "Every acceptance criterion has boundary conditions. What is the minimum valid input? Maximum? What happens at the edges?" },
610
+ { name: "Assumption Surfacing", description: "Implicit assumptions are invisible requirements. Force every assumption into an explicit statement. If you cannot name the assumption, you have not found it yet." }
611
+ ],
612
+ reviewSections: [], // no review sections are defined for this stage
613
+ completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
614
+ crossStageTrace: { // upstream artifacts listed in readsFrom, the artifact this stage produces in writesTo
615
+ readsFrom: [".cclaw/artifacts/03-design.md", ".cclaw/artifacts/02-scope.md"],
616
+ writesTo: [".cclaw/artifacts/04-spec.md"],
617
+ traceabilityRule: "Every acceptance criterion must trace to a design decision. Every downstream plan task must trace to a spec criterion."
618
+ },
619
+ artifactValidation: [ // entries are { section, required, validationRule }; all five sections are marked required: true
620
+ { section: "Acceptance Criteria", required: true, validationRule: "Each criterion is observable, measurable, and falsifiable." },
621
+ { section: "Edge Cases", required: true, validationRule: "At least one boundary and one error condition per criterion." },
622
+ { section: "Constraints and Assumptions", required: true, validationRule: "All implicit assumptions surfaced. Constraints have sources." },
623
+ { section: "Testability Map", required: true, validationRule: "Each criterion maps to a concrete test description." },
624
+ { section: "Approval", required: true, validationRule: "Explicit user approval marker present." }
625
+ ]
626
+ };
628
+ // ---------------------------------------------------------------------------
628
+ // PLAN — execution planning stage: slices the approved spec into small tasks grouped in dependency waves, then pauses at WAIT_FOR_CONFIRM; writes 05-plan.md and hands off to "test".
629
+ // ---------------------------------------------------------------------------
630
+ const PLAN = { // Plain data literal describing the "plan" stage; it holds only strings, booleans, arrays and nested objects.
631
+ stage: "plan",
632
+ skillFolder: "planning-and-task-breakdown", // same string as skillName on the next line
633
+ skillName: "planning-and-task-breakdown",
634
+ skillDescription: "Execution planning stage with strict confirmation gate before implementation.",
635
+ hardGate: "Do NOT write code or tests. Planning only. This stage produces a task graph and execution order. WAIT_FOR_CONFIRM before any handoff to implementation.",
636
+ purpose: "Create small executable tasks with dependencies and pause for explicit user confirmation.",
637
+ whenToUse: [
638
+ "After spec approval",
639
+ "Before writing tests or implementation",
640
+ "When delivery path and dependency order are needed"
641
+ ],
642
+ checklist: [ // NOTE(review): the last step names /cc-test, presumably the command for the next stage ("test") — it is not defined in this object, confirm
643
+ "Read upstream — load spec, design, and scope artifacts. Cross-reference acceptance criteria.",
644
+ "Build dependency graph — identify task ordering, parallel opportunities, and blocking dependencies.",
645
+ "Group tasks into dependency waves — wave N+1 cannot start until wave N has verification evidence.",
646
+ "Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
647
+ "Attach verification — every task has an acceptance criterion mapping and a concrete verification command.",
648
+ "Define checkpoints — mark points where progress should be validated before continuing.",
649
+ "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. Do NOT proceed to /cc-test until user confirms."
650
+ ],
651
+ interactionProtocol: [
652
+ "Plan in read-only mode relative to implementation.",
653
+ "Split work into small vertical slices (target 2-5 minute tasks).",
654
+ "Publish explicit dependency waves with entry and exit checks for each wave.",
655
+ "Attach verification step to every task.",
656
+ "Enforce WAIT_FOR_CONFIRM before moving to /cc-test. Use AskQuestion/AskUserQuestion tool: present the plan summary with options (A) Approve / (B) Revise / (C) Reject.",
657
+ "Wait for explicit approval. Do not auto-advance."
658
+ ],
659
+ process: [
660
+ "Build dependency graph and ordered slices.",
661
+ "Group slices into execution waves and define gate criteria per wave.",
662
+ "Define each task with acceptance mapping and verification commands.",
663
+ "Record checkpoints and blockers.",
664
+ "Write plan artifact and pause at WAIT_FOR_CONFIRM."
665
+ ],
666
+ requiredGates: [ // each gate is { id, description }; every id in this stage carries the "plan_" prefix
667
+ { id: "plan_tasks_sliced_2_5_min", description: "Tasks are small, executable slices." },
668
+ { id: "plan_dependency_graph_written", description: "Dependency graph and order are explicit." },
669
+ { id: "plan_dependency_waves_defined", description: "Tasks are grouped into executable waves with gate checks." },
670
+ { id: "plan_verification_steps_defined", description: "Each task has verification guidance." },
671
+ { id: "plan_acceptance_mapped", description: "Each task maps to a spec acceptance criterion." },
672
+ { id: "plan_wait_for_confirm", description: "Execution blocked until explicit user confirmation." }
673
+ ],
674
+ requiredEvidence: [
675
+ "Artifact written to `.cclaw/artifacts/05-plan.md`.",
676
+ "Task list includes acceptance mapping.",
677
+ "Dependency graph documented.",
678
+ "Dependency waves documented with wave-by-wave verification gates.",
679
+ "WAIT_FOR_CONFIRM status recorded."
680
+ ],
681
+ inputs: ["approved spec", "codebase context", "delivery constraints"],
682
+ requiredContext: [
683
+ "spec acceptance criteria",
684
+ "current architecture",
685
+ "known technical debt and dependencies"
686
+ ],
687
+ outputs: ["task graph", "dependency wave plan", "ordered plan", "explicit confirmation checkpoint"],
688
+ blockers: [
689
+ "tasks too broad",
690
+ "dependency uncertainty unresolved",
691
+ "wave boundaries are unclear",
692
+ "no explicit confirmation"
693
+ ],
694
+ exitCriteria: [
695
+ "plan quality gates complete",
696
+ "WAIT_FOR_CONFIRM present and unresolved until user approves",
697
+ "artifact ready for TDD execution",
698
+ "acceptance mapping complete"
699
+ ],
700
+ antiPatterns: [
701
+ "Horizontal decomposition without end-to-end slices",
702
+ "Tasks without verification steps",
703
+ "Starting execution before approval",
704
+ "Tasks that touch multiple unrelated areas"
705
+ ],
706
+ rationalizations: [ // entries are { claim, reality } pairs
707
+ { claim: "Task details can be finalized during coding.", reality: "Underspecified tasks cause context thrash and broken sequencing." },
708
+ { claim: "Dependency map is overkill for this change.", reality: "Missing dependencies are a major source of blocked execution." },
709
+ { claim: "We can assume approval and continue.", reality: "Explicit confirmation is the contract boundary between planning and execution." }
710
+ ],
711
+ redFlags: [
712
+ "No dependency graph",
713
+ "No WAIT_FOR_CONFIRM marker",
714
+ "No explicit dependency waves",
715
+ "Tasks exceed one coherent outcome",
716
+ "No acceptance mapping"
717
+ ],
718
+ policyNeedles: ["WAIT_FOR_CONFIRM", "Task Graph", "Dependency Waves", "Acceptance Mapping", "verification steps"], // NOTE(review): presumably phrases searched for in generated output; the consumer is not visible here — confirm
719
+ artifactFile: "05-plan.md", // basename of the path listed in crossStageTrace.writesTo below
720
+ next: "test", // equals the `stage` value of the TEST object
721
+ cognitivePatterns: [
722
+ { name: "Vertical Slice Thinking", description: "Each task delivers one thin end-to-end slice of value. Horizontal layers (all models, then all controllers) create integration risk. Vertical slices (one feature through all layers) reduce it." },
723
+ { name: "Two-Minute Smell Test", description: "If a competent engineer cannot understand and start a task in two minutes, the task is too large or too vague. Break it down further." },
724
+ { name: "Make the Change Easy, Then Make the Easy Change", description: "Refactor first, implement second. Never structural + behavioral changes simultaneously. Sequence tasks accordingly." }
725
+ ],
726
+ reviewSections: [], // no review sections are defined for this stage
727
+ completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
728
+ crossStageTrace: { // reads the spec, design and scope artifacts; writes only the plan artifact
729
+ readsFrom: [".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/03-design.md", ".cclaw/artifacts/02-scope.md"],
730
+ writesTo: [".cclaw/artifacts/05-plan.md"],
731
+ traceabilityRule: "Every task must trace to a spec acceptance criterion. Every downstream RED test must trace to a plan task."
732
+ },
733
+ artifactValidation: [ // entries are { section, required, validationRule }; all five sections are marked required: true
734
+ { section: "Dependency Graph", required: true, validationRule: "Ordering and parallel opportunities explicit. No circular dependencies." },
735
+ { section: "Dependency Waves", required: true, validationRule: "Every task belongs to a wave. Each wave has an exit gate and dependency statement." },
736
+ { section: "Task List", required: true, validationRule: "Each task: ID, description, acceptance criterion link, verification command." },
737
+ { section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
738
+ { section: "WAIT_FOR_CONFIRM", required: true, validationRule: "Explicit marker present. Status: pending until user approves." }
739
+ ]
740
+ };
742
+ // ---------------------------------------------------------------------------
742
+ // TEST — TDD RED stage: failing tests only, no implementation edits; evidence is recorded in the RED section of 06-tdd.md; hands off to "build".
743
+ // ---------------------------------------------------------------------------
744
+ const TEST = { // Plain data literal describing the "test" stage; it holds only strings, booleans, arrays and nested objects.
745
+ stage: "test",
746
+ skillFolder: "red-first-testing", // same string as skillName on the next line
747
+ skillName: "red-first-testing",
748
+ skillDescription: "TDD RED stage. Establish failing tests as proof before implementation changes.",
749
+ hardGate: "Do NOT change implementation code. This stage writes failing tests ONLY. If you find yourself editing non-test files, STOP — you have left the RED stage.",
750
+ purpose: "Create RED evidence tied to acceptance criteria before any implementation.",
751
+ whenToUse: [
752
+ "After plan confirmation",
753
+ "Before /cc-build",
754
+ "For every behavior change in scope"
755
+ ],
756
+ checklist: [
757
+ "Select plan slice — pick one task from the plan. Do not batch multiple tasks.",
758
+ "Map to acceptance criterion — identify the specific spec criterion this test proves.",
759
+ "Write behavior-focused test — test the expected behavior, not implementation details. Name tests descriptively.",
760
+ "Run tests and observe failure — tests MUST fail. If they pass, either the behavior already exists or the test is wrong.",
761
+ "Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
762
+ "Repeat for each slice — return to step 1 for the next plan slice."
763
+ ],
764
+ interactionProtocol: [
765
+ "Pick one planned slice at a time.",
766
+ "Write behavior-focused tests before changing implementation.",
767
+ "Capture and store failing output as RED evidence.",
768
+ "Do not proceed to build without RED evidence.",
769
+ "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?"
770
+ ],
771
+ process: [
772
+ "Select slice and map to acceptance criterion.",
773
+ "Write test(s) that fail for expected reason.",
774
+ "Run tests and capture failure output.",
775
+ "Record RED evidence in TDD artifact.",
776
+ "Verify failure reason matches expected missing behavior."
777
+ ],
778
+ requiredGates: [ // each gate is { id, description }; note the ids use a "tdd_" prefix rather than the stage id "test"
779
+ { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
780
+ { id: "tdd_red_failure_captured", description: "Failure output is captured as evidence." },
781
+ { id: "tdd_trace_to_acceptance", description: "RED tests trace to explicit acceptance criteria." },
782
+ { id: "tdd_red_failure_reason_verified", description: "Failure is for the expected reason, not an unrelated error." }
783
+ ],
784
+ requiredEvidence: [
785
+ "Artifact updated at `.cclaw/artifacts/06-tdd.md` RED section.",
786
+ "Failing command output captured.",
787
+ "Acceptance mapping documented.",
788
+ "Failure reason analysis recorded."
789
+ ],
790
+ inputs: ["approved plan slice", "spec acceptance criterion", "test harness configuration"],
791
+ requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
792
+ outputs: ["failing test set", "captured RED evidence", "ready signal for GREEN stage"],
793
+ blockers: [
794
+ "tests pass before behavior change",
795
+ "failure reason does not match expected behavior",
796
+ "no evidence recorded"
797
+ ],
798
+ exitCriteria: [
799
+ "RED evidence exists and is traceable",
800
+ "required gates marked satisfied",
801
+ "no implementation changes made in this stage",
802
+ "failure reason verified for each test"
803
+ ],
804
+ antiPatterns: [
805
+ "Writing code before failing test",
806
+ "Asserting implementation details instead of behavior",
807
+ "Skipping evidence capture",
808
+ "Testing multiple slices without recording evidence for each"
809
+ ],
810
+ rationalizations: [ // entries are { claim, reality } pairs
811
+ { claim: "This change is obvious, tests can be added later.", reality: "Without RED proof, regressions hide behind optimistic assumptions." },
812
+ { claim: "A passing baseline is enough to continue.", reality: "Baseline pass does not prove new behavior requirements." },
813
+ { claim: "One broad integration test is enough.", reality: "Slice-level RED tests are required for precise failure signal." }
814
+ ],
815
+ redFlags: [
816
+ "No failing test output",
817
+ "No acceptance linkage",
818
+ "Implementation edits appear before RED evidence",
819
+ "Test passes without behavior change"
820
+ ],
821
+ policyNeedles: ["RED", "failing test", "acceptance criteria", "no implementation changes"], // NOTE(review): presumably phrases searched for in generated output; the consumer is not visible here — confirm
822
+ artifactFile: "06-tdd.md", // the BUILD object declares the same artifactFile
823
+ next: "build", // equals the `stage` value of the BUILD object
824
+ cognitivePatterns: [
825
+ { name: "Behavior Over Implementation", description: "Tests describe WHAT the system does, not HOW. Test the observable behavior from outside the unit. If you need to test internals, the design needs work." },
826
+ { name: "Failure-First Thinking", description: "The failing test IS the specification. Until you see the right failure, you do not understand what you are building. Wrong failures are information." },
827
+ { name: "Proof Before Claim", description: "Do not claim a feature works without evidence. RED output is proof of what is missing. GREEN output is proof it was added. Both are required." }
828
+ ],
829
+ reviewSections: [], // no review sections are defined for this stage
830
+ completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
831
+ crossStageTrace: { // reads the plan and spec artifacts; writes the shared TDD artifact
832
+ readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md"],
833
+ writesTo: [".cclaw/artifacts/06-tdd.md"],
834
+ traceabilityRule: "Every RED test traces to a plan task. Every plan task traces to a spec criterion. Evidence chain: spec -> plan -> RED test -> failure output."
835
+ },
836
+ artifactValidation: [ // entries are { section, required, validationRule }; all three sections are marked required: true
837
+ { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
838
+ { section: "Acceptance Mapping", required: true, validationRule: "Each RED test links to a plan task and spec criterion." },
839
+ { section: "Failure Analysis", required: true, validationRule: "Failure reason matches expected missing behavior." }
840
+ ]
841
+ };
843
+ // ---------------------------------------------------------------------------
843
+ // BUILD — TDD GREEN + REFACTOR stage: minimal change to pass the RED tests, full-suite run, then refactor; GREEN and REFACTOR evidence is recorded in 06-tdd.md; hands off to "review".
844
+ // ---------------------------------------------------------------------------
845
+ const BUILD = { // Plain data literal describing the "build" stage; it holds only strings, booleans, arrays and nested objects.
846
+ stage: "build",
847
+ skillFolder: "incremental-implementation", // same string as skillName on the next line
848
+ skillName: "incremental-implementation",
849
+ skillDescription: "TDD GREEN and REFACTOR stage with strict traceability to plan slices.",
850
+ hardGate: "Do NOT merge, ship, or skip review. This stage produces GREEN and REFACTOR evidence for one plan slice at a time. If you are touching files unrelated to the current slice, STOP.",
851
+ purpose: "Implement minimal passing change, run full suite GREEN, then refactor safely.",
852
+ whenToUse: [
853
+ "After RED evidence is complete",
854
+ "For one accepted plan slice at a time",
855
+ "Before review stage"
856
+ ],
857
+ checklist: [
858
+ "Minimal GREEN change — implement the smallest code change that makes the RED tests pass. No extra features.",
859
+ "Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
860
+ "Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
861
+ "Refactor pass — improve code quality without changing behavior. Document what you changed and why.",
862
+ "Record evidence — capture GREEN output and REFACTOR notes in the TDD artifact.",
863
+ "Annotate traceability — link the implementation to the plan task ID and spec criterion."
864
+ ],
865
+ interactionProtocol: [
866
+ "Apply minimal change to satisfy RED tests.",
867
+ "Run full suite, not partial checks, for GREEN validation.",
868
+ "Refactor without changing behavior and document rationale.",
869
+ "Stop if regressions appear and return to prior step.",
870
+ "Record traceability to plan slice explicitly."
871
+ ],
872
+ process: [
873
+ "Implement smallest change needed for GREEN.",
874
+ "Run full tests and build checks.",
875
+ "Perform refactor pass preserving behavior.",
876
+ "Record GREEN and REFACTOR evidence in artifact.",
877
+ "Annotate traceability to plan task and spec criterion."
878
+ ],
879
+ requiredGates: [ // each gate is { id, description }; ids in this stage mix the "build_" and "tdd_" prefixes
880
+ { id: "build_minimal_change_applied", description: "Implementation matches a single plan slice." },
881
+ { id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
882
+ { id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
883
+ { id: "tdd_refactor_notes_written", description: "Refactor decisions and outcomes are documented." },
884
+ { id: "build_traceable_to_plan", description: "Change traceability to plan slice is explicit." }
885
+ ],
886
+ requiredEvidence: [
887
+ "Artifact `.cclaw/artifacts/06-tdd.md` includes GREEN and REFACTOR sections.",
888
+ "Full test/build output recorded.",
889
+ "Traceability to task identifier is documented.",
890
+ "Refactor rationale captured."
891
+ ],
892
+ inputs: ["RED evidence", "approved plan slice", "coding standards and constraints"],
893
+ requiredContext: ["tdd artifact", "plan artifact", "spec acceptance criteria"],
894
+ outputs: ["passing implementation", "refactor evidence", "review-ready change set"],
895
+ blockers: [
896
+ "no RED evidence",
897
+ "full suite not green",
898
+ "behavior changed during refactor"
899
+ ],
900
+ exitCriteria: [
901
+ "GREEN evidence captured",
902
+ "REFACTOR evidence captured",
903
+ "required gates marked satisfied",
904
+ "traceability annotated"
905
+ ],
906
+ antiPatterns: [
907
+ "Big-bang implementation across multiple slices",
908
+ "Partial test runs presented as GREEN",
909
+ "Undocumented refactor changes",
910
+ "Adding features beyond what RED tests require"
911
+ ],
912
+ rationalizations: [ // entries are { claim, reality } pairs
913
+ { claim: "Refactor can be skipped for speed.", reality: "Skipping refactor accumulates debt and weakens maintainability." },
914
+ { claim: "Only changed tests need to pass.", reality: "Full-suite checks are needed to detect regressions." },
915
+ { claim: "Traceability is implied by commit diff.", reality: "Explicit mapping avoids ambiguity in review and rollback." }
916
+ ],
917
+ redFlags: [
918
+ "No full-suite GREEN evidence",
919
+ "No refactor notes",
920
+ "Multiple tasks implemented in one pass without justification",
921
+ "Files changed outside current slice scope"
922
+ ],
923
+ policyNeedles: ["GREEN", "full test suite", "REFACTOR", "traceable to plan slice"], // NOTE(review): presumably phrases searched for in generated output; the consumer is not visible here — confirm
924
+ artifactFile: "06-tdd.md", // the TEST object declares the same artifactFile
925
+ next: "review", // equals the `stage` value of the REVIEW object
926
+ cognitivePatterns: [
927
+ { name: "Minimal Viable Change", description: "The best implementation is the smallest one that passes all RED tests. Every extra line is risk. Resist the urge to 'improve while you are here.'" },
928
+ { name: "Regression Paranoia", description: "Assume every change breaks something until the full suite proves otherwise. Partial test runs are lies of omission." },
929
+ { name: "Refactor-as-Hygiene", description: "Refactoring is not optional cleanup — it is the third leg of TDD. GREEN without REFACTOR accumulates mess. REFACTOR without GREEN breaks things." }
930
+ ],
931
+ reviewSections: [], // no review sections are defined for this stage
932
+ completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
933
+ crossStageTrace: { // 06-tdd.md appears in both readsFrom and writesTo. NOTE(review): requiredContext lists "spec acceptance criteria" but 04-spec.md is absent from readsFrom — confirm this is intended
934
+ readsFrom: [".cclaw/artifacts/06-tdd.md", ".cclaw/artifacts/05-plan.md"],
935
+ writesTo: [".cclaw/artifacts/06-tdd.md"],
936
+ traceabilityRule: "Every GREEN change traces to a RED test. Every RED test traces to a plan task. Evidence chain must be unbroken."
937
+ },
938
+ artifactValidation: [ // entries are { section, required, validationRule }; all three sections are marked required: true
939
+ { section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
940
+ { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
941
+ { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." }
942
+ ]
943
+ };
945
+ // ---------------------------------------------------------------------------
946
+ // REVIEW — reference: superpowers code-review + gstack /review
947
+ // ---------------------------------------------------------------------------
948
/**
 * Stage schema for the "review" stage (skill `two-layer-review`).
 *
 * Pure data: it describes a two-layer review (Layer 1 spec compliance, then
 * Layer 2a–2e quality sections plus a meta-review) that must end in exactly
 * one of APPROVED / APPROVED_WITH_CONCERNS / BLOCKED.
 *
 * Fields consumed by the accessors in this module:
 *  - `requiredGates[].id`  -> returned by `stageGateIds` and used as the
 *    transition guards built by `buildTransitionRules`.
 *  - `next`                -> read by `nextCclawCommand` / `buildTransitionRules`.
 *  - `policyNeedles`       -> returned by `stagePolicyNeedles`.
 */
const REVIEW = {
    stage: "review",
    skillFolder: "two-layer-review",
    skillName: "two-layer-review",
    skillDescription: "Two-layer review stage: spec compliance first, then code quality and production readiness. Section-by-section with severity discipline.",
    hardGate: "Do NOT ship, merge, or release until both review layers complete with an explicit verdict. No exceptions for urgency. Critical blockers MUST be resolved before handoff.",
    purpose: "Validate that implementation matches spec and meets quality/security/performance bar through structured two-layer review.",
    whenToUse: [
        "After build stage completes",
        "Before any ship action",
        "When release risk must be assessed explicitly"
    ],
    checklist: [
        "Diff Scope — Run `git diff` against base branch. If no diff, exit early with APPROVED (no changes to review). Scope the review to changed files unless blast-radius analysis requires wider inspection.",
        "Change-Size Check — ~100 lines = normal. ~300 lines = consider splitting. ~1000+ lines = strongly recommend stacked PRs. Flag large diffs to the user.",
        "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and plan. Verify evidence chain is unbroken.",
        "Layer 1: Spec Compliance — check every acceptance criterion against implementation. Verdict: pass/fail per criterion.",
        "Layer 2a: Correctness — logic errors, race conditions, boundary violations, null handling.",
        "Layer 2b: Security — input validation, auth boundaries, secrets exposure, injection vectors.",
        "Layer 2c: Performance — N+1 queries, memory leaks, missing caching, hot paths.",
        "Layer 2d: Architecture Fit — does the implementation match the locked design? Coupling, cohesion, interface contracts.",
        "Layer 2e: External Safety — SQL safety, concurrency, secrets in logs, enum completeness (grep outside diff), LLM trust boundaries.",
        "Review Army reconciliation — normalize findings into structured records, dedup by fingerprint, and mark multi-specialist confirmations.",
        "Meta-Review — Were tests actually run? Do test names match what they test? Are there real assertions?",
        "Classify findings — Critical (blocks ship), Important (should fix), Suggestion (optional improvement).",
        "Produce verdict — APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED."
    ],
    interactionProtocol: [
        "Run Layer 1 (spec compliance) completely before starting Layer 2.",
        "In each review section, present findings ONE AT A TIME. Do NOT batch.",
        "Classify every finding as Critical, Important, or Suggestion.",
        "For each Critical finding: use the Decision Protocol — present resolution options (A/B/C) with trade-offs, mark one as (recommended), use AskQuestion/AskUserQuestion tool when available.",
        "Resolve all critical blockers before ship.",
        "For final verdict: use AskQuestion/AskUserQuestion tool with options APPROVED / APPROVED_WITH_CONCERNS / BLOCKED."
    ],
    // Condensed walk-through of the checklist. Layer 2e is listed here so the
    // summary covers every Layer 2 section named in `checklist` and
    // `reviewSections`.
    process: [
        "Layer 1: check acceptance criteria and requirement coverage.",
        "Layer 2a: check correctness — logic, races, boundaries, null handling.",
        "Layer 2b: check security — validation, auth, secrets, injection.",
        "Layer 2c: check performance — queries, memory, caching, hot paths.",
        "Layer 2d: check architecture fit — design compliance, coupling, interfaces.",
        "Layer 2e: check external safety — SQL safety, concurrency, secrets in logs, enum completeness, LLM trust boundaries.",
        "Reconcile multi-agent findings into `.cclaw/artifacts/07-review-army.json` (dedup + confidence + conflict notes).",
        "Classify and prioritize all findings.",
        "Write review report artifact with explicit verdict."
    ],
    // Gate ids become the guards of the review -> ship transition rule.
    requiredGates: [
        { id: "review_layer1_spec_compliance", description: "Spec compliance check completed with per-criterion verdict." },
        { id: "review_layer2_correctness", description: "Correctness review completed." },
        { id: "review_layer2_security", description: "Security review completed." },
        { id: "review_layer2_performance", description: "Performance review completed." },
        { id: "review_layer2_architecture", description: "Architecture fit review completed." },
        { id: "review_severity_classified", description: "All findings are severity-tagged." },
        { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." }
    ],
    requiredEvidence: [
        "Artifact written to `.cclaw/artifacts/07-review.md`.",
        "Artifact written to `.cclaw/artifacts/07-review-army.json`.",
        "Layer 1 verdict captured with per-criterion pass/fail.",
        "Layer 2 sections completed with findings.",
        "Severity log includes critical/important/suggestion buckets.",
        "Explicit final verdict: APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED."
    ],
    inputs: ["implementation diff", "spec and plan artifacts", "test/build evidence"],
    requiredContext: ["spec criteria", "tdd artifact", "rulebook constraints"],
    outputs: ["review verdict", "severity-indexed findings", "reconciled review-army findings", "ship readiness decision"],
    blockers: [
        "layer 1 failed",
        "critical findings unresolved",
        "missing regression evidence"
    ],
    exitCriteria: [
        "both layers completed",
        "all review sections evaluated",
        "critical blockers resolved",
        "ship readiness explicitly stated"
    ],
    antiPatterns: [
        "Single generic review without layered structure",
        "No severity classification",
        "Shipping with open criticals",
        "Batching multiple findings into one report without individual resolution",
        "Skipping Layer 2 sections because Layer 1 passed"
    ],
    rationalizations: [
        { claim: "Passing tests mean spec compliance by default.", reality: "Tests can miss requirement mismatches; explicit spec review is mandatory." },
        { claim: "Severity labels are unnecessary.", reality: "Without severity, release decisions become inconsistent." },
        { claim: "Critical issues can be fixed after ship.", reality: "Critical blockers must be resolved before release handoff." },
        { claim: "Security review is not needed for internal tools.", reality: "Internal tools become external surface area. Security is always in scope." }
    ],
    redFlags: [
        "No separate Layer 1/Layer 2 outcomes",
        "No structured review-army reconciliation artifact",
        "No critical bucket",
        "No explicit ready/not-ready verdict",
        "Review sections skipped or abbreviated",
        "Findings not classified by severity"
    ],
    policyNeedles: ["Layer 1", "Layer 2", "Critical", "Review Army", "Ready to Ship", "One issue at a time"],
    artifactFile: "07-review.md",
    next: "ship",
    cognitivePatterns: [
        { name: "Severity Discipline", description: "Every finding gets a severity label. Critical blocks ship. Important should be fixed. Suggestion is optional. No ambiguous middle ground." },
        { name: "Spec-First Not Code-First", description: "Review starts with the spec, not the code. Does the code do what was specified? Only after spec compliance is confirmed do you review code quality." },
        { name: "Blocker Resolution Before Progress", description: "When a critical finding is identified, stop and resolve it before continuing the review. Do not accumulate criticals for batch resolution." },
        { name: "Evidence or Unknown", description: "For every safety/correctness claim, cite file:line or test name. If you cannot point to evidence, the claim is 'UNKNOWN' not 'safe'. Never say 'probably tested' — check." },
        { name: "Diff-Scoped Thinking", description: "Start with the diff (git diff vs main). Review only what changed unless a change has blast-radius implications. Skip unchanged files unless directly affected." },
        { name: "Change-Size Awareness", description: "~100 lines = normal review. ~300 lines = consider splitting. ~1000+ lines = strongly recommend splitting into stacked PRs. Large diffs hide bugs." }
    ],
    // Every section except the meta-review is flagged as a stop gate.
    reviewSections: [
        {
            title: "Layer 1: Spec Compliance",
            evaluationPoints: [
                "For each acceptance criterion: does the implementation satisfy it?",
                "Are there spec requirements with no corresponding implementation?",
                "Are there implementations with no corresponding spec requirement (scope creep)?",
                "Is every edge case from the spec handled?"
            ],
            stopGate: true
        },
        {
            title: "Layer 2a: Correctness",
            evaluationPoints: [
                "Logic errors and boundary violations",
                "Race conditions and concurrency issues",
                "Null/undefined handling",
                "Error propagation and recovery paths"
            ],
            stopGate: true
        },
        {
            title: "Layer 2b: Security",
            evaluationPoints: [
                "Input validation completeness",
                "Authorization boundary enforcement",
                "Secrets exposure risk",
                "Injection vector assessment"
            ],
            stopGate: true
        },
        {
            title: "Layer 2c: Performance",
            evaluationPoints: [
                "N+1 query patterns",
                "Memory leak potential",
                "Missing caching opportunities",
                "Hot path complexity analysis"
            ],
            stopGate: true
        },
        {
            title: "Layer 2d: Architecture Fit",
            evaluationPoints: [
                "Does implementation match the locked design?",
                "Coupling and cohesion assessment",
                "Interface contract compliance",
                "Unintended architectural drift"
            ],
            stopGate: true
        },
        {
            title: "Layer 2e: External Safety Checklist",
            evaluationPoints: [
                "SQL/database: parameterized queries, no raw string interpolation, migration safety",
                "Concurrency: race conditions in shared state, lock ordering, timeout handling",
                "Secrets: no hardcoded tokens, no secrets in logs, env vars for sensitive config",
                "Enum/constant completeness: grep for sibling values OUTSIDE the diff — are all cases handled?",
                "Trust boundaries: if LLM/AI output is used, is it validated before acting on it?"
            ],
            stopGate: true
        },
        {
            title: "Meta-Review: Verify the Verification",
            evaluationPoints: [
                "Were tests actually run (not just assumed to pass)?",
                "Do the test names match what they actually test?",
                "Is there test coverage for the specific changes in this diff?",
                "Are there assertions, or do tests just run without checking results?"
            ],
            stopGate: false
        }
    ],
    completionStatus: ["APPROVED", "APPROVED_WITH_CONCERNS", "BLOCKED"],
    crossStageTrace: {
        readsFrom: [".cclaw/artifacts/06-tdd.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/05-plan.md"],
        // Both artifacts named in `requiredEvidence` are written by this stage,
        // so both are listed here (the review-army JSON was previously missing).
        writesTo: [".cclaw/artifacts/07-review.md", ".cclaw/artifacts/07-review-army.json"],
        traceabilityRule: "Review verdict must reference specific spec criteria and TDD evidence. Downstream ship stage must reference review verdict."
    },
    artifactValidation: [
        { section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
        { section: "Layer 2 Findings", required: true, validationRule: "Each finding has severity, description, and resolution status." },
        { section: "Review Army Contract", required: true, validationRule: "Structured findings include id/severity/confidence/fingerprint/reportedBy/status with dedup reconciliation summary." },
        { section: "Severity Summary", required: true, validationRule: "Counts: N critical, N important, N suggestion." },
        { section: "Final Verdict", required: true, validationRule: "Exactly one of: APPROVED, APPROVED_WITH_CONCERNS, BLOCKED." }
    ],
    namedAntiPattern: {
        title: "Tests Pass So It Must Be Correct",
        description: "Tests verify what the developer thought to test. They do not verify what the spec requires. A passing test suite with failing spec compliance is a false green. Layer 1 exists precisely because tests and specs can diverge without anyone noticing."
    }
};
1147
+ // ---------------------------------------------------------------------------
1148
+ // SHIP — reference: superpowers finishing-a-development-branch + gstack /ship
1149
+ // ---------------------------------------------------------------------------
1150
/**
 * Stage schema for the "ship" stage (skill `shipping-and-handoff`).
 *
 * Pure data: it describes the release handoff — preflight checks, release
 * notes, rollback plan, and selection/execution of exactly one finalization
 * mode. It is the last stage: `next` is "done", which `nextCclawCommand`
 * maps to "none" and `buildTransitionRules` skips.
 *
 * Fields consumed by the accessors in this module:
 *  - `requiredGates[].id`  -> returned by `stageGateIds`.
 *  - `policyNeedles`       -> returned by `stagePolicyNeedles`.
 */
const SHIP = {
    stage: "ship",
    skillFolder: "shipping-and-handoff",
    skillName: "shipping-and-handoff",
    skillDescription: "Release handoff stage with preflight checks, rollback readiness, and explicit finalization mode.",
    hardGate: "Do NOT merge, push, or finalize without a passed preflight check, written rollback plan, and exactly one explicit finalization mode selected. No exceptions for urgency.",
    purpose: "Prepare a safe release handoff with clear rollback and branch finalization decision.",
    whenToUse: [
        "After review passes with APPROVED or APPROVED_WITH_CONCERNS verdict",
        "Before creating PR/merge/final branch action",
        "When release notes and rollback plan are required"
    ],
    checklist: [
        "Validate upstream gates — verify review verdict is APPROVED or APPROVED_WITH_CONCERNS. If BLOCKED, stop immediately.",
        "Run preflight checks — tests pass, build succeeds, linter clean, type-check clean, no uncommitted changes. Every check must produce fresh output in this message.",
        "Merge-base detection — identify the correct base branch. Run `git merge-base HEAD <base>`. If the base has diverged significantly, flag for rebase-first.",
        "Re-run tests on merged result — if merging locally, run the full test suite AFTER the merge, not just before. Post-merge failures are common.",
        "Generate release notes — summarize what changed, why, and what it affects. Reference spec criteria. Include: breaking changes, new dependencies, migration steps if any.",
        "Write rollback plan — trigger conditions (what tells you it is broken), rollback steps (exact commands/git operations), and verification (how to confirm rollback worked).",
        "Monitoring checklist — what should be watched after deploy? Error rates, latency, key business metrics. If no monitoring exists, flag it as a risk.",
        "Select finalization mode — exactly ONE of: (A) merge to main locally, (B) create PR with description, (C) keep branch for later, (D) discard branch. For discard: list what will be deleted, require typed confirmation.",
        "Execute finalization — perform the selected action. For merge: verify clean merge. For PR: include structured body (summary, test plan, rollback). For discard: verify deletion.",
        "Worktree cleanup — if using git worktrees, clean up the worktree after merge/discard. Keep it only for 'keep branch' mode."
    ],
    interactionProtocol: [
        "Run preflight checks before any release action.",
        "Document release notes and rollback plan explicitly.",
        // Four labels, matching the four finalization modes (A–D) in `checklist`.
        "For finalization mode: use the Decision Protocol — present modes as labeled options (A/B/C/D) with consequences, mark one as (recommended), use AskQuestion/AskUserQuestion tool when available.",
        "Do not proceed if critical blockers remain from review.",
        "Execute the selected finalization action and verify."
    ],
    process: [
        "Validate review and test gates.",
        // Type-check is included to match the preflight list in `checklist`,
        // `reviewSections` and `artifactValidation`.
        "Run preflight: build, test, lint, type-check, uncommitted-changes check.",
        "Generate release notes and rollback procedure.",
        "Choose one finalization mode: merge, PR, keep branch, discard branch.",
        "Execute finalization action.",
        "Write ship artifact with decision, rationale, and execution result."
    ],
    requiredGates: [
        { id: "ship_review_verdict_valid", description: "Review verdict is APPROVED or APPROVED_WITH_CONCERNS." },
        { id: "ship_preflight_passed", description: "Preflight checks passed or exceptions documented and approved." },
        { id: "ship_release_notes_written", description: "Release notes are complete and accurate." },
        { id: "ship_rollback_plan_ready", description: "Rollback trigger, steps, and verification are documented." },
        { id: "ship_finalization_mode_selected", description: "Exactly one finalization action is selected." },
        { id: "ship_finalization_executed", description: "Selected finalization action was executed and verified." }
    ],
    requiredEvidence: [
        "Artifact written to `.cclaw/artifacts/08-ship.md`.",
        "Release notes section is complete.",
        "Rollback section includes trigger conditions, steps, and verification.",
        "Finalization mode shows exactly one selected.",
        "Execution result documented."
    ],
    inputs: ["review verdict", "test/build outputs", "release context"],
    requiredContext: ["review artifact", "changelog scope", "deployment constraints"],
    outputs: ["release package handoff", "rollback plan", "final branch decision"],
    blockers: [
        "review verdict is BLOCKED",
        "critical review blockers remain",
        "rollback plan missing",
        "finalization mode not selected"
    ],
    exitCriteria: [
        "preflight completed",
        "rollback and release notes complete",
        "finalization action explicitly chosen and executed"
    ],
    antiPatterns: [
        "Shipping without rollback strategy",
        "Implicit finalization decision",
        "Bypassing preflight due to urgency",
        "Selecting multiple finalization modes",
        "Shipping with BLOCKED review verdict"
    ],
    rationalizations: [
        { claim: "Rollback details can be written after release.", reality: "Rollback is part of release readiness, not post-release cleanup." },
        { claim: "Finalization choice is obvious from context.", reality: "Explicit branch action prevents accidental release state." },
        { claim: "Urgent fixes can skip preflight.", reality: "Urgency increases risk; preflight discipline matters more, not less." }
    ],
    redFlags: [
        "No rollback trigger/steps",
        "More than one finalization mode implied",
        "No explicit preflight result",
        "Review verdict not referenced",
        "Finalization not executed, only planned"
    ],
    policyNeedles: [
        "Pre-Ship Checks",
        "Release Notes",
        "Rollback Plan",
        "merge / PR / keep branch / discard branch"
    ],
    artifactFile: "08-ship.md",
    // Terminal marker: there is no stage after ship.
    next: "done",
    cognitivePatterns: [
        { name: "Preflight Discipline", description: "Preflight is not bureaucracy — it is the last safety net. Every skip 'just this once' normalizes skipping. Run the checks every time." },
        { name: "Rollback-First Thinking", description: "Before shipping, answer: what tells me this is broken? How do I undo it? How do I verify the undo worked? If you cannot answer all three, you are not ready." },
        { name: "Explicit Over Implicit Finalization", description: "Merge, PR, keep, discard — each has different consequences. Pick one. Say it out loud. Write it down. Never let finalization be 'whatever the default is.'" },
        { name: "Post-Merge Paranoia", description: "The merge itself can introduce failures even when both branches pass independently. Always run the full suite AFTER merge, not just before." },
        { name: "Observability Before Ship", description: "If you cannot monitor the change in production, you cannot know if it is broken. Monitoring/logging is a ship prerequisite, not a follow-up." }
    ],
    reviewSections: [
        {
            title: "Preflight Verification",
            evaluationPoints: [
                "Test suite: full run, all pass, output captured",
                "Build: clean build, exit code 0",
                "Lint/format: no violations",
                "Type-check: no errors",
                "Working tree: no uncommitted changes"
            ],
            stopGate: true
        },
        {
            title: "Release Readiness",
            evaluationPoints: [
                "Release notes are accurate and reference spec criteria",
                "Breaking changes are documented with migration steps",
                "Rollback plan has trigger, steps, and verification",
                "If applicable: monitoring/alerting is in place for the change"
            ],
            stopGate: true
        }
    ],
    completionStatus: ["SHIPPED", "SHIPPED_WITH_EXCEPTIONS", "BLOCKED"],
    crossStageTrace: {
        readsFrom: [".cclaw/artifacts/07-review.md", ".cclaw/artifacts/06-tdd.md"],
        writesTo: [".cclaw/artifacts/08-ship.md"],
        traceabilityRule: "Ship artifact must reference review verdict and resolution status. Rollback plan must reference specific changes that could fail."
    },
    artifactValidation: [
        { section: "Preflight Results", required: true, validationRule: "Build, test, lint, type-check results captured with fresh output. Exceptions documented if any." },
        { section: "Release Notes", required: true, validationRule: "What changed, why, impact. References spec criteria. Breaking changes flagged." },
        { section: "Rollback Plan", required: true, validationRule: "Trigger conditions, rollback steps (exact commands), verification steps." },
        { section: "Monitoring", required: false, validationRule: "If applicable: what metrics/logs to watch post-deploy. Risk note if no monitoring." },
        { section: "Finalization", required: true, validationRule: "Exactly one mode selected. Execution result documented. Worktree cleaned if applicable." }
    ]
};
1289
+ // ---------------------------------------------------------------------------
1290
+ // Stage map and accessors
1291
+ // ---------------------------------------------------------------------------
1292
/**
 * Lookup table from stage name to its schema object.
 * Entries are listed in the same order as the original literal, so key
 * enumeration order is unchanged.
 */
const STAGE_SCHEMA_MAP = Object.fromEntries([
    ["brainstorm", BRAINSTORM],
    ["scope", SCOPE],
    ["design", DESIGN],
    ["spec", SPEC],
    ["plan", PLAN],
    ["test", TEST],
    ["build", BUILD],
    ["review", REVIEW],
    ["ship", SHIP]
]);
1303
/**
 * Per-stage list of subagents to dispatch automatically.
 *
 * Each rule has the shape `{ agent, mode, when, purpose, requiresUserGate }`,
 * where `mode` is "mandatory" or "proactive" and `requiresUserGate` is
 * `false` for every rule in this table. The table is built inside an IIFE so
 * the small rule factories do not leak into module scope.
 */
const STAGE_AUTO_SUBAGENT_DISPATCH = (() => {
    // Factory that fixes `mode` and emits keys in the canonical order.
    const ruleWithMode = (mode) => (agent, when, purpose) => ({
        agent,
        mode,
        when,
        purpose,
        requiresUserGate: false
    });
    const mandatory = ruleWithMode("mandatory");
    const proactive = ruleWithMode("proactive");
    return {
        brainstorm: [
            proactive("planner", "When request is ambiguous, multi-surface, or spans multiple modules.", "Map scope and alternatives before direction lock.")
        ],
        scope: [
            mandatory("planner", "Always during scope shaping.", "Challenge premise, map alternatives, and produce explicit in/out contract.")
        ],
        design: [
            mandatory("planner", "Always during design lock.", "Stress architecture boundaries and dependency graph."),
            proactive("security-reviewer", "When trust boundaries, auth, secrets, or external inputs are involved.", "Catch design-level security risks before implementation.")
        ],
        spec: [
            proactive("planner", "When acceptance criteria are unclear or constraints conflict.", "Normalize measurable criteria and testability mapping.")
        ],
        plan: [
            mandatory("planner", "Always when producing execution slices.", "Create dependency-aware task graph with verification steps.")
        ],
        test: [
            mandatory("test-author", "Always during RED stage.", "Guarantee failing tests are created before implementation.")
        ],
        build: [
            mandatory("test-author", "Always during GREEN + REFACTOR.", "Keep implementation traceable to RED evidence and full-suite verification."),
            proactive("doc-updater", "When public behavior, APIs, or config surfaces change.", "Prevent code/docs drift before review and ship.")
        ],
        review: [
            mandatory("spec-reviewer", "Always in review stage.", "Verify implementation against acceptance criteria with file evidence."),
            mandatory("code-reviewer", "Always in review stage.", "Assess correctness, maintainability, architecture, and ship risk."),
            proactive("security-reviewer", "When auth, input validation, secrets, parser, or privileged actions changed.", "Raise exploitable findings before release.")
        ],
        ship: [
            mandatory("doc-updater", "Always in ship stage.", "Ensure release notes and docs reflect actual shipped behavior."),
            proactive("code-reviewer", "When release involves broad blast radius or unresolved concerns.", "Provide final integration-scale quality pass.")
        ]
    };
})();
1421
/**
 * Look up the schema object for a stage.
 *
 * Only own keys of `STAGE_SCHEMA_MAP` are honoured. A bare bracket lookup
 * would also resolve inherited `Object.prototype` members (for example
 * "constructor" or "toString") and hand them back as if they were schemas.
 *
 * @param {string} stage - Stage name, e.g. "review" or "ship".
 * @returns {object | undefined} The stage schema, or `undefined` when the
 *   name is not a key of the stage map.
 */
export function stageSchema(stage) {
    if (!Object.prototype.hasOwnProperty.call(STAGE_SCHEMA_MAP, stage)) {
        return undefined;
    }
    return STAGE_SCHEMA_MAP[stage];
}
1424
/**
 * Resolve every stage named in `COMMAND_FILE_ORDER` to its schema.
 *
 * @returns {Array<object>} Schemas in the same order as `COMMAND_FILE_ORDER`.
 */
export function orderedStageSchemas() {
    const schemas = [];
    for (const stageName of COMMAND_FILE_ORDER) {
        schemas.push(stageSchema(stageName));
    }
    return schemas;
}
1427
/**
 * List the ids of a stage's required gates, in declaration order.
 *
 * @param {string} stage - Stage name.
 * @returns {string[]} Gate ids taken from the schema's `requiredGates`.
 * @throws {TypeError} When no schema exists for `stage`.
 */
export function stageGateIds(stage) {
    const { requiredGates } = stageSchema(stage);
    return requiredGates.map(({ id }) => id);
}
1430
/**
 * Name the slash command for the stage that follows `stage`.
 *
 * @param {string} stage - Stage name.
 * @returns {string} "none" when the schema's `next` is "done"; otherwise
 *   "/cc-" followed by the next stage name.
 * @throws {TypeError} When no schema exists for `stage`.
 */
export function nextCclawCommand(stage) {
    const { next: upcoming } = stageSchema(stage);
    if (upcoming === "done") {
        return "none";
    }
    return `/cc-${upcoming}`;
}
1434
/**
 * Build one transition rule per stage that has a successor.
 *
 * Stages whose `next` is "done" produce no rule. Each rule is guarded by the
 * gate ids of the stage it leaves.
 *
 * @returns {Array<{from: string, to: string, guards: string[]}>} Rules in
 *   the order given by `orderedStageSchemas()`.
 */
export function buildTransitionRules() {
    return orderedStageSchemas()
        .filter((schema) => schema.next !== "done")
        .map(({ stage, next }) => ({
            from: stage,
            to: next,
            guards: stageGateIds(stage)
        }));
}
1448
/**
 * Return the policy needle strings declared on a stage's schema.
 *
 * The schema's own array is returned, not a copy.
 *
 * @param {string} stage - Stage name.
 * @returns {string[]} The schema's `policyNeedles`.
 * @throws {TypeError} When no schema exists for `stage`.
 */
export function stagePolicyNeedles(stage) {
    const { policyNeedles } = stageSchema(stage);
    return policyNeedles;
}
1451
/**
 * Return the automatic subagent dispatch rules for a stage.
 *
 * Only own keys of `STAGE_AUTO_SUBAGENT_DISPATCH` are honoured, so names
 * that collide with inherited `Object.prototype` members (for example
 * "constructor") are treated as unknown rather than returning a function.
 *
 * @param {string} stage - Stage name.
 * @returns {Array<object> | undefined} The stage's dispatch rules, or
 *   `undefined` when the stage has no entry in the table.
 */
export function stageAutoSubagentDispatch(stage) {
    if (!Object.prototype.hasOwnProperty.call(STAGE_AUTO_SUBAGENT_DISPATCH, stage)) {
        return undefined;
    }
    return STAGE_AUTO_SUBAGENT_DISPATCH[stage];
}