cclaw-cli 0.15.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,11 @@
1
1
  import { COMMAND_FILE_ORDER } from "../constants.js";
2
+ import { BRAINSTORM, SCOPE, DESIGN, SPEC, PLAN, TDD, REVIEW, SHIP } from "./stages/index.js";
3
+ // ---------------------------------------------------------------------------
4
+ // NOTE: The former QUESTION_FORMAT_SPEC / ERROR_BUDGET_SPEC exports were
5
+ // hoisted into `src/content/meta-skill.ts` (Shared Decision + Tool-Use
6
+ // Protocol). They are no longer re-exported from here to avoid duplication
7
+ // and drift. Stage skills cite the meta-skill by path instead.
8
+ // ---------------------------------------------------------------------------
2
9
  /**
3
10
  * Gate tiers:
4
11
  * - required: blocking for stage completion.
@@ -117,1347 +124,6 @@ function tieredArtifactValidation(stage, rows) {
117
124
  };
118
125
  });
119
126
  }
120
- const BRAINSTORM = {
121
- stage: "brainstorm",
122
- skillFolder: "brainstorming",
123
- skillName: "brainstorming",
124
- skillDescription: "Design-first stage. Explore context, understand intent through collaborative dialogue, propose distinct approaches, and lock an approved direction before scope/design work.",
125
- hardGate: "Do NOT invoke implementation skills, write code, scaffold projects, or mutate product behavior until a concrete direction is approved by the user.",
126
- ironLaw: "NO ARTIFACT IS COMPLETE WITHOUT AN EXPLICITLY APPROVED DIRECTION — SILENCE IS NOT APPROVAL.",
127
- purpose: "Turn an initial idea into an approved design direction through natural collaborative dialogue — understanding the problem before proposing solutions.",
128
- whenToUse: [
129
- "Starting a new feature or behavior change",
130
- "Requirements are ambiguous or trade-offs are unclear",
131
- "Before any implementation-stage command or architecture commitment"
132
- ],
133
- whenNotToUse: [
134
- "A valid approved direction already exists and only execution remains",
135
- "The request is a pure release/finalization action with no new product decisions",
136
- "The task is retrospective only (post-ship audit with no new solution choices)"
137
- ],
138
- checklist: [
139
- "**Explore project context** — check files, docs, recent commits to understand what already exists.",
140
- "**Assess scope** — if the request covers multiple independent subsystems, flag it and help decompose before deep-diving. Each sub-project gets its own brainstorm cycle.",
141
- "**Ask clarifying questions** — one at a time, understand purpose, constraints, and success criteria. Prefer multiple choice when possible. Each question should change what we build, not just gather trivia.",
142
- "**Propose 2-3 architecturally distinct approaches** — with real trade-offs and your recommendation. Lead with the recommended option and explain why.",
143
- "**Present design by sections** — scale each section to its complexity. Ask after each section whether it looks right so far. Cover: architecture, key components, data flow.",
144
- "**Write artifact** to `.cclaw/artifacts/01-brainstorm.md`.",
145
- "**Self-review** — scan for placeholders/TODOs, check internal consistency, verify scope is focused, resolve any ambiguity.",
146
- "**User reviews artifact** — ask the user to review the written artifact and explicitly approve or request changes.",
147
- "**Handoff** — only then complete stage and point to `/cc-next`."
148
- ],
149
- interactionProtocol: [
150
- "Explore what exists before asking what to build — check project files first.",
151
- "If the idea is vague or could mean many different things, your FIRST question narrows to a specific kind of project. Do not ask detail questions until the project type is clear.",
152
- "Ask exactly one question per turn. Prefer multiple choice. No bundled questions.",
153
- "After 2-3 questions, summarize your emerging understanding before continuing so the user can correct course early.",
154
- "Each question should change a concrete design decision. Litmus test: if the two most likely answers do not lead to different architectures, make the choice yourself and state it.",
155
- "Present design in sections scaled to their complexity — a few sentences for simple aspects, detailed for nuanced ones. Get approval after each section.",
156
- "When proposing approaches, lead with your recommendation and explain why.",
157
- "State explicitly what is being approved when requesting approval.",
158
- "Run a brief self-review (placeholders, contradictions, scope, ambiguity) before presenting the artifact.",
159
- "**STOP.** Wait for explicit user approval after writing the artifact. Do NOT auto-advance."
160
- ],
161
- process: [
162
- "Explore project context: check files, docs, recent activity.",
163
- "Assess scope: flag if request is too broad, help decompose first.",
164
- "Ask clarifying questions one at a time — focus on purpose, constraints, success criteria.",
165
- "Propose 2-3 architecturally distinct approaches with trade-offs and a recommendation.",
166
- "Present design sections incrementally, get approval after each.",
167
- "Write approved direction to `.cclaw/artifacts/01-brainstorm.md`.",
168
- "Self-review: placeholder scan, internal consistency, scope check, ambiguity check.",
169
- "Request explicit user approval of the artifact.",
170
- "Handoff to scope only after approval is explicit."
171
- ],
172
- requiredGates: [
173
- { id: "brainstorm_context_explored", description: "Project context (files, docs, existing patterns) was checked before asking questions." },
174
- { id: "brainstorm_idea_understood", description: "Agent and user share the same understanding of the problem, constraints, and success criteria." },
175
- { id: "brainstorm_approaches_compared", description: "2-3 architecturally distinct approaches were compared with real trade-offs and a recommendation." },
176
- { id: "brainstorm_direction_approved", description: "User approved a concrete direction and what exactly was approved is stated." },
177
- { id: "brainstorm_artifact_reviewed", description: "User reviewed the written brainstorm artifact and confirmed readiness." }
178
- ],
179
- requiredEvidence: [
180
- "Artifact written to `.cclaw/artifacts/01-brainstorm.md`.",
181
- "Project context was explored (files, docs, or recent activity referenced).",
182
- "Clarifying questions and their answers are captured.",
183
- "2-3 approaches with trade-offs and recommendation are recorded.",
184
- "Approved direction and approval marker are present.",
185
- "Assumptions and open questions are captured (or explicitly marked as none)."
186
- ],
187
- inputs: ["problem statement", "constraints", "success criteria"],
188
- requiredContext: [
189
- "existing project context and patterns",
190
- "current behavior of affected area",
191
- "business and delivery constraints"
192
- ],
193
- researchPlaybooks: [
194
- "research/repo-scan.md",
195
- "research/learnings-lookup.md"
196
- ],
197
- outputs: [
198
- "approved design direction",
199
- "alternatives with trade-offs",
200
- "brainstorm artifact"
201
- ],
202
- blockers: [
203
- "no explicit approval",
204
- "critical ambiguity unresolved",
205
- "project context not explored"
206
- ],
207
- exitCriteria: [
208
- "approved design direction documented",
209
- "required gates marked satisfied",
210
- "no implementation action taken",
211
- "artifact reviewed by user"
212
- ],
213
- antiPatterns: [
214
- "Asking questions without exploring existing project context first",
215
- "Asking bundled or purely informational questions that don't change decisions",
216
- "Proposing cosmetic option variants instead of architecturally distinct approaches",
217
- "Jumping directly into implementation",
218
- "Requesting approval without stating what decision is being approved"
219
- ],
220
- redFlags: [
221
- "No project context exploration before questions",
222
- "Questions that only gather preferences without design impact",
223
- "Options that are variants of one approach, not distinct alternatives",
224
- "Approval requested without explicit decision context"
225
- ],
226
- policyNeedles: [
227
- "Explore project context",
228
- "One question at a time",
229
- "2-3 architecturally distinct approaches",
230
- "State what is being approved",
231
- "Self-review before handoff",
232
- "Do NOT implement, scaffold, or modify behavior"
233
- ],
234
- artifactFile: "01-brainstorm.md",
235
- next: "scope",
236
- reviewSections: [],
237
- completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
238
- crossStageTrace: {
239
- readsFrom: [],
240
- writesTo: [".cclaw/artifacts/01-brainstorm.md"],
241
- traceabilityRule: "Scope and design decisions must trace back to explored context and approved brainstorm direction."
242
- },
243
- artifactValidation: [
244
- { section: "Context", required: true, validationRule: "Must reference project state and relevant existing code or patterns." },
245
- { section: "Problem", required: true, validationRule: "Must define what we're solving, success criteria, and constraints." },
246
- { section: "Clarifying Questions", required: true, validationRule: "Must capture question, answer, and decision impact for each clarifying question." },
247
- { section: "Approaches", required: true, validationRule: "Must compare 2-3 architecturally distinct options with real trade-offs and recommendation." },
248
- { section: "Selected Direction", required: true, validationRule: "Must include the selected approach, rationale, and explicit approval marker." },
249
- { section: "Design", required: true, validationRule: "Must cover architecture, key components, and data flow scaled to complexity." },
250
- { section: "Assumptions and Open Questions", required: true, validationRule: "Must capture unresolved assumptions/open questions, or explicitly state none." }
251
- ]
252
- };
253
- // ---------------------------------------------------------------------------
254
- // SCOPE — reference: gstack CEO review
255
- // ---------------------------------------------------------------------------
256
- const SCOPE = {
257
- stage: "scope",
258
- skillFolder: "scope-shaping",
259
- skillName: "scope-shaping",
260
- skillDescription: "Strategic scope stage. Challenge premise and lock explicit in-scope/out-of-scope boundaries using CEO-level thinking.",
261
- hardGate: "Do NOT begin architecture, design, or code. This stage produces scope decisions only. Do not silently add or remove scope — every change is an explicit user opt-in.",
262
- ironLaw: "EVERY SCOPE CHANGE IS AN EXPLICIT USER OPT-IN — NEVER A SILENT ENLARGEMENT OR TRIM.",
263
- purpose: "Decide the right scope before technical lock-in using explicit mode selection and rigorous premise challenge.",
264
- whenToUse: [
265
- "After brainstorm approval",
266
- "Before architecture/design lock-in",
267
- "When ambition vs feasibility trade-off is unclear"
268
- ],
269
- whenNotToUse: [
270
- "Brainstorm has not been approved yet",
271
- "Scope boundaries are already locked and user requested no scope changes",
272
- "The work is a pure implementation or debugging pass within existing scope"
273
- ],
274
- checklist: [
275
- "**Assess complexity** — Read the brainstorm artifact. If project is simple (single component, clear architecture, personal/prototype), run light-touch scope: mode selection, 3-5 key in/out boundaries, deferred items. Skip Dream State Mapping and Temporal Interrogation. If project is complex (multi-component, team delivery, production), run the full checklist.",
276
- "**Prime Directives** — Zero silent failures. For each in-scope capability, name concrete failure modes, the exact error surface, and trace all four data-flow paths (happy, nil, empty, upstream error). Include interaction edge cases (double-click, navigate-away, stale state), observability commitments, and explicit deferred-item logging.",
277
- "**Premise Challenge** — Is this the right problem? What if we do nothing? What are we optimizing for?",
278
- "**Existing Code Leverage** — Search for existing solutions before deciding to build new.",
279
- "**Dream State Mapping** — (complex projects only) describe the ideal state 12 months out using `CURRENT STATE -> THIS PLAN -> 12-MONTH IDEAL`, then verify this scope moves toward that target.",
280
- "**Implementation Alternatives** — Produce 2-3 distinct approaches. For each: Name, Summary, Effort (S/M/L/XL), Risk (Low/Med/High), 2-3 Pros, 2-3 Cons, and explicit Reuses. One option must be minimal viable, one must be ideal architecture.",
281
- "**Temporal Interrogation** — (complex projects only) simulate implementation timeline: HOUR 1 foundations, HOUR 2-3 core logic, HOUR 4-5 integration surprises, HOUR 6+ polish/tests. Decide what must be locked now vs safely deferred.",
282
- "**Mode Selection** — Present expand/selective/hold/reduce with recommendation and default heuristic: greenfield -> expand, feature enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius (>15 files or multi-team impact) -> reduce.",
283
- "**Mode-Specific Analysis** — After mode is selected, run the matching analysis: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope rigor then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split).",
284
- "**Error and Rescue Registry** — For each capability: what breaks, how detected, what fallback."
285
- ],
286
- interactionProtocol: [
287
- "For scope mode selection: use the Decision Protocol — present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
288
- "Walk through the scope checklist interactively. Each checklist item that surfaces a decision should be presented to the user as a question, not as a monologue. Do not dump all items at once.",
289
- "Challenge premise and verify the problem framing before anything else.",
290
- "Take a position on every scope decision. Avoid hedging phrases like 'this could work' or 'there are many ways'; state your recommendation and one concrete condition that would change it.",
291
- "Use pushback patterns when framing is weak: vague scope -> force a specific user/problem, platform vision -> force a narrowest viable wedge, social proof -> demand behavioral evidence.",
292
- "Present one structural scope issue at a time for decision. Do NOT batch. Use structured options for each scope boundary question.",
293
- "Record explicit in-scope and out-of-scope contract.",
294
- "Once the user accepts or rejects a recommendation, commit fully. Do not re-argue.",
295
- "Produce a clean scope summary after all issues are resolved.",
296
- "**STOP.** Wait for explicit user approval of scope contract before advancing to design."
297
- ],
298
- process: [
299
- "Run premise challenge and existing-solution leverage check.",
300
- "Produce 2-3 scope alternatives in a structured format (Name, Summary, Effort, Risk, Pros, Cons, Reuses) with minimum viable and ideal architecture options included.",
301
- "Choose scope mode with user approval.",
302
- "Run mode-specific analysis that matches the selected scope mode.",
303
- "Walk through scope review sections one at a time.",
304
- "Write explicit scope contract, discretion areas, and deferred items.",
305
- "Produce scope summary plus completion dashboard (checklist findings, number of resolved decisions, unresolved items or `None`)."
306
- ],
307
- requiredGates: [
308
- { id: "scope_premise_challenged", description: "Problem framing and assumptions were challenged." },
309
- { id: "scope_alternatives_produced", description: "At least 2 implementation alternatives were evaluated with explicit effort/risk and reuse fields." },
310
- { id: "scope_mode_selected", description: "One scope mode was explicitly selected." },
311
- { id: "scope_contract_written", description: "In-scope/out-of-scope contract is documented." },
312
- { id: "scope_discretion_documented", description: "Discretion areas are documented (or explicitly marked as none)." },
313
- { id: "scope_user_approved", description: "User approved the final scope direction." }
314
- ],
315
- requiredEvidence: [
316
- "Artifact written to `.cclaw/artifacts/02-scope.md`.",
317
- "In-scope and out-of-scope lists are explicit.",
318
- "Discretion areas are explicit (or marked as `None`).",
319
- "Selected mode and rationale are documented.",
320
- "Premise challenge findings documented.",
321
- "Deferred items list with one-line rationale for each.",
322
- "Completion dashboard lists checklist findings, decision count, and unresolved items (or `None`)."
323
- ],
324
- inputs: ["brainstorm artifact", "timeline constraints", "product priorities"],
325
- requiredContext: [
326
- "approved brainstorm direction",
327
- "existing capabilities and reusable components",
328
- "delivery deadlines and risk tolerance"
329
- ],
330
- researchPlaybooks: [
331
- "research/git-history.md"
332
- ],
333
- outputs: ["scope mode decision", "scope contract", "discretion areas list", "deferred scope list", "scope summary", "scope completion dashboard"],
334
- blockers: [
335
- "scope mode not selected",
336
- "in/out boundaries ambiguous",
337
- "discretion areas undefined",
338
- "critical premise disagreement unresolved"
339
- ],
340
- exitCriteria: [
341
- "scope contract approved by user",
342
- "discretion areas recorded explicitly",
343
- "required gates marked satisfied",
344
- "deferred list recorded explicitly",
345
- "completion dashboard produced",
346
- "scope summary produced"
347
- ],
348
- antiPatterns: [
349
- "Scope silently expanded during discussion",
350
- "No explicit out-of-scope section",
351
- "Premise accepted without challenge",
352
- "Sycophantic agreement without evidence-based pushback",
353
- "Hedged recommendations that avoid taking a position",
354
- "Batching multiple scope issues into one question",
355
- "Re-arguing for smaller scope after user rejects reduction"
356
- ],
357
- redFlags: [
358
- "No selected mode in artifact",
359
- "Mode selected without heuristic justification",
360
- "No discretion section (or explicit `None`) in artifact",
361
- "No deferred/not-in-scope section",
362
- "No user approval marker",
363
- "Premise challenge missing or superficial",
364
- "No implementation alternatives evaluated"
365
- ],
366
- policyNeedles: ["Scope mode", "In Scope", "Out of Scope", "Discretion Areas", "NOT in scope", "Premise Challenge"],
367
- artifactFile: "02-scope.md",
368
- next: "design",
369
- reviewSections: [
370
- {
371
- title: "Scope Boundary Audit",
372
- evaluationPoints: [
373
- "Are all in-scope items justified by the problem statement?",
374
- "Are any in-scope items actually solving a proxy problem instead of the real one?",
375
- "Could any in-scope item be deferred without blocking the core objective?"
376
- ],
377
- stopGate: true
378
- },
379
- {
380
- title: "Deferred Items Review",
381
- evaluationPoints: [
382
- "Does each deferred item have a one-line rationale?",
383
- "Are any deferred items actually blockers for the core scope?",
384
- "Will deferring these items create technical debt that is expensive to unwind?"
385
- ],
386
- stopGate: true
387
- },
388
- {
389
- title: "Risk and Reversibility Check",
390
- evaluationPoints: [
391
- "For each major scope decision: is it reversible?",
392
- "What is the blast radius if this decision is wrong?",
393
- "Are there hidden dependencies between in-scope and out-of-scope items?"
394
- ],
395
- stopGate: true
396
- },
397
- {
398
- title: "Existing-Code Reuse Check",
399
- evaluationPoints: [
400
- "Has every sub-problem been mapped to existing code?",
401
- "Is the plan rebuilding anything that already exists?",
402
- "Are there integration opportunities that reduce new code?",
403
- "Have you searched for built-in or library solutions before scoping custom work?"
404
- ],
405
- stopGate: true
406
- },
407
- {
408
- title: "Error & Rescue Scope Check",
409
- evaluationPoints: [
410
- "For every new capability: what breaks if it fails?",
411
- "Is failure detection in scope or deferred? If deferred, is that acceptable?",
412
- "Are there rescue/fallback paths for critical user journeys?",
413
- "Is observability (logging, metrics, alerts) explicitly in or out of scope?"
414
- ],
415
- stopGate: true
416
- }
417
- ],
418
- completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
419
- crossStageTrace: {
420
- readsFrom: [".cclaw/artifacts/01-brainstorm.md"],
421
- writesTo: [".cclaw/artifacts/02-scope.md"],
422
- traceabilityRule: "Every scope boundary must be traceable to a brainstorm decision. Every downstream design choice must stay within the scope contract."
423
- },
424
- artifactValidation: [
425
- { section: "Prime Directives", required: true, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
426
- { section: "Premise Challenge", required: true, validationRule: "Must contain explicit answers to: right problem? direct path? what if nothing?" },
427
- { section: "Requirements", required: true, validationRule: "Table of stable requirement IDs (R1, R2, R3…) one per row with observable outcome, priority, and source. IDs are assigned once and never renumbered across scope/design/spec/plan/review; dropped requirements stay with Priority `DROPPED`." },
428
- { section: "Implementation Alternatives", required: true, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
429
- { section: "Scope Mode", required: true, validationRule: "Must state selected mode and rationale with default heuristic justification." },
430
- { section: "Mode-Specific Analysis", required: true, validationRule: "Must document the analysis matching the selected scope mode: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope baseline then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split)." },
431
- { section: "In Scope / Out of Scope", required: true, validationRule: "Two separate explicit lists. Out-of-scope must not be empty." },
432
- { section: "Discretion Areas", required: true, validationRule: "Explicit list of implementer decision zones, or 'None' if scope is fully locked." },
433
- { section: "Deferred Items", required: true, validationRule: "Each item has one-line rationale. If empty, state 'None' explicitly." },
434
- { section: "Error & Rescue Registry", required: true, validationRule: "Each scoped capability has: failure mode, detection method, fallback decision." },
435
- { section: "Completion Dashboard", required: true, validationRule: "Lists checklist findings, count of resolved decisions, and unresolved decisions (or 'None')." },
436
- { section: "Scope Summary", required: true, validationRule: "Clean summary: mode, strongest challenges, recommended path, accepted scope, deferred, excluded." },
437
- { section: "Dream State Mapping", required: false, validationRule: "If present (complex projects): CURRENT STATE, THIS PLAN, 12-MONTH IDEAL, and alignment verdict." },
438
- { section: "Temporal Interrogation", required: false, validationRule: "If present (complex projects): timeline simulation table with decision pressures and lock-now vs defer verdicts." }
439
- ]
440
- };
441
- // ---------------------------------------------------------------------------
442
- // DESIGN — reference: gstack Eng review
443
- // ---------------------------------------------------------------------------
444
- const DESIGN = {
445
- stage: "design",
446
- skillFolder: "engineering-design-lock",
447
- skillName: "engineering-design-lock",
448
- skillDescription: "Engineering lock-in stage. Build a concrete technical spine before spec and planning, with section-by-section interactive review.",
449
- hardGate: "Do NOT write implementation code. This stage produces design decisions and architecture documents only. No code changes, no scaffolding, no test files.",
450
- ironLaw: "NO DESIGN DECISION WITHOUT A LABELED DIAGRAM, A REJECTED ALTERNATIVE, AND A NAMED FAILURE MODE.",
451
- purpose: "Lock architecture, data flow, failure modes, and test/performance expectations through rigorous interactive review.",
452
- whenToUse: [
453
- "After scope contract approval",
454
- "Before writing final spec and execution plan",
455
- "When architecture risks need explicit treatment"
456
- ],
457
- whenNotToUse: [
458
- "Scope mode and boundaries are still unresolved",
459
- "The change is docs-only or metadata-only with no architecture impact",
460
- "Implementation has already started and requires review instead of design lock"
461
- ],
462
- checklist: [
463
- "Trivial-Change Escape Hatch — If scope artifact shows ≤3 files, zero new interfaces, and no cross-module data flow, skip full review sections. Produce a mini-design: one paragraph of rationale, list of changed files, one risk to watch. Proceed to spec.",
464
- "Design Doc Check — read existing design docs, scope artifact, brainstorm artifact. If a design doc exists that covers this area, check for 'Supersedes:' and use the latest. Use upstream artifacts as source of truth.",
465
- "Codebase Investigation — Before any design decision, read the actual code in the blast radius. List every file that will be touched, its current responsibilities, and existing patterns (error handling, naming, test style). Design must conform to discovered patterns, not impose new ones without justification.",
466
- "Step 0: Scope Challenge — what existing code solves sub-problems? Minimum change set? Complexity check: 8+ files or 2+ new services = complexity smell → flag for possible scope reduction.",
467
- "Search Before Building — For each technical choice (library, pattern, architecture), search for existing solutions. Label findings: Layer 1 (exact match), Layer 2 (partial match, needs adaptation), Layer 3 (inspiration only), EUREKA (unexpected perfect solution). Default to existing before custom.",
468
- "Architecture Review — system design, component boundaries, data flow, scaling, security architecture. For each new codepath: one realistic production failure scenario. **Mandatory:** produce at least one architecture diagram (ASCII, Mermaid, or tool-generated) showing component boundaries and data flow direction. Apply the **Visual Communication rules** (see below) — an unlabeled or generic diagram is worse than no diagram, because it pretends to encode decisions it does not.",
469
- "Code Quality Review — code organization, DRY violations, error handling patterns, over/under-engineering assessment.",
470
- "Test Review — diagram every new flow, data path, error path. For each: what test type covers it? Does one exist? What is the gap? Produce test plan artifact.",
471
- "Performance Review — N+1 queries, memory concerns, caching opportunities, slow code paths. What breaks at 10x load? At 100x?",
472
- "Parallelization Strategy — If multiple independent modules, produce dependency table: which can be built in parallel? Where are conflict risks? Flag shared-state modules.",
473
- "Unresolved Decisions — List any design decisions that could not be resolved in this session. For each: what information is missing? Who can provide it? What is the default if no answer comes?",
474
- "Distribution Check — If the plan creates new artifact types (packages, CLI tools, configs), document the build/publish story. How does it reach the user?",
475
- "Deferred Items Cross-Reference — Collect every item explicitly deferred during design review. Each must appear in the Unresolved Decisions table or in the upstream scope artifact's deferred list. No deferred item may exist only in conversation — it must be written down."
476
- ],
477
- interactionProtocol: [
478
- "Review architecture decisions section-by-section.",
479
- "For EACH issue found in a review section, present it ONE AT A TIME. Do NOT batch multiple issues.",
480
- "For each issue: use the Decision Protocol — describe concretely with file/line references, present labeled options (A/B/C) with trade-offs, effort estimate (S/M/L/XL), risk level (Low/Med/High), and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that best covers architecture, data-flow, failure-modes, test, and perf review concerns for the issue with the lowest risk. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
481
- "Only proceed to the next review section after ALL issues in the current section are resolved.",
482
- "If a section has no issues, say 'No issues found' and move on.",
483
- "Do not skip failure-mode mapping.",
484
- "For design baseline approval: present the full baseline. **STOP.** Do NOT proceed until user explicitly approves the design.",
485
- "Take a firm position on every recommendation. Do NOT hedge with 'it depends' or 'you could do either'. State your opinion, then justify it.",
486
- "Use pushback patterns for weak framing: if the user says 'it's just a small change', respond with 'small changes to shared interfaces have outsized blast radius — let's map it'. If 'we'll refactor later', respond with 'later never comes — show me the refactor ticket or do it now'.",
487
- "When the user's proposed architecture is suboptimal, say so directly. Offer the alternative with concrete trade-offs, do not bury criticism in praise.",
488
- "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity."
489
- ],
490
- process: [
491
- "Read upstream artifacts (brainstorm, scope).",
492
- "Investigate codebase: read files in blast radius, catalogue current patterns and responsibilities.",
493
- "Run Step 0 scope challenge: existing code leverage, minimum change set, complexity check.",
494
- "Walk through each review section interactively.",
495
- "Define architecture boundaries and ownership.",
496
- "Describe data flow and state transitions with edge paths.",
497
- "Map failure modes and recovery strategy.",
498
- "Define test coverage strategy and performance budget.",
499
- "Produce required outputs: NOT-in-scope section, What-already-exists section, diagrams, failure mode table.",
500
- "Produce completion dashboard: list every review section with status (clear / issues-found-resolved / issues-open), count of decisions made, and list of unresolved items.",
501
- "Write design lock artifact for downstream spec/plan."
502
- ],
503
- requiredGates: [
504
- { id: "design_codebase_investigated", description: "Blast-radius files read and current patterns catalogued." },
505
- { id: "design_scope_challenge_done", description: "Step 0 scope challenge completed with existing-code mapping." },
506
- { id: "design_architecture_locked", description: "Architecture boundaries are explicit and approved." },
507
- { id: "design_data_flow_mapped", description: "Data/state flow includes edge-case paths." },
508
- { id: "design_failure_modes_mapped", description: "Failure modes and mitigations are documented." },
509
- { id: "design_test_and_perf_defined", description: "Test strategy and performance budget are defined." }
510
- ],
511
- requiredEvidence: [
512
- "Artifact written to `.cclaw/artifacts/03-design.md`.",
513
- "Failure-mode table exists with mitigations.",
514
- "Test strategy includes unit/integration/e2e expectations.",
515
- "NOT-in-scope section produced.",
516
- "What-already-exists section produced.",
517
- "Completion dashboard lists every review section status, decision count, and unresolved items (or 'None')."
518
- ],
519
- inputs: ["scope contract", "system constraints", "non-functional requirements"],
520
- requiredContext: [
521
- "existing architecture and boundaries",
522
- "operational constraints",
523
- "security and reliability expectations"
524
- ],
525
- researchPlaybooks: [
526
- "research/framework-docs-lookup.md",
527
- "research/best-practices-lookup.md"
528
- ],
529
- outputs: [
530
- "architecture lock",
531
- "risk and failure map",
532
- "test and performance baseline",
533
- "NOT-in-scope section",
534
- "What-already-exists section",
535
- "design completion dashboard"
536
- ],
537
- blockers: [
538
- "architecture ambiguity remains",
539
- "failure modes not mapped",
540
- "test/performance targets missing"
541
- ],
542
- exitCriteria: [
543
- "design baseline approved",
544
- "all review sections completed",
545
- "required gates marked satisfied",
546
- "completion dashboard present with all review-section statuses",
547
- "artifact complete for spec handoff"
548
- ],
549
- antiPatterns: [
550
- "Architecture deferred to implementation phase",
551
- "Missing data-flow edge cases",
552
- "No performance budget for critical path",
553
- "Batching multiple design issues into one question",
554
- "Skipping review sections because plan seems simple",
555
- "Agreeing with user's architecture choice without evaluating alternatives",
556
- "Hedging every recommendation with 'it depends' instead of taking a position"
557
- ],
558
- redFlags: [
559
- "No explicit architecture boundary section",
560
- "No failure recovery strategy",
561
- "No defined test/perf baseline",
562
- "Review sections skipped or condensed",
563
- "No NOT-in-scope output section",
564
- "No What-already-exists output section",
565
- "Design decisions made without reading the actual code first"
566
- ],
567
- policyNeedles: [
568
- "Architecture",
569
- "Data Flow",
570
- "Failure Modes and Mitigation",
571
- "Performance Budget",
572
- "One issue at a time"
573
- ],
574
- artifactFile: "03-design.md",
575
- next: "spec",
576
- reviewSections: [
577
- {
578
- title: "Architecture Review",
579
- evaluationPoints: [
580
- "System design and component boundaries",
581
- "Dependency graph and coupling concerns",
582
- "Data flow patterns and potential bottlenecks",
583
- "Scaling characteristics and single points of failure",
584
- "Security architecture (auth, data access, API boundaries)",
585
- "For each new codepath: one realistic production failure scenario"
586
- ],
587
- stopGate: true
588
- },
589
- {
590
- title: "Code Quality Review",
591
- evaluationPoints: [
592
- "Code organization and module structure",
593
- "DRY violations — flag aggressively",
594
- "Error handling patterns and missing edge cases",
595
- "Over-engineered or under-engineered areas",
596
- "Existing diagrams in touched files — still accurate?"
597
- ],
598
- stopGate: true
599
- },
600
- {
601
- title: "Test Review",
602
- evaluationPoints: [
603
- "Diagram every new UX flow, data flow, codepath, background job, integration, error path",
604
- "For each: what type of test covers it? Does one exist? What is the gap?",
605
- "Coverage expectations: unit, integration, e2e split"
606
- ],
607
- stopGate: true
608
- },
609
- {
610
- title: "Performance Review",
611
- evaluationPoints: [
612
- "N+1 queries and database access patterns",
613
- "Memory-usage concerns",
614
- "Caching opportunities",
615
- "Slow or high-complexity code paths",
616
- "What breaks at 10x load? At 100x?"
617
- ],
618
- stopGate: true
619
- },
620
- {
621
- title: "Distribution & Delivery Review",
622
- evaluationPoints: [
623
- "If new artifact types are created (packages, CLI, configs): is the build/publish story documented?",
624
- "Are there new dependencies that need version pinning?",
625
- "Does the change affect existing consumers (APIs, shared modules)?",
626
- "Is backwards compatibility maintained or is a migration needed?"
627
- ],
628
- stopGate: false
629
- }
630
- ],
631
- completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
632
- crossStageTrace: {
633
- readsFrom: [".cclaw/artifacts/01-brainstorm.md", ".cclaw/artifacts/02-scope.md"],
634
- writesTo: [".cclaw/artifacts/03-design.md"],
635
- traceabilityRule: "Every architecture decision must trace to a scope boundary. Every downstream spec requirement must trace to a design decision."
636
- },
637
- artifactValidation: [
638
- { section: "Codebase Investigation", required: true, validationRule: "Must list blast-radius files with current responsibilities and discovered patterns." },
639
- { section: "Search Before Building", required: true, validationRule: "For each technical choice: Layer 1 (exact match), Layer 2 (partial match), Layer 3 (inspiration), EUREKA labels with reuse-first default." },
640
- { section: "Architecture Boundaries", required: true, validationRule: "Must list component boundaries with ownership." },
641
- { section: "Architecture Diagram", required: true, validationRule: "At least one diagram (ASCII, Mermaid, or image) showing component boundaries and data flow direction. Diagram must: (1) label every node with a concrete component name (no generic 'Service A/B'), (2) label every arrow with the action or message (no unlabeled arrows), (3) mark direction of data flow explicitly, (4) distinguish synchronous from asynchronous edges (e.g. solid vs dashed, or `sync:` / `async:` prefix), (5) show at least one failure edge or degraded-mode branch when the system has one." },
642
- { section: "Data Flow", required: true, validationRule: "Must include happy path, nil input, empty input, upstream error paths." },
643
- { section: "Failure Mode Table", required: true, validationRule: "Each failure mode has: trigger, detection, mitigation, user impact." },
644
- { section: "Test Strategy", required: true, validationRule: "Must define unit/integration/e2e expectations with coverage targets." },
645
- { section: "Performance Budget", required: true, validationRule: "For each critical path: metric name, target threshold, and measurement method." },
646
- { section: "What Already Exists", required: true, validationRule: "For each sub-problem: existing code/library found (Layer 1-3/EUREKA label), reuse decision, and adaptation needed." },
647
- { section: "NOT in scope", required: true, validationRule: "Work considered and explicitly deferred with one-line rationale." },
648
- { section: "Parallelization Strategy", required: false, validationRule: "If multi-module: dependency table, parallel lanes, conflict flags." },
649
- { section: "Unresolved Decisions", required: false, validationRule: "If any: what info is missing, who provides it, default if unanswered." },
650
- { section: "Interface Contracts", required: false, validationRule: "If present: for each module boundary list produces (outputs) and consumes (inputs) with data types." },
651
- { section: "Patterns to Mirror", required: false, validationRule: "If present: list discovered codebase patterns to follow, with file references and rationale for each." },
652
- { section: "Completion Dashboard", required: true, validationRule: "Lists every review section with status (clear / issues-found-resolved / issues-open), decision count, and unresolved items (or 'None')." }
653
- ],
654
- trivialOverrideSections: ["Architecture Boundaries", "NOT in scope", "Completion Dashboard"]
655
- };
656
- // ---------------------------------------------------------------------------
657
- // SPEC
658
- // ---------------------------------------------------------------------------
659
- const SPEC = {
660
- stage: "spec",
661
- skillFolder: "specification-authoring",
662
- skillName: "specification-authoring",
663
- skillDescription: "Specification stage. Produce measurable, testable requirements without ambiguity.",
664
- hardGate: "Do NOT plan tasks or write implementation code. This stage produces a specification document only. Every requirement must be expressed in observable, testable terms.",
665
- ironLaw: "EVERY ACCEPTANCE CRITERION MUST BE OBSERVABLE AND TESTABLE — OR IT DOES NOT EXIST.",
666
- purpose: "Create a testable specification aligned with approved design and constraints.",
667
- whenToUse: [
668
- "After design lock",
669
- "Before planning and implementation",
670
- "When acceptance criteria must be measurable"
671
- ],
672
- whenNotToUse: [
673
- "Design decisions are still unresolved or disputed",
674
- "The task is implementation-only cleanup with unchanged behavior",
675
- "You still need to challenge scope rather than author requirements"
676
- ],
677
- checklist: [
678
- "Read upstream — load design artifact and scope contract. Cross-reference architecture decisions.",
679
- "Define measurable acceptance criteria — each criterion must be observable and falsifiable. No vague adjectives.",
680
- "Capture edge cases — for each criterion, define at least one boundary condition and one error condition.",
681
- "Document constraints and assumptions — regulatory, system, integration, and performance boundaries. Surface implicit assumptions explicitly.",
682
- "Confirm testability — for each acceptance criterion, describe the test that would prove it. If untestable, rewrite the criterion.",
683
- "Write spec artifact and request user approval — wait for explicit confirmation before proceeding."
684
- ],
685
- interactionProtocol: [
686
- "Express each requirement in observable terms.",
687
- "Resolve ambiguity before moving to plan. Challenge vague language.",
688
- "Capture assumptions explicitly, not implicitly.",
689
- "Require user confirmation on the written spec. **STOP.** Do NOT proceed to plan until user approves.",
690
- "For each criterion, ask: how would you test this? If the answer is unclear, rewrite.",
691
- "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity."
692
- ],
693
- process: [
694
- "Define measurable acceptance criteria.",
695
- "Capture constraints, assumptions, and edge cases.",
696
- "Build testability map: criterion -> test description.",
697
- "Confirm testability for each criterion.",
698
- "Write spec artifact and request approval."
699
- ],
700
- requiredGates: [
701
- { id: "spec_acceptance_measurable", description: "Acceptance criteria are measurable and observable." },
702
- { id: "spec_edge_cases_documented", description: "Boundary and error conditions are defined for each criterion." },
703
- { id: "spec_constraints_documented", description: "Constraints and assumptions are explicit." },
704
- { id: "spec_testability_confirmed", description: "Each criterion has a described test method." },
705
- { id: "spec_user_approved", description: "User approved the final written spec." }
706
- ],
707
- requiredEvidence: [
708
- "Artifact written to `.cclaw/artifacts/04-spec.md`.",
709
- "Each acceptance criterion maps to a testable outcome.",
710
- "Edge cases documented per criterion.",
711
- "Approval marker captured in artifact."
712
- ],
713
- inputs: ["design artifact", "business constraints", "quality requirements"],
714
- requiredContext: [
715
- "design lock baseline",
716
- "regulatory or system boundaries",
717
- "integration constraints"
718
- ],
719
- outputs: [
720
- "measurable specification",
721
- "acceptance-to-testability map",
722
- "approved spec artifact"
723
- ],
724
- blockers: [
725
- "non-measurable criteria",
726
- "constraints missing",
727
- "open ambiguities remain"
728
- ],
729
- exitCriteria: [
730
- "spec approved by user",
731
- "required gates marked satisfied",
732
- "plan-ready acceptance mapping exists",
733
- "testability confirmed for all criteria"
734
- ],
735
- antiPatterns: [
736
- "High-level goals without measurable outcomes",
737
- "Implicit assumptions",
738
- "Proceeding to plan before approval",
739
- "Using vague adjectives (fast, intuitive, robust) without thresholds"
740
- ],
741
- redFlags: [
742
- "Criteria use vague language (fast, intuitive, robust) without thresholds",
743
- "No explicit assumptions section",
744
- "No approval record",
745
- "No testability mapping",
746
- "Edge cases missing or deferred"
747
- ],
748
- policyNeedles: ["Acceptance Criteria", "Constraints", "Testability", "approved spec", "Edge Cases"],
749
- artifactFile: "04-spec.md",
750
- next: "plan",
751
- reviewSections: [
752
- {
753
- title: "Acceptance Criteria Audit",
754
- evaluationPoints: [
755
- "Is every criterion observable (can you point to evidence of pass/fail)?",
756
- "Is every criterion measurable (numeric threshold or boolean outcome)?",
757
- "Is every criterion falsifiable (can you describe what failure looks like)?",
758
- "Does every criterion trace to a design decision (Design Decision Ref)?",
759
- "Are there any vague adjectives (fast, intuitive, robust) without thresholds?"
760
- ],
761
- stopGate: true
762
- },
763
- {
764
- title: "Testability Audit",
765
- evaluationPoints: [
766
- "Does every criterion have a concrete test description in the Testability Map?",
767
- "Does every test specify a verification approach (unit, integration, e2e, manual)?",
768
- "Does every test include a runnable command or manual steps?",
769
- "Are edge cases (boundary + error) defined for every criterion?",
770
- "Can you run every verification command right now and get a meaningful result?"
771
- ],
772
- stopGate: true
773
- }
774
- ],
775
- completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
776
- crossStageTrace: {
777
- readsFrom: [".cclaw/artifacts/03-design.md", ".cclaw/artifacts/02-scope.md"],
778
- writesTo: [".cclaw/artifacts/04-spec.md"],
779
- traceabilityRule: "Every acceptance criterion must trace to a design decision. Every downstream plan task must trace to a spec criterion."
780
- },
781
- artifactValidation: [
782
- { section: "Acceptance Criteria", required: true, validationRule: "Each criterion is observable, measurable, and falsifiable. Table must include a Requirement Ref column linking to R# IDs in 02-scope.md and a Design Decision Ref column tracing back to design artifact. AC IDs (AC-1, AC-2…) are stable across revisions — dropped ACs stay with Priority `DROPPED`." },
783
- { section: "Edge Cases", required: true, validationRule: "At least one boundary and one error condition per criterion." },
784
- { section: "Constraints and Assumptions", required: true, validationRule: "All implicit assumptions surfaced. Constraints have sources." },
785
- { section: "Testability Map", required: true, validationRule: "Each criterion maps to a concrete test description with verification approach (unit, integration, e2e, manual) and command or manual steps." },
786
- { section: "Vague to Fixed", required: false, validationRule: "If present: table with original vague wording and rewritten observable/testable version for each ambiguous requirement." },
787
- { section: "Non-Functional Requirements", required: false, validationRule: "If present: performance thresholds, security constraints, scalability limits, reliability targets with measurable values." },
788
- { section: "Interface Contracts", required: false, validationRule: "If present: for each module boundary list produces (outputs) and consumes (inputs) with data types." },
789
- { section: "Approval", required: true, validationRule: "Explicit user approval marker present." }
790
- ]
791
- };
792
- // ---------------------------------------------------------------------------
793
- // PLAN
794
- // ---------------------------------------------------------------------------
795
- const PLAN = {
796
- stage: "plan",
797
- skillFolder: "planning-and-task-breakdown",
798
- skillName: "planning-and-task-breakdown",
799
- skillDescription: "Execution planning stage with strict confirmation gate before implementation.",
800
- hardGate: "Do NOT write code or tests. Planning only. This stage produces a task graph and execution order. WAIT_FOR_CONFIRM before any handoff to implementation.",
801
- ironLaw: "EVERY TASK IS 2–5 MINUTES, FULLY SPELLED OUT, AND CARRIES A STABLE ID — NO PLACEHOLDERS, NO ‘ETC.’.",
802
- purpose: "Create small executable tasks with dependencies and pause for explicit user confirmation.",
803
- whenToUse: [
804
- "After spec approval",
805
- "Before writing tests or implementation",
806
- "When delivery path and dependency order are needed"
807
- ],
808
- whenNotToUse: [
809
- "Specification is unapproved or lacks measurable acceptance criteria",
810
- "Execution is already in TDD stage with active slice evidence",
811
- "The request is only release packaging with no task decomposition needed"
812
- ],
813
- checklist: [
814
- "Read upstream — load spec, design, and scope artifacts. Cross-reference acceptance criteria.",
815
- "Build dependency graph — identify task ordering, parallel opportunities, and blocking dependencies.",
816
- "Group tasks into dependency waves — wave N+1 cannot start until wave N has verification evidence.",
817
- "Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
818
- "Attach verification — every task has an acceptance criterion mapping and a concrete verification command.",
819
- "Define checkpoints — mark points where progress should be validated before continuing.",
820
- "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. **STOP.** Do NOT proceed until user confirms. Then update `flow-state.json` and tell user to run `/cc-next`."
821
- ],
822
- interactionProtocol: [
823
- "Plan in read-only mode relative to implementation.",
824
- "Split work into small vertical slices (target 2-5 minute tasks).",
825
- "Publish explicit dependency waves with entry and exit checks for each wave.",
826
- "Attach verification step to every task.",
827
- "Enforce WAIT_FOR_CONFIRM: present the plan summary with options (A) Approve / (B) Revise / (C) Reject.",
828
- "**STOP.** Do NOT proceed until user explicitly approves. Then update `flow-state.json` and tell user to run `/cc-next`."
829
- ],
830
- process: [
831
- "Build dependency graph and ordered slices.",
832
- "Group slices into execution waves and define gate criteria per wave.",
833
- "Define each task with acceptance mapping and verification commands.",
834
- "Record checkpoints and blockers.",
835
- "Write plan artifact and pause at WAIT_FOR_CONFIRM."
836
- ],
837
- requiredGates: [
838
- { id: "plan_tasks_sliced_2_5_min", description: "Tasks are small, executable slices." },
839
- { id: "plan_dependency_graph_written", description: "Dependency graph and order are explicit." },
840
- { id: "plan_dependency_waves_defined", description: "Tasks are grouped into executable waves with gate checks." },
841
- { id: "plan_verification_steps_defined", description: "Each task has verification guidance." },
842
- { id: "plan_acceptance_mapped", description: "Each task maps to a spec acceptance criterion." },
843
- { id: "plan_wait_for_confirm", description: "Execution blocked until explicit user confirmation." }
844
- ],
845
- requiredEvidence: [
846
- "Artifact written to `.cclaw/artifacts/05-plan.md`.",
847
- "Task list includes acceptance mapping.",
848
- "Dependency graph documented.",
849
- "Dependency waves documented with wave-by-wave verification gates.",
850
- "WAIT_FOR_CONFIRM status recorded."
851
- ],
852
- inputs: ["approved spec", "codebase context", "delivery constraints"],
853
- requiredContext: [
854
- "spec acceptance criteria",
855
- "current architecture",
856
- "known technical debt and dependencies"
857
- ],
858
- outputs: ["task graph", "dependency wave plan", "ordered plan", "explicit confirmation checkpoint"],
859
- blockers: [
860
- "tasks too broad",
861
- "dependency uncertainty unresolved",
862
- "wave boundaries are unclear",
863
- "no explicit confirmation"
864
- ],
865
- exitCriteria: [
866
- "plan quality gates complete",
867
- "WAIT_FOR_CONFIRM present and unresolved until user approves",
868
- "artifact ready for TDD execution",
869
- "acceptance mapping complete"
870
- ],
871
- antiPatterns: [
872
- "Horizontal decomposition without end-to-end slices",
873
- "Tasks without verification steps",
874
- "Starting execution before approval",
875
- "Tasks that touch multiple unrelated areas"
876
- ],
877
- redFlags: [
878
- "No dependency graph",
879
- "No WAIT_FOR_CONFIRM marker",
880
- "No explicit dependency waves",
881
- "Tasks exceed one coherent outcome",
882
- "No acceptance mapping"
883
- ],
884
- policyNeedles: ["WAIT_FOR_CONFIRM", "Task Graph", "Dependency Waves", "Acceptance Mapping", "verification steps"],
885
- artifactFile: "05-plan.md",
886
- next: "tdd",
887
- reviewSections: [
888
- {
889
- title: "Task Decomposition Audit",
890
- evaluationPoints: [
891
- "Does every task target a single coherent area (vertical slice)?",
892
- "Can each task be completed in 2-5 minutes?",
893
- "Does every task have an acceptance criterion link and verification command?",
894
- "Are there tasks that touch multiple unrelated areas?",
895
- "Would a new engineer understand and start each task within two minutes?"
896
- ],
897
- stopGate: true
898
- },
899
- {
900
- title: "Wave Completeness Audit",
901
- evaluationPoints: [
902
- "Does every task belong to exactly one wave?",
903
- "Does each wave have a verification gate?",
904
- "Are wave dependencies explicit and acyclic?",
905
- "Is the acceptance mapping complete — every spec criterion covered?",
906
- "Are there hidden dependencies between tasks in different waves?"
907
- ],
908
- stopGate: true
909
- },
910
- {
911
- title: "Five-Minute Budget + No-Placeholders Audit",
912
- evaluationPoints: [
913
- "Does every task carry an explicit minutes estimate (e.g. `[~3m]`) and does every estimate fit the 2-to-5-minute budget? Estimates >5 minutes must be split.",
914
- "Are all file paths, test commands, and verification commands copy-pasteable as written — no `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, or ellipsis standing in for omitted args?",
915
- "Does every acceptance-criterion reference resolve to a real R# / AC-### in the spec (not a blank link)?",
916
- "If an estimate is genuinely uncertain (first-time integration, unfamiliar library), is the uncertainty named explicitly and scheduled as a spike task in wave 0, rather than hidden behind a large estimate?"
917
- ],
918
- stopGate: true
919
- }
920
- ],
921
- completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
922
- crossStageTrace: {
923
- readsFrom: [".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/03-design.md", ".cclaw/artifacts/02-scope.md"],
924
- writesTo: [".cclaw/artifacts/05-plan.md"],
925
- traceabilityRule: "Every task must trace to a spec acceptance criterion. Every downstream RED test must trace to a plan task."
926
- },
927
- artifactValidation: [
928
- { section: "Dependency Graph", required: true, validationRule: "Ordering and parallel opportunities explicit. No circular dependencies." },
929
- { section: "Dependency Waves", required: true, validationRule: "Every task belongs to a wave. Each wave has an exit gate and dependency statement." },
930
- { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, verification command, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget." },
931
- { section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
932
- { section: "Risk Assessment", required: false, validationRule: "If present: per-task or per-wave risk identification with likelihood, impact, and mitigation strategy." },
933
- { section: "Boundary Map", required: false, validationRule: "If present: per-wave or per-task interface contracts listing what each task produces (exports) and consumes (imports) from other tasks." },
934
- { section: "WAIT_FOR_CONFIRM", required: true, validationRule: "Explicit marker present. Status: pending until user approves." },
935
- { section: "No-Placeholder Scan", required: false, validationRule: "If present: confirmation that a text scan for `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, or bare ellipses has zero hits in the task list. A placeholder is a deferred decision masquerading as a plan." }
936
- ]
937
- };
938
- // ---------------------------------------------------------------------------
939
- // TDD — RED → GREEN → REFACTOR cycle (merged test + build)
940
- // ---------------------------------------------------------------------------
941
- const TDD = {
942
- stage: "tdd",
943
- skillFolder: "test-driven-development",
944
- skillName: "test-driven-development",
945
- skillDescription: "Full TDD cycle: RED (failing tests), GREEN (minimal implementation), REFACTOR (cleanup). One plan slice at a time with strict traceability.",
946
- hardGate: "Do NOT merge, ship, or skip review. Follow RED → GREEN → REFACTOR strictly for each plan slice. Do NOT write implementation code before RED tests exist. Do NOT skip the REFACTOR step.",
947
- ironLaw: "NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST — THE RED FAILURE IS THE SPEC.",
948
- purpose: "Implement features through the TDD cycle: write failing tests, make them pass with minimal code, then refactor.",
949
- whenToUse: [
950
- "After plan confirmation",
951
- "For every behavior change in scope",
952
- "Before review stage"
953
- ],
954
- whenNotToUse: [
955
- "Plan approval is still pending WAIT_FOR_CONFIRM",
956
- "The change is docs-only and does not alter behavior",
957
- "The stage intent is review/ship sign-off rather than implementation"
958
- ],
959
- checklist: [
960
- "Select plan slice — pick one task from the plan. Do not batch multiple tasks.",
961
- "Map to acceptance criterion — identify the specific spec criterion this test proves.",
962
- "RED: Write behavior-focused test — test the expected behavior, not implementation details. Tests MUST fail.",
963
- "RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
964
- "GREEN: Minimal implementation — write the smallest code change that makes the RED tests pass. No extra features.",
965
- "GREEN: Run full suite — execute ALL tests, not just the ones you wrote. The full suite must be GREEN.",
966
- "GREEN: Verify no regressions — if any existing test breaks, fix the regression before proceeding.",
967
- "REFACTOR: Improve code quality — without changing behavior. Document what you changed and why.",
968
- "Record evidence — capture RED failure, GREEN output, and REFACTOR notes in the TDD artifact.",
969
- "Annotate traceability — link to plan task ID and spec criterion.",
970
- "Repeat for each slice — return to step 1 for the next plan slice."
971
- ],
972
- interactionProtocol: [
973
- "Pick one planned slice at a time.",
974
- "Write behavior-focused tests before changing implementation (RED).",
975
- "Capture and store failing output as RED evidence.",
976
- "Apply minimal change to satisfy RED tests (GREEN).",
977
- "Run full suite, not partial checks, for GREEN validation.",
978
- "Refactor without changing behavior and document rationale (REFACTOR).",
979
- "Stop if regressions appear and fix before proceeding.",
980
- "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?"
981
- ],
982
- process: [
983
- "Select slice and map to acceptance criterion.",
984
- "Write test(s) that fail for expected reason (RED).",
985
- "Run tests and capture failure output.",
986
- "Implement smallest change needed for GREEN.",
987
- "Run full tests and build checks.",
988
- "Perform refactor pass preserving behavior.",
989
- "Record RED, GREEN, and REFACTOR evidence in artifact.",
990
- "Annotate traceability to plan task and spec criterion."
991
- ],
992
- requiredGates: [
993
- { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
994
- { id: "tdd_red_failure_captured", description: "Failure output is captured as evidence." },
995
- { id: "tdd_trace_to_acceptance", description: "RED tests trace to explicit acceptance criteria." },
996
- { id: "tdd_red_failure_reason_verified", description: "Failure is for the expected reason, not an unrelated error." },
997
- { id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
998
- { id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
999
- { id: "tdd_refactor_notes_written", description: "Refactor decisions and outcomes are documented." },
1000
- { id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." }
1001
- ],
1002
- requiredEvidence: [
1003
- "Artifact updated at `.cclaw/artifacts/06-tdd.md` with RED, GREEN, and REFACTOR sections.",
1004
- "Failing command output captured (RED).",
1005
- "Full test/build output recorded (GREEN).",
1006
- "Acceptance mapping documented.",
1007
- "Failure reason analysis recorded.",
1008
- "Refactor rationale captured.",
1009
- "Traceability to task identifier is documented."
1010
- ],
1011
- inputs: ["approved plan slice", "spec acceptance criterion", "test harness configuration", "coding standards and constraints"],
1012
- requiredContext: ["plan artifact", "spec artifact", "existing test patterns"],
1013
- outputs: ["failing test set", "passing implementation", "refactor evidence", "review-ready change set"],
1014
- blockers: [
1015
- "tests pass before behavior change (RED failure missing)",
1016
- "full suite not green",
1017
- "behavior changed during refactor",
1018
- "no evidence recorded"
1019
- ],
1020
- exitCriteria: [
1021
- "RED evidence exists and is traceable",
1022
- "GREEN evidence captured with full suite pass",
1023
- "REFACTOR evidence captured",
1024
- "required gates marked satisfied",
1025
- "traceability annotated"
1026
- ],
1027
- antiPatterns: [
1028
- "Writing code before failing test",
1029
- "Asserting implementation details instead of behavior",
1030
- "Big-bang implementation across multiple slices",
1031
- "Partial test runs presented as GREEN",
1032
- "Skipping evidence capture",
1033
- "Undocumented refactor changes",
1034
- "Adding features beyond what RED tests require"
1035
- ],
1036
- redFlags: [
1037
- "No failing test output (RED missing)",
1038
- "Implementation edits appear before RED evidence",
1039
- "No full-suite GREEN evidence",
1040
- "No refactor notes",
1041
- "Multiple tasks implemented in one pass without justification",
1042
- "Files changed outside current slice scope"
1043
- ],
1044
- policyNeedles: ["RED", "GREEN", "REFACTOR", "failing test", "full test suite", "acceptance criteria", "traceable to plan slice"],
1045
- artifactFile: "06-tdd.md",
1046
- next: "review",
1047
- reviewSections: [
1048
- {
1049
- title: "RED Evidence Audit",
1050
- evaluationPoints: [
1051
- "Does every slice have a captured failing test output?",
1052
- "Does each failure reason match the expected missing behavior (not a typo or config error)?",
1053
- "Were tests written BEFORE any production code for that slice?",
1054
- "Does each RED test assert observable behavior, not implementation details?",
1055
- "Is there a test for each acceptance criterion mapped in the plan?"
1056
- ],
1057
- stopGate: true
1058
- },
1059
- {
1060
- title: "GREEN/REFACTOR Audit",
1061
- evaluationPoints: [
1062
- "Does GREEN evidence show a FULL suite pass (not partial)?",
1063
- "Is the GREEN implementation minimal — no features beyond what RED tests require?",
1064
- "Does the REFACTOR step preserve all existing behavior (no new failures)?",
1065
- "Are REFACTOR notes documented with rationale?",
1066
- "Is traceability complete: every change links to plan task ID and spec criterion?"
1067
- ],
1068
- stopGate: true
1069
- },
1070
- {
1071
- title: "Test Pyramid + Size Audit",
1072
- evaluationPoints: [
1073
- "Is the tests-added count skewed toward Small (unit) tests, with Medium and Large used only when a real boundary justifies the cost?",
1074
- "Does every newly added test declare a size class (Small / Medium / Large) — either inline in the test file or in the TDD artifact table?",
1075
- "Are Large tests reserved for genuine end-to-end user journeys (not substitutes for unit coverage)?",
1076
- "Has the slice avoided using Medium/Large tests to paper over testability problems that should be fixed at the design layer?"
1077
- ],
1078
- stopGate: false
1079
- },
1080
- {
1081
- title: "Prove-It Reproduction (bug-fix slices)",
1082
- evaluationPoints: [
1083
- "Does the artifact identify this slice as a bug fix, and if so, include a reproduction test checked in alongside the fix?",
1084
- "Is there captured RED evidence from running the reproduction WITHOUT the fix applied?",
1085
- "Is there captured GREEN evidence from the same reproduction AFTER the fix was applied?",
1086
- "Is there a note confirming the reproduction test fails again if the fix is reverted (or equivalent evidence that the test is actually pinned to this fix)?"
1087
- ],
1088
- stopGate: false
1089
- },
1090
- {
1091
- title: "State-over-Interaction + Beyoncé Coverage",
1092
- evaluationPoints: [
1093
- "Do assertions target observable state (return values, persisted data, HTTP responses, logs) rather than which internal helpers were called?",
1094
- "Are mocks/spies used only at true trust boundaries (network, filesystem, time, external services), not for module-internal collaborators?",
1095
- "For every public surface touched in this slice (exported API, CLI flag, config key, env var, exit code, schema field) — does at least one test observe it?",
1096
- "If a bug or review finding revealed an uncovered surface, was a test added alongside the fix, not just the code change?",
1097
- "Are interaction-style assertions (e.g. `toHaveBeenCalledWith` without a state assertion) justified by an explicit boundary comment, or flagged for follow-up?"
1098
- ],
1099
- stopGate: false
1100
- }
1101
- ],
1102
- completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
1103
- crossStageTrace: {
1104
- readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/03-design.md"],
1105
- writesTo: [".cclaw/artifacts/06-tdd.md"],
1106
- traceabilityRule: "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Design decisions inform test strategy. Evidence chain must be unbroken."
1107
- },
1108
- artifactValidation: [
1109
- { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
1110
- { section: "Acceptance Mapping", required: true, validationRule: "Each RED test links to a plan task and spec criterion." },
1111
- { section: "Failure Analysis", required: true, validationRule: "Failure reason matches expected missing behavior." },
1112
- { section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
1113
- { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
1114
- { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
1115
- { section: "Verification Ladder", required: false, validationRule: "If present: per-slice verification tier (static, command, behavioral, human) with evidence for highest tier reached." },
1116
- { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
1117
- { section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
1118
- { section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." }
1119
- ],
1120
- waveExecutionAllowed: true
1121
- };
1122
- // ---------------------------------------------------------------------------
1123
- // REVIEW — reference: superpowers code-review + gstack /review
1124
- // ---------------------------------------------------------------------------
1125
- const REVIEW = {
1126
- stage: "review",
1127
- skillFolder: "two-layer-review",
1128
- skillName: "two-layer-review",
1129
- skillDescription: "Two-layer review stage: spec compliance first, then code quality and production readiness. Section-by-section with severity discipline.",
1130
- hardGate: "Do NOT ship, merge, or release until both review layers complete with an explicit verdict. No exceptions for urgency. Critical blockers MUST be resolved before handoff.",
1131
- ironLaw: "NO SHIP VERDICT UNTIL BOTH REVIEW LAYERS COMPLETE AND EVERY CRITICAL IS RESOLVED OR EXPLICITLY ACCEPTED.",
1132
- purpose: "Validate that implementation matches spec and meets quality/security/performance bar through structured two-layer review.",
1133
- whenToUse: [
1134
- "After TDD stage completes",
1135
- "Before any ship action",
1136
- "When release risk must be assessed explicitly"
1137
- ],
1138
- whenNotToUse: [
1139
- "There is no implementation diff to review",
1140
- "TDD stage evidence is missing or stale",
1141
- "The goal is direct release execution without layered quality checks"
1142
- ],
1143
- checklist: [
1144
- "Diff Scope — Run `git diff` against base branch. If no diff, exit early with APPROVED (no changes to review). Scope the review to changed files unless blast-radius analysis requires wider inspection.",
1145
- "Change-Size Check — ~100 lines = normal. ~300 lines = consider splitting. ~1000+ lines = strongly recommend stacked PRs. Flag large diffs to the user.",
1146
- "Adversarial Trigger Check — compute changed-line count (`git diff --shortstat <base>..HEAD`), files-touched count, and whether trust boundaries changed (auth/secrets/external inputs/permissions). If `lines > 100` OR `files > 10` OR `trust boundary changed`, **dispatch a SECOND reviewer agent with the `adversarial-review` skill loaded** and reconcile its findings into the review army (treat the conditional dispatch as mandatory whenever the trigger holds; record the trigger that fired in the dashboard).",
1147
- "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and plan. Verify evidence chain is unbroken.",
1148
- "Layer 1: Spec Compliance — check every acceptance criterion against implementation. Verdict: pass/fail per criterion.",
1149
- "Layer 2a: Correctness — logic errors, race conditions, boundary violations, null handling.",
1150
- "Layer 2b: Security — input validation, auth boundaries, secrets exposure, injection vectors. **Mandatory:** also load and execute the `.cclaw/skills/security-audit/SKILL.md` utility skill (proactive pattern sweep across diff + touched modules, not just the diff itself) and merge findings into the review army. The Layer 2 security pass is not complete until the audit sweep records a finding count (0 acceptable) with file:line evidence for every Critical.",
1151
- "Layer 2c: Performance — N+1 queries, memory leaks, missing caching, hot paths.",
1152
- "Layer 2d: Architecture Fit — does the implementation match the locked design? Coupling, cohesion, interface contracts.",
1153
- "Layer 2e: External Safety — SQL safety, concurrency, secrets in logs, enum completeness (grep outside diff), LLM trust boundaries.",
1154
- "Review Army reconciliation — normalize findings into structured records, dedup by fingerprint, and mark multi-specialist confirmations.",
1155
- "Meta-Review — Were tests actually run? Do test names match what they test? Are there real assertions?",
1156
- "Classify findings — Critical (blocks ship), Important (should fix), Suggestion (optional improvement).",
1157
- "Produce verdict — APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED."
1158
- ],
1159
- interactionProtocol: [
1160
- "Run Layer 1 (spec compliance) completely before starting Layer 2.",
1161
- "In each review section, present findings ONE AT A TIME. Do NOT batch.",
1162
- "Classify every finding as Critical, Important, or Suggestion.",
1163
- "For each Critical finding: use the Decision Protocol — present resolution options (A/B/C) with trade-offs, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
1164
- "Resolve all critical blockers before ship.",
1165
- "For final verdict: use AskQuestion/AskUserQuestion only if runtime schema is confirmed; otherwise collect verdict with a plain-text single-choice prompt (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED).",
1166
- "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict."
1167
- ],
1168
- process: [
1169
- "Layer 1: check acceptance criteria and requirement coverage.",
1170
- "Layer 2a: check correctness — logic, races, boundaries, null handling.",
1171
- "Layer 2b: check security — validation, auth, secrets, injection.",
1172
- "Layer 2c: check performance — queries, memory, caching, hot paths.",
1173
- "Layer 2d: check architecture fit — design compliance, coupling, interfaces.",
1174
- "Reconcile multi-agent findings into `.cclaw/artifacts/07-review-army.json` (dedup + confidence + conflict notes).",
1175
- "Classify and prioritize all findings.",
1176
- "Write review report artifact with explicit verdict."
1177
- ],
1178
- requiredGates: [
1179
- { id: "review_layer1_spec_compliance", description: "Spec compliance check completed with per-criterion verdict." },
1180
- { id: "review_layer2_correctness", description: "Correctness review completed." },
1181
- { id: "review_layer2_security", description: "Security review completed." },
1182
- { id: "review_layer2_performance", description: "Performance review completed." },
1183
- { id: "review_layer2_architecture", description: "Architecture fit review completed." },
1184
- { id: "review_severity_classified", description: "All findings are severity-tagged." },
1185
- { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." },
1186
- { id: "review_army_json_valid", description: "07-review-army.json passes schema validation (validateReviewArmy)." },
1187
- { id: "review_completeness_scored", description: "Completeness score is computed and recorded (AC coverage, task coverage, slice coverage, adversarial pass)." },
1188
- { id: "review_security_audit_swept", description: "The security-audit utility skill was run against the diff scope and the modules it touches. Finding count (0 if clean) recorded in the review army with file:line evidence for every Critical." }
1189
- ],
1190
- requiredEvidence: [
1191
- "Artifact written to `.cclaw/artifacts/07-review.md`.",
1192
- "Artifact written to `.cclaw/artifacts/07-review-army.json`.",
1193
- "Layer 1 verdict captured with per-criterion pass/fail.",
1194
- "Layer 2 sections completed with findings.",
1195
- "Severity log includes critical/important/suggestion buckets.",
1196
- "Explicit final verdict: APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED."
1197
- ],
1198
- inputs: ["implementation diff", "spec and plan artifacts", "test/build evidence"],
1199
- requiredContext: ["spec criteria", "tdd artifact", "rulebook constraints"],
1200
- outputs: ["review verdict", "severity-indexed findings", "reconciled review-army findings", "ship readiness decision"],
1201
- blockers: [
1202
- "layer 1 failed",
1203
- "critical findings unresolved",
1204
- "missing regression evidence"
1205
- ],
1206
- exitCriteria: [
1207
- "both layers completed",
1208
- "all review sections evaluated",
1209
- "critical blockers resolved",
1210
- "ship readiness explicitly stated"
1211
- ],
1212
- antiPatterns: [
1213
- "Single generic review without layered structure",
1214
- "No severity classification",
1215
- "Shipping with open criticals",
1216
- "Batching multiple findings into one report without individual resolution",
1217
- "Skipping Layer 2 sections because Layer 1 passed"
1218
- ],
1219
- redFlags: [
1220
- "No separate Layer 1/Layer 2 outcomes",
1221
- "No structured review-army reconciliation artifact",
1222
- "No critical bucket",
1223
- "No explicit ready/not-ready verdict",
1224
- "Review sections skipped or abbreviated",
1225
- "Findings not classified by severity"
1226
- ],
1227
- policyNeedles: ["Layer 1", "Layer 2", "Critical", "Review Army", "Ready to Ship", "One issue at a time"],
1228
- artifactFile: "07-review.md",
1229
- next: "ship",
1230
- reviewSections: [
1231
- {
1232
- title: "Layer 1: Spec Compliance",
1233
- evaluationPoints: [
1234
- "For each acceptance criterion: does the implementation satisfy it?",
1235
- "Are there spec requirements with no corresponding implementation?",
1236
- "Are there implementations with no corresponding spec requirement (scope creep)?",
1237
- "Is every edge case from the spec handled?"
1238
- ],
1239
- stopGate: true
1240
- },
1241
- {
1242
- title: "Layer 2a: Correctness",
1243
- evaluationPoints: [
1244
- "Logic errors and boundary violations",
1245
- "Race conditions and concurrency issues",
1246
- "Null/undefined handling",
1247
- "Error propagation and recovery paths"
1248
- ],
1249
- stopGate: true
1250
- },
1251
- {
1252
- title: "Layer 2b: Security",
1253
- evaluationPoints: [
1254
- "Input validation completeness",
1255
- "Authorization boundary enforcement",
1256
- "Secrets exposure risk",
1257
- "Injection vector assessment"
1258
- ],
1259
- stopGate: true
1260
- },
1261
- {
1262
- title: "Layer 2c: Performance",
1263
- evaluationPoints: [
1264
- "N+1 query patterns",
1265
- "Memory leak potential",
1266
- "Missing caching opportunities",
1267
- "Hot path complexity analysis"
1268
- ],
1269
- stopGate: true
1270
- },
1271
- {
1272
- title: "Layer 2d: Architecture Fit",
1273
- evaluationPoints: [
1274
- "Does implementation match the locked design?",
1275
- "Coupling and cohesion assessment",
1276
- "Interface contract compliance",
1277
- "Unintended architectural drift"
1278
- ],
1279
- stopGate: true
1280
- },
1281
- {
1282
- title: "Layer 2e: External Safety Checklist",
1283
- evaluationPoints: [
1284
- "SQL/database: parameterized queries, no raw string interpolation, migration safety",
1285
- "Concurrency: race conditions in shared state, lock ordering, timeout handling",
1286
- "Secrets: no hardcoded tokens, no secrets in logs, env vars for sensitive config",
1287
- "Enum/constant completeness: grep for sibling values OUTSIDE the diff — are all cases handled?",
1288
- "Trust boundaries: if LLM/AI output is used, is it validated before acting on it?"
1289
- ],
1290
- stopGate: true
1291
- },
1292
- {
1293
- title: "Meta-Review: Verify the Verification",
1294
- evaluationPoints: [
1295
- "Were tests actually run (not just assumed to pass)?",
1296
- "Do the test names match what they actually test?",
1297
- "Is there test coverage for the specific changes in this diff?",
1298
- "Are there assertions, or do tests just run without checking results?"
1299
- ],
1300
- stopGate: false
1301
- }
1302
- ],
1303
- completionStatus: ["APPROVED", "APPROVED_WITH_CONCERNS", "BLOCKED"],
1304
- crossStageTrace: {
1305
- readsFrom: [".cclaw/artifacts/06-tdd.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/05-plan.md"],
1306
- writesTo: [".cclaw/artifacts/07-review.md", ".cclaw/artifacts/07-review-army.json"],
1307
- traceabilityRule: "Review verdict must reference specific spec criteria and TDD evidence. Downstream ship stage must reference review verdict."
1308
- },
1309
- artifactValidation: [
1310
- { section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
1311
- { section: "Layer 2 Findings", required: true, validationRule: "Each finding has severity, description, and resolution status." },
1312
- { section: "Review Army Contract", required: true, validationRule: "Structured findings include id/severity/confidence/fingerprint/reportedBy/status with dedup reconciliation summary." },
1313
- { section: "Review Readiness Dashboard", required: true, validationRule: "Includes a per-pass table (Layer 1 / Layer 2 / Adversarial / Schema) with a 'Completed at' column, a Delegation log snapshot block (path .cclaw/state/delegation-log.json with required/completed/waived/pending), a Staleness signal block (commit at last review pass and current commit), and a Headline with open critical blockers + ship recommendation. At minimum, the section text must contain the substrings 'Completed at', 'delegation-log.json', 'commit at last review pass', and 'Ship recommendation'." },
1314
- { section: "Completeness Score", required: true, validationRule: "Records AC coverage, task coverage, test-slice coverage, and adversarial-review pass status as numeric or boolean values. At minimum, a line like 'AC coverage: N/M' or 'AC coverage: 100%'." },
1315
- { section: "Severity Summary", required: true, validationRule: "Per-severity count lines for critical, important, and suggestion buckets." },
1316
- { section: "Final Verdict", required: true, validationRule: "Exactly one of: APPROVED, APPROVED_WITH_CONCERNS, BLOCKED." }
1317
- ]
1318
- };
1319
- // ---------------------------------------------------------------------------
1320
- // SHIP — reference: superpowers finishing-a-development-branch + gstack /ship
1321
- // ---------------------------------------------------------------------------
1322
- const SHIP = {
1323
- stage: "ship",
1324
- skillFolder: "shipping-and-handoff",
1325
- skillName: "shipping-and-handoff",
1326
- skillDescription: "Release handoff stage with preflight checks, rollback readiness, and explicit finalization mode.",
1327
- hardGate: "Do NOT merge, push, or finalize without a passed preflight check, written rollback plan, and exactly one explicit finalization mode selected. No exceptions for urgency.",
1328
- ironLaw: "NO MERGE WITHOUT GREEN CI, A WRITTEN ROLLBACK, AND EXACTLY ONE SELECTED FINALIZATION MODE.",
1329
- purpose: "Prepare a safe release handoff with clear rollback and branch finalization decision.",
1330
- whenToUse: [
1331
- "After review passes with APPROVED or APPROVED_WITH_CONCERNS verdict",
1332
- "Before creating PR/merge/final branch action",
1333
- "When release notes and rollback plan are required"
1334
- ],
1335
- whenNotToUse: [
1336
- "Review verdict is BLOCKED or unresolved critical findings remain",
1337
- "Preflight checks cannot run and no approved exception exists",
1338
- "The request is still design/spec/implementation work, not release handoff"
1339
- ],
1340
- checklist: [
1341
- "Validate upstream gates — verify review verdict is APPROVED or APPROVED_WITH_CONCERNS. If BLOCKED, stop immediately.",
1342
- "Run preflight checks — tests pass, build succeeds, linter clean, type-check clean, no uncommitted changes. Every check must produce fresh output in this message.",
1343
- "Merge-base detection — identify the correct base branch. Run `git merge-base HEAD <base>`. If the base has diverged significantly, flag for rebase-first.",
1344
- "Re-run tests on merged result — if merging locally, run the full test suite AFTER the merge, not just before. Post-merge failures are common.",
1345
- "Generate release notes — summarize what changed, why, and what it affects. Reference spec criteria. Include: breaking changes, new dependencies, migration steps if any.",
1346
- "Write rollback plan — trigger conditions (what tells you it is broken), rollback steps (exact commands/git operations), and verification (how to confirm rollback worked).",
1347
- "Monitoring checklist — what should be watched after deploy? Error rates, latency, key business metrics. If no monitoring exists, flag it as a risk.",
1348
- "Select finalization mode — exactly ONE enum: (A) FINALIZE_MERGE_LOCAL, (B) FINALIZE_OPEN_PR, (C) FINALIZE_KEEP_BRANCH, (D) FINALIZE_DISCARD_BRANCH. For discard: list what will be deleted, require typed confirmation.",
1349
- "Execute finalization — perform the selected action. For merge: verify clean merge. For PR: include structured body (summary, test plan, rollback). For discard: verify deletion.",
1350
- "Worktree cleanup — if using git worktrees, clean up the worktree after merge/discard. Keep it only for 'keep branch' mode."
1351
- ],
1352
- interactionProtocol: [
1353
- "Run preflight checks before any release action.",
1354
- "Document release notes and rollback plan explicitly.",
1355
- "For finalization mode: use the Decision Protocol — present modes as labeled options (A/B/C/D) with consequences, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the mode that best addresses release blast-radius, rollback readiness, observability, and stakeholder communication — ties go to the most reversible option. If AskQuestion/AskUserQuestion is available, send exactly ONE question per call, validate fields against runtime schema, and on schema error immediately fall back to plain-text question instead of retrying guessed payloads.",
1356
- "Do not proceed if critical blockers remain from review.",
1357
- "**STOP.** Present finalization options and wait for user selection before executing any finalization action."
1358
- ],
1359
- process: [
1360
- "Validate review and test gates.",
1361
- "Run preflight: build, test, lint, uncommitted-changes check.",
1362
- "Generate release notes and rollback procedure.",
1363
- "Choose one finalization enum: FINALIZE_MERGE_LOCAL, FINALIZE_OPEN_PR, FINALIZE_KEEP_BRANCH, or FINALIZE_DISCARD_BRANCH.",
1364
- "Execute finalization action.",
1365
- "Write ship artifact with decision, rationale, and execution result."
1366
- ],
1367
- requiredGates: [
1368
- { id: "ship_review_verdict_valid", description: "Review verdict is APPROVED or APPROVED_WITH_CONCERNS." },
1369
- { id: "ship_preflight_passed", description: "Preflight checks passed or exceptions documented and approved." },
1370
- { id: "ship_release_notes_written", description: "Release notes are complete and accurate." },
1371
- { id: "ship_rollback_plan_ready", description: "Rollback trigger, steps, and verification are documented." },
1372
- { id: "ship_finalization_mode_selected", description: "Exactly one finalization action is selected." },
1373
- { id: "ship_finalization_executed", description: "Selected finalization action was executed and verified." },
1374
- { id: "ship_post_merge_tests", description: "Full test suite re-run on the merged result (not just the branch). Post-merge failures caught before release." }
1375
- ],
1376
- requiredEvidence: [
1377
- "Artifact written to `.cclaw/artifacts/08-ship.md`.",
1378
- "Release notes section is complete.",
1379
- "Rollback section includes trigger conditions, steps, and verification.",
1380
- "Finalization section shows exactly one selected enum token.",
1381
- "Execution result documented."
1382
- ],
1383
- inputs: ["review verdict", "test/build outputs", "release context"],
1384
- requiredContext: ["review artifact", "changelog scope", "deployment constraints"],
1385
- outputs: ["release package handoff", "rollback plan", "final branch decision"],
1386
- blockers: [
1387
- "review verdict is BLOCKED",
1388
- "critical review blockers remain",
1389
- "rollback plan missing",
1390
- "finalization mode not selected"
1391
- ],
1392
- exitCriteria: [
1393
- "preflight completed",
1394
- "rollback and release notes complete",
1395
- "finalization action explicitly chosen and executed"
1396
- ],
1397
- antiPatterns: [
1398
- "Shipping without rollback strategy",
1399
- "Implicit finalization decision",
1400
- "Bypassing preflight due to urgency",
1401
- "Selecting multiple finalization modes",
1402
- "Shipping with BLOCKED review verdict"
1403
- ],
1404
- redFlags: [
1405
- "No rollback trigger/steps",
1406
- "More than one finalization mode implied",
1407
- "No explicit preflight result",
1408
- "Review verdict not referenced",
1409
- "Finalization not executed, only planned"
1410
- ],
1411
- policyNeedles: [
1412
- "Pre-Ship Checks",
1413
- "Release Notes",
1414
- "Rollback Plan",
1415
- "FINALIZE_MERGE_LOCAL",
1416
- "FINALIZE_OPEN_PR",
1417
- "FINALIZE_KEEP_BRANCH",
1418
- "FINALIZE_DISCARD_BRANCH"
1419
- ],
1420
- artifactFile: "08-ship.md",
1421
- next: "done",
1422
- reviewSections: [
1423
- {
1424
- title: "Preflight Verification",
1425
- evaluationPoints: [
1426
- "Test suite: full run, all pass, output captured",
1427
- "Build: clean build, exit code 0",
1428
- "Lint/format: no violations",
1429
- "Type-check: no errors",
1430
- "Working tree: no uncommitted changes"
1431
- ],
1432
- stopGate: true
1433
- },
1434
- {
1435
- title: "Release Readiness",
1436
- evaluationPoints: [
1437
- "Release notes are accurate and reference spec criteria",
1438
- "Breaking changes are documented with migration steps",
1439
- "Rollback plan has trigger, steps, and verification",
1440
- "If applicable: monitoring/alerting is in place for the change"
1441
- ],
1442
- stopGate: true
1443
- }
1444
- ],
1445
- completionStatus: ["SHIPPED", "SHIPPED_WITH_EXCEPTIONS", "BLOCKED"],
1446
- crossStageTrace: {
1447
- readsFrom: [".cclaw/artifacts/07-review.md", ".cclaw/artifacts/06-tdd.md", ".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md"],
1448
- writesTo: [".cclaw/artifacts/08-ship.md"],
1449
- traceabilityRule: "Ship artifact must reference review verdict and resolution status. Release notes must reference spec criteria. Rollback plan must reference specific changes that could fail."
1450
- },
1451
- artifactValidation: [
1452
- { section: "Preflight Results", required: true, validationRule: "Build, test, lint, type-check results captured with fresh output. Exceptions documented if any." },
1453
- { section: "Release Notes", required: true, validationRule: "What changed, why, impact. References spec criteria. Breaking changes flagged." },
1454
- { section: "Rollback Plan", required: true, validationRule: "Trigger conditions, rollback steps (exact commands), verification steps." },
1455
- { section: "Monitoring", required: false, validationRule: "If applicable: what metrics/logs to watch post-deploy. Risk note if no monitoring." },
1456
- { section: "Finalization", required: true, validationRule: "Exactly one finalization enum token selected. Execution result documented. Worktree cleaned if applicable." },
1457
- { section: "Completion Status", required: false, validationRule: "If present: exactly one of SHIPPED, SHIPPED_WITH_EXCEPTIONS, BLOCKED. Exceptions documented when applicable." },
1458
- { section: "Compound Step", required: false, validationRule: "Optional retrospective: at least one bullet of the form 'Insight: ... | Action: append [compound] entry to .cclaw/knowledge.jsonl', or an explicit 'No compound insight this run.' line." }
1459
- ]
1460
- };
1461
127
  // ---------------------------------------------------------------------------
1462
128
  // Stage map and accessors
1463
129
  // ---------------------------------------------------------------------------