cclaw-cli 0.49.0 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +54 -82
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -491
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -43
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +51 -9
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +2 -0
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +31 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/learnings.d.ts +3 -4
  28. package/dist/content/learnings.js +24 -50
  29. package/dist/content/meta-skill.js +31 -21
  30. package/dist/content/next-command.js +38 -38
  31. package/dist/content/node-hooks.js +17 -343
  32. package/dist/content/opencode-plugin.js +2 -100
  33. package/dist/content/research-playbooks.js +14 -14
  34. package/dist/content/review-loop.d.ts +2 -0
  35. package/dist/content/review-loop.js +8 -0
  36. package/dist/content/session-hooks.js +14 -46
  37. package/dist/content/skills.d.ts +0 -5
  38. package/dist/content/skills.js +53 -128
  39. package/dist/content/stage-common-guidance.d.ts +0 -1
  40. package/dist/content/stage-common-guidance.js +15 -14
  41. package/dist/content/stage-schema.d.ts +26 -1
  42. package/dist/content/stage-schema.js +121 -40
  43. package/dist/content/stages/_lint-metadata/index.js +9 -15
  44. package/dist/content/stages/brainstorm.js +22 -43
  45. package/dist/content/stages/design.js +37 -57
  46. package/dist/content/stages/plan.js +22 -13
  47. package/dist/content/stages/review.js +24 -27
  48. package/dist/content/stages/scope.js +34 -46
  49. package/dist/content/stages/ship.js +7 -4
  50. package/dist/content/stages/spec.js +20 -9
  51. package/dist/content/stages/tdd.js +64 -44
  52. package/dist/content/start-command.js +10 -12
  53. package/dist/content/status-command.d.ts +2 -7
  54. package/dist/content/status-command.js +19 -146
  55. package/dist/content/subagents.d.ts +0 -5
  56. package/dist/content/subagents.js +47 -28
  57. package/dist/content/templates.d.ts +1 -1
  58. package/dist/content/templates.js +126 -135
  59. package/dist/content/track-render-context.d.ts +17 -0
  60. package/dist/content/track-render-context.js +44 -0
  61. package/dist/content/tree-command.d.ts +1 -2
  62. package/dist/content/tree-command.js +4 -87
  63. package/dist/content/utility-skills.d.ts +2 -29
  64. package/dist/content/utility-skills.js +2 -1534
  65. package/dist/content/view-command.js +29 -11
  66. package/dist/delegation.d.ts +1 -1
  67. package/dist/delegation.js +5 -15
  68. package/dist/doctor-registry.js +20 -21
  69. package/dist/doctor.js +88 -344
  70. package/dist/flow-state.d.ts +3 -0
  71. package/dist/flow-state.js +2 -0
  72. package/dist/harness-adapters.d.ts +1 -1
  73. package/dist/harness-adapters.js +48 -57
  74. package/dist/install.js +128 -358
  75. package/dist/internal/advance-stage.js +3 -9
  76. package/dist/internal/compound-readiness.d.ts +1 -1
  77. package/dist/internal/compound-readiness.js +1 -1
  78. package/dist/internal/tdd-loop-status.d.ts +1 -1
  79. package/dist/internal/tdd-loop-status.js +1 -1
  80. package/dist/knowledge-store.d.ts +16 -10
  81. package/dist/knowledge-store.js +51 -15
  82. package/dist/policy.js +16 -105
  83. package/dist/run-archive.d.ts +4 -6
  84. package/dist/run-archive.js +15 -20
  85. package/dist/run-persistence.d.ts +2 -2
  86. package/dist/run-persistence.js +3 -9
  87. package/package.json +1 -2
  88. package/dist/content/archive-command.d.ts +0 -2
  89. package/dist/content/archive-command.js +0 -124
  90. package/dist/content/compound-command.d.ts +0 -5
  91. package/dist/content/compound-command.js +0 -193
  92. package/dist/content/contexts.d.ts +0 -18
  93. package/dist/content/contexts.js +0 -24
  94. package/dist/content/contracts.d.ts +0 -2
  95. package/dist/content/contracts.js +0 -51
  96. package/dist/content/doctor-references.d.ts +0 -2
  97. package/dist/content/doctor-references.js +0 -150
  98. package/dist/content/eval-scaffold.d.ts +0 -15
  99. package/dist/content/eval-scaffold.js +0 -370
  100. package/dist/content/feature-command.d.ts +0 -2
  101. package/dist/content/feature-command.js +0 -123
  102. package/dist/content/flow-map.d.ts +0 -23
  103. package/dist/content/flow-map.js +0 -134
  104. package/dist/content/harness-doc.d.ts +0 -2
  105. package/dist/content/harness-doc.js +0 -202
  106. package/dist/content/harness-playbooks.d.ts +0 -24
  107. package/dist/content/harness-playbooks.js +0 -393
  108. package/dist/content/harness-tool-refs.d.ts +0 -20
  109. package/dist/content/harness-tool-refs.js +0 -268
  110. package/dist/content/ops-command.d.ts +0 -2
  111. package/dist/content/ops-command.js +0 -71
  112. package/dist/content/protocols.d.ts +0 -7
  113. package/dist/content/protocols.js +0 -215
  114. package/dist/content/retro-command.d.ts +0 -2
  115. package/dist/content/retro-command.js +0 -165
  116. package/dist/content/rewind-command.d.ts +0 -2
  117. package/dist/content/rewind-command.js +0 -106
  118. package/dist/content/tdd-log-command.d.ts +0 -2
  119. package/dist/content/tdd-log-command.js +0 -85
  120. package/dist/eval/agents/single-shot.d.ts +0 -27
  121. package/dist/eval/agents/single-shot.js +0 -79
  122. package/dist/eval/agents/with-tools.d.ts +0 -44
  123. package/dist/eval/agents/with-tools.js +0 -261
  124. package/dist/eval/agents/workflow.d.ts +0 -31
  125. package/dist/eval/agents/workflow.js +0 -155
  126. package/dist/eval/baseline.d.ts +0 -38
  127. package/dist/eval/baseline.js +0 -282
  128. package/dist/eval/config-loader.d.ts +0 -14
  129. package/dist/eval/config-loader.js +0 -395
  130. package/dist/eval/corpus.d.ts +0 -30
  131. package/dist/eval/corpus.js +0 -330
  132. package/dist/eval/cost-guard.d.ts +0 -102
  133. package/dist/eval/cost-guard.js +0 -190
  134. package/dist/eval/diff.d.ts +0 -64
  135. package/dist/eval/diff.js +0 -323
  136. package/dist/eval/llm-client.d.ts +0 -176
  137. package/dist/eval/llm-client.js +0 -267
  138. package/dist/eval/mode.d.ts +0 -28
  139. package/dist/eval/mode.js +0 -61
  140. package/dist/eval/progress.d.ts +0 -83
  141. package/dist/eval/progress.js +0 -59
  142. package/dist/eval/report.d.ts +0 -11
  143. package/dist/eval/report.js +0 -181
  144. package/dist/eval/rubric-loader.d.ts +0 -20
  145. package/dist/eval/rubric-loader.js +0 -143
  146. package/dist/eval/runner.d.ts +0 -81
  147. package/dist/eval/runner.js +0 -746
  148. package/dist/eval/runs.d.ts +0 -41
  149. package/dist/eval/runs.js +0 -114
  150. package/dist/eval/sandbox.d.ts +0 -38
  151. package/dist/eval/sandbox.js +0 -137
  152. package/dist/eval/tools/glob.d.ts +0 -2
  153. package/dist/eval/tools/glob.js +0 -163
  154. package/dist/eval/tools/grep.d.ts +0 -2
  155. package/dist/eval/tools/grep.js +0 -152
  156. package/dist/eval/tools/index.d.ts +0 -7
  157. package/dist/eval/tools/index.js +0 -35
  158. package/dist/eval/tools/read.d.ts +0 -2
  159. package/dist/eval/tools/read.js +0 -122
  160. package/dist/eval/tools/types.d.ts +0 -49
  161. package/dist/eval/tools/types.js +0 -41
  162. package/dist/eval/tools/write.d.ts +0 -2
  163. package/dist/eval/tools/write.js +0 -92
  164. package/dist/eval/types.d.ts +0 -561
  165. package/dist/eval/types.js +0 -47
  166. package/dist/eval/verifiers/judge.d.ts +0 -40
  167. package/dist/eval/verifiers/judge.js +0 -256
  168. package/dist/eval/verifiers/rules.d.ts +0 -24
  169. package/dist/eval/verifiers/rules.js +0 -218
  170. package/dist/eval/verifiers/structural.d.ts +0 -14
  171. package/dist/eval/verifiers/structural.js +0 -171
  172. package/dist/eval/verifiers/traceability.d.ts +0 -23
  173. package/dist/eval/verifiers/traceability.js +0 -84
  174. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  175. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  176. package/dist/eval/workflow-corpus.d.ts +0 -7
  177. package/dist/eval/workflow-corpus.js +0 -207
  178. package/dist/feature-system.d.ts +0 -42
  179. package/dist/feature-system.js +0 -432
  180. package/dist/internal/knowledge-digest.d.ts +0 -7
  181. package/dist/internal/knowledge-digest.js +0 -93
@@ -31,6 +31,7 @@ export const PLAN = {
31
31
  "No dependency graph",
32
32
  "No WAIT_FOR_CONFIRM marker",
33
33
  "No explicit dependency batches",
34
+ "No execution posture for sequencing, risk, and checkpoint cadence",
34
35
  "Tasks exceed one coherent outcome",
35
36
  "No acceptance mapping",
36
37
  "Locked decisions are missing or not mapped",
@@ -43,18 +44,20 @@ export const PLAN = {
43
44
  "Build dependency graph — identify task ordering, parallel opportunities, and blocking dependencies.",
44
45
  "Group tasks into dependency batches — batch N+1 cannot start until batch N has verification evidence.",
45
46
  "Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
46
- "Attach verification — every task has an acceptance criterion mapping and a concrete verification command.",
47
- "Annotate slice-review metadata — if `.cclaw/config.yaml::sliceReview.enabled` is true, every task row additionally carries `touchCount` (rough number of files expected to change) and `touchPaths` (glob hints, e.g. `migrations/**`, `src/auth/**`). A task may set `highRisk: true` to force a review pass regardless of thresholds. These fields feed the TDD stage's Per-Slice Review checkpoint; when `sliceReview` is disabled they are optional.",
47
+ "Task Contract — every task has one coherent outcome, AC mapping, exact verification command/manual step, and expected evidence snippet or pass condition. Avoid vague `run tests` wording.",
48
+ "Annotate slice-review metadata — if `.cclaw/config.yaml::sliceReview.enabled` is true, every task row additionally carries `touchCount` (rough number of files expected to change) and `touchPaths` (glob hints, e.g. `migrations/**`, `src/auth/**`). A task may set `highRisk: true` to force a review pass regardless of thresholds. These fields feed the TDD stage's Per-Slice Review point; when `sliceReview` is disabled they are optional.",
48
49
  "Map scope Locked Decisions — every D-XX from scope is referenced by at least one plan task (or explicitly marked deferred with reason).",
49
50
  "Run anti-placeholder + anti-scope-reduction scans — block `TODO/TBD/...` and phrasing like `v1`, `for now`, `later` for locked boundaries.",
50
- "Define checkpoints — mark points where progress should be validated before continuing.",
51
+ "Define validation points — mark where progress must be checked before continuing, with concrete command and expected evidence.",
52
+ "Define execution posture — record whether execution should be sequential, dependency-batched, parallel-safe, or blocked; include risk triggers and RED/GREEN/REFACTOR checkpoint/commit expectations when the repo workflow supports them.",
51
53
  "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. **STOP.** Do NOT proceed until user confirms. Then close the stage with `node .cclaw/hooks/stage-complete.mjs plan` and tell user to run `/cc-next`."
52
54
  ],
53
55
  interactionProtocol: [
54
56
  "Plan in read-only mode relative to implementation.",
55
57
  "Split work into small vertical slices (target 2-5 minute tasks).",
56
58
  "Publish explicit dependency batches with entry and exit checks for each batch.",
57
- "Attach verification step to every task.",
59
+ "Expose execution posture: sequential vs batch/parallel, stop conditions, and checkpoint cadence for the TDD handoff.",
60
+ "Attach exact verification command/manual step and expected evidence to every task.",
58
61
  "Preserve locked scope boundaries: no silent scope reduction language in task rows.",
59
62
  "Enforce WAIT_FOR_CONFIRM: present the plan summary with options (A) Approve / (B) Revise / (C) Reject.",
60
63
  "**STOP.** Do NOT proceed until user explicitly approves.",
@@ -63,23 +66,24 @@ export const PLAN = {
63
66
  process: [
64
67
  "Build dependency graph and ordered slices.",
65
68
  "Group slices into execution batches and define gate criteria per batch.",
66
- "Define each task with acceptance mapping and verification commands.",
69
+ "Define each task with acceptance mapping, verification command/manual step, and expected evidence/pass condition.",
67
70
  "Trace every locked decision (D-XX) to plan tasks or explicit defer rationale.",
68
- "Record checkpoints and blockers.",
71
+ "Record validation points, blockers, and execution posture.",
69
72
  "Write plan artifact and pause at WAIT_FOR_CONFIRM."
70
73
  ],
71
74
  requiredGates: [
72
75
  { id: "plan_tasks_sliced_2_5_min", description: "Tasks are small, executable slices." },
73
- { id: "plan_dependency_batches_defined", description: "Tasks are grouped into executable batches with gate checks." },
76
+ { id: "plan_dependency_batches_defined", description: "Tasks are grouped into executable batches with gate checks and execution posture." },
74
77
  { id: "plan_acceptance_mapped", description: "Each task maps to a spec acceptance criterion." },
75
78
  { id: "plan_wait_for_confirm", description: "Execution blocked until explicit user confirmation." }
76
79
  ],
77
80
  requiredEvidence: [
78
81
  "Artifact written to `.cclaw/artifacts/05-plan.md`.",
79
- "Task list includes acceptance mapping.",
82
+ "Task list includes acceptance mapping, exact verification command/manual step, and expected evidence/pass condition.",
80
83
  "Locked decision coverage table present with D-XX trace links.",
81
84
  "Dependency graph documented.",
82
85
  "Dependency batches documented with batch-by-batch verification gates.",
86
+ "Execution posture documented with sequencing, stop conditions, and TDD checkpoint expectations.",
83
87
  "WAIT_FOR_CONFIRM status recorded."
84
88
  ],
85
89
  inputs: ["approved spec", "codebase context", "delivery constraints"],
@@ -92,6 +96,7 @@ export const PLAN = {
92
96
  "tasks too broad",
93
97
  "dependency uncertainty unresolved",
94
98
  "batch boundaries are unclear",
99
+ "execution posture is missing or contradicts dependency batches",
95
100
  "locked decisions from scope are not mapped to tasks",
96
101
  "no explicit confirmation"
97
102
  ],
@@ -99,6 +104,7 @@ export const PLAN = {
99
104
  "plan quality gates complete",
100
105
  "WAIT_FOR_CONFIRM present and unresolved until user approves",
101
106
  "artifact ready for TDD execution",
107
+ "execution posture ready for TDD handoff",
102
108
  "acceptance mapping complete"
103
109
  ],
104
110
  platformNotes: [
@@ -116,10 +122,12 @@ export const PLAN = {
116
122
  traceabilityRule: "Every task must trace to a spec acceptance criterion. Every locked scope decision (D-XX) must trace to at least one plan task or explicit defer rationale. Every downstream RED test must trace to a plan task."
117
123
  },
118
124
  artifactValidation: [
125
+ { section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/design/scope decisions, constraints, open questions, and explicit drift before task breakdown." },
119
126
  { section: "Dependency Graph", required: false, validationRule: "Ordering and parallel opportunities explicit. No circular dependencies." },
120
127
  { section: "Dependency Batches", required: true, validationRule: "Every task belongs to a batch. Each batch has an exit gate and dependency statement." },
121
- { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, verification command, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget. When the sliceReview feature is enabled in the cclaw config, each task row additionally declares touchCount, touchPaths, and an optional highRisk flag so the TDD stage can decide whether a Per-Slice Review pass is required." },
128
+ { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, exact verification command/manual step, expected evidence/pass condition, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget. When the sliceReview option is enabled in the cclaw config, each task row additionally declares touchCount, touchPaths, and an optional highRisk flag so the TDD stage can decide whether a Per-Slice Review pass is required." },
122
129
  { section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
130
+ { section: "Execution Posture", required: true, validationRule: "States sequential/batch/parallel posture, stop conditions, risk triggers, and RED/GREEN/REFACTOR checkpoint or commit expectations for TDD when consistent with the repo workflow." },
123
131
  { section: "Locked Decision Coverage", required: false, validationRule: "Every locked decision ID (D-XX) from scope is listed with linked task IDs or explicit defer rationale." },
124
132
  { section: "Risk Assessment", required: false, validationRule: "If present: per-task or per-batch risk identification with likelihood, impact, and mitigation strategy." },
125
133
  { section: "Boundary Map", required: false, validationRule: "If present: per-batch or per-task interface contracts listing what each task produces (exports) and consumes (imports) from other tasks." },
@@ -129,14 +137,14 @@ export const PLAN = {
129
137
  ]
130
138
  },
131
139
  reviewLens: {
132
- outputs: ["task graph", "dependency batch plan", "ordered plan", "explicit confirmation checkpoint"],
140
+ outputs: ["task graph", "dependency batch plan", "ordered plan", "explicit confirmation gate"],
133
141
  reviewSections: [
134
142
  {
135
143
  title: "Task Decomposition Audit",
136
144
  evaluationPoints: [
137
145
  "Does every task target a single coherent area (vertical slice)?",
138
146
  "Can each task be completed in 2-5 minutes?",
139
- "Does every task have an acceptance criterion link and verification command?",
147
+ "Does every task have an acceptance criterion link, exact verification command/manual step, and expected evidence/pass condition?",
140
148
  "Are there tasks that touch multiple unrelated areas?",
141
149
  "Would a new engineer understand and start each task within two minutes?"
142
150
  ],
@@ -149,7 +157,8 @@ export const PLAN = {
149
157
  "Does each batch have a verification gate?",
150
158
  "Are batch dependencies explicit and acyclic?",
151
159
  "Is the acceptance mapping complete — every spec criterion covered?",
152
- "Are there hidden dependencies between tasks in different batches?"
160
+ "Are there hidden dependencies between tasks in different batches?",
161
+ "Does the Execution Posture match the dependency graph and stop risky parallelism?"
153
162
  ],
154
163
  stopGate: true
155
164
  },
@@ -157,7 +166,7 @@ export const PLAN = {
157
166
  title: "Five-Minute Budget + No-Placeholders Audit",
158
167
  evaluationPoints: [
159
168
  "Does every task carry an explicit minutes estimate (e.g. `[~3m]`) and does every estimate fit the 2-to-5-minute budget? Estimates >5 minutes must be split.",
160
- "Are all file paths, test commands, and verification commands copy-pasteable as written — no `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, or ellipsis standing in for omitted args?",
169
+ "Are all file paths, test commands, verification commands, and expected evidence copy-pasteable/specific as written — no `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, bare `run tests`, or ellipsis standing in for omitted args?",
161
170
  "Does every acceptance-criterion reference resolve to a real R# / AC-### in the spec (not a blank link)?",
162
171
  "If an estimate is genuinely uncertain (first-time integration, unfamiliar library), is the uncertainty named explicitly and scheduled as a spike task in batch 0, rather than hidden behind a large estimate?"
163
172
  ],
@@ -1,3 +1,4 @@
1
+ import { decisionProtocolInstruction, STRUCTURED_ASK_TOOL_LIST_REVIEW, structuredAskSingleChoiceInstruction } from "../decision-protocol.js";
1
2
  // ---------------------------------------------------------------------------
2
3
  // REVIEW — reference: superpowers code-review + gstack /review
3
4
  // ---------------------------------------------------------------------------
@@ -33,39 +34,34 @@ export const REVIEW = {
33
34
  checklist: [
34
35
  "Diff Scope — Run `git diff` against base branch. If no diff, exit early with APPROVED (no changes to review). Scope the review to changed files unless blast-radius analysis requires wider inspection.",
35
36
  "Change-Size Check — ~100 lines = normal. ~300 lines = consider splitting. ~1000+ lines = strongly recommend stacked PRs. Flag large diffs to the user.",
36
- "Adversarial Trigger Check — compute changed-line count (`git diff --shortstat <base>..HEAD`), files-touched count, and whether trust boundaries changed (auth/secrets/external inputs/permissions). If `lines > 100` OR `files > 10` OR `trust boundary changed`, **dispatch a SECOND reviewer agent with the `adversarial-review` skill loaded** and reconcile its findings into the review army (treat the conditional dispatch as mandatory whenever the trigger holds; record the trigger that fired in the dashboard).",
37
+ "Risk-Based Second Opinion — compute changed-line count, files-touched count, and trust-boundary movement. Dispatch an adversarial reviewer only when trust boundaries changed, Critical/Important ambiguity remains, or the diff is both large and high-risk; otherwise record `not triggered`.",
37
38
  "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and plan. Verify evidence chain is unbroken.",
38
39
  "Run traceability matrix — execute `cclaw internal trace-matrix` (or equivalent helper) and confirm there are no orphaned criteria/tasks/tests before declaring ship readiness.",
39
40
  "Layer 1: Spec Compliance — check every acceptance criterion against implementation. Verdict: pass/fail per criterion.",
40
- "Layer 2a: Correctness — logic errors, race conditions, boundary violations, null handling.",
41
- "Layer 2b: Security — input validation, auth boundaries, secrets exposure, injection vectors. **Mandatory:** also load and execute the `.cclaw/skills/security-audit/SKILL.md` utility skill (proactive pattern sweep across diff + touched modules, not just the diff itself) and merge findings into the review army. The Layer 2 security pass is not complete until the audit sweep records a finding count (0 acceptable) with file:line evidence for every Critical.",
42
- "Layer 2c: Performance — N+1 queries, memory leaks, missing caching, hot paths.",
43
- "Layer 2d: Architecture Fit — does the implementation match the locked design? Coupling, cohesion, interface contracts.",
44
- "Layer 2e: External Safety — SQL safety, concurrency, secrets in logs, enum completeness (grep outside diff), LLM trust boundaries.",
45
- "Incoming Feedback Intake — when human reviewer comments, bot findings, or CI annotations exist, run `.cclaw/skills/receiving-code-review/SKILL.md`, keep a per-comment disposition queue, and mirror outcomes into `07-review.md` + `07-review-army.json` before final verdict.",
46
- "Review Army reconciliation — normalize findings into structured records, dedup by fingerprint, and mark multi-specialist confirmations.",
41
+ "Layer 2: Integrated findings — one structured pass tagged by category: correctness, security, performance, architecture, external-safety.",
42
+ "Security sweep — mandatory dedicated security-reviewer pass across diff + touched modules. A zero-finding pass must include `NO_CHANGE_ATTESTATION` with rationale.",
43
+ "Incoming Feedback Intake — when human reviewer comments, bot findings, or CI annotations exist, keep a per-comment disposition queue and mirror outcomes into `07-review.md` + `07-review-army.json` before final verdict.",
44
+ "Structured Review reconciliation — normalize findings into `07-review-army.json`, dedup by fingerprint, and mark multi-specialist confirmations when multiple lenses agree.",
47
45
  "Meta-Review — Were tests actually run? Do test names match what they test? Are there real assertions?",
48
46
  "Classify findings — Critical (blocks ship), Important (should fix), Suggestion (optional improvement).",
49
47
  "Produce verdict — APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
50
- "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD` and include `/cc-ops rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs."
48
+ "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD` and include `cclaw internal rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs."
51
49
  ],
52
50
  interactionProtocol: [
53
51
  "Run Layer 1 (spec compliance) completely before starting Layer 2.",
54
52
  "In each review section, present findings ONE AT A TIME. Do NOT batch.",
55
53
  "Classify every finding as Critical, Important, or Suggestion.",
56
- "For each Critical finding: use the Decision Protocol — present resolution options (A/B/C) with trade-offs, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius. If the harness's native structured-ask tool is available (`AskUserQuestion` on Claude, `AskQuestion` on Cursor, `question` on OpenCode with `permission.question: \"allow\"`, `request_user_input` on Codex in Plan/Collaboration mode), send exactly ONE question per call, validate fields against the runtime schema, and on schema error immediately fall back to a plain-text lettered list instead of retrying guessed payloads.",
54
+ decisionProtocolInstruction("each Critical finding", "present resolution options (A/B/C) with trade-offs, and mark one as (recommended)", "recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius", STRUCTURED_ASK_TOOL_LIST_REVIEW),
57
55
  "Resolve all critical blockers before ship.",
58
- "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD` and point to `/cc-ops rewind tdd` with the blocking IDs.",
59
- "For final verdict: use the native structured-ask tool (`AskUserQuestion` / `AskQuestion` / `question` / `request_user_input`) only if runtime schema is confirmed; otherwise collect verdict with a plain-text single-choice prompt (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED).",
56
+ "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD` and point to `cclaw internal rewind tdd` with the blocking IDs.",
57
+ structuredAskSingleChoiceInstruction("final verdict", "verdict (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED)"),
60
58
  "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict."
61
59
  ],
62
60
  process: [
63
61
  "Layer 1: check acceptance criteria and requirement coverage.",
64
- "Layer 2a: check correctness — logic, races, boundaries, null handling.",
65
- "Layer 2b: check security — validation, auth, secrets, injection.",
66
- "Layer 2c: check performance — queries, memory, caching, hot paths.",
67
- "Layer 2d: check architecture fit — design compliance, coupling, interfaces.",
68
- "Reconcile multi-agent findings into `.cclaw/artifacts/07-review-army.json` (dedup + confidence + conflict notes + source tags from spec/correctness/security/performance/architecture/external-safety passes).",
62
+ "Layer 2: record integrated findings tagged correctness/security/performance/architecture/external-safety.",
63
+ "Security-reviewer: run mandatory security sweep or no-change attestation.",
64
+ "Reconcile structured findings into `.cclaw/artifacts/07-review-army.json` (dedup + confidence + conflict notes + source tags from spec/correctness/security/performance/architecture/external-safety passes).",
69
65
  "Classify and prioritize all findings.",
70
66
  "Write review report artifact with explicit verdict.",
71
67
  "If verdict is BLOCKED, include the remediation route token `ROUTE_BACK_TO_TDD` and the rewind command payload."
@@ -73,7 +69,7 @@ export const REVIEW = {
73
69
  requiredGates: [
74
70
  { id: "review_layer1_spec_compliance", description: "Spec compliance check completed with per-criterion verdict." },
75
71
  { id: "review_layer2_security", description: "Security review completed." },
76
- { id: "review_layer_coverage_complete", description: "Layer coverage map in 07-review-army.json confirms spec/correctness/security/performance/architecture/external-safety passes." },
72
+ { id: "review_layer_coverage_complete", description: "Layer coverage map in 07-review-army.json confirms spec/correctness/security/performance/architecture/external-safety tags were considered." },
77
73
  { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." },
78
74
  { id: "review_army_json_valid", description: "07-review-army.json passes schema validation (validateReviewArmy)." },
79
75
  { id: "review_trace_matrix_clean", description: "Trace matrix has no orphaned criteria/tasks/test slices for the active run." }
@@ -116,11 +112,12 @@ export const REVIEW = {
116
112
  traceabilityRule: "Review verdict must reference specific spec criteria and TDD evidence. Downstream ship stage must reference review verdict."
117
113
  },
118
114
  artifactValidation: [
115
+ { section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/plan/tdd decisions, constraints, open questions, and explicit drift before review verdicts." },
119
116
  { section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
120
117
  { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status. Security coverage must include either explicit security findings or `NO_CHANGE_ATTESTATION: <reason>` when no security-relevant changes were found." },
121
- { section: "Review Army Contract", required: true, validationRule: "Structured findings include id/severity/confidence/fingerprint/reportedBy/status and source tags from {spec, correctness, security, performance, architecture, external-safety} with dedup reconciliation summary." },
122
- { section: "Review Readiness Dashboard", required: false, validationRule: "Includes a per-pass table (Layer 1 / Layer 2 / Adversarial / Schema) with a 'Completed at' column, a Delegation log snapshot block (path .cclaw/state/delegation-log.json with required/completed/waived/pending), a Staleness signal block (commit at last review pass and current commit), and a Headline with open critical blockers + ship recommendation. At minimum, the section text must contain the substrings 'Completed at', 'delegation-log.json', 'commit at last review pass', and 'Ship recommendation'." },
123
- { section: "Completeness Score", required: false, validationRule: "Records AC coverage, task coverage, test-slice coverage, and adversarial-review pass status as numeric or boolean values. At minimum, a line like 'AC coverage: N/M' or 'AC coverage: 100%'." },
118
+ { section: "Review Findings Contract", required: true, validationRule: "Structured findings in 07-review-army.json include id/severity/confidence/fingerprint/reportedBy/status and source tags from {spec, correctness, security, performance, architecture, external-safety} with dedup reconciliation summary." },
119
+ { section: "Review Readiness Snapshot", required: false, validationRule: "Optional compact summary: completed checks, delegation-log status, staleness signal, open critical blockers, and ship recommendation." },
120
+ { section: "Completeness Snapshot", required: false, validationRule: "Optional compact coverage summary for AC coverage, task coverage, test-slice coverage, and adversarial-review status when triggered." },
124
121
  { section: "Incoming Feedback Queue", required: false, validationRule: "When external review feedback exists, include a queue summary with per-item disposition (resolved / accepted-risk / rejected-with-evidence) and evidence refs." },
125
122
  { section: "Trace Matrix Check", required: false, validationRule: "Records criteria/tasks/tests orphan counts (all zero on enforced tracks) with command output reference." },
126
123
  { section: "Blocked Route", required: false, validationRule: "When Final Verdict is BLOCKED: includes `ROUTE_BACK_TO_TDD`, rewind target `tdd`, and blocked finding IDs." },
@@ -129,7 +126,7 @@ export const REVIEW = {
129
126
  ]
130
127
  },
131
128
  reviewLens: {
132
- outputs: ["review verdict", "severity-indexed findings", "reconciled review-army findings", "ship readiness decision"],
129
+ outputs: ["review verdict", "severity-indexed findings", "reconciled structured findings", "ship readiness decision"],
133
130
  reviewSections: [
134
131
  {
135
132
  title: "Layer 1: Spec Compliance",
@@ -142,7 +139,7 @@ export const REVIEW = {
142
139
  stopGate: true
143
140
  },
144
141
  {
145
- title: "Layer 2a: Correctness",
142
+ title: "Layer 2: Integrated Correctness / Performance / Architecture",
146
143
  evaluationPoints: [
147
144
  "Logic errors and boundary violations",
148
145
  "Race conditions and concurrency issues",
@@ -152,7 +149,7 @@ export const REVIEW = {
152
149
  stopGate: true
153
150
  },
154
151
  {
155
- title: "Layer 2b: Security",
152
+ title: "Security Sweep",
156
153
  evaluationPoints: [
157
154
  "Input validation completeness",
158
155
  "Authorization boundary enforcement",
@@ -162,7 +159,7 @@ export const REVIEW = {
162
159
  stopGate: true
163
160
  },
164
161
  {
165
- title: "Layer 2c: Performance",
162
+ title: "Specialist Lens: Performance",
166
163
  evaluationPoints: [
167
164
  "N+1 query patterns",
168
165
  "Memory leak potential",
@@ -172,7 +169,7 @@ export const REVIEW = {
172
169
  stopGate: true
173
170
  },
174
171
  {
175
- title: "Layer 2d: Architecture Fit",
172
+ title: "Specialist Lens: Architecture Fit",
176
173
  evaluationPoints: [
177
174
  "Does implementation match the locked design?",
178
175
  "Coupling and cohesion assessment",
@@ -182,7 +179,7 @@ export const REVIEW = {
182
179
  stopGate: true
183
180
  },
184
181
  {
185
- title: "Layer 2e: External Safety Checklist",
182
+ title: "Specialist Lens: External Safety Checklist",
186
183
  evaluationPoints: [
187
184
  "SQL/database: parameterized queries, no raw string interpolation, migration safety",
188
185
  "Concurrency: race conditions in shared state, lock ordering, timeout handling",
@@ -1,4 +1,5 @@
1
- import { REVIEW_LOOP_CHECKLISTS } from "../review-loop.js";
1
+ import { REVIEW_LOOP_CHECKLISTS, reviewLoopPolicySummary, reviewLoopSecondOpinionSummary } from "../review-loop.js";
2
+ import { decisionProtocolInstruction } from "../decision-protocol.js";
2
3
  // ---------------------------------------------------------------------------
3
4
  // SCOPE — reference: gstack CEO review
4
5
  // ---------------------------------------------------------------------------
@@ -44,51 +45,37 @@ export const SCOPE = {
44
45
  },
45
46
  executionModel: {
46
47
  checklist: [
47
- "**Pre-Scope System Audit (opt-in)** — when `.cclaw/config.yaml::optInAudits.scopePreAudit` is true, before premise challenge gather reality snapshot: recent commits (`git log -30 --oneline`), current diff (`git diff --stat`), stash state (`git stash list`), and deferred debt markers (`rg -n 'TODO|FIXME|XXX|HACK'`). Record findings in scope artifact.",
48
- "**Assess complexity** — Read the brainstorm artifact. If project is simple (single component, clear architecture, personal/prototype), run light-touch scope: mode selection, 3-5 key in/out boundaries, deferred items. Skip Dream State Mapping and Temporal Interrogation. If project is complex (multi-component, team delivery, production), run the full checklist.",
49
- "**Prime Directives** — Zero silent failures. For each in-scope capability, name concrete failure modes, the exact error surface, and trace all four data-flow paths (happy, nil, empty, upstream error). Include interaction edge cases (double-click, navigate-away, stale state), observability commitments, and explicit deferred-item logging.",
50
- "**Premise Challenge** — Is this the right problem? What if we do nothing? What are we optimizing for?",
51
- "**Landscape Check** — for EXPAND/SELECTIVE candidates, perform a brief external scan of comparable products/patterns to calibrate ambition and avoid local maxima.",
52
- "**Existing Code Leverage** — Search for existing solutions before deciding to build new.",
53
- "**Taste Calibration** — identify 2-3 high-quality files/modules in this codebase and explicitly align scope quality bar to them.",
54
- "**Dream State Mapping** — (complex projects only) describe the ideal state 12 months out using `CURRENT STATE -> THIS PLAN -> 12-MONTH IDEAL`, then verify this scope moves toward that target.",
55
- "**Implementation Alternatives** — Produce 2-3 distinct approaches. For each: Name, Summary, Effort (S/M/L/XL), Risk (Low/Med/High), 2-3 Pros, 2-3 Cons, and explicit Reuses. One option must be minimal viable, one must be ideal architecture.",
56
- "**Temporal Interrogation** — (complex projects only) simulate implementation timeline: HOUR 1 foundations, HOUR 2-3 core logic, HOUR 4-5 integration surprises, HOUR 6+ polish/tests. Decide what must be locked now vs safely deferred.",
57
- "**Mode Selection** — Present expand/selective/hold/reduce with recommendation and default heuristic: greenfield -> expand, feature enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius (>15 files or multi-team impact) -> reduce.",
58
- "**Mode-Specific Analysis** — After mode is selected, run the matching analysis: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope rigor then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split).",
59
- "**Plant-seed shelf (optional)** — when a deferred/out-of-scope idea still has upside, capture it as `.cclaw/seeds/SEED-<YYYY-MM-DD>-<slug>.md` with trigger_when and action instead of losing it in prose-only notes.",
60
- "**Outside Voice + Spec Review Loop** — run an adversarial second-opinion pass on the scope artifact, reconcile findings, and iterate up to 3 cycles or until quality score >= 0.8. When `.cclaw/config.yaml::reviewLoop.externalSecondOpinion.enabled` is true, run an additional external-model pass and explicitly resolve score/finding disagreements.",
61
- "**Error and Rescue Registry** — For each capability: what breaks, how detected, what fallback."
48
+ "**Default path first** — read brainstorm, challenge premise, recommend one mode, draft 3-5 key in/out boundaries plus deferred items, then seek approval.",
49
+ "**Optional audits by trigger** — run the pre-scope system audit only when configured; use deep-mode prime directives, dream-state mapping, and temporal interrogation only for complex/high-risk scope.",
50
+ "**Premise and leverage check** — test whether this is the right problem, what happens if nothing changes, and what existing code can be reused.",
51
+ "**Calibrate ambition** — for EXPAND/SELECTIVE candidates, do a brief landscape scan and align the quality bar to 2-3 strong in-repo modules.",
52
+ "**Compare implementation alternatives** — give 2-3 distinct options with effort, risk, pros/cons, and explicit reuse; include minimal viable and ideal architecture options.",
53
+ "**Select scope mode explicitly** — present expand/selective/hold/reduce with a recommendation and default heuristic justification.",
54
+ "**Run mode-specific analysis** — expand, selective, hold, or reduce according to the selected mode; do not silently add or trim scope.",
55
+ "**Handle deferred upside** — optionally park high-upside deferred/out-of-scope ideas in `.cclaw/seeds/`.",
56
+ `**Outside voice when warranted** — run/reconcile the loop for complex/high-risk or configured scope; otherwise do a concise adversarial self-check. ${reviewLoopPolicySummary("scope")} ${reviewLoopSecondOpinionSummary("scope")}`,
57
+ "**Write the scope contract** — include in-scope/out-of-scope, discretion areas, deferred items, locked decisions, error/rescue notes, completion dashboard, and explicit approval."
62
58
  ],
63
59
  interactionProtocol: [
64
- "For scope mode selection: use the Decision Protocol — present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce. If the harness's native structured-ask tool is available (`AskUserQuestion` / `AskQuestion` / `question` / `request_user_input`), send exactly ONE question per call, validate fields against the runtime schema, and on schema error immediately fall back to a plain-text lettered list instead of retrying guessed payloads.",
65
- "Walk through the scope checklist interactively. Each checklist item that surfaces a decision should be presented to the user as a question, not as a monologue. Do not dump all items at once.",
66
- "Challenge premise and verify the problem framing before anything else.",
67
- "Take a position on every scope decision. Avoid hedging phrases like 'this could work' or 'there are many ways'; state your recommendation and one concrete condition that would change it.",
68
- "Use pushback patterns when framing is weak: vague scope -> force a specific user/problem, platform vision -> force a narrowest viable wedge, social proof -> demand behavioral evidence.",
69
- "Present one structural scope issue at a time for decision. Do NOT batch. Use structured options for each scope boundary question.",
70
- "Record explicit in-scope and out-of-scope contract.",
71
- "Once the user accepts or rejects a recommendation, commit fully. Do not re-argue.",
72
- "Before final scope approval, run an adversarial outside-voice review and reconcile every finding explicitly (accept/reject/defer with rationale).",
73
- "Bound review-loop retries: max 3 iterations or early stop at quality score >= 0.8.",
74
- "Produce a clean scope summary after all issues are resolved.",
75
- "**STOP.** Wait for explicit user approval of scope contract before advancing to design.",
76
- "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be marked completed or explicitly waived in `.cclaw/state/delegation-log.json`. Then close the stage via `node .cclaw/hooks/stage-complete.mjs scope` (do not hand-edit `.cclaw/state/flow-state.json`)."
60
+ decisionProtocolInstruction("scope mode selection", "present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended)", "recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce"),
61
+ "Do not walk the full checklist by default. Lead with the default scope contract; ask only when the answer changes in/out/deferred boundaries.",
62
+ "Challenge premise first, take a firm position, and name one concrete condition that would change it.",
63
+ "Push back on weak framing: vague scope needs a specific user/problem, platform vision needs a narrow wedge, social proof needs behavioral evidence.",
64
+ "Resolve one structural scope issue at a time; otherwise state the assumption and move on.",
65
+ "After acceptance/rejection, commit fully and do not re-argue.",
66
+ `Before final approval, reconcile outside-voice findings when the loop runs and bound retries with ${reviewLoopPolicySummary("scope")}`,
67
+ "**STOP.** Wait for explicit approval of the scope contract before advancing.",
68
+ "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be completed or explicitly waived, then close via `node .cclaw/hooks/stage-complete.mjs scope`."
77
69
  ],
78
70
  process: [
79
- "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true, run pre-scope system audit (git log/diff/stash/debt markers).",
80
- "Run premise challenge and existing-solution leverage check.",
81
- "When mode is EXPAND/SELECTIVE, run brief landscape check before final scope lock.",
82
- "Calibrate quality bar against 2-3 strong existing modules/files.",
83
- "Produce 2-3 scope alternatives in a structured format (Name, Summary, Effort, Risk, Pros, Cons, Reuses) with minimum viable and ideal architecture options included.",
84
- "Choose scope mode with user approval.",
85
- "Run mode-specific analysis that matches the selected scope mode.",
86
- "Optionally plant high-upside deferred ideas into `.cclaw/seeds/SEED-<YYYY-MM-DD>-<slug>.md` with trigger_when/action notes.",
87
- "Walk through scope review sections one at a time.",
88
- "Run outside-voice spec review loop (up to 3 iterations, quality score target >= 0.8). If configured, include external second opinion and reconcile deltas.",
89
- "Write explicit scope contract, discretion areas, and deferred items.",
90
- "Freeze non-negotiable boundaries as stable Locked Decisions (D-XX IDs).",
91
- "Produce scope summary plus completion dashboard (section status, critical gaps, resolved decisions, unresolved items or `None`)."
71
+ "Run configured pre-scope audit only when enabled.",
72
+ "Challenge premise, check existing-code leverage, and calibrate ambition/quality bar.",
73
+ "Compare structured scope alternatives with minimum viable and ideal architecture options.",
74
+ "Select scope mode with explicit user approval.",
75
+ "Run the selected mode analysis and park high-upside deferred ideas when useful.",
76
+ `Use outside-voice review only when complex/high-risk or configured; otherwise run a short adversarial self-check. If loop runs, enforce ${reviewLoopPolicySummary("scope")}`,
77
+ "Write explicit scope contract, discretion areas, deferred items, and D-XX locked decisions.",
78
+ "Produce scope summary and completion dashboard."
92
79
  ],
93
80
  requiredGates: [
94
81
  { id: "scope_mode_selected", description: "One scope mode was explicitly selected." },
@@ -104,8 +91,8 @@ export const SCOPE = {
104
91
  "Locked Decisions section lists stable D-XX IDs for non-negotiable boundaries.",
105
92
  "Premise challenge findings documented.",
106
93
  "Outside Voice findings and dispositions are recorded (accept/reject/defer with rationale).",
107
- "Spec review loop summary includes iteration count and quality score trajectory.",
108
- "When `.cclaw/config.yaml::reviewLoop.externalSecondOpinion.enabled` is true, external second-opinion disposition is captured.",
94
+ `Spec review loop summary includes iteration count and quality score trajectory per ${reviewLoopPolicySummary("scope")}`,
95
+ reviewLoopSecondOpinionSummary("scope"),
109
96
  "Deferred items list with one-line rationale for each.",
110
97
  "When an upside deferred idea is parked, a seed file is created under `.cclaw/seeds/` and referenced in the artifact.",
111
98
  "Completion dashboard lists per-section status, critical/open gaps, decision count, and unresolved items (or `None`)."
@@ -149,6 +136,7 @@ export const SCOPE = {
149
136
  traceabilityRule: "Every scope boundary must be traceable to a brainstorm decision. Every downstream design choice must stay within the scope contract."
150
137
  },
151
138
  artifactValidation: [
139
+ { section: "Upstream Handoff", required: false, validationRule: "Summarizes brainstorm/idea decisions, constraints, open questions, and explicit drift before scope decisions." },
152
140
  { section: "Pre-Scope System Audit", required: false, validationRule: "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true: must capture git log -30, git diff --stat, git stash list, and debt-marker scan (TODO/FIXME/XXX/HACK) before premise challenge." },
153
141
  { section: "Prime Directives", required: false, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
154
142
  { section: "Premise Challenge", required: false, validationRule: "Must contain explicit answers to: right problem? direct path? what if nothing?" },
@@ -158,13 +146,13 @@ export const SCOPE = {
158
146
  { section: "Locked Decisions (D-XX)", required: false, validationRule: "List of stable locked decisions with IDs D-01, D-02... Each ID appears once, includes rationale, and is intended for downstream cross-stage traceability." },
159
147
  { section: "Implementation Alternatives", required: false, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
160
148
  { section: "Scope Mode", required: true, validationRule: "Must state selected mode and rationale with default heuristic justification." },
161
- { section: "Mode-Specific Analysis", required: false, validationRule: "Must document the analysis matching the selected scope mode: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope baseline then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split)." },
149
+ { section: "Mode-Specific Analysis", required: false, validationRule: "Deep/complex scope only: document the analysis matching the selected mode. Default path may record a concise mode rationale instead." },
162
150
  { section: "In Scope / Out of Scope", required: true, validationRule: "Two separate explicit lists. Out-of-scope must not be empty." },
163
151
  { section: "Discretion Areas", required: false, validationRule: "Explicit list of implementer decision zones, or 'None' if scope is fully locked." },
164
152
  { section: "Deferred Items", required: false, validationRule: "Each item has one-line rationale. If empty, state 'None' explicitly." },
165
153
  { section: "Error & Rescue Registry", required: false, validationRule: "Each scoped capability has: failure mode, detection method, fallback decision." },
166
154
  { section: "Outside Voice Findings", required: false, validationRule: "Must list external/adversarial findings and disposition (accept/reject/defer) with rationale." },
167
- { section: "Spec Review Loop", required: false, validationRule: "Must record iterations (max 3), quality score per iteration, stop reason, and unresolved concerns." },
155
+ { section: "Spec Review Loop", required: false, validationRule: `Must record iterations, quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("scope")}` },
168
156
  { section: "Completion Dashboard", required: true, validationRule: "Lists per-review-section status, count of critical/open gaps, resolved decisions, and unresolved decisions (or 'None')." },
169
157
  { section: "Scope Summary", required: true, validationRule: "Clean summary: mode, strongest challenges, recommended path, accepted scope, deferred, excluded." },
170
158
  { section: "Dream State Mapping", required: false, validationRule: "If present (complex projects): CURRENT STATE, THIS PLAN, 12-MONTH IDEAL, and alignment verdict." },
@@ -1,3 +1,4 @@
1
+ import { decisionProtocolInstruction } from "../decision-protocol.js";
1
2
  // ---------------------------------------------------------------------------
2
3
  // SHIP — reference: superpowers finishing-a-development-branch + gstack /ship
3
4
  // ---------------------------------------------------------------------------
@@ -50,12 +51,12 @@ export const SHIP = {
50
51
  "Detect repository mode — if `.git/` is absent or inaccessible, lock finalization choices to FINALIZE_NO_VCS only and document manual handoff + rollback.",
51
52
  "Select finalization mode — exactly ONE enum: (A) FINALIZE_MERGE_LOCAL, (B) FINALIZE_OPEN_PR, (C) FINALIZE_KEEP_BRANCH, (D) FINALIZE_DISCARD_BRANCH, (E) FINALIZE_NO_VCS. For discard: list what will be deleted, require typed confirmation.",
52
53
  "Execute finalization — perform the selected action. For merge: verify clean merge. For PR: include structured body (summary, test plan, rollback). For discard: verify deletion. For NO_VCS: record handoff target, artifact bundle path, and manual rollback owner.",
53
- "Worktree cleanup — if using git worktrees, clean up the worktree after merge/discard. Keep it only for 'keep branch' mode. Skip for FINALIZE_NO_VCS."
54
+ "Branch cleanup — after merge/discard, remove only branches or temporary files the user explicitly approved. Skip for FINALIZE_NO_VCS."
54
55
  ],
55
56
  interactionProtocol: [
56
57
  "Run preflight checks before any release action.",
57
58
  "Document release notes and rollback plan explicitly.",
58
- "For finalization mode: use the Decision Protocol — present modes as labeled options (A/B/C/D/E) with consequences, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the mode that best addresses release blast-radius, rollback readiness, observability, and stakeholder communication — ties go to the most reversible option. If the harness's native structured-ask tool is available (`AskUserQuestion` / `AskQuestion` / `question` / `request_user_input`), send exactly ONE question per call, validate fields against the runtime schema, and on schema error immediately fall back to a plain-text lettered list instead of retrying guessed payloads.",
59
+ decisionProtocolInstruction("finalization mode", "present modes as labeled options (A/B/C/D/E) with consequences, and mark one as (recommended)", "recommend the mode that best addresses release blast-radius, rollback readiness, observability, and stakeholder communication — ties go to the most reversible option"),
59
60
  "Do not proceed if critical blockers remain from review.",
60
61
  "**STOP.** Present finalization options and wait for user selection before executing any finalization action."
61
62
  ],
@@ -109,13 +110,14 @@ export const SHIP = {
109
110
  traceabilityRule: "Ship artifact must reference review verdict and resolution status. Release notes must reference spec criteria. Rollback plan must reference specific changes that could fail."
110
111
  },
111
112
  artifactValidation: [
113
+ { section: "Upstream Handoff", required: false, validationRule: "Summarizes review/tdd decisions, constraints, open questions, and explicit drift before finalization." },
112
114
  { section: "Preflight Results", required: true, validationRule: "Build, test, lint, type-check results captured with fresh output. Exceptions documented if any." },
113
115
  { section: "Release Notes", required: true, validationRule: "What changed, why, impact. References spec criteria. Breaking changes flagged." },
114
116
  { section: "Rollback Plan", required: true, validationRule: "Trigger conditions, rollback steps (exact commands), verification steps." },
115
117
  { section: "Monitoring", required: false, validationRule: "If applicable: what metrics/logs to watch post-deploy. Risk note if no monitoring." },
116
118
  { section: "Finalization", required: true, validationRule: "Exactly one finalization enum token selected (FINALIZE_MERGE_LOCAL | FINALIZE_OPEN_PR | FINALIZE_KEEP_BRANCH | FINALIZE_DISCARD_BRANCH | FINALIZE_NO_VCS). Execution result documented. Worktree cleaned if applicable." },
117
119
  { section: "Completion Status", required: false, validationRule: "If present: exactly one of SHIPPED, SHIPPED_WITH_EXCEPTIONS, BLOCKED. Exceptions documented when applicable." },
118
- { section: "Compound Step", required: false, validationRule: "Optional retrospective: at least one bullet of the form 'Insight: ... | Action: append [compound] entry to .cclaw/knowledge.jsonl', or an explicit 'No compound insight this run.' line." }
120
+ { section: "Compound Step", required: false, validationRule: "Optional retrospective: include overlap assessment before appending duplicate knowledge; distinguish bug-track fixes/tests from knowledge-track process/project guidance; use supersedes/superseded_by only for clear refreshes; or include an explicit 'No compound insight this run.' line." }
119
121
  ]
120
122
  },
121
123
  reviewLens: {
@@ -145,6 +147,7 @@ export const SHIP = {
145
147
  ]
146
148
  },
147
149
  // `done` exits the stage pipeline. Archive semantics are handled by the
148
- // closeout substate machine (`idle` -> ... -> `archived`) in flow-state.
150
+ // closeout substate machine (`idle` -> ... -> `archived`) in flow-state under
151
+ // ${closeoutSubstateInline()}.
149
152
  next: "done",
150
153
  };
@@ -30,6 +30,7 @@ export const SPEC = {
30
30
  "No explicit assumptions section",
31
31
  "No approval record",
32
32
  "No testability mapping",
33
+ "Assumptions not surfaced before sign-off",
33
34
  "Edge cases missing or deferred"
34
35
  ]
35
36
  },
@@ -39,7 +40,8 @@ export const SPEC = {
39
40
  "Define measurable acceptance criteria — each criterion must be observable and falsifiable. No vague adjectives.",
40
41
  "Capture edge cases — for each criterion, define at least one boundary condition and one error condition.",
41
42
  "Document constraints and assumptions — regulatory, system, integration, and performance boundaries. Surface implicit assumptions explicitly.",
42
- "Confirm testability — for each acceptance criterion, describe the test that would prove it. If untestable, rewrite the criterion.",
43
+ "Surface assumptions before finalization — list each assumption with source/confidence, validation path, and whether it is accepted, rejected, or still open.",
44
+ "Build the Acceptance Mapping contract — for each AC, map upstream design decision, observable evidence, verification method, and likely test level. If any column is unclear, rewrite the criterion.",
43
45
  "Present acceptance criteria to the user in 3-5-item batches, pausing for explicit ACK between batches (see Interaction Protocol).",
44
46
  "Write spec artifact and request user approval — wait for explicit confirmation before proceeding."
45
47
  ],
@@ -47,28 +49,32 @@ export const SPEC = {
47
49
  "Express each requirement in observable terms.",
48
50
  "Resolve ambiguity before moving to plan. Challenge vague language.",
49
51
  "Capture assumptions explicitly, not implicitly.",
52
+ "Before final spec approval, present the assumptions section as its own checkpoint so the user can accept, revise, or mark an assumption unknown.",
50
53
  "**Chunk acceptance criteria for review.** When presenting the spec to the user for sign-off, deliver acceptance criteria in batches of 3-5 and **pause for explicit ACK** (via Decision Protocol) before sending the next batch. Do not dump the full criteria wall in one message — small batches surface objections earlier and keep the sign-off meaningful. Full spec writeup still lands in `04-spec.md`, but the conversation itself must be digestible.",
51
54
  "Require user confirmation on the written spec. **STOP.** Do NOT proceed to plan until user approves.",
52
- "For each criterion, ask: how would you test this? If the answer is unclear, rewrite.",
55
+ "For each criterion, ask: what exact evidence proves this passed? If the evidence or verification command/manual step is vague, rewrite.",
53
56
  "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity."
54
57
  ],
55
58
  process: [
56
59
  "Define measurable acceptance criteria.",
57
60
  "Capture constraints, assumptions, and edge cases.",
58
- "Build testability map: criterion -> test description.",
59
- "Confirm testability for each criterion.",
61
+ "Review assumptions before finalization: source/confidence, validation path, and accepted/rejected/open disposition.",
62
+ "Build Acceptance Mapping: AC -> design decision -> observable evidence -> verification method -> likely test level.",
63
+ "Confirm every verification method is concrete enough for plan/TDD to use later.",
60
64
  "Present acceptance criteria to the user in 3-5-item batches, pausing for explicit ACK between batches (see Interaction Protocol).",
61
65
  "Write spec artifact and request approval."
62
66
  ],
63
67
  requiredGates: [
64
68
  { id: "spec_acceptance_measurable", description: "Acceptance criteria are measurable and observable." },
65
69
  { id: "spec_testability_confirmed", description: "Each criterion has a described test method." },
70
+ { id: "spec_assumptions_surfaced", description: "Assumptions were explicitly reviewed with source/confidence, validation path, and disposition before approval." },
66
71
  { id: "spec_user_approved", description: "User approved the final written spec." }
67
72
  ],
68
73
  requiredEvidence: [
69
74
  "Artifact written to `.cclaw/artifacts/04-spec.md`.",
70
- "Each acceptance criterion maps to a testable outcome.",
75
+ "Each acceptance criterion maps to upstream design decision, observable evidence, verification method, and likely test level.",
71
76
  "Edge cases documented per criterion.",
77
+ "Assumptions Before Finalization section records source/confidence, validation path, and accepted/rejected/open disposition.",
72
78
  "Approval marker captured in artifact."
73
79
  ],
74
80
  inputs: ["design artifact", "business constraints", "quality requirements"],
@@ -80,12 +86,14 @@ export const SPEC = {
80
86
  blockers: [
81
87
  "non-measurable criteria",
82
88
  "constraints missing",
89
+ "assumptions not surfaced before approval",
83
90
  "open ambiguities remain"
84
91
  ],
85
92
  exitCriteria: [
86
93
  "spec approved by user",
87
94
  "required gates marked satisfied",
88
95
  "plan-ready acceptance mapping exists",
96
+ "assumptions reviewed before finalization",
89
97
  "testability confirmed for all criteria"
90
98
  ],
91
99
  platformNotes: [
@@ -103,10 +111,12 @@ export const SPEC = {
103
111
  traceabilityRule: "Every acceptance criterion must trace to a design decision. Every downstream plan task must trace to a spec criterion."
104
112
  },
105
113
  artifactValidation: [
114
+ { section: "Upstream Handoff", required: false, validationRule: "Summarizes scope/design decisions, constraints, open questions, and explicit drift before acceptance criteria." },
106
115
  { section: "Acceptance Criteria", required: true, validationRule: "Each criterion is observable, measurable, and falsifiable. Table must include a Requirement Ref column linking to R# IDs in 02-scope-<slug>.md (legacy 02-scope.md is accepted during migration) and a Design Decision Ref column tracing back to design artifact. AC IDs (AC-1, AC-2…) are stable across revisions — dropped ACs stay with Priority `DROPPED`." },
107
116
  { section: "Edge Cases", required: true, validationRule: "At least one boundary and one error condition per criterion." },
108
117
  { section: "Constraints and Assumptions", required: false, validationRule: "All implicit assumptions surfaced. Constraints have sources." },
109
- { section: "Testability Map", required: true, validationRule: "Each criterion maps to a concrete test description with verification approach (unit, integration, e2e, manual) and command or manual steps." },
118
+ { section: "Assumptions Before Finalization", required: true, validationRule: "Each assumption has source/confidence, validation path, and accepted/rejected/open disposition before the Approval section is finalized." },
119
+ { section: "Acceptance Mapping", required: true, validationRule: "Each criterion maps to upstream design decision, observable evidence, verification method, likely test level (unit/integration/e2e/manual), and command or manual steps when known." },
110
120
  { section: "Vague to Fixed", required: false, validationRule: "If present: table with original vague wording and rewritten observable/testable version for each ambiguous requirement." },
111
121
  { section: "Non-Functional Requirements", required: false, validationRule: "If present: performance thresholds, security constraints, scalability limits, reliability targets with measurable values." },
112
122
  { section: "Interface Contracts", required: false, validationRule: "If present: for each module boundary list produces (outputs) and consumes (inputs) with data types." },
@@ -134,11 +144,12 @@ export const SPEC = {
134
144
  {
135
145
  title: "Testability Audit",
136
146
  evaluationPoints: [
137
- "Does every criterion have a concrete test description in the Testability Map?",
147
+ "Does every criterion have a concrete row in Acceptance Mapping?",
138
148
  "Does every test specify a verification approach (unit, integration, e2e, manual)?",
139
- "Does every test include a runnable command or manual steps?",
149
+ "Does every verification method include a runnable command or concrete manual steps when known?",
150
+ "Were assumptions surfaced before finalization with source/confidence, validation path, and disposition?",
140
151
  "Are edge cases (boundary + error) defined for every criterion?",
141
- "Can you run every verification command right now and get a meaningful result?"
152
+ "Are commands specific enough to run later (not vague `run tests` wording)?"
142
153
  ],
143
154
  stopGate: true
144
155
  }