npm - cclaw-cli - Versions diffs - 0.49.0 → 0.51.0 - Mend

cclaw-cli 0.49.0 → 0.51.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (181)

package/README.md +54 -82
package/dist/artifact-linter.d.ts +4 -0
package/dist/artifact-linter.js +24 -3
package/dist/cli.d.ts +1 -19
package/dist/cli.js +49 -491
package/dist/constants.d.ts +2 -13
package/dist/constants.js +1 -43
package/dist/content/closeout-guidance.d.ts +14 -0
package/dist/content/closeout-guidance.js +42 -0
package/dist/content/core-agents.js +51 -9
package/dist/content/decision-protocol.d.ts +12 -0
package/dist/content/decision-protocol.js +20 -0
package/dist/content/diff-command.d.ts +1 -2
package/dist/content/diff-command.js +8 -94
package/dist/content/examples.d.ts +4 -10
package/dist/content/examples.js +10 -20
package/dist/content/hook-events.js +2 -2
package/dist/content/hook-inline-snippets.d.ts +5 -2
package/dist/content/hook-inline-snippets.js +33 -1
package/dist/content/hook-manifest.d.ts +3 -4
package/dist/content/hook-manifest.js +11 -12
package/dist/content/hooks.js +2 -0
package/dist/content/ideate-command.d.ts +2 -0
package/dist/content/ideate-command.js +31 -25
package/dist/content/iron-laws.d.ts +5 -5
package/dist/content/iron-laws.js +5 -5
package/dist/content/learnings.d.ts +3 -4
package/dist/content/learnings.js +24 -50
package/dist/content/meta-skill.js +31 -21
package/dist/content/next-command.js +38 -38
package/dist/content/node-hooks.js +17 -343
package/dist/content/opencode-plugin.js +2 -100
package/dist/content/research-playbooks.js +14 -14
package/dist/content/review-loop.d.ts +2 -0
package/dist/content/review-loop.js +8 -0
package/dist/content/session-hooks.js +14 -46
package/dist/content/skills.d.ts +0 -5
package/dist/content/skills.js +53 -128
package/dist/content/stage-common-guidance.d.ts +0 -1
package/dist/content/stage-common-guidance.js +15 -14
package/dist/content/stage-schema.d.ts +26 -1
package/dist/content/stage-schema.js +121 -40
package/dist/content/stages/_lint-metadata/index.js +9 -15
package/dist/content/stages/brainstorm.js +22 -43
package/dist/content/stages/design.js +37 -57
package/dist/content/stages/plan.js +22 -13
package/dist/content/stages/review.js +24 -27
package/dist/content/stages/scope.js +34 -46
package/dist/content/stages/ship.js +7 -4
package/dist/content/stages/spec.js +20 -9
package/dist/content/stages/tdd.js +64 -44
package/dist/content/start-command.js +10 -12
package/dist/content/status-command.d.ts +2 -7
package/dist/content/status-command.js +19 -146
package/dist/content/subagents.d.ts +0 -5
package/dist/content/subagents.js +47 -28
package/dist/content/templates.d.ts +1 -1
package/dist/content/templates.js +126 -135
package/dist/content/track-render-context.d.ts +17 -0
package/dist/content/track-render-context.js +44 -0
package/dist/content/tree-command.d.ts +1 -2
package/dist/content/tree-command.js +4 -87
package/dist/content/utility-skills.d.ts +2 -29
package/dist/content/utility-skills.js +2 -1534
package/dist/content/view-command.js +29 -11
package/dist/delegation.d.ts +1 -1
package/dist/delegation.js +5 -15
package/dist/doctor-registry.js +20 -21
package/dist/doctor.js +88 -344
package/dist/flow-state.d.ts +3 -0
package/dist/flow-state.js +2 -0
package/dist/harness-adapters.d.ts +1 -1
package/dist/harness-adapters.js +48 -57
package/dist/install.js +128 -358
package/dist/internal/advance-stage.js +3 -9
package/dist/internal/compound-readiness.d.ts +1 -1
package/dist/internal/compound-readiness.js +1 -1
package/dist/internal/tdd-loop-status.d.ts +1 -1
package/dist/internal/tdd-loop-status.js +1 -1
package/dist/knowledge-store.d.ts +16 -10
package/dist/knowledge-store.js +51 -15
package/dist/policy.js +16 -105
package/dist/run-archive.d.ts +4 -6
package/dist/run-archive.js +15 -20
package/dist/run-persistence.d.ts +2 -2
package/dist/run-persistence.js +3 -9
package/package.json +1 -2
package/dist/content/archive-command.d.ts +0 -2
package/dist/content/archive-command.js +0 -124
package/dist/content/compound-command.d.ts +0 -5
package/dist/content/compound-command.js +0 -193
package/dist/content/contexts.d.ts +0 -18
package/dist/content/contexts.js +0 -24
package/dist/content/contracts.d.ts +0 -2
package/dist/content/contracts.js +0 -51
package/dist/content/doctor-references.d.ts +0 -2
package/dist/content/doctor-references.js +0 -150
package/dist/content/eval-scaffold.d.ts +0 -15
package/dist/content/eval-scaffold.js +0 -370
package/dist/content/feature-command.d.ts +0 -2
package/dist/content/feature-command.js +0 -123
package/dist/content/flow-map.d.ts +0 -23
package/dist/content/flow-map.js +0 -134
package/dist/content/harness-doc.d.ts +0 -2
package/dist/content/harness-doc.js +0 -202
package/dist/content/harness-playbooks.d.ts +0 -24
package/dist/content/harness-playbooks.js +0 -393
package/dist/content/harness-tool-refs.d.ts +0 -20
package/dist/content/harness-tool-refs.js +0 -268
package/dist/content/ops-command.d.ts +0 -2
package/dist/content/ops-command.js +0 -71
package/dist/content/protocols.d.ts +0 -7
package/dist/content/protocols.js +0 -215
package/dist/content/retro-command.d.ts +0 -2
package/dist/content/retro-command.js +0 -165
package/dist/content/rewind-command.d.ts +0 -2
package/dist/content/rewind-command.js +0 -106
package/dist/content/tdd-log-command.d.ts +0 -2
package/dist/content/tdd-log-command.js +0 -85
package/dist/eval/agents/single-shot.d.ts +0 -27
package/dist/eval/agents/single-shot.js +0 -79
package/dist/eval/agents/with-tools.d.ts +0 -44
package/dist/eval/agents/with-tools.js +0 -261
package/dist/eval/agents/workflow.d.ts +0 -31
package/dist/eval/agents/workflow.js +0 -155
package/dist/eval/baseline.d.ts +0 -38
package/dist/eval/baseline.js +0 -282
package/dist/eval/config-loader.d.ts +0 -14
package/dist/eval/config-loader.js +0 -395
package/dist/eval/corpus.d.ts +0 -30
package/dist/eval/corpus.js +0 -330
package/dist/eval/cost-guard.d.ts +0 -102
package/dist/eval/cost-guard.js +0 -190
package/dist/eval/diff.d.ts +0 -64
package/dist/eval/diff.js +0 -323
package/dist/eval/llm-client.d.ts +0 -176
package/dist/eval/llm-client.js +0 -267
package/dist/eval/mode.d.ts +0 -28
package/dist/eval/mode.js +0 -61
package/dist/eval/progress.d.ts +0 -83
package/dist/eval/progress.js +0 -59
package/dist/eval/report.d.ts +0 -11
package/dist/eval/report.js +0 -181
package/dist/eval/rubric-loader.d.ts +0 -20
package/dist/eval/rubric-loader.js +0 -143
package/dist/eval/runner.d.ts +0 -81
package/dist/eval/runner.js +0 -746
package/dist/eval/runs.d.ts +0 -41
package/dist/eval/runs.js +0 -114
package/dist/eval/sandbox.d.ts +0 -38
package/dist/eval/sandbox.js +0 -137
package/dist/eval/tools/glob.d.ts +0 -2
package/dist/eval/tools/glob.js +0 -163
package/dist/eval/tools/grep.d.ts +0 -2
package/dist/eval/tools/grep.js +0 -152
package/dist/eval/tools/index.d.ts +0 -7
package/dist/eval/tools/index.js +0 -35
package/dist/eval/tools/read.d.ts +0 -2
package/dist/eval/tools/read.js +0 -122
package/dist/eval/tools/types.d.ts +0 -49
package/dist/eval/tools/types.js +0 -41
package/dist/eval/tools/write.d.ts +0 -2
package/dist/eval/tools/write.js +0 -92
package/dist/eval/types.d.ts +0 -561
package/dist/eval/types.js +0 -47
package/dist/eval/verifiers/judge.d.ts +0 -40
package/dist/eval/verifiers/judge.js +0 -256
package/dist/eval/verifiers/rules.d.ts +0 -24
package/dist/eval/verifiers/rules.js +0 -218
package/dist/eval/verifiers/structural.d.ts +0 -14
package/dist/eval/verifiers/structural.js +0 -171
package/dist/eval/verifiers/traceability.d.ts +0 -23
package/dist/eval/verifiers/traceability.js +0 -84
package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
package/dist/eval/verifiers/workflow-consistency.js +0 -225
package/dist/eval/workflow-corpus.d.ts +0 -7
package/dist/eval/workflow-corpus.js +0 -207
package/dist/feature-system.d.ts +0 -42
package/dist/feature-system.js +0 -432
package/dist/internal/knowledge-digest.d.ts +0 -7
package/dist/internal/knowledge-digest.js +0 -93

package/dist/content/stages/plan.js CHANGED Viewed

@@ -31,6 +31,7 @@ export const PLAN = {
             "No dependency graph",
             "No WAIT_FOR_CONFIRM marker",
             "No explicit dependency batches",
+            "No execution posture for sequencing, risk, and checkpoint cadence",
             "Tasks exceed one coherent outcome",
             "No acceptance mapping",
             "Locked decisions are missing or not mapped",
@@ -43,18 +44,20 @@ export const PLAN = {
             "Build dependency graph — identify task ordering, parallel opportunities, and blocking dependencies.",
             "Group tasks into dependency batches — batch N+1 cannot start until batch N has verification evidence.",
             "Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
-            "Attach verification — every task has an acceptance criterion mapping and a concrete verification command.",
-            "Annotate slice-review metadata — if `.cclaw/config.yaml::sliceReview.enabled` is true, every task row additionally carries `touchCount` (rough number of files expected to change) and `touchPaths` (glob hints, e.g. `migrations/**`, `src/auth/**`). A task may set `highRisk: true` to force a review pass regardless of thresholds. These fields feed the TDD stage's Per-Slice Review checkpoint; when `sliceReview` is disabled they are optional.",
+            "Task Contract — every task has one coherent outcome, AC mapping, exact verification command/manual step, and expected evidence snippet or pass condition. Avoid vague `run tests` wording.",
+            "Annotate slice-review metadata — if `.cclaw/config.yaml::sliceReview.enabled` is true, every task row additionally carries `touchCount` (rough number of files expected to change) and `touchPaths` (glob hints, e.g. `migrations/**`, `src/auth/**`). A task may set `highRisk: true` to force a review pass regardless of thresholds. These fields feed the TDD stage's Per-Slice Review point; when `sliceReview` is disabled they are optional.",
             "Map scope Locked Decisions — every D-XX from scope is referenced by at least one plan task (or explicitly marked deferred with reason).",
             "Run anti-placeholder + anti-scope-reduction scans — block `TODO/TBD/...` and phrasing like `v1`, `for now`, `later` for locked boundaries.",
-            "Define checkpoints — mark points where progress should be validated before continuing.",
+            "Define validation points — mark where progress must be checked before continuing, with concrete command and expected evidence.",
+            "Define execution posture — record whether execution should be sequential, dependency-batched, parallel-safe, or blocked; include risk triggers and RED/GREEN/REFACTOR checkpoint/commit expectations when the repo workflow supports them.",
             "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. **STOP.** Do NOT proceed until user confirms. Then close the stage with `node .cclaw/hooks/stage-complete.mjs plan` and tell user to run `/cc-next`."
         ],
         interactionProtocol: [
             "Plan in read-only mode relative to implementation.",
             "Split work into small vertical slices (target 2-5 minute tasks).",
             "Publish explicit dependency batches with entry and exit checks for each batch.",
-            "Attach verification step to every task.",
+            "Expose execution posture: sequential vs batch/parallel, stop conditions, and checkpoint cadence for the TDD handoff.",
+            "Attach exact verification command/manual step and expected evidence to every task.",
             "Preserve locked scope boundaries: no silent scope reduction language in task rows.",
             "Enforce WAIT_FOR_CONFIRM: present the plan summary with options (A) Approve / (B) Revise / (C) Reject.",
             "**STOP.** Do NOT proceed until user explicitly approves.",
@@ -63,23 +66,24 @@ export const PLAN = {
         process: [
             "Build dependency graph and ordered slices.",
             "Group slices into execution batches and define gate criteria per batch.",
-            "Define each task with acceptance mapping and verification commands.",
+            "Define each task with acceptance mapping, verification command/manual step, and expected evidence/pass condition.",
             "Trace every locked decision (D-XX) to plan tasks or explicit defer rationale.",
-            "Record checkpoints and blockers.",
+            "Record validation points, blockers, and execution posture.",
             "Write plan artifact and pause at WAIT_FOR_CONFIRM."
         ],
         requiredGates: [
             { id: "plan_tasks_sliced_2_5_min", description: "Tasks are small, executable slices." },
-            { id: "plan_dependency_batches_defined", description: "Tasks are grouped into executable batches with gate checks." },
+            { id: "plan_dependency_batches_defined", description: "Tasks are grouped into executable batches with gate checks and execution posture." },
             { id: "plan_acceptance_mapped", description: "Each task maps to a spec acceptance criterion." },
             { id: "plan_wait_for_confirm", description: "Execution blocked until explicit user confirmation." }
         ],
         requiredEvidence: [
             "Artifact written to `.cclaw/artifacts/05-plan.md`.",
-            "Task list includes acceptance mapping.",
+            "Task list includes acceptance mapping, exact verification command/manual step, and expected evidence/pass condition.",
             "Locked decision coverage table present with D-XX trace links.",
             "Dependency graph documented.",
             "Dependency batches documented with batch-by-batch verification gates.",
+            "Execution posture documented with sequencing, stop conditions, and TDD checkpoint expectations.",
             "WAIT_FOR_CONFIRM status recorded."
         ],
         inputs: ["approved spec", "codebase context", "delivery constraints"],
@@ -92,6 +96,7 @@ export const PLAN = {
             "tasks too broad",
             "dependency uncertainty unresolved",
             "batch boundaries are unclear",
+            "execution posture is missing or contradicts dependency batches",
             "locked decisions from scope are not mapped to tasks",
             "no explicit confirmation"
         ],
@@ -99,6 +104,7 @@ export const PLAN = {
             "plan quality gates complete",
             "WAIT_FOR_CONFIRM present and unresolved until user approves",
             "artifact ready for TDD execution",
+            "execution posture ready for TDD handoff",
             "acceptance mapping complete"
         ],
         platformNotes: [
@@ -116,10 +122,12 @@ export const PLAN = {
             traceabilityRule: "Every task must trace to a spec acceptance criterion. Every locked scope decision (D-XX) must trace to at least one plan task or explicit defer rationale. Every downstream RED test must trace to a plan task."
         },
         artifactValidation: [
+            { section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/design/scope decisions, constraints, open questions, and explicit drift before task breakdown." },
             { section: "Dependency Graph", required: false, validationRule: "Ordering and parallel opportunities explicit. No circular dependencies." },
             { section: "Dependency Batches", required: true, validationRule: "Every task belongs to a batch. Each batch has an exit gate and dependency statement." },
-            { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, verification command, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget. When the sliceReview feature is enabled in the cclaw config, each task row additionally declares touchCount, touchPaths, and an optional highRisk flag so the TDD stage can decide whether a Per-Slice Review pass is required." },
+            { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, exact verification command/manual step, expected evidence/pass condition, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget. When the sliceReview option is enabled in the cclaw config, each task row additionally declares touchCount, touchPaths, and an optional highRisk flag so the TDD stage can decide whether a Per-Slice Review pass is required." },
             { section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
+            { section: "Execution Posture", required: true, validationRule: "States sequential/batch/parallel posture, stop conditions, risk triggers, and RED/GREEN/REFACTOR checkpoint or commit expectations for TDD when consistent with the repo workflow." },
             { section: "Locked Decision Coverage", required: false, validationRule: "Every locked decision ID (D-XX) from scope is listed with linked task IDs or explicit defer rationale." },
             { section: "Risk Assessment", required: false, validationRule: "If present: per-task or per-batch risk identification with likelihood, impact, and mitigation strategy." },
             { section: "Boundary Map", required: false, validationRule: "If present: per-batch or per-task interface contracts listing what each task produces (exports) and consumes (imports) from other tasks." },
@@ -129,14 +137,14 @@ export const PLAN = {
         ]
     },
     reviewLens: {
-        outputs: ["task graph", "dependency batch plan", "ordered plan", "explicit confirmation checkpoint"],
+        outputs: ["task graph", "dependency batch plan", "ordered plan", "explicit confirmation gate"],
         reviewSections: [
             {
                 title: "Task Decomposition Audit",
                 evaluationPoints: [
                     "Does every task target a single coherent area (vertical slice)?",
                     "Can each task be completed in 2-5 minutes?",
-                    "Does every task have an acceptance criterion link and verification command?",
+                    "Does every task have an acceptance criterion link, exact verification command/manual step, and expected evidence/pass condition?",
                     "Are there tasks that touch multiple unrelated areas?",
                     "Would a new engineer understand and start each task within two minutes?"
                 ],
@@ -149,7 +157,8 @@ export const PLAN = {
                     "Does each batch have a verification gate?",
                     "Are batch dependencies explicit and acyclic?",
                     "Is the acceptance mapping complete — every spec criterion covered?",
-                    "Are there hidden dependencies between tasks in different batches?"
+                    "Are there hidden dependencies between tasks in different batches?",
+                    "Does the Execution Posture match the dependency graph and stop risky parallelism?"
                 ],
                 stopGate: true
             },
@@ -157,7 +166,7 @@ export const PLAN = {
                 title: "Five-Minute Budget + No-Placeholders Audit",
                 evaluationPoints: [
                     "Does every task carry an explicit minutes estimate (e.g. `[~3m]`) and does every estimate fit the 2-to-5-minute budget? Estimates >5 minutes must be split.",
-                    "Are all file paths, test commands, and verification commands copy-pasteable as written — no `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, or ellipsis standing in for omitted args?",
+                    "Are all file paths, test commands, verification commands, and expected evidence copy-pasteable/specific as written — no `TODO`, `TBD`, `FIXME`, `<fill-in>`, `<your-*-here>`, `xxx`, bare `run tests`, or ellipsis standing in for omitted args?",
                     "Does every acceptance-criterion reference resolve to a real R# / AC-### in the spec (not a blank link)?",
                     "If an estimate is genuinely uncertain (first-time integration, unfamiliar library), is the uncertainty named explicitly and scheduled as a spike task in batch 0, rather than hidden behind a large estimate?"
                 ],

package/dist/content/stages/review.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { decisionProtocolInstruction, STRUCTURED_ASK_TOOL_LIST_REVIEW, structuredAskSingleChoiceInstruction } from "../decision-protocol.js";
 // ---------------------------------------------------------------------------
 // REVIEW — reference: superpowers code-review + gstack /review
 // ---------------------------------------------------------------------------
@@ -33,39 +34,34 @@ export const REVIEW = {
         checklist: [
             "Diff Scope — Run `git diff` against base branch. If no diff, exit early with APPROVED (no changes to review). Scope the review to changed files unless blast-radius analysis requires wider inspection.",
             "Change-Size Check — ~100 lines = normal. ~300 lines = consider splitting. ~1000+ lines = strongly recommend stacked PRs. Flag large diffs to the user.",
-            "Adversarial Trigger Check — compute changed-line count (`git diff --shortstat <base>..HEAD`), files-touched count, and whether trust boundaries changed (auth/secrets/external inputs/permissions). If `lines > 100` OR `files > 10` OR `trust boundary changed`, **dispatch a SECOND reviewer agent with the `adversarial-review` skill loaded** and reconcile its findings into the review army (treat the conditional dispatch as mandatory whenever the trigger holds; record the trigger that fired in the dashboard).",
+            "Risk-Based Second Opinion — compute changed-line count, files-touched count, and trust-boundary movement. Dispatch an adversarial reviewer only when trust boundaries changed, Critical/Important ambiguity remains, or the diff is both large and high-risk; otherwise record `not triggered`.",
             "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and plan. Verify evidence chain is unbroken.",
             "Run traceability matrix — execute `cclaw internal trace-matrix` (or equivalent helper) and confirm there are no orphaned criteria/tasks/tests before declaring ship readiness.",
             "Layer 1: Spec Compliance — check every acceptance criterion against implementation. Verdict: pass/fail per criterion.",
-            "Layer 2a: Correctness — logic errors, race conditions, boundary violations, null handling.",
-            "Layer 2b: Security — input validation, auth boundaries, secrets exposure, injection vectors. **Mandatory:** also load and execute the `.cclaw/skills/security-audit/SKILL.md` utility skill (proactive pattern sweep across diff + touched modules, not just the diff itself) and merge findings into the review army. The Layer 2 security pass is not complete until the audit sweep records a finding count (0 acceptable) with file:line evidence for every Critical.",
-            "Layer 2c: Performance — N+1 queries, memory leaks, missing caching, hot paths.",
-            "Layer 2d: Architecture Fit — does the implementation match the locked design? Coupling, cohesion, interface contracts.",
-            "Layer 2e: External Safety — SQL safety, concurrency, secrets in logs, enum completeness (grep outside diff), LLM trust boundaries.",
-            "Incoming Feedback Intake — when human reviewer comments, bot findings, or CI annotations exist, run `.cclaw/skills/receiving-code-review/SKILL.md`, keep a per-comment disposition queue, and mirror outcomes into `07-review.md` + `07-review-army.json` before final verdict.",
-            "Review Army reconciliation — normalize findings into structured records, dedup by fingerprint, and mark multi-specialist confirmations.",
+            "Layer 2: Integrated findings — one structured pass tagged by category: correctness, security, performance, architecture, external-safety.",
+            "Security sweep — mandatory dedicated security-reviewer pass across diff + touched modules. A zero-finding pass must include `NO_CHANGE_ATTESTATION` with rationale.",
+            "Incoming Feedback Intake — when human reviewer comments, bot findings, or CI annotations exist, keep a per-comment disposition queue and mirror outcomes into `07-review.md` + `07-review-army.json` before final verdict.",
+            "Structured Review reconciliation — normalize findings into `07-review-army.json`, dedup by fingerprint, and mark multi-specialist confirmations when multiple lenses agree.",
             "Meta-Review — Were tests actually run? Do test names match what they test? Are there real assertions?",
             "Classify findings — Critical (blocks ship), Important (should fix), Suggestion (optional improvement).",
             "Produce verdict — APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
-            "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD` and include `/cc-ops rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs."
+            "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD` and include `cclaw internal rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs."
         ],
         interactionProtocol: [
             "Run Layer 1 (spec compliance) completely before starting Layer 2.",
             "In each review section, present findings ONE AT A TIME. Do NOT batch.",
             "Classify every finding as Critical, Important, or Suggestion.",
-            "For each Critical finding: use the Decision Protocol — present resolution options (A/B/C) with trade-offs, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius. If the harness's native structured-ask tool is available (`AskUserQuestion` on Claude, `AskQuestion` on Cursor, `question` on OpenCode with `permission.question: \"allow\"`, `request_user_input` on Codex in Plan/Collaboration mode), send exactly ONE question per call, validate fields against the runtime schema, and on schema error immediately fall back to a plain-text lettered list instead of retrying guessed payloads.",
+            decisionProtocolInstruction("each Critical finding", "present resolution options (A/B/C) with trade-offs, and mark one as (recommended)", "recommend the option that fully closes the finding with no carry-over risk and the smallest blast radius", STRUCTURED_ASK_TOOL_LIST_REVIEW),
             "Resolve all critical blockers before ship.",
-            "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD` and point to `/cc-ops rewind tdd` with the blocking IDs.",
-            "For final verdict: use the native structured-ask tool (`AskUserQuestion` / `AskQuestion` / `question` / `request_user_input`) only if runtime schema is confirmed; otherwise collect verdict with a plain-text single-choice prompt (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED).",
+            "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD` and point to `cclaw internal rewind tdd` with the blocking IDs.",
+            structuredAskSingleChoiceInstruction("final verdict", "verdict (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED)"),
             "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict."
         ],
         process: [
             "Layer 1: check acceptance criteria and requirement coverage.",
-            "Layer 2a: check correctness — logic, races, boundaries, null handling.",
-            "Layer 2b: check security — validation, auth, secrets, injection.",
-            "Layer 2c: check performance — queries, memory, caching, hot paths.",
-            "Layer 2d: check architecture fit — design compliance, coupling, interfaces.",
-            "Reconcile multi-agent findings into `.cclaw/artifacts/07-review-army.json` (dedup + confidence + conflict notes + source tags from spec/correctness/security/performance/architecture/external-safety passes).",
+            "Layer 2: record integrated findings tagged correctness/security/performance/architecture/external-safety.",
+            "Security-reviewer: run mandatory security sweep or no-change attestation.",
+            "Reconcile structured findings into `.cclaw/artifacts/07-review-army.json` (dedup + confidence + conflict notes + source tags from spec/correctness/security/performance/architecture/external-safety passes).",
             "Classify and prioritize all findings.",
             "Write review report artifact with explicit verdict.",
             "If verdict is BLOCKED, include the remediation route token `ROUTE_BACK_TO_TDD` and the rewind command payload."
@@ -73,7 +69,7 @@ export const REVIEW = {
         requiredGates: [
             { id: "review_layer1_spec_compliance", description: "Spec compliance check completed with per-criterion verdict." },
             { id: "review_layer2_security", description: "Security review completed." },
-            { id: "review_layer_coverage_complete", description: "Layer coverage map in 07-review-army.json confirms spec/correctness/security/performance/architecture/external-safety passes." },
+            { id: "review_layer_coverage_complete", description: "Layer coverage map in 07-review-army.json confirms spec/correctness/security/performance/architecture/external-safety tags were considered." },
             { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." },
             { id: "review_army_json_valid", description: "07-review-army.json passes schema validation (validateReviewArmy)." },
             { id: "review_trace_matrix_clean", description: "Trace matrix has no orphaned criteria/tasks/test slices for the active run." }
@@ -116,11 +112,12 @@ export const REVIEW = {
             traceabilityRule: "Review verdict must reference specific spec criteria and TDD evidence. Downstream ship stage must reference review verdict."
         },
         artifactValidation: [
+            { section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/plan/tdd decisions, constraints, open questions, and explicit drift before review verdicts." },
             { section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
             { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status. Security coverage must include either explicit security findings or `NO_CHANGE_ATTESTATION: <reason>` when no security-relevant changes were found." },
-            { section: "Review Army Contract", required: true, validationRule: "Structured findings include id/severity/confidence/fingerprint/reportedBy/status and source tags from {spec, correctness, security, performance, architecture, external-safety} with dedup reconciliation summary." },
-            { section: "Review Readiness Dashboard", required: false, validationRule: "Includes a per-pass table (Layer 1 / Layer 2 / Adversarial / Schema) with a 'Completed at' column, a Delegation log snapshot block (path .cclaw/state/delegation-log.json with required/completed/waived/pending), a Staleness signal block (commit at last review pass and current commit), and a Headline with open critical blockers + ship recommendation. At minimum, the section text must contain the substrings 'Completed at', 'delegation-log.json', 'commit at last review pass', and 'Ship recommendation'." },
-            { section: "Completeness Score", required: false, validationRule: "Records AC coverage, task coverage, test-slice coverage, and adversarial-review pass status as numeric or boolean values. At minimum, a line like 'AC coverage: N/M' or 'AC coverage: 100%'." },
+            { section: "Review Findings Contract", required: true, validationRule: "Structured findings in 07-review-army.json include id/severity/confidence/fingerprint/reportedBy/status and source tags from {spec, correctness, security, performance, architecture, external-safety} with dedup reconciliation summary." },
+            { section: "Review Readiness Snapshot", required: false, validationRule: "Optional compact summary: completed checks, delegation-log status, staleness signal, open critical blockers, and ship recommendation." },
+            { section: "Completeness Snapshot", required: false, validationRule: "Optional compact coverage summary for AC coverage, task coverage, test-slice coverage, and adversarial-review status when triggered." },
             { section: "Incoming Feedback Queue", required: false, validationRule: "When external review feedback exists, include a queue summary with per-item disposition (resolved / accepted-risk / rejected-with-evidence) and evidence refs." },
             { section: "Trace Matrix Check", required: false, validationRule: "Records criteria/tasks/tests orphan counts (all zero on enforced tracks) with command output reference." },
             { section: "Blocked Route", required: false, validationRule: "When Final Verdict is BLOCKED: includes `ROUTE_BACK_TO_TDD`, rewind target `tdd`, and blocked finding IDs." },
@@ -129,7 +126,7 @@ export const REVIEW = {
         ]
     },
     reviewLens: {
-        outputs: ["review verdict", "severity-indexed findings", "reconciled review-army findings", "ship readiness decision"],
+        outputs: ["review verdict", "severity-indexed findings", "reconciled structured findings", "ship readiness decision"],
         reviewSections: [
             {
                 title: "Layer 1: Spec Compliance",
@@ -142,7 +139,7 @@ export const REVIEW = {
                 stopGate: true
             },
             {
-                title: "Layer 2a: Correctness",
+                title: "Layer 2: Integrated Correctness / Performance / Architecture",
                 evaluationPoints: [
                     "Logic errors and boundary violations",
                     "Race conditions and concurrency issues",
@@ -152,7 +149,7 @@ export const REVIEW = {
                 stopGate: true
             },
             {
-                title: "Layer 2b: Security",
+                title: "Security Sweep",
                 evaluationPoints: [
                     "Input validation completeness",
                     "Authorization boundary enforcement",
@@ -162,7 +159,7 @@ export const REVIEW = {
                 stopGate: true
             },
             {
-                title: "Layer 2c: Performance",
+                title: "Specialist Lens: Performance",
                 evaluationPoints: [
                     "N+1 query patterns",
                     "Memory leak potential",
@@ -172,7 +169,7 @@ export const REVIEW = {
                 stopGate: true
             },
             {
-                title: "Layer 2d: Architecture Fit",
+                title: "Specialist Lens: Architecture Fit",
                 evaluationPoints: [
                     "Does implementation match the locked design?",
                     "Coupling and cohesion assessment",
@@ -182,7 +179,7 @@ export const REVIEW = {
                 stopGate: true
             },
             {
-                title: "Layer 2e: External Safety Checklist",
+                title: "Specialist Lens: External Safety Checklist",
                 evaluationPoints: [
                     "SQL/database: parameterized queries, no raw string interpolation, migration safety",
                     "Concurrency: race conditions in shared state, lock ordering, timeout handling",

package/dist/content/stages/scope.js CHANGED Viewed

@@ -1,4 +1,5 @@
-import { REVIEW_LOOP_CHECKLISTS } from "../review-loop.js";
+import { REVIEW_LOOP_CHECKLISTS, reviewLoopPolicySummary, reviewLoopSecondOpinionSummary } from "../review-loop.js";
+import { decisionProtocolInstruction } from "../decision-protocol.js";
 // ---------------------------------------------------------------------------
 // SCOPE — reference: gstack CEO review
 // ---------------------------------------------------------------------------
@@ -44,51 +45,37 @@ export const SCOPE = {
     },
     executionModel: {
         checklist: [
-            "**Pre-Scope System Audit (opt-in)** — when `.cclaw/config.yaml::optInAudits.scopePreAudit` is true, before premise challenge gather reality snapshot: recent commits (`git log -30 --oneline`), current diff (`git diff --stat`), stash state (`git stash list`), and deferred debt markers (`rg -n 'TODO|FIXME|XXX|HACK'`). Record findings in scope artifact.",
-            "**Assess complexity** — Read the brainstorm artifact. If project is simple (single component, clear architecture, personal/prototype), run light-touch scope: mode selection, 3-5 key in/out boundaries, deferred items. Skip Dream State Mapping and Temporal Interrogation. If project is complex (multi-component, team delivery, production), run the full checklist.",
-            "**Prime Directives** — Zero silent failures. For each in-scope capability, name concrete failure modes, the exact error surface, and trace all four data-flow paths (happy, nil, empty, upstream error). Include interaction edge cases (double-click, navigate-away, stale state), observability commitments, and explicit deferred-item logging.",
-            "**Premise Challenge** — Is this the right problem? What if we do nothing? What are we optimizing for?",
-            "**Landscape Check** — for EXPAND/SELECTIVE candidates, perform a brief external scan of comparable products/patterns to calibrate ambition and avoid local maxima.",
-            "**Existing Code Leverage** — Search for existing solutions before deciding to build new.",
-            "**Taste Calibration** — identify 2-3 high-quality files/modules in this codebase and explicitly align scope quality bar to them.",
-            "**Dream State Mapping** — (complex projects only) describe the ideal state 12 months out using `CURRENT STATE -> THIS PLAN -> 12-MONTH IDEAL`, then verify this scope moves toward that target.",
-            "**Implementation Alternatives** — Produce 2-3 distinct approaches. For each: Name, Summary, Effort (S/M/L/XL), Risk (Low/Med/High), 2-3 Pros, 2-3 Cons, and explicit Reuses. One option must be minimal viable, one must be ideal architecture.",
-            "**Temporal Interrogation** — (complex projects only) simulate implementation timeline: HOUR 1 foundations, HOUR 2-3 core logic, HOUR 4-5 integration surprises, HOUR 6+ polish/tests. Decide what must be locked now vs safely deferred.",
-            "**Mode Selection** — Present expand/selective/hold/reduce with recommendation and default heuristic: greenfield -> expand, feature enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius (>15 files or multi-team impact) -> reduce.",
-            "**Mode-Specific Analysis** — After mode is selected, run the matching analysis: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope rigor then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split).",
-            "**Plant-seed shelf (optional)** — when a deferred/out-of-scope idea still has upside, capture it as `.cclaw/seeds/SEED-<YYYY-MM-DD>-<slug>.md` with trigger_when and action instead of losing it in prose-only notes.",
-            "**Outside Voice + Spec Review Loop** — run an adversarial second-opinion pass on the scope artifact, reconcile findings, and iterate up to 3 cycles or until quality score >= 0.8. When `.cclaw/config.yaml::reviewLoop.externalSecondOpinion.enabled` is true, run an additional external-model pass and explicitly resolve score/finding disagreements.",
-            "**Error and Rescue Registry** — For each capability: what breaks, how detected, what fallback."
+            "**Default path first** — read brainstorm, challenge premise, recommend one mode, draft 3-5 key in/out boundaries plus deferred items, then seek approval.",
+            "**Optional audits by trigger** — run the pre-scope system audit only when configured; use deep-mode prime directives, dream-state mapping, and temporal interrogation only for complex/high-risk scope.",
+            "**Premise and leverage check** — test whether this is the right problem, what happens if nothing changes, and what existing code can be reused.",
+            "**Calibrate ambition** — for EXPAND/SELECTIVE candidates, do a brief landscape scan and align the quality bar to 2-3 strong in-repo modules.",
+            "**Compare implementation alternatives** — give 2-3 distinct options with effort, risk, pros/cons, and explicit reuse; include minimal viable and ideal architecture options.",
+            "**Select scope mode explicitly** — present expand/selective/hold/reduce with a recommendation and default heuristic justification.",
+            "**Run mode-specific analysis** — expand, selective, hold, or reduce according to the selected mode; do not silently add or trim scope.",
+            "**Handle deferred upside** — optionally park high-upside deferred/out-of-scope ideas in `.cclaw/seeds/`.",
+            `**Outside voice when warranted** — run/reconcile the loop for complex/high-risk or configured scope; otherwise do a concise adversarial self-check. ${reviewLoopPolicySummary("scope")} ${reviewLoopSecondOpinionSummary("scope")}`,
+            "**Write the scope contract** — include in-scope/out-of-scope, discretion areas, deferred items, locked decisions, error/rescue notes, completion dashboard, and explicit approval."
         ],
         interactionProtocol: [
-            "For scope mode selection: use the Decision Protocol — present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce. If the harness's native structured-ask tool is available (`AskUserQuestion` / `AskQuestion` / `question` / `request_user_input`), send exactly ONE question per call, validate fields against the runtime schema, and on schema error immediately fall back to a plain-text lettered list instead of retrying guessed payloads.",
-            "Walk through the scope checklist interactively. Each checklist item that surfaces a decision should be presented to the user as a question, not as a monologue. Do not dump all items at once.",
-            "Challenge premise and verify the problem framing before anything else.",
-            "Take a position on every scope decision. Avoid hedging phrases like 'this could work' or 'there are many ways'; state your recommendation and one concrete condition that would change it.",
-            "Use pushback patterns when framing is weak: vague scope -> force a specific user/problem, platform vision -> force a narrowest viable wedge, social proof -> demand behavioral evidence.",
-            "Present one structural scope issue at a time for decision. Do NOT batch. Use structured options for each scope boundary question.",
-            "Record explicit in-scope and out-of-scope contract.",
-            "Once the user accepts or rejects a recommendation, commit fully. Do not re-argue.",
-            "Before final scope approval, run an adversarial outside-voice review and reconcile every finding explicitly (accept/reject/defer with rationale).",
-            "Bound review-loop retries: max 3 iterations or early stop at quality score >= 0.8.",
-            "Produce a clean scope summary after all issues are resolved.",
-            "**STOP.** Wait for explicit user approval of scope contract before advancing to design.",
-            "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be marked completed or explicitly waived in `.cclaw/state/delegation-log.json`. Then close the stage via `node .cclaw/hooks/stage-complete.mjs scope` (do not hand-edit `.cclaw/state/flow-state.json`)."
+            decisionProtocolInstruction("scope mode selection", "present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended)", "recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce"),
+            "Do not walk the full checklist by default. Lead with the default scope contract; ask only when the answer changes in/out/deferred boundaries.",
+            "Challenge premise first, take a firm position, and name one concrete condition that would change it.",
+            "Push back on weak framing: vague scope needs a specific user/problem, platform vision needs a narrow wedge, social proof needs behavioral evidence.",
+            "Resolve one structural scope issue at a time; otherwise state the assumption and move on.",
+            "After acceptance/rejection, commit fully and do not re-argue.",
+            `Before final approval, reconcile outside-voice findings when the loop runs and bound retries with ${reviewLoopPolicySummary("scope")}`,
+            "**STOP.** Wait for explicit approval of the scope contract before advancing.",
+            "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be completed or explicitly waived, then close via `node .cclaw/hooks/stage-complete.mjs scope`."
         ],
         process: [
-            "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true, run pre-scope system audit (git log/diff/stash/debt markers).",
-            "Run premise challenge and existing-solution leverage check.",
-            "When mode is EXPAND/SELECTIVE, run brief landscape check before final scope lock.",
-            "Calibrate quality bar against 2-3 strong existing modules/files.",
-            "Produce 2-3 scope alternatives in a structured format (Name, Summary, Effort, Risk, Pros, Cons, Reuses) with minimum viable and ideal architecture options included.",
-            "Choose scope mode with user approval.",
-            "Run mode-specific analysis that matches the selected scope mode.",
-            "Optionally plant high-upside deferred ideas into `.cclaw/seeds/SEED-<YYYY-MM-DD>-<slug>.md` with trigger_when/action notes.",
-            "Walk through scope review sections one at a time.",
-            "Run outside-voice spec review loop (up to 3 iterations, quality score target >= 0.8). If configured, include external second opinion and reconcile deltas.",
-            "Write explicit scope contract, discretion areas, and deferred items.",
-            "Freeze non-negotiable boundaries as stable Locked Decisions (D-XX IDs).",
-            "Produce scope summary plus completion dashboard (section status, critical gaps, resolved decisions, unresolved items or `None`)."
+            "Run configured pre-scope audit only when enabled.",
+            "Challenge premise, check existing-code leverage, and calibrate ambition/quality bar.",
+            "Compare structured scope alternatives with minimum viable and ideal architecture options.",
+            "Select scope mode with explicit user approval.",
+            "Run the selected mode analysis and park high-upside deferred ideas when useful.",
+            `Use outside-voice review only when complex/high-risk or configured; otherwise run a short adversarial self-check. If loop runs, enforce ${reviewLoopPolicySummary("scope")}`,
+            "Write explicit scope contract, discretion areas, deferred items, and D-XX locked decisions.",
+            "Produce scope summary and completion dashboard."
         ],
         requiredGates: [
             { id: "scope_mode_selected", description: "One scope mode was explicitly selected." },
@@ -104,8 +91,8 @@ export const SCOPE = {
             "Locked Decisions section lists stable D-XX IDs for non-negotiable boundaries.",
             "Premise challenge findings documented.",
             "Outside Voice findings and dispositions are recorded (accept/reject/defer with rationale).",
-            "Spec review loop summary includes iteration count and quality score trajectory.",
-            "When `.cclaw/config.yaml::reviewLoop.externalSecondOpinion.enabled` is true, external second-opinion disposition is captured.",
+            `Spec review loop summary includes iteration count and quality score trajectory per ${reviewLoopPolicySummary("scope")}`,
+            reviewLoopSecondOpinionSummary("scope"),
             "Deferred items list with one-line rationale for each.",
             "When an upside deferred idea is parked, a seed file is created under `.cclaw/seeds/` and referenced in the artifact.",
             "Completion dashboard lists per-section status, critical/open gaps, decision count, and unresolved items (or `None`)."
@@ -149,6 +136,7 @@ export const SCOPE = {
             traceabilityRule: "Every scope boundary must be traceable to a brainstorm decision. Every downstream design choice must stay within the scope contract."
         },
         artifactValidation: [
+            { section: "Upstream Handoff", required: false, validationRule: "Summarizes brainstorm/idea decisions, constraints, open questions, and explicit drift before scope decisions." },
             { section: "Pre-Scope System Audit", required: false, validationRule: "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true: must capture git log -30, git diff --stat, git stash list, and debt-marker scan (TODO/FIXME/XXX/HACK) before premise challenge." },
             { section: "Prime Directives", required: false, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
             { section: "Premise Challenge", required: false, validationRule: "Must contain explicit answers to: right problem? direct path? what if nothing?" },
@@ -158,13 +146,13 @@ export const SCOPE = {
             { section: "Locked Decisions (D-XX)", required: false, validationRule: "List of stable locked decisions with IDs D-01, D-02... Each ID appears once, includes rationale, and is intended for downstream cross-stage traceability." },
             { section: "Implementation Alternatives", required: false, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
             { section: "Scope Mode", required: true, validationRule: "Must state selected mode and rationale with default heuristic justification." },
-            { section: "Mode-Specific Analysis", required: false, validationRule: "Must document the analysis matching the selected scope mode: EXPAND (10x and delight opportunities), SELECTIVE (hold-scope baseline then cherry-picked expansions), HOLD (minimum-change-set hardening), REDUCE (ruthless cuts and follow-up split)." },
+            { section: "Mode-Specific Analysis", required: false, validationRule: "Deep/complex scope only: document the analysis matching the selected mode. Default path may record a concise mode rationale instead." },
             { section: "In Scope / Out of Scope", required: true, validationRule: "Two separate explicit lists. Out-of-scope must not be empty." },
             { section: "Discretion Areas", required: false, validationRule: "Explicit list of implementer decision zones, or 'None' if scope is fully locked." },
             { section: "Deferred Items", required: false, validationRule: "Each item has one-line rationale. If empty, state 'None' explicitly." },
             { section: "Error & Rescue Registry", required: false, validationRule: "Each scoped capability has: failure mode, detection method, fallback decision." },
             { section: "Outside Voice Findings", required: false, validationRule: "Must list external/adversarial findings and disposition (accept/reject/defer) with rationale." },
-            { section: "Spec Review Loop", required: false, validationRule: "Must record iterations (max 3), quality score per iteration, stop reason, and unresolved concerns." },
+            { section: "Spec Review Loop", required: false, validationRule: `Must record iterations, quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("scope")}` },
             { section: "Completion Dashboard", required: true, validationRule: "Lists per-review-section status, count of critical/open gaps, resolved decisions, and unresolved decisions (or 'None')." },
             { section: "Scope Summary", required: true, validationRule: "Clean summary: mode, strongest challenges, recommended path, accepted scope, deferred, excluded." },
             { section: "Dream State Mapping", required: false, validationRule: "If present (complex projects): CURRENT STATE, THIS PLAN, 12-MONTH IDEAL, and alignment verdict." },

package/dist/content/stages/ship.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { decisionProtocolInstruction } from "../decision-protocol.js";
 // ---------------------------------------------------------------------------
 // SHIP — reference: superpowers finishing-a-development-branch + gstack /ship
 // ---------------------------------------------------------------------------
@@ -50,12 +51,12 @@ export const SHIP = {
             "Detect repository mode — if `.git/` is absent or inaccessible, lock finalization choices to FINALIZE_NO_VCS only and document manual handoff + rollback.",
             "Select finalization mode — exactly ONE enum: (A) FINALIZE_MERGE_LOCAL, (B) FINALIZE_OPEN_PR, (C) FINALIZE_KEEP_BRANCH, (D) FINALIZE_DISCARD_BRANCH, (E) FINALIZE_NO_VCS. For discard: list what will be deleted, require typed confirmation.",
             "Execute finalization — perform the selected action. For merge: verify clean merge. For PR: include structured body (summary, test plan, rollback). For discard: verify deletion. For NO_VCS: record handoff target, artifact bundle path, and manual rollback owner.",
-            "Worktree cleanup — if using git worktrees, clean up the worktree after merge/discard. Keep it only for 'keep branch' mode. Skip for FINALIZE_NO_VCS."
+            "Branch cleanup — after merge/discard, remove only branches or temporary files the user explicitly approved. Skip for FINALIZE_NO_VCS."
         ],
         interactionProtocol: [
             "Run preflight checks before any release action.",
             "Document release notes and rollback plan explicitly.",
-            "For finalization mode: use the Decision Protocol — present modes as labeled options (A/B/C/D/E) with consequences, and mark one as (recommended). Do NOT use a numeric Completeness rubric; recommend the mode that best addresses release blast-radius, rollback readiness, observability, and stakeholder communication — ties go to the most reversible option. If the harness's native structured-ask tool is available (`AskUserQuestion` / `AskQuestion` / `question` / `request_user_input`), send exactly ONE question per call, validate fields against the runtime schema, and on schema error immediately fall back to a plain-text lettered list instead of retrying guessed payloads.",
+            decisionProtocolInstruction("finalization mode", "present modes as labeled options (A/B/C/D/E) with consequences, and mark one as (recommended)", "recommend the mode that best addresses release blast-radius, rollback readiness, observability, and stakeholder communication — ties go to the most reversible option"),
             "Do not proceed if critical blockers remain from review.",
             "**STOP.** Present finalization options and wait for user selection before executing any finalization action."
         ],
@@ -109,13 +110,14 @@ export const SHIP = {
             traceabilityRule: "Ship artifact must reference review verdict and resolution status. Release notes must reference spec criteria. Rollback plan must reference specific changes that could fail."
         },
         artifactValidation: [
+            { section: "Upstream Handoff", required: false, validationRule: "Summarizes review/tdd decisions, constraints, open questions, and explicit drift before finalization." },
             { section: "Preflight Results", required: true, validationRule: "Build, test, lint, type-check results captured with fresh output. Exceptions documented if any." },
             { section: "Release Notes", required: true, validationRule: "What changed, why, impact. References spec criteria. Breaking changes flagged." },
             { section: "Rollback Plan", required: true, validationRule: "Trigger conditions, rollback steps (exact commands), verification steps." },
             { section: "Monitoring", required: false, validationRule: "If applicable: what metrics/logs to watch post-deploy. Risk note if no monitoring." },
             { section: "Finalization", required: true, validationRule: "Exactly one finalization enum token selected (FINALIZE_MERGE_LOCAL | FINALIZE_OPEN_PR | FINALIZE_KEEP_BRANCH | FINALIZE_DISCARD_BRANCH | FINALIZE_NO_VCS). Execution result documented. Worktree cleaned if applicable." },
             { section: "Completion Status", required: false, validationRule: "If present: exactly one of SHIPPED, SHIPPED_WITH_EXCEPTIONS, BLOCKED. Exceptions documented when applicable." },
-            { section: "Compound Step", required: false, validationRule: "Optional retrospective: at least one bullet of the form 'Insight: ... | Action: append [compound] entry to .cclaw/knowledge.jsonl', or an explicit 'No compound insight this run.' line." }
+            { section: "Compound Step", required: false, validationRule: "Optional retrospective: include overlap assessment before appending duplicate knowledge; distinguish bug-track fixes/tests from knowledge-track process/project guidance; use supersedes/superseded_by only for clear refreshes; or include an explicit 'No compound insight this run.' line." }
         ]
     },
     reviewLens: {
@@ -145,6 +147,7 @@ export const SHIP = {
         ]
     },
     // `done` exits the stage pipeline. Archive semantics are handled by the
-    // closeout substate machine (`idle` -> ... -> `archived`) in flow-state.
+    // closeout substate machine (`idle` -> ... -> `archived`) in flow-state
+    // (see `closeoutSubstateInline()`).
     next: "done",
 };

package/dist/content/stages/spec.js CHANGED Viewed

@@ -30,6 +30,7 @@ export const SPEC = {
             "No explicit assumptions section",
             "No approval record",
             "No testability mapping",
+            "Assumptions not surfaced before sign-off",
             "Edge cases missing or deferred"
         ]
     },
@@ -39,7 +40,8 @@ export const SPEC = {
             "Define measurable acceptance criteria — each criterion must be observable and falsifiable. No vague adjectives.",
             "Capture edge cases — for each criterion, define at least one boundary condition and one error condition.",
             "Document constraints and assumptions — regulatory, system, integration, and performance boundaries. Surface implicit assumptions explicitly.",
-            "Confirm testability — for each acceptance criterion, describe the test that would prove it. If untestable, rewrite the criterion.",
+            "Surface assumptions before finalization — list each assumption with source/confidence, validation path, and whether it is accepted, rejected, or still open.",
+            "Build the Acceptance Mapping contract — for each AC, map upstream design decision, observable evidence, verification method, and likely test level. If any column is unclear, rewrite the criterion.",
             "Present acceptance criteria to the user in 3-5-item batches, pausing for explicit ACK between batches (see Interaction Protocol).",
             "Write spec artifact and request user approval — wait for explicit confirmation before proceeding."
         ],
@@ -47,28 +49,32 @@ export const SPEC = {
             "Express each requirement in observable terms.",
             "Resolve ambiguity before moving to plan. Challenge vague language.",
             "Capture assumptions explicitly, not implicitly.",
+            "Before final spec approval, present the assumptions section as its own checkpoint so the user can accept, revise, or mark an assumption unknown.",
             "**Chunk acceptance criteria for review.** When presenting the spec to the user for sign-off, deliver acceptance criteria in batches of 3-5 and **pause for explicit ACK** (via Decision Protocol) before sending the next batch. Do not dump the full criteria wall in one message — small batches surface objections earlier and keep the sign-off meaningful. Full spec writeup still lands in `04-spec.md`, but the conversation itself must be digestible.",
             "Require user confirmation on the written spec. **STOP.** Do NOT proceed to plan until user approves.",
-            "For each criterion, ask: how would you test this? If the answer is unclear, rewrite.",
+            "For each criterion, ask: what exact evidence proves this passed? If the evidence or verification command/manual step is vague, rewrite.",
             "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity."
         ],
         process: [
             "Define measurable acceptance criteria.",
             "Capture constraints, assumptions, and edge cases.",
-            "Build testability map: criterion -> test description.",
-            "Confirm testability for each criterion.",
+            "Review assumptions before finalization: source/confidence, validation path, and accepted/rejected/open disposition.",
+            "Build Acceptance Mapping: AC -> design decision -> observable evidence -> verification method -> likely test level.",
+            "Confirm every verification method is concrete enough for plan/TDD to use later.",
             "Present acceptance criteria to the user in 3-5-item batches, pausing for explicit ACK between batches (see Interaction Protocol).",
             "Write spec artifact and request approval."
         ],
         requiredGates: [
             { id: "spec_acceptance_measurable", description: "Acceptance criteria are measurable and observable." },
             { id: "spec_testability_confirmed", description: "Each criterion has a described test method." },
+            { id: "spec_assumptions_surfaced", description: "Assumptions were explicitly reviewed with source/confidence, validation path, and disposition before approval." },
             { id: "spec_user_approved", description: "User approved the final written spec." }
         ],
         requiredEvidence: [
             "Artifact written to `.cclaw/artifacts/04-spec.md`.",
-            "Each acceptance criterion maps to a testable outcome.",
+            "Each acceptance criterion maps to upstream design decision, observable evidence, verification method, and likely test level.",
             "Edge cases documented per criterion.",
+            "Assumptions Before Finalization section records source/confidence, validation path, and accepted/rejected/open disposition.",
             "Approval marker captured in artifact."
         ],
         inputs: ["design artifact", "business constraints", "quality requirements"],
@@ -80,12 +86,14 @@ export const SPEC = {
         blockers: [
             "non-measurable criteria",
             "constraints missing",
+            "assumptions not surfaced before approval",
             "open ambiguities remain"
         ],
         exitCriteria: [
             "spec approved by user",
             "required gates marked satisfied",
             "plan-ready acceptance mapping exists",
+            "assumptions reviewed before finalization",
             "testability confirmed for all criteria"
         ],
         platformNotes: [
@@ -103,10 +111,12 @@ export const SPEC = {
             traceabilityRule: "Every acceptance criterion must trace to a design decision. Every downstream plan task must trace to a spec criterion."
         },
         artifactValidation: [
+            { section: "Upstream Handoff", required: false, validationRule: "Summarizes scope/design decisions, constraints, open questions, and explicit drift before acceptance criteria." },
             { section: "Acceptance Criteria", required: true, validationRule: "Each criterion is observable, measurable, and falsifiable. Table must include a Requirement Ref column linking to R# IDs in 02-scope-<slug>.md (legacy 02-scope.md is accepted during migration) and a Design Decision Ref column tracing back to design artifact. AC IDs (AC-1, AC-2…) are stable across revisions — dropped ACs stay with Priority `DROPPED`." },
             { section: "Edge Cases", required: true, validationRule: "At least one boundary and one error condition per criterion." },
             { section: "Constraints and Assumptions", required: false, validationRule: "All implicit assumptions surfaced. Constraints have sources." },
-            { section: "Testability Map", required: true, validationRule: "Each criterion maps to a concrete test description with verification approach (unit, integration, e2e, manual) and command or manual steps." },
+            { section: "Assumptions Before Finalization", required: true, validationRule: "Each assumption has source/confidence, validation path, and accepted/rejected/open disposition before the Approval section is finalized." },
+            { section: "Acceptance Mapping", required: true, validationRule: "Each criterion maps to upstream design decision, observable evidence, verification method, likely test level (unit/integration/e2e/manual), and command or manual steps when known." },
             { section: "Vague to Fixed", required: false, validationRule: "If present: table with original vague wording and rewritten observable/testable version for each ambiguous requirement." },
             { section: "Non-Functional Requirements", required: false, validationRule: "If present: performance thresholds, security constraints, scalability limits, reliability targets with measurable values." },
             { section: "Interface Contracts", required: false, validationRule: "If present: for each module boundary list produces (outputs) and consumes (inputs) with data types." },
@@ -134,11 +144,12 @@ export const SPEC = {
             {
                 title: "Testability Audit",
                 evaluationPoints: [
-                    "Does every criterion have a concrete test description in the Testability Map?",
+                    "Does every criterion have a concrete row in Acceptance Mapping?",
                     "Does every test specify a verification approach (unit, integration, e2e, manual)?",
-                    "Does every test include a runnable command or manual steps?",
+                    "Does every verification method include a runnable command or concrete manual steps when known?",
+                    "Were assumptions surfaced before finalization with source/confidence, validation path, and disposition?",
                     "Are edge cases (boundary + error) defined for every criterion?",
-                    "Can you run every verification command right now and get a meaningful result?"
+                    "Are commands specific enough to run later (not vague `run tests` wording)?"
                 ],
                 stopGate: true
             }