npm - cclaw-cli - Versions diffs - 0.51.23 → 0.51.24 - Mend

cclaw-cli 0.51.23 → 0.51.24

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (20)

package/dist/content/core-agents.d.ts +18 -2
package/dist/content/core-agents.js +59 -13
package/dist/content/examples.js +13 -5
package/dist/content/seed-shelf.js +73 -8
package/dist/content/stage-schema.js +50 -6
package/dist/content/stages/brainstorm.js +20 -15
package/dist/content/stages/design.js +16 -16
package/dist/content/stages/review.js +13 -4
package/dist/content/stages/schema-types.d.ts +1 -1
package/dist/content/stages/scope.js +15 -10
package/dist/content/subagents.js +59 -3
package/dist/content/templates.js +118 -22
package/dist/doctor.js +120 -20
package/dist/flow-state.d.ts +1 -0
package/dist/flow-state.js +1 -0
package/dist/install.js +72 -1
package/dist/knowledge-store.js +30 -6
package/dist/run-archive.js +11 -0
package/dist/run-persistence.js +14 -7
package/package.json +1 -1

package/dist/content/stages/design.js CHANGED Viewed

@@ -9,11 +9,11 @@ export const DESIGN = {
     complexityTier: "standard",
     skillFolder: "engineering-design-lock",
     skillName: "engineering-design-lock",
-    skillDescription: "Engineering lock-in stage. Build a concrete technical spine before spec and planning, with section-by-section interactive review.",
+    skillDescription: "Engineering lock stage. Convert the approved scope contract into a buildable architecture with adversarial alternatives, failure/rescue paths, and spec handoff.",
     philosophy: {
         hardGate: "Do NOT write implementation code. This stage produces design decisions and architecture documents only. No code changes, no scaffolding, no test files.",
         ironLaw: "NO DESIGN DECISION WITHOUT A LABELED DIAGRAM, A REJECTED ALTERNATIVE, AND A NAMED FAILURE MODE.",
-        purpose: "Lock architecture, data flow, failure modes, and test/performance expectations through rigorous interactive review.",
+        purpose: "Lock how the scoped slice works: architecture boundary, existing fit, data/state flow, critical path, trust boundaries, failure/rescue behavior, verification, rollout, and spec handoff.",
         whenToUse: [
             "After scope agreement approval",
             "Before writing final spec and execution plan",
@@ -40,14 +40,14 @@ export const DESIGN = {
     },
     executionModel: {
         checklist: [
-            "Compact design lock — for simple greenfield/product slices, produce a tight but complete design spine: codebase investigation, architecture boundary, one labeled diagram, data flow, failure/rescue table, test/perf expectations, and handoff. Do not run a sprawling workshop when a strong engineering lock fits on one page.",
+            "Compact design lock — design does not decide what to build; it decides how the approved scope works. For simple slices, produce a tight lock: upstream handoff, existing fit, architecture boundary, one labeled diagram, data/state flow, critical path, failure/rescue, trust boundaries, test/perf expectations, rollout/rollback, rejected alternative, and spec handoff.",
             "Trivial-Change Escape Hatch — for <=3 files, no new interfaces, and no cross-module data flow, produce a mini-design (rationale, changed files, one risk) and proceed to spec.",
             "Tiered Research — for simple/medium work, do compact inline codebase/research synthesis in `Research Fleet Synthesis`; write `.cclaw/artifacts/02a-research.md` and run the full fleet only for deep/high-risk work or when external framework/architecture uncertainty exists.",
             "Design Doc Check — read upstream artifacts and current design docs; latest superseding doc wins.",
             "Investigator pass — before design decisions, read blast-radius code and record touched files, responsibilities, reuse candidates, and existing patterns.",
             "Scope Challenge + Search Before Building — find existing solutions, minimum change set, and complexity smells before custom architecture.",
-            "Architecture Review — lock boundaries, one realistic failure scenario per new codepath, and high-risk choices with chosen path, one shadow alternative, switch trigger, and verification evidence; include tier-required diagrams.",
-            "Review core risk areas — security/threat model, code quality, tests, performance, observability/debuggability, deployment/rollout, and parallelization when modules are independent.",
+            "Architecture Review — lock boundaries, chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence for every high-risk choice; include tier-required diagrams.",
+            "Review core risk areas — existing system fit, data/state flow, critical path, security/trust boundaries, tests, performance budget, observability/debuggability, rollout/rollback, rejected alternatives, and spec handoff.",
             `Critic pass — run/reconcile adversarial second opinion on architecture, coupling, failure modes, and cheaper alternatives. ${reviewLoopPolicySummary("design")} ${reviewLoopSecondOpinionSummary("design")}`,
             "Run optional stale-diagram audit only when configured.",
             "Capture leftovers — seed high-upside deferred ideas, list unresolved decisions with defaults, document distribution for new artifact types, and cross-reference deferred items to scope or unresolved decisions."
@@ -73,7 +73,7 @@ export const DESIGN = {
             "Run configured stale-diagram audit when enabled.",
             "Produce required outputs: NOT-in-scope, What-already-exists, tier diagrams, failure table, completion dashboard.",
             "Plant high-upside deferred ideas when useful and reconcile critic/outside-voice findings.",
-            "Write design lock artifact for downstream spec/plan."
+            "Write design lock artifact for downstream spec/plan with design decisions, rejected alternatives, verification evidence, and exact spec handoff."
         ],
         requiredGates: [
             { id: "design_research_complete", description: "Research is complete: compact inline synthesis by default, or a separate research artifact for deep/high-risk work, and findings are mapped to design decisions." },
@@ -93,6 +93,7 @@ export const DESIGN = {
             "Outside-voice findings and dispositions are recorded (accept/reject/defer).",
             `Spec review loop summary includes iteration count and quality score trajectory per ${reviewLoopPolicySummary("design")}`,
             reviewLoopSecondOpinionSummary("design"),
+            "Adversarial lock table includes chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence.",
             "Test strategy includes unit/integration/e2e expectations.",
             "When a high-upside idea is deferred, a seed file is created under `.cclaw/seeds/` and referenced in the artifact.",
             "NOT-in-scope section produced.",
@@ -144,30 +145,28 @@ export const DESIGN = {
         artifactValidation: [
             { section: "Upstream Handoff", required: false, validationRule: "Summarizes scope/research decisions, constraints, open questions, and explicit drift before design choices." },
             { section: "Research Fleet Synthesis", required: true, validationRule: "Must summarize the tiered lenses actually run and map findings to concrete design decisions. Default may be compact inline synthesis; full separate research pack is Deep/high-risk only." },
-            { section: "Codebase Investigation", required: false, validationRule: "Investigator pass: list blast-radius files with current responsibilities, discovered patterns, and reuse candidates." },
+            { section: "Codebase Investigation", required: false, validationRule: "Investigator pass: list blast-radius files with current responsibilities, discovered patterns, reuse candidates, and existing system fit." },
+            { section: "Engineering Lock", required: true, validationRule: "Canonical lock: chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, verification evidence, critical path, rollout/rollback, and confidence." },
             { section: "Search Before Building", required: false, validationRule: "For each technical choice: Layer 1 (exact match), Layer 2 (partial match), Layer 3 (inspiration), EUREKA labels with reuse-first default." },
             { section: "Architecture Boundaries", required: true, validationRule: "Must list component boundaries with ownership." },
             { section: "Architecture Diagram", required: true, validationRule: "Must include `<!-- diagram: architecture -->` marker. Diagram must label concrete nodes, label arrows, mark direction, distinguish sync/async edges, and include at least one failure/degraded edge." },
-            { section: "Data-Flow Shadow Paths", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: data-flow-shadow-paths -->` marker plus a table for high-risk choices: chosen path, shadow alternative, switch trigger, fallback/degrade behavior, and verification evidence." },
+            { section: "Data-Flow Shadow Paths", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: data-flow-shadow-paths -->` marker plus a table for high-risk choices: chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence." },
             { section: "Error Flow Diagram", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: error-flow -->` marker and failure-detection -> rescue -> user-visible outcome flow." },
-            { section: "State Machine Diagram", required: false, validationRule: "Deep add-on: include `<!-- diagram: state-machine -->` marker and state transitions for critical flow lifecycle." },
-            { section: "Rollback Flowchart", required: false, validationRule: "Deep add-on: include `<!-- diagram: rollback-flowchart -->` marker with trigger -> rollback actions -> verification." },
-            { section: "Deployment Sequence Diagram", required: false, validationRule: "Deep add-on: include `<!-- diagram: deployment-sequence -->` marker with rollout order and guard checks." },
-            { section: "Data Flow", required: false, validationRule: "Must include happy path, nil input, empty input, upstream error paths, plus Interaction Edge Case matrix rows for: double-click, nav-away-mid-request, 10K-result dataset, background-job abandonment, zombie connection. Each row must declare handled yes/no and deferred item when not handled." },
+            { section: "Data Flow", required: false, validationRule: "Must include data/state flow, happy path, nil input, empty input, upstream error paths, plus Interaction Edge Case matrix rows for double-click, nav-away-mid-request, 10K-result dataset, background-job abandonment, zombie connection. Each row declares handled yes/no and deferred item when not handled." },
             { section: "Stale Diagram Audit", required: false, validationRule: "When `.cclaw/config.yaml::optInAudits.staleDiagramAudit` is true: blast-radius files from Codebase Investigation must not be newer than the current design diagram-marker baseline unless explicitly refreshed." },
             { section: "Failure Mode Table", required: true, validationRule: "Use Method/Exception/Rescue/UserSees columns and treat silent user impact without rescue as critical." },
             { section: "Security & Threat Model", required: true, validationRule: "Must list trust boundaries, abuse/failure scenarios, mitigations, and residual risks." },
             { section: "Test Strategy", required: false, validationRule: "Must define unit/integration/e2e expectations with coverage targets." },
             { section: "Performance Budget", required: false, validationRule: "For each critical path: metric name, target threshold, and measurement method." },
             { section: "Observability & Debuggability", required: true, validationRule: "Must define logs/metrics/traces plus alerting/debug path for critical failure modes." },
-            { section: "Deployment & Rollout", required: true, validationRule: "Must define migration/flag strategy, rollback plan, and post-deploy verification steps." },
+            { section: "Deployment & Rollout", required: true, validationRule: "Must define migration/flag strategy, rollout/rollback plan, switch trigger, and post-deploy verification steps." },
             { section: "What Already Exists", required: false, validationRule: "For each sub-problem: existing code/library found (Layer 1-3/EUREKA label), reuse decision, and adaptation needed." },
+            { section: "Rejected Alternatives", required: false, validationRule: "List alternatives considered, why rejected, and what signal would revive them." },
+            { section: "Design Decisions", required: false, validationRule: "Stable design decisions with requirement/locked-decision refs and downstream spec impact." },
+            { section: "Spec Handoff", required: true, validationRule: "Exact requirements, design decisions, risks, test/perf expectations, and unresolved questions that spec must carry forward." },
             { section: "Outside Voice Findings", required: false, validationRule: "Critic pass: list adversarial findings and disposition (accept/reject/defer) with rationale per material finding." },
             { section: "Design Outside Voice Loop", required: false, validationRule: `Record iteration table with quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("design")}` },
             { section: "NOT in scope", required: false, validationRule: "Work considered and explicitly deferred with one-line rationale." },
-            { section: "Parallelization Strategy", required: false, validationRule: "Standard/Deep add-on when multi-module: dependency table, parallel lanes, conflict flags." },
-            { section: "Interface Contracts", required: false, validationRule: "Standard/Deep add-on when module boundaries or APIs change: producers, consumers, and payload/interface expectations." },
-            { section: "Unresolved Decisions", required: false, validationRule: "Standard/Deep add-on if any: what info is missing, who provides it, default if unanswered." },
             { section: "Completion Dashboard", required: true, validationRule: "Lists every review section with status (clear / issues-found-resolved / issues-open), critical/open gap counts, decision count, and unresolved items (or 'None')." }
         ],
         trivialOverrideSections: ["Architecture Boundaries", "NOT in scope", "Completion Dashboard"]
@@ -180,6 +179,7 @@ export const DESIGN = {
             "test and performance baseline",
             "NOT-in-scope section",
             "What-already-exists section",
+            "design decisions and spec handoff",
             "design completion dashboard"
         ],
         reviewLoop: {

package/dist/content/stages/review.js CHANGED Viewed

@@ -38,11 +38,12 @@ export const REVIEW = {
             "Load upstream evidence — read TDD artifact (RED + GREEN + REFACTOR), spec, and the active track's upstream source items.",
             "Run traceability matrix when the active track enforces it; otherwise confirm spec acceptance/reproduction slices are covered directly.",
             "Layer 1: Spec Compliance — check every acceptance criterion against implementation. Verdict: pass/fail per criterion.",
-            "Layer 2: Integrated findings — one structured pass tagged by category: correctness, security, performance, architecture, external-safety.",
-            "Security sweep — mandatory dedicated security-reviewer pass across diff + touched modules. A zero-finding pass must include `NO_CHANGE_ATTESTATION` with rationale.",
+            "Review Evidence Scope — record base/head, files inspected, changed-file coverage, diagnostics run, dependency/version audit when relevant, and any files intentionally not inspected with explicit reason.",
+            "Layer 2: Integrated findings — one structured pass tagged by category: correctness, security, performance, architecture, external-safety. Every finding uses file:line; if impossible, include an explicit no-line reason.",
+            "Security sweep — mandatory dedicated security-reviewer pass across diff + touched modules. A zero-finding pass must include `NO_CHANGE_ATTESTATION` or `NO_SECURITY_IMPACT` with rationale and inspected surfaces.",
             "Incoming Feedback Intake — when human reviewer comments, bot findings, or CI annotations exist, keep a per-comment disposition queue and mirror outcomes into `07-review.md` + `07-review-army.json` before final verdict.",
             "Structured Review reconciliation — normalize findings into `07-review-army.json`, dedup by fingerprint, and mark multi-specialist confirmations when multiple lenses agree.",
-            "Meta-Review — Were tests actually run? Do test names match what they test? Are there real assertions?",
+            "Meta-Review — Were tests/diagnostics actually run? Do test names match what they test? Are there real assertions? Is the dependency/version surface unchanged or audited?",
             "Classify findings — Critical (blocks ship), Important (should fix), Suggestion (optional improvement).",
             "Produce verdict — APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
             "If verdict is BLOCKED, emit remediation route token `ROUTE_BACK_TO_TDD`, include `cclaw internal rewind tdd \"review_blocked_by_critical\"` with the blocking finding IDs, and satisfy the special transition guard `review_verdict_blocked` instead of `review_criticals_resolved`."
@@ -79,7 +80,11 @@ export const REVIEW = {
             "Artifact written to `.cclaw/artifacts/07-review-army.json`.",
             "Traceability matrix run recorded (no orphaned source items or tests for enforced tracks).",
             "Layer 1 verdict captured with per-criterion pass/fail.",
+            "Review Evidence Scope lists files inspected, changed-file coverage, diagnostics run, and omitted files with explicit reason.",
             "Layer 2 sections completed across correctness, security, performance, architecture, and external-safety findings.",
+            "Every finding cites `file:line`, or an explicit no-line reason is recorded.",
+            "No-finding attestation is explicit when no issues are found.",
+            "Dependency/version audit is recorded when manifests, lockfiles, generated clients, CI, runtime config, or external APIs are relevant.",
             "Severity log includes critical/important/suggestion buckets.",
             "Explicit final verdict: APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
             "Fresh verification command discovery recorded, and the command cited in `review_trace_matrix_clean` evidence before ship handoff.",
@@ -114,8 +119,12 @@ export const REVIEW = {
         },
         artifactValidation: [
             { section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/plan/tdd decisions, constraints, open questions, and explicit drift before review verdicts." },
+            { section: "Review Evidence Scope", required: true, validationRule: "Base/head, files inspected, changed-file coverage, diagnostics run, omitted files with reason, and reviewer/security-reviewer delegation evidence." },
+            { section: "Changed-File Coverage", required: true, validationRule: "Each changed file is covered, intentionally omitted with no-impact reason, or linked to a broader inspected module." },
             { section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
-            { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status across correctness, security, performance, architecture, and external-safety. Security coverage must include either explicit security findings or `NO_CHANGE_ATTESTATION: <reason>` when no security-relevant changes were found." },
+            { section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, category, file:line or explicit no-line reason, description, and resolution status across correctness/security/performance/architecture/external-safety. If there are no findings, include a no-finding attestation." },
+            { section: "Security Sweep Attestation", required: false, validationRule: "Dedicated security-reviewer result: findings or `NO_CHANGE_ATTESTATION` / `NO_SECURITY_IMPACT` with inspected surfaces and rationale." },
+            { section: "Dependency & Version Audit", required: false, validationRule: "Required when manifests, lockfiles, generated clients, CI, runtime config, or external APIs changed; otherwise record no-impact rationale." },
             { section: "Review Findings Contract", required: true, validationRule: "Structured findings in 07-review-army.json include id/severity/confidence/fingerprint/reportedBy/status and source tags from {spec, correctness, security, performance, architecture, external-safety} with dedup reconciliation summary." },
             { section: "Review Readiness Snapshot", required: false, validationRule: "Optional compact summary: completed checks, delegation-log status, staleness signal, open critical blockers, and ship recommendation." },
             { section: "Completeness Snapshot", required: false, validationRule: "Optional compact coverage summary for AC coverage, source item coverage, test-slice coverage, and adversarial-review status when triggered." },

package/dist/content/stages/schema-types.d.ts CHANGED Viewed

@@ -21,7 +21,7 @@ export interface ArtifactValidation {
     validationRule: string;
 }
 export interface StageAutoSubagentDispatch {
-    agent: "planner" | "reviewer" | "security-reviewer" | "test-author" | "doc-updater";
+    agent: "planner" | "product-manager" | "critic" | "reviewer" | "security-reviewer" | "test-author" | "doc-updater";
     /**
      * - `mandatory` — must be dispatched (or explicitly waived) before stage transition.
      * - `proactive` — should be dispatched automatically when context matches `when`.

package/dist/content/stages/scope.js CHANGED Viewed

@@ -9,7 +9,7 @@ export const SCOPE = {
     complexityTier: "standard",
     skillFolder: "scope-shaping",
     skillName: "scope-shaping",
-    skillDescription: "Strategic scope stage. Challenge premise and lock explicit in-scope/out-of-scope boundaries using CEO-level thinking.",
+    skillDescription: "Strategic contract stage. Select HOLD/SELECTIVE/EXPAND/REDUCE mode, lock the slice and boundaries, and hand stable discretion zones to design.",
     philosophy: {
         hardGate: "Do NOT begin architecture, design, or code. This stage produces scope decisions only. Do not silently add or remove scope — every change is an explicit user opt-in.",
         ironLaw: "EVERY SCOPE CHANGE IS AN EXPLICIT USER OPT-IN — NEVER A SILENT ENLARGEMENT OR TRIM.",
@@ -45,19 +45,19 @@ export const SCOPE = {
     },
     executionModel: {
         checklist: [
-            "**Scope contract first** — read brainstorm, name the job-to-be-done, draft the explicit in-scope/out-of-scope/deferred contract, select one mode, and write the rationale. This is the default path; use dream/10-star/temporal/deep strategy sections only when risk, novelty, or user ambition justifies them.",
+            "**Scope contract first** — read brainstorm handoff, name upstream decisions used, explicit drift, confidence, unresolved questions, and next-stage risk hints; draft the in-scope/out-of-scope/deferred/discretion contract before any design choice.",
             "**Premise and leverage check** — answer in the artifact: *Right problem? Direct path? What if nothing? Where can we leverage existing code? What is the reversibility cost?* Take a position; do not hedge.",
             "**Conditional 10-star boundary** — for deep/high-risk/product-strategy work, show what would make the product meaningfully better, then explicitly choose what ships now, what is deferred, and what is excluded without vague `later/for now` placeholders. Skip this for straightforward repair work and record `not needed: compact scope`.",
-            "**Pick one of four gstack modes with the user** — SCOPE EXPANSION, SELECTIVE EXPANSION, HOLD SCOPE, or SCOPE REDUCTION. Recommend one, state why and what signal would change it, then STOP for the user's mode/scope approval before writing the final artifact.",
-            "**Run mode-specific analysis only to needed depth** — ordinary path is a selected-mode row plus rationale tied to the scope contract. For deep/high-risk work, expand the analysis to match the chosen mode: SCOPE EXPANSION enumerates 10x opportunities + delight features; SELECTIVE EXPANSION lists baseline + cherry-picked additions; HOLD SCOPE proves rigor on the current slice; SCOPE REDUCTION names the smallest useful wedge.",
+            "**Pick one operational mode with the user** — HOLD SCOPE preserves focus; SELECTIVE EXPANSION cherry-picks high-leverage reference ideas; SCOPE EXPANSION explores ambitious alternatives; SCOPE REDUCTION cuts to the essential wedge. Recommend one, state why and what signal would change it, then STOP for approval.",
+            "**Run mode-specific analysis only to needed depth** — lite keeps the selected-mode row compact; standard adds requirements/locked decisions/discretion; deep may add Landscape Check, Taste Calibration, Reference Pull, Ambitious Alternatives, and Ruthless Minimum Slice evidence when mode/risk warrants it.",
             "**Compare implementation alternatives** — include minimum viable, product-grade, and ideal architecture options with effort (S/M/L/XL), risk (Low/Med/High), pros, cons, and reuses. Recommend one and tie it to mode.",
             "**Run outside voice before final approval** — for simple/low-risk scope, record one concise adversarial self-check row; for complex/high-risk/configured scope, iterate until threshold. Record the loop summary in `## Scope Outside Voice Loop`, but do not treat it as user approval.",
             "**Ask only one decision-changing question** — if the user rejects the contract but is unsure, offer 3-4 concrete scope moves instead of open-ended interrogation.",
-            "**Write the scope contract after approval** — include in-scope/out-of-scope, discretion areas, deferred items, locked decisions, error/rescue notes, completion dashboard, scope summary (with canonical mode token + next-stage handoff), and explicit approval evidence."
+            "**Write the scope contract after approval** — include selected mode, in scope, out of scope, requirements, locked decisions, discretion areas, deferred ideas, accepted/rejected reference ideas, success definition, design handoff, completion dashboard, and explicit approval evidence."
         ],
         interactionProtocol: [
             decisionProtocolInstruction("scope mode selection", "present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended)", "recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce"),
-            "Do not walk the full checklist by default. Lead with a proposed scope contract and the one decision that matters most; label the mode as recommended, not selected, until the user answers.",
+            "Do not walk the full checklist by default. Lead with a proposed scope contract, selected depth (`lite`/`standard`/`deep`), and the one decision that matters most; label the mode as recommended, not selected, until the user answers.",
             "For simple web-app flows, default to HOLD SCOPE or SELECTIVE EXPANSION, show the exact in/out/deferred contract as a proposal, and STOP for one explicit approval before writing the final scope artifact or completing the stage.",
             "Challenge premise first, take a firm position, and name one concrete condition that would change it.",
             "Push back on weak framing: vague scope needs a specific user/problem, platform vision needs a narrow wedge, social proof needs behavioral evidence.",
@@ -86,7 +86,8 @@ export const SCOPE = {
             "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true, Pre-Scope System Audit findings are captured (git log/diff/stash/debt markers).",
             "In-scope and out-of-scope lists are explicit.",
             "Discretion areas are explicit (or marked as `None`).",
-            "Selected mode and rationale are documented.",
+            "Selected mode and rationale are documented using HOLD SCOPE, SELECTIVE EXPANSION, SCOPE EXPANSION, or SCOPE REDUCTION.",
+            "Scope Contract captures requirements, locked decisions, discretion areas, accepted/rejected reference ideas, success definition, and design handoff.",
             "Locked Decisions section lists stable LD#hash anchors for non-negotiable boundaries.",
             "Premise challenge findings documented.",
             "Outside Voice findings and dispositions are recorded (accept/reject/defer with rationale) before final approval.",
@@ -140,8 +141,12 @@ export const SCOPE = {
             { section: "Pre-Scope System Audit", required: false, validationRule: "When `.cclaw/config.yaml::optInAudits.scopePreAudit` is true: must capture git log -30, git diff --stat, git stash list, and debt-marker scan (TODO/FIXME/XXX/HACK) before premise challenge." },
             { section: "Prime Directives", required: false, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
             { section: "Premise Challenge", required: false, validationRule: "Must list at least 3 question/answer rows in a markdown table or bullet list (gstack default trio: right problem? direct path? what if we do nothing? — extend with leverage and reversibility for richer scope). The linter enforces structure, not English wording — answers may be in any language." },
-            { section: "Landscape Check", required: false, validationRule: "When mode is EXPAND/SELECTIVE, include at least one external reference insight and its impact on scope." },
-            { section: "Taste Calibration", required: false, validationRule: "Must reference 2-3 strong in-repo modules/files that define the quality bar or explicitly justify omission." },
+            { section: "Scope Contract", required: true, validationRule: "Canonical contract: selected mode, in scope, out of scope, requirements, locked decisions, discretion areas, deferred ideas, accepted/rejected reference ideas, success definition, and design handoff." },
+            { section: "Landscape Check", required: false, validationRule: "Optional evidence heading for EXPAND/SELECTIVE/deep modes: include reference insight and impact on scope, or omit for compact HOLD SCOPE." },
+            { section: "Taste Calibration", required: false, validationRule: "Optional evidence heading: reference 2-3 strong in-repo modules/files that define the quality bar or justify omission." },
+            { section: "Reference Pull", required: false, validationRule: "Optional evidence heading: cite ideas pulled from `/Users/zuevrs/Downloads/references` or state no reference pull was needed for compact HOLD SCOPE." },
+            { section: "Ambitious Alternatives", required: false, validationRule: "Optional evidence heading for SCOPE EXPANSION/SELECTIVE: list larger alternatives considered and their disposition." },
+            { section: "Ruthless Minimum Slice", required: false, validationRule: "Optional evidence heading for SCOPE REDUCTION or high-risk scope: define the smallest useful wedge and what it proves." },
             { section: "Requirements", required: false, validationRule: "Table of stable requirement IDs (R1, R2, R3…) one per row with observable outcome, priority, and source. IDs are assigned once and never renumbered across scope/design/spec/plan/review; dropped requirements stay with Priority `DROPPED`." },
             { section: "Locked Decisions (LD#hash)", required: false, validationRule: "List of stable locked decisions with unique `LD#<sha8>` anchors. Each anchor is derived from the normalized Decision cell and is referenced downstream for cross-stage traceability." },
             { section: "Implementation Alternatives", required: false, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
@@ -154,7 +159,7 @@ export const SCOPE = {
             { section: "Outside Voice Findings", required: false, validationRule: "Must list external/adversarial findings and disposition (accept/reject/defer) with rationale." },
             { section: "Scope Outside Voice Loop", required: false, validationRule: `Must record iterations, quality score per iteration, stop reason, and unresolved concerns. Enforce ${reviewLoopPolicySummary("scope")}` },
             { section: "Completion Dashboard", required: true, validationRule: "Lists per-review-section status, count of critical/open gaps, resolved decisions, and unresolved decisions (or 'None')." },
-            { section: "Scope Summary", required: true, validationRule: "Compact recap of the locked scope. Must name the selected mode using one of the canonical tokens (`SCOPE EXPANSION`, `SELECTIVE EXPANSION`, `HOLD SCOPE`, `SCOPE REDUCTION`) and record the track-aware next-stage handoff (`design` for standard, `spec` for medium); the linter checks structure, not English wording." },
+            { section: "Scope Summary", required: true, validationRule: "Compact recap of the locked scope. Must name the selected mode using one canonical token and record confidence, explicit drift from brainstorm, unresolved questions, and the track-aware next-stage handoff (`design` for standard, `spec` for medium); the linter checks structure, not English wording." },
             { section: "Dream State Mapping", required: false, validationRule: "Deep/optional only: CURRENT STATE, THIS PLAN, 12-MONTH IDEAL, and alignment verdict. Omit for compact scope." },
             { section: "Temporal Interrogation", required: false, validationRule: "Deep/optional only: timeline simulation table with decision pressures and lock-now vs defer verdicts. Omit for compact scope." }
         ]

package/dist/content/subagents.js CHANGED Viewed

@@ -7,6 +7,8 @@ import { conversationLanguagePolicyMarkdown } from "./language-policy.js";
  */
 const SUBAGENT_AGENT_NAMES = [
     "planner",
+    "product-manager",
+    "critic",
     "reviewer",
     "security-reviewer",
     "test-author",
@@ -130,9 +132,9 @@ Concrete per-stage rules so the controller does not have to guess which tier fit
 | Stage | Deep slot | Balanced slot(s) | Fast fan-out | Trigger to escalate |
 |---|---|---|---|---|
-| brainstorm | planner (only if ambiguity spans >1 module) | — | run in-thread research playbooks | promote to \`balanced\` reviewer once direction locks |
-| scope | planner (always) | — | run \`research/git-history.md\` in-thread when churn is high | promote to \`balanced\` planner if scope touches external contracts |
-| design | planner (always) | security-reviewer (if trust boundary touched) | run \`research/framework-docs-lookup.md\` + \`research/best-practices-lookup.md\` in-thread | escalate one specialist to \`deep\` only if a failure mode is Critical-severity |
+| brainstorm | planner (only if ambiguity spans >1 module) | product-manager / critic when product value or premise is uncertain | run in-thread research playbooks | promote to \`balanced\` critic if the do-nothing path may beat the idea |
+| scope | planner (always) | product-manager / critic when mode changes user value or boundaries are soft | run \`research/git-history.md\` in-thread when churn is high | promote to \`balanced\` critic if scope mode is disputed |
+| design | planner (always) | critic, security-reviewer, test-author when alternatives/trust/testability apply | run \`research/framework-docs-lookup.md\` + \`research/best-practices-lookup.md\` in-thread | escalate one specialist to \`deep\` only if a failure mode is Critical-severity |
 | spec | — | reviewer (if spec > 200 lines or multiple ACs) | — | escalate to \`deep\` only for spec ↔ design contradictions |
 | plan | planner (solo, always) | — | — | never fan out at plan stage; one owner for dependency graph |
 | tdd | — | ${formatAgentList(stageSummary("tdd").primaryAgents)} (per slice, carrying RED/GREEN/REFACTOR evidence) · reviewer (slice-local only when sliceReview triggers) | doc-updater (API surface changes) | escalate to \`deep\` only when a RED test cannot be expressed (design leak) |
@@ -601,6 +603,56 @@ Output format (mandatory):
 - Close with RISK_SUMMARY and SHIP_BLOCKERS (explicit list, possibly empty).
 \`\`\`
+`;
+}
+function productManagerEnhancedBody() {
+    return `
+## Task Tool Delegation
+Use this payload when product discovery needs an isolated lens:
+\`\`\`
+You are a product-manager subagent.
+DISCOVERY GOAL: {problem/value decision to clarify}
+CONTEXT: {existing artifact excerpts, user segment, constraints}
+DEPTH: {lite|standard|deep}
+Required output:
+- PERSONA_JTBD: persona, job, pain/trigger
+- VALUE_HYPOTHESIS: expected value and success metric
+- EVIDENCE_SIGNAL: strongest evidence, weakest assumption
+- WHY_NOW_AND_DO_NOTHING: why now plus consequence of no action
+- NON_GOALS: explicit exclusions
+- SCOPE_HANDOFF: one recommendation for hold/selective/expand/reduce
+\`\`\`
+`;
+}
+function criticEnhancedBody() {
+    return `
+## Task Tool Delegation
+Use this payload when a premise, scope mode, or engineering path needs adversarial pressure:
+\`\`\`
+You are a critic subagent.
+DECISION_UNDER_REVIEW: {direction/scope/design choice}
+CONTEXT: {artifact excerpts, constraints, known risks}
+DEPTH: {lite|standard|deep}
+Required output:
+- PREMISE_ATTACK: what could make this decision wrong
+- CHEAPER_ALTERNATIVE: smaller or more reversible option
+- SHADOW_ALTERNATIVE: viable competing path
+- SWITCH_TRIGGER: signal that should change the decision
+- FAILURE_RESCUE: likely failure and rescue/degraded behavior
+- VERIFICATION_EVIDENCE: evidence needed before locking
+\`\`\`
 `;
 }
 function reviewerEnhancedBody() {
@@ -689,6 +741,10 @@ export function enhancedAgentBody(agentName) {
     switch (agentName) {
         case "planner":
             return plannerEnhancedBody();
+        case "product-manager":
+            return productManagerEnhancedBody();
+        case "critic":
+            return criticEnhancedBody();
         case "reviewer":
             return reviewerEnhancedBody();
         case "security-reviewer":

package/dist/content/templates.js CHANGED Viewed

@@ -29,10 +29,28 @@ export const ARTIFACT_TEMPLATES = {
 ### Discovered context
 - (paths, prior artifacts, seeds, prompt fragments — referenced by downstream stages, or \`- None.\`)
-## Problem
-- **What we're solving:**
-- **Success criteria:**
-- **Constraints:**
+## Problem Decision Record
+- **Depth:** lite | standard | deep
+- **Frame type:** product | technical-maintenance
+### Product framing (use when applicable)
+- **Persona / user:**
+- **Job to be done:**
+- **Pain / trigger:**
+- **Value hypothesis:**
+- **Evidence / signal:**
+- **Success metric:**
+- **Why now:**
+- **Do-nothing consequence:**
+- **Non-goals:**
+### Technical-maintenance framing (use when product framing is not applicable)
+- **Affected operator/developer:**
+- **Current failure mode:**
+- **Expected operational improvement:**
+- **Verification signal:**
+- **Do-nothing cost:**
+- **Non-goals:**
 ## Premise Check
 - **Right problem?** (yes/no + one-line justification — take a position)
@@ -43,11 +61,10 @@ export const ARTIFACT_TEMPLATES = {
 - *How might we …?* — one line naming the user, the desired outcome, and the binding constraint.
 ## Sharpening Questions
+> Ask one decision-changing question at a time. For concrete early exits, record \`None - early exit\` with rationale.
 | # | Question | Answer / Assumption | Decision impact |
 |---|---|---|---|
 | 1 |  |  |  |
-| 2 |  |  |  |
-| 3 |  |  |  |
 ## Clarifying Questions
 | # | Question | Answer | Decision impact |
@@ -55,7 +72,7 @@ export const ARTIFACT_TEMPLATES = {
 | 1 |  |  |  |
 ## Approach Tier
-- Tier: Lightweight | Standard | Deep
+- Tier: lite | standard | deep
 - Why this tier:
 ## Short-Circuit Decision
@@ -80,7 +97,7 @@ export const ARTIFACT_TEMPLATES = {
 - **Approach:**
 - **Rationale:** Trace this to the prior Approach Reaction.
 - **Approval:** pending
-- **Next-stage handoff:** On standard track, hand this to \`scope\`; on medium track, hand this directly to \`spec\` with explicit requirements/constraints.
+- **Next-stage handoff:** On standard track, hand this to \`scope\`; on medium track, hand this directly to \`spec\`. Include upstream decisions used, drift, confidence, unresolved questions, risk hints, and non-goals.
 ## Not Doing
 - (3-5 things this brainstorm is *not* committing to — distinct from \`Deferred\`. These will not appear in scope unless the user explicitly opts in.)
@@ -165,8 +182,21 @@ ${SEED_SHELF_SECTION}
 | HOUR 4-5 (integration) |  |  |  |
 | HOUR 6+ (polish/tests) |  |  |  |
+## Scope Contract
+- **Selected mode:** HOLD SCOPE | SELECTIVE EXPANSION | SCOPE EXPANSION | SCOPE REDUCTION
+- **In scope:**
+- **Out of scope:**
+- **Requirements:**
+- **Locked decisions:**
+- **Discretion areas:**
+- **Deferred ideas:**
+- **Accepted reference ideas:**
+- **Rejected reference ideas:**
+- **Success definition:**
+- **Design handoff:**
 ## Scope Mode
-- [ ] SCOPE EXPANSION — dream bigger; user explicitly opts into the larger product slice.
+- [ ] SCOPE EXPANSION — explore ambitious alternatives; user explicitly opts into the larger product slice.
 - [ ] SELECTIVE EXPANSION — hold baseline scope and cherry-pick one high-leverage addition.
 - [ ] HOLD SCOPE — preserve the approved brainstorm direction with maximum rigor.
 - [ ] SCOPE REDUCTION — strip to the smallest useful wedge when risk/blast radius is too high.
@@ -174,9 +204,24 @@ ${SEED_SHELF_SECTION}
 ## Mode-Specific Analysis
 | Selected mode | Rationale | Depth |
 |---|---|---|
-|  |  | default / deep |
+|  |  | lite / standard / deep |
+> Default path: one selected-mode row plus rationale. Deep/high-risk scope may expand with optional evidence headings below.
+## Landscape Check
+- Optional for EXPAND/SELECTIVE/deep; omit for compact HOLD SCOPE.
+## Taste Calibration
+- Optional quality-bar references from in-repo modules/files.
-> Default path: one selected-mode row plus rationale. Deep/high-risk scope may expand below with mode-specific analysis.
+## Reference Pull
+- Optional evidence from \`/Users/zuevrs/Downloads/references\`; list accepted/rejected ideas or \`Not needed - compact scope\`.
+## Ambitious Alternatives
+- Optional for SCOPE EXPANSION/SELECTIVE; list larger alternatives and disposition.
+## Ruthless Minimum Slice
+- Optional for SCOPE REDUCTION/high-risk scope; define the smallest useful wedge.
 ## Requirements (stable IDs)
 | ID | Requirement (observable outcome) | Priority | Source (origin doc / prompt line) |
@@ -241,6 +286,9 @@ ${SEED_SHELF_SECTION}
 ## Scope Summary
 - Selected mode: (one of \`SCOPE EXPANSION\` | \`SELECTIVE EXPANSION\` | \`HOLD SCOPE\` | \`SCOPE REDUCTION\`)
+- Confidence: high | medium | low
+- Drift from brainstorm: None / <specific drift>
+- Unresolved questions: None / <questions>
 - Strongest challenges resolved:
 - Recommended path:
 - Accepted scope:
@@ -291,7 +339,7 @@ ${SEED_SHELF_SECTION}
 ## Compact-First Scaffold
 - Default to the compact design spine unless risk requires Standard/Deep add-ons.
-- Compact required spine: Codebase Investigation, Architecture Boundaries, Architecture Diagram, Data Flow, Failure Mode Table, Test Strategy, and Completion Dashboard.
+- Compact required spine: Upstream Handoff, Codebase Investigation, Engineering Lock, Architecture Boundaries, Architecture Diagram, Data Flow, Failure Mode Table, Test Strategy, Spec Handoff, and Completion Dashboard.
 - Mark optional Standard/Deep sections as \`Omitted - compact design\` when they do not apply; do not expand the scaffold just to fill empty tables.
 ## Upstream Handoff
@@ -302,9 +350,14 @@ ${SEED_SHELF_SECTION}
 - Drift from upstream (or \`None\`):
 ## Codebase Investigation
-| File | Current responsibility | Patterns discovered |
-|---|---|---|
-|  |  |  |
+| File | Current responsibility | Patterns discovered | Existing fit / reuse candidate |
+|---|---|---|---|
+|  |  |  |  |
+## Engineering Lock
+| Decision area | Chosen path | Shadow alternative | Switch trigger | Failure/rescue/degraded behavior | Verification evidence | Confidence |
+|---|---|---|---|---|---|---|
+|  |  |  |  |  |  |  |
 ## Search Before Building
 | Layer | Label | What to reuse first |
@@ -336,9 +389,9 @@ ${MARKDOWN_CODE_FENCE}
 ## Data-Flow Shadow Paths
 - Standard/Deep add-on; omit when compact design does not need a shadow path.
 <!-- diagram: data-flow-shadow-paths -->
-| Path | Trigger | Fallback/Degrade behavior |
-|---|---|---|
-|  |  |  |
+| Chosen path | Shadow alternative | Switch trigger | Failure/rescue/degraded behavior | Verification evidence |
+|---|---|---|---|---|
+|  |  |  |  |  |
 ## Error Flow Diagram
 - Standard/Deep add-on; omit when the Failure Mode Table is sufficient.
@@ -387,6 +440,8 @@ ${MARKDOWN_CODE_FENCE}
 |  |  |  |  |
 ## Data Flow
+- Data/state flow:
+- Critical path:
 - Happy path:
 - Nil/empty input path:
 - Upstream error path:
@@ -431,6 +486,23 @@ ${MARKDOWN_CODE_FENCE}
 |---|---|---|
 |  |  |  |
+## Rejected Alternatives
+| Alternative | Why rejected | Revival signal |
+|---|---|---|
+|  |  |  |
+## Design Decisions
+| Decision Ref | Requirement / LD refs | Decision | Spec impact |
+|---|---|---|---|
+| DD-1 |  |  |  |
+## Spec Handoff
+- Requirements to carry forward:
+- Design decisions to encode:
+- Risks and rescue paths:
+- Test/performance expectations:
+- Unresolved questions (or \`None\`):
 ## Outside Voice Findings
 | ID | Dimension | Finding | Disposition | Rationale |
 |---|---|---|---|---|
@@ -735,16 +807,40 @@ Execution rule: complete and verify each batch before starting the next batch.
 - Open questions:
 - Drift from upstream (or \`None\`):
+## Review Evidence Scope
+- Base/head:
+- Files inspected:
+- Changed-file coverage summary:
+- Diagnostics run:
+- Omitted files with explicit reason:
+- Reviewer delegation evidence:
+- Security-reviewer delegation evidence:
+## Changed-File Coverage
+| File | Coverage status | Evidence / no-impact reason |
+|---|---|---|
+|  | inspected / broader-module / omitted-no-impact |  |
 ## Layer 1 Verdict
 | Criterion | Verdict | Evidence |
 |---|---|---|
 | AC-1 | PASS/FAIL |  |
 ## Layer 2 Findings
-| ID | Severity | Category | Description | Status |
-|---|---|---|---|---|
-| R-1 | Critical/Important/Suggestion | correctness/security/performance/architecture/external-safety |  | open/resolved |
-- NO_CHANGE_ATTESTATION: <required when Category=security has no entries; explain why no security-relevant changes were detected>
+| ID | Severity | Category | File:line / no-line reason | Description | Status |
+|---|---|---|---|---|---|
+| R-1 | Critical/Important/Suggestion | correctness/security/performance/architecture/external-safety | path:line |  | open/resolved |
+- NO_FINDINGS_ATTESTATION: <required when no findings are reported; cite inspected coverage>
+## Security Sweep Attestation
+- Result: findings | NO_CHANGE_ATTESTATION | NO_SECURITY_IMPACT
+- Inspected surfaces:
+- Rationale:
+## Dependency & Version Audit
+- Relevant: yes/no
+- Manifests/lockfiles/generated clients/CI/runtime config/external APIs inspected:
+- Result / no-impact rationale:
 ## Incoming Feedback Queue
 | ID | Source | Severity | File:line | Request | Status | Evidence |