npm - cclaw-cli - Versions diffs - 0.55.2 → 2.0.0 - Mend

cclaw-cli 0.55.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92)

package/README.md +3 -3
package/dist/artifact-linter/brainstorm.js +59 -1
package/dist/artifact-linter/design.js +46 -1
package/dist/artifact-linter/plan.js +22 -1
package/dist/artifact-linter/review.js +35 -1
package/dist/artifact-linter/scope.js +33 -9
package/dist/artifact-linter/shared.d.ts +12 -10
package/dist/artifact-linter/shared.js +102 -41
package/dist/artifact-linter/ship.js +36 -0
package/dist/artifact-linter/spec.js +23 -1
package/dist/artifact-linter/tdd.js +74 -0
package/dist/artifact-linter.d.ts +1 -1
package/dist/artifact-linter.js +11 -1
package/dist/constants.d.ts +1 -1
package/dist/constants.js +1 -0
package/dist/content/closeout-guidance.d.ts +1 -1
package/dist/content/closeout-guidance.js +10 -11
package/dist/content/core-agents.d.ts +35 -36
package/dist/content/core-agents.js +189 -99
package/dist/content/diff-command.js +1 -1
package/dist/content/examples.d.ts +0 -3
package/dist/content/examples.js +197 -752
package/dist/content/hook-events.js +1 -2
package/dist/content/hook-manifest.d.ts +3 -4
package/dist/content/hook-manifest.js +22 -25
package/dist/content/hooks.js +54 -14
package/dist/content/idea.d.ts +60 -0
package/dist/content/idea.js +404 -0
package/dist/content/learnings.d.ts +2 -4
package/dist/content/learnings.js +10 -26
package/dist/content/meta-skill.js +4 -3
package/dist/content/node-hooks.js +368 -164
package/dist/content/observe.js +3 -3
package/dist/content/opencode-plugin.js +12 -32
package/dist/content/reference-patterns.js +2 -2
package/dist/content/runtime-shared-snippets.d.ts +8 -0
package/dist/content/runtime-shared-snippets.js +80 -0
package/dist/content/session-hooks.js +1 -1
package/dist/content/skills-elicitation.d.ts +1 -0
package/dist/content/skills-elicitation.js +123 -0
package/dist/content/skills.d.ts +1 -0
package/dist/content/skills.js +54 -2
package/dist/content/stage-schema.js +107 -63
package/dist/content/stages/brainstorm.js +7 -3
package/dist/content/stages/design.js +4 -0
package/dist/content/stages/review.js +8 -8
package/dist/content/stages/schema-types.d.ts +2 -2
package/dist/content/stages/scope.js +7 -3
package/dist/content/stages/ship.js +1 -1
package/dist/content/start-command.js +4 -4
package/dist/content/status-command.js +3 -3
package/dist/content/subagent-context-skills.js +156 -1
package/dist/content/subagents.d.ts +0 -5
package/dist/content/subagents.js +12 -82
package/dist/content/templates.js +108 -6
package/dist/content/utility-skills.js +26 -97
package/dist/flow-state.d.ts +12 -6
package/dist/flow-state.js +5 -6
package/dist/gate-evidence.d.ts +0 -31
package/dist/gate-evidence.js +3 -181
package/dist/harness-adapters.js +1 -1
package/dist/hook-schemas/claude-hooks.v1.json +2 -3
package/dist/hook-schemas/codex-hooks.v1.json +1 -1
package/dist/hook-schemas/cursor-hooks.v1.json +1 -1
package/dist/install.js +50 -7
package/dist/internal/advance-stage/advance.js +22 -2
package/dist/internal/advance-stage/parsers.d.ts +1 -0
package/dist/internal/advance-stage/parsers.js +6 -0
package/dist/internal/advance-stage/review-loop.js +1 -10
package/dist/knowledge-store.d.ts +2 -20
package/dist/knowledge-store.js +43 -57
package/dist/policy.js +3 -3
package/dist/retro-gate.js +8 -90
package/dist/run-archive.js +1 -4
package/dist/run-persistence.d.ts +1 -1
package/dist/run-persistence.js +43 -111
package/dist/runtime/run-hook.entry.d.ts +3 -0
package/dist/runtime/run-hook.entry.js +5 -0
package/dist/runtime/run-hook.mjs +9647 -0
package/dist/track-heuristics.d.ts +7 -1
package/dist/track-heuristics.js +12 -0
package/package.json +4 -2
package/dist/content/hook-inline-snippets.d.ts +0 -96
package/dist/content/hook-inline-snippets.js +0 -515
package/dist/content/idea-command.d.ts +0 -8
package/dist/content/idea-command.js +0 -322
package/dist/content/idea-frames.d.ts +0 -31
package/dist/content/idea-frames.js +0 -140
package/dist/content/idea-ranking.d.ts +0 -25
package/dist/content/idea-ranking.js +0 -65
package/dist/trace-matrix.d.ts +0 -27
package/dist/trace-matrix.js +0 -226

package/dist/content/examples.js CHANGED

@@ -1,603 +1,273 @@
 const STAGE_EXAMPLES = {
     brainstorm: `## Context
-- **Project state:** Monorepo with CI pipeline using custom release scripts. Release checks are scattered across shell scripts with no shared validation logic.
-- **Relevant existing code/patterns:** \`scripts/pre-publish.sh\` does metadata checks. \`src/release/\` has partial validation helpers.
+- Project state: release checks exist but CI/local behavior drifts.
+- Existing anchors: \`scripts/pre-publish.sh\`, \`src/release/\`, incident notes.
 ## Problem Decision Record
-- **Depth:** standard
-- **Frame type:** \`technical-maintenance\` (free-form label; pick whatever fits — see commentary in the template for example labels)
-### Framing fields (universal — keep field names; fill in whatever is meaningful for this work)
-- **Affected user / role / operator:** release operator and package maintainer.
-- **Current state / failure mode / opportunity:** release checks are fragile and inconsistent between CI and local runs; invalid metadata sometimes reaches npm publish.
-- **Desired outcome (observable):** invalid release preconditions are caught before publish; \`pnpm release:check\` exits non-zero with explicit operator feedback in CI and local workflows.
-- **Evidence / signal supporting this framing:** prior incident postmortems referencing release-metadata drift; \`scripts/pre-publish.sh\` already partially encodes the rules.
-- **Why now (urgency / cost of waiting):** every additional release reinforces the divergent CI/local behavior and burns operator trust.
-- **Do-nothing consequence:** continued publish risk and duplicated local/CI fixes.
-- **Non-goals:** no new runtime dependencies; no release-framework rewrite.
+- Depth: standard
+- Frame type: \`technical-maintenance\`
+- Affected user / role / operator: release operator
+- Current state / failure mode / opportunity: inconsistent validation paths
+- Desired outcome (observable): one deterministic preflight in CI and local flows
+- Evidence / signal: repeated metadata drift incidents
+- Why now: recurring operational cost on every release
+- Do-nothing consequence: continued publish risk
+- Non-goals: no release framework rewrite
 ## Clarifying Questions
 | # | Question | Answer | Decision impact |
 | --- | --- | --- | --- |
-| 1 | If release metadata is invalid, should we block publishing hard or only warn? | Block hard. | Validation becomes a mandatory gate — no warning-only fallback. |
-| 2 | Should the validation logic live in a reusable module or stay as shell scripts? | Reusable module. | Architecture: shared TypeScript module imported by CI and local tooling, not duplicated shell scripts. |
-| 3 | For v1, prioritize rapid delivery or maximum configurability? | Rapid delivery. | Minimal deterministic validation surface; defer plugin/config system to v2. |
+| 1 | Block invalid releases or only warn? | Block. | Validation is a hard gate. |
+| 2 | Shared module or script-only patch? | Shared module. | Reuse in CI/local. |
 ## Approach Tier
-- **Tier:** Standard
-- **Why this tier:** Change spans CI + local release workflow and shared module boundaries, but remains bounded to one subsystem.
-## Short-Circuit Decision
-- **Status:** bypassed
-- **Why:** Core requirements were not concrete enough initially; we still needed options + trade-off conversation.
-- **Scope handoff:** Continue full brainstorm flow before scope.
+- Tier: standard
+- Why this tier: cross-cutting release path change, bounded subsystem
 ## Approaches
 | Approach | Role | Upside | Architecture | Trade-offs | Recommendation |
 | --- | --- | --- | --- | --- | --- |
-| A: Reusable validation module | baseline | high | Shared TS module with typed validators, imported by CI scripts and local CLI. Existing \`pre-publish.sh\` calls the module. | Medium upfront effort, high reuse. Requires test coverage for the module. | **Recommended** — best balance of reuse and delivery speed. |
-| B: Hardened shell scripts | baseline | modest | Keep existing script approach, add stricter checks and error messages. | Lowest effort. Weak reuse, CI/local divergence risk grows over time. | Viable fallback if TS module is blocked. |
-| C: Full release framework | challenger | higher | New release orchestrator with plugin system, config files, rollback commands. | Maximum flexibility. High risk, delivery delay, over-engineered for current needs. | Not recommended for v1. |
+| Shared validator module | baseline | high | Typed checks reused by CI/local | Medium effort | **Recommended** |
+| Script hardening only | challenger | high | Keep shell checks | Fast but drift risk remains | Fallback |
 ## Approach Reaction
-- **Closest option:** A (reusable validation module).
-- **Concerns:** User wanted to avoid framework-level overbuild and keep v1 delivery speed high.
-- **What changed after reaction:** Recommendation stayed on A, but added explicit fallback path via existing shell entrypoint to reduce migration risk.
-## Selected Direction
-- **Approach:** A — Reusable validation module
-- **Rationale:** based on user reaction favoring fast delivery and lower complexity, shared TS module gives consistent behavior in CI/local, avoids script duplication, and stays within the no-new-dependency constraint.
-- **Approval:** approved
-- **Next-stage handoff:** \`scope\` — carry the locked stack constraints and the validator module boundary forward.
-## Design
-- **Architecture:** single \`release-validator\` module in \`src/release/\` exporting typed check functions. CI script and local CLI both import and run the same checks.
-- **Key components:** \`validateMetadata()\`, \`validateChangelog()\`, \`validateVersion()\` — each returns a typed result with error details. A \`runAll()\` orchestrator runs checks and exits non-zero on any failure.
-- **Data flow:** package.json + CHANGELOG.md → validator module → structured result → CI/CLI renders human-readable report.
+- Closest option: shared validator module
+- Concerns: keep v1 delivery tight; avoid framework creep
+- What changed after reaction: kept module path and added incremental rollout guardrails
-## Assumptions and Open Questions
+## Challenger Alternative Enforcement
-- **Assumptions:** CI remains the primary execution path; existing release metadata files remain the source of truth; v1 prioritizes determinism over customization.
-- **Open questions:** What exact rollback sequence for failed publish? Should status output include machine-readable JSON alongside markdown?
+- Challenger alternative: script hardening only.
+- Disposition: rejected for this cycle.
+- Enforcement note: preserve the challenger as a bounded fallback, but do not mix both paths in v1 implementation.
-## Notes for the next stage
+## Selected Direction
-Carry the no-new-dependency constraint and hard-block behavior directly into scope in/out boundaries.`,
+- Selected approach: shared validator module
+- Approval: approved
+- Rationale: best balance of consistency and delivery speed
+- Scope handoff: carry hard-block policy + module boundary into scope
+`,
     scope: `## Scope contract
-**Mode selected:** SELECTIVE EXPANSION
-**Default heuristic used:** feature enhancement -> selective
-**Mode-specific analysis result:** hold-scope baseline accepted first; one expansion accepted (degraded-state UX), one deferred (real-time channel upgrade).
-## Prime Directives (applied)
-- Zero silent failures: every delivery failure maps to a visible degraded state.
-- Named error surfaces: stream disconnect, auth drift, and publisher timeout are explicit.
-- Four-path data flow mapped: happy, nil payload, empty payload, upstream publish error.
-- Interaction edge cases in scope: double-open panel, reconnect after sleep, stale tab state.
-- Observability in scope: stream error counter, publish-to-visible lag metric, and alert threshold.
-## Premise challenge result
-The original premise (“add notifications”) was reframed to **“ensure users know when an action requires follow-up”**, which expands the solution space beyond toast spam to include durable inbox items, empty states, and recovery paths when delivery fails.
-## Dream State Mapping
-| Stage | Statement |
-| --- | --- |
-| **CURRENT STATE** | Users miss time-sensitive follow-ups because alerts are ephemeral and not recoverable. |
-| **THIS PLAN** | Introduce durable in-app feed + live updates + explicit degraded mode fallback. |
-| **12-MONTH IDEAL** | Unified notification center with reliable multi-channel fan-out and user-level routing preferences. |
-| **Alignment verdict** | Aligned: this scope builds the durability foundation without prematurely committing to channel expansion. |
-## Mode-Specific Analysis
-**Selected mode:** SELECTIVE EXPANSION
-- **Hold-scope baseline:** SSE live updates + REST fallback is the minimum that meets the "know when action is needed" reframe. Accepted as baseline.
-- **Expansion evaluated — degraded-state UX (accepted):** Adding an explicit "live updates paused" banner and polling fallback turns a reliability gap into a visible, recoverable state. Low incremental effort (S), high user trust payoff.
-- **Expansion evaluated — real-time channel upgrade (deferred):** WebSocket channel provides lower latency but requires new infra (connection pool, auth handshake). Not justified for current load; deferred to post-v1 validation.
-## Implementation Alternatives
-| Option | Summary | Effort (S/M/L/XL) | Risk | Pros | Cons | Reuses |
-| --- | --- | --- | --- | --- | --- | --- |
-| **A (minimum viable)** | Polling-only feed with no live stream | S | Low | Fastest ship, low infra risk | Weaker UX, delayed visibility | Existing REST snapshot endpoint |
-| **B (recommended)** | SSE live updates + REST fallback snapshot | M | Med | Better timeliness, graceful degradation | Requires reconnect handling | Existing event publisher + REST path |
-| **C (ideal architecture)** | Event bus + WebSocket channel + feed projection | XL | High | Strong long-term scalability | Overbuilt for current demand | Partial reuse of publisher only |
-## Temporal Interrogation
-| Time slice | Likely decision pressure | Lock now or defer? | Reason |
-| --- | --- | --- | --- |
-| **HOUR 1 (foundations)** | Canonical event schema and dedupe key policy | **Lock now** | Prevent downstream rework in storage and UI merge behavior |
-| **HOUR 2-3 (core logic)** | Retry/backoff semantics for stream loss | **Lock now** | Impacts both backend signaling and client state machine |
-| **HOUR 4-5 (integration)** | Handling gaps between snapshot and stream cursor | **Lock now** | Prevent silent data loss during reconnect windows |
-| **HOUR 6+ (polish/tests)** | Banner copy tone and polling cadence tuning | **Defer** | Safe to iterate after baseline reliability is proven |
+Mode selected: SELECTIVE EXPANSION
 ## In scope / out of scope / deferred
 | Category | Items |
 | --- | --- |
-| **In scope** | In-app notification feed; SSE delivery path; read/unread state; basic retry on transient failures |
-| **Out of scope** | Email/SMS/push providers; marketing campaigns; per-user notification preferences beyond on/off |
-| **Deferred** | WebSocket channel; rich media attachments in notifications; full-text search across historical events |
-## Discretion Areas
-- Client-side badge rendering strategy (optimistic vs server-confirmed) is implementation discretion.
-- Polling fallback backoff curve is implementation discretion if degraded-state UX remains explicit.
+| In scope | durable in-app feed, SSE path, degraded-state UX |
+| Out of scope | email/SMS/push channels, marketing flows |
+| Deferred | WebSocket migration, rich-media payloads |
-## Error & Rescue Registry (sample entry)
+## Reference Pattern Registry
-| Capability | Failure mode | Detection | Fallback |
-| --- | --- | --- | --- |
-| Event delivery | SSE connection drops mid-session | Client \`EventSource\` error event + heartbeat timeout | Fall back to REST polling every 30s until SSE reconnect succeeds; show subtle “live updates paused” banner |
-## Completion Dashboard
-- Checklist findings: 9/9 complete (complex path)
-- Resolved decisions count: 7
-- Unresolved decisions: None
-## Scope Summary
+| Pattern | Disposition | Rationale |
+| --- | --- | --- |
+| Snapshot + stream handoff | accept | Proven consistency model |
+| Queue-backed fan-out rewrite | defer | High cost for current demand |
-- Selected mode: SELECTIVE EXPANSION (cherry-pick durable feed on hold-scope baseline).
-- Accepted scope: durable feed + SSE + explicit degraded UX.
-- Deferred: WebSocket channel and rich-media/search enhancements.
-- Explicitly excluded: outbound channels and marketing workflows for v1.
-- Next-stage handoff: design — carry the durable-feed contract, SSE failover paths, and degraded-UX expectations into architecture lock-in.`,
-    design: `## Codebase Investigation (blast-radius files)
+## Requirements
-| File | Current responsibility | Patterns discovered |
+| R# | Requirement | Why |
 | --- | --- | --- |
-| \`src/api/routes/user.ts\` | User CRUD endpoints | Express router, Zod validation, throws \`AppError\` |
-| \`src/services/event-bus.ts\` | In-process pub/sub | EventEmitter wrapper, typed channels, no persistence |
-| \`src/middleware/auth.ts\` | JWT verification | Extracts user from token, attaches to \`req.context\` |
-| \`tests/integration/user.test.ts\` | User route tests | Supertest, factory helpers, \`beforeEach\` DB reset |
+| R-1 | Feed is queryable for recent window | Baseline usability |
+| R-2 | Live updates are timely and recoverable | Reliability |
+| R-3 | Degraded state is explicit to users | No silent failure |
-Discovery: existing EventEmitter-based bus has no durability — notifications must add persistence layer on top, not replace the bus.
+## Boundary Stress-Tests
-## Search Before Building (sample result)
+- Stream disconnect while user is active -> banner + fallback path required.
+- Snapshot/stream cursor mismatch -> deterministic recovery required.
+`,
+    design: `## Blast Radius
-| Layer | Label | What to reuse first |
+| File | Change type | Reason |
 | --- | --- | --- |
-| Layer 1 | stdlib | Built-in timers, structured logging patterns, standard error types |
-| Layer 2 | existing codebase | Existing auth middleware, existing API client wrapper, existing feature flags helper |
-| Layer 3 | npm | A small, well-maintained SSE helper (only if Layer 1–2 cannot cover framing/reconnect ergonomics) |
+| \`src/services/notifications.ts\` | modify | persistence-aware publish path |
+| \`src/api/routes/notifications.ts\` | modify | snapshot + stream endpoints |
+| \`src/ui/feed.tsx\` | modify | degraded banner + reconnect states |
+| \`tests/integration/notifications.test.ts\` | add/update | consistency + auth coverage |
-## Architecture Diagram (mandatory)
+## Architecture Diagram
+\`\`\`mermaid
+flowchart LR
+  API --> Service --> Outbox --> Projector --> Feed
+  Service --> Stream
 \`\`\`
-┌─────────────┐      ┌──────────────┐      ┌────────────────┐
-│ API Gateway │─────▶│ Notification │─────▶│ Event Publisher│
-└─────────────┘      │ Service      │      └────────┬───────┘
-                     └──────┬───────┘               │
-                            │                       ▼
-                     ┌──────▼───────┐      ┌────────────────┐
-                     │ Read Model   │◀─────│ Outbox / Queue │
-                     │ (Feed Store) │      └────────────────┘
-                     └──────────────┘
-\`\`\`
-Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Queue (persist) → Read Model (project).
-## What Already Exists
-| Sub-problem | Existing code/library | Layer | Reuse decision |
-| --- | --- | --- | --- |
-| Auth context extraction | \`src/middleware/auth.ts\` | Layer 1 | Reuse as-is |
-| Event fan-out | \`src/services/event-bus.ts\` | Layer 2 | Wrap with persistence adapter |
-| SSE framing | None | Layer 3 | Evaluate \`better-sse\` npm package |
-| Notification schema | None | — | New: define in \`src/schemas/notification.ts\` |
+## Failure Modes
-## Failure Mode Table
-| Failure | Trigger | Detection | Mitigation | User impact |
-| --- | --- | --- | --- | --- |
-| SSE connection drop | Network interruption | Client heartbeat timeout (30s) | Auto-reconnect with exponential backoff + snapshot fallback | Brief delay (≤10s), no data loss |
-| Duplicate publish | Retry after timeout | Dedupe key check in outbox | Upsert with idempotency key | None (transparent) |
-| Queue backpressure | Spike >1000 events/s | Queue depth metric alarm | Back-pressure signal to publisher, shed non-critical events | Delayed delivery of low-priority notifications |
-## Test Strategy
-- **Unit:** validator functions, dedupe-key logic, event schema factories — target 90%+ line coverage.
-- **Integration:** publisher → outbox → read-model pipeline via in-memory DB; SSE reconnect with simulated drops.
-- **E2E:** one happy-path browser test (publish → feed visible) and one degraded-path test (SSE down → REST fallback + banner).
-## Performance Budget
-| Critical path | Metric | Target | Measurement method |
-| --- | --- | --- | --- |
-| Publish → visible in feed | p95 latency | ≤ 5 s | Integration test with deterministic clock + production Datadog SLO |
-| Feed snapshot load | p99 response time | ≤ 200 ms | Load test with 1 000 items per user |
-| SSE reconnect | Time to first event after drop | ≤ 3 s | Simulated disconnect in integration suite |
-## NOT in scope
-- Outbound channels (email, push, SMS) — deferred to v2.
-- Admin notification management UI — separate workstream.
-- Notification preferences / mute rules — requires user settings redesign.
-## Parallelization Strategy
-| Module | Depends on | Parallel lane | Conflict risk |
-| --- | --- | --- | --- |
-| Notification schema (T1) | — | Lane A | None |
-| Publisher + outbox (T2) | T1 | Lane A | None |
-| Client feed + SSE (T3) | T1, T2 | Lane B (after T1) | Shared event type definitions |
-## Unresolved Decisions
-| Decision | Status | Options | Missing info | Default if unanswered |
-| --- | --- | --- | --- | --- |
-| Feed storage model | OPEN | (A) append-only event log, (B) mutable rows, (C) hybrid | Load testing results on read patterns | (A) append-only — safest for audit trail |
-## Interface sketch (non-binding)
-- **Client → server:** \`GET /api/me/notifications/snapshot?limit=50\` plus optional cursor parameters (if adopted).
-- **Server → client:** \`GET /api/me/notifications/stream\` as SSE with periodic heartbeats.
-## Completion Dashboard
-| Review Section | Status | Issues |
+| Failure | Detection | Mitigation |
 | --- | --- | --- |
-| Architecture Review | issues-found-resolved | Decided on outbox pattern over direct pub/sub |
-| Code Quality Review | clear | — |
-| Test Review | issues-found-resolved | Added integration test gap for SSE reconnect |
-| Performance Review | clear | — |
-| Distribution & Delivery Review | clear | — |
-**Decisions made:** 4 | **Unresolved:** 1 (feed storage model)
+| Stream drops | heartbeat timeout | fallback polling + reconnect |
+| Cursor gap | consistency check | replay snapshot delta |
+| Auth mismatch | auth guard log | terminate stream + refresh |
-## Quality bar for this stage
+## Test Strategy
-Design output should be **reviewable by someone who did not attend brainstorming**: they can trace from constraints → components → open decisions without reading code.`,
+- Unit: merge logic, retry budget, projection idempotency.
+- Integration: snapshot+stream consistency and auth boundaries.
+- E2E: degraded banner and recovery UX.
+`,
     spec: `## Acceptance Criteria
-| ID | Criterion (observable/measurable/falsifiable) | Design Decision Ref |
-| --- | --- | --- |
-| AC-1 | Given a signed-in user with an active session, when the server publishes a new notification event for that user, the client feed shows the new item within 5 seconds without a full page reload. | Architecture: SSE delivery path |
-| AC-2 | Given the same logical notification is published twice with the same dedupe key, when the client processes the stream, the feed contains exactly one visible item for that key. | Architecture: dedupe-key in event schema |
-| AC-3 | Given the live connection is unavailable, when the user opens the notifications panel, the UI shows a non-blocking "live updates paused" banner and loads the latest snapshot via REST within 2 seconds. | Architecture: REST fallback + degraded UX |
-## Edge Cases
-| Criterion ID | Boundary case | Error case |
-| --- | --- | --- |
-| AC-1 | Notification published during client reconnect window (boundary: \u2264 5 s delivery still holds after reconnect). | Server publish fails mid-write — client never receives event; REST snapshot fills gap. |
-| AC-2 | Two events with identical dedupe key arrive within same SSE frame (boundary: only one row rendered). | Dedupe-key field missing — reject event at publisher and log error. |
-| AC-3 | SSE disconnects after exactly 30 s heartbeat timeout (boundary: banner appears within 1 s of timeout). | REST snapshot endpoint returns 500 — panel shows "unable to load" with retry button. |
-## Constraints and Assumptions
-- **Constraints:** Max feed size 1 000 items per user. SSE heartbeat interval 30 s (server-side). REST snapshot p99 \u2264 200 ms. No new runtime dependencies.
-- **Assumptions:** Users have a single active session at a time for v1. Existing auth middleware provides user context. Event publisher is single-writer per user.
-## Testability Map
-| Criterion ID | Verification approach | Command/manual steps |
-| --- | --- | --- |
-| AC-1 | Integration test: publish event \u2192 assert feed contains item within 5 s (deterministic clock). | \`pnpm vitest run tests/integration/notification-delivery.test.ts\` |
-| AC-2 | Unit test: publish same dedupe key twice \u2192 assert single row in feed store. | \`pnpm vitest run tests/unit/dedupe-feed.test.ts\` |
-| AC-3 | E2E test: kill SSE transport \u2192 assert banner visible + REST snapshot loads. | \`pnpm playwright test tests/e2e/degraded-mode.spec.ts\` |
+| AC ID | Criterion | Requirement ref | Verification approach |
+| --- | --- | --- | --- |
+| AC-1 | Feed returns recent window reliably | R-1 | integration test |
+| AC-2 | Live updates visible within agreed latency | R-2 | perf + integration |
+| AC-3 | Disconnect shows degraded state promptly | R-3 | e2e scenario |
-## Approval
+## Notes
-- Approved by: user
-- Date: 2026-04-14`,
+- Criteria are observable, measurable, and falsifiable.
+- Every AC maps to at least one task and one test path in plan/tdd.
+`,
     plan: `## Dependency Graph
-\`\`\`
-T-1 ──▶ T-2 ──▶ T-3
- │               ▲
- └───────────────┘
-\`\`\`
-Parallel opportunity: T-1 is a prerequisite for both T-2 and T-3 (T-3 also needs T-2).
+- D1 schema + persistence
+- D2 API snapshot/stream
+- D3 UI degraded-state handling
+- D4 tests + observability
-## Dependency Batches
+## Tasks
-#### Batch 1 (foundation)
-- Task IDs: T-1
-- Verification gate: schema tests pass, dedupe key fixtures validated
-#### Batch 2 (core logic)
-- Task IDs: T-2
-- Depends on: Batch 1 (T-1 complete)
-- Verification gate: integration test proves publish-to-outbox path
-#### Batch 3 (integration)
-- Task IDs: T-3
-- Depends on: Batch 2 (T-2 complete)
-- Verification gate: e2e tests pass for delivery, dedupe, and degraded mode
-Execution rule: complete and verify each batch before starting the next batch.
-## Task List
-| Task ID | Description | Acceptance criterion | Verification command | Effort |
-| --- | --- | --- | --- | --- |
-| T-1 | Define notification event schema + dedupe key rules | AC-1, AC-2: schema contract + fixtures | \`\`\`pnpm vitest run tests/unit/notification-schema.test.ts\`\`\` |
-| T-2 | Implement publisher + outbox write path | AC-1: integration test (happy path publish) | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` |
-| T-3 | Implement client feed + SSE subscribe + REST fallback | AC-1, AC-2, AC-3: e2e tests including degraded mode | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` |
+| Task ID | Description | Effort | Minutes |
+| --- | --- | --- | --- |
+| T-1 | schema + migration | M | 90 |
+| T-2 | snapshot/stream API updates | M | 90 |
+| T-3 | UI degraded-state path | M | 70 |
+| T-4 | consistency + auth tests | M | 85 |
 ## Acceptance Mapping
-| Criterion ID | Task IDs |
+| AC ID | Task IDs |
 | --- | --- |
-| AC-1 (delivery within 5s) | T-2, T-3 |
-| AC-2 (idempotency) | T-1, T-2 |
-| AC-3 (failure visibility) | T-3 |
-## Risk Assessment
-| Task/Batch | Risk | Likelihood | Impact | Mitigation |
-| --- | --- | --- | --- | --- |
-| T-3 (Batch 3) | SSE reconnect logic complex | Medium | High | Spike reconnect in isolation before integrating with feed UI |
-| Batch 2 → 3 | Publisher API contract may shift | Low | Medium | Pin contract in T-1 schema; T-2 integration test validates |
+| AC-1 | T-1, T-2, T-4 |
+| AC-2 | T-2, T-4 |
+| AC-3 | T-3, T-4 |
 ## WAIT_FOR_CONFIRM
-- Status: pending
-- Confirmed by:`,
-    tdd: `## RED Evidence
-| Slice | Test name | Command | Failure output summary |
-| --- | --- | --- | --- |
-| S-1 (event schema + dedupe) | counts unique keys and unread items | \`\`\`pnpm vitest run tests/unit/dedupe-feed.test.ts\`\`\` | Cannot find module '../notificationFeed' |
-| S-2 (publisher outbox) | publishes event to outbox with dedupe key | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` | publishToOutbox is not a function |
-| S-3 (client feed + fallback) | shows notification within 5s via SSE | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` | Element [data-testid="feed-item"] not found |
-## Acceptance Mapping
-| Slice | Plan task ID | Spec criterion ID |
-| --- | --- | --- |
-| S-1 | T-1 | AC-1, AC-2 |
-| S-2 | T-2 | AC-1 |
-| S-3 | T-3 | AC-1, AC-2, AC-3 |
-## Failure Analysis
-| Slice | Expected missing behavior | Actual failure reason |
-| --- | --- | --- |
-| S-1 | notificationFeed module does not exist yet | Module import fails — correct: implementation missing |
-| S-2 | publishToOutbox function not implemented | Function not found — correct: write path missing |
-| S-3 | Feed UI not rendered, SSE not connected | DOM element missing — correct: client component not built |
+Plan is ready to execute after user confirmation.
+`,
+    tdd: `## RED
-## GREEN Evidence
-- Full suite command: \`\`\`pnpm vitest run && pnpm playwright test\`\`\`
-- Full suite result: 47 tests passed (3 new + 44 existing), 0 failed, 0 skipped
+| Slice | Failing test evidence |
+| --- | --- |
+| S-1 feed window | expected 30d window, got 7d |
+| S-2 degraded banner | banner absent after forced disconnect |
-## REFACTOR Notes
+## Acceptance Mapping
-- What changed: Extracted \`\`\`mergeLatestByDedupeKey\`\`\` helper from inline loop in \`\`\`summarizeDedupedFeed\`\`\`; moved SSE reconnect logic into \`\`\`useSSEConnection\`\`\` hook.
-- Why: Dedupe merge logic is reused by both publisher and client; reconnect logic was duplicated across components.
-- Behavior preserved: Full suite re-run confirms 47/47 pass after refactor.
+| Slice | AC IDs |
+| --- | --- |
+| S-1 | AC-1 |
+| S-2 | AC-3 |
-## Traceability
+## GREEN
-- Plan task IDs: T-1, T-2, T-3
-- Spec criterion IDs: AC-1, AC-2, AC-3`,
-    review: `## Layer 1 Verdict
+- Targeted tests pass.
+- Full suite re-run after fixes.
-| Criterion | Verdict | Evidence |
-| --- | --- | --- |
-| AC-1: Delivery within 5s without reload | PASS | \`notification-feed.e2e.ts:44-88\` asserts SSE-to-UI timing under mock clock |
-| AC-2: Dedupe — one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
-| AC-3: Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
+## REFACTOR
-## Layer 2 Findings
+- Reduced reconnect state duplication.
+- Revalidated behavior with regression tests.
+`,
+    review: `## Layer 1 — Spec Compliance
-| ID | Severity | Category | Description | Status |
+| ID | Severity | Finding | Evidence | Status |
 | --- | --- | --- | --- | --- |
-| R-1 | Critical | correctness | Snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. | open |
-| R-2 | Important | performance | \`feedStore.merge()\` does full-array scan on every SSE event; O(n) per event where n is feed length. | open |
-| R-3 | Suggestion | architecture | SSE reconnect logic duplicated across \`useNotifications\` and \`usePresence\`; extract shared hook. | open |
+| R-1 | low | AC mapping remains intact | trace table + tests | closed |
-## Review Findings Contract
+## Layer 2 — Code Quality
-- See \`07-review-army.json\`
-- Reconciliation summary: 1 duplicate collapsed (R-1 reported by reviewer and security-reviewer), 0 conflicts
-## Review Readiness Snapshot
-- Layer 1 complete: yes (3/3 criteria)
-- Layer 2 complete: yes (5 sections reviewed)
-- Review findings schema valid: yes
-- Open critical blockers: 1 (R-1)
-- Ship recommendation: BLOCKED until R-1 resolved
+| ID | Severity | Finding | Evidence | Status |
+| --- | --- | --- | --- | --- |
+| R-2 | high | auth guard gap in stream query path | curl repro + failing test | open |
-## Severity Summary
+## Victory Detector
-- Critical: 1
-- Important: 1
-- Suggestion: 1
+Victory Detector: pass | fail
-## Final Verdict
+- Current verdict: fail (R-2 open)
+`,
+    ship: `## Release Checklist
-- BLOCKED`,
-    ship: `## Preflight Results
+- version/changelog prepared
+- test/build/preflight passed
+- review blockers resolved or explicitly accepted
-- Review verdict: APPROVED_WITH_CONCERNS (R-1 resolved, R-2 accepted as known debt)
-- Build: pass (\`pnpm build\` succeeds)
-- Tests: pass (\`pnpm vitest run && pnpm playwright test\` — 47 passed, 0 failed)
-- Lint: pass (\`pnpm lint\` clean)
-- Type-check: pass (\`pnpm typecheck\` clean)
-- Working tree clean: yes (\`git status\` shows no uncommitted changes)
+## Victory Detector
-## Release Notes
+Victory Detector: pass | fail
-- **Added:** In-app notification feed with SSE updates and REST fallback snapshotting (AC-1, AC-3).
-- **Changed:** Notification payloads now include a stable dedupe key for idempotent rendering (AC-2).
-- **Fixed:** Panel no longer drops the newest item when reconnecting after sleep/resume.
-- **Breaking changes:** None.
+- Current verdict: pass
 ## Rollback Plan
-- Trigger conditions: error rate on \`/notifications/stream\` exceeds 5% for >5 minutes, or p95 publish-to-visible lag exceeds 10s.
-- Rollback steps: \`git revert <merge-sha> && git push origin main\` then redeploy; if DB migrations shipped, run \`2026_04_12_notifications_cursor_down.sql\` before traffic.
-- Verification steps: confirm error rate returns to pre-release baseline within 10 minutes; smoke-test feed panel manually.
-## Monitoring
-- Metrics/logs to watch: error rate on \`/notifications/stream\` and snapshot endpoint for 24h; p95 publish-to-visible lag via metrics dashboard.
-- Risk note (if no monitoring): N/A — monitoring is in place.
-## Finalization
-- Selected enum: FINALIZE_OPEN_PR
-- Selected label: B
-- Execution result: PR #42 created via \`gh pr create\`; CI passed; squash-merged to main.
-- PR URL: https://github.com/example/repo/pull/42`,
+- Trigger: error-rate or latency threshold breach
+- Steps: revert + redeploy prior artifact
+- Verification: key metrics return to baseline
+`
 };
 const GOOD_BAD_EXAMPLES = {
-    brainstorm: [
-        {
-            label: "Problem / success statement",
-            good: "Problem: release checks are fragile and inconsistent between CI and local runs; invalid metadata sometimes reaches npm publish. Success: invalid release preconditions are caught before publish with explicit operator feedback, in both CI and local workflows. Constraints: no new runtime dependencies.",
-            bad: "Problem: releases are broken. Success: make them better. Constraints: be careful.",
-            lesson: "\"Make it better\" is not a success criterion — an agent cannot know when it is done. State the observable condition that proves success."
-        },
-        {
-            label: "Alternative direction (one of 2–3)",
-            good: "Option B: Pre-publish verifier script invoked from \`release.yml\` and a \`pnpm release:check\` target. Pros: one enforcement surface; fails fast locally. Cons: adds a script to maintain; must stay in sync with \`package.json\`. Rejected alternative: relying on npm lifecycle hooks only — they run too late to block publish.",
-            bad: "We could also use a script, or hooks, or something in CI. We'll pick whichever is easier later.",
-            lesson: "Alternatives are only useful if they are concrete and comparable. Name each one, call out pros/cons, and say what was rejected — otherwise \"later\" becomes \"never\" and the choice is made by accident."
-        },
-        {
-            label: "Clarifying question",
-            good: "Before I lock direction: should a failed release:check block the CI job (hard failure) or only warn and continue? The former is safer but costs a revert cycle when the check itself is wrong; the latter preserves velocity but can let bad metadata through. Recommend A (block). Pick: A) Block  B) Warn-only  C) Block in CI, warn locally.",
-            bad: "Do you want it to fail or warn? Let me know.",
-            lesson: "A good question gives the user context, a recommendation, and lettered options they can answer with one keystroke. \"Let me know\" shifts the framing cost back to the user."
-        }
-    ],
-    scope: [
-        {
-            label: "In / out / deferred boundaries",
-            good: "In scope: in-app notification feed, SSE delivery path, read/unread state, retry on transient failures. Out of scope: email/SMS/push providers, per-user preferences. Deferred: WebSocket channel, rich media, full-text search.",
-            bad: "In scope: notifications. Out of scope: stuff we are not doing. Deferred: v2.",
-            lesson: "Vague boundaries get relitigated in every subsequent stage. Enumerate concrete capabilities on each side — \"stuff we are not doing\" is not a decision."
-        },
-        {
-            label: "Scope change trace",
-            good: "Scope delta at 2026-04-15: user asked to add per-user mute preferences. Decision: moved from Out-of-scope → In-scope; acknowledged cost (≈1 day, +1 schema migration); risk: touches settings surface. Recorded in \`.cclaw/artifacts/03-design-<slug>.md#scope-trace\`. Requires re-running scope review before design lock.",
-            bad: "Added mute preferences to scope.",
-            lesson: "Scope changes silently are how projects drift. Every in↔out move needs a timestamp, a cost estimate, and a link to the next review it invalidates."
-        }
-    ],
-    design: [
-        {
-            label: "Failure mode row",
-            good: "Failure: SSE connection drop. Trigger: network interruption. Detection: client heartbeat timeout (30s). Mitigation: auto-reconnect with exponential backoff + REST snapshot fallback. User impact: ≤10s delay, no data loss.",
-            bad: "Failure: network errors. Mitigation: retry and log. User impact: users may see issues sometimes.",
-            lesson: "A failure row without a detection signal and a bounded user impact is aspirational, not a design. Name the trigger, the detector, and the recovery behavior."
-        },
-        {
-            label: "Rejected design alternative",
-            good: "Considered WebSocket instead of SSE. Rejected because: (1) our proxy layer strips upgrade headers; (2) one-way push fits the \"notification feed\" semantics; (3) SSE plays nicer with HTTP/2 fan-out. Trade-off accepted: no client→server channel; we will fall back to REST for the tiny set of acks.",
-            bad: "We chose SSE. WebSocket could also work.",
-            lesson: "A design without a rejected alternative reads like a requirement, not a decision. The rejection is the part that survives review — it tells future readers what trade-off was taken."
-        },
-        {
-            label: "Diagram caption",
-            good: "Figure 1 — Notification pipeline (sequence diagram): producer → outbox(durable) → relay → SSE stream → client. Label on relay shows \"at-least-once; dedupe by event_id\"; label on client shows \"merge by dedupe_key before render\".",
-            bad: "Figure 1: notification flow.",
-            lesson: "An unlabeled diagram is decoration. Every arrow needs a delivery guarantee, every box needs an action verb — otherwise the diagram contradicts the prose without anyone noticing."
-        }
-    ],
-    spec: [
-        {
-            label: "Observable acceptance criterion",
-            good: "AC-1: Given a signed-in user with an active session, when the server publishes a new notification event for that user, the client feed shows the new item within 5 seconds without a full page reload.",
-            bad: "AC-1: Users should see their notifications quickly and reliably, with a good user experience.",
-            lesson: "Spec criteria must be observable, measurable, and falsifiable. \"Quickly\" is a feeling; \"within 5 seconds without a full page reload\" is a test."
-        },
-        {
-            label: "Negative / error-path criterion",
-            good: "AC-4: Given the SSE connection drops mid-session, when the client detects no heartbeat for 30 seconds, the UI shows a \"Reconnecting…\" badge and automatically re-subscribes; missed events delivered since the last ACKed id are replayed exactly once.",
-            bad: "AC-4: Handle errors gracefully.",
-            lesson: "Error-path criteria are where most bugs hide. Write them with the same \"given/when/then\" rigor as happy-path — otherwise QA ends up inventing them at release time."
-        },
-        {
-            label: "Non-functional budget",
-            good: "NFR-2: p95 end-to-end publish-to-visible latency ≤5s under 1k concurrent subscribers on a 2-vCPU pod; CPU headroom ≥30% at steady state. Measurement: \`k6 run tests/load/notifications.js\`, report median + p95 + p99.",
-            bad: "NFR-2: Performance should be good.",
-            lesson: "Non-functional goals without numbers + a measurement command are aspirational. Pin the percentile, the load shape, and the script that produces the evidence."
-        }
-    ],
-    plan: [
-        {
-            label: "Single task row",
-            good: "T-2: Implement publisher + outbox write path. Acceptance: AC-1. Verification: \`pnpm vitest run tests/integration/publisher.test.ts\`. Depends on: T-1. Effort: M (≈4 min).",
-            bad: "T-2: Build the backend. Verify: manual testing. Effort: a few days.",
-            lesson: "A task without a single acceptance criterion and a reproducible verification command is a wish. If you cannot say how you will know it is done, you cannot ship it."
-        },
-        {
-            label: "Dependency graph entry",
-            good: "T-5 (consume SSE client) depends on T-3 (stream endpoint) and T-4 (auth cookie forwarding). Parallelizable with T-6 (read-state persistence). Blocks T-8 (end-to-end happy-path e2e).",
-            bad: "T-5 depends on other tasks.",
-            lesson: "The value of a dependency graph is mechanical scheduling. \"Depends on other tasks\" is a shrug — list the IDs so the execution order is unambiguous."
-        }
-    ],
-    tdd: [
-        {
-            label: "RED → GREEN → REFACTOR slice",
-            good: "RED: \`pnpm vitest run tests/unit/dedupe-feed.test.ts\` → \`publishToOutbox is not a function\`. GREEN (after minimal impl): same command, 47/47 pass, full suite. REFACTOR: extracted \`mergeLatestByDedupeKey\`; suite still 47/47.",
-            bad: "Wrote the publisher code. Tests pass now. Will add unit tests later when I have time.",
-            lesson: "Code written before a failing test is guessing validated after the fact. The RED failure IS the specification — without it, the GREEN pass proves nothing about the intended behavior."
-        },
-        {
-            label: "Bug-fix reproduction test",
-            good: "Bug B-17: dedup fails when two events arrive in the same ms. Prove-It RED: added \`tests/unit/dedupe-feed.test.ts > dedupes when timestamps collide\`; run → \`expected 1 item, received 2\`. Fix applied; same test passes; full suite still 47/47.",
-            bad: "Fixed the duplicate rendering issue.",
-            lesson: "A bug without a reproducing test is a bug that comes back. Ship the RED test as part of the fix — it is the contract that prevents regression."
-        },
-        {
-            label: "Refactor-only slice (state-based)",
-            good: "Refactor: moved heartbeat logic into \`useHeartbeat()\` hook. No behavior change intended. Evidence: no new tests; existing state-based tests \`feed-state.test.ts\` (42 assertions) still pass; coverage unchanged at 94%.",
-            bad: "Refactored the component. Added some interaction mocks to check the new hook is called.",
-            lesson: "A refactor should assert on state, not on call shape. If you had to rewrite your mocks, it was not a refactor — it was a redesign dressed as one."
-        }
-    ],
-    review: [
-        {
-            label: "Critical finding",
-            good: "R-1 Critical: snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. Evidence: integration test \`notification-consistency.test.ts:22-58\`. Status: open.",
-            bad: "Looks good overall. A few small things could be polished, maybe refactor the merge logic. LGTM.",
-            lesson: "\"LGTM\" is not a review — it is a signature on whatever the author shipped. Every finding needs a severity, a falsifiable description, evidence, and a status."
-        },
-        {
-            label: "Security review row",
-            good: "R-4 High (sec): SSE endpoint accepts any user_id in the query string; a logged-in attacker can subscribe to another user's stream. Evidence: \`curl\` repro in \`docs/notes/sec-r4.md\`. Fix: require auth cookie, filter events by session.user.id server-side. Status: fix in T-11; verified in \`notifications-auth.test.ts\`.",
-            bad: "Might want to double-check auth on the SSE endpoint.",
-            lesson: "Security findings without a reproduction step and a tied fix-task are suggestions, not reviews. Attach the curl (or equivalent), the fix task ID, and the verification test."
-        }
-    ],
-    ship: [
-        {
+    brainstorm: [{
+            label: "Problem framing",
+            good: "Names affected role, current failure mode, measurable target, and non-goals.",
+            bad: "Need to improve this area somehow.",
+            lesson: "Concrete framing prevents scope drift in downstream stages."
+        }],
+    scope: [{
+            label: "Boundary clarity",
+            good: "Clear in-scope/out-of-scope/deferred lists with concrete capabilities.",
+            bad: "Add improvements where useful.",
+            lesson: "Scope without hard boundaries becomes hidden commitment."
+        }],
+    design: [{
+            label: "Failure handling",
+            good: "Each failure row includes trigger, detection, and mitigation.",
+            bad: "Could fail, handle later.",
+            lesson: "Actionable design risk must be testable and operationally visible."
+        }],
+    spec: [{
+            label: "AC quality",
+            good: "AC includes measurable signal and explicit verification approach.",
+            bad: "System should work reliably.",
+            lesson: "Observable/falsifiable language is required for meaningful verification."
+        }],
+    plan: [{
+            label: "Task granularity",
+            good: "Tasks have bounded outputs, effort, and AC links.",
+            bad: "Implement feature end-to-end.",
+            lesson: "Execution speed depends on concrete, reviewable task slices."
+        }],
+    tdd: [{
+            label: "RED evidence",
+            good: "Includes failing output tied to one behavior slice.",
+            bad: "Tests failed at first.",
+            lesson: "Without concrete RED evidence, behavior intent is not auditable."
+        }],
+    review: [{
+            label: "Finding quality",
+            good: "Severity + falsifiable claim + evidence + status.",
+            bad: "LGTM with a few comments.",
+            lesson: "Review findings are decisions, not vibes."
+        }],
+    ship: [{
             label: "Rollback contract",
-            good: "Rollback trigger: error rate on \`/notifications/stream\` >5% for 5 minutes, or p95 publish-to-visible lag >10s. Steps: \`git revert <merge-sha> && git push origin main\` then redeploy; run \`2026_04_12_notifications_cursor_down.sql\` before traffic. Verification: error rate returns to baseline within 10 minutes.",
-            bad: "Rollback plan: revert the commit if anything goes wrong.",
-            lesson: "\"Revert if anything goes wrong\" leaves the on-call engineer to invent the plan at 2 a.m. The rollback trigger is an operational contract: state the signal, the command, and the verification."
-        },
-        {
-            label: "Preflight check",
-            good: "Preflight: \`pnpm release:check\` ✅ (package metadata ok, changeset captured), \`pnpm test\` ✅ 195/195, \`pnpm build\` ✅, CI green on feat/notifications @ \`abc1234\`, rollback plan captured, migration reviewed. Finalization mode: Merge via squash.",
-            bad: "All good, shipping it.",
-            lesson: "A preflight is a checklist that names each gate and the command that proved it. \"All good\" is a vibe — it cannot be audited after the fact when the deploy misbehaves."
-        }
-    ]
+            good: "Named trigger, exact rollback steps, and verification condition.",
+            bad: "Revert if something goes wrong.",
+            lesson: "Rollback must be executable under incident pressure."
+        }]
 };
 export function stageGoodBadExamples(stage) {
     const samples = GOOD_BAD_EXAMPLES[stage];
@@ -606,7 +276,7 @@ export function stageGoodBadExamples(stage) {
     const blocks = [
         "## Good vs Bad (at-a-glance)",
         "",
-        "Contrasting samples to calibrate the quality bar for this stage. Read before writing the artifact — mirror the **Good** shape, avoid the **Bad** shape. Each block targets a different axis of the stage so you can spot-check more than one dimension of your draft.",
+        "Contrasting samples to calibrate quality for this stage.",
         ""
     ];
     samples.forEach((sample, index) => {
@@ -671,49 +341,38 @@ function exampleSummaryBullets(stage) {
         return ["- Full artifact structure."];
     return headings.map((heading) => `- ${heading}`);
 }
-// Kept in sync with STAGE_EXAMPLES above so the inline summary matches the
-// reference file without duplicating the heavy text. Update whenever the
-// sample in STAGE_EXAMPLES gains or loses a top-level section.
 const STAGE_EXAMPLE_SECTION_HEADINGS = {
     brainstorm: [
         "Problem Decision Record (free-form Frame type label + universal framing fields)",
-        "Reference Pattern Candidates and approaches with trade-offs",
-        "Recommended direction + open questions",
-        "Clarification log and decision record"
+        "Approaches with explicit trade-offs",
+        "Approach Reaction and Selected Direction"
     ],
     scope: [
         "In-scope / out-of-scope / deferred lists with concrete capabilities",
-        "Reference Pattern Registry with accepted/rejected/deferred dispositions",
-        "Requirements table with stable R# IDs",
-        "Boundary stress-tests and non-negotiables",
-        "Decision record for premise challenges"
+        "Reference Pattern Registry with clear dispositions",
+        "Requirements table with stable R# IDs"
     ],
     design: [
         "Blast-radius file list",
         "Mandatory architecture diagram (Mermaid)",
-        "Reference-Grade Contracts for mirrored patterns",
-        "Failure-mode table with detection + mitigation",
-        "Test strategy + performance budget",
-        "Completion dashboard + unresolved decisions"
+        "Failure-mode table with detection + mitigation"
     ],
     spec: [
         "Acceptance-criteria table (observable, measurable, falsifiable)",
         "Requirement-ref column tying each AC back to an R# from scope",
-        "Verification-approach column",
-        "Approval block"
+        "Verification-approach column"
     ],
     plan: [
-        "Dependency graph + dependency batches",
+        "Dependency graph",
         "Task list with effort + minutes estimate per task",
-        "Acceptance mapping (every AC → task IDs)",
-        "No-Placeholder scan row + WAIT_FOR_CONFIRM marker"
+        "Acceptance mapping (every AC -> task IDs)",
+        "WAIT_FOR_CONFIRM marker"
     ],
     tdd: [
-        "RED evidence per vertical slice (failing test output)",
+        "RED evidence per vertical slice",
         "Acceptance mapping per slice",
-        "GREEN evidence (full-suite pass)",
-        "REFACTOR notes with behavior-preservation confirmation",
-        "Test-pyramid shape + prove-it reproduction when applicable"
+        "GREEN evidence",
+        "REFACTOR notes with behavior-preservation confirmation"
     ],
     review: [
         "Spec-compliance findings (Layer 1)",
@@ -722,223 +381,9 @@ const STAGE_EXAMPLE_SECTION_HEADINGS = {
         "Victory Detector-backed go / no-go verdict"
     ],
     ship: [
-        "Release checklist (version, changelog, tag, artifacts)",
-        "Victory Detector: valid review, fresh preflight, rollback, finalization enum",
+        "Release checklist",
+        "Victory Detector: pass | fail",
         "Rollback plan with trigger, steps, verification",
-        "Runbook (how to verify the release post-deploy)",
-        "Sign-off block"
+        "Runbook and sign-off"
     ]
 };
-const DOMAIN_LABELS = {
-    web: "Web app (full-stack)",
-    cli: "CLI tool",
-    library: "Library / SDK",
-    "data-pipeline": "Data pipeline / ETL"
-};
-export const RESEARCH_FLEET_USAGE_EXAMPLE = [
-    "Before drafting `.cclaw/artifacts/03-design-<slug>.md`, run `research/research-fleet.md` once and",
-    "capture all four lenses in `.cclaw/artifacts/02a-research.md`.",
-    "Dispatch semantics by harness: Claude/OpenCode/Codex = native subagents;",
-    "Cursor = generic-dispatch Task mapping; role-switch only as degraded fallback.",
-    "Design must include a `Research Fleet Synthesis` section that maps each",
-    "lens to concrete architecture decisions and risks."
-].join(" ");
-const STAGE_DOMAIN_SAMPLES = {
-    brainstorm: [
-        {
-            domain: "web",
-            label: "Direction",
-            body: "Problem: admin dashboard orders table requires manual refresh to see new orders. Success: admins see new rows within 2s of server-side status change, no full navigation. Anti-success: WebSocket rewrite of the whole table stack when only one view needs live updates."
-        },
-        {
-            domain: "cli",
-            label: "Direction",
-            body: "Problem: `npx cclaw-cli archive` silently deletes 30+ day runs with no preview. Success: a `--dry-run` flag prints would-be-archived run IDs to stdout and exits 0; current behavior is unchanged without the flag. Anti-success: adding an interactive confirmation prompt that breaks CI scripts."
-        },
-        {
-            domain: "library",
-            label: "Direction",
-            body: "Problem: consumers cannot validate hook JSON without importing internal modules. Success: `validateHookDocument(obj)` exported from the package root with typed result `{ ok, errors? }`. Anti-success: exposing the full Zod schema and forcing consumers to depend on Zod."
-        },
-        {
-            domain: "data-pipeline",
-            label: "Direction",
-            body: "Problem: reruns of the orders job create duplicate `fact_orders` rows. Success: running the job twice on the same input leaves row count unchanged and `dbt test --select fact_orders` green. Anti-success: introducing a nightly dedup job that hides the underlying non-idempotency."
-        }
-    ],
-    scope: [
-        {
-            domain: "web",
-            label: "Scope line",
-            body: "In: live-update `/dashboard/orders` table via SSE; out: notification drawer, mobile PWA, dashboards other than `orders`. Discretion: choice of SSE vs long-polling for legacy Safari. NOT in scope: rewriting the auth layer or the existing REST endpoints."
-        },
-        {
-            domain: "cli",
-            label: "Scope line",
-            body: "In: add `--dry-run` to `npx cclaw-cli archive`; out: redesigning archive formats, adding retention flags, or changing the default. Discretion: exact wording of stdout lines. NOT in scope: touching `init` / `sync` / `sync` subcommands."
-        },
-        {
-            domain: "library",
-            label: "Scope line",
-            body: "In: expose `validateHookDocument` + types from package root; out: rewriting hook schema, adding new hook kinds, dropping old ones. Discretion: whether to re-export `HookDocument` as type-only. NOT in scope: migrating consumers."
-        },
-        {
-            domain: "data-pipeline",
-            label: "Scope line",
-            body: "In: dedup step between `raw.orders` and `fact_orders` keyed on `(order_id, event_ts)`; out: redesigning ingestion, adding new partitions, or touching downstream marts. Discretion: `row_number()` vs `qualify`-style dedup. NOT in scope: backfilling historical partitions."
-        }
-    ],
-    design: [
-        {
-            domain: "web",
-            label: "Parallel research fleet handoff",
-            body: RESEARCH_FLEET_USAGE_EXAMPLE
-        },
-        {
-            domain: "web",
-            label: "Architecture note",
-            body: "Data flow: server-side order update → publish to `orders-updates` channel → SSE endpoint `/api/orders/stream` → `useOrderFeed` hook merges into React state → row rerenders. Failure mode: SSE connection drop → exponential-backoff reconnect + on-reconnect REST snapshot fallback. Trade-off accepted: no client→server channel (SSE one-way); existing REST mutations cover it."
-        },
-        {
-            domain: "cli",
-            label: "Architecture note",
-            body: "Flag is parsed by the existing Zod CLI parser; `--dry-run` short-circuits before any filesystem mutation, shares formatter `src/cli/format.ts` with `status`. Failure mode: formatter output differs between `status` and `archive --dry-run` → centralize format. Trade-off: we print run IDs unsorted to keep the code path identical to the real archive path."
-        },
-        {
-            domain: "library",
-            label: "Architecture note",
-            body: "Re-export `validateHookDocument` from package root; rename internal `__validate` to match the exported name so callsites and the export converge. Failure mode: consumers importing from `/dist/internal` break on the rename → add a deprecation re-export shim for one minor. Trade-off: slightly wider public surface today buys us a smaller public surface tomorrow."
-        },
-        {
-            domain: "data-pipeline",
-            label: "Architecture note",
-            body: "Insert `int_orders_deduped` CTE between staging and fact, keyed on `(order_id, event_ts)` with `row_number() = 1` per key; `fact_orders` reads from the deduped model only. Failure mode: late-arriving events with an earlier `event_ts` would flap the chosen row → tiebreak on `ingest_ts DESC`. Trade-off: the job now does one extra pass; measured +8% runtime, within budget."
-        }
-    ],
-    spec: [
-        {
-            domain: "web",
-            label: "AC",
-            body: "AC-W1: Given a signed-in admin viewing `/dashboard/orders`, when an order's status changes server-side, the row updates within 2s without a full navigation (assert via `pnpm playwright test orders-live.spec.ts`)."
-        },
-        {
-            domain: "cli",
-            label: "AC",
-            body: "AC-C1: Given `npx cclaw-cli init --harnesses=claude` run in an empty directory, exit code is `0`, `.cclaw/config.yaml` is created with `harnesses: [claude]`, and stderr contains no warnings (asserted by `tests/integration/init-sync-runtime.test.ts`)."
-        },
-        {
-            domain: "library",
-            label: "AC",
-            body: "AC-L1: `validateHookDocument(obj)` returns `{ ok: true }` for every fixture under `tests/fixtures/valid-hooks/` and `{ ok: false, errors: [...] }` with at least one message for every fixture under `tests/fixtures/invalid-hooks/`."
-        },
-        {
-            domain: "data-pipeline",
-            label: "AC",
-            body: "AC-D1: For any `orders.csv` input, the pipeline emits exactly one row per `(order_id, event_ts)` pair to `warehouse.fact_orders`; running the job twice on the same input is idempotent (row count unchanged, verified by `dbt test --select fact_orders`)."
-        }
-    ],
-    plan: [
-        {
-            domain: "web",
-            label: "Task",
-            body: "T-W-3 `[~4m]`: Wire SSE endpoint `/api/orders/stream` into `useOrderFeed` hook. AC-W1. Verify: `pnpm playwright test orders-live.spec.ts`. Depends on: T-W-2."
-        },
-        {
-            domain: "cli",
-            label: "Task",
-            body: "T-C-2 `[~3m]`: Add `--dry-run` flag to `npx cclaw-cli archive` that prints the would-be-archived run IDs to stdout and exits 0. AC-C3. Verify: `node dist/cli.js archive --dry-run` + `tests/unit/cli-parse.test.ts`."
-        },
-        {
-            domain: "library",
-            label: "Task",
-            body: "T-L-1 `[~5m]`: Expose `validateHookDocument` from the package root and re-export its types. AC-L1. Verify: `pnpm build && node -e \"console.log(require('./dist').validateHookDocument)\"`."
-        },
-        {
-            domain: "data-pipeline",
-            label: "Task",
-            body: "T-D-2 `[~5m]`: Add dedup step keyed on `(order_id, event_ts)` between `raw.orders` and `fact_orders`. AC-D1. Verify: `dbt run --select fact_orders+ && dbt test --select fact_orders`."
-        }
-    ],
-    tdd: [
-        {
-            domain: "web",
-            label: "RED→GREEN→REFACTOR",
-            body: "RED: `pnpm playwright test orders-live.spec.ts` → timeout waiting for row update. GREEN: wired SSE event → row rerenders via `useOrderFeed`. REFACTOR: extracted `applyOrderEvent(row, event)` pure helper; 87/87 tests still pass."
-        },
-        {
-            domain: "cli",
-            label: "RED→GREEN→REFACTOR",
-            body: "RED: `tests/unit/cli-parse.test.ts` expects `--dry-run` flag → `unknown option` error. GREEN: added to the Zod parser; 19/19 pass. REFACTOR: hoisted the dry-run formatter into `src/cli/format.ts` shared with `status`."
-        },
-        {
-            domain: "library",
-            label: "RED→GREEN→REFACTOR",
-            body: "RED: `tests/unit/hook-schema.test.ts` imports `validateHookDocument` from package root → `export not found`. GREEN: added re-export + types. REFACTOR: renamed internal `__validate` to `validateHookDocument` so the export name matches the source."
-        },
-        {
-            domain: "data-pipeline",
-            label: "RED→GREEN→REFACTOR",
-            body: "RED: `dbt test --select fact_orders` → `unique test on (order_id, event_ts)` fails on re-run. GREEN: added `row_number()` dedup in the staging model. REFACTOR: extracted the dedup CTE into `int_orders_deduped` for reuse by `fact_returns`."
-        }
-    ],
-    review: [
-        {
-            domain: "web",
-            label: "Finding",
-            body: "R-W-1 (Critical, correctness): `useOrderFeed` does not unsubscribe from the SSE channel on unmount — two mounts on the same page double-count rows. Evidence: `tests/unit/order-feed-hook.test.ts > unmount` fails. Fix owner: frontend; blocks ship."
-        },
-        {
-            domain: "cli",
-            label: "Finding",
-            body: "R-C-2 (Suggestion, UX): `npx cclaw-cli archive --dry-run` prints run IDs without a trailing newline, breaking downstream `xargs` pipelines. Evidence: `echo '' | xargs -I{} printf '%s\\n' {}` contrast. Fix owner: CLI; non-blocking."
-        },
-        {
-            domain: "library",
-            label: "Finding",
-            body: "R-L-1 (Important, surface-area): the new `validateHookDocument` export is documented in README but missing from `src/index.ts` — `import { validateHookDocument } from 'cclaw'` fails despite the docs. Evidence: `pnpm build && node -e \"require('./dist').validateHookDocument\"` prints `undefined`. Fix owner: library; blocks ship."
-        },
-        {
-            domain: "data-pipeline",
-            label: "Finding",
-            body: "R-D-1 (Critical, correctness): dedup CTE orders by `event_ts ASC` instead of `event_ts DESC` — on duplicate events we keep the older row. Evidence: `dbt test --select fact_orders` green but fixture `tests/fixtures/orders-dupes.csv` shows wrong survivor. Fix owner: analytics-eng; blocks ship."
-        }
-    ],
-    ship: [
-        {
-            domain: "web",
-            label: "Rollback",
-            body: "Trigger: error rate on `/api/orders/stream` > 2% for 5 minutes, or p95 latency > 1.5s for 10 minutes. Steps: `vercel rollback <deployment>`; run `2026_04_14_revert_orders_stream.sql` before traffic returns. Verify: error rate returns to baseline within 10 minutes on the `orders-live` dashboard."
-        },
-        {
-            domain: "cli",
-            label: "Rollback",
-            body: "Trigger: `npx cclaw-cli init --harnesses=claude` exits non-zero on a fresh tmp dir, OR `npx cclaw-cli sync` regresses (FAIL count increases) on the smoke matrix. Steps: `npm unpublish cclaw-cli@<version>` (within the 72h window) or `npm deprecate cclaw-cli@<version> '<reason>'`; publish the previous patch. Verify: `npx cclaw-cli@latest --version` prints the previous version."
-        },
-        {
-            domain: "library",
-            label: "Rollback",
-            body: "Trigger: any consumer reports `validateHookDocument` no longer exported, OR the CI `dual-package-check` job fails. Steps: `npm deprecate cclaw-cli@<version> 'broken package export — use <prev>'`; publish the previous minor with a patch bump; emit changelog `## Rollback` entry. Verify: a smoke consumer project `pnpm add cclaw-cli@latest` imports cleanly."
-        },
-        {
-            domain: "data-pipeline",
-            label: "Rollback",
-            body: "Trigger: `dbt test --select fact_orders` fails on production run, OR downstream dashboard MAU count drops >10% week-over-week. Steps: disable the new model via `dbt_project.yml` + `dbt run --select state:modified` with the previous git SHA; rerun backfill `dagster asset materialize fact_orders --partition <yesterday>`. Verify: `fact_orders` row count within ±1% of the previous week's baseline."
-        }
-    ]
-};
-export function stageDomainExamples(stage) {
-    const samples = STAGE_DOMAIN_SAMPLES[stage];
-    if (!samples || samples.length === 0)
-        return "";
-    const lines = [
-        "## Living Examples by Domain",
-        "",
-        "Use the row matching your project shape to calibrate voice, specificity, and command choice. The rows are deliberately terse — copy the **shape**, not the text.",
-        ""
-    ];
-    for (const sample of samples) {
-        lines.push(`**${DOMAIN_LABELS[sample.domain]} — ${sample.label}:** ${sample.body}`);
-        lines.push("");
-    }
-    return lines.join("\n");
-}