cclaw-cli 0.51.30 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/README.md +24 -18
  2. package/dist/artifact-linter/brainstorm.d.ts +2 -0
  3. package/dist/artifact-linter/brainstorm.js +289 -0
  4. package/dist/artifact-linter/design.d.ts +2 -0
  5. package/dist/artifact-linter/design.js +354 -0
  6. package/dist/artifact-linter/plan.d.ts +2 -0
  7. package/dist/artifact-linter/plan.js +183 -0
  8. package/dist/artifact-linter/review-army.d.ts +24 -0
  9. package/dist/artifact-linter/review-army.js +365 -0
  10. package/dist/artifact-linter/review.d.ts +2 -0
  11. package/dist/artifact-linter/review.js +99 -0
  12. package/dist/artifact-linter/scope.d.ts +2 -0
  13. package/dist/artifact-linter/scope.js +125 -0
  14. package/dist/artifact-linter/shared.d.ts +247 -0
  15. package/dist/artifact-linter/shared.js +1517 -0
  16. package/dist/artifact-linter/ship.d.ts +2 -0
  17. package/dist/artifact-linter/ship.js +82 -0
  18. package/dist/artifact-linter/spec.d.ts +2 -0
  19. package/dist/artifact-linter/spec.js +130 -0
  20. package/dist/artifact-linter/tdd.d.ts +2 -0
  21. package/dist/artifact-linter/tdd.js +198 -0
  22. package/dist/artifact-linter.d.ts +4 -76
  23. package/dist/artifact-linter.js +56 -2949
  24. package/dist/cli.d.ts +1 -6
  25. package/dist/cli.js +4 -159
  26. package/dist/codex-feature-flag.d.ts +1 -1
  27. package/dist/codex-feature-flag.js +1 -1
  28. package/dist/config.d.ts +3 -2
  29. package/dist/config.js +67 -3
  30. package/dist/constants.d.ts +1 -7
  31. package/dist/constants.js +10 -15
  32. package/dist/content/cancel-command.js +2 -2
  33. package/dist/content/closeout-guidance.d.ts +1 -1
  34. package/dist/content/closeout-guidance.js +15 -13
  35. package/dist/content/core-agents.d.ts +46 -29
  36. package/dist/content/core-agents.js +216 -82
  37. package/dist/content/decision-protocol.d.ts +1 -1
  38. package/dist/content/decision-protocol.js +1 -1
  39. package/dist/content/diff-command.js +1 -1
  40. package/dist/content/examples.d.ts +0 -3
  41. package/dist/content/examples.js +197 -752
  42. package/dist/content/harness-doc.js +20 -2
  43. package/dist/content/hook-manifest.d.ts +2 -2
  44. package/dist/content/hook-manifest.js +2 -2
  45. package/dist/content/hooks.d.ts +1 -0
  46. package/dist/content/hooks.js +32 -137
  47. package/dist/content/idea.d.ts +60 -0
  48. package/dist/content/idea.js +404 -0
  49. package/dist/content/iron-laws.d.ts +0 -1
  50. package/dist/content/iron-laws.js +31 -16
  51. package/dist/content/learnings.d.ts +2 -4
  52. package/dist/content/learnings.js +11 -27
  53. package/dist/content/meta-skill.js +7 -7
  54. package/dist/content/node-hooks.d.ts +10 -0
  55. package/dist/content/node-hooks.js +163 -95
  56. package/dist/content/opencode-plugin.js +15 -29
  57. package/dist/content/reference-patterns.js +2 -2
  58. package/dist/content/runtime-shared-snippets.d.ts +8 -0
  59. package/dist/content/runtime-shared-snippets.js +80 -0
  60. package/dist/content/session-hooks.js +1 -1
  61. package/dist/content/skills.d.ts +1 -0
  62. package/dist/content/skills.js +69 -7
  63. package/dist/content/stage-schema.js +147 -61
  64. package/dist/content/stages/_lint-metadata/index.js +26 -2
  65. package/dist/content/stages/brainstorm.js +13 -7
  66. package/dist/content/stages/design.js +16 -11
  67. package/dist/content/stages/plan.js +7 -4
  68. package/dist/content/stages/review.js +12 -12
  69. package/dist/content/stages/schema-types.d.ts +2 -2
  70. package/dist/content/stages/scope.js +15 -12
  71. package/dist/content/stages/ship.js +3 -3
  72. package/dist/content/stages/spec.js +9 -3
  73. package/dist/content/stages/tdd.js +14 -4
  74. package/dist/content/start-command.js +11 -10
  75. package/dist/content/status-command.js +5 -5
  76. package/dist/content/subagent-context-skills.js +156 -1
  77. package/dist/content/subagents.d.ts +0 -5
  78. package/dist/content/subagents.js +65 -81
  79. package/dist/content/templates.d.ts +1 -1
  80. package/dist/content/templates.js +187 -154
  81. package/dist/content/tree-command.js +2 -2
  82. package/dist/content/utility-skills.d.ts +2 -2
  83. package/dist/content/utility-skills.js +28 -99
  84. package/dist/content/view-command.js +4 -2
  85. package/dist/delegation.d.ts +2 -0
  86. package/dist/delegation.js +2 -1
  87. package/dist/early-loop.d.ts +66 -0
  88. package/dist/early-loop.js +275 -0
  89. package/dist/flow-state.d.ts +5 -6
  90. package/dist/flow-state.js +4 -6
  91. package/dist/gate-evidence.d.ts +0 -23
  92. package/dist/gate-evidence.js +111 -153
  93. package/dist/harness-adapters.d.ts +2 -2
  94. package/dist/harness-adapters.js +48 -19
  95. package/dist/install.js +190 -32
  96. package/dist/internal/advance-stage/advance.d.ts +50 -0
  97. package/dist/internal/advance-stage/advance.js +479 -0
  98. package/dist/internal/advance-stage/cancel-run.d.ts +8 -0
  99. package/dist/internal/advance-stage/cancel-run.js +19 -0
  100. package/dist/internal/advance-stage/flow-state-coercion.d.ts +3 -0
  101. package/dist/internal/advance-stage/flow-state-coercion.js +81 -0
  102. package/dist/internal/advance-stage/helpers.d.ts +14 -0
  103. package/dist/internal/advance-stage/helpers.js +145 -0
  104. package/dist/internal/advance-stage/hook.d.ts +8 -0
  105. package/dist/internal/advance-stage/hook.js +40 -0
  106. package/dist/internal/advance-stage/parsers.d.ts +54 -0
  107. package/dist/internal/advance-stage/parsers.js +307 -0
  108. package/dist/internal/advance-stage/review-loop.d.ts +7 -0
  109. package/dist/internal/advance-stage/review-loop.js +161 -0
  110. package/dist/internal/advance-stage/rewind.d.ts +14 -0
  111. package/dist/internal/advance-stage/rewind.js +108 -0
  112. package/dist/internal/advance-stage/start-flow.d.ts +11 -0
  113. package/dist/internal/advance-stage/start-flow.js +136 -0
  114. package/dist/internal/advance-stage/verify.d.ts +29 -0
  115. package/dist/internal/advance-stage/verify.js +225 -0
  116. package/dist/internal/advance-stage.js +21 -1470
  117. package/dist/internal/compound-readiness.d.ts +1 -1
  118. package/dist/internal/compound-readiness.js +2 -2
  119. package/dist/internal/early-loop-status.d.ts +7 -0
  120. package/dist/internal/early-loop-status.js +90 -0
  121. package/dist/internal/runtime-integrity.d.ts +7 -0
  122. package/dist/internal/runtime-integrity.js +288 -0
  123. package/dist/internal/tdd-red-evidence.js +1 -1
  124. package/dist/knowledge-store.d.ts +5 -28
  125. package/dist/knowledge-store.js +57 -84
  126. package/dist/managed-resources.js +24 -2
  127. package/dist/policy.js +7 -9
  128. package/dist/retro-gate.js +8 -90
  129. package/dist/run-archive.d.ts +1 -1
  130. package/dist/run-archive.js +13 -16
  131. package/dist/run-persistence.js +20 -15
  132. package/dist/runtime/run-hook.entry.d.ts +3 -0
  133. package/dist/runtime/run-hook.entry.js +5 -0
  134. package/dist/runtime/run-hook.mjs +9477 -0
  135. package/dist/tdd-cycle.d.ts +3 -3
  136. package/dist/tdd-cycle.js +1 -1
  137. package/dist/types.d.ts +18 -10
  138. package/package.json +4 -2
  139. package/dist/content/hook-inline-snippets.d.ts +0 -83
  140. package/dist/content/hook-inline-snippets.js +0 -302
  141. package/dist/content/ideate-command.d.ts +0 -8
  142. package/dist/content/ideate-command.js +0 -315
  143. package/dist/content/ideate-frames.d.ts +0 -31
  144. package/dist/content/ideate-frames.js +0 -140
  145. package/dist/content/ideate-ranking.d.ts +0 -25
  146. package/dist/content/ideate-ranking.js +0 -65
  147. package/dist/content/next-command.d.ts +0 -20
  148. package/dist/content/next-command.js +0 -298
  149. package/dist/content/seed-shelf.d.ts +0 -36
  150. package/dist/content/seed-shelf.js +0 -301
  151. package/dist/content/stage-common-guidance.d.ts +0 -1
  152. package/dist/content/stage-common-guidance.js +0 -106
  153. package/dist/doctor-registry.d.ts +0 -10
  154. package/dist/doctor-registry.js +0 -186
  155. package/dist/doctor.d.ts +0 -17
  156. package/dist/doctor.js +0 -2201
  157. package/dist/internal/hook-manifest.d.ts +0 -16
  158. package/dist/internal/hook-manifest.js +0 -77
  159. package/dist/trace-matrix.d.ts +0 -27
  160. package/dist/trace-matrix.js +0 -226
@@ -1,603 +1,273 @@
1
1
  const STAGE_EXAMPLES = {
2
2
  brainstorm: `## Context
3
3
 
4
- - **Project state:** Monorepo with CI pipeline using custom release scripts. Release checks are scattered across shell scripts with no shared validation logic.
5
- - **Relevant existing code/patterns:** \`scripts/pre-publish.sh\` does metadata checks. \`src/release/\` has partial validation helpers.
4
+ - Project state: release checks exist but CI/local behavior drifts.
5
+ - Existing anchors: \`scripts/pre-publish.sh\`, \`src/release/\`, incident notes.
6
6
 
7
7
  ## Problem Decision Record
8
8
 
9
- - **Depth:** standard
10
- - **Frame type:** \`technical-maintenance\` (free-form label; pick whatever fits — see commentary in the template for example labels)
11
-
12
- ### Framing fields (universal keep field names; fill in whatever is meaningful for this work)
13
-
14
- - **Affected user / role / operator:** release operator and package maintainer.
15
- - **Current state / failure mode / opportunity:** release checks are fragile and inconsistent between CI and local runs; invalid metadata sometimes reaches npm publish.
16
- - **Desired outcome (observable):** invalid release preconditions are caught before publish; \`pnpm release:check\` exits non-zero with explicit operator feedback in CI and local workflows.
17
- - **Evidence / signal supporting this framing:** prior incident postmortems referencing release-metadata drift; \`scripts/pre-publish.sh\` already partially encodes the rules.
18
- - **Why now (urgency / cost of waiting):** every additional release reinforces the divergent CI/local behavior and burns operator trust.
19
- - **Do-nothing consequence:** continued publish risk and duplicated local/CI fixes.
20
- - **Non-goals:** no new runtime dependencies; no release-framework rewrite.
9
+ - Depth: standard
10
+ - Frame type: \`technical-maintenance\`
11
+ - Affected user / role / operator: release operator
12
+ - Current state / failure mode / opportunity: inconsistent validation paths
13
+ - Desired outcome (observable): one deterministic preflight in CI and local flows
14
+ - Evidence / signal: repeated metadata drift incidents
15
+ - Why now: recurring operational cost on every release
16
+ - Do-nothing consequence: continued publish risk
17
+ - Non-goals: no release framework rewrite
21
18
 
22
19
  ## Clarifying Questions
23
20
 
24
21
  | # | Question | Answer | Decision impact |
25
22
  | --- | --- | --- | --- |
26
- | 1 | If release metadata is invalid, should we block publishing hard or only warn? | Block hard. | Validation becomes a mandatory gate — no warning-only fallback. |
27
- | 2 | Should the validation logic live in a reusable module or stay as shell scripts? | Reusable module. | Architecture: shared TypeScript module imported by CI and local tooling, not duplicated shell scripts. |
28
- | 3 | For v1, prioritize rapid delivery or maximum configurability? | Rapid delivery. | Minimal deterministic validation surface; defer plugin/config system to v2. |
23
+ | 1 | Block invalid releases or only warn? | Block. | Validation is a hard gate. |
24
+ | 2 | Shared module or script-only patch? | Shared module. | Reuse in CI/local. |
29
25
 
30
26
  ## Approach Tier
31
27
 
32
- - **Tier:** Standard
33
- - **Why this tier:** Change spans CI + local release workflow and shared module boundaries, but remains bounded to one subsystem.
34
-
35
- ## Short-Circuit Decision
36
-
37
- - **Status:** bypassed
38
- - **Why:** Core requirements were not concrete enough initially; we still needed options + trade-off conversation.
39
- - **Scope handoff:** Continue full brainstorm flow before scope.
28
+ - Tier: standard
29
+ - Why this tier: cross-cutting release path change, bounded subsystem
40
30
 
41
31
  ## Approaches
42
32
 
43
33
  | Approach | Role | Upside | Architecture | Trade-offs | Recommendation |
44
34
  | --- | --- | --- | --- | --- | --- |
45
- | A: Reusable validation module | baseline | high | Shared TS module with typed validators, imported by CI scripts and local CLI. Existing \`pre-publish.sh\` calls the module. | Medium upfront effort, high reuse. Requires test coverage for the module. | **Recommended** — best balance of reuse and delivery speed. |
46
- | B: Hardened shell scripts | baseline | modest | Keep existing script approach, add stricter checks and error messages. | Lowest effort. Weak reuse, CI/local divergence risk grows over time. | Viable fallback if TS module is blocked. |
47
- | C: Full release framework | challenger | higher | New release orchestrator with plugin system, config files, rollback commands. | Maximum flexibility. High risk, delivery delay, over-engineered for current needs. | Not recommended for v1. |
35
+ | Shared validator module | baseline | high | Typed checks reused by CI/local | Medium effort | **Recommended** |
36
+ | Script hardening only | challenger | high | Keep shell checks | Fast but drift risk remains | Fallback |
48
37
 
49
38
  ## Approach Reaction
50
39
 
51
- - **Closest option:** A (reusable validation module).
52
- - **Concerns:** User wanted to avoid framework-level overbuild and keep v1 delivery speed high.
53
- - **What changed after reaction:** Recommendation stayed on A, but added explicit fallback path via existing shell entrypoint to reduce migration risk.
54
-
55
- ## Selected Direction
56
-
57
- - **Approach:** A — Reusable validation module
58
- - **Rationale:** based on user reaction favoring fast delivery and lower complexity, shared TS module gives consistent behavior in CI/local, avoids script duplication, and stays within the no-new-dependency constraint.
59
- - **Approval:** approved
60
- - **Next-stage handoff:** \`scope\` — carry the locked stack constraints and the validator module boundary forward.
61
-
62
- ## Design
63
-
64
- - **Architecture:** single \`release-validator\` module in \`src/release/\` exporting typed check functions. CI script and local CLI both import and run the same checks.
65
- - **Key components:** \`validateMetadata()\`, \`validateChangelog()\`, \`validateVersion()\` — each returns a typed result with error details. A \`runAll()\` orchestrator runs checks and exits non-zero on any failure.
66
- - **Data flow:** package.json + CHANGELOG.md → validator module → structured result → CI/CLI renders human-readable report.
40
+ - Closest option: shared validator module
41
+ - Concerns: keep v1 delivery tight; avoid framework creep
42
+ - What changed after reaction: kept module path and added incremental rollout guardrails
67
43
 
68
- ## Assumptions and Open Questions
44
+ ## Challenger Alternative Enforcement
69
45
 
70
- - **Assumptions:** CI remains the primary execution path; existing release metadata files remain the source of truth; v1 prioritizes determinism over customization.
71
- - **Open questions:** What exact rollback sequence for failed publish? Should status output include machine-readable JSON alongside markdown?
46
+ - Challenger alternative: script hardening only.
47
+ - Disposition: rejected for this cycle.
48
+ - Enforcement note: preserve the challenger as a bounded fallback, but do not mix both paths in v1 implementation.
72
49
 
73
- ## Notes for the next stage
50
+ ## Selected Direction
74
51
 
75
- Carry the no-new-dependency constraint and hard-block behavior directly into scope in/out boundaries.`,
52
+ - Selected approach: shared validator module
53
+ - Approval: approved
54
+ - Rationale: best balance of consistency and delivery speed
55
+ - Scope handoff: carry hard-block policy + module boundary into scope
56
+ `,
76
57
  scope: `## Scope contract
77
58
 
78
- **Mode selected:** SELECTIVE EXPANSION
79
- **Default heuristic used:** feature enhancement -> selective
80
- **Mode-specific analysis result:** hold-scope baseline accepted first; one expansion accepted (degraded-state UX), one deferred (real-time channel upgrade).
81
-
82
- ## Prime Directives (applied)
83
-
84
- - Zero silent failures: every delivery failure maps to a visible degraded state.
85
- - Named error surfaces: stream disconnect, auth drift, and publisher timeout are explicit.
86
- - Four-path data flow mapped: happy, nil payload, empty payload, upstream publish error.
87
- - Interaction edge cases in scope: double-open panel, reconnect after sleep, stale tab state.
88
- - Observability in scope: stream error counter, publish-to-visible lag metric, and alert threshold.
89
-
90
- ## Premise challenge result
91
-
92
- The original premise (“add notifications”) was reframed to **“ensure users know when an action requires follow-up”**, which expands the solution space beyond toast spam to include durable inbox items, empty states, and recovery paths when delivery fails.
93
-
94
- ## Dream State Mapping
95
-
96
- | Stage | Statement |
97
- | --- | --- |
98
- | **CURRENT STATE** | Users miss time-sensitive follow-ups because alerts are ephemeral and not recoverable. |
99
- | **THIS PLAN** | Introduce durable in-app feed + live updates + explicit degraded mode fallback. |
100
- | **12-MONTH IDEAL** | Unified notification center with reliable multi-channel fan-out and user-level routing preferences. |
101
- | **Alignment verdict** | Aligned: this scope builds the durability foundation without prematurely committing to channel expansion. |
102
-
103
- ## Mode-Specific Analysis
104
-
105
- **Selected mode:** SELECTIVE EXPANSION
106
-
107
- - **Hold-scope baseline:** SSE live updates + REST fallback is the minimum that meets the "know when action is needed" reframe. Accepted as baseline.
108
- - **Expansion evaluated — degraded-state UX (accepted):** Adding an explicit "live updates paused" banner and polling fallback turns a reliability gap into a visible, recoverable state. Low incremental effort (S), high user trust payoff.
109
- - **Expansion evaluated — real-time channel upgrade (deferred):** WebSocket channel provides lower latency but requires new infra (connection pool, auth handshake). Not justified for current load; deferred to post-v1 validation.
110
-
111
- ## Implementation Alternatives
112
-
113
- | Option | Summary | Effort (S/M/L/XL) | Risk | Pros | Cons | Reuses |
114
- | --- | --- | --- | --- | --- | --- | --- |
115
- | **A (minimum viable)** | Polling-only feed with no live stream | S | Low | Fastest ship, low infra risk | Weaker UX, delayed visibility | Existing REST snapshot endpoint |
116
- | **B (recommended)** | SSE live updates + REST fallback snapshot | M | Med | Better timeliness, graceful degradation | Requires reconnect handling | Existing event publisher + REST path |
117
- | **C (ideal architecture)** | Event bus + WebSocket channel + feed projection | XL | High | Strong long-term scalability | Overbuilt for current demand | Partial reuse of publisher only |
118
-
119
- ## Temporal Interrogation
120
-
121
- | Time slice | Likely decision pressure | Lock now or defer? | Reason |
122
- | --- | --- | --- | --- |
123
- | **HOUR 1 (foundations)** | Canonical event schema and dedupe key policy | **Lock now** | Prevent downstream rework in storage and UI merge behavior |
124
- | **HOUR 2-3 (core logic)** | Retry/backoff semantics for stream loss | **Lock now** | Impacts both backend signaling and client state machine |
125
- | **HOUR 4-5 (integration)** | Handling gaps between snapshot and stream cursor | **Lock now** | Prevent silent data loss during reconnect windows |
126
- | **HOUR 6+ (polish/tests)** | Banner copy tone and polling cadence tuning | **Defer** | Safe to iterate after baseline reliability is proven |
59
+ Mode selected: SELECTIVE EXPANSION
127
60
 
128
61
  ## In scope / out of scope / deferred
129
62
 
130
63
  | Category | Items |
131
64
  | --- | --- |
132
- | **In scope** | In-app notification feed; SSE delivery path; read/unread state; basic retry on transient failures |
133
- | **Out of scope** | Email/SMS/push providers; marketing campaigns; per-user notification preferences beyond on/off |
134
- | **Deferred** | WebSocket channel; rich media attachments in notifications; full-text search across historical events |
135
-
136
- ## Discretion Areas
137
-
138
- - Client-side badge rendering strategy (optimistic vs server-confirmed) is implementation discretion.
139
- - Polling fallback backoff curve is implementation discretion if degraded-state UX remains explicit.
65
+ | In scope | durable in-app feed, SSE path, degraded-state UX |
66
+ | Out of scope | email/SMS/push channels, marketing flows |
67
+ | Deferred | WebSocket migration, rich-media payloads |
140
68
 
141
- ## Error & Rescue Registry (sample entry)
69
+ ## Reference Pattern Registry
142
70
 
143
- | Capability | Failure mode | Detection | Fallback |
144
- | --- | --- | --- | --- |
145
- | Event delivery | SSE connection drops mid-session | Client \`EventSource\` error event + heartbeat timeout | Fall back to REST polling every 30s until SSE reconnect succeeds; show subtle “live updates paused” banner |
146
-
147
- ## Completion Dashboard
148
-
149
- - Checklist findings: 9/9 complete (complex path)
150
- - Resolved decisions count: 7
151
- - Unresolved decisions: None
152
-
153
- ## Scope Summary
71
+ | Pattern | Disposition | Rationale |
72
+ | --- | --- | --- |
73
+ | Snapshot + stream handoff | accept | Proven consistency model |
74
+ | Queue-backed fan-out rewrite | defer | High cost for current demand |
154
75
 
155
- - Selected mode: SELECTIVE EXPANSION (cherry-pick durable feed on hold-scope baseline).
156
- - Accepted scope: durable feed + SSE + explicit degraded UX.
157
- - Deferred: WebSocket channel and rich-media/search enhancements.
158
- - Explicitly excluded: outbound channels and marketing workflows for v1.
159
- - Next-stage handoff: design — carry the durable-feed contract, SSE failover paths, and degraded-UX expectations into architecture lock-in.`,
160
- design: `## Codebase Investigation (blast-radius files)
76
+ ## Requirements
161
77
 
162
- | File | Current responsibility | Patterns discovered |
78
+ | R# | Requirement | Why |
163
79
  | --- | --- | --- |
164
- | \`src/api/routes/user.ts\` | User CRUD endpoints | Express router, Zod validation, throws \`AppError\` |
165
- | \`src/services/event-bus.ts\` | In-process pub/sub | EventEmitter wrapper, typed channels, no persistence |
166
- | \`src/middleware/auth.ts\` | JWT verification | Extracts user from token, attaches to \`req.context\` |
167
- | \`tests/integration/user.test.ts\` | User route tests | Supertest, factory helpers, \`beforeEach\` DB reset |
80
+ | R-1 | Feed is queryable for recent window | Baseline usability |
81
+ | R-2 | Live updates are timely and recoverable | Reliability |
82
+ | R-3 | Degraded state is explicit to users | No silent failure |
168
83
 
169
- Discovery: existing EventEmitter-based bus has no durability — notifications must add persistence layer on top, not replace the bus.
84
+ ## Boundary Stress-Tests
170
85
 
171
- ## Search Before Building (sample result)
86
+ - Stream disconnect while user is active -> banner + fallback path required.
87
+ - Snapshot/stream cursor mismatch -> deterministic recovery required.
88
+ `,
89
+ design: `## Blast Radius
172
90
 
173
- | Layer | Label | What to reuse first |
91
+ | File | Change type | Reason |
174
92
  | --- | --- | --- |
175
- | Layer 1 | stdlib | Built-in timers, structured logging patterns, standard error types |
176
- | Layer 2 | existing codebase | Existing auth middleware, existing API client wrapper, existing feature flags helper |
177
- | Layer 3 | npm | A small, well-maintained SSE helper (only if Layer 1–2 cannot cover framing/reconnect ergonomics) |
93
+ | \`src/services/notifications.ts\` | modify | persistence-aware publish path |
94
+ | \`src/api/routes/notifications.ts\` | modify | snapshot + stream endpoints |
95
+ | \`src/ui/feed.tsx\` | modify | degraded banner + reconnect states |
96
+ | \`tests/integration/notifications.test.ts\` | add/update | consistency + auth coverage |
178
97
 
179
- ## Architecture Diagram (mandatory)
98
+ ## Architecture Diagram
180
99
 
100
+ \`\`\`mermaid
101
+ flowchart LR
102
+ API --> Service --> Outbox --> Projector --> Feed
103
+ Service --> Stream
181
104
  \`\`\`
182
- ┌─────────────┐ ┌──────────────┐ ┌────────────────┐
183
- │ API Gateway │─────▶│ Notification │─────▶│ Event Publisher│
184
- └─────────────┘ │ Service │ └────────┬───────┘
185
- └──────┬───────┘ │
186
- │ ▼
187
- ┌──────▼───────┐ ┌────────────────┐
188
- │ Read Model │◀─────│ Outbox / Queue │
189
- │ (Feed Store) │ └────────────────┘
190
- └──────────────┘
191
- \`\`\`
192
-
193
- Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Queue (persist) → Read Model (project).
194
105
 
195
- ## What Already Exists
196
-
197
- | Sub-problem | Existing code/library | Layer | Reuse decision |
198
- | --- | --- | --- | --- |
199
- | Auth context extraction | \`src/middleware/auth.ts\` | Layer 1 | Reuse as-is |
200
- | Event fan-out | \`src/services/event-bus.ts\` | Layer 2 | Wrap with persistence adapter |
201
- | SSE framing | None | Layer 3 | Evaluate \`better-sse\` npm package |
202
- | Notification schema | None | — | New: define in \`src/schemas/notification.ts\` |
106
+ ## Failure Modes
203
107
 
204
- ## Failure Mode Table
205
-
206
- | Failure | Trigger | Detection | Mitigation | User impact |
207
- | --- | --- | --- | --- | --- |
208
- | SSE connection drop | Network interruption | Client heartbeat timeout (30s) | Auto-reconnect with exponential backoff + snapshot fallback | Brief delay (≤10s), no data loss |
209
- | Duplicate publish | Retry after timeout | Dedupe key check in outbox | Upsert with idempotency key | None (transparent) |
210
- | Queue backpressure | Spike >1000 events/s | Queue depth metric alarm | Back-pressure signal to publisher, shed non-critical events | Delayed delivery of low-priority notifications |
211
-
212
- ## Test Strategy
213
-
214
- - **Unit:** validator functions, dedupe-key logic, event schema factories — target 90%+ line coverage.
215
- - **Integration:** publisher → outbox → read-model pipeline via in-memory DB; SSE reconnect with simulated drops.
216
- - **E2E:** one happy-path browser test (publish → feed visible) and one degraded-path test (SSE down → REST fallback + banner).
217
-
218
- ## Performance Budget
219
-
220
- | Critical path | Metric | Target | Measurement method |
221
- | --- | --- | --- | --- |
222
- | Publish → visible in feed | p95 latency | ≤ 5 s | Integration test with deterministic clock + production Datadog SLO |
223
- | Feed snapshot load | p99 response time | ≤ 200 ms | Load test with 1 000 items per user |
224
- | SSE reconnect | Time to first event after drop | ≤ 3 s | Simulated disconnect in integration suite |
225
-
226
- ## NOT in scope
227
-
228
- - Outbound channels (email, push, SMS) — deferred to v2.
229
- - Admin notification management UI — separate workstream.
230
- - Notification preferences / mute rules — requires user settings redesign.
231
-
232
- ## Parallelization Strategy
233
-
234
- | Module | Depends on | Parallel lane | Conflict risk |
235
- | --- | --- | --- | --- |
236
- | Notification schema (T1) | — | Lane A | None |
237
- | Publisher + outbox (T2) | T1 | Lane A | None |
238
- | Client feed + SSE (T3) | T1, T2 | Lane B (after T1) | Shared event type definitions |
239
-
240
- ## Unresolved Decisions
241
-
242
- | Decision | Status | Options | Missing info | Default if unanswered |
243
- | --- | --- | --- | --- | --- |
244
- | Feed storage model | OPEN | (A) append-only event log, (B) mutable rows, (C) hybrid | Load testing results on read patterns | (A) append-only — safest for audit trail |
245
-
246
- ## Interface sketch (non-binding)
247
-
248
- - **Client → server:** \`GET /api/me/notifications/snapshot?limit=50\` plus optional cursor parameters (if adopted).
249
- - **Server → client:** \`GET /api/me/notifications/stream\` as SSE with periodic heartbeats.
250
-
251
- ## Completion Dashboard
252
-
253
- | Review Section | Status | Issues |
108
+ | Failure | Detection | Mitigation |
254
109
  | --- | --- | --- |
255
- | Architecture Review | issues-found-resolved | Decided on outbox pattern over direct pub/sub |
256
- | Code Quality Review | clear | |
257
- | Test Review | issues-found-resolved | Added integration test gap for SSE reconnect |
258
- | Performance Review | clear | — |
259
- | Distribution & Delivery Review | clear | — |
260
-
261
- **Decisions made:** 4 | **Unresolved:** 1 (feed storage model)
110
+ | Stream drops | heartbeat timeout | fallback polling + reconnect |
111
+ | Cursor gap | consistency check | replay snapshot delta |
112
+ | Auth mismatch | auth guard log | terminate stream + refresh |
262
113
 
263
- ## Quality bar for this stage
114
+ ## Test Strategy
264
115
 
265
- Design output should be **reviewable by someone who did not attend brainstorming**: they can trace from constraints → components → open decisions without reading code.`,
116
+ - Unit: merge logic, retry budget, projection idempotency.
117
+ - Integration: snapshot+stream consistency and auth boundaries.
118
+ - E2E: degraded banner and recovery UX.
119
+ `,
266
120
  spec: `## Acceptance Criteria
267
121
 
268
- | ID | Criterion (observable/measurable/falsifiable) | Design Decision Ref |
269
- | --- | --- | --- |
270
- | AC-1 | Given a signed-in user with an active session, when the server publishes a new notification event for that user, the client feed shows the new item within 5 seconds without a full page reload. | Architecture: SSE delivery path |
271
- | AC-2 | Given the same logical notification is published twice with the same dedupe key, when the client processes the stream, the feed contains exactly one visible item for that key. | Architecture: dedupe-key in event schema |
272
- | AC-3 | Given the live connection is unavailable, when the user opens the notifications panel, the UI shows a non-blocking "live updates paused" banner and loads the latest snapshot via REST within 2 seconds. | Architecture: REST fallback + degraded UX |
273
-
274
- ## Edge Cases
275
-
276
- | Criterion ID | Boundary case | Error case |
277
- | --- | --- | --- |
278
- | AC-1 | Notification published during client reconnect window (boundary: \u2264 5 s delivery still holds after reconnect). | Server publish fails mid-write — client never receives event; REST snapshot fills gap. |
279
- | AC-2 | Two events with identical dedupe key arrive within same SSE frame (boundary: only one row rendered). | Dedupe-key field missing — reject event at publisher and log error. |
280
- | AC-3 | SSE disconnects after exactly 30 s heartbeat timeout (boundary: banner appears within 1 s of timeout). | REST snapshot endpoint returns 500 — panel shows "unable to load" with retry button. |
281
-
282
- ## Constraints and Assumptions
283
-
284
- - **Constraints:** Max feed size 1 000 items per user. SSE heartbeat interval 30 s (server-side). REST snapshot p99 \u2264 200 ms. No new runtime dependencies.
285
- - **Assumptions:** Users have a single active session at a time for v1. Existing auth middleware provides user context. Event publisher is single-writer per user.
286
-
287
- ## Testability Map
288
-
289
- | Criterion ID | Verification approach | Command/manual steps |
290
- | --- | --- | --- |
291
- | AC-1 | Integration test: publish event \u2192 assert feed contains item within 5 s (deterministic clock). | \`pnpm vitest run tests/integration/notification-delivery.test.ts\` |
292
- | AC-2 | Unit test: publish same dedupe key twice \u2192 assert single row in feed store. | \`pnpm vitest run tests/unit/dedupe-feed.test.ts\` |
293
- | AC-3 | E2E test: kill SSE transport \u2192 assert banner visible + REST snapshot loads. | \`pnpm playwright test tests/e2e/degraded-mode.spec.ts\` |
122
+ | AC ID | Criterion | Requirement ref | Verification approach |
123
+ | --- | --- | --- | --- |
124
+ | AC-1 | Feed returns recent window reliably | R-1 | integration test |
125
+ | AC-2 | Live updates visible within agreed latency | R-2 | perf + integration |
126
+ | AC-3 | Disconnect shows degraded state promptly | R-3 | e2e scenario |
294
127
 
295
- ## Approval
128
+ ## Notes
296
129
 
297
- - Approved by: user
298
- - Date: 2026-04-14`,
130
+ - Criteria are observable, measurable, and falsifiable.
131
+ - Every AC maps to at least one task and one test path in plan/tdd.
132
+ `,
299
133
  plan: `## Dependency Graph
300
134
 
301
- \`\`\`
302
- T-1 ──▶ T-2 ──▶ T-3
303
- │ ▲
304
- └───────────────┘
305
- \`\`\`
306
-
307
- Parallel opportunity: T-1 is a prerequisite for both T-2 and T-3 (T-3 also needs T-2).
135
+ - D1 schema + persistence
136
+ - D2 API snapshot/stream
137
+ - D3 UI degraded-state handling
138
+ - D4 tests + observability
308
139
 
309
- ## Dependency Batches
140
+ ## Tasks
310
141
 
311
- #### Batch 1 (foundation)
312
- - Task IDs: T-1
313
- - Verification gate: schema tests pass, dedupe key fixtures validated
314
-
315
- #### Batch 2 (core logic)
316
- - Task IDs: T-2
317
- - Depends on: Batch 1 (T-1 complete)
318
- - Verification gate: integration test proves publish-to-outbox path
319
-
320
- #### Batch 3 (integration)
321
- - Task IDs: T-3
322
- - Depends on: Batch 2 (T-2 complete)
323
- - Verification gate: e2e tests pass for delivery, dedupe, and degraded mode
324
-
325
- Execution rule: complete and verify each batch before starting the next batch.
326
-
327
- ## Task List
328
-
329
- | Task ID | Description | Acceptance criterion | Verification command | Effort |
330
- | --- | --- | --- | --- | --- |
331
- | T-1 | Define notification event schema + dedupe key rules | AC-1, AC-2: schema contract + fixtures | \`\`\`pnpm vitest run tests/unit/notification-schema.test.ts\`\`\` |
332
- | T-2 | Implement publisher + outbox write path | AC-1: integration test (happy path publish) | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` |
333
- | T-3 | Implement client feed + SSE subscribe + REST fallback | AC-1, AC-2, AC-3: e2e tests including degraded mode | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` |
142
+ | Task ID | Description | Effort | Minutes |
143
+ | --- | --- | --- | --- |
144
+ | T-1 | schema + migration | M | 90 |
145
+ | T-2 | snapshot/stream API updates | M | 90 |
146
+ | T-3 | UI degraded-state path | M | 70 |
147
+ | T-4 | consistency + auth tests | M | 85 |
334
148
 
335
149
  ## Acceptance Mapping
336
150
 
337
- | Criterion ID | Task IDs |
151
+ | AC ID | Task IDs |
338
152
  | --- | --- |
339
- | AC-1 (delivery within 5s) | T-2, T-3 |
340
- | AC-2 (idempotency) | T-1, T-2 |
341
- | AC-3 (failure visibility) | T-3 |
342
-
343
- ## Risk Assessment
344
-
345
- | Task/Batch | Risk | Likelihood | Impact | Mitigation |
346
- | --- | --- | --- | --- | --- |
347
- | T-3 (Batch 3) | SSE reconnect logic complex | Medium | High | Spike reconnect in isolation before integrating with feed UI |
348
- | Batch 2 → 3 | Publisher API contract may shift | Low | Medium | Pin contract in T-1 schema; T-2 integration test validates |
153
+ | AC-1 | T-1, T-2, T-4 |
154
+ | AC-2 | T-2, T-4 |
155
+ | AC-3 | T-3, T-4 |
349
156
 
350
157
  ## WAIT_FOR_CONFIRM
351
- - Status: pending
352
- - Confirmed by:`,
353
- tdd: `## RED Evidence
354
-
355
- | Slice | Test name | Command | Failure output summary |
356
- | --- | --- | --- | --- |
357
- | S-1 (event schema + dedupe) | counts unique keys and unread items | \`\`\`pnpm vitest run tests/unit/dedupe-feed.test.ts\`\`\` | Cannot find module '../notificationFeed' |
358
- | S-2 (publisher outbox) | publishes event to outbox with dedupe key | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` | publishToOutbox is not a function |
359
- | S-3 (client feed + fallback) | shows notification within 5s via SSE | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` | Element [data-testid="feed-item"] not found |
360
-
361
- ## Acceptance Mapping
362
158
 
363
- | Slice | Plan task ID | Spec criterion ID |
364
- | --- | --- | --- |
365
- | S-1 | T-1 | AC-1, AC-2 |
366
- | S-2 | T-2 | AC-1 |
367
- | S-3 | T-3 | AC-1, AC-2, AC-3 |
368
-
369
- ## Failure Analysis
370
-
371
- | Slice | Expected missing behavior | Actual failure reason |
372
- | --- | --- | --- |
373
- | S-1 | notificationFeed module does not exist yet | Module import fails — correct: implementation missing |
374
- | S-2 | publishToOutbox function not implemented | Function not found — correct: write path missing |
375
- | S-3 | Feed UI not rendered, SSE not connected | DOM element missing — correct: client component not built |
159
+ Plan is ready to execute after user confirmation.
160
+ `,
161
+ tdd: `## RED
376
162
 
377
- ## GREEN Evidence
378
-
379
- - Full suite command: \`\`\`pnpm vitest run && pnpm playwright test\`\`\`
380
- - Full suite result: 47 tests passed (3 new + 44 existing), 0 failed, 0 skipped
163
+ | Slice | Failing test evidence |
164
+ | --- | --- |
165
+ | S-1 feed window | expected 30d window, got 7d |
166
+ | S-2 degraded banner | banner absent after forced disconnect |
381
167
 
382
- ## REFACTOR Notes
168
+ ## Acceptance Mapping
383
169
 
384
- - What changed: Extracted \`\`\`mergeLatestByDedupeKey\`\`\` helper from inline loop in \`\`\`summarizeDedupedFeed\`\`\`; moved SSE reconnect logic into \`\`\`useSSEConnection\`\`\` hook.
385
- - Why: Dedupe merge logic is reused by both publisher and client; reconnect logic was duplicated across components.
386
- - Behavior preserved: Full suite re-run confirms 47/47 pass after refactor.
170
+ | Slice | AC IDs |
171
+ | --- | --- |
172
+ | S-1 | AC-1 |
173
+ | S-2 | AC-3 |
387
174
 
388
- ## Traceability
175
+ ## GREEN
389
176
 
390
- - Plan task IDs: T-1, T-2, T-3
391
- - Spec criterion IDs: AC-1, AC-2, AC-3`,
392
- review: `## Layer 1 Verdict
177
+ - Targeted tests pass.
178
+ - Full suite re-run after fixes.
393
179
 
394
- | Criterion | Verdict | Evidence |
395
- | --- | --- | --- |
396
- | AC-1: Delivery within 5s without reload | PASS | \`notification-feed.e2e.ts:44-88\` asserts SSE-to-UI timing under mock clock |
397
- | AC-2: Dedupe — one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
398
- | AC-3: Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
180
+ ## REFACTOR
399
181
 
400
- ## Layer 2 Findings
182
+ - Reduced reconnect state duplication.
183
+ - Revalidated behavior with regression tests.
184
+ `,
185
+ review: `## Layer 1 — Spec Compliance
401
186
 
402
- | ID | Severity | Category | Description | Status |
187
+ | ID | Severity | Finding | Evidence | Status |
403
188
  | --- | --- | --- | --- | --- |
404
- | R-1 | Critical | correctness | Snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. | open |
405
- | R-2 | Important | performance | \`feedStore.merge()\` does full-array scan on every SSE event; O(n) per event where n is feed length. | open |
406
- | R-3 | Suggestion | architecture | SSE reconnect logic duplicated across \`useNotifications\` and \`usePresence\`; extract shared hook. | open |
189
+ | R-1 | low | AC mapping remains intact | trace table + tests | closed |
407
190
 
408
- ## Review Findings Contract
191
+ ## Layer 2 — Code Quality
409
192
 
410
- - See \`07-review-army.json\`
411
- - Reconciliation summary: 1 duplicate collapsed (R-1 reported by reviewer and security-reviewer), 0 conflicts
412
-
413
- ## Review Readiness Snapshot
414
-
415
- - Layer 1 complete: yes (3/3 criteria)
416
- - Layer 2 complete: yes (5 sections reviewed)
417
- - Review findings schema valid: yes
418
- - Open critical blockers: 1 (R-1)
419
- - Ship recommendation: BLOCKED until R-1 resolved
193
+ | ID | Severity | Finding | Evidence | Status |
194
+ | --- | --- | --- | --- | --- |
195
+ | R-2 | high | auth guard gap in stream query path | curl repro + failing test | open |
420
196
 
421
- ## Severity Summary
197
+ ## Victory Detector
422
198
 
423
- - Critical: 1
424
- - Important: 1
425
- - Suggestion: 1
199
+ Victory Detector: pass | fail
426
200
 
427
- ## Final Verdict
201
+ - Current verdict: fail (R-2 open)
202
+ `,
203
+ ship: `## Release Checklist
428
204
 
429
- - BLOCKED`,
430
- ship: `## Preflight Results
205
+ - version/changelog prepared
206
+ - test/build/preflight passed
207
+ - review blockers resolved or explicitly accepted
431
208
 
432
- - Review verdict: APPROVED_WITH_CONCERNS (R-1 resolved, R-2 accepted as known debt)
433
- - Build: pass (\`pnpm build\` succeeds)
434
- - Tests: pass (\`pnpm vitest run && pnpm playwright test\` — 47 passed, 0 failed)
435
- - Lint: pass (\`pnpm lint\` clean)
436
- - Type-check: pass (\`pnpm typecheck\` clean)
437
- - Working tree clean: yes (\`git status\` shows no uncommitted changes)
209
+ ## Victory Detector
438
210
 
439
- ## Release Notes
211
+ Victory Detector: pass | fail
440
212
 
441
- - **Added:** In-app notification feed with SSE updates and REST fallback snapshotting (AC-1, AC-3).
442
- - **Changed:** Notification payloads now include a stable dedupe key for idempotent rendering (AC-2).
443
- - **Fixed:** Panel no longer drops the newest item when reconnecting after sleep/resume.
444
- - **Breaking changes:** None.
213
+ - Current verdict: pass
445
214
 
446
215
  ## Rollback Plan
447
216
 
448
- - Trigger conditions: error rate on \`/notifications/stream\` exceeds 5% for >5 minutes, or p95 publish-to-visible lag exceeds 10s.
449
- - Rollback steps: \`git revert <merge-sha> && git push origin main\` then redeploy; if DB migrations shipped, run \`2026_04_12_notifications_cursor_down.sql\` before traffic.
450
- - Verification steps: confirm error rate returns to pre-release baseline within 10 minutes; smoke-test feed panel manually.
451
-
452
- ## Monitoring
453
-
454
- - Metrics/logs to watch: error rate on \`/notifications/stream\` and snapshot endpoint for 24h; p95 publish-to-visible lag via metrics dashboard.
455
- - Risk note (if no monitoring): N/A — monitoring is in place.
456
-
457
- ## Finalization
458
-
459
- - Selected enum: FINALIZE_OPEN_PR
460
- - Selected label: B
461
- - Execution result: PR #42 created via \`gh pr create\`; CI passed; squash-merged to main.
462
- - PR URL: https://github.com/example/repo/pull/42`,
217
+ - Trigger: error-rate or latency threshold breach
218
+ - Steps: revert + redeploy prior artifact
219
+ - Verification: key metrics return to baseline
220
+ `
463
221
  };
464
222
  const GOOD_BAD_EXAMPLES = {
465
- brainstorm: [
466
- {
467
- label: "Problem / success statement",
468
- good: "Problem: release checks are fragile and inconsistent between CI and local runs; invalid metadata sometimes reaches npm publish. Success: invalid release preconditions are caught before publish with explicit operator feedback, in both CI and local workflows. Constraints: no new runtime dependencies.",
469
- bad: "Problem: releases are broken. Success: make them better. Constraints: be careful.",
470
- lesson: "\"Make it better\" is not a success criterion — an agent cannot know when it is done. State the observable condition that proves success."
471
- },
472
- {
473
- label: "Alternative direction (one of 2–3)",
474
- good: "Option B: Pre-publish verifier script invoked from \`release.yml\` and a \`pnpm release:check\` target. Pros: one enforcement surface; fails fast locally. Cons: adds a script to maintain; must stay in sync with \`package.json\`. Rejected alternative: relying on npm lifecycle hooks only — they run too late to block publish.",
475
- bad: "We could also use a script, or hooks, or something in CI. We'll pick whichever is easier later.",
476
- lesson: "Alternatives are only useful if they are concrete and comparable. Name each one, call out pros/cons, and say what was rejected — otherwise \"later\" becomes \"never\" and the choice is made by accident."
477
- },
478
- {
479
- label: "Clarifying question",
480
- good: "Before I lock direction: should a failed release:check block the CI job (hard failure) or only warn and continue? The former is safer but costs a revert cycle when the check itself is wrong; the latter preserves velocity but can let bad metadata through. Recommend A (block). Pick: A) Block B) Warn-only C) Block in CI, warn locally.",
481
- bad: "Do you want it to fail or warn? Let me know.",
482
- lesson: "A good question gives the user context, a recommendation, and lettered options they can answer with one keystroke. \"Let me know\" shifts the framing cost back to the user."
483
- }
484
- ],
485
- scope: [
486
- {
487
- label: "In / out / deferred boundaries",
488
- good: "In scope: in-app notification feed, SSE delivery path, read/unread state, retry on transient failures. Out of scope: email/SMS/push providers, per-user preferences. Deferred: WebSocket channel, rich media, full-text search.",
489
- bad: "In scope: notifications. Out of scope: stuff we are not doing. Deferred: v2.",
490
- lesson: "Vague boundaries get relitigated in every subsequent stage. Enumerate concrete capabilities on each side — \"stuff we are not doing\" is not a decision."
491
- },
492
- {
493
- label: "Scope change trace",
494
- good: "Scope delta at 2026-04-15: user asked to add per-user mute preferences. Decision: moved from Out-of-scope → In-scope; acknowledged cost (≈1 day, +1 schema migration); risk: touches settings surface. Recorded in \`.cclaw/artifacts/03-design-<slug>.md#scope-trace\`. Requires re-running scope review before design lock.",
495
- bad: "Added mute preferences to scope.",
496
- lesson: "Scope changes silently are how projects drift. Every in↔out move needs a timestamp, a cost estimate, and a link to the next review it invalidates."
497
- }
498
- ],
499
- design: [
500
- {
501
- label: "Failure mode row",
502
- good: "Failure: SSE connection drop. Trigger: network interruption. Detection: client heartbeat timeout (30s). Mitigation: auto-reconnect with exponential backoff + REST snapshot fallback. User impact: ≤10s delay, no data loss.",
503
- bad: "Failure: network errors. Mitigation: retry and log. User impact: users may see issues sometimes.",
504
- lesson: "A failure row without a detection signal and a bounded user impact is aspirational, not a design. Name the trigger, the detector, and the recovery behavior."
505
- },
506
- {
507
- label: "Rejected design alternative",
508
- good: "Considered WebSocket instead of SSE. Rejected because: (1) our proxy layer strips upgrade headers; (2) one-way push fits the \"notification feed\" semantics; (3) SSE plays nicer with HTTP/2 fan-out. Trade-off accepted: no client→server channel; we will fall back to REST for the tiny set of acks.",
509
- bad: "We chose SSE. WebSocket could also work.",
510
- lesson: "A design without a rejected alternative reads like a requirement, not a decision. The rejection is the part that survives review — it tells future readers what trade-off was taken."
511
- },
512
- {
513
- label: "Diagram caption",
514
- good: "Figure 1 — Notification pipeline (sequence diagram): producer → outbox(durable) → relay → SSE stream → client. Label on relay shows \"at-least-once; dedupe by event_id\"; label on client shows \"merge by dedupe_key before render\".",
515
- bad: "Figure 1: notification flow.",
516
- lesson: "An unlabeled diagram is decoration. Every arrow needs a delivery guarantee, every box needs an action verb — otherwise the diagram contradicts the prose without anyone noticing."
517
- }
518
- ],
519
- spec: [
520
- {
521
- label: "Observable acceptance criterion",
522
- good: "AC-1: Given a signed-in user with an active session, when the server publishes a new notification event for that user, the client feed shows the new item within 5 seconds without a full page reload.",
523
- bad: "AC-1: Users should see their notifications quickly and reliably, with a good user experience.",
524
- lesson: "Spec criteria must be observable, measurable, and falsifiable. \"Quickly\" is a feeling; \"within 5 seconds without a full page reload\" is a test."
525
- },
526
- {
527
- label: "Negative / error-path criterion",
528
- good: "AC-4: Given the SSE connection drops mid-session, when the client detects no heartbeat for 30 seconds, the UI shows a \"Reconnecting…\" badge and automatically re-subscribes; missed events delivered since the last ACKed id are replayed exactly once.",
529
- bad: "AC-4: Handle errors gracefully.",
530
- lesson: "Error-path criteria are where most bugs hide. Write them with the same \"given/when/then\" rigor as happy-path — otherwise QA ends up inventing them at release time."
531
- },
532
- {
533
- label: "Non-functional budget",
534
- good: "NFR-2: p95 end-to-end publish-to-visible latency ≤5s under 1k concurrent subscribers on a 2-vCPU pod; CPU headroom ≥30% at steady state. Measurement: \`k6 run tests/load/notifications.js\`, report median + p95 + p99.",
535
- bad: "NFR-2: Performance should be good.",
536
- lesson: "Non-functional goals without numbers + a measurement command are aspirational. Pin the percentile, the load shape, and the script that produces the evidence."
537
- }
538
- ],
539
- plan: [
540
- {
541
- label: "Single task row",
542
- good: "T-2: Implement publisher + outbox write path. Acceptance: AC-1. Verification: \`pnpm vitest run tests/integration/publisher.test.ts\`. Depends on: T-1. Effort: M (≈4 min).",
543
- bad: "T-2: Build the backend. Verify: manual testing. Effort: a few days.",
544
- lesson: "A task without a single acceptance criterion and a reproducible verification command is a wish. If you cannot say how you will know it is done, you cannot ship it."
545
- },
546
- {
547
- label: "Dependency graph entry",
548
- good: "T-5 (consume SSE client) depends on T-3 (stream endpoint) and T-4 (auth cookie forwarding). Parallelizable with T-6 (read-state persistence). Blocks T-8 (end-to-end happy-path e2e).",
549
- bad: "T-5 depends on other tasks.",
550
- lesson: "The value of a dependency graph is mechanical scheduling. \"Depends on other tasks\" is a shrug — list the IDs so the execution order is unambiguous."
551
- }
552
- ],
553
- tdd: [
554
- {
555
- label: "RED → GREEN → REFACTOR slice",
556
- good: "RED: \`pnpm vitest run tests/unit/dedupe-feed.test.ts\` → \`publishToOutbox is not a function\`. GREEN (after minimal impl): same command, 47/47 pass, full suite. REFACTOR: extracted \`mergeLatestByDedupeKey\`; suite still 47/47.",
557
- bad: "Wrote the publisher code. Tests pass now. Will add unit tests later when I have time.",
558
- lesson: "Code written before a failing test is guessing validated after the fact. The RED failure IS the specification — without it, the GREEN pass proves nothing about the intended behavior."
559
- },
560
- {
561
- label: "Bug-fix reproduction test",
562
- good: "Bug B-17: dedup fails when two events arrive in the same ms. Prove-It RED: added \`tests/unit/dedupe-feed.test.ts > dedupes when timestamps collide\`; run → \`expected 1 item, received 2\`. Fix applied; same test passes; full suite still 47/47.",
563
- bad: "Fixed the duplicate rendering issue.",
564
- lesson: "A bug without a reproducing test is a bug that comes back. Ship the RED test as part of the fix — it is the contract that prevents regression."
565
- },
566
- {
567
- label: "Refactor-only slice (state-based)",
568
- good: "Refactor: moved heartbeat logic into \`useHeartbeat()\` hook. No behavior change intended. Evidence: no new tests; existing state-based tests \`feed-state.test.ts\` (42 assertions) still pass; coverage unchanged at 94%.",
569
- bad: "Refactored the component. Added some interaction mocks to check the new hook is called.",
570
- lesson: "A refactor should assert on state, not on call shape. If you had to rewrite your mocks, it was not a refactor — it was a redesign dressed as one."
571
- }
572
- ],
573
- review: [
574
- {
575
- label: "Critical finding",
576
- good: "R-1 Critical: snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. Evidence: integration test \`notification-consistency.test.ts:22-58\`. Status: open.",
577
- bad: "Looks good overall. A few small things could be polished, maybe refactor the merge logic. LGTM.",
578
- lesson: "\"LGTM\" is not a review — it is a signature on whatever the author shipped. Every finding needs a severity, a falsifiable description, evidence, and a status."
579
- },
580
- {
581
- label: "Security review row",
582
- good: "R-4 High (sec): SSE endpoint accepts any user_id in the query string; a logged-in attacker can subscribe to another user's stream. Evidence: \`curl\` repro in \`docs/notes/sec-r4.md\`. Fix: require auth cookie, filter events by session.user.id server-side. Status: fix in T-11; verified in \`notifications-auth.test.ts\`.",
583
- bad: "Might want to double-check auth on the SSE endpoint.",
584
- lesson: "Security findings without a reproduction step and a tied fix-task are suggestions, not reviews. Attach the curl (or equivalent), the fix task ID, and the verification test."
585
- }
586
- ],
587
- ship: [
588
- {
223
+ brainstorm: [{
224
+ label: "Problem framing",
225
+ good: "Names affected role, current failure mode, measurable target, and non-goals.",
226
+ bad: "Need to improve this area somehow.",
227
+ lesson: "Concrete framing prevents scope drift in downstream stages."
228
+ }],
229
+ scope: [{
230
+ label: "Boundary clarity",
231
+ good: "Clear in-scope/out-of-scope/deferred lists with concrete capabilities.",
232
+ bad: "Add improvements where useful.",
233
+ lesson: "Scope without hard boundaries becomes hidden commitment."
234
+ }],
235
+ design: [{
236
+ label: "Failure handling",
237
+ good: "Each failure row includes trigger, detection, and mitigation.",
238
+ bad: "Could fail, handle later.",
239
+ lesson: "Actionable design risk must be testable and operationally visible."
240
+ }],
241
+ spec: [{
242
+ label: "AC quality",
243
+ good: "AC includes measurable signal and explicit verification approach.",
244
+ bad: "System should work reliably.",
245
+ lesson: "Observable/falsifiable language is required for meaningful verification."
246
+ }],
247
+ plan: [{
248
+ label: "Task granularity",
249
+ good: "Tasks have bounded outputs, effort, and AC links.",
250
+ bad: "Implement feature end-to-end.",
251
+ lesson: "Execution speed depends on concrete, reviewable task slices."
252
+ }],
253
+ tdd: [{
254
+ label: "RED evidence",
255
+ good: "Includes failing output tied to one behavior slice.",
256
+ bad: "Tests failed at first.",
257
+ lesson: "Without concrete RED evidence, behavior intent is not auditable."
258
+ }],
259
+ review: [{
260
+ label: "Finding quality",
261
+ good: "Severity + falsifiable claim + evidence + status.",
262
+ bad: "LGTM with a few comments.",
263
+ lesson: "Review findings are decisions, not vibes."
264
+ }],
265
+ ship: [{
589
266
  label: "Rollback contract",
590
- good: "Rollback trigger: error rate on \`/notifications/stream\` >5% for 5 minutes, or p95 publish-to-visible lag >10s. Steps: \`git revert <merge-sha> && git push origin main\` then redeploy; run \`2026_04_12_notifications_cursor_down.sql\` before traffic. Verification: error rate returns to baseline within 10 minutes.",
591
- bad: "Rollback plan: revert the commit if anything goes wrong.",
592
- lesson: "\"Revert if anything goes wrong\" leaves the on-call engineer to invent the plan at 2 a.m. The rollback trigger is an operational contract: state the signal, the command, and the verification."
593
- },
594
- {
595
- label: "Preflight check",
596
- good: "Preflight: \`pnpm release:check\` ✅ (package metadata ok, changeset captured), \`pnpm test\` ✅ 195/195, \`pnpm build\` ✅, CI green on feat/notifications @ \`abc1234\`, rollback plan captured, migration reviewed. Finalization mode: Merge via squash.",
597
- bad: "All good, shipping it.",
598
- lesson: "A preflight is a checklist that names each gate and the command that proved it. \"All good\" is a vibe — it cannot be audited after the fact when the deploy misbehaves."
599
- }
600
- ]
267
+ good: "Named trigger, exact rollback steps, and verification condition.",
268
+ bad: "Revert if something goes wrong.",
269
+ lesson: "Rollback must be executable under incident pressure."
270
+ }]
601
271
  };
602
272
  export function stageGoodBadExamples(stage) {
603
273
  const samples = GOOD_BAD_EXAMPLES[stage];
@@ -606,7 +276,7 @@ export function stageGoodBadExamples(stage) {
606
276
  const blocks = [
607
277
  "## Good vs Bad (at-a-glance)",
608
278
  "",
609
- "Contrasting samples to calibrate the quality bar for this stage. Read before writing the artifact — mirror the **Good** shape, avoid the **Bad** shape. Each block targets a different axis of the stage so you can spot-check more than one dimension of your draft.",
279
+ "Contrasting samples to calibrate quality for this stage.",
610
280
  ""
611
281
  ];
612
282
  samples.forEach((sample, index) => {
@@ -671,49 +341,38 @@ function exampleSummaryBullets(stage) {
671
341
  return ["- Full artifact structure."];
672
342
  return headings.map((heading) => `- ${heading}`);
673
343
  }
674
- // Kept in sync with STAGE_EXAMPLES above so the inline summary matches the
675
- // reference file without duplicating the heavy text. Update whenever the
676
- // sample in STAGE_EXAMPLES gains or loses a top-level section.
677
344
  const STAGE_EXAMPLE_SECTION_HEADINGS = {
678
345
  brainstorm: [
679
346
  "Problem Decision Record (free-form Frame type label + universal framing fields)",
680
- "Reference Pattern Candidates and approaches with trade-offs",
681
- "Recommended direction + open questions",
682
- "Clarification log and decision record"
347
+ "Approaches with explicit trade-offs",
348
+ "Approach Reaction and Selected Direction"
683
349
  ],
684
350
  scope: [
685
351
  "In-scope / out-of-scope / deferred lists with concrete capabilities",
686
- "Reference Pattern Registry with accepted/rejected/deferred dispositions",
687
- "Requirements table with stable R# IDs",
688
- "Boundary stress-tests and non-negotiables",
689
- "Decision record for premise challenges"
352
+ "Reference Pattern Registry with clear dispositions",
353
+ "Requirements table with stable R# IDs"
690
354
  ],
691
355
  design: [
692
356
  "Blast-radius file list",
693
357
  "Mandatory architecture diagram (Mermaid)",
694
- "Reference-Grade Contracts for mirrored patterns",
695
- "Failure-mode table with detection + mitigation",
696
- "Test strategy + performance budget",
697
- "Completion dashboard + unresolved decisions"
358
+ "Failure-mode table with detection + mitigation"
698
359
  ],
699
360
  spec: [
700
361
  "Acceptance-criteria table (observable, measurable, falsifiable)",
701
362
  "Requirement-ref column tying each AC back to an R# from scope",
702
- "Verification-approach column",
703
- "Approval block"
363
+ "Verification-approach column"
704
364
  ],
705
365
  plan: [
706
- "Dependency graph + dependency batches",
366
+ "Dependency graph",
707
367
  "Task list with effort + minutes estimate per task",
708
- "Acceptance mapping (every AC task IDs)",
709
- "No-Placeholder scan row + WAIT_FOR_CONFIRM marker"
368
+ "Acceptance mapping (every AC -> task IDs)",
369
+ "WAIT_FOR_CONFIRM marker"
710
370
  ],
711
371
  tdd: [
712
- "RED evidence per vertical slice (failing test output)",
372
+ "RED evidence per vertical slice",
713
373
  "Acceptance mapping per slice",
714
- "GREEN evidence (full-suite pass)",
715
- "REFACTOR notes with behavior-preservation confirmation",
716
- "Test-pyramid shape + prove-it reproduction when applicable"
374
+ "GREEN evidence",
375
+ "REFACTOR notes with behavior-preservation confirmation"
717
376
  ],
718
377
  review: [
719
378
  "Spec-compliance findings (Layer 1)",
@@ -722,223 +381,9 @@ const STAGE_EXAMPLE_SECTION_HEADINGS = {
722
381
  "Victory Detector-backed go / no-go verdict"
723
382
  ],
724
383
  ship: [
725
- "Release checklist (version, changelog, tag, artifacts)",
726
- "Victory Detector: valid review, fresh preflight, rollback, finalization enum",
384
+ "Release checklist",
385
+ "Victory Detector: pass | fail",
727
386
  "Rollback plan with trigger, steps, verification",
728
- "Runbook (how to verify the release post-deploy)",
729
- "Sign-off block"
387
+ "Runbook and sign-off"
730
388
  ]
731
389
  };
732
- const DOMAIN_LABELS = {
733
- web: "Web app (full-stack)",
734
- cli: "CLI tool",
735
- library: "Library / SDK",
736
- "data-pipeline": "Data pipeline / ETL"
737
- };
738
- export const RESEARCH_FLEET_USAGE_EXAMPLE = [
739
- "Before drafting `.cclaw/artifacts/03-design-<slug>.md`, run `research/research-fleet.md` once and",
740
- "capture all four lenses in `.cclaw/artifacts/02a-research.md`.",
741
- "Dispatch semantics by harness: Claude/OpenCode/Codex = native subagents;",
742
- "Cursor = generic-dispatch Task mapping; role-switch only as degraded fallback.",
743
- "Design must include a `Research Fleet Synthesis` section that maps each",
744
- "lens to concrete architecture decisions and risks."
745
- ].join(" ");
746
- const STAGE_DOMAIN_SAMPLES = {
747
- brainstorm: [
748
- {
749
- domain: "web",
750
- label: "Direction",
751
- body: "Problem: admin dashboard orders table requires manual refresh to see new orders. Success: admins see new rows within 2s of server-side status change, no full navigation. Anti-success: WebSocket rewrite of the whole table stack when only one view needs live updates."
752
- },
753
- {
754
- domain: "cli",
755
- label: "Direction",
756
- body: "Problem: `cclaw archive` silently deletes 30+ day runs with no preview. Success: a `--dry-run` flag prints would-be-archived run IDs to stdout and exits 0; current behavior is unchanged without the flag. Anti-success: adding an interactive confirmation prompt that breaks CI scripts."
757
- },
758
- {
759
- domain: "library",
760
- label: "Direction",
761
- body: "Problem: consumers cannot validate hook JSON without importing internal modules. Success: `validateHookDocument(obj)` exported from the package root with typed result `{ ok, errors? }`. Anti-success: exposing the full Zod schema and forcing consumers to depend on Zod."
762
- },
763
- {
764
- domain: "data-pipeline",
765
- label: "Direction",
766
- body: "Problem: reruns of the orders job create duplicate `fact_orders` rows. Success: running the job twice on the same input leaves row count unchanged and `dbt test --select fact_orders` green. Anti-success: introducing a nightly dedup job that hides the underlying non-idempotency."
767
- }
768
- ],
769
- scope: [
770
- {
771
- domain: "web",
772
- label: "Scope line",
773
- body: "In: live-update `/dashboard/orders` table via SSE; out: notification drawer, mobile PWA, dashboards other than `orders`. Discretion: choice of SSE vs long-polling for legacy Safari. NOT in scope: rewriting the auth layer or the existing REST endpoints."
774
- },
775
- {
776
- domain: "cli",
777
- label: "Scope line",
778
- body: "In: add `--dry-run` to `cclaw archive`; out: redesigning archive formats, adding retention flags, or changing the default. Discretion: exact wording of stdout lines. NOT in scope: touching `init` / `sync` / `doctor` subcommands."
779
- },
780
- {
781
- domain: "library",
782
- label: "Scope line",
783
- body: "In: expose `validateHookDocument` + types from package root; out: rewriting hook schema, adding new hook kinds, dropping old ones. Discretion: whether to re-export `HookDocument` as type-only. NOT in scope: migrating consumers."
784
- },
785
- {
786
- domain: "data-pipeline",
787
- label: "Scope line",
788
- body: "In: dedup step between `raw.orders` and `fact_orders` keyed on `(order_id, event_ts)`; out: redesigning ingestion, adding new partitions, or touching downstream marts. Discretion: `row_number()` vs `qualify`-style dedup. NOT in scope: backfilling historical partitions."
789
- }
790
- ],
791
- design: [
792
- {
793
- domain: "web",
794
- label: "Parallel research fleet handoff",
795
- body: RESEARCH_FLEET_USAGE_EXAMPLE
796
- },
797
- {
798
- domain: "web",
799
- label: "Architecture note",
800
- body: "Data flow: server-side order update → publish to `orders-updates` channel → SSE endpoint `/api/orders/stream` → `useOrderFeed` hook merges into React state → row rerenders. Failure mode: SSE connection drop → exponential-backoff reconnect + on-reconnect REST snapshot fallback. Trade-off accepted: no client→server channel (SSE one-way); existing REST mutations cover it."
801
- },
802
- {
803
- domain: "cli",
804
- label: "Architecture note",
805
- body: "Flag is parsed by the existing Zod CLI parser; `--dry-run` short-circuits before any filesystem mutation, shares formatter `src/cli/format.ts` with `status`. Failure mode: formatter output differs between `status` and `archive --dry-run` → centralize format. Trade-off: we print run IDs unsorted to keep the code path identical to the real archive path."
806
- },
807
- {
808
- domain: "library",
809
- label: "Architecture note",
810
- body: "Re-export `validateHookDocument` from package root; rename internal `__validate` to match the exported name so callsites and the export converge. Failure mode: consumers importing from `/dist/internal` break on the rename → add a deprecation re-export shim for one minor. Trade-off: slightly wider public surface today buys us a smaller public surface tomorrow."
811
- },
812
- {
813
- domain: "data-pipeline",
814
- label: "Architecture note",
815
- body: "Insert `int_orders_deduped` CTE between staging and fact, keyed on `(order_id, event_ts)` with `row_number() = 1` per key; `fact_orders` reads from the deduped model only. Failure mode: late-arriving events with an earlier `event_ts` would flap the chosen row → tiebreak on `ingest_ts DESC`. Trade-off: the job now does one extra pass; measured +8% runtime, within budget."
816
- }
817
- ],
818
- spec: [
819
- {
820
- domain: "web",
821
- label: "AC",
822
- body: "AC-W1: Given a signed-in admin viewing `/dashboard/orders`, when an order's status changes server-side, the row updates within 2s without a full navigation (assert via `pnpm playwright test orders-live.spec.ts`)."
823
- },
824
- {
825
- domain: "cli",
826
- label: "AC",
827
- body: "AC-C1: Given `cclaw init --claude` run in an empty directory, exit code is `0`, `.cclaw/config.yaml` is created with `harnesses: [claude]`, and stderr contains no warnings (asserted by `tests/integration/init-sync-doctor.test.ts`)."
828
- },
829
- {
830
- domain: "library",
831
- label: "AC",
832
- body: "AC-L1: `validateHookDocument(obj)` returns `{ ok: true }` for every fixture under `tests/fixtures/valid-hooks/` and `{ ok: false, errors: [...] }` with at least one message for every fixture under `tests/fixtures/invalid-hooks/`."
833
- },
834
- {
835
- domain: "data-pipeline",
836
- label: "AC",
837
- body: "AC-D1: For any `orders.csv` input, the pipeline emits exactly one row per `(order_id, event_ts)` pair to `warehouse.fact_orders`; running the job twice on the same input is idempotent (row count unchanged, verified by `dbt test --select fact_orders`)."
838
- }
839
- ],
840
- plan: [
841
- {
842
- domain: "web",
843
- label: "Task",
844
- body: "T-W-3 `[~4m]`: Wire SSE endpoint `/api/orders/stream` into `useOrderFeed` hook. AC-W1. Verify: `pnpm playwright test orders-live.spec.ts`. Depends on: T-W-2."
845
- },
846
- {
847
- domain: "cli",
848
- label: "Task",
849
- body: "T-C-2 `[~3m]`: Add `--dry-run` flag to `cclaw archive` that prints the would-be-archived run IDs to stdout and exits 0. AC-C3. Verify: `node dist/cli.js archive --dry-run` + `tests/unit/cli-parse.test.ts`."
850
- },
851
- {
852
- domain: "library",
853
- label: "Task",
854
- body: "T-L-1 `[~5m]`: Expose `validateHookDocument` from the package root and re-export its types. AC-L1. Verify: `pnpm build && node -e \"console.log(require('./dist').validateHookDocument)\"`."
855
- },
856
- {
857
- domain: "data-pipeline",
858
- label: "Task",
859
- body: "T-D-2 `[~5m]`: Add dedup step keyed on `(order_id, event_ts)` between `raw.orders` and `fact_orders`. AC-D1. Verify: `dbt run --select fact_orders+ && dbt test --select fact_orders`."
860
- }
861
- ],
862
- tdd: [
863
- {
864
- domain: "web",
865
- label: "RED→GREEN→REFACTOR",
866
- body: "RED: `pnpm playwright test orders-live.spec.ts` → timeout waiting for row update. GREEN: wired SSE event → row rerenders via `useOrderFeed`. REFACTOR: extracted `applyOrderEvent(row, event)` pure helper; 87/87 tests still pass."
867
- },
868
- {
869
- domain: "cli",
870
- label: "RED→GREEN→REFACTOR",
871
- body: "RED: `tests/unit/cli-parse.test.ts` expects `--dry-run` flag → `unknown option` error. GREEN: added to the Zod parser; 19/19 pass. REFACTOR: hoisted the dry-run formatter into `src/cli/format.ts` shared with `status`."
872
- },
873
- {
874
- domain: "library",
875
- label: "RED→GREEN→REFACTOR",
876
- body: "RED: `tests/unit/hook-schema.test.ts` imports `validateHookDocument` from package root → `export not found`. GREEN: added re-export + types. REFACTOR: renamed internal `__validate` to `validateHookDocument` so the export name matches the source."
877
- },
878
- {
879
- domain: "data-pipeline",
880
- label: "RED→GREEN→REFACTOR",
881
- body: "RED: `dbt test --select fact_orders` → `unique test on (order_id, event_ts)` fails on re-run. GREEN: added `row_number()` dedup in the staging model. REFACTOR: extracted the dedup CTE into `int_orders_deduped` for reuse by `fact_returns`."
882
- }
883
- ],
884
- review: [
885
- {
886
- domain: "web",
887
- label: "Finding",
888
- body: "R-W-1 (Critical, correctness): `useOrderFeed` does not unsubscribe from the SSE channel on unmount — two mounts on the same page double-count rows. Evidence: `tests/unit/order-feed-hook.test.ts > unmount` fails. Fix owner: frontend; blocks ship."
889
- },
890
- {
891
- domain: "cli",
892
- label: "Finding",
893
- body: "R-C-2 (Suggestion, UX): `cclaw archive --dry-run` prints run IDs without a trailing newline, breaking downstream `xargs` pipelines. Evidence: `echo '' | xargs -I{} printf '%s\\n' {}` contrast. Fix owner: CLI; non-blocking."
894
- },
895
- {
896
- domain: "library",
897
- label: "Finding",
898
- body: "R-L-1 (Important, surface-area): the new `validateHookDocument` export is documented in README but missing from `src/index.ts` — `import { validateHookDocument } from 'cclaw'` fails despite the docs. Evidence: `pnpm build && node -e \"require('./dist').validateHookDocument\"` prints `undefined`. Fix owner: library; blocks ship."
899
- },
900
- {
901
- domain: "data-pipeline",
902
- label: "Finding",
903
- body: "R-D-1 (Critical, correctness): dedup CTE orders by `event_ts ASC` instead of `event_ts DESC` — on duplicate events we keep the older row. Evidence: `dbt test --select fact_orders` green but fixture `tests/fixtures/orders-dupes.csv` shows wrong survivor. Fix owner: analytics-eng; blocks ship."
904
- }
905
- ],
906
- ship: [
907
- {
908
- domain: "web",
909
- label: "Rollback",
910
- body: "Trigger: error rate on `/api/orders/stream` > 2% for 5 minutes, or p95 latency > 1.5s for 10 minutes. Steps: `vercel rollback <deployment>`; run `2026_04_14_revert_orders_stream.sql` before traffic returns. Verify: error rate returns to baseline within 10 minutes on the `orders-live` dashboard."
911
- },
912
- {
913
- domain: "cli",
914
- label: "Rollback",
915
- body: "Trigger: `cclaw init --claude` exits non-zero on a fresh tmp dir, OR `cclaw doctor` regresses (FAIL count increases) on the smoke matrix. Steps: `npm unpublish cclaw-cli@<version>` (within the 72h window) or `npm deprecate cclaw-cli@<version> '<reason>'`; publish the previous patch. Verify: `npx cclaw-cli@latest --version` prints the previous version."
916
- },
917
- {
918
- domain: "library",
919
- label: "Rollback",
920
- body: "Trigger: any consumer reports `validateHookDocument` no longer exported, OR the CI `dual-package-check` job fails. Steps: `npm deprecate cclaw-cli@<version> 'broken package export — use <prev>'`; publish the previous minor with a patch bump; emit changelog `## Rollback` entry. Verify: a smoke consumer project `pnpm add cclaw-cli@latest` imports cleanly."
921
- },
922
- {
923
- domain: "data-pipeline",
924
- label: "Rollback",
925
- body: "Trigger: `dbt test --select fact_orders` fails on production run, OR downstream dashboard MAU count drops >10% week-over-week. Steps: disable the new model via `dbt_project.yml` + `dbt run --select state:modified` with the previous git SHA; rerun backfill `dagster asset materialize fact_orders --partition <yesterday>`. Verify: `fact_orders` row count within ±1% of the previous week's baseline."
926
- }
927
- ]
928
- };
929
- export function stageDomainExamples(stage) {
930
- const samples = STAGE_DOMAIN_SAMPLES[stage];
931
- if (!samples || samples.length === 0)
932
- return "";
933
- const lines = [
934
- "## Living Examples by Domain",
935
- "",
936
- "Use the row matching your project shape to calibrate voice, specificity, and command choice. The rows are deliberately terse — copy the **shape**, not the text.",
937
- ""
938
- ];
939
- for (const sample of samples) {
940
- lines.push(`**${DOMAIN_LABELS[sample.domain]} — ${sample.label}:** ${sample.body}`);
941
- lines.push("");
942
- }
943
- return lines.join("\n");
944
- }