cclaw-cli 0.5.16 → 0.5.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,3 +18,16 @@ export declare function validateReviewArmy(projectRoot: string): Promise<{
18
18
  valid: boolean;
19
19
  errors: string[];
20
20
  }>;
21
+ export interface ReviewVerdictConsistencyResult { // Value resolved by checkReviewVerdictConsistency
22
+ ok: boolean; // true only when `errors` is empty
23
+ errors: string[]; // human-readable messages, one per inconsistency or unreadable verdict
24
+ finalVerdict: "APPROVED" | "APPROVED_WITH_CONCERNS" | "BLOCKED" | "UNKNOWN"; // "UNKNOWN" unless exactly one verdict token was read from the "Final Verdict" section
25
+ openCriticalCount: number; // review-army findings whose severity is "Critical" and status is "open"
26
+ shipBlockerCount: number; // string entries found in reconciliation.shipBlockers
27
+ }
28
+ /**
29
+ * Ensure the narrative verdict in 07-review.md is consistent with the
30
+ * structured review-army reconciliation. A review cannot declare
31
+ * APPROVED while open Critical findings or shipBlockers remain.
32
+ * Reads 07-review.md and 07-review-army.json from the runtime artifacts directory under projectRoot and resolves with the detected verdict, both counts, and any error messages; a missing file is not reported as an error.
+ */
33
+ export declare function checkReviewVerdictConsistency(projectRoot: string): Promise<ReviewVerdictConsistencyResult>;
@@ -134,7 +134,61 @@ function extractRequiredKeywords(rule) {
134
134
  return [];
135
135
  return phrases;
136
136
  }
137
- function validateSectionBody(sectionBody, rule) {
137
+ const VAGUE_AC_ADJECTIVES = [ // Words that make an acceptance criterion unmeasurable; lineContainsVagueAdjective scans them in array order and returns the first hit
138
+ "fast",
139
+ "quick",
140
+ "slow",
141
+ "fast enough", // spaces in an entry match any whitespace run; "fast" above is tried first, so it is the entry reported for "fast enough" text
142
+ "quickly",
143
+ "intuitive",
144
+ "robust",
145
+ "reliable",
146
+ "scalable",
147
+ "simple",
148
+ "easy",
149
+ "user-friendly",
150
+ "user friendly",
151
+ "nice",
152
+ "good",
153
+ "clean",
154
+ "secure enough",
155
+ "responsive",
156
+ "efficient",
157
+ "performant",
158
+ "smooth",
159
+ "seamless",
160
+ "modern"
161
+ ];
162
/**
 * Tell whether a trimmed Markdown line is a table delimiter row,
 * for example `| --- | :---: |`.
 *
 * Every cell must hold at least one hyphen, optionally wrapped in alignment
 * colons and padding. Rows made only of pipes, colons or blanks (such as an
 * empty data row `|  |  |`) are therefore NOT delimiter rows, so they can no
 * longer be mistaken for the header/body boundary of a table.
 *
 * @param {string} line - One line with surrounding whitespace already removed.
 * @returns {boolean} True when the line is a delimiter row.
 */
function isSeparatorRow(line) {
    // One or more cells of the form `<pad>:?-+:?<pad>|` after the leading pipe.
    return /^\|(?:[ \t]*:?-+:?[ \t]*\|)+$/u.test(line);
}
165
/**
 * Collect the data rows of every Markdown table found in a section body.
 *
 * Header rows (the pipe rows that precede a delimiter row) and the delimiter
 * rows themselves are skipped; each remaining row is converted with
 * `parseMarkdownTableRow`.
 *
 * A table ends at the first line that does not start with `|`. The
 * "seen delimiter" state is reset there so that, when a section contains
 * several tables, the header row of a later table is not returned as data.
 *
 * @param {string} sectionBody - Raw text of one section.
 * @returns {Array} Parsed data rows, in document order.
 */
function getMarkdownTableRows(sectionBody) {
    const rows = [];
    let sawSeparator = false;
    for (const rawLine of sectionBody.split(/\r?\n/)) {
        const line = rawLine.trim();
        if (!line.startsWith("|")) {
            // Blank line or prose: the current table (if any) is over.
            sawSeparator = false;
            continue;
        }
        // Pipe-led lines without a closing pipe are ignored but do not end the table.
        if (!/^\|.*\|$/u.test(line))
            continue;
        if (isSeparatorRow(line)) {
            sawSeparator = true;
            continue;
        }
        // Anything before the delimiter row is the header.
        if (!sawSeparator)
            continue;
        rows.push(parseMarkdownTableRow(line));
    }
    return rows;
}
182
/**
 * Matchers for each entry of VAGUE_AC_ADJECTIVES, compiled once and kept in
 * list order. Regex metacharacters in an entry are escaped, and each space in
 * an entry matches any run of whitespace. A hit requires a non-letter (or the
 * string edge) on both sides, so "quick" does not fire inside "quickly".
 */
const VAGUE_AC_ADJECTIVE_PATTERNS = VAGUE_AC_ADJECTIVES.map((adjective) => {
    const source = adjective
        .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
        .replace(/ /g, "\\s+");
    return {
        adjective,
        pattern: new RegExp(`(?:^|[^A-Za-z])${source}(?:[^A-Za-z]|$)`, "iu")
    };
});
/**
 * Find the first vague adjective that appears in the given text.
 *
 * @param {string} text - Criterion text to scan; matching is case-insensitive.
 * @returns {string | null} The matching VAGUE_AC_ADJECTIVES entry (first in
 *   list order), or null when none occurs.
 */
function lineContainsVagueAdjective(text) {
    const lower = text.toLowerCase();
    for (const { adjective, pattern } of VAGUE_AC_ADJECTIVE_PATTERNS) {
        if (pattern.test(lower))
            return adjective;
    }
    return null;
}
191
+ function validateSectionBody(sectionBody, rule, sectionName) {
138
192
  const bodyLines = sectionBody.split(/\r?\n/).map((line) => line.trim());
139
193
  const meaningful = meaningfulLineCount(sectionBody);
140
194
  if (meaningful === 0) {
@@ -231,6 +285,29 @@ function validateSectionBody(sectionBody, rule) {
231
285
  };
232
286
  }
233
287
  }
288
+ if (normalizeHeadingTitle(sectionName).toLowerCase() === "acceptance criteria" &&
289
+ /observable[\s,]*measurable[\s,]+(and )?falsifiable/iu.test(rule)) {
290
+ const rows = getMarkdownTableRows(sectionBody);
291
+ for (const row of rows) {
292
+ const criterionText = row[1] ?? row[0] ?? "";
293
+ const adjective = lineContainsVagueAdjective(criterionText);
294
+ if (adjective) {
295
+ return {
296
+ ok: false,
297
+ details: `Acceptance criterion uses vague adjective "${adjective}" without a measurable predicate: "${criterionText.slice(0, 140)}". Rewrite with a numeric threshold or boolean outcome.`
298
+ };
299
+ }
300
+ const hasDigit = /\d/u.test(criterionText);
301
+ const hasMeasurableVerb = /\b(blocks?|rejects?|returns?|matches?|equals?|emits?|succeeds?|fails?|publishes?|logs?|persists?|reads?|writes?|creates?|deletes?|throws?|contains?|restores?|exceeds?|responds?|warns?|quarantines?|includes?|raises?|passes?|denies|refuses|exits|succeeds|completes|prevents|allows|maps|points|signals|surfaces|records|produces|accepts|requires)\b/iu.test(criterionText);
302
+ const hasMeaningfulText = /[A-Za-z]/u.test(criterionText) && criterionText.trim().length >= 12;
303
+ if (hasMeaningfulText && !hasDigit && !hasMeasurableVerb) {
304
+ return {
305
+ ok: false,
306
+ details: `Acceptance criterion lacks a measurable predicate (no numeric threshold, no observable verb like blocks/returns/publishes/matches): "${criterionText.slice(0, 140)}". Rewrite so the criterion is falsifiable by a single test.`
307
+ };
308
+ }
309
+ }
310
+ }
234
311
  return {
235
312
  ok: true,
236
313
  details: "Section heading and content satisfy lint heuristics."
@@ -273,7 +350,7 @@ export async function lintArtifact(projectRoot, stage) {
273
350
  const body = hasHeading ? sectionBodyByName(sections, v.section) : null;
274
351
  const validation = body === null
275
352
  ? { ok: false, details: `No ## heading matching required section "${v.section}".` }
276
- : validateSectionBody(body, v.validationRule);
353
+ : validateSectionBody(body, v.validationRule, v.section);
277
354
  const found = hasHeading && validation.ok;
278
355
  findings.push({
279
356
  section: v.section,
@@ -384,18 +461,19 @@ export async function validateReviewArmy(projectRoot) {
384
461
  if (!isStringArray(o.reportedBy) || o.reportedBy.length === 0) {
385
462
  errors.push(`findings[${i}].reportedBy must be a non-empty string array.`);
386
463
  }
387
- if (o.location !== undefined) {
388
- if (o.location === null || typeof o.location !== "object" || Array.isArray(o.location)) {
389
- errors.push(`findings[${i}].location must be an object when present.`);
464
+ if (o.location === undefined || o.location === null) {
465
+ errors.push(`findings[${i}].location is required and must be an object with file + line.`);
466
+ }
467
+ else if (typeof o.location !== "object" || Array.isArray(o.location)) {
468
+ errors.push(`findings[${i}].location must be an object with file + line.`);
469
+ }
470
+ else {
471
+ const loc = o.location;
472
+ if (!isNonEmptyString(loc.file)) {
473
+ errors.push(`findings[${i}].location.file must be a non-empty string.`);
390
474
  }
391
- else {
392
- const loc = o.location;
393
- if (!isNonEmptyString(loc.file)) {
394
- errors.push(`findings[${i}].location.file must be a non-empty string.`);
395
- }
396
- if (!isFiniteNumber(loc.line) || loc.line < 1) {
397
- errors.push(`findings[${i}].location.line must be a positive number.`);
398
- }
475
+ if (!isFiniteNumber(loc.line) || loc.line < 1) {
476
+ errors.push(`findings[${i}].location.line must be a positive number.`);
399
477
  }
400
478
  }
401
479
  if (o.recommendation !== undefined && !isNonEmptyString(o.recommendation)) {
@@ -445,6 +523,21 @@ export async function validateReviewArmy(projectRoot) {
445
523
  for (const msId of rec.multiSpecialistConfirmed) {
446
524
  if (!findingIds.has(msId)) {
447
525
  errors.push(`reconciliation.multiSpecialistConfirmed references unknown finding id "${msId}".`);
526
+ continue;
527
+ }
528
+ if (Array.isArray(root.findings)) {
529
+ const finding = root.findings.find((f) => {
530
+ return f && typeof f === "object" && !Array.isArray(f) && f.id === msId;
531
+ });
532
+ if (finding && typeof finding === "object" && !Array.isArray(finding)) {
533
+ const reportedBy = finding.reportedBy;
534
+ const count = Array.isArray(reportedBy)
535
+ ? new Set(reportedBy.filter((v) => typeof v === "string")).size
536
+ : 0;
537
+ if (count < 2) {
538
+ errors.push(`reconciliation.multiSpecialistConfirmed entry "${msId}" must be confirmed by at least 2 distinct reviewers (found ${count}).`);
539
+ }
540
+ }
448
541
  }
449
542
  }
450
543
  }
@@ -474,3 +567,79 @@ export async function validateReviewArmy(projectRoot) {
474
567
  }
475
568
  return { valid: errors.length === 0, errors };
476
569
  }
570
/**
 * Ensure the narrative verdict in 07-review.md is consistent with the
 * structured review-army reconciliation. A review cannot declare
 * APPROVED while open Critical findings or shipBlockers remain.
 *
 * Verdict detection: the "Final Verdict" section must contain exactly one of
 * APPROVED_WITH_CONCERNS, APPROVED or BLOCKED as a whole word. Zero or
 * several distinct tokens produce an error and leave the verdict "UNKNOWN".
 * A missing 07-review.md is not an error; a present file without the section is.
 *
 * Counting: open Critical findings and string shipBlockers are read from
 * 07-review-army.json when it exists. Unreadable or malformed JSON leaves
 * both counts at 0 without adding an error.
 *
 * @param {string} projectRoot - Project directory containing the runtime root.
 * @returns {Promise<{ok: boolean, errors: string[], finalVerdict: string,
 *   openCriticalCount: number, shipBlockerCount: number}>} `ok` is true when
 *   `errors` is empty.
 */
export async function checkReviewVerdictConsistency(projectRoot) {
    const errors = [];
    const reviewMdPath = path.join(projectRoot, RUNTIME_ROOT, "artifacts", "07-review.md");
    const armyJsonPath = path.join(projectRoot, RUNTIME_ROOT, "artifacts", "07-review-army.json");
    let finalVerdict = "UNKNOWN";
    if (await exists(reviewMdPath)) {
        const raw = await fs.readFile(reviewMdPath, "utf8");
        const sections = extractH2Sections(raw);
        const verdictBody = sectionBodyByName(sections, "Final Verdict");
        if (verdictBody) {
            // `_` is a word character, so \bAPPROVED\b cannot match inside
            // APPROVED_WITH_CONCERNS. Each token is therefore tested on its own;
            // a standalone APPROVED next to APPROVED_WITH_CONCERNS is a real
            // conflict and must surface as "multiple verdict tokens".
            const chosen = ["APPROVED_WITH_CONCERNS", "APPROVED", "BLOCKED"].filter((token) => {
                return new RegExp(`\\b${token}\\b`, "u").test(verdictBody);
            });
            if (chosen.length === 1) {
                finalVerdict = chosen[0];
            }
            else if (chosen.length > 1) {
                errors.push(`Final Verdict section lists multiple verdict tokens (${chosen.join(", ")}). Select exactly one.`);
            }
            else {
                errors.push('Final Verdict section does not select APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.');
            }
        }
        else {
            errors.push('07-review.md is missing the "## Final Verdict" section.');
        }
    }
    let openCriticalCount = 0;
    let shipBlockerCount = 0;
    if (await exists(armyJsonPath)) {
        try {
            const raw = await fs.readFile(armyJsonPath, "utf8");
            const parsed = JSON.parse(raw);
            // JSON.parse may yield null or a primitive; treat those as "no data".
            const root = parsed && typeof parsed === "object" ? parsed : {};
            const findings = Array.isArray(root.findings) ? root.findings : [];
            for (const f of findings) {
                if (!f || typeof f !== "object" || Array.isArray(f))
                    continue;
                if (f.severity === "Critical" && f.status === "open") {
                    openCriticalCount++;
                }
            }
            const rec = root.reconciliation && typeof root.reconciliation === "object" && !Array.isArray(root.reconciliation)
                ? root.reconciliation
                : null;
            if (rec && Array.isArray(rec.shipBlockers)) {
                shipBlockerCount = rec.shipBlockers.filter((v) => typeof v === "string").length;
            }
        }
        catch {
            // JSON validity is the concern of validateReviewArmy; skip silently here.
        }
    }
    if (finalVerdict === "APPROVED" && (openCriticalCount > 0 || shipBlockerCount > 0)) {
        errors.push(`Final Verdict is APPROVED but review-army has ${openCriticalCount} open Critical finding(s) and ${shipBlockerCount} shipBlocker(s). Use BLOCKED or APPROVED_WITH_CONCERNS.`);
    }
    return {
        ok: errors.length === 0,
        errors,
        finalVerdict,
        openCriticalCount,
        shipBlockerCount
    };
}
@@ -94,10 +94,10 @@ export const CCLAW_AGENTS = [
94
94
  },
95
95
  {
96
96
  name: "security-reviewer",
97
- description: "PROACTIVE after auth, crypto, secrets, parsers, or sensitive data paths change. MUST BE USED when trust boundaries move, new external inputs arrive, or LLM/tool output influences privileged actions.",
97
+ description: "MANDATORY during every review stage. Even when no auth, crypto, secrets, parsers, or sensitive data paths changed, produce an explicit 'no-change' security attestation. MUST BE USED when trust boundaries move, new external inputs arrive, or LLM/tool output influences privileged actions.",
98
98
  tools: ["Read", "Grep", "Glob"],
99
99
  model: "balanced",
100
- activation: "proactive",
100
+ activation: "mandatory",
101
101
  relatedStages: ["review", "design"],
102
102
  body: [
103
103
  "You are a **security vulnerability detection** specialist focused on practical exploitability.",
@@ -1,16 +1,16 @@
1
1
  const STAGE_EXAMPLES = {
2
- brainstorm: `### Context
2
+ brainstorm: `## Context
3
3
 
4
4
  - **Project state:** Monorepo with CI pipeline using custom release scripts. Release checks are scattered across shell scripts with no shared validation logic.
5
5
  - **Relevant existing code/patterns:** \`scripts/pre-publish.sh\` does metadata checks. \`src/release/\` has partial validation helpers.
6
6
 
7
- ### Problem
7
+ ## Problem
8
8
 
9
9
  - **What we're solving:** release checks are fragile and inconsistent between CI and local runs. Invalid metadata sometimes reaches npm publish.
10
10
  - **Success criteria:** invalid release preconditions are caught before publish with explicit operator feedback, in both CI and local workflows.
11
11
  - **Constraints:** no new runtime dependencies; must work within existing CI pipeline structure.
12
12
 
13
- ### Clarifying Questions
13
+ ## Clarifying Questions
14
14
 
15
15
  | # | Question | Answer | Decision impact |
16
16
  | --- | --- | --- | --- |
@@ -18,7 +18,7 @@ const STAGE_EXAMPLES = {
18
18
  | 2 | Should the validation logic live in a reusable module or stay as shell scripts? | Reusable module. | Architecture: shared TypeScript module imported by CI and local tooling, not duplicated shell scripts. |
19
19
  | 3 | For v1, prioritize rapid delivery or maximum configurability? | Rapid delivery. | Minimal deterministic validation surface; defer plugin/config system to v2. |
20
20
 
21
- ### Approaches
21
+ ## Approaches
22
22
 
23
23
  | Approach | Architecture | Trade-offs | Recommendation |
24
24
  | --- | --- | --- | --- |
@@ -26,33 +26,33 @@ const STAGE_EXAMPLES = {
26
26
  | B: Hardened shell scripts | Keep existing script approach, add stricter checks and error messages. | Lowest effort. Weak reuse, CI/local divergence risk grows over time. | Viable fallback if TS module is blocked. |
27
27
  | C: Full release framework | New release orchestrator with plugin system, config files, rollback commands. | Maximum flexibility. High risk, delivery delay, over-engineered for current needs. | Not recommended for v1. |
28
28
 
29
- ### Selected Direction
29
+ ## Selected Direction
30
30
 
31
31
  - **Approach:** A — Reusable validation module
32
32
  - **Rationale:** shared TS module gives consistent behavior in CI and local, avoids script duplication, and stays within the no-new-dependency constraint.
33
33
  - **Approval:** approved
34
34
 
35
- ### Design
35
+ ## Design
36
36
 
37
37
  - **Architecture:** single \`release-validator\` module in \`src/release/\` exporting typed check functions. CI script and local CLI both import and run the same checks.
38
38
  - **Key components:** \`validateMetadata()\`, \`validateChangelog()\`, \`validateVersion()\` — each returns a typed result with error details. A \`runAll()\` orchestrator runs checks and exits non-zero on any failure.
39
39
  - **Data flow:** package.json + CHANGELOG.md → validator module → structured result → CI/CLI renders human-readable report.
40
40
 
41
- ### Assumptions and Open Questions
41
+ ## Assumptions and Open Questions
42
42
 
43
43
  - **Assumptions:** CI remains the primary execution path; existing release metadata files remain the source of truth; v1 prioritizes determinism over customization.
44
44
  - **Open questions:** What exact rollback sequence for failed publish? Should status output include machine-readable JSON alongside markdown?
45
45
 
46
- ### Notes for the next stage
46
+ ## Notes for the next stage
47
47
 
48
48
  Carry the no-new-dependency constraint and hard-block behavior directly into scope in/out boundaries.`,
49
- scope: `### Scope contract
49
+ scope: `## Scope contract
50
50
 
51
51
  **Mode selected:** SELECTIVE EXPANSION
52
52
  **Default heuristic used:** feature enhancement -> selective
53
53
  **Mode-specific analysis result:** hold-scope baseline accepted first; one expansion accepted (degraded-state UX), one deferred (real-time channel upgrade).
54
54
 
55
- ### Prime Directives (applied)
55
+ ## Prime Directives (applied)
56
56
 
57
57
  - Zero silent failures: every delivery failure maps to a visible degraded state.
58
58
  - Named error surfaces: stream disconnect, auth drift, and publisher timeout are explicit.
@@ -60,11 +60,11 @@ Carry the no-new-dependency constraint and hard-block behavior directly into sco
60
60
  - Interaction edge cases in scope: double-open panel, reconnect after sleep, stale tab state.
61
61
  - Observability in scope: stream error counter, publish-to-visible lag metric, and alert threshold.
62
62
 
63
- ### Premise challenge result
63
+ ## Premise challenge result
64
64
 
65
65
  The original premise (“add notifications”) was reframed to **“ensure users know when an action requires follow-up”**, which expands the solution space beyond toast spam to include durable inbox items, empty states, and recovery paths when delivery fails.
66
66
 
67
- ### Dream State Mapping
67
+ ## Dream State Mapping
68
68
 
69
69
  | Stage | Statement |
70
70
  | --- | --- |
@@ -73,7 +73,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
73
73
  | **12-MONTH IDEAL** | Unified notification center with reliable multi-channel fan-out and user-level routing preferences. |
74
74
  | **Alignment verdict** | Aligned: this scope builds the durability foundation without prematurely committing to channel expansion. |
75
75
 
76
- ### Mode-Specific Analysis
76
+ ## Mode-Specific Analysis
77
77
 
78
78
  **Selected mode:** SELECTIVE EXPANSION
79
79
 
@@ -81,7 +81,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
81
81
  - **Expansion evaluated — degraded-state UX (accepted):** Adding an explicit "live updates paused" banner and polling fallback turns a reliability gap into a visible, recoverable state. Low incremental effort (S), high user trust payoff.
82
82
  - **Expansion evaluated — real-time channel upgrade (deferred):** WebSocket channel provides lower latency but requires new infra (connection pool, auth handshake). Not justified for current load; deferred to post-v1 validation.
83
83
 
84
- ### Implementation Alternatives
84
+ ## Implementation Alternatives
85
85
 
86
86
  | Option | Summary | Effort (S/M/L/XL) | Risk | Pros | Cons | Reuses |
87
87
  | --- | --- | --- | --- | --- | --- | --- |
@@ -89,7 +89,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
89
89
  | **B (recommended)** | SSE live updates + REST fallback snapshot | M | Med | Better timeliness, graceful degradation | Requires reconnect handling | Existing event publisher + REST path |
90
90
  | **C (ideal architecture)** | Event bus + WebSocket channel + feed projection | XL | High | Strong long-term scalability | Overbuilt for current demand | Partial reuse of publisher only |
91
91
 
92
- ### Temporal Interrogation
92
+ ## Temporal Interrogation
93
93
 
94
94
  | Time slice | Likely decision pressure | Lock now or defer? | Reason |
95
95
  | --- | --- | --- | --- |
@@ -98,7 +98,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
98
98
  | **HOUR 4-5 (integration)** | Handling gaps between snapshot and stream cursor | **Lock now** | Prevent silent data loss during reconnect windows |
99
99
  | **HOUR 6+ (polish/tests)** | Banner copy tone and polling cadence tuning | **Defer** | Safe to iterate after baseline reliability is proven |
100
100
 
101
- ### In scope / out of scope / deferred
101
+ ## In scope / out of scope / deferred
102
102
 
103
103
  | Category | Items |
104
104
  | --- | --- |
@@ -106,29 +106,29 @@ The original premise (“add notifications”) was reframed to **“ensure users
106
106
  | **Out of scope** | Email/SMS/push providers; marketing campaigns; per-user notification preferences beyond on/off |
107
107
  | **Deferred** | WebSocket channel; rich media attachments in notifications; full-text search across historical events |
108
108
 
109
- ### Discretion Areas
109
+ ## Discretion Areas
110
110
 
111
111
  - Client-side badge rendering strategy (optimistic vs server-confirmed) is implementation discretion.
112
112
  - Polling fallback backoff curve is implementation discretion if degraded-state UX remains explicit.
113
113
 
114
- ### Error & Rescue Registry (sample entry)
114
+ ## Error & Rescue Registry (sample entry)
115
115
 
116
116
  | Capability | Failure mode | Detection | Fallback |
117
117
  | --- | --- | --- | --- |
118
118
  | Event delivery | SSE connection drops mid-session | Client \`EventSource\` error event + heartbeat timeout | Fall back to REST polling every 30s until SSE reconnect succeeds; show subtle “live updates paused” banner |
119
119
 
120
- ### Completion Dashboard
120
+ ## Completion Dashboard
121
121
 
122
122
  - Checklist findings: 9/9 complete (complex path)
123
123
  - Resolved decisions count: 7
124
124
  - Unresolved decisions: None
125
125
 
126
- ### Scope Summary
126
+ ## Scope Summary
127
127
 
128
128
  - Accepted scope: durable feed + SSE + explicit degraded UX.
129
129
  - Deferred: WebSocket channel and rich-media/search enhancements.
130
130
  - Explicitly excluded: outbound channels and marketing workflows for v1.`,
131
- design: `### Codebase Investigation (blast-radius files)
131
+ design: `## Codebase Investigation (blast-radius files)
132
132
 
133
133
  | File | Current responsibility | Patterns discovered |
134
134
  | --- | --- | --- |
@@ -139,7 +139,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
139
139
 
140
140
  Discovery: existing EventEmitter-based bus has no durability — notifications must add persistence layer on top, not replace the bus.
141
141
 
142
- ### Search Before Building (sample result)
142
+ ## Search Before Building (sample result)
143
143
 
144
144
  | Layer | Label | What to reuse first |
145
145
  | --- | --- | --- |
@@ -147,7 +147,7 @@ Discovery: existing EventEmitter-based bus has no durability — notifications m
147
147
  | Layer 2 | existing codebase | Existing auth middleware, existing API client wrapper, existing feature flags helper |
148
148
  | Layer 3 | npm | A small, well-maintained SSE helper (only if Layer 1–2 cannot cover framing/reconnect ergonomics) |
149
149
 
150
- ### Architecture Diagram (mandatory)
150
+ ## Architecture Diagram (mandatory)
151
151
 
152
152
  \`\`\`
153
153
  ┌─────────────┐ ┌──────────────┐ ┌────────────────┐
@@ -163,7 +163,7 @@ Discovery: existing EventEmitter-based bus has no durability — notifications m
163
163
 
164
164
  Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Queue (persist) → Read Model (project).
165
165
 
166
- ### What Already Exists
166
+ ## What Already Exists
167
167
 
168
168
  | Sub-problem | Existing code/library | Layer | Reuse decision |
169
169
  | --- | --- | --- | --- |
@@ -172,7 +172,7 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
172
172
  | SSE framing | None | Layer 3 | Evaluate \`better-sse\` npm package |
173
173
  | Notification schema | None | — | New: define in \`src/schemas/notification.ts\` |
174
174
 
175
- ### Failure Mode Table
175
+ ## Failure Mode Table
176
176
 
177
177
  | Failure | Trigger | Detection | Mitigation | User impact |
178
178
  | --- | --- | --- | --- | --- |
@@ -180,13 +180,13 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
180
180
  | Duplicate publish | Retry after timeout | Dedupe key check in outbox | Upsert with idempotency key | None (transparent) |
181
181
  | Queue backpressure | Spike >1000 events/s | Queue depth metric alarm | Back-pressure signal to publisher, shed non-critical events | Delayed delivery of low-priority notifications |
182
182
 
183
- ### Test Strategy
183
+ ## Test Strategy
184
184
 
185
185
  - **Unit:** validator functions, dedupe-key logic, event schema factories — target 90%+ line coverage.
186
186
  - **Integration:** publisher → outbox → read-model pipeline via in-memory DB; SSE reconnect with simulated drops.
187
187
  - **E2E:** one happy-path browser test (publish → feed visible) and one degraded-path test (SSE down → REST fallback + banner).
188
188
 
189
- ### Performance Budget
189
+ ## Performance Budget
190
190
 
191
191
  | Critical path | Metric | Target | Measurement method |
192
192
  | --- | --- | --- | --- |
@@ -194,13 +194,13 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
194
194
  | Feed snapshot load | p99 response time | ≤ 200 ms | Load test with 1 000 items per user |
195
195
  | SSE reconnect | Time to first event after drop | ≤ 3 s | Simulated disconnect in integration suite |
196
196
 
197
- ### NOT in scope
197
+ ## NOT in scope
198
198
 
199
199
  - Outbound channels (email, push, SMS) — deferred to v2.
200
200
  - Admin notification management UI — separate workstream.
201
201
  - Notification preferences / mute rules — requires user settings redesign.
202
202
 
203
- ### Parallelization Strategy
203
+ ## Parallelization Strategy
204
204
 
205
205
  | Module | Depends on | Parallel lane | Conflict risk |
206
206
  | --- | --- | --- | --- |
@@ -208,18 +208,18 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
208
208
  | Publisher + outbox (T2) | T1 | Lane A | None |
209
209
  | Client feed + SSE (T3) | T1, T2 | Lane B (after T1) | Shared event type definitions |
210
210
 
211
- ### Unresolved Decisions
211
+ ## Unresolved Decisions
212
212
 
213
213
  | Decision | Status | Options | Missing info | Default if unanswered |
214
214
  | --- | --- | --- | --- | --- |
215
215
  | Feed storage model | OPEN | (A) append-only event log, (B) mutable rows, (C) hybrid | Load testing results on read patterns | (A) append-only — safest for audit trail |
216
216
 
217
- ### Interface sketch (non-binding)
217
+ ## Interface sketch (non-binding)
218
218
 
219
219
  - **Client → server:** \`GET /api/me/notifications/snapshot?limit=50\` plus optional cursor parameters (if adopted).
220
220
  - **Server → client:** \`GET /api/me/notifications/stream\` as SSE with periodic heartbeats.
221
221
 
222
- ### Completion Dashboard
222
+ ## Completion Dashboard
223
223
 
224
224
  | Review Section | Status | Issues |
225
225
  | --- | --- | --- |
@@ -231,10 +231,10 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
231
231
 
232
232
  **Decisions made:** 4 | **Unresolved:** 1 (feed storage model)
233
233
 
234
- ### Quality bar for this stage
234
+ ## Quality bar for this stage
235
235
 
236
236
  Design output should be **reviewable by someone who did not attend brainstorming**: they can trace from constraints → components → open decisions without reading code.`,
237
- spec: `### Acceptance Criteria
237
+ spec: `## Acceptance Criteria
238
238
 
239
239
  | ID | Criterion (observable/measurable/falsifiable) | Design Decision Ref |
240
240
  | --- | --- | --- |
@@ -242,7 +242,7 @@ Design output should be **reviewable by someone who did not attend brainstorming
242
242
  | AC-2 | Given the same logical notification is published twice with the same dedupe key, when the client processes the stream, the feed contains exactly one visible item for that key. | Architecture: dedupe-key in event schema |
243
243
  | AC-3 | Given the live connection is unavailable, when the user opens the notifications panel, the UI shows a non-blocking "live updates paused" banner and loads the latest snapshot via REST within 2 seconds. | Architecture: REST fallback + degraded UX |
244
244
 
245
- ### Edge Cases
245
+ ## Edge Cases
246
246
 
247
247
  | Criterion ID | Boundary case | Error case |
248
248
  | --- | --- | --- |
@@ -250,12 +250,12 @@ Design output should be **reviewable by someone who did not attend brainstorming
250
250
  | AC-2 | Two events with identical dedupe key arrive within same SSE frame (boundary: only one row rendered). | Dedupe-key field missing — reject event at publisher and log error. |
251
251
  | AC-3 | SSE disconnects after exactly 30 s heartbeat timeout (boundary: banner appears within 1 s of timeout). | REST snapshot endpoint returns 500 — panel shows "unable to load" with retry button. |
252
252
 
253
- ### Constraints and Assumptions
253
+ ## Constraints and Assumptions
254
254
 
255
255
  - **Constraints:** Max feed size 1 000 items per user. SSE heartbeat interval 30 s (server-side). REST snapshot p99 \u2264 200 ms. No new runtime dependencies.
256
256
  - **Assumptions:** Users have a single active session at a time for v1. Existing auth middleware provides user context. Event publisher is single-writer per user.
257
257
 
258
- ### Testability Map
258
+ ## Testability Map
259
259
 
260
260
  | Criterion ID | Verification approach | Command/manual steps |
261
261
  | --- | --- | --- |
@@ -263,11 +263,11 @@ Design output should be **reviewable by someone who did not attend brainstorming
263
263
  | AC-2 | Unit test: publish same dedupe key twice \u2192 assert single row in feed store. | \`pnpm vitest run tests/unit/dedupe-feed.test.ts\` |
264
264
  | AC-3 | E2E test: kill SSE transport \u2192 assert banner visible + REST snapshot loads. | \`pnpm playwright test tests/e2e/degraded-mode.spec.ts\` |
265
265
 
266
- ### Approval
266
+ ## Approval
267
267
 
268
268
  - Approved by: user
269
269
  - Date: 2026-04-14`,
270
- plan: `### Dependency Graph
270
+ plan: `## Dependency Graph
271
271
 
272
272
  \`\`\`
273
273
  T-1 ──▶ T-2 ──▶ T-3
@@ -277,7 +277,7 @@ T-1 ──▶ T-2 ──▶ T-3
277
277
 
278
278
  Parallel opportunity: T-1 is a prerequisite for both T-2 and T-3 (T-3 also needs T-2).
279
279
 
280
- ### Dependency Waves
280
+ ## Dependency Waves
281
281
 
282
282
  #### Wave 1 (foundation)
283
283
  - Task IDs: T-1
@@ -295,7 +295,7 @@ Parallel opportunity: T-1 is a prerequisite for both T-2 and T-3 (T-3 also needs
295
295
 
296
296
  Execution rule: complete and verify each wave before starting the next wave.
297
297
 
298
- ### Task List
298
+ ## Task List
299
299
 
300
300
  | Task ID | Description | Acceptance criterion | Verification command | Effort |
301
301
  | --- | --- | --- | --- | --- |
@@ -303,7 +303,7 @@ Execution rule: complete and verify each wave before starting the next wave.
303
303
  | T-2 | Implement publisher + outbox write path | AC-1: integration test (happy path publish) | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` |
304
304
  | T-3 | Implement client feed + SSE subscribe + REST fallback | AC-1, AC-2, AC-3: e2e tests including degraded mode | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` |
305
305
 
306
- ### Acceptance Mapping
306
+ ## Acceptance Mapping
307
307
 
308
308
  | Criterion ID | Task IDs |
309
309
  | --- | --- |
@@ -311,17 +311,17 @@ Execution rule: complete and verify each wave before starting the next wave.
311
311
  | AC-2 (idempotency) | T-1, T-2 |
312
312
  | AC-3 (failure visibility) | T-3 |
313
313
 
314
- ### Risk Assessment
314
+ ## Risk Assessment
315
315
 
316
316
  | Task/Wave | Risk | Likelihood | Impact | Mitigation |
317
317
  | --- | --- | --- | --- | --- |
318
318
  | T-3 (Wave 3) | SSE reconnect logic complex | Medium | High | Spike reconnect in isolation before integrating with feed UI |
319
319
  | Wave 2 → 3 | Publisher API contract may shift | Low | Medium | Pin contract in T-1 schema; T-2 integration test validates |
320
320
 
321
- ### WAIT_FOR_CONFIRM
321
+ ## WAIT_FOR_CONFIRM
322
322
  - Status: pending
323
323
  - Confirmed by:`,
324
- tdd: `### RED Evidence
324
+ tdd: `## RED Evidence
325
325
 
326
326
  | Slice | Test name | Command | Failure output summary |
327
327
  | --- | --- | --- | --- |
@@ -329,7 +329,7 @@ Execution rule: complete and verify each wave before starting the next wave.
329
329
  | S-2 (publisher outbox) | publishes event to outbox with dedupe key | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` | publishToOutbox is not a function |
330
330
  | S-3 (client feed + fallback) | shows notification within 5s via SSE | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` | Element [data-testid="feed-item"] not found |
331
331
 
332
- ### Acceptance Mapping
332
+ ## Acceptance Mapping
333
333
 
334
334
  | Slice | Plan task ID | Spec criterion ID |
335
335
  | --- | --- | --- |
@@ -337,7 +337,7 @@ Execution rule: complete and verify each wave before starting the next wave.
337
337
  | S-2 | T-2 | AC-1 |
338
338
  | S-3 | T-3 | AC-1, AC-2, AC-3 |
339
339
 
340
- ### Failure Analysis
340
+ ## Failure Analysis
341
341
 
342
342
  | Slice | Expected missing behavior | Actual failure reason |
343
343
  | --- | --- | --- |
@@ -345,22 +345,22 @@ Execution rule: complete and verify each wave before starting the next wave.
345
345
  | S-2 | publishToOutbox function not implemented | Function not found — correct: write path missing |
346
346
  | S-3 | Feed UI not rendered, SSE not connected | DOM element missing — correct: client component not built |
347
347
 
348
- ### GREEN Evidence
348
+ ## GREEN Evidence
349
349
 
350
350
  - Full suite command: \`\`\`pnpm vitest run && pnpm playwright test\`\`\`
351
351
  - Full suite result: 47 tests passed (3 new + 44 existing), 0 failed, 0 skipped
352
352
 
353
- ### REFACTOR Notes
353
+ ## REFACTOR Notes
354
354
 
355
355
  - What changed: Extracted \`\`\`mergeLatestByDedupeKey\`\`\` helper from inline loop in \`\`\`summarizeDedupedFeed\`\`\`; moved SSE reconnect logic into \`\`\`useSSEConnection\`\`\` hook.
356
356
  - Why: Dedupe merge logic is reused by both publisher and client; reconnect logic was duplicated across components.
357
357
  - Behavior preserved: Full suite re-run confirms 47/47 pass after refactor.
358
358
 
359
- ### Traceability
359
+ ## Traceability
360
360
 
361
361
  - Plan task IDs: T-1, T-2, T-3
362
362
  - Spec criterion IDs: AC-1, AC-2, AC-3`,
363
- review: `### Layer 1 Verdict
363
+ review: `## Layer 1 Verdict
364
364
 
365
365
  | Criterion | Verdict | Evidence |
366
366
  | --- | --- | --- |
@@ -368,7 +368,7 @@ Execution rule: complete and verify each wave before starting the next wave.
368
368
  | AC-2: Dedupe — one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
369
369
  | AC-3: Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
370
370
 
371
- ### Layer 2 Findings
371
+ ## Layer 2 Findings
372
372
 
373
373
  | ID | Severity | Category | Description | Status |
374
374
  | --- | --- | --- | --- | --- |
@@ -376,12 +376,12 @@ Execution rule: complete and verify each wave before starting the next wave.
376
376
  | R-2 | Important | performance | \`feedStore.merge()\` does full-array scan on every SSE event; O(n) per event where n is feed length. | open |
377
377
  | R-3 | Suggestion | architecture | SSE reconnect logic duplicated across \`useNotifications\` and \`usePresence\`; extract shared hook. | open |
378
378
 
379
- ### Review Army Contract
379
+ ## Review Army Contract
380
380
 
381
381
  - See \`07-review-army.json\`
382
382
  - Reconciliation summary: 1 duplicate collapsed (R-1 reported by spec-reviewer and code-reviewer), 0 conflicts
383
383
 
384
- ### Review Readiness Dashboard
384
+ ## Review Readiness Dashboard
385
385
 
386
386
  - Layer 1 complete: yes (3/3 criteria)
387
387
  - Layer 2 complete: yes (5 sections reviewed)
@@ -389,16 +389,16 @@ Execution rule: complete and verify each wave before starting the next wave.
389
389
  - Open critical blockers: 1 (R-1)
390
390
  - Ship recommendation: BLOCKED until R-1 resolved
391
391
 
392
- ### Severity Summary
392
+ ## Severity Summary
393
393
 
394
394
  - Critical: 1
395
395
  - Important: 1
396
396
  - Suggestion: 1
397
397
 
398
- ### Final Verdict
398
+ ## Final Verdict
399
399
 
400
400
  - BLOCKED`,
401
- ship: `### Preflight Results
401
+ ship: `## Preflight Results
402
402
 
403
403
  - Review verdict: APPROVED_WITH_CONCERNS (R-1 resolved, R-2 accepted as known debt)
404
404
  - Build: pass (\`pnpm build\` succeeds)
@@ -407,25 +407,25 @@ Execution rule: complete and verify each wave before starting the next wave.
407
407
  - Type-check: pass (\`pnpm typecheck\` clean)
408
408
  - Working tree clean: yes (\`git status\` shows no uncommitted changes)
409
409
 
410
- ### Release Notes
410
+ ## Release Notes
411
411
 
412
412
  - **Added:** In-app notification feed with SSE updates and REST fallback snapshotting (AC-1, AC-3).
413
413
  - **Changed:** Notification payloads now include a stable dedupe key for idempotent rendering (AC-2).
414
414
  - **Fixed:** Panel no longer drops the newest item when reconnecting after sleep/resume.
415
415
  - **Breaking changes:** None.
416
416
 
417
- ### Rollback Plan
417
+ ## Rollback Plan
418
418
 
419
419
  - Trigger conditions: error rate on \`/notifications/stream\` exceeds 5% for >5 minutes, or p95 publish-to-visible lag exceeds 10s.
420
420
  - Rollback steps: \`git revert <merge-sha> && git push origin main\` then redeploy; if DB migrations shipped, run \`2026_04_12_notifications_cursor_down.sql\` before traffic.
421
421
  - Verification steps: confirm error rate returns to pre-release baseline within 10 minutes; smoke-test feed panel manually.
422
422
 
423
- ### Monitoring
423
+ ## Monitoring
424
424
 
425
425
  - Metrics/logs to watch: error rate on \`/notifications/stream\` and snapshot endpoint for 24h; p95 publish-to-visible lag via metrics dashboard.
426
426
  - Risk note (if no monitoring): N/A — monitoring is in place.
427
427
 
428
- ### Finalization
428
+ ## Finalization
429
429
 
430
430
  - Selected enum: FINALIZE_OPEN_PR
431
431
  - Selected label: B
@@ -436,5 +436,14 @@ export function stageExamples(stage) {
436
436
  const examples = STAGE_EXAMPLES[stage];
437
437
  if (!examples)
438
438
  return "";
439
- return `## Examples\n\nConcrete samples of what good output looks like for this stage.\n\n${examples}\n`;
439
+ return [
440
+ "## Examples",
441
+ "",
442
+ "Concrete artifact samples. These mirror the exact heading levels agents must use when authoring the stage artifact (all H2 `##` sections), so they are presented inside a markdown fence to avoid collapsing into the SKILL outline.",
443
+ "",
444
+ "```markdown",
445
+ examples,
446
+ "```",
447
+ ""
448
+ ].join("\n");
440
449
  }
@@ -361,28 +361,13 @@ Execution rule: complete and verify each wave before starting the next wave.
361
361
  `,
362
362
  "07-review-army.json": `{
363
363
  "version": 1,
364
- "generatedAt": "",
364
+ "generatedAt": "<ISO 8601 timestamp, e.g. 2026-04-14T12:00:00Z>",
365
365
  "scope": {
366
- "base": "",
367
- "head": "",
366
+ "base": "<base branch or ref>",
367
+ "head": "<head branch or ref>",
368
368
  "files": []
369
369
  },
370
- "findings": [
371
- {
372
- "id": "",
373
- "title": "",
374
- "severity": "Critical",
375
- "confidence": 7,
376
- "category": "correctness",
377
- "location": {
378
- "file": ""
379
- },
380
- "fingerprint": "",
381
- "reportedBy": [],
382
- "status": "open",
383
- "recommendation": ""
384
- }
385
- ],
370
+ "findings": [],
386
371
  "reconciliation": {
387
372
  "duplicatesCollapsed": 0,
388
373
  "conflicts": [],
@@ -7,6 +7,11 @@ export type DelegationEntry = {
7
7
  taskId?: string;
8
8
  waiverReason?: string;
9
9
  ts: string;
10
+ /**
11
+ * Run id the entry belongs to. Older ledgers written before 0.5.17 may omit this;
12
+ * consumers treat missing runId as unscoped (conservatively excluded from current-run checks).
13
+ */
14
+ runId?: string;
10
15
  };
11
16
  export type DelegationLedger = {
12
17
  runId: string;
@@ -18,4 +23,5 @@ export declare function checkMandatoryDelegations(projectRoot: string, stage: Fl
18
23
  satisfied: boolean;
19
24
  missing: string[];
20
25
  waived: string[];
26
+ staleIgnored: string[];
21
27
  }>;
@@ -25,7 +25,8 @@ function isDelegationEntry(value) {
25
25
  statusOk &&
26
26
  typeof o.ts === "string" &&
27
27
  (o.taskId === undefined || typeof o.taskId === "string") &&
28
- (o.waiverReason === undefined || typeof o.waiverReason === "string"));
28
+ (o.waiverReason === undefined || typeof o.waiverReason === "string") &&
29
+ (o.runId === undefined || typeof o.runId === "string"));
29
30
  }
30
31
  function parseLedger(raw, runId) {
31
32
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
@@ -63,21 +64,27 @@ export async function appendDelegation(projectRoot, entry) {
63
64
  await withDirectoryLock(delegationLockPath(projectRoot), async () => {
64
65
  const filePath = delegationLogPath(projectRoot);
65
66
  const prior = await readDelegationLedger(projectRoot);
67
+ const stamped = { ...entry, runId: entry.runId ?? activeRunId };
66
68
  const ledger = {
67
69
  runId: activeRunId,
68
- entries: [...prior.entries, entry]
70
+ entries: [...prior.entries, stamped]
69
71
  };
70
72
  await writeFileSafe(filePath, `${JSON.stringify(ledger, null, 2)}\n`);
71
73
  });
72
74
  }
73
75
  export async function checkMandatoryDelegations(projectRoot, stage) {
74
76
  const mandatory = stageSchema(stage).mandatoryDelegations;
77
+ const { activeRunId } = await readFlowState(projectRoot);
75
78
  const ledger = await readDelegationLedger(projectRoot);
76
79
  const forStage = ledger.entries.filter((e) => e.stage === stage);
80
+ const forRun = forStage.filter((e) => e.runId === activeRunId);
81
+ const staleIgnored = forStage
82
+ .filter((e) => e.runId !== activeRunId)
83
+ .map((e) => `${e.agent}(runId=${e.runId ?? "unknown"})`);
77
84
  const missing = [];
78
85
  const waived = [];
79
86
  for (const agent of mandatory) {
80
- const rows = forStage.filter((e) => e.agent === agent);
87
+ const rows = forRun.filter((e) => e.agent === agent);
81
88
  const ok = rows.some((e) => e.status === "completed" || e.status === "waived");
82
89
  if (!ok) {
83
90
  missing.push(agent);
@@ -89,6 +96,7 @@ export async function checkMandatoryDelegations(projectRoot, stage) {
89
96
  return {
90
97
  satisfied: missing.length === 0,
91
98
  missing,
92
- waived
99
+ waived,
100
+ staleIgnored
93
101
  };
94
102
  }
package/dist/doctor.js CHANGED
@@ -13,7 +13,7 @@ import { policyChecks } from "./policy.js";
13
13
  import { readFlowState } from "./runs.js";
14
14
  import { checkMandatoryDelegations } from "./delegation.js";
15
15
  import { buildTraceMatrix } from "./trace-matrix.js";
16
- import { reconcileAndWriteCurrentStageGateCatalog, verifyCurrentStageGateEvidence } from "./gate-evidence.js";
16
+ import { reconcileAndWriteCurrentStageGateCatalog, verifyCompletedStagesGateClosure, verifyCurrentStageGateEvidence } from "./gate-evidence.js";
17
17
  import { stageSkillFolder } from "./content/skills.js";
18
18
  import { UTILITY_SKILL_FOLDERS } from "./content/utility-skills.js";
19
19
  import { CONTEXT_MODES, DEFAULT_CONTEXT_MODE } from "./content/contexts.js";
@@ -768,11 +768,37 @@ export async function doctorChecks(projectRoot, options = {}) {
768
768
  ? `warning: waived mandatory delegations for stage "${flowState.currentStage}": ${delegation.waived.join(", ")}`
769
769
  : "no waived mandatory delegations for current stage"
770
770
  });
771
+ checks.push({
772
+ name: "warning:delegation:stale_runs",
773
+ ok: true,
774
+ details: delegation.staleIgnored.length > 0
775
+ ? `warning: ${delegation.staleIgnored.length} delegation entries from other runs were ignored: ${delegation.staleIgnored.join(", ")}`
776
+ : "no stale delegation entries from prior runs"
777
+ });
771
778
  const trace = await buildTraceMatrix(projectRoot);
779
+ const artifactsDir = path.join(projectRoot, RUNTIME_ROOT, "artifacts");
780
+ const specExists = await exists(path.join(artifactsDir, "04-spec.md"));
781
+ const planExists = await exists(path.join(artifactsDir, "05-plan.md"));
782
+ const tddExists = await exists(path.join(artifactsDir, "06-tdd.md"));
772
783
  const traceHasSignal = trace.entries.length > 0 ||
773
784
  trace.orphanedCriteria.length > 0 ||
774
785
  trace.orphanedTasks.length > 0 ||
775
786
  trace.orphanedTests.length > 0;
787
+ const artifactsPresent = specExists || planExists || tddExists;
788
+ const emptyMatrixWithArtifacts = !traceHasSignal && artifactsPresent;
789
+ checks.push({
790
+ name: "trace:matrix_populated",
791
+ ok: !emptyMatrixWithArtifacts,
792
+ details: emptyMatrixWithArtifacts
793
+ ? `trace matrix is empty but artifacts exist (${[
794
+ specExists ? "04-spec.md" : null,
795
+ planExists ? "05-plan.md" : null,
796
+ tddExists ? "06-tdd.md" : null
797
+ ].filter(Boolean).join(", ")}). The extractors found no criterion/task/slice IDs — check heading conventions and ID formats.`
798
+ : artifactsPresent
799
+ ? `trace matrix parsed ${trace.entries.length} criterion(s) from present artifacts`
800
+ : "no downstream artifacts to trace yet"
801
+ });
776
802
  checks.push({
777
803
  name: "trace:criteria_coverage",
778
804
  ok: !traceHasSignal || trace.orphanedCriteria.length === 0,
@@ -802,6 +828,16 @@ export async function doctorChecks(projectRoot, options = {}) {
802
828
  ? `stage "${gateEvidence.stage}" gate evidence is consistent (required=${gateEvidence.requiredCount}, passed=${gateEvidence.passedCount}, blocked=${gateEvidence.blockedCount})`
803
829
  : gateEvidence.issues.join(" ")
804
830
  });
831
+ const completedClosure = verifyCompletedStagesGateClosure(flowState);
832
+ checks.push({
833
+ name: "gates:closure:completed_stages",
834
+ ok: completedClosure.ok,
835
+ details: completedClosure.ok
836
+ ? flowState.completedStages.length === 0
837
+ ? "no completed stages yet"
838
+ : `all ${flowState.completedStages.length} completed stages have every required gate passed`
839
+ : completedClosure.issues.join(" ")
840
+ });
805
841
  // Self-improvement block in stage skills
806
842
  for (const stage of COMMAND_FILE_ORDER) {
807
843
  const skillPath = path.join(projectRoot, RUNTIME_ROOT, "skills", stageSkillFolder(stage), "SKILL.md");
@@ -7,8 +7,22 @@ export interface GateEvidenceCheckResult {
7
7
  requiredCount: number;
8
8
  passedCount: number;
9
9
  blockedCount: number;
10
+ /** True only when every required gate for the stage is in `passed` and none are `blocked`. */
11
+ complete: boolean;
12
+ /** Required gate ids that are not in `passed` (a required gate that is `blocked` is still listed here). */
13
+ missingRequired: string[];
14
+ }
15
+ export interface CompletedStagesClosureResult {
16
+ ok: boolean;
17
+ issues: string[];
18
+ openStages: Array<{
19
+ stage: FlowStage;
20
+ missingRequired: string[];
21
+ blocked: string[];
22
+ }>;
10
23
  }
11
24
  export declare function verifyCurrentStageGateEvidence(projectRoot: string, flowState: FlowState): Promise<GateEvidenceCheckResult>;
25
+ export declare function verifyCompletedStagesGateClosure(flowState: FlowState): CompletedStagesClosureResult;
12
26
  export interface GateReconciliationResult {
13
27
  stage: FlowStage;
14
28
  changed: boolean;
@@ -1,6 +1,29 @@
1
- import { lintArtifact, validateReviewArmy } from "./artifact-linter.js";
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { checkReviewVerdictConsistency, lintArtifact, validateReviewArmy } from "./artifact-linter.js";
4
+ import { RUNTIME_ROOT } from "./constants.js";
2
5
  import { stageSchema } from "./content/stage-schema.js";
6
+ import { exists } from "./fs-utils.js";
3
7
  import { readFlowState, writeFlowState } from "./runs.js";
8
+ async function currentStageArtifactExists(projectRoot, stage) {
9
+ const artifactFile = stageSchema(stage).artifactFile;
10
+ const candidates = [
11
+ path.join(projectRoot, RUNTIME_ROOT, "artifacts", artifactFile),
12
+ path.join(projectRoot, artifactFile)
13
+ ];
14
+ for (const candidate of candidates) {
15
+ if (await exists(candidate))
16
+ return true;
17
+ }
18
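+ // Final probe via fs.access. NOTE(review): this targets the same project-root path as the second candidate above (not process.cwd()), so it looks redundant — confirm whether a cwd-relative fallback was intended.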
19
+ try {
20
+ await fs.access(path.join(projectRoot, artifactFile));
21
+ return true;
22
+ }
23
+ catch {
24
+ return false;
25
+ }
26
+ }
4
27
  function unique(values) {
5
28
  return [...new Set(values)];
6
29
  }
@@ -44,7 +67,8 @@ export async function verifyCurrentStageGateEvidence(projectRoot, flowState) {
44
67
  issues.push(`blocked gate "${gateId}" is not defined for stage "${stage}".`);
45
68
  }
46
69
  }
47
- const shouldValidateArtifact = catalog.passed.length > 0 || flowState.completedStages.includes(stage);
70
+ const artifactPresent = await currentStageArtifactExists(projectRoot, stage);
71
+ const shouldValidateArtifact = artifactPresent || catalog.passed.length > 0 || flowState.completedStages.includes(stage);
48
72
  if (shouldValidateArtifact) {
49
73
  const lint = await lintArtifact(projectRoot, stage);
50
74
  if (!lint.passed) {
@@ -60,6 +84,21 @@ export async function verifyCurrentStageGateEvidence(projectRoot, flowState) {
60
84
  if (!reviewArmy.valid) {
61
85
  issues.push(`review-army validation failed: ${reviewArmy.errors.join("; ")}`);
62
86
  }
87
+ const verdictConsistency = await checkReviewVerdictConsistency(projectRoot);
88
+ if (!verdictConsistency.ok) {
89
+ issues.push(`review verdict inconsistency: ${verdictConsistency.errors.join("; ")}`);
90
+ }
91
+ }
92
+ }
93
+ const passedSet = new Set(catalog.passed);
94
+ const missingRequired = required.filter((gateId) => !passedSet.has(gateId));
95
+ const complete = missingRequired.length === 0 && catalog.blocked.length === 0;
96
+ if (flowState.completedStages.includes(stage) && !complete) {
97
+ if (missingRequired.length > 0) {
98
+ issues.push(`stage "${stage}" is marked completed but required gates are not passed: ${missingRequired.join(", ")}.`);
99
+ }
100
+ if (catalog.blocked.length > 0) {
101
+ issues.push(`stage "${stage}" is marked completed but has blocked gates: ${catalog.blocked.join(", ")}.`);
63
102
  }
64
103
  }
65
104
  return {
@@ -68,9 +107,32 @@ export async function verifyCurrentStageGateEvidence(projectRoot, flowState) {
68
107
  issues,
69
108
  requiredCount: required.length,
70
109
  passedCount: catalog.passed.length,
71
- blockedCount: catalog.blocked.length
110
+ blockedCount: catalog.blocked.length,
111
+ complete,
112
+ missingRequired
72
113
  };
73
114
  }
115
+ export function verifyCompletedStagesGateClosure(flowState) {
116
+ const issues = [];
117
+ const openStages = [];
118
+ for (const stage of flowState.completedStages) {
119
+ const schema = stageSchema(stage);
120
+ const catalog = flowState.stageGateCatalog[stage];
121
+ const required = schema.requiredGates.map((gate) => gate.id);
122
+ const passedSet = new Set(catalog.passed);
123
+ const missingRequired = required.filter((gateId) => !passedSet.has(gateId));
124
+ if (missingRequired.length > 0 || catalog.blocked.length > 0) {
125
+ openStages.push({ stage, missingRequired, blocked: [...catalog.blocked] });
126
+ if (missingRequired.length > 0) {
127
+ issues.push(`completed stage "${stage}" has unpassed required gates: ${missingRequired.join(", ")}.`);
128
+ }
129
+ if (catalog.blocked.length > 0) {
130
+ issues.push(`completed stage "${stage}" still has blocked gates: ${catalog.blocked.join(", ")}.`);
131
+ }
132
+ }
133
+ }
134
+ return { ok: openStages.length === 0, issues, openStages };
135
+ }
74
136
  export function reconcileCurrentStageGateCatalog(flowState) {
75
137
  const stage = flowState.currentStage;
76
138
  const required = stageSchema(stage).requiredGates.map((gate) => gate.id);
package/dist/runs.d.ts CHANGED
@@ -1,5 +1,17 @@
1
1
  import { type FlowState } from "./flow-state.js";
2
2
  import type { FlowStage } from "./types.js";
3
+ export declare class InvalidStageTransitionError extends Error {
4
+ readonly from: FlowStage;
5
+ readonly to: FlowStage;
6
+ constructor(from: FlowStage, to: FlowStage, message: string);
7
+ }
8
+ export interface WriteFlowStateOptions {
9
+ /**
10
+ * When true, skip prior-state validation. Used for run archival, initial
11
+ * bootstrap, or explicit recovery; never set from normal stage handlers.
12
+ */
13
+ allowReset?: boolean;
14
+ }
3
15
  export interface CclawRunMeta {
4
16
  id: string;
5
17
  title: string;
@@ -32,7 +44,7 @@ export declare class CorruptFlowStateError extends Error {
32
44
  constructor(statePath: string, quarantinedPath: string, cause: unknown);
33
45
  }
34
46
  export declare function readFlowState(projectRoot: string): Promise<FlowState>;
35
- export declare function writeFlowState(projectRoot: string, state: FlowState): Promise<void>;
47
+ export declare function writeFlowState(projectRoot: string, state: FlowState, options?: WriteFlowStateOptions): Promise<void>;
36
48
  export declare function ensureRunSystem(projectRoot: string, _options?: EnsureRunSystemOptions): Promise<FlowState>;
37
49
  export declare function listRuns(projectRoot: string): Promise<CclawRunMeta[]>;
38
50
  export declare function archiveRun(projectRoot: string, featureName?: string): Promise<ArchiveRunResult>;
package/dist/runs.js CHANGED
@@ -1,8 +1,35 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
3
  import { COMMAND_FILE_ORDER, RUNTIME_ROOT } from "./constants.js";
4
- import { createInitialFlowState } from "./flow-state.js";
4
+ import { canTransition, createInitialFlowState } from "./flow-state.js";
5
5
  import { ensureDir, exists, withDirectoryLock, writeFileSafe } from "./fs-utils.js";
6
+ export class InvalidStageTransitionError extends Error {
7
+ from;
8
+ to;
9
+ constructor(from, to, message) {
10
+ super(message);
11
+ this.from = from;
12
+ this.to = to;
13
+ this.name = "InvalidStageTransitionError";
14
+ }
15
+ }
16
+ function validateFlowTransition(prev, next) {
17
+ if (prev.activeRunId !== next.activeRunId) {
18
+ // A changed activeRunId means a new run. Only reset paths may do that, and they pass allowReset, which skips this validation entirely.
19
+ throw new InvalidStageTransitionError(prev.currentStage, next.currentStage, `cannot change activeRunId from "${prev.activeRunId}" to "${next.activeRunId}" without allowReset.`);
20
+ }
21
+ for (const completed of prev.completedStages) {
22
+ if (!next.completedStages.includes(completed)) {
23
+ throw new InvalidStageTransitionError(prev.currentStage, next.currentStage, `completedStages must be monotonic: stage "${completed}" was previously completed but is missing from the new state.`);
24
+ }
25
+ }
26
+ if (prev.currentStage === next.currentStage) {
27
+ return;
28
+ }
29
+ if (!canTransition(prev.currentStage, next.currentStage)) {
30
+ throw new InvalidStageTransitionError(prev.currentStage, next.currentStage, `no transition rule allows "${prev.currentStage}" -> "${next.currentStage}". Use /cc-next to advance stages or archive the run to reset.`);
31
+ }
32
+ }
6
33
  const FLOW_STATE_REL_PATH = `${RUNTIME_ROOT}/state/flow-state.json`;
7
34
  const RUNS_DIR_REL_PATH = `${RUNTIME_ROOT}/runs`;
8
35
  const ACTIVE_ARTIFACTS_REL_PATH = `${RUNTIME_ROOT}/artifacts`;
@@ -251,10 +278,28 @@ export async function readFlowState(projectRoot) {
251
278
  }
252
279
  return coerceFlowState(parsed);
253
280
  }
254
- export async function writeFlowState(projectRoot, state) {
281
+ export async function writeFlowState(projectRoot, state, options = {}) {
255
282
  await withDirectoryLock(flowStateLockPath(projectRoot), async () => {
283
+ const statePath = flowStatePath(projectRoot);
284
+ if (!options.allowReset && (await exists(statePath))) {
285
+ try {
286
+ const raw = await fs.readFile(statePath, "utf8");
287
+ const parsed = JSON.parse(raw);
288
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
289
+ const prev = coerceFlowState(parsed);
290
+ validateFlowTransition(prev, state);
291
+ }
292
+ }
293
+ catch (err) {
294
+ if (err instanceof InvalidStageTransitionError) {
295
+ throw err;
296
+ }
297
+ // A corrupt prior file is surfaced by readFlowState elsewhere; don't
298
+ // block a legitimate write attempt on parse errors here.
299
+ }
300
+ }
256
301
  const safe = coerceFlowState({ ...state });
257
- await writeFileSafe(flowStatePath(projectRoot), `${JSON.stringify(safe, null, 2)}\n`);
302
+ await writeFileSafe(statePath, `${JSON.stringify(safe, null, 2)}\n`);
258
303
  });
259
304
  }
260
305
  export async function ensureRunSystem(projectRoot, _options = {}) {
@@ -263,7 +308,7 @@ export async function ensureRunSystem(projectRoot, _options = {}) {
263
308
  const statePath = flowStatePath(projectRoot);
264
309
  const state = await readFlowState(projectRoot);
265
310
  if (!(await exists(statePath))) {
266
- await writeFlowState(projectRoot, state);
311
+ await writeFlowState(projectRoot, state, { allowReset: true });
267
312
  }
268
313
  return state;
269
314
  }
@@ -315,7 +360,7 @@ export async function archiveRun(projectRoot, featureName) {
315
360
  const archiveStatePath = path.join(archivePath, "state");
316
361
  const snapshottedStateFiles = await snapshotStateDirectory(projectRoot, archiveStatePath);
317
362
  const resetState = createInitialFlowState();
318
- await writeFlowState(projectRoot, resetState);
363
+ await writeFlowState(projectRoot, resetState, { allowReset: true });
319
364
  const archivedAt = new Date().toISOString();
320
365
  const manifest = {
321
366
  version: 1,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cclaw-cli",
3
- "version": "0.5.16",
3
+ "version": "0.5.17",
4
4
  "description": "Installer-first flow toolkit for coding agents",
5
5
  "type": "module",
6
6
  "bin": {