npm - cclaw-cli - Versions diffs - 0.5.16 → 0.5.17 - Mend

cclaw-cli 0.5.16 → 0.5.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/artifact-linter.d.ts +13 -0
package/dist/artifact-linter.js +182 -13
package/dist/content/agents.js +2 -2
package/dist/content/examples.js +71 -62
package/dist/content/templates.js +4 -19
package/dist/delegation.d.ts +6 -0
package/dist/delegation.js +12 -4
package/dist/doctor.js +37 -1
package/dist/gate-evidence.d.ts +14 -0
package/dist/gate-evidence.js +65 -3
package/dist/runs.d.ts +13 -1
package/dist/runs.js +50 -5
package/package.json +1 -1

package/dist/artifact-linter.d.ts CHANGED Viewed

@@ -18,3 +18,16 @@ export declare function validateReviewArmy(projectRoot: string): Promise<{
     valid: boolean;
     errors: string[];
 }>;
+export interface ReviewVerdictConsistencyResult {
+    ok: boolean;
+    errors: string[];
+    finalVerdict: "APPROVED" | "APPROVED_WITH_CONCERNS" | "BLOCKED" | "UNKNOWN";
+    openCriticalCount: number;
+    shipBlockerCount: number;
+}
+/**
+ * Ensure the narrative verdict in 07-review.md is consistent with the
+ * structured review-army reconciliation. A review cannot declare
+ * APPROVED while open Critical findings or shipBlockers remain.
+ */
+export declare function checkReviewVerdictConsistency(projectRoot: string): Promise<ReviewVerdictConsistencyResult>;

package/dist/artifact-linter.js CHANGED Viewed

@@ -134,7 +134,61 @@ function extractRequiredKeywords(rule) {
         return [];
     return phrases;
 }
-function validateSectionBody(sectionBody, rule) {
+const VAGUE_AC_ADJECTIVES = [
+    "fast",
+    "quick",
+    "slow",
+    "fast enough",
+    "quickly",
+    "intuitive",
+    "robust",
+    "reliable",
+    "scalable",
+    "simple",
+    "easy",
+    "user-friendly",
+    "user friendly",
+    "nice",
+    "good",
+    "clean",
+    "secure enough",
+    "responsive",
+    "efficient",
+    "performant",
+    "smooth",
+    "seamless",
+    "modern"
+];
+function isSeparatorRow(line) {
+    return /^\|[-:| ]+\|$/u.test(line);
+}
+function getMarkdownTableRows(sectionBody) {
+    const lines = sectionBody.split(/\r?\n/).map((line) => line.trim());
+    const rows = [];
+    let sawSeparator = false;
+    for (const line of lines) {
+        if (!/^\|.*\|$/u.test(line))
+            continue;
+        if (isSeparatorRow(line)) {
+            sawSeparator = true;
+            continue;
+        }
+        if (!sawSeparator)
+            continue;
+        rows.push(parseMarkdownTableRow(line));
+    }
+    return rows;
+}
+function lineContainsVagueAdjective(text) {
+    const lower = text.toLowerCase();
+    for (const adjective of VAGUE_AC_ADJECTIVES) {
+        const pattern = new RegExp(`(?:^|[^A-Za-z])${adjective.replace(/ /g, "\\s+")}(?:[^A-Za-z]|$)`, "iu");
+        if (pattern.test(lower))
+            return adjective;
+    }
+    return null;
+}
+function validateSectionBody(sectionBody, rule, sectionName) {
     const bodyLines = sectionBody.split(/\r?\n/).map((line) => line.trim());
     const meaningful = meaningfulLineCount(sectionBody);
     if (meaningful === 0) {
@@ -231,6 +285,29 @@ function validateSectionBody(sectionBody, rule) {
             };
         }
     }
+    if (normalizeHeadingTitle(sectionName).toLowerCase() === "acceptance criteria" &&
+        /observable[\s,]*measurable[\s,]+(and )?falsifiable/iu.test(rule)) {
+        const rows = getMarkdownTableRows(sectionBody);
+        for (const row of rows) {
+            const criterionText = row[1] ?? row[0] ?? "";
+            const adjective = lineContainsVagueAdjective(criterionText);
+            if (adjective) {
+                return {
+                    ok: false,
+                    details: `Acceptance criterion uses vague adjective "${adjective}" without a measurable predicate: "${criterionText.slice(0, 140)}". Rewrite with a numeric threshold or boolean outcome.`
+                };
+            }
+            const hasDigit = /\d/u.test(criterionText);
+            const hasMeasurableVerb = /\b(blocks?|rejects?|returns?|matches?|equals?|emits?|succeeds?|fails?|publishes?|logs?|persists?|reads?|writes?|creates?|deletes?|throws?|contains?|restores?|exceeds?|responds?|warns?|quarantines?|includes?|raises?|passes?|denies|refuses|exits|succeeds|completes|prevents|allows|maps|points|signals|surfaces|records|produces|accepts|requires)\b/iu.test(criterionText);
+            const hasMeaningfulText = /[A-Za-z]/u.test(criterionText) && criterionText.trim().length >= 12;
+            if (hasMeaningfulText && !hasDigit && !hasMeasurableVerb) {
+                return {
+                    ok: false,
+                    details: `Acceptance criterion lacks a measurable predicate (no numeric threshold, no observable verb like blocks/returns/publishes/matches): "${criterionText.slice(0, 140)}". Rewrite so the criterion is falsifiable by a single test.`
+                };
+            }
+        }
+    }
     return {
         ok: true,
         details: "Section heading and content satisfy lint heuristics."
@@ -273,7 +350,7 @@ export async function lintArtifact(projectRoot, stage) {
         const body = hasHeading ? sectionBodyByName(sections, v.section) : null;
         const validation = body === null
             ? { ok: false, details: `No ## heading matching required section "${v.section}".` }
-            : validateSectionBody(body, v.validationRule);
+            : validateSectionBody(body, v.validationRule, v.section);
         const found = hasHeading && validation.ok;
         findings.push({
             section: v.section,
@@ -384,18 +461,19 @@ export async function validateReviewArmy(projectRoot) {
             if (!isStringArray(o.reportedBy) || o.reportedBy.length === 0) {
                 errors.push(`findings[${i}].reportedBy must be a non-empty string array.`);
             }
-            if (o.location !== undefined) {
-                if (o.location === null || typeof o.location !== "object" || Array.isArray(o.location)) {
-                    errors.push(`findings[${i}].location must be an object when present.`);
+            if (o.location === undefined || o.location === null) {
+                errors.push(`findings[${i}].location is required and must be an object with file + line.`);
+            }
+            else if (typeof o.location !== "object" || Array.isArray(o.location)) {
+                errors.push(`findings[${i}].location must be an object with file + line.`);
+            }
+            else {
+                const loc = o.location;
+                if (!isNonEmptyString(loc.file)) {
+                    errors.push(`findings[${i}].location.file must be a non-empty string.`);
                 }
-                else {
-                    const loc = o.location;
-                    if (!isNonEmptyString(loc.file)) {
-                        errors.push(`findings[${i}].location.file must be a non-empty string.`);
-                    }
-                    if (!isFiniteNumber(loc.line) || loc.line < 1) {
-                        errors.push(`findings[${i}].location.line must be a positive number.`);
-                    }
+                if (!isFiniteNumber(loc.line) || loc.line < 1) {
+                    errors.push(`findings[${i}].location.line must be a positive number.`);
                 }
             }
             if (o.recommendation !== undefined && !isNonEmptyString(o.recommendation)) {
@@ -445,6 +523,21 @@ export async function validateReviewArmy(projectRoot) {
             for (const msId of rec.multiSpecialistConfirmed) {
                 if (!findingIds.has(msId)) {
                     errors.push(`reconciliation.multiSpecialistConfirmed references unknown finding id "${msId}".`);
+                    continue;
+                }
+                if (Array.isArray(root.findings)) {
+                    const finding = root.findings.find((f) => {
+                        return f && typeof f === "object" && !Array.isArray(f) && f.id === msId;
+                    });
+                    if (finding && typeof finding === "object" && !Array.isArray(finding)) {
+                        const reportedBy = finding.reportedBy;
+                        const count = Array.isArray(reportedBy)
+                            ? new Set(reportedBy.filter((v) => typeof v === "string")).size
+                            : 0;
+                        if (count < 2) {
+                            errors.push(`reconciliation.multiSpecialistConfirmed entry "${msId}" must be confirmed by at least 2 distinct reviewers (found ${count}).`);
+                        }
+                    }
                 }
             }
         }
@@ -474,3 +567,79 @@ export async function validateReviewArmy(projectRoot) {
     }
     return { valid: errors.length === 0, errors };
 }
+/**
+ * Ensure the narrative verdict in 07-review.md is consistent with the
+ * structured review-army reconciliation. A review cannot declare
+ * APPROVED while open Critical findings or shipBlockers remain.
+ */
+export async function checkReviewVerdictConsistency(projectRoot) {
+    const errors = [];
+    const reviewMdPath = path.join(projectRoot, RUNTIME_ROOT, "artifacts", "07-review.md");
+    const armyJsonPath = path.join(projectRoot, RUNTIME_ROOT, "artifacts", "07-review-army.json");
+    let finalVerdict = "UNKNOWN";
+    if (await exists(reviewMdPath)) {
+        const raw = await fs.readFile(reviewMdPath, "utf8");
+        const sections = extractH2Sections(raw);
+        const verdictBody = sectionBodyByName(sections, "Final Verdict");
+        if (verdictBody) {
+            const chosen = [];
+            for (const token of ["APPROVED_WITH_CONCERNS", "APPROVED", "BLOCKED"]) {
+                const regex = new RegExp(`\\b${token}\\b`, "u");
+                if (regex.test(verdictBody)) {
+                    // APPROVED would match inside APPROVED_WITH_CONCERNS; prefer the longer match first.
+                    if (token === "APPROVED" && /\bAPPROVED_WITH_CONCERNS\b/u.test(verdictBody))
+                        continue;
+                    chosen.push(token);
+                }
+            }
+            if (chosen.length === 1) {
+                finalVerdict = chosen[0];
+            }
+            else if (chosen.length > 1) {
+                errors.push(`Final Verdict section lists multiple verdict tokens (${chosen.join(", ")}). Select exactly one.`);
+            }
+            else {
+                errors.push('Final Verdict section does not select APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.');
+            }
+        }
+        else {
+            errors.push('07-review.md is missing the "## Final Verdict" section.');
+        }
+    }
+    let openCriticalCount = 0;
+    let shipBlockerCount = 0;
+    if (await exists(armyJsonPath)) {
+        try {
+            const raw = await fs.readFile(armyJsonPath, "utf8");
+            const parsed = JSON.parse(raw);
+            const findings = Array.isArray(parsed.findings) ? parsed.findings : [];
+            for (const f of findings) {
+                if (!f || typeof f !== "object" || Array.isArray(f))
+                    continue;
+                const o = f;
+                if (o.severity === "Critical" && o.status === "open") {
+                    openCriticalCount++;
+                }
+            }
+            const rec = parsed.reconciliation && typeof parsed.reconciliation === "object" && !Array.isArray(parsed.reconciliation)
+                ? parsed.reconciliation
+                : null;
+            if (rec && Array.isArray(rec.shipBlockers)) {
+                shipBlockerCount = rec.shipBlockers.filter((v) => typeof v === "string").length;
+            }
+        }
+        catch {
+            // JSON validity is the concern of validateReviewArmy; skip silently here.
+        }
+    }
+    if (finalVerdict === "APPROVED" && (openCriticalCount > 0 || shipBlockerCount > 0)) {
+        errors.push(`Final Verdict is APPROVED but review-army has ${openCriticalCount} open Critical finding(s) and ${shipBlockerCount} shipBlocker(s). Use BLOCKED or APPROVED_WITH_CONCERNS.`);
+    }
+    return {
+        ok: errors.length === 0,
+        errors,
+        finalVerdict,
+        openCriticalCount,
+        shipBlockerCount
+    };
+}

package/dist/content/agents.js CHANGED Viewed

@@ -94,10 +94,10 @@ export const CCLAW_AGENTS = [
     },
     {
         name: "security-reviewer",
-        description: "PROACTIVE after auth, crypto, secrets, parsers, or sensitive data paths change. MUST BE USED when trust boundaries move, new external inputs arrive, or LLM/tool output influences privileged actions.",
+        description: "MANDATORY during every review stage. Even when no auth, crypto, secrets, parsers, or sensitive data paths changed, produce an explicit 'no-change' security attestation. MUST BE USED when trust boundaries move, new external inputs arrive, or LLM/tool output influences privileged actions.",
         tools: ["Read", "Grep", "Glob"],
         model: "balanced",
-        activation: "proactive",
+        activation: "mandatory",
         relatedStages: ["review", "design"],
         body: [
             "You are a **security vulnerability detection** specialist focused on practical exploitability.",

package/dist/content/examples.js CHANGED Viewed

@@ -1,16 +1,16 @@
 const STAGE_EXAMPLES = {
-    brainstorm: `### Context
+    brainstorm: `## Context
 - **Project state:** Monorepo with CI pipeline using custom release scripts. Release checks are scattered across shell scripts with no shared validation logic.
 - **Relevant existing code/patterns:** \`scripts/pre-publish.sh\` does metadata checks. \`src/release/\` has partial validation helpers.
-### Problem
+## Problem
 - **What we're solving:** release checks are fragile and inconsistent between CI and local runs. Invalid metadata sometimes reaches npm publish.
 - **Success criteria:** invalid release preconditions are caught before publish with explicit operator feedback, in both CI and local workflows.
 - **Constraints:** no new runtime dependencies; must work within existing CI pipeline structure.
-### Clarifying Questions
+## Clarifying Questions
 | # | Question | Answer | Decision impact |
 | --- | --- | --- | --- |
@@ -18,7 +18,7 @@ const STAGE_EXAMPLES = {
 | 2 | Should the validation logic live in a reusable module or stay as shell scripts? | Reusable module. | Architecture: shared TypeScript module imported by CI and local tooling, not duplicated shell scripts. |
 | 3 | For v1, prioritize rapid delivery or maximum configurability? | Rapid delivery. | Minimal deterministic validation surface; defer plugin/config system to v2. |
-### Approaches
+## Approaches
 | Approach | Architecture | Trade-offs | Recommendation |
 | --- | --- | --- | --- |
@@ -26,33 +26,33 @@ const STAGE_EXAMPLES = {
 | B: Hardened shell scripts | Keep existing script approach, add stricter checks and error messages. | Lowest effort. Weak reuse, CI/local divergence risk grows over time. | Viable fallback if TS module is blocked. |
 | C: Full release framework | New release orchestrator with plugin system, config files, rollback commands. | Maximum flexibility. High risk, delivery delay, over-engineered for current needs. | Not recommended for v1. |
-### Selected Direction
+## Selected Direction
 - **Approach:** A — Reusable validation module
 - **Rationale:** shared TS module gives consistent behavior in CI and local, avoids script duplication, and stays within the no-new-dependency constraint.
 - **Approval:** approved
-### Design
+## Design
 - **Architecture:** single \`release-validator\` module in \`src/release/\` exporting typed check functions. CI script and local CLI both import and run the same checks.
 - **Key components:** \`validateMetadata()\`, \`validateChangelog()\`, \`validateVersion()\` — each returns a typed result with error details. A \`runAll()\` orchestrator runs checks and exits non-zero on any failure.
 - **Data flow:** package.json + CHANGELOG.md → validator module → structured result → CI/CLI renders human-readable report.
-### Assumptions and Open Questions
+## Assumptions and Open Questions
 - **Assumptions:** CI remains the primary execution path; existing release metadata files remain the source of truth; v1 prioritizes determinism over customization.
 - **Open questions:** What exact rollback sequence for failed publish? Should status output include machine-readable JSON alongside markdown?
-### Notes for the next stage
+## Notes for the next stage
 Carry the no-new-dependency constraint and hard-block behavior directly into scope in/out boundaries.`,
-    scope: `### Scope contract
+    scope: `## Scope contract
 **Mode selected:** SELECTIVE EXPANSION
 **Default heuristic used:** feature enhancement -> selective
 **Mode-specific analysis result:** hold-scope baseline accepted first; one expansion accepted (degraded-state UX), one deferred (real-time channel upgrade).
-### Prime Directives (applied)
+## Prime Directives (applied)
 - Zero silent failures: every delivery failure maps to a visible degraded state.
 - Named error surfaces: stream disconnect, auth drift, and publisher timeout are explicit.
@@ -60,11 +60,11 @@ Carry the no-new-dependency constraint and hard-block behavior directly into sco
 - Interaction edge cases in scope: double-open panel, reconnect after sleep, stale tab state.
 - Observability in scope: stream error counter, publish-to-visible lag metric, and alert threshold.
-### Premise challenge result
+## Premise challenge result
 The original premise (“add notifications”) was reframed to **“ensure users know when an action requires follow-up”**, which expands the solution space beyond toast spam to include durable inbox items, empty states, and recovery paths when delivery fails.
-### Dream State Mapping
+## Dream State Mapping
 | Stage | Statement |
 | --- | --- |
@@ -73,7 +73,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
 | **12-MONTH IDEAL** | Unified notification center with reliable multi-channel fan-out and user-level routing preferences. |
 | **Alignment verdict** | Aligned: this scope builds the durability foundation without prematurely committing to channel expansion. |
-### Mode-Specific Analysis
+## Mode-Specific Analysis
 **Selected mode:** SELECTIVE EXPANSION
@@ -81,7 +81,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
 - **Expansion evaluated — degraded-state UX (accepted):** Adding an explicit "live updates paused" banner and polling fallback turns a reliability gap into a visible, recoverable state. Low incremental effort (S), high user trust payoff.
 - **Expansion evaluated — real-time channel upgrade (deferred):** WebSocket channel provides lower latency but requires new infra (connection pool, auth handshake). Not justified for current load; deferred to post-v1 validation.
-### Implementation Alternatives
+## Implementation Alternatives
 | Option | Summary | Effort (S/M/L/XL) | Risk | Pros | Cons | Reuses |
 | --- | --- | --- | --- | --- | --- | --- |
@@ -89,7 +89,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
 | **B (recommended)** | SSE live updates + REST fallback snapshot | M | Med | Better timeliness, graceful degradation | Requires reconnect handling | Existing event publisher + REST path |
 | **C (ideal architecture)** | Event bus + WebSocket channel + feed projection | XL | High | Strong long-term scalability | Overbuilt for current demand | Partial reuse of publisher only |
-### Temporal Interrogation
+## Temporal Interrogation
 | Time slice | Likely decision pressure | Lock now or defer? | Reason |
 | --- | --- | --- | --- |
@@ -98,7 +98,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
 | **HOUR 4-5 (integration)** | Handling gaps between snapshot and stream cursor | **Lock now** | Prevent silent data loss during reconnect windows |
 | **HOUR 6+ (polish/tests)** | Banner copy tone and polling cadence tuning | **Defer** | Safe to iterate after baseline reliability is proven |
-### In scope / out of scope / deferred
+## In scope / out of scope / deferred
 | Category | Items |
 | --- | --- |
@@ -106,29 +106,29 @@ The original premise (“add notifications”) was reframed to **“ensure users
 | **Out of scope** | Email/SMS/push providers; marketing campaigns; per-user notification preferences beyond on/off |
 | **Deferred** | WebSocket channel; rich media attachments in notifications; full-text search across historical events |
-### Discretion Areas
+## Discretion Areas
 - Client-side badge rendering strategy (optimistic vs server-confirmed) is implementation discretion.
 - Polling fallback backoff curve is implementation discretion if degraded-state UX remains explicit.
-### Error & Rescue Registry (sample entry)
+## Error & Rescue Registry (sample entry)
 | Capability | Failure mode | Detection | Fallback |
 | --- | --- | --- | --- |
 | Event delivery | SSE connection drops mid-session | Client \`EventSource\` error event + heartbeat timeout | Fall back to REST polling every 30s until SSE reconnect succeeds; show subtle “live updates paused” banner |
-### Completion Dashboard
+## Completion Dashboard
 - Checklist findings: 9/9 complete (complex path)
 - Resolved decisions count: 7
 - Unresolved decisions: None
-### Scope Summary
+## Scope Summary
 - Accepted scope: durable feed + SSE + explicit degraded UX.
 - Deferred: WebSocket channel and rich-media/search enhancements.
 - Explicitly excluded: outbound channels and marketing workflows for v1.`,
-    design: `### Codebase Investigation (blast-radius files)
+    design: `## Codebase Investigation (blast-radius files)
 | File | Current responsibility | Patterns discovered |
 | --- | --- | --- |
@@ -139,7 +139,7 @@ The original premise (“add notifications”) was reframed to **“ensure users
 Discovery: existing EventEmitter-based bus has no durability — notifications must add persistence layer on top, not replace the bus.
-### Search Before Building (sample result)
+## Search Before Building (sample result)
 | Layer | Label | What to reuse first |
 | --- | --- | --- |
@@ -147,7 +147,7 @@ Discovery: existing EventEmitter-based bus has no durability — notifications m
 | Layer 2 | existing codebase | Existing auth middleware, existing API client wrapper, existing feature flags helper |
 | Layer 3 | npm | A small, well-maintained SSE helper (only if Layer 1–2 cannot cover framing/reconnect ergonomics) |
-### Architecture Diagram (mandatory)
+## Architecture Diagram (mandatory)
 \`\`\`
 ┌─────────────┐      ┌──────────────┐      ┌────────────────┐
@@ -163,7 +163,7 @@ Discovery: existing EventEmitter-based bus has no durability — notifications m
 Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Queue (persist) → Read Model (project).
-### What Already Exists
+## What Already Exists
 | Sub-problem | Existing code/library | Layer | Reuse decision |
 | --- | --- | --- | --- |
@@ -172,7 +172,7 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
 | SSE framing | None | Layer 3 | Evaluate \`better-sse\` npm package |
 | Notification schema | None | — | New: define in \`src/schemas/notification.ts\` |
-### Failure Mode Table
+## Failure Mode Table
 | Failure | Trigger | Detection | Mitigation | User impact |
 | --- | --- | --- | --- | --- |
@@ -180,13 +180,13 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
 | Duplicate publish | Retry after timeout | Dedupe key check in outbox | Upsert with idempotency key | None (transparent) |
 | Queue backpressure | Spike >1000 events/s | Queue depth metric alarm | Back-pressure signal to publisher, shed non-critical events | Delayed delivery of low-priority notifications |
-### Test Strategy
+## Test Strategy
 - **Unit:** validator functions, dedupe-key logic, event schema factories — target 90%+ line coverage.
 - **Integration:** publisher → outbox → read-model pipeline via in-memory DB; SSE reconnect with simulated drops.
 - **E2E:** one happy-path browser test (publish → feed visible) and one degraded-path test (SSE down → REST fallback + banner).
-### Performance Budget
+## Performance Budget
 | Critical path | Metric | Target | Measurement method |
 | --- | --- | --- | --- |
@@ -194,13 +194,13 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
 | Feed snapshot load | p99 response time | ≤ 200 ms | Load test with 1 000 items per user |
 | SSE reconnect | Time to first event after drop | ≤ 3 s | Simulated disconnect in integration suite |
-### NOT in scope
+## NOT in scope
 - Outbound channels (email, push, SMS) — deferred to v2.
 - Admin notification management UI — separate workstream.
 - Notification preferences / mute rules — requires user settings redesign.
-### Parallelization Strategy
+## Parallelization Strategy
 | Module | Depends on | Parallel lane | Conflict risk |
 | --- | --- | --- | --- |
@@ -208,18 +208,18 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
 | Publisher + outbox (T2) | T1 | Lane A | None |
 | Client feed + SSE (T3) | T1, T2 | Lane B (after T1) | Shared event type definitions |
-### Unresolved Decisions
+## Unresolved Decisions
 | Decision | Status | Options | Missing info | Default if unanswered |
 | --- | --- | --- | --- | --- |
 | Feed storage model | OPEN | (A) append-only event log, (B) mutable rows, (C) hybrid | Load testing results on read patterns | (A) append-only — safest for audit trail |
-### Interface sketch (non-binding)
+## Interface sketch (non-binding)
 - **Client → server:** \`GET /api/me/notifications/snapshot?limit=50\` plus optional cursor parameters (if adopted).
 - **Server → client:** \`GET /api/me/notifications/stream\` as SSE with periodic heartbeats.
-### Completion Dashboard
+## Completion Dashboard
 | Review Section | Status | Issues |
 | --- | --- | --- |
@@ -231,10 +231,10 @@ Data flow: Gateway → Service (validate + enrich) → Publisher (fan-out) → Q
 **Decisions made:** 4 | **Unresolved:** 1 (feed storage model)
-### Quality bar for this stage
+## Quality bar for this stage
 Design output should be **reviewable by someone who did not attend brainstorming**: they can trace from constraints → components → open decisions without reading code.`,
-    spec: `### Acceptance Criteria
+    spec: `## Acceptance Criteria
 | ID | Criterion (observable/measurable/falsifiable) | Design Decision Ref |
 | --- | --- | --- |
@@ -242,7 +242,7 @@ Design output should be **reviewable by someone who did not attend brainstorming
 | AC-2 | Given the same logical notification is published twice with the same dedupe key, when the client processes the stream, the feed contains exactly one visible item for that key. | Architecture: dedupe-key in event schema |
 | AC-3 | Given the live connection is unavailable, when the user opens the notifications panel, the UI shows a non-blocking "live updates paused" banner and loads the latest snapshot via REST within 2 seconds. | Architecture: REST fallback + degraded UX |
-### Edge Cases
+## Edge Cases
 | Criterion ID | Boundary case | Error case |
 | --- | --- | --- |
@@ -250,12 +250,12 @@ Design output should be **reviewable by someone who did not attend brainstorming
 | AC-2 | Two events with identical dedupe key arrive within same SSE frame (boundary: only one row rendered). | Dedupe-key field missing — reject event at publisher and log error. |
 | AC-3 | SSE disconnects after exactly 30 s heartbeat timeout (boundary: banner appears within 1 s of timeout). | REST snapshot endpoint returns 500 — panel shows "unable to load" with retry button. |
-### Constraints and Assumptions
+## Constraints and Assumptions
 - **Constraints:** Max feed size 1 000 items per user. SSE heartbeat interval 30 s (server-side). REST snapshot p99 \u2264 200 ms. No new runtime dependencies.
 - **Assumptions:** Users have a single active session at a time for v1. Existing auth middleware provides user context. Event publisher is single-writer per user.
-### Testability Map
+## Testability Map
 | Criterion ID | Verification approach | Command/manual steps |
 | --- | --- | --- |
@@ -263,11 +263,11 @@ Design output should be **reviewable by someone who did not attend brainstorming
 | AC-2 | Unit test: publish same dedupe key twice \u2192 assert single row in feed store. | \`pnpm vitest run tests/unit/dedupe-feed.test.ts\` |
 | AC-3 | E2E test: kill SSE transport \u2192 assert banner visible + REST snapshot loads. | \`pnpm playwright test tests/e2e/degraded-mode.spec.ts\` |
-### Approval
+## Approval
 - Approved by: user
 - Date: 2026-04-14`,
-    plan: `### Dependency Graph
+    plan: `## Dependency Graph
 \`\`\`
 T-1 ──▶ T-2 ──▶ T-3
@@ -277,7 +277,7 @@ T-1 ──▶ T-2 ──▶ T-3
 Parallel opportunity: T-1 is a prerequisite for both T-2 and T-3 (T-3 also needs T-2).
-### Dependency Waves
+## Dependency Waves
 #### Wave 1 (foundation)
 - Task IDs: T-1
@@ -295,7 +295,7 @@ Parallel opportunity: T-1 is a prerequisite for both T-2 and T-3 (T-3 also needs
 Execution rule: complete and verify each wave before starting the next wave.
-### Task List
+## Task List
 | Task ID | Description | Acceptance criterion | Verification command | Effort |
 | --- | --- | --- | --- | --- |
@@ -303,7 +303,7 @@ Execution rule: complete and verify each wave before starting the next wave.
 | T-2 | Implement publisher + outbox write path | AC-1: integration test (happy path publish) | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` |
 | T-3 | Implement client feed + SSE subscribe + REST fallback | AC-1, AC-2, AC-3: e2e tests including degraded mode | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` |
-### Acceptance Mapping
+## Acceptance Mapping
 | Criterion ID | Task IDs |
 | --- | --- |
@@ -311,17 +311,17 @@ Execution rule: complete and verify each wave before starting the next wave.
 | AC-2 (idempotency) | T-1, T-2 |
 | AC-3 (failure visibility) | T-3 |
-### Risk Assessment
+## Risk Assessment
 | Task/Wave | Risk | Likelihood | Impact | Mitigation |
 | --- | --- | --- | --- | --- |
 | T-3 (Wave 3) | SSE reconnect logic complex | Medium | High | Spike reconnect in isolation before integrating with feed UI |
 | Wave 2 → 3 | Publisher API contract may shift | Low | Medium | Pin contract in T-1 schema; T-2 integration test validates |
-### WAIT_FOR_CONFIRM
+## WAIT_FOR_CONFIRM
 - Status: pending
 - Confirmed by:`,
-    tdd: `### RED Evidence
+    tdd: `## RED Evidence
 | Slice | Test name | Command | Failure output summary |
 | --- | --- | --- | --- |
@@ -329,7 +329,7 @@ Execution rule: complete and verify each wave before starting the next wave.
 | S-2 (publisher outbox) | publishes event to outbox with dedupe key | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` | publishToOutbox is not a function |
 | S-3 (client feed + fallback) | shows notification within 5s via SSE | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` | Element [data-testid="feed-item"] not found |
-### Acceptance Mapping
+## Acceptance Mapping
 | Slice | Plan task ID | Spec criterion ID |
 | --- | --- | --- |
@@ -337,7 +337,7 @@ Execution rule: complete and verify each wave before starting the next wave.
 | S-2 | T-2 | AC-1 |
 | S-3 | T-3 | AC-1, AC-2, AC-3 |
-### Failure Analysis
+## Failure Analysis
 | Slice | Expected missing behavior | Actual failure reason |
 | --- | --- | --- |
@@ -345,22 +345,22 @@ Execution rule: complete and verify each wave before starting the next wave.
 | S-2 | publishToOutbox function not implemented | Function not found — correct: write path missing |
 | S-3 | Feed UI not rendered, SSE not connected | DOM element missing — correct: client component not built |
-### GREEN Evidence
+## GREEN Evidence
 - Full suite command: \`\`\`pnpm vitest run && pnpm playwright test\`\`\`
 - Full suite result: 47 tests passed (3 new + 44 existing), 0 failed, 0 skipped
-### REFACTOR Notes
+## REFACTOR Notes
 - What changed: Extracted \`\`\`mergeLatestByDedupeKey\`\`\` helper from inline loop in \`\`\`summarizeDedupedFeed\`\`\`; moved SSE reconnect logic into \`\`\`useSSEConnection\`\`\` hook.
 - Why: Dedupe merge logic is reused by both publisher and client; reconnect logic was duplicated across components.
 - Behavior preserved: Full suite re-run confirms 47/47 pass after refactor.
-### Traceability
+## Traceability
 - Plan task IDs: T-1, T-2, T-3
 - Spec criterion IDs: AC-1, AC-2, AC-3`,
-    review: `### Layer 1 Verdict
+    review: `## Layer 1 Verdict
 | Criterion | Verdict | Evidence |
 | --- | --- | --- |
@@ -368,7 +368,7 @@ Execution rule: complete and verify each wave before starting the next wave.
 | AC-2: Dedupe — one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
 | AC-3: Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
-### Layer 2 Findings
+## Layer 2 Findings
 | ID | Severity | Category | Description | Status |
 | --- | --- | --- | --- | --- |
@@ -376,12 +376,12 @@ Execution rule: complete and verify each wave before starting the next wave.
 | R-2 | Important | performance | \`feedStore.merge()\` does full-array scan on every SSE event; O(n) per event where n is feed length. | open |
 | R-3 | Suggestion | architecture | SSE reconnect logic duplicated across \`useNotifications\` and \`usePresence\`; extract shared hook. | open |
-### Review Army Contract
+## Review Army Contract
 - See \`07-review-army.json\`
 - Reconciliation summary: 1 duplicate collapsed (R-1 reported by spec-reviewer and code-reviewer), 0 conflicts
-### Review Readiness Dashboard
+## Review Readiness Dashboard
 - Layer 1 complete: yes (3/3 criteria)
 - Layer 2 complete: yes (5 sections reviewed)
@@ -389,16 +389,16 @@ Execution rule: complete and verify each wave before starting the next wave.
 - Open critical blockers: 1 (R-1)
 - Ship recommendation: BLOCKED until R-1 resolved
-### Severity Summary
+## Severity Summary
 - Critical: 1
 - Important: 1
 - Suggestion: 1
-### Final Verdict
+## Final Verdict
 - BLOCKED`,
-    ship: `### Preflight Results
+    ship: `## Preflight Results
 - Review verdict: APPROVED_WITH_CONCERNS (R-1 resolved, R-2 accepted as known debt)
 - Build: pass (\`pnpm build\` succeeds)
@@ -407,25 +407,25 @@ Execution rule: complete and verify each wave before starting the next wave.
 - Type-check: pass (\`pnpm typecheck\` clean)
 - Working tree clean: yes (\`git status\` shows no uncommitted changes)
-### Release Notes
+## Release Notes
 - **Added:** In-app notification feed with SSE updates and REST fallback snapshotting (AC-1, AC-3).
 - **Changed:** Notification payloads now include a stable dedupe key for idempotent rendering (AC-2).
 - **Fixed:** Panel no longer drops the newest item when reconnecting after sleep/resume.
 - **Breaking changes:** None.
-### Rollback Plan
+## Rollback Plan
 - Trigger conditions: error rate on \`/notifications/stream\` exceeds 5% for >5 minutes, or p95 publish-to-visible lag exceeds 10s.
 - Rollback steps: \`git revert <merge-sha> && git push origin main\` then redeploy; if DB migrations shipped, run \`2026_04_12_notifications_cursor_down.sql\` before traffic.
 - Verification steps: confirm error rate returns to pre-release baseline within 10 minutes; smoke-test feed panel manually.
-### Monitoring
+## Monitoring
 - Metrics/logs to watch: error rate on \`/notifications/stream\` and snapshot endpoint for 24h; p95 publish-to-visible lag via metrics dashboard.
 - Risk note (if no monitoring): N/A — monitoring is in place.
-### Finalization
+## Finalization
 - Selected enum: FINALIZE_OPEN_PR
 - Selected label: B
@@ -436,5 +436,14 @@ export function stageExamples(stage) {
     const examples = STAGE_EXAMPLES[stage];
     if (!examples)
         return "";
-    return `## Examples\n\nConcrete samples of what good output looks like for this stage.\n\n${examples}\n`;
+    return [
+        "## Examples",
+        "",
+        "Concrete artifact samples. These mirror the exact heading levels agents must use when authoring the stage artifact (all H2 `##` sections), so they are presented inside a markdown fence to avoid collapsing into the SKILL outline.",
+        "",
+        "```markdown",
+        examples,
+        "```",
+        ""
+    ].join("\n");
 }

package/dist/content/templates.js CHANGED Viewed

@@ -361,28 +361,13 @@ Execution rule: complete and verify each wave before starting the next wave.
 `,
     "07-review-army.json": `{
   "version": 1,
-  "generatedAt": "",
+  "generatedAt": "<ISO 8601 timestamp, e.g. 2026-04-14T12:00:00Z>",
   "scope": {
-    "base": "",
-    "head": "",
+    "base": "<base branch or ref>",
+    "head": "<head branch or ref>",
     "files": []
   },
-  "findings": [
-    {
-      "id": "",
-      "title": "",
-      "severity": "Critical",
-      "confidence": 7,
-      "category": "correctness",
-      "location": {
-        "file": ""
-      },
-      "fingerprint": "",
-      "reportedBy": [],
-      "status": "open",
-      "recommendation": ""
-    }
-  ],
+  "findings": [],
   "reconciliation": {
     "duplicatesCollapsed": 0,
     "conflicts": [],

package/dist/delegation.d.ts CHANGED Viewed

@@ -7,6 +7,11 @@ export type DelegationEntry = {
     taskId?: string;
     waiverReason?: string;
     ts: string;
+    /**
+     * Run id the entry belongs to. Older ledgers written before 0.5.17 may omit this;
+     * consumers treat missing runId as unscoped (conservatively excluded from current-run checks).
+     */
+    runId?: string;
 };
 export type DelegationLedger = {
     runId: string;
@@ -18,4 +23,5 @@ export declare function checkMandatoryDelegations(projectRoot: string, stage: Fl
     satisfied: boolean;
     missing: string[];
     waived: string[];
+    staleIgnored: string[];
 }>;

package/dist/delegation.js CHANGED Viewed

@@ -25,7 +25,8 @@ function isDelegationEntry(value) {
         statusOk &&
         typeof o.ts === "string" &&
         (o.taskId === undefined || typeof o.taskId === "string") &&
-        (o.waiverReason === undefined || typeof o.waiverReason === "string"));
+        (o.waiverReason === undefined || typeof o.waiverReason === "string") &&
+        (o.runId === undefined || typeof o.runId === "string"));
 }
 function parseLedger(raw, runId) {
     if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
@@ -63,21 +64,27 @@ export async function appendDelegation(projectRoot, entry) {
     await withDirectoryLock(delegationLockPath(projectRoot), async () => {
         const filePath = delegationLogPath(projectRoot);
         const prior = await readDelegationLedger(projectRoot);
+        const stamped = { ...entry, runId: entry.runId ?? activeRunId };
         const ledger = {
             runId: activeRunId,
-            entries: [...prior.entries, entry]
+            entries: [...prior.entries, stamped]
         };
         await writeFileSafe(filePath, `${JSON.stringify(ledger, null, 2)}\n`);
     });
 }
 export async function checkMandatoryDelegations(projectRoot, stage) {
     const mandatory = stageSchema(stage).mandatoryDelegations;
+    const { activeRunId } = await readFlowState(projectRoot);
     const ledger = await readDelegationLedger(projectRoot);
     const forStage = ledger.entries.filter((e) => e.stage === stage);
+    const forRun = forStage.filter((e) => e.runId === activeRunId);
+    const staleIgnored = forStage
+        .filter((e) => e.runId !== activeRunId)
+        .map((e) => `${e.agent}(runId=${e.runId ?? "unknown"})`);
     const missing = [];
     const waived = [];
     for (const agent of mandatory) {
-        const rows = forStage.filter((e) => e.agent === agent);
+        const rows = forRun.filter((e) => e.agent === agent);
         const ok = rows.some((e) => e.status === "completed" || e.status === "waived");
         if (!ok) {
             missing.push(agent);
@@ -89,6 +96,7 @@ export async function checkMandatoryDelegations(projectRoot, stage) {
     return {
         satisfied: missing.length === 0,
         missing,
-        waived
+        waived,
+        staleIgnored
     };
 }

package/dist/doctor.js CHANGED Viewed

@@ -13,7 +13,7 @@ import { policyChecks } from "./policy.js";
 import { readFlowState } from "./runs.js";
 import { checkMandatoryDelegations } from "./delegation.js";
 import { buildTraceMatrix } from "./trace-matrix.js";
-import { reconcileAndWriteCurrentStageGateCatalog, verifyCurrentStageGateEvidence } from "./gate-evidence.js";
+import { reconcileAndWriteCurrentStageGateCatalog, verifyCompletedStagesGateClosure, verifyCurrentStageGateEvidence } from "./gate-evidence.js";
 import { stageSkillFolder } from "./content/skills.js";
 import { UTILITY_SKILL_FOLDERS } from "./content/utility-skills.js";
 import { CONTEXT_MODES, DEFAULT_CONTEXT_MODE } from "./content/contexts.js";
@@ -768,11 +768,37 @@ export async function doctorChecks(projectRoot, options = {}) {
             ? `warning: waived mandatory delegations for stage "${flowState.currentStage}": ${delegation.waived.join(", ")}`
             : "no waived mandatory delegations for current stage"
     });
+    checks.push({
+        name: "warning:delegation:stale_runs",
+        ok: true,
+        details: delegation.staleIgnored.length > 0
+            ? `warning: ${delegation.staleIgnored.length} delegation entries from other runs were ignored: ${delegation.staleIgnored.join(", ")}`
+            : "no stale delegation entries from prior runs"
+    });
     const trace = await buildTraceMatrix(projectRoot);
+    const artifactsDir = path.join(projectRoot, RUNTIME_ROOT, "artifacts");
+    const specExists = await exists(path.join(artifactsDir, "04-spec.md"));
+    const planExists = await exists(path.join(artifactsDir, "05-plan.md"));
+    const tddExists = await exists(path.join(artifactsDir, "06-tdd.md"));
     const traceHasSignal = trace.entries.length > 0 ||
         trace.orphanedCriteria.length > 0 ||
         trace.orphanedTasks.length > 0 ||
         trace.orphanedTests.length > 0;
+    const artifactsPresent = specExists || planExists || tddExists;
+    const emptyMatrixWithArtifacts = !traceHasSignal && artifactsPresent;
+    checks.push({
+        name: "trace:matrix_populated",
+        ok: !emptyMatrixWithArtifacts,
+        details: emptyMatrixWithArtifacts
+            ? `trace matrix is empty but artifacts exist (${[
+                specExists ? "04-spec.md" : null,
+                planExists ? "05-plan.md" : null,
+                tddExists ? "06-tdd.md" : null
+            ].filter(Boolean).join(", ")}). The extractors found no criterion/task/slice IDs — check heading conventions and ID formats.`
+            : artifactsPresent
+                ? `trace matrix parsed ${trace.entries.length} criterion(s) from present artifacts`
+                : "no downstream artifacts to trace yet"
+    });
     checks.push({
         name: "trace:criteria_coverage",
         ok: !traceHasSignal || trace.orphanedCriteria.length === 0,
@@ -802,6 +828,16 @@ export async function doctorChecks(projectRoot, options = {}) {
             ? `stage "${gateEvidence.stage}" gate evidence is consistent (required=${gateEvidence.requiredCount}, passed=${gateEvidence.passedCount}, blocked=${gateEvidence.blockedCount})`
             : gateEvidence.issues.join(" ")
     });
+    const completedClosure = verifyCompletedStagesGateClosure(flowState);
+    checks.push({
+        name: "gates:closure:completed_stages",
+        ok: completedClosure.ok,
+        details: completedClosure.ok
+            ? flowState.completedStages.length === 0
+                ? "no completed stages yet"
+                : `all ${flowState.completedStages.length} completed stages have every required gate passed`
+            : completedClosure.issues.join(" ")
+    });
     // Self-improvement block in stage skills
     for (const stage of COMMAND_FILE_ORDER) {
         const skillPath = path.join(projectRoot, RUNTIME_ROOT, "skills", stageSkillFolder(stage), "SKILL.md");

package/dist/gate-evidence.d.ts CHANGED Viewed

@@ -7,8 +7,22 @@ export interface GateEvidenceCheckResult {
     requiredCount: number;
     passedCount: number;
     blockedCount: number;
+    /** True only when every required gate for the stage is in `passed` and none are `blocked`. */
+    complete: boolean;
+    /** Required gate ids that are neither passed nor blocked. */
+    missingRequired: string[];
+}
+export interface CompletedStagesClosureResult {
+    ok: boolean;
+    issues: string[];
+    openStages: Array<{
+        stage: FlowStage;
+        missingRequired: string[];
+        blocked: string[];
+    }>;
 }
 export declare function verifyCurrentStageGateEvidence(projectRoot: string, flowState: FlowState): Promise<GateEvidenceCheckResult>;
+export declare function verifyCompletedStagesGateClosure(flowState: FlowState): CompletedStagesClosureResult;
 export interface GateReconciliationResult {
     stage: FlowStage;
     changed: boolean;

package/dist/gate-evidence.js CHANGED Viewed

@@ -1,6 +1,29 @@
-import { lintArtifact, validateReviewArmy } from "./artifact-linter.js";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { checkReviewVerdictConsistency, lintArtifact, validateReviewArmy } from "./artifact-linter.js";
+import { RUNTIME_ROOT } from "./constants.js";
 import { stageSchema } from "./content/stage-schema.js";
+import { exists } from "./fs-utils.js";
 import { readFlowState, writeFlowState } from "./runs.js";
+async function currentStageArtifactExists(projectRoot, stage) {
+    const artifactFile = stageSchema(stage).artifactFile;
+    const candidates = [
+        path.join(projectRoot, RUNTIME_ROOT, "artifacts", artifactFile),
+        path.join(projectRoot, artifactFile)
+    ];
+    for (const candidate of candidates) {
+        if (await exists(candidate))
+            return true;
+    }
+    // Artifact-linter also accepts the file under current working directory fallback; stat once more.
+    try {
+        await fs.access(path.join(projectRoot, artifactFile));
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
 function unique(values) {
     return [...new Set(values)];
 }
@@ -44,7 +67,8 @@ export async function verifyCurrentStageGateEvidence(projectRoot, flowState) {
             issues.push(`blocked gate "${gateId}" is not defined for stage "${stage}".`);
         }
     }
-    const shouldValidateArtifact = catalog.passed.length > 0 || flowState.completedStages.includes(stage);
+    const artifactPresent = await currentStageArtifactExists(projectRoot, stage);
+    const shouldValidateArtifact = artifactPresent || catalog.passed.length > 0 || flowState.completedStages.includes(stage);
     if (shouldValidateArtifact) {
         const lint = await lintArtifact(projectRoot, stage);
         if (!lint.passed) {
@@ -60,6 +84,21 @@ export async function verifyCurrentStageGateEvidence(projectRoot, flowState) {
             if (!reviewArmy.valid) {
                 issues.push(`review-army validation failed: ${reviewArmy.errors.join("; ")}`);
             }
+            const verdictConsistency = await checkReviewVerdictConsistency(projectRoot);
+            if (!verdictConsistency.ok) {
+                issues.push(`review verdict inconsistency: ${verdictConsistency.errors.join("; ")}`);
+            }
+        }
+    }
+    const passedSet = new Set(catalog.passed);
+    const missingRequired = required.filter((gateId) => !passedSet.has(gateId));
+    const complete = missingRequired.length === 0 && catalog.blocked.length === 0;
+    if (flowState.completedStages.includes(stage) && !complete) {
+        if (missingRequired.length > 0) {
+            issues.push(`stage "${stage}" is marked completed but required gates are not passed: ${missingRequired.join(", ")}.`);
+        }
+        if (catalog.blocked.length > 0) {
+            issues.push(`stage "${stage}" is marked completed but has blocked gates: ${catalog.blocked.join(", ")}.`);
         }
     }
     return {
@@ -68,9 +107,32 @@ export async function verifyCurrentStageGateEvidence(projectRoot, flowState) {
         issues,
         requiredCount: required.length,
         passedCount: catalog.passed.length,
-        blockedCount: catalog.blocked.length
+        blockedCount: catalog.blocked.length,
+        complete,
+        missingRequired
     };
 }
+export function verifyCompletedStagesGateClosure(flowState) {
+    const issues = [];
+    const openStages = [];
+    for (const stage of flowState.completedStages) {
+        const schema = stageSchema(stage);
+        const catalog = flowState.stageGateCatalog[stage];
+        const required = schema.requiredGates.map((gate) => gate.id);
+        const passedSet = new Set(catalog.passed);
+        const missingRequired = required.filter((gateId) => !passedSet.has(gateId));
+        if (missingRequired.length > 0 || catalog.blocked.length > 0) {
+            openStages.push({ stage, missingRequired, blocked: [...catalog.blocked] });
+            if (missingRequired.length > 0) {
+                issues.push(`completed stage "${stage}" has unpassed required gates: ${missingRequired.join(", ")}.`);
+            }
+            if (catalog.blocked.length > 0) {
+                issues.push(`completed stage "${stage}" still has blocked gates: ${catalog.blocked.join(", ")}.`);
+            }
+        }
+    }
+    return { ok: openStages.length === 0, issues, openStages };
+}
 export function reconcileCurrentStageGateCatalog(flowState) {
     const stage = flowState.currentStage;
     const required = stageSchema(stage).requiredGates.map((gate) => gate.id);

package/dist/runs.d.ts CHANGED Viewed

@@ -1,5 +1,17 @@
 import { type FlowState } from "./flow-state.js";
 import type { FlowStage } from "./types.js";
+export declare class InvalidStageTransitionError extends Error {
+    readonly from: FlowStage;
+    readonly to: FlowStage;
+    constructor(from: FlowStage, to: FlowStage, message: string);
+}
+export interface WriteFlowStateOptions {
+    /**
+     * When true, skip prior-state validation. Used for run archival, initial
+     * bootstrap, or explicit recovery; never set from normal stage handlers.
+     */
+    allowReset?: boolean;
+}
 export interface CclawRunMeta {
     id: string;
     title: string;
@@ -32,7 +44,7 @@ export declare class CorruptFlowStateError extends Error {
     constructor(statePath: string, quarantinedPath: string, cause: unknown);
 }
 export declare function readFlowState(projectRoot: string): Promise<FlowState>;
-export declare function writeFlowState(projectRoot: string, state: FlowState): Promise<void>;
+export declare function writeFlowState(projectRoot: string, state: FlowState, options?: WriteFlowStateOptions): Promise<void>;
 export declare function ensureRunSystem(projectRoot: string, _options?: EnsureRunSystemOptions): Promise<FlowState>;
 export declare function listRuns(projectRoot: string): Promise<CclawRunMeta[]>;
 export declare function archiveRun(projectRoot: string, featureName?: string): Promise<ArchiveRunResult>;

package/dist/runs.js CHANGED Viewed

@@ -1,8 +1,35 @@
 import fs from "node:fs/promises";
 import path from "node:path";
 import { COMMAND_FILE_ORDER, RUNTIME_ROOT } from "./constants.js";
-import { createInitialFlowState } from "./flow-state.js";
+import { canTransition, createInitialFlowState } from "./flow-state.js";
 import { ensureDir, exists, withDirectoryLock, writeFileSafe } from "./fs-utils.js";
+export class InvalidStageTransitionError extends Error {
+    from;
+    to;
+    constructor(from, to, message) {
+        super(message);
+        this.from = from;
+        this.to = to;
+        this.name = "InvalidStageTransitionError";
+    }
+}
+function validateFlowTransition(prev, next) {
+    if (prev.activeRunId !== next.activeRunId) {
+        // New run — only reset paths may change the runId, but those set allowReset.
+        throw new InvalidStageTransitionError(prev.currentStage, next.currentStage, `cannot change activeRunId from "${prev.activeRunId}" to "${next.activeRunId}" without allowReset.`);
+    }
+    for (const completed of prev.completedStages) {
+        if (!next.completedStages.includes(completed)) {
+            throw new InvalidStageTransitionError(prev.currentStage, next.currentStage, `completedStages must be monotonic: stage "${completed}" was previously completed but is missing from the new state.`);
+        }
+    }
+    if (prev.currentStage === next.currentStage) {
+        return;
+    }
+    if (!canTransition(prev.currentStage, next.currentStage)) {
+        throw new InvalidStageTransitionError(prev.currentStage, next.currentStage, `no transition rule allows "${prev.currentStage}" -> "${next.currentStage}". Use /cc-next to advance stages or archive the run to reset.`);
+    }
+}
 const FLOW_STATE_REL_PATH = `${RUNTIME_ROOT}/state/flow-state.json`;
 const RUNS_DIR_REL_PATH = `${RUNTIME_ROOT}/runs`;
 const ACTIVE_ARTIFACTS_REL_PATH = `${RUNTIME_ROOT}/artifacts`;
@@ -251,10 +278,28 @@ export async function readFlowState(projectRoot) {
     }
     return coerceFlowState(parsed);
 }
-export async function writeFlowState(projectRoot, state) {
+export async function writeFlowState(projectRoot, state, options = {}) {
     await withDirectoryLock(flowStateLockPath(projectRoot), async () => {
+        const statePath = flowStatePath(projectRoot);
+        if (!options.allowReset && (await exists(statePath))) {
+            try {
+                const raw = await fs.readFile(statePath, "utf8");
+                const parsed = JSON.parse(raw);
+                if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
+                    const prev = coerceFlowState(parsed);
+                    validateFlowTransition(prev, state);
+                }
+            }
+            catch (err) {
+                if (err instanceof InvalidStageTransitionError) {
+                    throw err;
+                }
+                // A corrupt prior file is surfaced by readFlowState elsewhere; don't
+                // block a legitimate write attempt on parse errors here.
+            }
+        }
         const safe = coerceFlowState({ ...state });
-        await writeFileSafe(flowStatePath(projectRoot), `${JSON.stringify(safe, null, 2)}\n`);
+        await writeFileSafe(statePath, `${JSON.stringify(safe, null, 2)}\n`);
     });
 }
 export async function ensureRunSystem(projectRoot, _options = {}) {
@@ -263,7 +308,7 @@ export async function ensureRunSystem(projectRoot, _options = {}) {
     const statePath = flowStatePath(projectRoot);
     const state = await readFlowState(projectRoot);
     if (!(await exists(statePath))) {
-        await writeFlowState(projectRoot, state);
+        await writeFlowState(projectRoot, state, { allowReset: true });
     }
     return state;
 }
@@ -315,7 +360,7 @@ export async function archiveRun(projectRoot, featureName) {
     const archiveStatePath = path.join(archivePath, "state");
     const snapshottedStateFiles = await snapshotStateDirectory(projectRoot, archiveStatePath);
     const resetState = createInitialFlowState();
-    await writeFlowState(projectRoot, resetState);
+    await writeFlowState(projectRoot, resetState, { allowReset: true });
     const archivedAt = new Date().toISOString();
     const manifest = {
         version: 1,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cclaw-cli",
-  "version": "0.5.16",
+  "version": "0.5.17",
   "description": "Installer-first flow toolkit for coding agents",
   "type": "module",
   "bin": {