mustard-claude 3.1.30 → 3.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mustard-claude",
3
- "version": "3.1.30",
3
+ "version": "3.1.32",
4
4
  "description": "Framework-agnostic CLI for Claude Code project setup",
5
5
  "type": "module",
6
6
  "bin": {
@@ -23,15 +23,31 @@ Approves the active spec and prepares the implementation phase.
23
23
  - Do NOT proceed to step 2 without running this command
24
24
  2. **Read** `.claude/pipeline-config.md` — agents, model selection
25
25
  3. Locate active spec in `.claude/spec/active/`
26
+
27
+ ### Step 3b: Wave Plan Detection
28
+
29
+ Check if the located spec is a wave plan: look for `.claude/spec/active/{specName}/wave-plan.md`.
30
+
31
+ **If `wave-plan.md` exists:**
32
+
33
+ 1. Read `.claude/.pipeline-states/{specName}.json` — expect `isWavePlan: true`, `totalWaves: N`, `currentWave: 1`, `completedWaves: []`.
34
+ 2. Read `wave-plan.md` and print its ENTIRE contents verbatim inside a fenced markdown block (```` ```markdown ... ``` ````). List each wave spec file path below the block (one line each).
35
+ 3. `AskUserQuestion`:
36
+ - **"Approve wave plan — start with wave 1"** → proceed to step 4 (update header + state for wave 1 dispatch)
37
+ - **"Reject decomposition — use single spec"** → merge all wave specs back into a single spec at `.claude/spec/active/{specName}/spec.md` (concatenate `## Files`, `## Tasks`, `## Boundaries` from each wave), delete `wave-plan.md` and `wave-N-*/` subdirectories, set `scopeOverride: "user-rejected-waves"` and `isWavePlan: false` in pipeline state, proceed to step 4 on the single spec
38
+ - **"Stop — re-plan with guidance"** → stop. Instruct user: `Delete .claude/spec/active/{specName}/ and re-run /feature {name} with explicit guidance (e.g., "keep wave 2 and wave 3 together").`
39
+ 4. If the user approved the wave plan, from step 4 onward operate on the **wave 1 spec** (`.claude/spec/active/{specName}/wave-1-{role}/spec.md`) — update its header, not the `wave-plan.md` header.
40
+
41
+ **If `wave-plan.md` does NOT exist:** proceed as a single spec (original behavior below).
42
+
26
43
  4. **Spec Checkpoint — update spec header:**
27
44
  - `### Status: approved`
28
45
  - `### Phase: PLAN`
29
46
  - `### Checkpoint: {ISO timestamp now}`
30
- 5. **Pipeline State — create `.claude/.pipeline-states/{spec-name}.json`:**
47
+ 5. **Pipeline State — create or update `.claude/.pipeline-states/{spec-name}.json`:**
31
48
  - Extract `spec-name` from the spec directory (e.g. basename of path → `2026-02-26-linked-services-card`)
32
- - Parse Tasks from spec to extract tasks per agent (DB, Backend, Frontend, etc.)
33
- - Create `.claude/.pipeline-states/` directory if it doesn't exist
34
- - Write state file with `specName`, `status: "approved"`, `phaseName: "PLAN"`, `tasks` with names and agents, `model`, `updatedAt`
49
+ - **If wave plan (from Step 3b):** state already exists. Update fields: `status: "approved"`, `currentWave: 1`, `updatedAt`. Parse tasks from **wave-1** spec only (not all waves). Preserve `isWavePlan`, `totalWaves`, `completedWaves`, `failedWaves`.
50
+ - **If single spec:** Parse Tasks from spec to extract tasks per agent (DB, Backend, Frontend, etc.). Create `.claude/.pipeline-states/` directory if it doesn't exist. Write state file with `specName`, `status: "approved"`, `phaseName: "PLAN"`, `tasks` with names and agents, `model`, `updatedAt`.
35
51
  5b. **Memory Persist — record architectural decisions:**
36
52
  - For each significant decision in the spec (technology choices, design patterns, trade-offs):
37
53
  ```bash
@@ -58,6 +58,32 @@ If the diff file is empty or missing, skip the Git State header entirely. Never
58
58
  - Trace callers/callees via Grep in relevant directories (prefer Grep over Read)
59
59
  - Return as soon as root cause is clear — don't exhaustively scan
60
60
  - Return: root cause file(s), line(s), explanation
61
+
62
+ 2b. **Cache root-cause for retry reuse:**
63
+
64
+ After DIAGNOSE returns, compute a cache signature so fix-loop retries can skip re-DIAGNOSE when the affected surface hasn't changed:
65
+
66
+ ```javascript
67
+ // in-memory during bugfix session (also persisted to pipeline-state for Full Path)
68
+ const affectedFiles = [...root-cause file(s) from Explore return, sorted];
69
+ const bugDescription = {user's error description, canonical — trimmed and lowercased};
70
+ const rootCauseHash = sha256(bugDescription + '|' + affectedFiles.join(','));
71
+ const rootCauseSummary = {1-line root cause from Explore, ≤500 chars};
72
+ const affectedFilesHash = sha256(concatenated contents of affectedFiles right now);
73
+ ```
74
+
75
+ Write to pipeline-state if Full Path (`.claude/.pipeline-states/{specName}.json`):
76
+ ```json
77
+ {
78
+ "rootCauseHash": "sha256...",
79
+ "rootCauseSummary": "...",
80
+ "affectedFilesHash": "sha256...",
81
+ "affectedFiles": ["path/a.ts", "path/b.ts"],
82
+ "cachedAt": "{ISO}"
83
+ }
84
+ ```
85
+
86
+ For Fast Path (no spec yet), keep the cache in-memory only — it lives for the duration of the bugfix session, which is sufficient for the retry loop.
61
87
  3. **ASSESS — Decision point:**
62
88
  - Explore returns clear root cause in 1-2 files → **Fast Path** (skip PLAN)
63
89
  - 3+ files, unclear impact, cross-layer → **Full Path** (brief spec via PLAN)
@@ -125,9 +151,19 @@ Before retrying a failed fix attempt, classify the failure:
125
151
 
126
152
  1. **Transient?** — Would re-running succeed without any change? (flaky test, cache, env) → Retry once immediately.
127
153
  2. **Resolvable?** — Is the fix clear and patchable in ≤3 lines without new reads? → Apply patch, retry (counts as retry 1).
128
- 3. **Structural?** — Did the original ANALYZE misidentify the root cause? → Re-analyze: dispatch a focused Explore on the actual failure point, update root cause, re-dispatch bugfix agent. Does NOT count against the 2-retry cap.
129
-
130
- Max 2 retries for Transient + Resolvable. Structural failures trigger a targeted re-ANALYZE, not a blind retry.
154
+ 3. **Structural?** — Did the original ANALYZE misidentify the root cause? → **Before re-Exploring, consult the root-cause cache from Step 2b:**
155
+ - Recompute `affectedFilesHash` for the cached `affectedFiles`.
156
+ - **Cache hit (hash matches) AND failure signal does NOT suggest a different cause** (no keyword in the failure pointing to files outside `affectedFiles`, no REVIEW rationale explicitly naming a different root) → skip re-Explore, inject `rootCauseSummary` verbatim into the retry prompt. Log: `root-cause cached (retry {N}/2), skipping diagnose`.
157
+ - **Cache miss (files changed) OR failure rationale points elsewhere** → invalidate cache, run targeted Explore on the actual failure point, update root cause (including new cache entry via Step 2b), re-dispatch bugfix agent.
158
+ - Re-ANALYZE (with or without cache) does NOT count against the 2-retry cap.
159
+
160
+ Max 2 retries for Transient + Resolvable. Structural failures trigger a targeted re-ANALYZE (cache-gated), not a blind retry.
161
+
162
+ **Cache invalidation signals:**
163
+ - Affected files changed on disk → hash mismatch invalidates
164
+ - Review/build failure rationale mentions files outside `affectedFiles` → invalidate
165
+ - User explicitly overrides (rare) → invalidate
166
+ - After 2 retries exhausted, the cache is naturally flushed when the pipeline aborts or advances
131
167
 
132
168
  ### CLOSE
133
169
 
@@ -121,6 +121,101 @@ Continue to PLAN regardless.
121
121
 
122
122
  ### PLAN Phase
123
123
 
124
+ #### Wave Decomposition Pre-Check (Full scope only)
125
+
126
+ **Skip for Light/Extended Light** — decomposition only makes sense when scope is genuinely large.
127
+
128
+ Before writing the single spec in Full scope, check whether the work should be decomposed into waves:
129
+
130
+ 1. **Compute signals from ANALYZE output:**
131
+ - `fileCount` — files that will go into `## Files`
132
+ - `layerCount` — distinct layers (use role detection derived from paths: schema/api/ui/lib)
133
+ - `newEntityCount` — new entities created by this spec
134
+ - `estimatedTouchPoints` — count of imports/refs from Grep on affected directories (optional)
135
+
136
+ 2. **Read knowledge matches:** Read `.claude/knowledge.json` (if it exists). Extract entries whose `id` starts with `heavy-pipeline` or `high-hook-retry`. Each entry's scope signals represent a historical pipeline that cost a lot.
137
+
138
+ 3. **Run decomposition decision:**
139
+ ```bash
140
+ echo '{"fileCount":{N},"layerCount":{L},"newEntityCount":{E},"knowledgeMatches":[...]}' | node .claude/scripts/scope-decompose.js
141
+ ```
142
+ Output JSON: `{decompose: bool, reason: string, signals: {...}}`
143
+
144
+ 4. **If `decompose: false`** → proceed to `#### Full Scope` below as usual (single spec).
145
+
146
+ 5. **If `decompose: true`** → build wave plan:
147
+ ```bash
148
+ echo '{"files":[...all paths from ANALYZE...],"projectRoot":"."}' | node .claude/scripts/wave-dependency.js
149
+ ```
150
+ Output cases:
151
+ - `{error: "cyclic-dependency", cycle: [...]}` → warn user about cyclic imports (pre-existing architecture issue), fall back to single spec with note in `## Concerns`. Proceed to `#### Full Scope`.
152
+ - `{error: ...}` → fail-open: fall back to single spec.
153
+ - `{waves: [...]}` with only 1 wave → no real DAG depth, fall back to single spec.
154
+ - `{waves: [...]}` with 2+ waves → write **Wave Plan** (step 6).
155
+
156
+ 6. **Write Wave Plan structure:**
157
+ ```
158
+ .claude/spec/active/{date}-{name}/
159
+ ├── wave-plan.md
160
+ ├── wave-1-{role}/spec.md
161
+ ├── wave-2-{role}/spec.md
162
+ └── wave-N-{role}/spec.md
163
+ ```
164
+
165
+ `wave-plan.md` contains:
166
+ ```markdown
167
+ # Wave Plan: {name}
168
+ ### Status: draft | Phase: PLAN | Scope: full | Decomposed: yes
169
+ ### Checkpoint: {ISO now}
170
+ ### Reason: {decompose.reason}
171
+
172
+ ## Summary
173
+ {1-2 lines: what + why}
174
+
175
+ ## Waves
176
+ ### Wave 1 — {roles of wave 1}
177
+ Depends on: none
178
+ Files ({count}): {file1}, {file2}, ...
179
+
180
+ ### Wave 2 — {roles of wave 2}
181
+ Depends on: wave 1
182
+ Files ({count}): {file3}, ...
183
+
184
+ {... for each wave ...}
185
+
186
+ ## Rationale
187
+ {which knowledge entry matched or which threshold triggered; signals from scope-decompose}
188
+ ```
189
+
190
+ Each `wave-N-{role}/spec.md` is a **complete atomic spec** scoped to just that wave's files. Use the same template as Full scope single spec (Summary, Entity Info, Files, Tasks, Dependencies, Boundaries). Reference `../wave-plan.md` at the top as context.
191
+
192
+ 7. **Write pipeline state for wave plan:**
193
+ ```json
194
+ {
195
+ "specName": "{date}-{name}",
196
+ "status": "draft",
197
+ "phase": 2,
198
+ "phaseName": "PLAN",
199
+ "scope": "full",
200
+ "isWavePlan": true,
201
+ "currentWave": 1,
202
+ "totalWaves": N,
203
+ "completedWaves": [],
204
+ "failedWaves": []
205
+ }
206
+ ```
207
+
208
+ 8. **Present wave plan to user:**
209
+ - Read `wave-plan.md` and print its ENTIRE contents verbatim inside a fenced markdown block.
210
+ - Also list each wave's spec file paths (one line each) so the user can open individual wave specs if desired.
211
+ - Then `AskUserQuestion`:
212
+ - **"Approve wave plan and implement now"** → goes to EXECUTE wave 1 inline (same rules as Light inline)
213
+ - **"Approve wave plan for later"** → stop, user runs `/approve` + `/resume`
214
+ - **"Edit decomposition (hint PLAN)"** → user provides a hint (e.g., "merge waves 2 and 3"), and PLAN re-executes with the hint applied to the `estimatedTouchPoints`/manual grouping signals. Re-decompose once.
215
+ - **"Reject decomposition — use single spec"** → discard wave plan files, set `scopeOverride: "user-rejected-waves"` in pipeline state, proceed to `#### Full Scope` as if `decompose: false`.
216
+
217
+ 9. **If user approves the wave plan**, the single-spec `#### Full Scope` flow below is **skipped** — wave-1 becomes the first thing to execute (via `/approve --resume` or `/resume`).
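The decision contract in step 3 (signals in, `{decompose, reason, signals}` out) can be sketched as follows. The threshold values here are purely illustrative assumptions; the real ones live in `.claude/scripts/scope-decompose.js`, which this diff does not show:

```javascript
'use strict';
// Illustrative only: thresholds are assumptions, not the shipped values.
function decideDecompose({ fileCount, layerCount, newEntityCount, knowledgeMatches = [] }) {
  const signals = { fileCount, layerCount, newEntityCount };
  // Historical heavy pipelines (heavy-pipeline / high-hook-retry entries) force waves.
  if (knowledgeMatches.length > 0) {
    return { decompose: true, reason: `knowledge match: ${knowledgeMatches[0]}`, signals };
  }
  // Assumed heuristic: many files spread across several layers suggests waves.
  if (fileCount >= 10 && layerCount >= 3) {
    return { decompose: true, reason: 'large multi-layer scope', signals };
  }
  return { decompose: false, reason: 'within single-spec budget', signals };
}
```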
218
+
124
219
  #### Full Scope
125
220
 
126
221
  1. Create `.claude/spec/active/{date}-{name}/spec.md` with:
@@ -300,5 +395,7 @@ Scope tag: `[LIGHT]` or `[FULL]` after progress line.
300
395
  - ALWAYS go straight to PLAN once you understand the change — more reads ≠ better spec
301
396
  - Light scope inline implement follows same dispatch rules as `/resume` (template, waves, retries)
302
397
  - Context budget: Grep entity-registry (not full read), Grep recipes (not full read), line-by-line checkbox updates
398
+ - Wave decomposition is opt-in via signals (knowledge matches, layer/file/entity counts) — never force waves on small scopes
399
+ - If wave decomposition is approved, single-spec Full Scope flow is skipped — waves execute sequentially via `/resume`
303
400
 
304
401
  ULTRATHINK
@@ -1,29 +1,32 @@
1
1
  ---
2
2
  name: mustard:metrics
3
- description: Show enforcement metrics report — hook hit rates, budget distributions, gate activity. Metrics are recorded automatically; just run this to see them.
3
+ description: Focused view of enforcement hook events and compare-window deltas. For the superset (pipelines + hooks + RTK), use /mustard:stats.
4
4
  ---
5
5
 
6
- # /mustard:metrics - Show Metrics Report
6
+ # /mustard:metrics - Hook Events & Compare
7
7
 
8
8
  ## Trigger
9
- `/mustard:metrics [--since <ISO date>] [--event <type>]`
9
+ `/mustard:metrics [--since <ISO date>] [--event <type>] [--compare <from> <to>]`
10
10
 
11
11
  ## What it does
12
- Runs `.claude/scripts/metrics-report.js` and shows the aggregated report.
12
+ Focused on two use cases:
13
13
 
14
- Metrics are recorded **automatically** by enforcement hooks on every Task dispatch — no activation needed. Just run this command whenever you want to see the current state.
14
+ 1. **Hook-level aggregation** (default) — runs `.claude/scripts/metrics-report.js` and emits a table of events from `.claude/.metrics/*.jsonl`, plus RTK token savings.
15
+ 2. **Compare window** (`--compare`) — delta between two git tags or ISO dates (reference window computed automatically from the delta).
16
+
17
+ For the superset view that also includes per-pipeline metrics, orphans, Pass@1 and Last 7 Days, use **`/mustard:stats`** (cross-reference).
15
18
 
16
19
  ## Action
17
20
  1. Run `rtk node .claude/scripts/metrics-report.js $ARGS` (pass through any flags)
18
21
  2. Display output verbatim
19
22
 
20
- ## Optional flags
23
+ ## Flags
21
24
  - `--since <ISO date>` — filter events after this date
22
25
  - `--event <type>` — filter to one event type (e.g. `budget-check`)
23
- - `--compare <from> <to>` — delta between two windows (git tag or ISO date)
26
+ - `--compare <from> <to>` — delta between two windows (git tag `vX.Y.Z` or ISO date)
24
27
 
25
28
  ## Examples
26
- - `/mustard:metrics` — full report since beginning
29
+ - `/mustard:metrics` — hook event aggregation since beginning
27
30
  - `/mustard:metrics --since 2026-04-09` — only recent events
28
31
  - `/mustard:metrics --event budget-check` — only budget-check events
29
32
  - `/mustard:metrics --compare v3.1.21 v3.1.22` — delta between two releases
@@ -34,3 +37,4 @@ Metrics are recorded **automatically** by enforcement hooks on every Task dispat
34
37
  - Logs auto-rotate at 10MB
35
38
  - To reset: delete files in `.claude/.metrics/` manually
36
39
  - Advanced: override mode via `CONTEXT_BUDGET_MODE` env var (`strict`|`warn`|`observe`). Default is `strict`.
40
+ - `rtk-rewrite` events deliberately show only counts (no `tokens_saved` column) — real RTK numbers come from `rtk gain`, surfaced in the "RTK Token Savings" block.
@@ -29,7 +29,28 @@ Before the normal detect-and-confirm flow, scan the newest pipeline state for a
29
29
  3. After the re-dispatch returns, clear the flag: remove `lastDispatchFailure` from the state object and rewrite the pipeline-state JSON.
30
30
  4. Fall through to Step 1 (normal resume flow continues from the updated state).
31
31
  - **If ageMs > 10 * 60 * 1000** (stale): silently remove `lastDispatchFailure` from the state and rewrite the file, then continue to Step 1.
32
- 4. If `lastDispatchFailure` is absent, skip Step 0 entirely and proceed to Step 1.
32
+ 4. If `lastDispatchFailure` is absent, skip Step 0 entirely and proceed to Step 0.5.
33
+
34
+ ### Step 0.5: Resume Mode (continue vs. reanalyze)
35
+
36
+ Before loading heavy context (sync-registry, diff-context, Explore Gate), ask the user which mode to use. Gating this saves roughly 2-5k tokens per resume.
37
+
38
+ 1. **Skip conditions** — enter `reanalyze` mode automatically without prompting:
39
+ - Step 0 just re-dispatched a failed agent (recovery path → always reanalyze next step)
40
+ - `pipeline-state.lastDispatchFailure` was present and <10min old (already handled in Step 0)
41
+ - Wave plan with `failedWaves.length > 0` (handled in wave failure section below — forces `reanalyze`)
42
+
43
+ 2. **Otherwise, AskUserQuestion:**
44
+ - **"Continue from where it left off (light mode)"** → `mode = "continued"`: skip sync-registry (Step 2 #6), skip diff-context (unless a wave transition forces a refresh), skip Pre-EXECUTE Existence Gate (Step 12b). Trust pipeline-state as source of truth.
45
+ - **"Reanalyze context (full mode)"** → `mode = "reanalyzed"`: run Step 2 fully (default behavior, re-reads everything).
46
+
47
+ 3. **Record mode in pipeline state:** write `resumeMode: "continued" | "reanalyzed"` and `resumeModeAt: {ISO now}` so downstream steps know which path they are in.
48
+
49
+ 4. **Stale-context fallback (safety net):** if a dispatched agent in `continued` mode returns an error indicating stale context (e.g., references a missing file, fails boundary check, or returns `BLOCKED` with reason citing out-of-date registry), escalate automatically:
50
+ - Update pipeline state: `resumeMode: "escalated-to-reanalyze"`, append to `resumeEscalations` array with `{at, reason}`
51
+ - Re-run Step 2 in full (sync-registry + diff-context)
52
+ - Re-dispatch the failed agent with fresh context
53
+ - Fail-open: escalation never blocks, just upgrades to the heavier path
33
54
 
34
55
  ### Step 1: Detect & Confirm
35
56
 
@@ -83,10 +104,14 @@ Before the normal detect-and-confirm flow, scan the newest pipeline state for a
83
104
  ### Step 2: Bootstrap (after confirmation)
84
105
 
85
106
  6. **AUTO-SYNC:** `node .claude/scripts/sync-registry.js`
107
+ - **Skip if `resumeMode === "continued"`** (Step 0.5): registry is reused from prior session.
108
+ - Always run if `resumeMode === "reanalyzed"` or `"escalated-to-reanalyze"`.
86
109
 
87
110
  ### Diff Context (automatic)
88
111
  Run `node .claude/scripts/diff-context.js --subproject {subproject_path}` per subproject to capture the current git state scoped to each subproject. Include the subproject-specific output in the agent prompt as `{diff_context}` so agents see only changes relevant to their scope.
89
112
 
113
+ **Skip if `resumeMode === "continued"`** unless a wave just completed (wave transitions always refresh diff). The prior diff snapshot is reused from `.claude/.pipeline-states/{specName}.diff-{subproject}.md`.
114
+
90
115
  7. **Read** `.claude/pipeline-config.md`. For `entity-registry.json`: use Grep to extract ONLY the relevant entity block (e.g. `"Contract":`), NEVER read the full JSON
91
116
  9. **Update spec header:** `Status: implementing`, `Phase: EXECUTE`, `Checkpoint: {ISO now}`
92
117
  10. **Update/create pipeline state:** `status: "implementing"`, `phaseName: "EXECUTE"`, `specName`
@@ -99,9 +124,26 @@ Run `node .claude/scripts/diff-context.js --subproject {subproject_path}` per su
99
124
  12. **Match recipe by name only:** Grep `{subproject}/.claude/commands/recipes.md` for recipe title matching the task type — do NOT read the full recipes file. Extract only: recipe number, pattern refs, reference modules
100
125
  12b. **Pre-EXECUTE Existence Gate**: Same gate as `feature/SKILL.md § Pre-EXECUTE Existence Gate`. Invoke identically (Full scope only, `## Files` ≤ 8). On retry/resume, the gate naturally handles idempotence: tasks already `[x]` from a prior run are treated as Mixed — the Haiku confirms they stay done and the orchestrator only re-dispatches what remains `[ ]`.
101
126
 
127
+ **Skip entirely if `resumeMode === "continued"`** (Step 0.5). The `continued` mode trusts pipeline-state checkboxes as-is. If the stale-context fallback escalates to `reanalyze`, the gate runs on the re-dispatch.
128
+
102
129
  **Pre-check (same as `feature/SKILL.md § Pre-EXECUTE Existence Gate`):** Before dispatching Haiku, run `rtk git diff --stat HEAD -- <files listed in spec's ## Files>`. Skip gate entirely if output is empty (no changes) or total insertions/deletions <10. Only proceed with Haiku dispatch if ≥10 lines changed.
103
130
 
131
+ 12c. **Wave Plan Scope (conditional — only if `pipeline-state.isWavePlan === true`):**
132
+
133
+ When the pipeline state indicates a wave plan, the orchestrator dispatches only the **current wave**, not the full spec:
134
+
135
+ 1. Read `pipeline-state.currentWave` and `pipeline-state.totalWaves`.
136
+ 2. The spec to work from for this invocation is `.claude/spec/active/{specName}/wave-{currentWave}-*/spec.md`. Replace any prior reference to `spec.md` at the root of the spec dir with the current wave's spec.
137
+ 3. **Between waves** (see Step 17 post-dispatch):
138
+ - On wave completion: run a `/mustard:git commit`-style commit with the message `feat(wave-{N}/{role}): {summary}`. If `/mustard:git commit` does not fit the project, fall back to `git add {files} && git commit -m "..."`.
139
+ - Update state: `completedWaves.push(currentWave)`, `currentWave += 1`, `updatedAt`.
140
+ - Force `resumeMode = "reanalyzed"` for the next wave transition so diff-context refreshes with the just-committed changes.
141
+ - If `currentWave > totalWaves` → skip remaining wave dispatch, go to Step 19 REVIEW + Step 20 CLOSE on the overall wave plan.
142
+ 4. **If a wave fails (REJECTED after 2 fix-loops, or BLOCKED)** — see § Wave Failure Handling below.
143
+
104
144
  13. **Plan waves:** `Depends on: none` → Wave 1; dependencies → later. DB+Backend parallel. Frontend after Backend UNLESS all parallel override conditions met (see `.claude/pipeline-config.md` Parallel Rules). Review agents: ALWAYS dispatch in single parallel message. Skip completed tasks.
145
+
146
+ **Note on wave plans:** when `isWavePlan === true`, this step plans the agent wave structure **within** the current wave's spec only — agents internal to the current wave-spec may still split across DB/Backend/Frontend sub-waves. The outer wave (1..N) is the cross-spec sequence managed by Step 12c.
105
147
  14. **Build agent prompts using template** (`.claude/commands/mustard/templates/agent-prompt/SKILL.md`):
106
148
  - Read template once, then fill placeholders per agent using `.claude/pipeline-config.md` data:
107
149
  - `{subproject}` → from Agents table (Subproject column)
@@ -183,11 +225,46 @@ When REVIEW returns REJECTED (any CRITICAL):
183
225
  8. If review still REJECTED after 2 fix-loops: STOP + report exhausted retries.
184
226
 
185
227
  20. **CLOSE:**
228
+ - **Wave plan gate:** if `pipeline-state.isWavePlan === true`, only CLOSE when `completedWaves.length === totalWaves`. If waves remain (`currentWave <= totalWaves` and wave N-1 just finished), **do not** run CLOSE — instead update state (`currentWave++`, `completedWaves.push`), output `═══ WAVE {N-1} COMPLETE — {role} ═══`, and stop. Next `/mustard:resume` picks up wave N.
186
229
  - `node .claude/scripts/sync-registry.js`
187
- - Spec: `Status: completed`, `Phase: CLOSE`, all `[ ]` → `[x]`
188
- - Move spec to `.claude/spec/completed/`
230
+ - Spec: `Status: completed`, `Phase: CLOSE`, all `[ ]` → `[x]`. For wave plans: mark `wave-plan.md` status `completed`, and mark each `wave-N-{role}/spec.md` completed too.
231
+ - Move spec to `.claude/spec/completed/` (the entire `{specName}/` directory, including wave subdirs if any)
189
232
  - **Delete** `.claude/.pipeline-states/{spec-name}.json`
190
- - Output with agent colors: `═══ PIPELINE COMPLETE — {name} | Agents: {n} ok | Files: {c} created, {m} modified ═══`
233
+ - Output with agent colors: `═══ PIPELINE COMPLETE — {name} | Agents: {n} ok | Files: {c} created, {m} modified ═══` (for wave plans: append `| Waves: {totalWaves}`).
234
+
235
+ ### Wave Failure Handling
236
+
237
+ Applies only when `pipeline-state.isWavePlan === true`.
238
+
239
+ A wave is considered **failed** when:
240
+ - REVIEW returns REJECTED after 2 fix-loops exhausted (see Step 19b), OR
241
+ - An implementation agent returns `BLOCKED` and the user cannot resolve inline, OR
242
+ - Build/type-check fails repeatedly (max 2 retries) after Granular Retry Protocol is exhausted.
243
+
244
+ **On wave failure:**
245
+
246
+ 1. Update pipeline state:
247
+ - `failedWaves.push(currentWave)`
248
+ - `status = "failed"`
249
+ - `updatedAt = {ISO now}`
250
+ 2. Write failure log to `.claude/spec/active/{specName}/wave-{currentWave}-{role}/failure.md`:
251
+ ```markdown
252
+ # Wave {N} Failure — {role}
253
+ ## When: {ISO}
254
+ ## Phase: {EXECUTE | REVIEW | CLOSE}
255
+ ## Reason: {short cause — e.g., "REVIEW REJECTED after 2 fix-loops"}
256
+ ## Findings (verbatim)
257
+ {last review findings OR BLOCKED rationale OR build error}
258
+ ## Files touched
259
+ {list from agent memory}
260
+ ```
261
+ 3. **Do NOT** attempt further automatic recovery. Wave N-1 commits remain in place — they are real progress.
262
+ 4. **Prompt the user via AskUserQuestion:**
263
+ - **"Fix wave {N} manually and resume"** → user fixes by hand; the next `/mustard:resume` clears the `failedWaves` entry and restarts wave N from EXECUTE.
264
+ - **"Rewrite wave {N} (re-PLAN this wave)"** → delete `wave-{N}-{role}/spec.md`, re-enter PLAN for wave N only (run the PLAN sub-flow scoped to wave N's files). User then re-approves wave N via `/mustard:approve`.
265
+ - **"Abort pipeline"** → set `status: "aborted"`, move spec to `.claude/spec/aborted/{specName}/` (create dir if needed), keep waves 1..N-1 commits. Inform user: `Pipeline aborted. Waves 1..{N-1} commits preserved. Waves {N}..{totalWaves} discarded.`
266
+
267
+ **Documented residual risk:** wave N-1 commits may be semantically incomplete without wave N (e.g., schema created but no API yet). The user was warned of this when approving the wave plan. The `failure.md` log states exactly which surface was left exposed.
191
268
 
192
269
  ### Granular Retry Protocol
193
270
 
@@ -2,7 +2,7 @@
2
2
  description: "Show pipeline metrics, token savings, and performance stats — use when user asks for stats, metrics, performance, or token usage"
3
3
  ---
4
4
  <!-- mustard:generated -->
5
- # /stats - Pipeline Metrics
5
+ # /stats - Pipeline Metrics (superset view)
6
6
 
7
7
  ## Trigger
8
8
 
@@ -10,7 +10,7 @@ description: "Show pipeline metrics, token savings, and performance stats — us
10
10
 
11
11
  ## Description
12
12
 
13
- Displays pipeline metrics including duration, API calls, retries, Pass@1 success rate, tool breakdown, RTK token savings, gate saves, wave reentries, and skill hit rate per agent.
13
+ Superset view of pipeline state + enforcement hooks + RTK token economy. This is the primary command; `/mustard:metrics` is a focused view for hook-only events and `--compare` windows.
14
14
 
15
15
  ## Action
16
16
 
@@ -18,21 +18,14 @@ Displays pipeline metrics including duration, API calls, retries, Pass@1 success
18
18
  2. Present the output to the user
19
19
  3. If no metrics found, inform user to run a pipeline first
20
20
 
21
- ## Pass@1 Metrics
21
+ ## Sections emitted
22
22
 
23
- `metrics-collect.js` emits a `## Pass@1 Metrics` section at the end of completed-pipeline output:
24
-
25
- - **Pass@1**: percentage of pipelines completed without any retries (retries === 0)
26
- - **Avg retries**: mean retry count across all completed pipelines
27
-
28
- Example output:
29
- ```
30
- ## Pass@1 Metrics
31
- - Pass@1: 80% (4/5 completed without retries)
32
- - Avg retries per pipeline: 0.4
33
- ```
34
-
35
- This section is omitted automatically when no completed pipelines exist yet.
23
+ - **Summary** — 5–8 lines with ✓/⚠/→ prefixes (pipelines tracked, orphans, Pass@1, RTK savings, top alert)
24
+ - **Active / Orphaned (per spec)** — duration, API calls, retries, top 3 tools, retries by phase, gate saves, wave reentries, skill hits, Pass@1 by agent (heuristic)
25
+ - **Completed Pipelines** — archived runs from `.claude/metrics/`
26
+ - **Last 7 Days** — events per day + current-week vs. prior-week delta
27
+ - **Enforcement Events (hooks)** — table of events from `.claude/.metrics/*.jsonl`
28
+ - **RTK Token Economy** — totals from `rtk gain`
36
29
 
37
30
  ## When to Use
38
31
 
@@ -0,0 +1,50 @@
1
+ 'use strict';
2
+ /**
3
+ * Shared helper: normalize `rtk gain --all --format json` output.
4
+ *
5
+ * rtk emits { summary: { total_saved, avg_savings_pct, total_input,
6
+ * total_output, total_commands }, daily, weekly, monthly }. Different
7
+ * rtk versions (and earlier mustard scripts) assumed top-level
8
+ * `saved_tokens`/`total_saved` — neither is correct on current rtk.
9
+ * This helper is the single source of truth.
10
+ */
11
+
12
+ const { execFileSync } = require('child_process');
13
+
14
+ function getRtkGain(opts) {
15
+ const timeout = (opts && opts.timeout) || 3000;
16
+ let raw;
17
+ try {
18
+ raw = execFileSync('rtk', ['gain', '--all', '--format', 'json'], {
19
+ encoding: 'utf8',
20
+ timeout,
21
+ stdio: ['ignore', 'pipe', 'ignore'],
22
+ windowsHide: true,
23
+ });
24
+ } catch {
25
+ return null;
26
+ }
27
+ let data;
28
+ try {
29
+ data = JSON.parse(raw);
30
+ } catch {
31
+ return null;
32
+ }
33
+ const s = (data && data.summary) || data || {};
34
+ const saved = Number(s.total_saved ?? s.saved_tokens ?? s.savedTokens ?? 0) || 0;
35
+ const original = Number(s.total_input ?? s.total_original ?? 0) || 0;
36
+ const pct = Number(s.avg_savings_pct ?? s.savings_pct ?? s.savingsPct ?? 0) || 0;
37
+ const commands = Number(s.total_commands ?? s.commands ?? 0) || 0;
38
+ if (saved <= 0 && commands <= 0) return null;
39
+ return {
40
+ saved,
41
+ originalTotal: original,
42
+ pct,
43
+ commands,
44
+ byCommand: (data && data.by_command) || null,
45
+ daily: (data && Array.isArray(data.daily)) ? data.daily : [],
46
+ weekly: (data && Array.isArray(data.weekly)) ? data.weekly : [],
47
+ };
48
+ }
49
+
50
+ module.exports = { getRtkGain };
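The fallback chain is the subtle part of this helper: current rtk nests totals under `summary`, while older shapes were flat. A small illustration of that chain for the `saved` field (the payloads are representative examples, not captured rtk output):

```javascript
'use strict';
// Mirrors the helper's fallback chain for the `saved` field.
function normalizeSaved(data) {
  const s = (data && data.summary) || data || {};
  return Number(s.total_saved ?? s.saved_tokens ?? s.savedTokens ?? 0) || 0;
}

// Current rtk: nested summary wins.
console.log(normalizeSaved({ summary: { total_saved: 1234 } })); // 1234
// Older flat shape still resolves.
console.log(normalizeSaved({ saved_tokens: 99 })); // 99
// Missing/unknown payloads degrade to 0, matching the helper's null-return path.
console.log(normalizeSaved(null)); // 0
```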