mustard-claude 3.1.10 → 3.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mustard-claude",
3
- "version": "3.1.10",
3
+ "version": "3.1.12",
4
4
  "description": "Framework-agnostic CLI for Claude Code project setup",
5
5
  "type": "module",
6
6
  "bin": {
@@ -55,7 +55,7 @@ node scripts/sync-registry.js --force
55
55
  - PreToolUse hooks use `permissionDecision` response format
56
56
  - PostToolUse hooks use `decision` response format
57
57
  - Every new hook must be registered in `settings.json` with a timeout
58
- - Task dispatch failures (API overload) are logged to `pipeline-state.lastDispatchFailure`; `/resume` auto-recovers within 10 min
58
+ - Task dispatch failures (API overload, HTTP 5xx, tool result missing) are logged to `pipeline-state.lastDispatchFailure`; `/resume` auto-recovers within 10 min
59
59
  - Generated files must start with `<!-- mustard:generated -->` header
60
60
  - Skills must have YAML frontmatter BEFORE the `<!-- mustard:generated -->` line
61
61
 
@@ -18,11 +18,14 @@
18
18
  | `review` | general-purpose | opus | Code quality review (SOLID, security, perf) |
19
19
  | `docs` | general-purpose | sonnet | Documentation generation |
20
20
  | `refactor` | Plan → general-purpose | sonnet/opus | Plan + approve + implement refactoring |
21
+ | `implement` | general-purpose | sonnet | Single-dispatch implementation with inline guards/patterns/recipes (low-cost, standardized) |
21
22
 
22
23
  ## L0 Enforcement
23
24
 
24
25
  **CRITICAL**: Parent context does NOT read code, does NOT implement. ALL work happens in Task contexts.
25
26
 
27
+ **Note on `implement`**: the orchestrator may run targeted Greps against `.md` context files (`guards.md`, `patterns.md`, `recipes.md`) to inject standardization slices into the dispatched prompt. Those files are configuration docs, not application code — reading them in the parent is allowed. Source code reads still happen only inside the dispatched Task.
28
+
26
29
  ## Flow
27
30
 
28
31
  ### analyze / review / docs
@@ -51,6 +54,14 @@
51
54
  4. **IMPLEMENT** — Task(general-purpose) to execute approved plan
52
55
  5. **VALIDATE** — Run build/tests
53
56
 
57
+ ### implement
58
+
59
+ 1. **GREP SLICES** — Orchestrator runs targeted Greps against `{subproject}/.claude/commands/guards.md`, `patterns.md`, `recipes.md` for the scope keyword. Use `output_mode: content`, `-C 2`, `head_limit: 20` (cap ~500 tokens per file). Greps return small slices, not full files.
60
+ 2. **DISPATCH** — Single `Task(general-purpose, sonnet)` with guards/patterns/recipe injected inline in the prompt, naming conventions explicit, and return format capped at 30 lines.
61
+ 3. **BUILD** — Agent runs build/type-check at the end and reports the result.
62
+ 4. **NO OVERHEAD** — No spec, no pipeline state, no review gate. Surgical.
63
+ 5. **ON CONCERN** — If the agent returns CONCERN, orchestrator shows it to the user and offers either `/feature` Light (more gates) or an adjusted `implement` prompt.
64
+
54
65
  ## Implementation
55
66
 
56
67
  ```javascript
@@ -139,6 +150,43 @@ Task({
139
150
  `
140
151
  })
141
152
 
153
+ // implement — NEW ACTION
154
+ // Orchestrator runs targeted Greps first (each ≤500 tokens output)
155
+ const guards = grep({path: `${sp}/.claude/commands/guards.md`, pattern: keyword, output_mode: "content", "-C": 2, head_limit: 20});
156
+ const patterns = grep({path: `${sp}/.claude/commands/patterns.md`, pattern: keyword, output_mode: "content", "-C": 2, head_limit: 20});
157
+ const recipe = grep({path: `${sp}/.claude/commands/recipes.md`, pattern: keyword, output_mode: "content", "-C": 2, head_limit: 20});
158
+
159
+ // Single dispatch with everything inlined
160
+ Task({
161
+ subagent_type: "general-purpose",
162
+ model: "sonnet",
163
+ description: `Implement: ${scope}`,
164
+ prompt: `
165
+ # IMPLEMENTATION TASK (standardized, low-cost)
166
+ ## Scope: ${scope}
167
+
168
+ ## Guards (inline — do not re-read)
169
+ ${guards}
170
+
171
+ ## Patterns to follow
172
+ ${patterns}
173
+
174
+ ## Recipe
175
+ ${recipe}
176
+
177
+ ## Naming conventions
178
+ - PascalCase for classes/components
179
+ - camelCase for variables/functions
180
+ - snake_case for DB columns
181
+ - kebab-case for files/URLs
182
+
183
+ ## Return format
184
+ - ≤30 lines
185
+ - Sections: Files Changed (bullet list), Build result, Status (DONE/CONCERN/BLOCKED)
186
+ - Do NOT paste file contents
187
+ `
188
+ })
189
+
142
190
  // compare — Phase 1: Parallel exploration
143
191
  subprojects.forEach(sp => Task({
144
192
  subagent_type: "Explore",
@@ -196,6 +244,14 @@ After receiving results from `audit` or `compare`:
196
244
  /task review "Contract entity"
197
245
  /task docs "API endpoints"
198
246
  /task refactor "extract PaymentService"
247
+ /task implement "add logout button to header"
248
+ /task implement "create GET /api/users endpoint"
199
249
  ```
200
250
 
201
251
  Replace `{subproject}` with actual subproject name. Single repo: omit the subproject argument.
252
+
253
+ ## When to use implement vs /feature vs refactor
254
+
255
+ - `implement` — 1-3 files, known pattern, result verifiable by the build. Low cost, no audit trail.
256
+ - `/feature` Light — structured changes with an auditable spec and a review gate. Medium cost.
257
+ - `refactor` — reorganization with no functional change (split, rename, extract). Has a separate Plan phase.
@@ -554,7 +554,7 @@ describe("subagent-tracker.js overload detection", () => {
554
554
  assert.equal(r.code, 0);
555
555
  const state = JSON.parse(fs.readFileSync(pipelinePath, "utf8"));
556
556
  assert.ok(state.lastDispatchFailure, "flag must be set");
557
- assert.equal(state.lastDispatchFailure.reason, "api_overload");
557
+ assert.equal(state.lastDispatchFailure.reason, "dispatch_failure");
558
558
  assert.equal(state.lastDispatchFailure.agentType, "general-purpose");
559
559
  assert.equal(state.lastDispatchFailure.description, "test dispatch");
560
560
  } finally {
@@ -562,6 +562,40 @@ describe("subagent-tracker.js overload detection", () => {
562
562
  }
563
563
  });
564
564
 
565
+ it("should flag lastDispatchFailure on tool result missing infrastructure error", async () => {
566
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "infra-missing-"));
567
+ const pipelinePath = setupPipelineState(tmpDir);
568
+ try {
569
+ const r = await dispatchTaskResult(tmpDir, {
570
+ is_error: true,
571
+ content: "Tool result missing due to internal error",
572
+ });
573
+ assert.equal(r.code, 0);
574
+ const state = JSON.parse(fs.readFileSync(pipelinePath, "utf8"));
575
+ assert.ok(state.lastDispatchFailure, "flag must be set on infra failure");
576
+ assert.equal(state.lastDispatchFailure.reason, "dispatch_failure");
577
+ } finally {
578
+ fs.rmSync(tmpDir, { recursive: true, force: true });
579
+ }
580
+ });
581
+
582
+ it("should flag lastDispatchFailure on HTTP 503 service unavailable", async () => {
583
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "infra-503-"));
584
+ const pipelinePath = setupPipelineState(tmpDir);
585
+ try {
586
+ const r = await dispatchTaskResult(tmpDir, {
587
+ is_error: true,
588
+ content: "Error 503: service unavailable",
589
+ });
590
+ assert.equal(r.code, 0);
591
+ const state = JSON.parse(fs.readFileSync(pipelinePath, "utf8"));
592
+ assert.ok(state.lastDispatchFailure, "flag must be set on 5xx");
593
+ assert.equal(state.lastDispatchFailure.reason, "dispatch_failure");
594
+ } finally {
595
+ fs.rmSync(tmpDir, { recursive: true, force: true });
596
+ }
597
+ });
598
+
565
599
  it("should NOT flag on happy-path agent that merely documents rate limiting", async () => {
566
600
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "overload-docs-"));
567
601
  const pipelinePath = setupPipelineState(tmpDir);
@@ -594,3 +628,189 @@ describe("subagent-tracker.js overload detection", () => {
594
628
  }
595
629
  });
596
630
  });
631
+
632
+ // ─── _lib/metrics-emit.js ───────────────────────────────────────────────────
633
+
634
+ describe("_lib/metrics-emit.js", () => {
635
+ const { emitMetric } = require("../_lib/metrics-emit.js");
636
+
637
+ it("should append a valid JSONL line and create the metrics dir", () => {
638
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "metrics-emit-"));
639
+ try {
640
+ emitMetric("unit-test-event", {
641
+ tokensAffected: 123,
642
+ tokensSaved: 45,
643
+ note: "hello",
644
+ extras: { source: "test", count: 7 },
645
+ cwd: tmpDir,
646
+ });
647
+ const file = path.join(tmpDir, ".claude", ".metrics", "unit-test-event.jsonl");
648
+ assert.ok(fs.existsSync(file), "JSONL file should be created");
649
+ const lines = fs.readFileSync(file, "utf8").trim().split("\n");
650
+ assert.equal(lines.length, 1, "should have one line");
651
+ const entry = JSON.parse(lines[0]);
652
+ assert.equal(entry.event, "unit-test-event");
653
+ assert.equal(entry.tokens_affected, 123);
654
+ assert.equal(entry.tokens_saved, 45);
655
+ assert.equal(entry.note, "hello");
656
+ assert.equal(entry.source, "test");
657
+ assert.equal(entry.count, 7);
658
+ assert.ok(entry.ts, "ts must be set");
659
+ } finally {
660
+ fs.rmSync(tmpDir, { recursive: true, force: true });
661
+ }
662
+ });
663
+
664
+ it("should fail-silent when the cwd is unwritable / invalid", () => {
665
+ // Pointing cwd at an existing FILE (not dir) makes mkdir/append fail.
666
+ const tmpFile = path.join(os.tmpdir(), `metrics-emit-fail-${Date.now()}.tmp`);
667
+ fs.writeFileSync(tmpFile, "not-a-dir");
668
+ try {
669
+ // Must NOT throw
670
+ assert.doesNotThrow(() => {
671
+ emitMetric("should-not-throw", {
672
+ tokensAffected: 1,
673
+ tokensSaved: 1,
674
+ note: "x",
675
+ cwd: tmpFile, // a file, not a dir → mkdir under it will fail
676
+ });
677
+ });
678
+ } finally {
679
+ fs.rmSync(tmpFile, { force: true });
680
+ }
681
+ });
682
+
683
+ it("should default missing fields to safe values", () => {
684
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "metrics-emit-defaults-"));
685
+ try {
686
+ emitMetric("defaults-event", { cwd: tmpDir });
687
+ const file = path.join(tmpDir, ".claude", ".metrics", "defaults-event.jsonl");
688
+ const entry = JSON.parse(fs.readFileSync(file, "utf8").trim());
689
+ assert.equal(entry.tokens_affected, 0);
690
+ assert.equal(entry.tokens_saved, 0);
691
+ assert.equal(entry.note, "");
692
+ } finally {
693
+ fs.rmSync(tmpDir, { recursive: true, force: true });
694
+ }
695
+ });
696
+ });
697
+
698
+ // ─── context-budget.js metrics emission ─────────────────────────────────────
699
+
700
+ describe("context-budget.js metrics emission", () => {
701
+ const hook = "context-budget.js";
702
+
703
+ it("should emit JSONL with tokens_saved > 0 and note='blocked' when over budget in strict mode", async () => {
704
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ctx-budget-metrics-"));
705
+ try {
706
+ // Explore budget = 10_000 chars. Send a 12_000 char prompt → over budget.
707
+ const oversizePrompt = "x".repeat(12000);
708
+ const result = await runHook(hook, {
709
+ hook_event_name: "PreToolUse",
710
+ tool_name: "Task",
711
+ tool_input: {
712
+ subagent_type: "Explore",
713
+ description: "metrics test",
714
+ prompt: oversizePrompt,
715
+ },
716
+ }, { cwd: tmpDir, projectDir: tmpDir });
717
+
718
+ assert.equal(result.code, 0);
719
+ // strict mode is the default — denial expected
720
+ assert.equal(result.parsed?.permissionDecision, "deny");
721
+
722
+ const metricsFile = path.join(tmpDir, ".claude", ".metrics", "budget-check.jsonl");
723
+ assert.ok(fs.existsSync(metricsFile), "budget-check.jsonl must exist");
724
+ const lines = fs.readFileSync(metricsFile, "utf8").trim().split("\n");
725
+ const entry = JSON.parse(lines[lines.length - 1]);
726
+ assert.equal(entry.event, "budget-check");
727
+ assert.equal(entry.note, "blocked");
728
+ assert.ok(entry.tokens_saved > 0, "tokens_saved should be > 0 on block");
729
+ assert.ok(entry.tokens_affected > 0, "tokens_affected should reflect prompt size");
730
+ assert.equal(entry.would_block, true);
731
+ assert.equal(entry.role, "Explore");
732
+ } finally {
733
+ fs.rmSync(tmpDir, { recursive: true, force: true });
734
+ }
735
+ });
736
+
737
+ it("should emit note='passed' and tokens_saved=0 when under budget", async () => {
738
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ctx-budget-metrics-pass-"));
739
+ try {
740
+ const result = await runHook(hook, {
741
+ hook_event_name: "PreToolUse",
742
+ tool_name: "Task",
743
+ tool_input: {
744
+ subagent_type: "Explore",
745
+ description: "small",
746
+ prompt: "x".repeat(500),
747
+ },
748
+ }, { cwd: tmpDir, projectDir: tmpDir });
749
+
750
+ assert.equal(result.code, 0);
751
+ const metricsFile = path.join(tmpDir, ".claude", ".metrics", "budget-check.jsonl");
752
+ assert.ok(fs.existsSync(metricsFile));
753
+ const entry = JSON.parse(fs.readFileSync(metricsFile, "utf8").trim().split("\n").pop());
754
+ assert.equal(entry.note, "passed");
755
+ assert.equal(entry.tokens_saved, 0);
756
+ assert.ok(entry.tokens_affected > 0);
757
+ assert.equal(entry.would_block, false);
758
+ } finally {
759
+ fs.rmSync(tmpDir, { recursive: true, force: true });
760
+ }
761
+ });
762
+ });
763
+
764
+ // ─── spec-hygiene.js metrics emission ───────────────────────────────────────
765
+
766
+ describe("spec-hygiene.js metrics emission", () => {
767
+ const hook = "spec-hygiene.js";
768
+
769
+ it("should emit spec-hygiene-move with tokens_saved > 0 when an active spec is auto-moved", async () => {
770
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "spec-hygiene-metrics-"));
771
+ try {
772
+ const specName = "2026-04-10-test-completed";
773
+ const specDir = path.join(tmpDir, ".claude", "spec", "active", specName);
774
+ fs.mkdirSync(specDir, { recursive: true });
775
+ // A spec marked completed with all checklist items done → auto-move.
776
+ const body = [
777
+ "# Test",
778
+ "",
779
+ "### Status: completed | Phase: CLOSE | Scope: light",
780
+ "",
781
+ "## Checklist",
782
+ "",
783
+ "- [x] step one",
784
+ "- [x] step two",
785
+ "",
786
+ // Pad the file so tokensSaved > 0 (file size / 4 must round up)
787
+ "## Body",
788
+ "lorem ipsum ".repeat(50),
789
+ "",
790
+ ].join("\n");
791
+ fs.writeFileSync(path.join(specDir, "spec.md"), body);
792
+
793
+ const result = await runHook(hook, {
794
+ hook_event_name: "SessionStart",
795
+ }, { cwd: tmpDir, projectDir: tmpDir });
796
+
797
+ assert.equal(result.code, 0);
798
+
799
+ // Spec must have moved
800
+ const completedSpec = path.join(tmpDir, ".claude", "spec", "completed", specName, "spec.md");
801
+ assert.ok(fs.existsSync(completedSpec), "spec must be relocated to completed/");
802
+
803
+ // Metric must be emitted
804
+ const metricsFile = path.join(tmpDir, ".claude", ".metrics", "spec-hygiene-move.jsonl");
805
+ assert.ok(fs.existsSync(metricsFile), "spec-hygiene-move.jsonl must exist");
806
+ const entry = JSON.parse(fs.readFileSync(metricsFile, "utf8").trim().split("\n").pop());
807
+ assert.equal(entry.event, "spec-hygiene-move");
808
+ assert.ok(entry.tokens_saved > 0, "tokens_saved must be > 0");
809
+ assert.ok(entry.tokens_affected > 0);
810
+ assert.ok(/stale spec/i.test(entry.note));
811
+ assert.ok(entry.from && entry.to, "extras (from/to) must be present");
812
+ } finally {
813
+ fs.rmSync(tmpDir, { recursive: true, force: true });
814
+ }
815
+ });
816
+ });
@@ -0,0 +1,50 @@
1
+ 'use strict';
2
+ /**
3
+ * metrics-emit — shared helper for appending enforcement metrics to JSONL.
4
+ *
5
+ * Schema (one line per call):
6
+ * { ts, event, tokens_affected, tokens_saved, note, ...extras }
7
+ *
8
+ * Files live under `.claude/.metrics/{event}.jsonl`. `metrics-report.js`
9
+ * iterates every `*.jsonl` in that dir, so per-event sharding is compatible.
10
+ *
11
+ * Fail-silent: ANY error (mkdir, append, JSON stringify) is swallowed so
12
+ * hooks calling this never observe a throw. Hooks remain fail-open.
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
+ /**
19
+ * Append a metric line.
20
+ *
21
+ * @param {string} event e.g. "budget-check", "spec-hygiene-move", "rtk-rewrite"
22
+ * @param {object} opts
23
+ * @param {number} [opts.tokensAffected=0] Conservative tokens touched by this event.
24
+ * @param {number} [opts.tokensSaved=0] Tokens prevented from entering context.
25
+ * @param {string} [opts.note=''] Short human label (e.g. "blocked", "passed").
26
+ * @param {object} [opts.extras={}] Extra fields merged into the JSONL line.
27
+ * @param {string} [opts.cwd] Override project dir (defaults to process.cwd()).
28
+ */
29
+ function emitMetric(event, opts = {}) {
30
+ try {
31
+ if (!event || typeof event !== 'string') return;
32
+ const cwd = opts.cwd || process.cwd();
33
+ const dir = path.join(cwd, '.claude', '.metrics');
34
+ const file = path.join(dir, `${event}.jsonl`);
35
+ const line = {
36
+ ts: new Date().toISOString(),
37
+ event,
38
+ tokens_affected: Number.isFinite(opts.tokensAffected) ? opts.tokensAffected : 0,
39
+ tokens_saved: Number.isFinite(opts.tokensSaved) ? opts.tokensSaved : 0,
40
+ note: typeof opts.note === 'string' ? opts.note : '',
41
+ ...(opts.extras && typeof opts.extras === 'object' ? opts.extras : {}),
42
+ };
43
+ fs.mkdirSync(dir, { recursive: true });
44
+ fs.appendFileSync(file, JSON.stringify(line) + '\n');
45
+ } catch (_) {
46
+ // fail-silent — never throw out of a hook
47
+ }
48
+ }
49
+
50
+ module.exports = { emitMetric };
@@ -22,6 +22,7 @@
22
22
  const fs = require('fs');
23
23
  const path = require('path');
24
24
  const { shouldRun } = require('./_lib/hook-env.js');
25
+ const { emitMetric } = require('./_lib/metrics-emit.js');
25
26
 
26
27
  function getMode() {
27
28
  if (process.env.CONTEXT_BUDGET_MODE) return process.env.CONTEXT_BUDGET_MODE;
@@ -33,8 +34,6 @@ function getMode() {
33
34
  }
34
35
 
35
36
  const MODE = getMode();
36
- const METRICS_DIR = path.join(process.cwd(), '.claude', '.metrics');
37
- const METRICS_FILE = path.join(METRICS_DIR, 'budget-observations.jsonl');
38
37
 
39
38
  // Conservative regex: only match .claude/skills/**/*.md, .claude/context/**/*.md, SKILL.md references
40
39
  const MD_REF_PATTERN = /\.claude\/(?:skills|context)\/[^\s"'`]+\.md|SKILL\.md/g;
@@ -94,18 +93,19 @@ process.stdin.on('end', () => {
94
93
  : subagentType;
95
94
 
96
95
  // ALWAYS log (unconditional, fail-silent) — all modes including strict
97
- try {
98
- fs.mkdirSync(METRICS_DIR, { recursive: true });
99
- fs.appendFileSync(METRICS_FILE, JSON.stringify({
100
- ts: new Date().toISOString(),
101
- event: 'budget-check',
96
+ const wouldBlock = actual > limit;
97
+ emitMetric('budget-check', {
98
+ tokensAffected: Math.round(actual / 4),
99
+ tokensSaved: wouldBlock ? Math.max(0, Math.round((actual - limit) / 4)) : 0,
100
+ note: wouldBlock ? 'blocked' : 'passed',
101
+ extras: {
102
102
  role: roleLabel,
103
103
  actual_chars: actual,
104
104
  limit,
105
- would_block: actual > limit,
106
- mode: MODE
107
- }) + '\n');
108
- } catch (_) {}
105
+ would_block: wouldBlock,
106
+ mode: MODE,
107
+ },
108
+ });
109
109
 
110
110
  // Apply mode decision (separate concern):
111
111
  if (MODE === 'observe') {
@@ -24,6 +24,7 @@ const fs = require('fs');
24
24
  const path = require('path');
25
25
  const os = require('os');
26
26
  const { shouldRun } = require('./_lib/hook-env.js');
27
+ const { emitMetric } = require('./_lib/metrics-emit.js');
27
28
 
28
29
  const CACHE_FILE = path.join(os.tmpdir(), 'rtk-available.json');
29
30
  const CACHE_TTL_MS = 60_000;
@@ -109,6 +110,16 @@ process.stdin.on('end', () => {
109
110
  process.exit(0);
110
111
  }
111
112
 
113
+ // Record the rewrite invocation. tokens_saved is intentionally 0 — actual
114
+ // token savings are measured by `rtk gain`. This metric only tracks how
115
+ // often the rewrite path fired so it can be correlated with the gain total.
116
+ emitMetric('rtk-rewrite', {
117
+ tokensAffected: Math.round(cmd.length / 4),
118
+ tokensSaved: 0,
119
+ note: 'rewritten via rtk',
120
+ extras: { command_head: cmd.slice(0, 60) },
121
+ });
122
+
112
123
  console.log(JSON.stringify({
113
124
  hookSpecificOutput: {
114
125
  hookEventName: 'PreToolUse',
@@ -9,6 +9,7 @@
9
9
  const fs = require('fs');
10
10
  const path = require('path');
11
11
  const { shouldRun } = require('./_lib/hook-env.js');
12
+ const { emitMetric } = require('./_lib/metrics-emit.js');
12
13
 
13
14
  try {
14
15
  if (!shouldRun('spec-hygiene')) process.exit(0);
@@ -50,10 +51,26 @@ function runHygiene() {
50
51
  const dest = path.join(completedDir, name);
51
52
  fs.mkdirSync(completedDir, { recursive: true });
52
53
 
54
+ // Capture spec size BEFORE the rename so the path still resolves.
55
+ let fileSize = 0;
56
+ try { fileSize = fs.statSync(specFile).size; } catch (_) { /* best-effort */ }
57
+
53
58
  // Phase 1 (critical): atomic rename. If this fails, state is untouched.
54
59
  fs.renameSync(specDir, dest);
55
60
  process.stderr.write(`[hygiene] Moved ${name} → completed/\n`);
56
61
 
62
+ // Heuristic: tokens "saved" ≈ file_size / 4 (chars-to-tokens). The spec
63
+ // would otherwise have been re-read in future sessions; moving it to
64
+ // completed/ removes it from the active scan path.
65
+ const tokens = Math.round(fileSize / 4);
66
+ emitMetric('spec-hygiene-move', {
67
+ tokensAffected: tokens,
68
+ tokensSaved: tokens,
69
+ note: 'stale spec moved from active/',
70
+ extras: { from: specDir, to: dest },
71
+ cwd,
72
+ });
73
+
57
74
  // Phase 2 (best-effort): cleanup orphan state files.
58
75
  // Each wrapped independently so a failure in one doesn't skip the others.
59
76
  const statesDir = path.join(cwd, '.claude', '.pipeline-states');
@@ -195,15 +195,18 @@ function handlePostToolUse(data, stateDir) {
195
195
 
196
196
  const toolResponse = data.tool_response || {};
197
197
  const responseText = JSON.stringify(toolResponse).toLowerCase();
198
- // Detect overload conservatively: require is_error=true (Claude Code sets
199
- // this on Task tool failures) AND at least one overload keyword. This
200
- // avoids false positives on agents that merely *document* rate limiting
201
- // or error handling in their returned content.
202
- const isOverload =
198
+ // Detect dispatch failures conservatively: require is_error=true (Claude
199
+ // Code sets this on Task tool failures) AND at least one failure keyword.
200
+ // Covers:
201
+ // - API overload / rate limiting (429, 529, throttle, too many requests)
202
+ // - Infrastructure errors (tool result missing, HTTP 5xx, service unavailable)
203
+ // The regex avoids false positives on agents that merely *document* error
204
+ // handling in their returned content (see "unrelated error" test below).
205
+ const isDispatchFailure =
203
206
  toolResponse.is_error === true &&
204
- /overload|rate.?limit|\b429\b|\b529\b|throttl|too many requests/.test(responseText);
207
+ /overload|rate.?limit|\b429\b|\b529\b|throttl|too many requests|tool result missing|\b50[0-4]\b|service unavailable/.test(responseText);
205
208
 
206
- if (!isOverload) return;
209
+ if (!isDispatchFailure) return;
207
210
 
208
211
  const projectDir = path.resolve(stateDir, '..', '..');
209
212
  const statesDir = path.join(projectDir, '.claude', '.pipeline-states');
@@ -231,7 +234,7 @@ function handlePostToolUse(data, stateDir) {
231
234
  const state = JSON.parse(fs.readFileSync(newest, 'utf8'));
232
235
  state.lastDispatchFailure = {
233
236
  at: new Date().toISOString(),
234
- reason: 'api_overload',
237
+ reason: 'dispatch_failure',
235
238
  agentType: toolInput.subagent_type || 'unknown',
236
239
  description: toolInput.description || '',
237
240
  prompt: (toolInput.prompt || '').slice(0, 2000),
@@ -63,8 +63,20 @@ const header = '| Event | Count | Tokens Affected | Tokens Saved | Notes |';
63
63
  const sep = '|-------|-------|-----------------|--------------|-------|';
64
64
  console.log(header);
65
65
  console.log(sep);
66
+ let totalSaved = 0;
67
+ let totalAffected = 0;
68
+ let totalCount = 0;
66
69
  for (const evt of events.sort()) {
67
70
  const { count, tokensAffected, tokensSaved, notes } = agg[evt];
68
71
  const noteStr = [...notes].slice(0, 2).join('; ') || '-';
69
- console.log(`| ${evt} | ${count} | ${tokensAffected || '-'} | ${tokensSaved || '-'} | ${noteStr} |`);
72
+ // When the event records "affected" but no "saved" (e.g. rtk-rewrite,
73
+ // budget-check passing), surface the affected count instead of `-`.
74
+ const affectedCell = tokensAffected > 0 ? tokensAffected : '-';
75
+ const savedCell = tokensSaved > 0 ? tokensSaved : '-';
76
+ console.log(`| ${evt} | ${count} | ${affectedCell} | ${savedCell} | ${noteStr} |`);
77
+ totalSaved += tokensSaved;
78
+ totalAffected += tokensAffected;
79
+ totalCount += count;
70
80
  }
81
+ console.log(sep);
82
+ console.log(`| **TOTAL** | ${totalCount} | ${totalAffected || '-'} | ${totalSaved || '-'} | - |`);