mustard-claude 3.1.30 → 3.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,29 +1,32 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: mustard:metrics
|
|
3
|
-
description:
|
|
3
|
+
description: Focused view of enforcement hook events and compare-window deltas. For the superset (pipelines + hooks + RTK), use /mustard:stats.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# /mustard:metrics -
|
|
6
|
+
# /mustard:metrics - Hook Events & Compare
|
|
7
7
|
|
|
8
8
|
## Trigger
|
|
9
|
-
`/mustard:metrics [--since <ISO date>] [--event <type>]`
|
|
9
|
+
`/mustard:metrics [--since <ISO date>] [--event <type>] [--compare <from> <to>]`
|
|
10
10
|
|
|
11
11
|
## What it does
|
|
12
|
-
|
|
12
|
+
Focused on two use cases:
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
1. **Hook-level aggregation** (default) — runs `.claude/scripts/metrics-report.js` and emits a table of events from `.claude/.metrics/*.jsonl`, plus RTK token savings.
|
|
15
|
+
2. **Compare window** (`--compare`) — delta between two git tags or ISO dates (reference window computed automatically from the delta).
|
|
16
|
+
|
|
17
|
+
For the superset view that also includes per-pipeline metrics, orphans, Pass@1 and Last 7 Days, use **`/mustard:stats`** (cross-reference).
|
|
15
18
|
|
|
16
19
|
## Action
|
|
17
20
|
1. Run `rtk node .claude/scripts/metrics-report.js $ARGS` (pass through any flags)
|
|
18
21
|
2. Display output verbatim
|
|
19
22
|
|
|
20
|
-
##
|
|
23
|
+
## Flags
|
|
21
24
|
- `--since <ISO date>` — filter events after this date
|
|
22
25
|
- `--event <type>` — filter to one event type (e.g. `budget-check`)
|
|
23
|
-
- `--compare <from> <to>` — delta between two windows (git tag or ISO date)
|
|
26
|
+
- `--compare <from> <to>` — delta between two windows (git tag `vX.Y.Z` or ISO date)
|
|
24
27
|
|
|
25
28
|
## Examples
|
|
26
|
-
- `/mustard:metrics` —
|
|
29
|
+
- `/mustard:metrics` — aggregate all recorded hook events (no date filter)
|
|
27
30
|
- `/mustard:metrics --since 2026-04-09` — only recent events
|
|
28
31
|
- `/mustard:metrics --event budget-check` — only budget-check events
|
|
29
32
|
- `/mustard:metrics --compare v3.1.21 v3.1.22` — delta between two releases
|
|
@@ -34,3 +37,4 @@ Metrics are recorded **automatically** by enforcement hooks on every Task dispat
|
|
|
34
37
|
- Logs auto-rotate at 10MB
|
|
35
38
|
- To reset: delete files in `.claude/.metrics/` manually
|
|
36
39
|
- Advanced: override mode via `CONTEXT_BUDGET_MODE` env var (`strict`|`warn`|`observe`). Default is `strict`.
|
|
40
|
+
- `rtk-rewrite` events deliberately show only counts (no `tokens_saved` column) — real RTK numbers come from `rtk gain`, surfaced in the "RTK Token Savings" block.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
description: "Show pipeline metrics, token savings, and performance stats — use when user asks for stats, metrics, performance, or token usage"
|
|
3
3
|
---
|
|
4
4
|
<!-- mustard:generated -->
|
|
5
|
-
# /stats - Pipeline Metrics
|
|
5
|
+
# /stats - Pipeline Metrics (superset view)
|
|
6
6
|
|
|
7
7
|
## Trigger
|
|
8
8
|
|
|
@@ -10,7 +10,7 @@ description: "Show pipeline metrics, token savings, and performance stats — us
|
|
|
10
10
|
|
|
11
11
|
## Description
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
Superset view of pipeline state + enforcement hooks + RTK token economy. This is the primary command; `/mustard:metrics` is a focused view for hook-only events and `--compare` windows.
|
|
14
14
|
|
|
15
15
|
## Action
|
|
16
16
|
|
|
@@ -18,21 +18,14 @@ Displays pipeline metrics including duration, API calls, retries, Pass@1 success
|
|
|
18
18
|
2. Present the output to the user
|
|
19
19
|
3. If no metrics found, inform user to run a pipeline first
|
|
20
20
|
|
|
21
|
-
##
|
|
21
|
+
## Sections emitted
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
- **
|
|
26
|
-
- **
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
```
|
|
30
|
-
## Pass@1 Metrics
|
|
31
|
-
- Pass@1: 80% (4/5 completed without retries)
|
|
32
|
-
- Avg retries per pipeline: 0.4
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
This section is omitted automatically when no completed pipelines exist yet.
|
|
23
|
+
- **Summary** — 5–8 lines with ✓/⚠/→ prefixes (pipelines tracked, orphans, Pass@1, RTK savings, top alert)
|
|
24
|
+
- **Active / Orphaned (per spec)** — duration, API calls, retries, top 3 tools, retries by phase, gate saves, wave reentries, skill hits, Pass@1 by agent (heuristic)
|
|
25
|
+
- **Completed Pipelines** — archived runs from `.claude/metrics/`
|
|
26
|
+
- **Last 7 Days** — events per day + current week vs prior week delta
|
|
27
|
+
- **Enforcement Events (hooks)** — table of events from `.claude/.metrics/*.jsonl`
|
|
28
|
+
- **RTK Token Economy** — totals from `rtk gain`
|
|
36
29
|
|
|
37
30
|
## When to Use
|
|
38
31
|
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* Shared helper: normalize `rtk gain --all --format json` output.
|
|
4
|
+
*
|
|
5
|
+
* rtk emits { summary: { total_saved, avg_savings_pct, total_input,
|
|
6
|
+
* total_output, total_commands }, daily, weekly, monthly }. Different
|
|
7
|
+
* rtk versions (and earlier mustard scripts) assumed top-level
|
|
8
|
+
* `saved_tokens`/`total_saved` — neither is correct on current rtk.
|
|
9
|
+
* This helper is the single source of truth.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const { execFileSync } = require('child_process');
|
|
13
|
+
|
|
14
|
+
/**
 * Run `rtk gain --all --format json` and normalize its output.
 *
 * Totals are read from `data.summary` when that key is present, otherwise
 * from the top level of the parsed JSON, accepting several field spellings.
 *
 * @param {{ timeout?: number }} [opts] - `timeout` is handed to execFileSync
 *   (milliseconds); a missing or falsy value falls back to 3000.
 * @returns {{ saved: number, originalTotal: number, pct: number,
 *   commands: number, byCommand: (Object|null), daily: Array, weekly: Array } | null}
 *   `null` when the command fails or times out, when its stdout is not valid
 *   JSON, or when it reports neither saved tokens nor commands.
 */
function getRtkGain(opts) {
  const timeout = (opts && opts.timeout) || 3000;

  let data;
  try {
    const raw = execFileSync('rtk', ['gain', '--all', '--format', 'json'], {
      encoding: 'utf8',
      timeout,
      // stdin is not needed and stderr is discarded; only stdout is captured.
      stdio: ['ignore', 'pipe', 'ignore'],
      windowsHide: true,
    });
    data = JSON.parse(raw);
  } catch {
    // Deliberate best-effort: any spawn, timeout or parse failure is reported
    // to the caller as "no data" instead of an exception.
    return null;
  }

  const s = (data && data.summary) || data || {};
  const saved = Number(s.total_saved ?? s.saved_tokens ?? s.savedTokens ?? 0) || 0;
  const original = Number(s.total_input ?? s.total_original ?? 0) || 0;
  const commands = Number(s.total_commands ?? s.commands ?? 0) || 0;

  // Use the reported percentage when there is one. When the payload carries
  // totals but no percentage field, derive it from saved/original so callers
  // do not print a misleading 0% savings rate.
  const reportedPct = s.avg_savings_pct ?? s.savings_pct ?? s.savingsPct;
  let pct = Number(reportedPct ?? 0) || 0;
  if (reportedPct == null && original > 0) {
    pct = (saved / original) * 100;
  }

  // Nothing saved and nothing counted is treated the same as "no data".
  if (saved <= 0 && commands <= 0) return null;

  return {
    saved,
    originalTotal: original,
    pct,
    commands,
    byCommand: (data && data.by_command) || null,
    daily: (data && Array.isArray(data.daily)) ? data.daily : [],
    weekly: (data && Array.isArray(data.weekly)) ? data.weekly : [],
  };
}
|
|
49
|
+
|
|
50
|
+
module.exports = { getRtkGain };
|
|
@@ -1,227 +1,418 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
2
3
|
/**
|
|
3
|
-
* METRICS-COLLECT:
|
|
4
|
+
* METRICS-COLLECT: Unified pipeline + hook + RTK metrics view.
|
|
4
5
|
*
|
|
5
|
-
*
|
|
6
|
-
* -
|
|
7
|
-
* -
|
|
8
|
-
* -
|
|
6
|
+
* Sources:
|
|
7
|
+
* - .claude/.pipeline-states/{spec}.metrics.json (sidecar written by metrics-tracker hook)
|
|
8
|
+
* - .claude/.pipeline-states/{spec}.json (main state; optional, for orphan detection)
|
|
9
|
+
* - .claude/metrics/{spec}.json (archived pipelines, written by /complete)
|
|
10
|
+
* - .claude/.metrics/*.jsonl (hook enforcement events)
|
|
11
|
+
* - `rtk gain --all --format json` (token economy, via _rtk-gain helper)
|
|
9
12
|
*
|
|
10
|
-
*
|
|
13
|
+
* Flags:
|
|
14
|
+
* --hooks-only Emit only Summary + Enforcement Events + RTK (skip per-spec sections)
|
|
11
15
|
*
|
|
12
|
-
*
|
|
16
|
+
* Output: Markdown to stdout. Summary block first (5–8 lines with
|
|
17
|
+
* ✓/⚠/→ prefixes), then drill-down sections.
|
|
13
18
|
*/
|
|
14
19
|
|
|
15
|
-
const { execSync } = require('child_process');
|
|
16
20
|
const fs = require('fs');
|
|
17
21
|
const path = require('path');
|
|
22
|
+
const { getRtkGain } = require('./_rtk-gain.js');
|
|
23
|
+
|
|
24
|
+
const args = process.argv.slice(2);
|
|
25
|
+
const HOOKS_ONLY = args.includes('--hooks-only');
|
|
18
26
|
|
|
19
27
|
/**
 * Entry point: gather every metrics source and print one Markdown report.
 *
 * Reads from the `.claude` directory under the current working directory.
 * With `--hooks-only`, per-spec, archive and weekly data are not collected,
 * so only the summary, enforcement-event and RTK sections can appear.
 *
 * The exit status is set through `process.exitCode` rather than calling
 * `process.exit()`: an immediate exit can drop stdout data that has not been
 * flushed yet (stdout writes are asynchronous for some platform/sink
 * combinations), which would truncate a long report.
 */
function main() {
  const claudeDir = path.join(process.cwd(), '.claude');
  const hookMetricsDir = path.join(claudeDir, '.metrics');

  const specs = HOOKS_ONLY ? { active: [], orphaned: [] } : collectSpecs(claudeDir);
  const archives = HOOKS_ONLY ? [] : collectArchives(claudeDir);
  const hookEvents = aggregateHookEvents(hookMetricsDir);
  // RTK is optional; any failure simply omits the RTK lines.
  const rtk = safe(() => getRtkGain({ timeout: 3000 }));
  const weekly = HOOKS_ONLY ? null : buildWeekly(hookMetricsDir);

  const parts = [];
  parts.push('# Pipeline Metrics');
  parts.push('');

  // ── Summary (always first) ───────────────────────────────────────────
  const summaryLines = buildSummary({ specs, archives, hookEvents, rtk });
  if (summaryLines.length > 0) {
    parts.push('## Summary');
    for (const l of summaryLines) parts.push(l);
    parts.push('');
  }

  // ── Per-spec drill-down ──────────────────────────────────────────────
  if (!HOOKS_ONLY) {
    renderSpecs(parts, specs.active, 'Active');
    renderSpecs(parts, specs.orphaned, 'Orphaned');
    if (specs.orphaned.length > 0) {
      parts.push(`> ${specs.orphaned.length} orphaned pipeline state(s) detected. Run \`/mustard:complete {spec-name}\` or \`/mustard:maint\` to reconcile.`);
      parts.push('');
    }
    renderArchives(parts, archives);
  }

  // ── Last 7 Days (temporal dimension) ─────────────────────────────────
  if (weekly && weekly.hasData) {
    parts.push('## Last 7 Days');
    parts.push('');
    parts.push('| Day | Events |');
    parts.push('|-----|--------|');
    for (const [day, count] of weekly.days) {
      parts.push(`| ${day} | ${count} |`);
    }
    parts.push('');
    if (weekly.delta) {
      parts.push(`- Current week: ${weekly.currentCount} events`);
      parts.push(`- Previous week: ${weekly.prevCount} events`);
      parts.push(`- Delta: ${weekly.delta}`);
      parts.push('');
    }
  }

  // ── Enforcement Events (hooks) ───────────────────────────────────────
  if (hookEvents.total > 0) {
    parts.push('## Enforcement Events (hooks)');
    parts.push('');
    parts.push('| Event | Count | Tokens Affected | Tokens Saved |');
    parts.push('|-------|-------|-----------------|--------------|');
    let tc = 0;
    let ta = 0;
    let ts = 0;
    for (const evt of Object.keys(hookEvents.byEvent).sort()) {
      const e = hookEvents.byEvent[evt];
      // Zero totals are shown as '-' to keep the table easy to scan.
      const aff = e.tokensAffected > 0 ? e.tokensAffected : '-';
      const sav = e.tokensSaved > 0 ? e.tokensSaved : '-';
      parts.push(`| ${evt} | ${e.count} | ${aff} | ${sav} |`);
      tc += e.count;
      ta += e.tokensAffected;
      ts += e.tokensSaved;
    }
    parts.push('|-------|-------|-----------------|--------------|');
    parts.push(`| **TOTAL** | ${tc} | ${ta || '-'} | ${ts || '-'} |`);
    parts.push('');
  }

  // ── RTK Token Economy ────────────────────────────────────────────────
  if (rtk && rtk.saved > 0) {
    parts.push('## RTK Token Economy');
    parts.push(`- Total saved: ${Math.round(rtk.saved / 1000)}k tokens`);
    parts.push(`- Savings rate: ${Math.round(rtk.pct)}%`);
    if (rtk.commands > 0) parts.push(`- Commands rewritten: ${rtk.commands}`);
    parts.push('');
  }

  // Only the title and its blank line were pushed: no section had data.
  if (parts.length <= 2) {
    parts.push('No metrics data found. Run a pipeline first.');
  }

  console.log(parts.join('\n'));
  process.exitCode = 0;
}
|
|
115
|
+
|
|
116
|
+
// ── Data collection ────────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
/**
 * Collect metrics for every spec that has a file in `.pipeline-states`.
 *
 * Spec names are the union of `{name}.metrics.json` sidecars and
 * `{name}.json` main state files. Metrics come from the sidecar's `metrics`
 * key when available, otherwise from the main state's; specs with neither
 * are skipped. A spec is "orphaned" when `spec/active/{name}` does not exist.
 *
 * @param {string} claudeDir - Path of the `.claude` directory.
 * @returns {{ active: Array, orphaned: Array }} Entries shaped
 *   `{ name, metrics, isOrphaned, main }`.
 */
function collectSpecs(claudeDir) {
  const statesDir = path.join(claudeDir, '.pipeline-states');
  const buckets = { active: [], orphaned: [] };
  if (!fs.existsSync(statesDir)) return buckets;

  // The sidecar suffix is listed first so "x.metrics.json" maps to "x",
  // not to "x.metrics".
  const suffixes = ['.metrics.json', '.json'];
  const names = new Set();
  for (const file of fs.readdirSync(statesDir)) {
    const suffix = suffixes.find((candidate) => file.endsWith(candidate));
    if (suffix) names.add(file.slice(0, -suffix.length));
  }

  const activeSpecDir = path.join(claudeDir, 'spec', 'active');
  for (const name of names) {
    const sidecar = readJson(path.join(statesDir, `${name}.metrics.json`));
    const main = readJson(path.join(statesDir, `${name}.json`));
    const metrics = (sidecar && sidecar.metrics) || (main && main.metrics) || null;
    if (!metrics) continue;

    const isOrphaned = !fs.existsSync(path.join(activeSpecDir, name));
    const bucket = isOrphaned ? buckets.orphaned : buckets.active;
    bucket.push({ name, metrics, isOrphaned, main });
  }
  return buckets;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Load archived pipeline metrics from `{claudeDir}/metrics/*.json`.
 *
 * Files that cannot be read or parsed, or whose parsed content is falsy,
 * are left out.
 *
 * @param {string} claudeDir - Path of the `.claude` directory.
 * @returns {Array<{ name: string, metrics: * }>} One entry per usable file;
 *   `name` is the file name without its `.json` extension.
 */
function collectArchives(claudeDir) {
  const metricsDir = path.join(claudeDir, 'metrics');
  if (!fs.existsSync(metricsDir)) return [];

  return fs.readdirSync(metricsDir)
    .filter((file) => file.endsWith('.json'))
    .map((file) => ({
      name: file.replace(/\.json$/, ''),
      metrics: readJson(path.join(metricsDir, file)),
    }))
    .filter((entry) => entry.metrics);
}
|
|
89
161
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
totalRetries += m.retries || 0;
|
|
113
|
-
totalDurationMs += m.durationMs || 0;
|
|
114
|
-
count++;
|
|
115
|
-
} catch {}
|
|
162
|
+
/**
 * Aggregate hook events from every `*.jsonl` file in `metricsDir`.
 *
 * Each non-empty line is parsed as JSON. Lines that fail to parse, that do
 * not parse to an object (for example a bare `null` or a number), or that
 * have no truthy `event` field are skipped, so a single bad line cannot
 * abort the report.
 *
 * @param {string} metricsDir - Directory containing the `.jsonl` logs.
 * @returns {{ byEvent: Object, byDay: Object, total: number }}
 *   `byEvent[event]` is `{ count, tokensAffected, tokensSaved }`;
 *   `byDay[key]` counts events whose `ts` starts with `key` (the first ten
 *   characters of `ts`); `total` is the number of events counted.
 */
function aggregateHookEvents(metricsDir) {
  const result = { byEvent: {}, byDay: {}, total: 0 };
  if (!fs.existsSync(metricsDir)) return result;

  const files = fs.readdirSync(metricsDir).filter((f) => f.endsWith('.jsonl'));
  for (const file of files) {
    let content;
    try {
      content = fs.readFileSync(path.join(metricsDir, file), 'utf8');
    } catch {
      // An unreadable log is skipped; the remaining files are still counted.
      continue;
    }

    for (const raw of content.split('\n')) {
      const line = raw.trim();
      if (!line) continue;

      let entry;
      try {
        entry = JSON.parse(line);
      } catch {
        continue;
      }
      // JSON.parse('null') yields null; reading `.event` on it would throw.
      if (entry === null || typeof entry !== 'object' || !entry.event) continue;

      const k = entry.event;
      if (!result.byEvent[k]) {
        result.byEvent[k] = { count: 0, tokensAffected: 0, tokensSaved: 0 };
      }
      const bucket = result.byEvent[k];
      bucket.count++;
      result.total++;

      if (typeof entry.tokens_affected === 'number') {
        bucket.tokensAffected += entry.tokens_affected;
      }
      // PR1: rtk-rewrite tokens_saved is heuristic; real numbers come from rtk-gain.
      if (typeof entry.tokens_saved === 'number' && entry.event !== 'rtk-rewrite') {
        bucket.tokensSaved += entry.tokens_saved;
      }

      if (entry.ts) {
        const day = String(entry.ts).slice(0, 10);
        result.byDay[day] = (result.byDay[day] || 0) + 1;
      }
    }
  }
  return result;
}
|
|
125
193
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
parts.push('## Gate & Quality Metrics');
|
|
152
|
-
parts.push('- Gate saves: ' + (hasGateData ? totalGateSaves : '\u2014') + (hasGateData ? ' (spec revisions after /approve)' : ''));
|
|
153
|
-
parts.push('- Wave reentries: ' + (hasGateData ? totalWaveReentry : '\u2014') + (hasGateData ? ' (EXECUTE \u2192 PLAN)' : ''));
|
|
154
|
-
parts.push('- Skill hit rate:');
|
|
155
|
-
const agentKeys = Object.keys(skillHitAgg);
|
|
156
|
-
if (agentKeys.length > 0) {
|
|
157
|
-
parts.push('');
|
|
158
|
-
parts.push('| Agent | Loaded | Read | Hit rate |');
|
|
159
|
-
parts.push('|-------|--------|------|----------|');
|
|
160
|
-
for (const agent of agentKeys.sort()) {
|
|
161
|
-
const { loaded, read } = skillHitAgg[agent];
|
|
162
|
-
const hitPct = loaded > 0 ? Math.round((read / loaded) * 100) + '%' : '\u2014';
|
|
163
|
-
parts.push(`| ${agent} | ${loaded} | ${read} | ${hitPct} |`);
|
|
164
|
-
}
|
|
165
|
-
} else {
|
|
166
|
-
parts.push(' (no skill tracking data yet)');
|
|
167
|
-
}
|
|
168
|
-
parts.push('');
|
|
169
|
-
}
|
|
194
|
+
/**
 * Build the "Last 7 Days" data from the hook event logs.
 *
 * Day keys are the first ten characters of an ISO timestamp (UTC dates),
 * matching how aggregateHookEvents buckets `ts`.
 *
 * @param {string} metricsDir - Directory containing the `.jsonl` logs.
 * @returns {{ hasData: boolean, days?: Array, currentCount?: number,
 *   prevCount?: number, delta?: (string|null) }}
 *   `{ hasData: false }` when no events exist at all. Otherwise `days` holds
 *   `[dayKey, count]` pairs ordered oldest to newest and ending today, the
 *   counts cover the last 7 days and the 7 days before them, and `delta` is
 *   null when both counts are zero.
 */
function buildWeekly(metricsDir) {
  const agg = aggregateHookEvents(metricsDir);
  if (agg.total === 0) return { hasData: false };

  const DAY_MS = 86400000;
  const nowMs = new Date().getTime();
  const dayKey = (daysAgo) => new Date(nowMs - daysAgo * DAY_MS).toISOString().slice(0, 10);
  const eventsOn = (daysAgo) => agg.byDay[dayKey(daysAgo)] || 0;
  const sumDays = (fromDaysAgo, toDaysAgo) => {
    let sum = 0;
    for (let ago = fromDaysAgo; ago < toDaysAgo; ago++) sum += eventsOn(ago);
    return sum;
  };

  const days = [];
  for (let ago = 6; ago >= 0; ago--) {
    days.push([dayKey(ago), eventsOn(ago)]);
  }

  // Current week vs prior week (14-day window split in half, ending today).
  const currentCount = sumDays(0, 7);
  const prevCount = sumDays(7, 14);
  const delta = (currentCount || prevCount) ? cell(prevCount, currentCount) : null;

  const hasData = days.some(([, count]) => count > 0);
  return { hasData, days, currentCount, prevCount, delta };
}
|
|
170
217
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
218
|
+
// ── Summary ────────────────────────────────────────────────────────────
|
|
219
|
+
|
|
220
|
+
/**
 * Build the short summary lines shown at the top of the report.
 *
 * Lines appear in this order, each only when it applies: tracked/archived
 * counts, orphan warning, hook-level Pass@1, RTK savings, top retry alert.
 *
 * @param {Object} input
 * @param {{ active: Array, orphaned: Array }} input.specs - Tracked specs.
 * @param {Array} input.archives - Archived pipelines.
 * @param {Object} input.hookEvents - Accepted but not read by this function.
 * @param {Object|null} input.rtk - Normalized RTK gain, or null.
 * @returns {string[]} Summary lines (possibly empty).
 */
function buildSummary({ specs, archives, hookEvents, rtk }) {
  const plural = (n) => (n === 1 ? '' : 's');
  const tracked = [...specs.active, ...specs.orphaned];
  const totalSpecs = tracked.length;
  const orphanN = specs.orphaned.length;
  const lines = [];

  if (totalSpecs > 0) {
    lines.push(`→ ${totalSpecs} pipeline${plural(totalSpecs)} tracked (sidecar) · ${archives.length} archived`);
  } else if (archives.length > 0) {
    lines.push(`→ ${archives.length} archived pipeline${plural(archives.length)}`);
  }

  if (orphanN > 0) {
    lines.push(`⚠ ${orphanN} orphaned state${plural(orphanN)} (spec not in active/) — run /mustard:maint`);
  }

  // Pass@1: percentage of tracked specs with zero hook retries.
  if (totalSpecs > 0) {
    const pass = tracked.filter((s) => (s.metrics.retries || 0) === 0).length;
    const passPct = Math.round((pass / totalSpecs) * 100);
    let prefix = '⚠';
    if (passPct >= 80) {
      prefix = '✓';
    } else if (passPct >= 50) {
      prefix = '→';
    }
    lines.push(`${prefix} Pass@1 (hook-level): ${passPct}% (${pass}/${totalSpecs} without hook retries)`);
  }

  if (rtk && rtk.saved > 0) {
    lines.push(`✓ RTK savings: ~${Math.round(rtk.saved / 1000)}k tokens (${Math.round(rtk.pct)}%)`);
  }

  // Top alert: spec with highest retry count on any single phase ≥ 3.
  const alert = findTopAlert(tracked);
  if (alert) lines.push(`⚠ ${alert}`);

  return lines;
}
|
|
257
|
+
|
|
258
|
+
/**
 * Find the single worst retry hot-spot across the given specs.
 *
 * Only `agentAttempts` values of 3 or more qualify; on ties the first one
 * encountered wins.
 *
 * @param {Array<{ name: string, metrics: Object }>} allSpecs
 * @returns {string|null} Alert text, or null when no phase reaches 3.
 */
function findTopAlert(allSpecs) {
  const candidates = allSpecs.flatMap((spec) =>
    Object.entries(spec.metrics.agentAttempts || {})
      .map(([phase, n]) => ({ name: spec.name, phase, n })));

  const worst = candidates.reduce(
    (best, cand) => (cand.n >= 3 && (!best || cand.n > best.n) ? cand : best),
    null,
  );

  return worst
    ? `1 pipeline with ${worst.n} retries in ${worst.phase} (${worst.name})`
    : null;
}
|
|
269
|
+
|
|
270
|
+
// ── Rendering ──────────────────────────────────────────────────────────
|
|
271
|
+
|
|
272
|
+
/**
 * Append one Markdown section per spec to `parts`.
 *
 * @param {string[]} parts - Output lines; mutated in place.
 * @param {Array} list - Spec entries as produced by collectSpecs.
 * @param {string} label - Heading prefix, e.g. 'Active' or 'Orphaned'.
 */
function renderSpecs(parts, list, label) {
  // Sort for stable output: newest date prefix first (names begin with YYYY-MM-DD).
  const newestFirst = [...list].sort((x, y) => y.name.localeCompare(x.name));

  for (const spec of newestFirst) {
    const m = spec.metrics;

    let duration = 'unknown';
    if (m.startedAt) {
      // Without an updatedAt the duration is measured up to "now".
      duration = formatDuration(new Date(m.startedAt), new Date(m.updatedAt || Date.now()));
    }

    parts.push(`## ${label}: ${spec.name}`);
    parts.push(`- Duration: ${duration}`);
    parts.push(`- API calls: ${m.apiCalls || 0}`);
    parts.push(`- Hook retries: ${m.retries || 0}`);

    const tools = m.toolBreakdown ? Object.entries(m.toolBreakdown) : [];
    if (tools.length > 0) {
      const top = tools
        .sort((a, b) => b[1] - a[1])
        .slice(0, 3)
        .map(([tool, n]) => `${tool}:${n}`)
        .join(', ');
      parts.push(`- Top tools: ${top}`);
    }

    const attempts = m.agentAttempts ? Object.entries(m.agentAttempts) : [];
    if (attempts.length > 0) {
      // Phases with 3 or more attempts get a warning marker.
      const labelled = attempts.map(([phase, n]) => `${phase}:${n}${n >= 3 ? ' ⚠' : ''}`);
      parts.push(`- Retries by phase: ${labelled.join(', ')}`);
    }

    if (m.gate_saves !== undefined) parts.push(`- Gate saves: ${m.gate_saves}`);
    if (m.wave_reentry !== undefined) parts.push(`- Wave reentries: ${m.wave_reentry}`);

    const skills = m.skillHits ? Object.entries(m.skillHits) : [];
    if (skills.length > 0) {
      parts.push('- Skill hits:');
      for (const [agent, hits] of skills.sort()) {
        const rate = hits.loaded > 0 ? Math.round((hits.read / hits.loaded) * 100) + '%' : '—';
        parts.push(`  - ${agent}: ${hits.read}/${hits.loaded} (${rate})`);
      }
    }

    // Pass@1 per agent (heuristic): cross subagent-registry with agentAttempts.
    const pass1 = agentPass1(spec);
    if (pass1 && pass1.length > 0) {
      parts.push('- Pass@1 by agent (heuristic):');
      for (const row of pass1) parts.push(`  - ${row}`);
    }

    if (spec.isOrphaned) {
      parts.push('- Spec: not in spec/active/ (likely completed without /mustard:complete)');
    }
    parts.push('');
  }
}
|
|
194
324
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
325
|
+
/**
 * Heuristic Pass@1 rows, one per agent type found in the subagent registry.
 *
 * The registry is read from `.claude/.subagent-registry.json` under the
 * current working directory. Every agent type gets the same verdict: '100%'
 * when the spec has no positive `agentAttempts` value, otherwise an
 * advisory note.
 *
 * @param {{ metrics: Object }} spec - Spec entry whose attempts are checked.
 * @returns {string[]|null} At most five rows sorted by agent type, or null
 *   when the registry is missing/unreadable or lists no agent types.
 */
function agentPass1(spec) {
  const registry = readJson(path.join(process.cwd(), '.claude', '.subagent-registry.json'));
  if (!registry) return null;

  const anyRetry = Object.values(spec.metrics.agentAttempts || {}).some((n) => n > 0);

  const agentTypes = new Set(
    Object.values(registry)
      .filter((entry) => entry && entry.agentType)
      .map((entry) => entry.agentType),
  );
  if (agentTypes.size === 0) return null;

  const verdict = anyRetry ? 'advisory (retries present)' : '100%';
  return [...agentTypes]
    .sort()
    .slice(0, 5)
    .map((agent) => `${agent}: ${verdict}`);
}
|
|
342
|
+
|
|
343
|
+
/**
 * Append the "Completed Pipelines" sections to `parts`.
 *
 * Details and averages cover the ten archives that sort last by name
 * (listed in descending name order); the Pass@1 figures cover all of them.
 * Nothing is appended for an empty list.
 *
 * @param {string[]} parts - Output lines; mutated in place.
 * @param {Array<{ name: string, metrics: Object }>} archives
 */
function renderArchives(parts, archives) {
  if (archives.length === 0) return;

  parts.push('## Completed Pipelines');
  parts.push('');

  const recent = [...archives]
    .sort((a, b) => b.name.localeCompare(a.name))
    .slice(0, 10);

  const totals = { calls: 0, retries: 0, durationMs: 0 };
  for (const { name, metrics: m } of recent) {
    parts.push(`### ${name}`);
    parts.push(`- Duration: ${m.durationMs ? formatMs(m.durationMs) : 'unknown'}`);
    parts.push(`- API calls: ${m.apiCalls || 0}`);
    parts.push(`- Hook retries: ${m.retries || 0}`);
    if (m.rtkSavings) {
      parts.push(`- RTK savings: ${m.rtkSavings.pct}% (${Math.round((m.rtkSavings.saved || 0) / 1000)}k tokens)`);
    }
    parts.push('');

    totals.calls += m.apiCalls || 0;
    totals.retries += m.retries || 0;
    totals.durationMs += m.durationMs || 0;
  }

  const count = recent.length;
  if (count > 0) {
    parts.push(`## Averages (last ${count} pipelines)`);
    parts.push(`- Avg duration: ${formatMs(Math.round(totals.durationMs / count))}`);
    parts.push(`- Avg API calls: ${Math.round(totals.calls / count)}`);
    parts.push(`- Avg hook retries: ${Math.round(totals.retries / count)}`);
    parts.push('');
  }

  // Pass@1 across all archives.
  const retryCounts = archives.map((a) => a.metrics.retries || 0);
  const pass1Count = retryCounts.filter((r) => r === 0).length;
  const retrySum = retryCounts.reduce((sum, r) => sum + r, 0);
  const passPct = Math.round((pass1Count / archives.length) * 100);
  const avgRetries = (retrySum / archives.length).toFixed(1);

  parts.push('## Pass@1 Metrics (archived)');
  parts.push(`- Pass@1 (hook-level): ${passPct}% (${pass1Count}/${archives.length} completed with zero hook retries)`);
  parts.push(`- Avg hook retries per pipeline: ${avgRetries}`);
  parts.push('');
}
|
|
217
391
|
|
|
218
|
-
|
|
219
|
-
|
|
392
|
+
// ── Small helpers ──────────────────────────────────────────────────────
|
|
393
|
+
|
|
394
|
+
/**
 * Read and parse a JSON file.
 *
 * @param {string} p - File path.
 * @returns {*} The parsed value, or null when the file cannot be read or
 *   does not contain valid JSON.
 */
function readJson(p) {
  let parsed = null;
  try {
    const text = fs.readFileSync(p, 'utf8');
    parsed = JSON.parse(text);
  } catch {
    // Missing or malformed files are an expected "no data" case here.
    parsed = null;
  }
  return parsed;
}
|
|
398
|
+
|
|
399
|
+
/**
 * Call `fn` and return its result, turning any thrown error into null.
 *
 * @param {Function} fn - Zero-argument callback.
 * @returns {*} Whatever `fn` returns, or null when it throws.
 */
function safe(fn) {
  let outcome;
  try {
    outcome = fn();
  } catch {
    outcome = null;
  }
  return outcome;
}
|
|
402
|
+
|
|
403
|
+
/**
 * Format the relative change from `ref` to `cur` as a signed percentage.
 *
 * @param {number} ref - Reference (earlier) value.
 * @param {number} cur - Current value.
 * @returns {string} One-decimal percentage with a leading '+' for increases;
 *   '0%' when both values are 0 and 'n/a' when only `ref` is 0.
 */
function pct(ref, cur) {
  if (ref === 0) {
    if (cur === 0) return '0%';
    return 'n/a';
  }
  const change = ((cur - ref) / ref) * 100;
  const sign = change > 0 ? '+' : '';
  return sign + change.toFixed(1) + '%';
}
|
|
409
|
+
|
|
410
|
+
/**
 * Render a "reference→current (change)" cell for delta output.
 *
 * @param {number} ref - Reference (earlier) value.
 * @param {number} cur - Current value.
 * @returns {string} For example `10→15 (+50.0%)`.
 */
function cell(ref, cur) {
  const change = pct(ref, cur);
  return `${ref}→${cur} (${change})`;
}
|
|
221
413
|
|
|
222
414
|
/**
 * Format the time elapsed between two dates.
 *
 * @param {Date} start - Beginning of the interval.
 * @param {Date} end - End of the interval.
 * @returns {string} The elapsed milliseconds as rendered by formatMs.
 */
function formatDuration(start, end) {
  const elapsedMs = end.getTime() - start.getTime();
  return formatMs(elapsedMs);
}
|
|
226
417
|
|
|
227
418
|
function formatMs(ms) {
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
const fs = require('fs');
|
|
13
13
|
const path = require('path');
|
|
14
14
|
const { execFileSync } = require('child_process');
|
|
15
|
+
const { getRtkGain } = require('./_rtk-gain.js');
|
|
15
16
|
|
|
16
17
|
const METRICS_DIR = process.env.MUSTARD_METRICS_DIR
|
|
17
18
|
? path.resolve(process.env.MUSTARD_METRICS_DIR)
|
|
@@ -276,71 +277,49 @@ console.log(sep);
|
|
|
276
277
|
console.log(`| **TOTAL** | ${totalCount} | ${totalAffected || '-'} | ${totalSaved || '-'} | - |`);
|
|
277
278
|
|
|
278
279
|
// ── RTK Integration ────────────────────────────────────────────────────
|
|
279
|
-
// Query RTK
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
if (
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
});
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
console.log('');
|
|
300
|
-
console.log('## RTK Token Savings');
|
|
301
|
-
console.log('');
|
|
302
|
-
|
|
303
|
-
if (rtkData.total_saved !== undefined) {
|
|
304
|
-
const totalSaved = rtkData.total_saved || 0;
|
|
305
|
-
const totalOriginal = rtkData.total_original || 0;
|
|
306
|
-
const pct = totalOriginal > 0 ? Math.round((totalSaved / totalOriginal) * 100) : 0;
|
|
307
|
-
console.log(`| Metric | Value |`);
|
|
308
|
-
console.log(`|--------|-------|`);
|
|
309
|
-
console.log(`| Total tokens saved | ${totalSaved.toLocaleString()} |`);
|
|
310
|
-
console.log(`| Total original tokens | ${totalOriginal.toLocaleString()} |`);
|
|
311
|
-
console.log(`| Savings rate | ${pct}% |`);
|
|
312
|
-
console.log(`| Commands rewritten | ${rtkData.total_commands || '-'} |`);
|
|
313
|
-
}
|
|
280
|
+
// Query RTK via shared helper (handles field shape + missing binary).
|
|
281
|
+
const rtk = (() => {
|
|
282
|
+
try { return getRtkGain({ timeout: 5000 }); } catch (_) { return null; }
|
|
283
|
+
})();
|
|
284
|
+
|
|
285
|
+
if (rtk && rtk.saved > 0) {
|
|
286
|
+
console.log('');
|
|
287
|
+
console.log('## RTK Token Savings');
|
|
288
|
+
console.log('');
|
|
289
|
+
console.log(`| Metric | Value |`);
|
|
290
|
+
console.log(`|--------|-------|`);
|
|
291
|
+
console.log(`| Total tokens saved | ${rtk.saved.toLocaleString()} |`);
|
|
292
|
+
if (rtk.originalTotal > 0) {
|
|
293
|
+
console.log(`| Total original tokens | ${rtk.originalTotal.toLocaleString()} |`);
|
|
294
|
+
}
|
|
295
|
+
console.log(`| Savings rate | ${Math.round(rtk.pct)}% |`);
|
|
296
|
+
if (rtk.commands > 0) {
|
|
297
|
+
console.log(`| Commands rewritten | ${rtk.commands} |`);
|
|
298
|
+
}
|
|
314
299
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
console.log(`| ${cmd} | ${saved.toLocaleString()} | ${orig.toLocaleString()} | ${rate} |`);
|
|
328
|
-
}
|
|
300
|
+
if (rtk.byCommand && typeof rtk.byCommand === 'object') {
|
|
301
|
+
const cmds = Object.entries(rtk.byCommand);
|
|
302
|
+
if (cmds.length > 0) {
|
|
303
|
+
console.log('');
|
|
304
|
+
console.log('### By Command');
|
|
305
|
+
console.log('| Command | Saved | Original | Rate |');
|
|
306
|
+
console.log('|---------|-------|----------|------|');
|
|
307
|
+
for (const [cmd, stats] of cmds.sort((a, b) => (b[1].saved || 0) - (a[1].saved || 0)).slice(0, 10)) {
|
|
308
|
+
const saved = stats.saved || 0;
|
|
309
|
+
const orig = stats.original || 0;
|
|
310
|
+
const rate = orig > 0 ? Math.round((saved / orig) * 100) + '%' : '-';
|
|
311
|
+
console.log(`| ${cmd} | ${saved.toLocaleString()} | ${orig.toLocaleString()} | ${rate} |`);
|
|
329
312
|
}
|
|
330
313
|
}
|
|
331
314
|
}
|
|
332
|
-
} catch (_) {
|
|
333
|
-
// RTK not installed or gain command failed — skip section silently
|
|
334
315
|
}
|
|
335
316
|
|
|
336
|
-
// ── Correlation: hook rewrites
|
|
317
|
+
// ── Correlation: hook rewrites (count only; tokens_saved is dropped by design). ──
|
|
337
318
|
if (agg['rtk-rewrite']) {
|
|
338
319
|
const hookRewrites = agg['rtk-rewrite'].count;
|
|
339
|
-
const hookEstimatedSaved = agg['rtk-rewrite'].tokensSaved;
|
|
340
320
|
console.log('');
|
|
341
321
|
console.log('## RTK Hook Activity');
|
|
342
322
|
console.log(`| Metric | Value |`);
|
|
343
323
|
console.log(`|--------|-------|`);
|
|
344
324
|
console.log(`| Commands rewritten by hook | ${hookRewrites} |`);
|
|
345
|
-
console.log(`| Estimated tokens saved | ${hookEstimatedSaved > 0 ? hookEstimatedSaved.toLocaleString() : '-'} |`);
|
|
346
325
|
}
|