clud-bug 0.6.12 → 0.6.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/clud-bug.js CHANGED
@@ -2,7 +2,7 @@
2
2
  import { mkdir, writeFile, readFile } from 'node:fs/promises';
3
3
  import { join, dirname } from 'node:path';
4
4
  import { fileURLToPath } from 'node:url';
5
- import { spawnSync } from 'node:child_process';
5
+ import { spawnSync, spawn } from 'node:child_process';
6
6
  import { createInterface } from 'node:readline/promises';
7
7
  import { stdin as input, stdout as output } from 'node:process';
8
8
 
@@ -18,6 +18,7 @@ import { runUpdate } from '../lib/update.js';
18
18
  import { getPendingWorkflowEdits, makeBranchName, git as gitCmd } from '../lib/edit-workflow.js';
19
19
  import { applyToRepo as applyAgentDocs } from '../lib/agents-md.js';
20
20
  import { detectRepo, detectDefaultBranch, getProtectionState, enableConversationResolution } from '../lib/branch-protection.js';
21
+ import { computeReviewCost, costPerLOC, cacheHitRate, extractTokensFromLog, rollup, formatRollup } from '../lib/usage.js';
21
22
 
22
23
  const PKG_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
23
24
  const TEMPLATES = join(PKG_ROOT, 'templates');
@@ -28,6 +29,8 @@ function parseArgs(argv) {
28
29
  _: [], offline: false, acceptAll: false, commit: false, help: false, version: false,
29
30
  since: null, changedIn: null, scopes: [], out: null,
30
31
  setProtection: true, quiet: false,
32
+ // 0.0.M.1 (v0.6.13): `clud-bug usage` flags.
33
+ repo: null, pr: null, limit: null, json: false,
31
34
  };
32
35
  for (let i = 0; i < argv.length; i++) {
33
36
  const a = argv[i];
@@ -42,6 +45,10 @@ function parseArgs(argv) {
42
45
  else if (a === '--scope') args.scopes.push(argv[++i]);
43
46
  else if (a === '--out') args.out = argv[++i];
44
47
  else if (a === '--no-set-protection') args.setProtection = false;
48
+ else if (a === '--repo') args.repo = argv[++i];
49
+ else if (a === '--pr') args.pr = Number(argv[++i]);
50
+ else if (a === '--limit') args.limit = Number(argv[++i]);
51
+ else if (a === '--json') args.json = true;
45
52
  else args._.push(a);
46
53
  }
47
54
  return args;
@@ -64,6 +71,11 @@ Commands:
64
71
  templates. Custom and skills.sh-installed specimens left alone.
65
72
  edit-workflow Helper for editing .github/workflows/clud-bug-*.yml in an isolated
66
73
  PR (the action refuses to review PRs that modify its own workflow).
74
+ usage Read recent clud-bug-review run JSON + normalize cost per LOC.
75
+ Internal Q7-clud-bug enforcement dashboard. Reports cache hit
76
+ rate, 30-day rolling \$/LOC trend, per-repo/per-model
77
+ distributions, and outliers (> 2x org median).
78
+ Use --pr / --repo / --since / --limit / --json to filter.
67
79
 
68
80
  Options:
69
81
  --offline Skip skills.sh; pin only the bundled baseline specimens.
@@ -78,6 +90,12 @@ Options:
78
90
  required_conversation_resolution on the default
79
91
  branch (init only). Use for repos that manage
80
92
  branch protection via ruleset or org policy.
93
+ --repo <owner/name> Restrict \`usage\` to a single repo. Default: all repos
94
+ with clud-bug-review.yml in the gh user's auth scope.
95
+ --pr <N> Restrict \`usage\` to a single PR.
96
+ --limit <N> Max reviews to fetch (default 50; the API caps).
97
+ --json Emit JSON instead of human-readable output.
98
+ Compatible with --quiet for pipeline consumption.
81
99
  --since <date> Audit only files changed in commits after <date> (git date string).
82
100
  --changed-in <dur> Audit only files changed in the past <dur>: 7d, 2w, 1mo, 1y. (audit only)
83
101
  --scope <glob> Limit audit to files matching <glob>; repeatable. (audit only)
@@ -107,6 +125,7 @@ async function main() {
107
125
  case 'audit': return runAudit(args);
108
126
  case 'update': return runUpdateCmd(args);
109
127
  case 'edit-workflow': return runEditWorkflow(args);
128
+ case 'usage': return runUsage(args);
110
129
  default:
111
130
  process.stderr.write(`Unknown command: ${cmd || '(none)'}\n\n${HELP}`);
112
131
  process.exit(2);
@@ -707,6 +726,202 @@ async function runAudit(args) {
707
726
  ok(`audit: ${files.length} file${files.length === 1 ? '' : 's'} surveyed; stub at ${rel(cwd, outPath)}`);
708
727
  }
709
728
 
729
+ // 0.0.M.1 (v0.6.13): Q7-clud-bug $/LOC dashboard.
730
+ //
731
+ // Reads recent clud-bug-review run JSON via `gh run list` + per-job logs
732
+ // (which contain the SDK result messages with token counts + model),
733
+ // joins to `gh pr view --json additions,deletions` for the LOC denominator,
734
+ // and reports the rollup. Internal-only — not consumer-facing.
735
+ //
736
+ // Default scope: 30 days, all repos with clud-bug-review.yml in the gh
737
+ // user's auth scope. --repo / --pr / --since / --limit narrow.
738
/**
 * Entry point for `clud-bug usage`: collect review records for the target
 * repos, roll them up, and print the dashboard.
 *
 * @param {object} args Parsed CLI flags; reads `repo`, `pr`, `since`,
 *   `limit` and `json`.
 * @returns {Promise<void>} Settles once output has been written. Exits the
 *   process with status 2 on an invalid flag, when no repo is in scope, or
 *   when no review record could be built.
 */
async function runUsage(args) {
  // parseArgs coerces --limit / --pr with Number(), so a missing or
  // non-numeric value arrives as NaN. NaN is not nullish: without this
  // guard `--limit` would reach gh as "NaN", and `--pr` would reject every
  // review (nothing is === NaN) and end in a misleading "no runs found".
  for (const flag of ['limit', 'pr']) {
    const value = args[flag];
    if (value != null && !(Number.isInteger(value) && value > 0)) {
      process.stderr.write(`clud-bug usage: --${flag} expects a positive integer.\n`);
      process.exit(2);
    }
  }

  const limit = args.limit ?? 50;
  const since = args.since ?? '30d';

  // Target repos: the one named by --repo, otherwise every repo in the gh
  // user's scope that carries clud-bug-review.yml.
  const repos = args.repo ? [args.repo] : await discoverConsumingRepos();

  if (repos.length === 0) {
    process.stderr.write(
      'clud-bug usage: no repos with clud-bug-review.yml found in your gh scope.\n' +
      'Pass --repo <owner/name> to point at a specific repo.\n'
    );
    process.exit(2);
  }

  const debug = Boolean(process.env.CLUD_BUG_DEBUG);
  const reviews = [];
  for (const repo of repos) {
    const runs = await listRecentRuns(repo, limit, since, args.pr);
    if (debug) process.stderr.write(`DBG: ${repo} runs=${runs.length}\n`);
    for (const run of runs) {
      const review = await fetchReviewRecord(repo, run);
      if (debug) process.stderr.write(`DBG: ${run.databaseId} ${run.conclusion} → ${review ? 'OK' : 'NULL'}\n`);
      if (!review) continue;
      // The PR number is only known once the record has been built, so the
      // --pr filter has to run here rather than while listing runs.
      if (args.pr != null && review.pr !== args.pr) continue;
      reviews.push(review);
    }
  }

  if (reviews.length === 0) {
    process.stderr.write(
      `clud-bug usage: no clud-bug-review runs found in scope.\n` +
      `  scope: ${repos.length} repo${repos.length === 1 ? '' : 's'}, last ${since}, limit ${limit}.\n`
    );
    process.exit(2);
  }

  const summary = rollup(reviews);
  const rendered = formatRollup(summary, { json: args.json });
  // Always end the output with a newline so piped JSON is line-terminated
  // and the shell prompt does not land on the last line.
  process.stdout.write(rendered.endsWith('\n') ? rendered : `${rendered}\n`);
  if (!args.json) {
    ok(`usage: ${reviews.length} review${reviews.length === 1 ? '' : 's'} across ${repos.length} repo${repos.length === 1 ? '' : 's'}`);
  }
}
790
+
791
+ // `gh repo list` won't filter by workflow file content, so we iterate
792
+ // repos the user has access to and probe for clud-bug-review.yml. We
793
+ // limit to 100 to avoid pagination explosions.
794
/**
 * Find the repos that ship clud-bug-review.yml by probing each repo returned
 * by `gh repo list` (capped at 100) for the workflow file.
 *
 * @returns {Promise<string[]>} `owner/name` entries whose probe succeeded
 *   with non-empty output; empty when the listing itself failed.
 */
async function discoverConsumingRepos() {
  const listing = await ghJson(['repo', 'list', '--limit', '100', '--json', 'nameWithOwner']);
  if (!Array.isArray(listing)) return [];

  const candidates = listing.map((entry) => entry.nameWithOwner);
  const consumers = [];
  for (const candidate of candidates) {
    // `-q .size` prints the file size when the workflow exists; a failed
    // call or empty output means the repo does not carry it.
    const { code, stdout } = await gh(['api', `repos/${candidate}/contents/.github/workflows/clud-bug-review.yml`, '-q', '.size']);
    const hasWorkflow = code === 0 && stdout.trim().length > 0;
    if (hasWorkflow) consumers.push(candidate);
  }
  return consumers;
}
807
+
808
+ // List recent clud-bug-review.yml runs in a repo. Filters to PR events
809
+ // (drops schedule, workflow_dispatch — those have no PR LOC denominator).
810
+ //
811
+ // IMPORTANT (Q7 measurement integrity, fixed during PR #104 review):
812
+ // We INCLUDE conclusion === 'failure' runs because Anthropic bills for
813
+ // tokens regardless of GitHub workflow conclusion. A run that hit the
814
+ // spend cap, errored mid-action, or failed strict-mode still incurred
815
+ // real API cost — silently excluding it would underreport spend and
816
+ // fool the Q7-clud-bug "gradient must point down" gate.
817
+ // extractTokensFromLog() returns ok:false on logs without usable token
818
+ // totals, which gracefully skips the cancelled/errored-too-early case
819
+ // without losing accountability for the partially-billed runs.
820
/**
 * List recent clud-bug-review.yml runs for one repo, keeping only finished
 * pull_request runs (conclusion `success` or `failure`).
 *
 * @param {string} repo `owner/name` passed to `gh -R`.
 * @param {number} limit Maximum number of runs to request from gh.
 * @param {string} since Either a relative window understood by dateAgo()
 *   (e.g. "30d") or an absolute date starting with YYYY-MM-DD.
 * @param {number|null} [prFilter] Accepted for call-site compatibility and
 *   intentionally unused: the PR number is not part of the run listing, so
 *   the caller filters after resolving it.
 * @returns {Promise<object[]>} Run objects from gh with `repo` added; empty
 *   when gh fails or returns something other than an array.
 */
async function listRecentRuns(repo, limit, since, prFilter) {
  // Previously only relative windows were honoured; an absolute date was
  // silently dropped and the listing went unfiltered.
  let sinceDate = null;
  if (typeof since === 'string' && since.length > 0) {
    const absolute = /^\d{4}-\d{2}-\d{2}/.exec(since);
    sinceDate = absolute ? absolute[0] : dateAgo(since);
    if (sinceDate === null) {
      process.stderr.write(
        `clud-bug usage: ignoring unrecognized --since "${since}" (expected e.g. 30d, 2w, 1m, 1y or YYYY-MM-DD).\n`
      );
    }
  }

  const args = [
    'run', 'list', '-R', repo,
    '--workflow', 'clud-bug-review.yml',
    // Filter by event on the gh side so --limit counts pull_request runs
    // only, instead of being spent on schedule / dispatch runs that the
    // client-side filter below would discard anyway.
    '--event', 'pull_request',
    '--limit', String(limit),
    '--json', 'databaseId,headSha,createdAt,event,status,conclusion',
  ];
  if (sinceDate) args.push('--created', `>=${sinceDate}`);

  const runs = await ghJson(args);
  if (!Array.isArray(runs)) return [];

  const billable = new Set(['success', 'failure']);
  return runs
    .filter((r) => r.event === 'pull_request' && billable.has(r.conclusion))
    .map((r) => ({ ...r, repo }));
}
836
+
837
/**
 * Build one review record for a workflow run: token usage from the
 * `clud-bug-review` job log, joined with the PR's additions/deletions.
 *
 * @param {string} repo `owner/name`.
 * @param {object} run Run entry; reads `databaseId`, `headSha` (through
 *   resolvePrNumber) and `createdAt`.
 * @returns {Promise<object|null>} The record, or null as soon as any lookup
 *   step fails (no job, log fetch failed, no usable tokens, no PR, no LOC).
 */
async function fetchReviewRecord(repo, run) {
  // Step 1: locate the review job inside the run.
  const jobList = await ghJson(['api', `repos/${repo}/actions/runs/${run.databaseId}/jobs`, '-q', '.jobs']);
  if (!Array.isArray(jobList)) return null;
  const reviewJob = jobList.find((j) => j.name === 'clud-bug-review');
  if (!reviewJob) return null;

  // Step 2: download that job's log (can be large) and parse usage from it.
  const logDump = await gh(['api', `repos/${repo}/actions/jobs/${reviewJob.id}/logs`]);
  if (logDump.code !== 0) return null;
  const { ok: parsed, tokens, model: observedModel } = extractTokensFromLog(logDump.stdout);
  if (!parsed) return null;

  // Step 3: map the run to its PR, then read the LOC denominator.
  const pr = await resolvePrNumber(repo, run);
  if (!pr) return null;
  const prMeta = await ghJson(['pr', 'view', String(pr), '-R', repo, '--json', 'additions,deletions,number']);
  if (!prMeta || typeof prMeta.additions !== 'number') return null;
  const { additions, deletions } = prMeta;

  // Step 4: price the tokens and assemble the record.
  const priced = computeReviewCost(tokens, observedModel);
  return {
    repo,
    pr,
    createdAt: run.createdAt,
    model: priced.model,               // key used for pricing
    modelObserved: observedModel,      // value exactly as found in the log
    unknownModel: priced.unknownModel, // lets the dashboard warn on fallbacks
    tokens,
    additions,
    deletions,
    cost: priced.total,
    costPerLOC: costPerLOC(priced.total, additions, deletions),
    cacheRate: cacheHitRate(tokens),
  };
}
878
+
879
/**
 * Resolve the PR number a workflow run belongs to.
 *
 * Tries the run's own `pull_requests` array first, then falls back to the
 * commits/{sha}/pulls endpoint for the run's head SHA.
 *
 * @param {string} repo `owner/name`.
 * @param {object} run Run entry; reads `databaseId` and `headSha`.
 * @returns {Promise<number|null>} The PR number, or null when neither
 *   lookup yields one.
 */
async function resolvePrNumber(repo, run) {
  const attached = await ghJson(['api', `repos/${repo}/actions/runs/${run.databaseId}`, '-q', '.pull_requests']);
  const attachedNumber = Array.isArray(attached) ? attached[0]?.number : undefined;
  if (attachedNumber) return attachedNumber;

  // Fallback by commit: per the original note, `gh pr list -S <sha>` skips
  // closed PRs, which is why this endpoint is used for historical runs.
  const containing = await ghJson(['api', `repos/${repo}/commits/${run.headSha}/pulls`, '-q', '[.[].number]']);
  const found = Array.isArray(containing) && containing.length > 0;
  return found ? containing[0] : null;
}
893
+
894
/**
 * Convert a relative window such as "30d", "2w", "1m" / "1mo" or "1y" into
 * the calendar date that many units before now.
 *
 * Months count as 30 days and years as 365 days.
 *
 * @param {string} spec Count followed by a unit: d, w, m (or mo), y.
 * @returns {string|null} `YYYY-MM-DD` (UTC), or null when `spec` is not a
 *   recognized window or the resulting date cannot be represented.
 */
function dateAgo(spec) {
  // RegExp#exec coerces its argument, so non-string input yields null here
  // instead of throwing on a missing .match method.
  const parts = /^(\d+)(mo|[dwmy])$/.exec(spec);
  if (!parts) return null;

  const DAY_MS = 24 * 60 * 60 * 1000;
  const daysPerUnit = { d: 1, w: 7, m: 30, mo: 30, y: 365 };
  const then = new Date(Date.now() - Number(parts[1]) * daysPerUnit[parts[2]] * DAY_MS);

  // A very large count overflows the Date range: toISOString() would throw
  // RangeError, and years before 0 serialize with an expanded "-00YYYY"
  // prefix that the slice below would mangle.
  if (Number.isNaN(then.getTime()) || then.getUTCFullYear() < 0) return null;
  return then.toISOString().slice(0, 10);
}
904
+
905
+ // gh helpers (reuse pattern from lib/branch-protection.js so callers can
906
+ // stub `gh` in tests if they want — but for now spawn directly).
907
/**
 * Run the `gh` CLI and capture its output. Never rejects: failures are
 * reported through the resolved value.
 *
 * @param {string[]} args Arguments passed to `gh` (no shell involved).
 * @returns {Promise<{code: number, stdout: string, stderr: string}>}
 *   `code` is the exit status, or 1 when the process could not be started
 *   or ended without an exit status.
 */
function gh(args) {
  return new Promise((resolve) => {
    const child = spawn('gh', args, { stdio: ['ignore', 'pipe', 'pipe'] });

    // Decode at the stream level. Appending raw Buffer chunks to a string
    // decodes each chunk on its own and corrupts any multi-byte UTF-8
    // sequence that straddles a chunk boundary.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    let stdout = '';
    let stderr = '';
    child.stdout.on('data', (chunk) => { stdout += chunk; });
    child.stderr.on('data', (chunk) => { stderr += chunk; });

    child.on('error', (err) => {
      // Only ENOENT means the binary is missing; report anything else as is.
      const reason = err && err.code === 'ENOENT'
        ? 'gh not on PATH'
        : `gh failed to start: ${err?.message ?? 'unknown error'}`;
      resolve({ code: 1, stdout: '', stderr: reason });
    });
    // `code` is null when the child was terminated by a signal; report that
    // as a failure status so callers comparing against 0 see a number.
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));
  });
}
918
+
919
/**
 * Run `gh` and parse its stdout as JSON.
 *
 * @param {string[]} args Arguments forwarded to gh().
 * @returns {Promise<*>} The parsed value, or null when gh exits non-zero or
 *   its output is not valid JSON.
 */
async function ghJson(args) {
  const result = await gh(args);
  if (result.code !== 0) return null;

  let parsed;
  try {
    parsed = JSON.parse(result.stdout);
  } catch {
    parsed = null;
  }
  return parsed;
}
924
+
710
925
  function rel(from, to) {
711
926
  return to.startsWith(from + '/') ? to.slice(from.length + 1) : to;
712
927
  }
package/lib/usage.js ADDED
@@ -0,0 +1,401 @@
1
+ // lib/usage.js — Q7-clud-bug $/LOC compute.
2
+ //
3
+ // Pure functions, no I/O. Driven from bin/clud-bug.js which fetches workflow
4
+ // run JSON + PR metadata via gh CLI. Implementation of the 0.0.M.1 dashboard
5
+ // per the Phase 0.5 plan.
6
+ //
7
+ // Reads:
8
+ // - clud-bug-review job logs (via `gh api .../jobs/<id>/logs`), which
9
+ // contain the SDK's `result` messages including:
10
+ // "model": "claude-sonnet-4-6"
11
+ // "input_tokens": N
12
+ // "output_tokens": N
13
+ // "cache_read_input_tokens": N
14
+ // "cache_creation_input_tokens": N
15
+ // - `gh pr view --json additions,deletions` for the LOC denominator.
16
+ //
17
+ // Computes:
18
+ // $/LOC = total_cost(tokens, model) / (additions + deletions)
19
+ //
20
+ // Q7-clud-bug enforcement: dashboard reports the 30-day rolling trend; the
21
+ // next Phase 0.5 PR ships when the trend stops declining.
22
+
23
+ // Anthropic pricing as of 2026-05 (per MTok). Cache write is 1.25× input
24
+ // per Anthropic's published 5-min-TTL ephemeral cache rate.
25
export const PRICING = {
  'claude-sonnet-4-6': {
    input: 3.0, output: 15.0, cacheRead: 0.30, cacheWrite: 3.75,
  },
  // Haiku 4.5 list price. The previous row (0.80 / 4.0 / 0.08 / 1.0) was
  // the older Haiku rate card and undercounted Haiku 4.5 spend.
  'claude-haiku-4-5-20251001': {
    input: 1.0, output: 5.0, cacheRead: 0.10, cacheWrite: 1.25,
  },
  // NOTE(review): these are the rates of earlier Opus releases; confirm
  // against the current pricing page that they still apply to this id.
  'claude-opus-4-7': {
    input: 15.0, output: 75.0, cacheRead: 1.50, cacheWrite: 18.75,
  },
};

// Fallback when the model field is missing or not in PRICING. Sonnet rates
// undercount Opus until the table is updated; the `unknownModel` flag in
// the result lets callers warn.
const DEFAULT_MODEL = 'claude-sonnet-4-6';

// Dated snapshot suffix on a model id, e.g. "-20251001".
const MODEL_DATE_SUFFIX = /-\d{8}$/;

/**
 * Map an observed model id onto a PRICING key, tolerating a dated snapshot
 * suffix on either side ("claude-opus-4-7-20260401" -> "claude-opus-4-7",
 * "claude-haiku-4-5" -> "claude-haiku-4-5-20251001").
 *
 * @param {*} model Model id as found in the log.
 * @returns {string|null} The PRICING key, or null when nothing matches.
 */
function resolvePricingKey(model) {
  if (typeof model !== 'string' || model.length === 0) return null;
  // Own-property checks only: PRICING['constructor'] is truthy through the
  // prototype chain and would produce NaN costs.
  if (Object.hasOwn(PRICING, model)) return model;
  const base = model.replace(MODEL_DATE_SUFFIX, '');
  if (Object.hasOwn(PRICING, base)) return base;
  const sibling = Object.keys(PRICING).find((key) => key.replace(MODEL_DATE_SUFFIX, '') === base);
  return sibling ?? null;
}

/**
 * Compute the USD cost of a single clud-bug review from token counts +
 * model. All four token classes are billed independently.
 *
 * @param {object|null|undefined} tokens Usage counts keyed `input_tokens`,
 *   `output_tokens`, `cache_read_input_tokens`,
 *   `cache_creation_input_tokens`; missing counts are treated as 0.
 * @param {string|null|undefined} model Model id as observed; may carry a
 *   dated snapshot suffix.
 * @returns {{total: number,
 *   parts: {input: number, output: number, cacheRead: number, cacheWrite: number},
 *   model: string, unknownModel: boolean}}
 *   `model` is the PRICING key that was applied; `unknownModel` is true when
 *   no key matched and DEFAULT_MODEL pricing was used instead.
 */
export function computeReviewCost(tokens, model) {
  const counts = tokens ?? {};
  const t = {
    input: counts.input_tokens || 0,
    output: counts.output_tokens || 0,
    cacheRead: counts.cache_read_input_tokens || 0,
    cacheWrite: counts.cache_creation_input_tokens || 0,
  };

  const pricingKey = resolvePricingKey(model);
  const normalized = pricingKey ?? DEFAULT_MODEL;
  const p = PRICING[normalized];

  // Rates are per million tokens.
  const parts = {
    input: (t.input / 1e6) * p.input,
    output: (t.output / 1e6) * p.output,
    cacheRead: (t.cacheRead / 1e6) * p.cacheRead,
    cacheWrite: (t.cacheWrite / 1e6) * p.cacheWrite,
  };
  const total = parts.input + parts.output + parts.cacheRead + parts.cacheWrite;

  return {
    total,
    parts,
    model: normalized,
    unknownModel: pricingKey === null,
  };
}
77
+
78
/**
 * Dollars per changed line for one review.
 *
 * The denominator is additions + deletions, the two counts reported by
 * `gh pr view --json additions,deletions`.
 *
 * @param {number} cost Review cost in USD.
 * @param {number} additions Lines added (falsy counts as 0).
 * @param {number} deletions Lines removed (falsy counts as 0).
 * @returns {number} cost / (additions + deletions), or 0 when no lines
 *   changed so that empty PRs never divide by zero.
 */
export function costPerLOC(cost, additions, deletions) {
  const changedLines = (additions || 0) + (deletions || 0);
  return changedLines === 0 ? 0 : cost / changedLines;
}
91
+
92
/**
 * Share of prompt tokens served from cache:
 * cache_read / (cache_read + cache_creation + input).
 *
 * @param {object} tokens Usage counts; reads `cache_read_input_tokens`,
 *   `cache_creation_input_tokens` and `input_tokens` (missing counts as 0).
 * @returns {number} Ratio in 0..1, or 0 when all three counts are zero.
 */
export function cacheHitRate(tokens) {
  const cached = tokens.cache_read_input_tokens || 0;
  const created = tokens.cache_creation_input_tokens || 0;
  const fresh = tokens.input_tokens || 0;

  const promptTotal = cached + created + fresh;
  return promptTotal === 0 ? 0 : cached / promptTotal;
}
107
+
108
/**
 * Parse the model name and billable token totals from a clud-bug-review job
 * log dump.
 *
 * The log carries per-turn `"type": "assistant"` events (each with its own
 * usage) and a final `"type": "result"` event whose `usage` block is
 * cumulative and may itself nest an `iterations` array of per-message
 * breakdowns. Summing every `"input_tokens"` occurrence would count the
 * same tokens several times, so only the LAST result event's `usage` object
 * is read, and within it only the fields at the object's top level.
 *
 * @param {string} logText Raw job log text.
 * @returns {{model: string|null,
 *   tokens: {input_tokens: number, output_tokens: number,
 *     cache_read_input_tokens: number,
 *     cache_creation_input_tokens: number}|null,
 *   ok: boolean}}
 *   `ok` is false (and `tokens` null) when the log is empty, has no result
 *   event, has no usage block after it, or every count is zero. `model` is
 *   the last `"model"` value found anywhere in the log, reported even when
 *   `ok` is false.
 */
export function extractTokensFromLog(logText) {
  if (typeof logText !== 'string' || logText.length === 0) {
    return { model: null, tokens: null, ok: false };
  }

  // Last model field wins; captured before the usage parse so it is
  // reported even when no result event exists.
  const modelMatches = [...logText.matchAll(/"model"\s*:\s*"([^"]+)"/g)];
  const model = modelMatches.length > 0 ? modelMatches.at(-1)[1] : null;
  const failed = { model, tokens: null, ok: false };

  // A long session may emit several result events — use the final one.
  const resultMarkers = [...logText.matchAll(/"type"\s*:\s*"result"/g)];
  if (resultMarkers.length === 0) return failed;
  const fromResult = logText.slice(resultMarkers.at(-1).index);

  const usageOpen = /"usage"\s*:\s*\{/.exec(fromResult);
  if (usageOpen === null) return failed;

  // Keep only text at the top level of the usage object. This bounds the
  // search at the object's closing brace (a field missing from usage must
  // read as 0, not be picked up from later log output) and skips nested
  // values such as `iterations` wherever they sit among the fields.
  const braceIdx = usageOpen.index + usageOpen[0].length - 1;
  const usageFields = topLevelObjectText(fromResult, braceIdx);

  const pluck = (key) => {
    const hit = new RegExp(`"${key}"\\s*:\\s*(\\d+)`).exec(usageFields);
    return hit ? Number(hit[1]) : 0;
  };

  const input = pluck('input_tokens');
  const output = pluck('output_tokens');
  const cacheRead = pluck('cache_read_input_tokens');
  const cacheWrite = pluck('cache_creation_input_tokens');

  if (input + output + cacheRead + cacheWrite === 0) return failed;

  return {
    model,
    tokens: {
      input_tokens: input,
      output_tokens: output,
      cache_read_input_tokens: cacheRead,
      cache_creation_input_tokens: cacheWrite,
    },
    ok: true,
  };
}

/**
 * Return the characters that sit directly inside the object opening at
 * `openIdx`, dropping the contents of nested objects/arrays and stopping at
 * the matching closing brace (or end of text if the object is truncated).
 * Braces and brackets inside string literals are ignored.
 */
function topLevelObjectText(text, openIdx) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  let kept = '';

  for (let i = openIdx; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (depth === 1) kept += ch;
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }

    if (ch === '"') {
      inString = true;
      if (depth === 1) kept += ch;
    } else if (ch === '{' || ch === '[') {
      depth += 1;
    } else if (ch === '}' || ch === ']') {
      depth -= 1;
      if (depth === 0) break;
    } else if (depth === 1) {
      kept += ch;
    }
  }
  return kept;
}
205
+
206
/**
 * Summarize per-review records into dashboard figures.
 *
 * Reviews whose `costPerLOC` is not greater than 0 are ignored throughout.
 *
 * @param {object[]} reviews Records with `repo`, `pr`, `createdAt`, `model`,
 *   `modelObserved`, `unknownModel`, `additions`, `deletions`, `cost`,
 *   `costPerLOC` and `cacheRate`.
 * @returns {object} Summary with:
 *   - `total`: { reviews, cost, loc, costPerLOC (median), cacheRate (median) }
 *   - `perRepo` / `perModel`: the same five figures keyed by repo / model
 *   - `trend30d`: whatever computeTrend() returns for the kept reviews
 *   - `outliers`: [{ repo, pr, costPerLOC, multiple, cost, reason }] for
 *     reviews above twice the overall median $/LOC
 *   - `unknownModelReviews`: [{ repo, pr, modelObserved }] for reviews
 *     flagged `unknownModel === true`
 */
export function rollup(reviews) {
  const valid = reviews.filter((review) => review.costPerLOC > 0);
  const changedLines = (review) => review.additions + review.deletions;

  // Overall totals.
  let spend = 0;
  let lines = 0;
  for (const review of valid) {
    spend += review.cost;
    lines += changedLines(review);
  }
  const total = {
    reviews: valid.length,
    cost: spend,
    loc: lines,
    costPerLOC: median(valid.map((review) => review.costPerLOC)),
    cacheRate: median(valid.map((review) => review.cacheRate)),
  };

  // Per-key breakdown: accumulate raw samples first, then collapse each
  // bucket into its medians.
  const summarizeBy = (field) => {
    const buckets = {};
    for (const review of valid) {
      const name = review[field];
      if (!buckets[name]) {
        buckets[name] = { reviews: 0, cost: 0, loc: 0, perLoc: [], cache: [] };
      }
      const bucket = buckets[name];
      bucket.reviews += 1;
      bucket.cost += review.cost;
      bucket.loc += changedLines(review);
      bucket.perLoc.push(review.costPerLOC);
      bucket.cache.push(review.cacheRate);
    }

    const summary = {};
    for (const name of Object.keys(buckets)) {
      const { reviews: count, cost, loc, perLoc, cache } = buckets[name];
      summary[name] = {
        reviews: count,
        cost,
        loc,
        costPerLOC: median(perLoc),
        cacheRate: median(cache),
      };
    }
    return summary;
  };

  const perRepo = summarizeBy('repo');
  const perModel = summarizeBy('model');

  // Anything above twice the overall median $/LOC is an outlier.
  const outlierFloor = total.costPerLOC * 2;
  const outliers = [];
  for (const review of valid) {
    if (!(review.costPerLOC > outlierFloor)) continue;
    outliers.push({
      repo: review.repo,
      pr: review.pr,
      costPerLOC: review.costPerLOC,
      multiple: review.costPerLOC / total.costPerLOC,
      cost: review.cost,
      reason: review.cacheRate < 0.3 ? 'low cache hit' : 'unknown',
    });
  }

  const trend30d = computeTrend(valid);

  // Reviews priced with fallback rates, surfaced so the renderer can warn
  // that the pricing table needs an entry for the observed model.
  const unknownModelReviews = [];
  for (const review of valid) {
    if (review.unknownModel !== true) continue;
    unknownModelReviews.push({
      repo: review.repo,
      pr: review.pr,
      modelObserved: review.modelObserved,
    });
  }

  return { total, perRepo, perModel, trend30d, outliers, unknownModelReviews };
}
296
+
297
/**
 * Median of a list of numbers; 0 for an empty list. The input array is not
 * modified.
 */
function median(nums) {
  const count = nums.length;
  if (count === 0) return 0;

  const ordered = Array.from(nums);
  ordered.sort((x, y) => x - y);

  const upper = Math.floor(count / 2);
  if (count % 2 !== 0) return ordered[upper];
  // Even count: average the two middle values.
  return (ordered[upper - 1] + ordered[upper]) / 2;
}
305
+
306
/**
 * Compare the median $/LOC of the last 30 days with the 30 days before.
 *
 * @param {object[]} reviews Records with `createdAt` and `costPerLOC`.
 * @returns {{current: number, previous: number|null, slopePct: number|null}}
 *   `previous` is null when the earlier window has no reviews. `slopePct`
 *   is the percentage change from `previous` to `current`, or null whenever
 *   the two windows cannot be compared (either one is empty, or `previous`
 *   is not positive).
 */
function computeTrend(reviews) {
  if (reviews.length === 0) {
    return { current: 0, previous: null, slopePct: null };
  }

  const now = Date.now();
  const windowMs = 30 * 24 * 60 * 60 * 1000;

  const currentSamples = [];
  const previousSamples = [];
  for (const r of reviews) {
    const age = now - new Date(r.createdAt).getTime();
    // An unparseable createdAt gives NaN, which fails both comparisons and
    // leaves the review out of either window.
    if (age <= windowMs) currentSamples.push(r.costPerLOC);
    else if (age <= 2 * windowMs) previousSamples.push(r.costPerLOC);
  }

  const current = median(currentSamples);
  if (previousSamples.length === 0) {
    return { current, previous: null, slopePct: null };
  }
  const previous = median(previousSamples);

  // With no review in the current window, `current` is only median()'s
  // empty-list placeholder of 0. Computing a slope from it would report a
  // 100% cost decline that never happened, so report "not comparable".
  const comparable = currentSamples.length > 0 && previous > 0;
  const slopePct = comparable ? ((current - previous) / previous) * 100 : null;
  return { current, previous, slopePct };
}
333
+
334
/**
 * Render a rollup() summary for output.
 *
 * @param {object} rollup Summary object as returned by rollup().
 * @param {{json?: boolean}} [opts] With `json: true` the summary is
 *   returned as pretty-printed JSON instead of the text report.
 * @returns {string} JSON text, or the newline-terminated text report.
 */
export function formatRollup(rollup, opts = {}) {
  if (opts.json) {
    return JSON.stringify(rollup, null, 2);
  }

  const asPercent = (ratio) => (ratio * 100).toFixed(0);
  const perLoc = (value) => value.toFixed(4);

  const t = rollup.total;
  const { slopePct } = rollup.trend30d;

  // A null slope means the windows could not be compared; a genuine flat
  // trend still renders with the "→" arrow and 0%.
  let trendLabel = '(no prior window)';
  if (slopePct !== null) {
    let arrow = '→';
    if (slopePct < 0) arrow = '↓';
    else if (slopePct > 0) arrow = '↑';
    trendLabel = `${arrow} ${slopePct.toFixed(0)}% MoM`;
  }

  const report = [`ok: ${t.reviews} reviews, 30-day $/LOC trend: ${trendLabel}`];

  // Repos ordered from the highest median $/LOC down.
  const ranked = Object.entries(rollup.perRepo)
    .sort(([, left], [, right]) => right.costPerLOC - left.costPerLOC);
  if (ranked.length > 0) {
    report.push(' per-repo $/LOC (most → least expensive):');
    for (const [repo, stats] of ranked) {
      const price = `$${perLoc(stats.costPerLOC)}/LOC`;
      const count = String(stats.reviews).padStart(2);
      report.push(
        ` ${repo.padEnd(28)} ${price.padEnd(16)} · ${count} reviews · ${asPercent(stats.cacheRate)}% cached`
      );
    }
  }

  report.push(
    ` org median $/LOC: $${perLoc(t.costPerLOC)} · org cache hit: ${asPercent(t.cacheRate)}%`
  );
  report.push(` total spend: $${t.cost.toFixed(2)} across ${(t.loc).toLocaleString()} LOC`);

  if (rollup.outliers.length > 0) {
    report.push(` outliers (>2× median):`);
    for (const o of rollup.outliers) {
      report.push(
        ` ${o.repo}#${o.pr} ($${perLoc(o.costPerLOC)}/LOC, ${o.multiple.toFixed(1)}× median — ${o.reason})`
      );
    }
  }

  // Warn when reviews were priced with fallback rates, listing each
  // distinct model id that was seen.
  const unpriced = rollup.unknownModelReviews;
  if (unpriced && unpriced.length > 0) {
    const plural = unpriced.length === 1 ? '' : 's';
    report.push(
      ` ⚠️ ${unpriced.length} review${plural} used model${plural} not in PRICING; cost may be undercounted:`
    );
    const seen = new Set(unpriced.map((u) => u.modelObserved));
    for (const name of seen) {
      report.push(` seen: "${name}" — add to lib/usage.js PRICING table`);
    }
  }

  return report.join('\n') + '\n';
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clud-bug",
3
- "version": "0.6.12",
3
+ "version": "0.6.14",
4
4
  "description": "Skill-driven Claude PR review. Ship a brand-voice skill, get brand reviews. Each finding cites the skill that motivated it. CLI installs the workflow + a baseline kit; add more from skills.sh.",
5
5
  "homepage": "https://cludbug.dev",
6
6
  "bugs": "https://github.com/thrillmade/clud-bug/issues",
@@ -6,7 +6,40 @@ on:
6
6
  types: [opened, synchronize]
7
7
 
8
8
  jobs:
9
+ # Pre-flight (v0.6.14 / 0.0.W) — see workflow.yml.tmpl for design notes.
10
+ paths-check:
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ contents: read
14
+ pull-requests: read
15
+ outputs:
16
+ is_workflow_only: ${{ steps.classify.outputs.is_workflow_only }}
17
+ steps:
18
+ - name: Classify PR diff
19
+ id: classify
20
+ env:
21
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22
+ PR_NUMBER: ${{ github.event.pull_request.number }}
23
+ REPO: ${{ github.repository }}
24
+ run: |
25
+ CHANGED=$(gh pr diff "$PR_NUMBER" -R "$REPO" --name-only)
26
+ if [ -z "$CHANGED" ]; then echo "is_workflow_only=false" >> "$GITHUB_OUTPUT"; exit 0; fi
27
+ IS_WORKFLOW_ONLY=true
28
+ while IFS= read -r f; do
29
+ case "$f" in
30
+ .github/workflows/clud-bug-*.yml) ;;
31
+ .github/actions/strict-mode-gate/*) ;;
32
+ *) IS_WORKFLOW_ONLY=false; break ;;
33
+ esac
34
+ done <<< "$CHANGED"
35
+ echo "is_workflow_only=$IS_WORKFLOW_ONLY" >> "$GITHUB_OUTPUT"
36
+ if [ "$IS_WORKFLOW_ONLY" = "true" ]; then
37
+ echo "::notice title=Clud Bug 🐛::Skipping LLM review — workflow-only PR."
38
+ fi
39
+
9
40
  clud-bug-review:
41
+ needs: paths-check
42
+ if: needs.paths-check.outputs.is_workflow_only != 'true'
10
43
  runs-on: ubuntu-latest
11
44
  permissions:
12
45
  contents: read
@@ -85,6 +118,6 @@ jobs:
85
118
  # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
86
119
  - name: Strict mode — fail check on critical findings
87
120
  if: success()
88
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.12
121
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.14
89
122
  with:
90
123
  github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -6,7 +6,40 @@ on:
6
6
  types: [opened, synchronize]
7
7
 
8
8
  jobs:
9
+ # Pre-flight (v0.6.14 / 0.0.W) — see workflow.yml.tmpl for design notes.
10
+ paths-check:
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ contents: read
14
+ pull-requests: read
15
+ outputs:
16
+ is_workflow_only: ${{ steps.classify.outputs.is_workflow_only }}
17
+ steps:
18
+ - name: Classify PR diff
19
+ id: classify
20
+ env:
21
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22
+ PR_NUMBER: ${{ github.event.pull_request.number }}
23
+ REPO: ${{ github.repository }}
24
+ run: |
25
+ CHANGED=$(gh pr diff "$PR_NUMBER" -R "$REPO" --name-only)
26
+ if [ -z "$CHANGED" ]; then echo "is_workflow_only=false" >> "$GITHUB_OUTPUT"; exit 0; fi
27
+ IS_WORKFLOW_ONLY=true
28
+ while IFS= read -r f; do
29
+ case "$f" in
30
+ .github/workflows/clud-bug-*.yml) ;;
31
+ .github/actions/strict-mode-gate/*) ;;
32
+ *) IS_WORKFLOW_ONLY=false; break ;;
33
+ esac
34
+ done <<< "$CHANGED"
35
+ echo "is_workflow_only=$IS_WORKFLOW_ONLY" >> "$GITHUB_OUTPUT"
36
+ if [ "$IS_WORKFLOW_ONLY" = "true" ]; then
37
+ echo "::notice title=Clud Bug 🐛::Skipping LLM review — workflow-only PR."
38
+ fi
39
+
9
40
  clud-bug-review:
41
+ needs: paths-check
42
+ if: needs.paths-check.outputs.is_workflow_only != 'true'
10
43
  runs-on: ubuntu-latest
11
44
  permissions:
12
45
  contents: read
@@ -85,6 +118,6 @@ jobs:
85
118
  # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
86
119
  - name: Strict mode — fail check on critical findings
87
120
  if: success()
88
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.12
121
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.14
89
122
  with:
90
123
  github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -6,7 +6,51 @@ on:
6
6
  types: [opened, synchronize]
7
7
 
8
8
  jobs:
9
+ # Pre-flight (v0.6.14 / 0.0.W): if the PR ONLY touches clud-bug workflow
10
+ # files or the strict-mode-gate composite action, skip the LLM review
11
+ # entirely. claude-code-action would refuse to run on such a PR anyway
12
+ # (self-modification guard — required for security), and a template
13
+ # re-render has no useful review surface. Skipping turns a previously
14
+ # required-admin-bypass merge into a normal one, AND saves the LLM
15
+ # call cost. Security: the classifier requires ALL changed files to
16
+ # match the allow-list — a mixed PR (workflow + code) still runs the
17
+ # review normally.
18
+ paths-check:
19
+ runs-on: ubuntu-latest
20
+ permissions:
21
+ contents: read
22
+ pull-requests: read
23
+ outputs:
24
+ is_workflow_only: ${{ steps.classify.outputs.is_workflow_only }}
25
+ steps:
26
+ - name: Classify PR diff
27
+ id: classify
28
+ env:
29
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
30
+ PR_NUMBER: ${{ github.event.pull_request.number }}
31
+ REPO: ${{ github.repository }}
32
+ run: |
33
+ CHANGED=$(gh pr diff "$PR_NUMBER" -R "$REPO" --name-only)
34
+ if [ -z "$CHANGED" ]; then
35
+ echo "is_workflow_only=false" >> "$GITHUB_OUTPUT"
36
+ exit 0
37
+ fi
38
+ IS_WORKFLOW_ONLY=true
39
+ while IFS= read -r f; do
40
+ case "$f" in
41
+ .github/workflows/clud-bug-*.yml) ;;
42
+ .github/actions/strict-mode-gate/*) ;;
43
+ *) IS_WORKFLOW_ONLY=false; break ;;
44
+ esac
45
+ done <<< "$CHANGED"
46
+ echo "is_workflow_only=$IS_WORKFLOW_ONLY" >> "$GITHUB_OUTPUT"
47
+ if [ "$IS_WORKFLOW_ONLY" = "true" ]; then
48
+ echo "::notice title=Clud Bug 🐛::Skipping LLM review — PR only touches workflow files (claude-code-action would refuse anyway due to self-modification guard)."
49
+ fi
50
+
9
51
  clud-bug-review:
52
+ needs: paths-check
53
+ if: needs.paths-check.outputs.is_workflow_only != 'true'
10
54
  runs-on: ubuntu-latest
11
55
  permissions:
12
56
  contents: read
@@ -145,6 +189,6 @@ jobs:
145
189
  # Letting the action's own failure fail the check is louder and right.
146
190
  - name: Strict mode — fail check on critical findings
147
191
  if: success()
148
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.12
192
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.14
149
193
  with:
150
194
  github-token: ${{ secrets.GITHUB_TOKEN }}