clud-bug 0.6.12 → 0.6.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/clud-bug.js +216 -1
- package/lib/usage.js +401 -0
- package/package.json +1 -1
- package/templates/workflow-py.yml.tmpl +34 -1
- package/templates/workflow-ts.yml.tmpl +34 -1
- package/templates/workflow.yml.tmpl +45 -1
package/bin/clud-bug.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { mkdir, writeFile, readFile } from 'node:fs/promises';
|
|
3
3
|
import { join, dirname } from 'node:path';
|
|
4
4
|
import { fileURLToPath } from 'node:url';
|
|
5
|
-
import { spawnSync } from 'node:child_process';
|
|
5
|
+
import { spawnSync, spawn } from 'node:child_process';
|
|
6
6
|
import { createInterface } from 'node:readline/promises';
|
|
7
7
|
import { stdin as input, stdout as output } from 'node:process';
|
|
8
8
|
|
|
@@ -18,6 +18,7 @@ import { runUpdate } from '../lib/update.js';
|
|
|
18
18
|
import { getPendingWorkflowEdits, makeBranchName, git as gitCmd } from '../lib/edit-workflow.js';
|
|
19
19
|
import { applyToRepo as applyAgentDocs } from '../lib/agents-md.js';
|
|
20
20
|
import { detectRepo, detectDefaultBranch, getProtectionState, enableConversationResolution } from '../lib/branch-protection.js';
|
|
21
|
+
import { computeReviewCost, costPerLOC, cacheHitRate, extractTokensFromLog, rollup, formatRollup } from '../lib/usage.js';
|
|
21
22
|
|
|
22
23
|
const PKG_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
|
|
23
24
|
const TEMPLATES = join(PKG_ROOT, 'templates');
|
|
@@ -28,6 +29,8 @@ function parseArgs(argv) {
|
|
|
28
29
|
_: [], offline: false, acceptAll: false, commit: false, help: false, version: false,
|
|
29
30
|
since: null, changedIn: null, scopes: [], out: null,
|
|
30
31
|
setProtection: true, quiet: false,
|
|
32
|
+
// 0.0.M.1 (v0.6.13): `clud-bug usage` flags.
|
|
33
|
+
repo: null, pr: null, limit: null, json: false,
|
|
31
34
|
};
|
|
32
35
|
for (let i = 0; i < argv.length; i++) {
|
|
33
36
|
const a = argv[i];
|
|
@@ -42,6 +45,10 @@ function parseArgs(argv) {
|
|
|
42
45
|
else if (a === '--scope') args.scopes.push(argv[++i]);
|
|
43
46
|
else if (a === '--out') args.out = argv[++i];
|
|
44
47
|
else if (a === '--no-set-protection') args.setProtection = false;
|
|
48
|
+
else if (a === '--repo') args.repo = argv[++i];
|
|
49
|
+
else if (a === '--pr') args.pr = Number(argv[++i]);
|
|
50
|
+
else if (a === '--limit') args.limit = Number(argv[++i]);
|
|
51
|
+
else if (a === '--json') args.json = true;
|
|
45
52
|
else args._.push(a);
|
|
46
53
|
}
|
|
47
54
|
return args;
|
|
@@ -64,6 +71,11 @@ Commands:
|
|
|
64
71
|
templates. Custom and skills.sh-installed specimens left alone.
|
|
65
72
|
edit-workflow Helper for editing .github/workflows/clud-bug-*.yml in an isolated
|
|
66
73
|
PR (the action refuses to review PRs that modify its own workflow).
|
|
74
|
+
usage Read recent clud-bug-review run JSON + normalize cost per LOC.
|
|
75
|
+
Internal Q7-clud-bug enforcement dashboard. Reports cache hit
|
|
76
|
+
rate, 30-day rolling \$/LOC trend, per-repo/per-model
|
|
77
|
+
distributions, and outliers (> 2x org median).
|
|
78
|
+
Use --pr / --repo / --since / --limit / --json to filter.
|
|
67
79
|
|
|
68
80
|
Options:
|
|
69
81
|
--offline Skip skills.sh; pin only the bundled baseline specimens.
|
|
@@ -78,6 +90,12 @@ Options:
|
|
|
78
90
|
required_conversation_resolution on the default
|
|
79
91
|
branch (init only). Use for repos that manage
|
|
80
92
|
branch protection via ruleset or org policy.
|
|
93
|
+
--repo <owner/name> Restrict \`usage\` to a single repo. Default: all repos
|
|
94
|
+
with clud-bug-review.yml in the gh user's auth scope.
|
|
95
|
+
--pr <N> Restrict \`usage\` to a single PR.
|
|
96
|
+
--limit <N> Max reviews to fetch (default 50; the API caps).
|
|
97
|
+
--json Emit JSON instead of human-readable output.
|
|
98
|
+
Compatible with --quiet for pipeline consumption.
|
|
81
99
|
--since <date> Audit only files changed in commits after <date> (git date string).
|
|
82
100
|
--changed-in <dur> Audit only files changed in the past <dur>: 7d, 2w, 1mo, 1y. (audit only)
|
|
83
101
|
--scope <glob> Limit audit to files matching <glob>; repeatable. (audit only)
|
|
@@ -107,6 +125,7 @@ async function main() {
|
|
|
107
125
|
case 'audit': return runAudit(args);
|
|
108
126
|
case 'update': return runUpdateCmd(args);
|
|
109
127
|
case 'edit-workflow': return runEditWorkflow(args);
|
|
128
|
+
case 'usage': return runUsage(args);
|
|
110
129
|
default:
|
|
111
130
|
process.stderr.write(`Unknown command: ${cmd || '(none)'}\n\n${HELP}`);
|
|
112
131
|
process.exit(2);
|
|
@@ -707,6 +726,202 @@ async function runAudit(args) {
|
|
|
707
726
|
ok(`audit: ${files.length} file${files.length === 1 ? '' : 's'} surveyed; stub at ${rel(cwd, outPath)}`);
|
|
708
727
|
}
|
|
709
728
|
|
|
729
|
+
// 0.0.M.1 (v0.6.13): Q7-clud-bug $/LOC dashboard.
//
// Reads recent clud-bug-review run JSON via `gh run list` + per-job logs
// (which contain the SDK result messages with token counts + model),
// joins to `gh pr view --json additions,deletions` for the LOC denominator,
// and reports the rollup. Internal-only — not consumer-facing.
//
// Default scope: 30 days, all repos with clud-bug-review.yml in the gh
// user's auth scope. --repo / --pr / --since / --limit narrow.
//
// Exits with status 2 when --limit / --pr is not a positive integer, when
// no repo is in scope, or when no review record could be built.
//
// NOTE(review): the rollup's trend compares reviews from the last 30 days
// against reviews aged 30–60 days (computeTrend in lib/usage.js), but the
// default `since` of 30d only asks gh for roughly the last 30 days of runs.
// With the default scope the MoM slope will therefore usually have no prior
// window; pass `--since 60d` (or wider) to get one — confirm the intended
// default.
async function runUsage(args) {
  // parseArgs stores Number(<value>) for --limit / --pr, so a missing or
  // non-numeric value arrives here as NaN. NaN would make the --pr filter
  // reject every review and would be forwarded to gh as "--limit NaN";
  // both end in a misleading "no runs found". Fail fast instead.
  const invalidFlag = [['--limit', args.limit], ['--pr', args.pr]]
    .find(([, value]) => value != null && !(Number.isInteger(value) && value > 0));
  if (invalidFlag) {
    process.stderr.write(`clud-bug usage: ${invalidFlag[0]} expects a positive integer.\n`);
    process.exit(2);
  }

  const limit = args.limit ?? 50;
  const since = args.since ?? '30d';

  // Determine target repos. If --repo specified, just that one. Otherwise
  // discover repos via the local gh user's auth scope (the org's repos we
  // own clud-bug-review on).
  const repos = args.repo
    ? [args.repo]
    : await discoverConsumingRepos();

  if (repos.length === 0) {
    process.stderr.write(
      'clud-bug usage: no repos with clud-bug-review.yml found in your gh scope.\n' +
      'Pass --repo <owner/name> to point at a specific repo.\n'
    );
    process.exit(2);
  }

  // Per-repo: list recent clud-bug-review runs + extract the per-run job
  // logs + per-PR LOC counts. Filter to PR runs (drop schedule/dispatch).
  // PR #104 fix: --pr filter must be applied AFTER resolvePrNumber
  // (we don't have the PR # until then). prFilter on listRecentRuns was
  // promised but never applied — bug caught by clud-bug self-review.
  const reviews = [];
  for (const repo of repos) {
    const runs = await listRecentRuns(repo, limit, since, args.pr);
    if (process.env.CLUD_BUG_DEBUG) process.stderr.write(`DBG: ${repo} runs=${runs.length}\n`);
    for (const run of runs) {
      const review = await fetchReviewRecord(repo, run);
      if (process.env.CLUD_BUG_DEBUG) process.stderr.write(`DBG: ${run.databaseId} ${run.conclusion} → ${review ? 'OK' : 'NULL'}\n`);
      if (!review) continue;
      // --pr filter: drop reviews whose PR doesn't match.
      if (args.pr != null && review.pr !== args.pr) continue;
      reviews.push(review);
    }
  }

  if (reviews.length === 0) {
    process.stderr.write(
      `clud-bug usage: no clud-bug-review runs found in scope.\n` +
      `  scope: ${repos.length} repo${repos.length === 1 ? '' : 's'}, last ${since}, limit ${limit}.\n`
    );
    process.exit(2);
  }

  const summary = rollup(reviews);
  process.stdout.write(formatRollup(summary, { json: args.json }));
  if (!args.json) {
    ok(`usage: ${reviews.length} review${reviews.length === 1 ? '' : 's'} across ${repos.length} repo${repos.length === 1 ? '' : 's'}`);
  }
}
|
|
790
|
+
|
|
791
|
+
// `gh repo list` cannot filter on workflow file content, so every repo it
// returns (capped at 100 to keep the probe count bounded) is checked for
// .github/workflows/clud-bug-review.yml through the contents API.
//
// Returns the "owner/name" of each repo whose probe exits 0 and prints a
// non-empty size, in the order gh listed them; [] when the listing fails.
async function discoverConsumingRepos() {
  const listing = await ghJson(['repo', 'list', '--limit', '100', '--json', 'nameWithOwner']);
  if (!Array.isArray(listing)) return [];

  const consumers = [];
  for (const { nameWithOwner } of listing) {
    const workflowPath = `repos/${nameWithOwner}/contents/.github/workflows/clud-bug-review.yml`;
    const { code, stdout } = await gh(['api', workflowPath, '-q', '.size']);
    const hasWorkflow = code === 0 && stdout.trim().length > 0;
    if (hasWorkflow) consumers.push(nameWithOwner);
  }
  return consumers;
}
|
|
807
|
+
|
|
808
|
+
// List recent clud-bug-review.yml runs in a repo. Filters to PR events
// (drops schedule, workflow_dispatch — those have no PR LOC denominator).
//
// IMPORTANT (Q7 measurement integrity, fixed during PR #104 review):
// We INCLUDE conclusion === 'failure' runs because Anthropic bills for
// tokens regardless of GitHub workflow conclusion. A run that hit the
// spend cap, errored mid-action, or failed strict-mode still incurred
// real API cost — silently excluding it would underreport spend and
// fool the Q7-clud-bug "gradient must point down" gate.
// extractTokensFromLog() returns ok:false on logs without usable token
// totals, which gracefully skips the cancelled/errored-too-early case
// without losing accountability for the partially-billed runs.
//
// Parameters:
//   repo     - "owner/name", passed to `gh run list -R`.
//   limit    - max runs requested from gh; also caps the returned array.
//   since    - either a relative span ("30d", "2w", "1m", "1y") resolved
//              through dateAgo(), or an absolute date beginning with
//              YYYY-MM-DD (any time suffix is ignored). Anything else is
//              reported on stderr and no date filter is applied.
//   prFilter - accepted for call-site compatibility but not used here: the
//              run list carries no PR number, so the caller filters after
//              resolving it.
//
// Returns the matching run objects (the requested gh JSON fields plus
// `repo`), or [] when gh fails or prints something that is not an array.
async function listRecentRuns(repo, limit, since, prFilter) {
  // Previously any `since` that was not a relative span silently disabled
  // the date filter, so `--since 2026-01-31` returned runs of any age.
  let sinceDate = null;
  if (typeof since === 'string' && since.length > 0) {
    if (/^\d+[dwmy]$/.test(since)) {
      sinceDate = dateAgo(since);
    } else if (/^\d{4}-\d{2}-\d{2}/.test(since)) {
      sinceDate = since.slice(0, 10);
    } else {
      process.stderr.write(
        `clud-bug usage: ignoring unrecognized --since value "${since}" (expected e.g. 30d or 2026-01-31).\n`
      );
    }
  }

  const args = [
    'run', 'list', '-R', repo,
    '--workflow', 'clud-bug-review.yml',
    '--limit', String(limit),
    '--json', 'databaseId,headSha,createdAt,event,status,conclusion',
  ];
  if (sinceDate) args.push('--created', `>=${sinceDate}`);

  const runs = await ghJson(args);
  if (!Array.isArray(runs)) return [];
  return runs
    .filter((r) => r.event === 'pull_request' && (r.conclusion === 'success' || r.conclusion === 'failure'))
    .map((r) => ({ ...r, repo }))
    .slice(0, limit);
}
|
|
836
|
+
|
|
837
|
+
// Build one review record for a workflow run, or return null when any
// required piece is missing. Steps, in order:
//   1. find the job named 'clud-bug-review' in the run's job list;
//   2. download that job's log dump (may be large);
//   3. pull token counts + model out of the log;
//   4. resolve the PR number for the run;
//   5. read additions/deletions from the PR for the LOC denominator.
// A failure at any step (gh error, no matching job, ok:false extraction,
// unresolved PR, non-numeric additions) yields null.
async function fetchReviewRecord(repo, run) {
  const jobList = await ghJson(['api', `repos/${repo}/actions/runs/${run.databaseId}/jobs`, '-q', '.jobs']);
  const reviewJob = Array.isArray(jobList)
    ? jobList.find((candidate) => candidate.name === 'clud-bug-review')
    : undefined;
  if (!reviewJob) return null;

  const logDump = await gh(['api', `repos/${repo}/actions/jobs/${reviewJob.id}/logs`]);
  if (logDump.code !== 0) return null;

  // `modelObserved` is the raw value from the log (may be versioned).
  const { ok: extractedOk, tokens, model: modelObserved } = extractTokensFromLog(logDump.stdout);
  if (!extractedOk) return null;

  const pr = await resolvePrNumber(repo, run);
  if (!pr) return null;

  const prMeta = await ghJson(['pr', 'view', String(pr), '-R', repo, '--json', 'additions,deletions,number']);
  if (typeof prMeta?.additions !== 'number') return null;
  const { additions, deletions } = prMeta;

  // `model` from computeReviewCost is the PRICING key used for billing;
  // `unknownModel` is surfaced so the dashboard can warn (PR #104 fix).
  const { total, model, unknownModel } = computeReviewCost(tokens, modelObserved);
  return {
    repo,
    pr,
    createdAt: run.createdAt,
    model,
    modelObserved,
    unknownModel,
    tokens,
    additions,
    deletions,
    cost: total,
    costPerLOC: costPerLOC(total, additions, deletions),
    cacheRate: cacheHitRate(tokens),
  };
}
|
|
878
|
+
|
|
879
|
+
// Resolve the PR number a workflow run belongs to; null when neither
// lookup produces one.
//
// First choice is the run's own `pull_requests` array. That array can be
// missing or empty (e.g. once the PR has been merged), so the fallback is
// the commits/{sha}/pulls endpoint, which lists PRs containing the head
// commit — open AND merged/closed. The default `gh pr list -S <sha>` does
// not search closed PRs and silently returns empty for the merged case,
// which made every $/LOC lookup fail on historical PRs.
async function resolvePrNumber(repo, run) {
  const attached = await ghJson(['api', `repos/${repo}/actions/runs/${run.databaseId}`, '-q', '.pull_requests']);
  const direct = Array.isArray(attached) ? attached[0]?.number : undefined;
  if (direct) return direct;

  const containing = await ghJson(['api', `repos/${repo}/commits/${run.headSha}/pulls`, '-q', '[.[].number]']);
  const found = Array.isArray(containing) && containing.length > 0;
  return found ? containing[0] : null;
}
|
|
893
|
+
|
|
894
|
+
// Convert a relative span such as "30d", "2w", "1m" or "1y" into the UTC
// calendar date (YYYY-MM-DD) that many units before now. A month counts
// as 30 days and a year as 365 days.
//
// Returns null when `spec` does not match <digits><d|w|m|y>, and also when
// the span reaches outside what a four-digit ISO date can express: an
// out-of-range Date makes toISOString() throw a RangeError, and a year
// before 0 serializes in the expanded "-00YYYY-…" form, which the 10-char
// slice would truncate into garbage.
function dateAgo(spec) {
  const parsed = /^(\d+)([dwmy])$/.exec(spec);
  if (!parsed) return null;

  const DAY_MS = 24 * 60 * 60 * 1000;
  const daysPerUnit = { d: 1, w: 7, m: 30, y: 365 };
  const spanMs = Number(parsed[1]) * daysPerUnit[parsed[2]] * DAY_MS;

  const when = new Date(Date.now() - spanMs);
  if (Number.isNaN(when.getTime()) || when.getUTCFullYear() < 0) return null;
  return when.toISOString().slice(0, 10);
}
|
|
904
|
+
|
|
905
|
+
// gh helpers (reuse pattern from lib/branch-protection.js so callers can
// stub `gh` in tests if they want — but for now spawn directly).
//
// Runs `gh <args>` with stdin ignored and resolves — never rejects — with
// { code, stdout, stderr }:
//   - code   : the child's exit code; 1 when the process could not be
//              started or ended without an exit code (killed by a signal).
//   - stdout : everything the child wrote to stdout, decoded as UTF-8.
//   - stderr : everything the child wrote to stderr, or a short reason
//              when the process could not be started.
function gh(args) {
  return new Promise((resolve) => {
    const child = spawn('gh', args, { stdio: ['ignore', 'pipe', 'pipe'] });

    // Decode at the stream level. Appending raw Buffer chunks to a string
    // decodes each chunk on its own, so a multi-byte UTF-8 character that
    // straddles a chunk boundary (likely in large job logs) would turn
    // into replacement characters.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    let stdout = '';
    let stderr = '';
    child.stdout.on('data', (chunk) => { stdout += chunk; });
    child.stderr.on('data', (chunk) => { stderr += chunk; });

    // 'error' and 'close' can both fire for one child; the promise keeps
    // whichever settles it first.
    child.on('error', (err) => resolve({
      code: 1,
      stdout: '',
      stderr: err && err.code === 'ENOENT' ? 'gh not on PATH' : `gh failed to start: ${err?.message ?? err}`,
    }));
    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));
  });
}
|
|
918
|
+
|
|
919
|
+
// Run `gh <args>` and return its stdout parsed as JSON. Returns null when
// gh exits non-zero or when stdout is not valid JSON.
async function ghJson(args) {
  const outcome = await gh(args);
  if (outcome.code === 0) {
    try {
      return JSON.parse(outcome.stdout);
    } catch {
      // Not JSON — fall through to the shared null result.
    }
  }
  return null;
}
|
|
924
|
+
|
|
710
925
|
// Express `to` relative to the directory `from`: when `to` begins with
// `from` followed by '/', return the remainder; otherwise return `to`
// unchanged.
function rel(from, to) {
  const prefix = `${from}/`;
  if (!to.startsWith(prefix)) return to;
  return to.slice(prefix.length);
}
|
package/lib/usage.js
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
// lib/usage.js — Q7-clud-bug $/LOC compute.
|
|
2
|
+
//
|
|
3
|
+
// Pure functions, no I/O. Driven from bin/clud-bug.js which fetches workflow
|
|
4
|
+
// run JSON + PR metadata via gh CLI. Implementation of the 0.0.M.1 dashboard
|
|
5
|
+
// per the Phase 0.5 plan.
|
|
6
|
+
//
|
|
7
|
+
// Reads:
|
|
8
|
+
// - clud-bug-review job logs (via `gh api .../jobs/<id>/logs`), which
|
|
9
|
+
// contain the SDK's `result` messages including:
|
|
10
|
+
// "model": "claude-sonnet-4-6"
|
|
11
|
+
// "input_tokens": N
|
|
12
|
+
// "output_tokens": N
|
|
13
|
+
// "cache_read_input_tokens": N
|
|
14
|
+
// "cache_creation_input_tokens": N
|
|
15
|
+
// - `gh pr view --json additions,deletions` for the LOC denominator.
|
|
16
|
+
//
|
|
17
|
+
// Computes:
|
|
18
|
+
// $/LOC = total_cost(tokens, model) / (additions + deletions)
|
|
19
|
+
//
|
|
20
|
+
// Q7-clud-bug enforcement: dashboard reports the 30-day rolling trend; the
|
|
21
|
+
// next Phase 0.5 PR ships when the trend stops declining.
|
|
22
|
+
|
|
23
|
+
// Anthropic list pricing in USD per million tokens (MTok). Cache write is
// 1.25× input per Anthropic's published 5-min-TTL ephemeral cache rate;
// cache read is 0.1× input.
//
// The Haiku 4.5 row uses that model's own list price (1.00 in / 5.00 out);
// 0.80 / 4.00 is the Claude 3.5 Haiku price and undercounts Haiku 4.5.
// NOTE(review): the Opus row carries the 15 / 75 rates — confirm against
// the current pricing page that they apply to this Opus version.
export const PRICING = {
  'claude-sonnet-4-6': {
    input: 3.0, output: 15.0, cacheRead: 0.30, cacheWrite: 3.75,
  },
  'claude-haiku-4-5-20251001': {
    input: 1.0, output: 5.0, cacheRead: 0.10, cacheWrite: 1.25,
  },
  'claude-opus-4-7': {
    input: 15.0, output: 75.0, cacheRead: 1.50, cacheWrite: 18.75,
  },
};

// Fallback when the model field is missing or new. Use Sonnet pricing —
// conservative for unknown-but-likely-Sonnet, undercounts Opus until we
// update the table. The `unknownModel` flag in the result lets callers warn.
const DEFAULT_MODEL = 'claude-sonnet-4-6';

// Trailing "-YYYYMMDD" snapshot suffix on a model id.
const MODEL_DATE_SUFFIX = /-\d{8}$/;

// Map an observed model id to a PRICING key, or null when none applies.
// An exact (own-property) match wins; otherwise ids are compared with any
// snapshot date suffix removed from both sides, so "claude-haiku-4-5" and
// "claude-sonnet-4-6-20260101" resolve to their table rows.
function pricingKeyFor(model) {
  if (typeof model !== 'string' || model.length === 0) return null;
  // Own-property check: a plain `PRICING[model]` lookup is truthy for
  // inherited names such as "toString" and would yield NaN costs.
  if (Object.prototype.hasOwnProperty.call(PRICING, model)) return model;
  const base = model.replace(MODEL_DATE_SUFFIX, '');
  const key = Object.keys(PRICING).find((k) => k.replace(MODEL_DATE_SUFFIX, '') === base);
  return key ?? null;
}

/**
 * Compute the USD cost of a single clud-bug review from token counts +
 * model. All four token classes are billed independently.
 *
 * @param {object} tokens  Usage counts keyed `input_tokens`,
 *   `output_tokens`, `cache_read_input_tokens`,
 *   `cache_creation_input_tokens`; missing or falsy counts are treated
 *   as 0.
 * @param {string|null} model  Model id as observed in the log. It may
 *   carry or omit a "-YYYYMMDD" snapshot suffix relative to the PRICING
 *   key.
 * @returns {{total: number, parts: {input: number, output: number,
 *   cacheRead: number, cacheWrite: number}, model: string,
 *   unknownModel: boolean}}
 *   `total` and `parts` are USD; `model` is the PRICING key whose rates
 *   were applied; `unknownModel` is true when no PRICING row matched and
 *   DEFAULT_MODEL rates were used instead.
 */
export function computeReviewCost(tokens, model) {
  const usage = tokens ?? {};
  const counts = {
    input: usage.input_tokens || 0,
    output: usage.output_tokens || 0,
    cacheRead: usage.cache_read_input_tokens || 0,
    cacheWrite: usage.cache_creation_input_tokens || 0,
  };

  const matchedKey = pricingKeyFor(model);
  const normalized = matchedKey ?? DEFAULT_MODEL;
  const rates = PRICING[normalized];

  const parts = {
    input: (counts.input / 1e6) * rates.input,
    output: (counts.output / 1e6) * rates.output,
    cacheRead: (counts.cacheRead / 1e6) * rates.cacheRead,
    cacheWrite: (counts.cacheWrite / 1e6) * rates.cacheWrite,
  };
  const total = parts.input + parts.output + parts.cacheRead + parts.cacheWrite;

  return {
    total,
    parts,
    model: normalized,
    unknownModel: matchedKey === null,
  };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Dollars per changed line for one review.
 *
 * The denominator is additions + deletions — the same pair
 * `gh pr view --json additions,deletions` reports. Missing or falsy
 * counts are treated as 0.
 *
 * A PR with no changed lines yields 0 rather than dividing by zero;
 * callers can treat a zero result as "exclude from trend lines".
 *
 * @param {number} cost       Review cost in USD.
 * @param {number} additions  Lines added by the PR.
 * @param {number} deletions  Lines removed by the PR.
 * @returns {number} cost / (additions + deletions), or 0 for a zero-LOC PR.
 */
export function costPerLOC(cost, additions, deletions) {
  const changedLines = (additions || 0) + (deletions || 0);
  return changedLines === 0 ? 0 : cost / changedLines;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Fraction of prompt-side tokens that were served from cache:
 * cache_read / (cache_read + cache_creation + input).
 *
 * Cache creation is the cost of WRITING new entries (billed at 1.25×
 * input per Anthropic); cache read is what comes back at 10% of the
 * input price. A high rate shows the v0.6.3 caching layer firing on
 * re-reviews and fix-pushes.
 *
 * @param {object} tokens  Usage counts; missing or falsy fields count as 0.
 * @returns {number} A value in 0..1; 0 when all three counts are zero.
 */
export function cacheHitRate(tokens) {
  const hits = tokens.cache_read_input_tokens || 0;
  const writes = tokens.cache_creation_input_tokens || 0;
  const uncached = tokens.input_tokens || 0;
  const promptTotal = hits + writes + uncached;
  return promptTotal === 0 ? 0 : hits / promptTotal;
}
|
|
107
|
+
|
|
108
|
+
// Return the balanced `{ … }` object text starting at text[openIdx]
// (which must be '{'), or null when the braces never balance. Braces
// inside double-quoted strings are ignored.
function sliceJsonObject(text, openIdx) {
  let depth = 0;
  let inString = false;
  for (let i = openIdx; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (ch === '\\') i++; // skip the escaped character
      else if (ch === '"') inString = false;
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return text.slice(openIdx, i + 1);
    }
  }
  return null;
}

// Read the four top-level token counts from a usage object's JSON text.
// Returns null when the text is missing or is not a parseable JSON object.
function usageCountsFromJson(objectText) {
  if (objectText === null) return null;
  let parsed;
  try {
    parsed = JSON.parse(objectText);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) return null;
  const count = (value) => (Number.isFinite(value) && value > 0 ? value : 0);
  return {
    input_tokens: count(parsed.input_tokens),
    output_tokens: count(parsed.output_tokens),
    cache_read_input_tokens: count(parsed.cache_read_input_tokens),
    cache_creation_input_tokens: count(parsed.cache_creation_input_tokens),
  };
}

// Text-scan fallback for usage blocks that are not parseable as JSON
// (e.g. pretty-printed across log lines that each carry a prefix): take
// the first occurrence of each field, looking no further than the start
// of `"iterations"` so per-iteration breakdowns are not picked up.
function usageCountsFromText(usageText) {
  const cut = usageText.search(/"iterations"\s*:/);
  const scope = cut >= 0 ? usageText.slice(0, cut) : usageText;
  const pluck = (field) => {
    const hit = scope.match(new RegExp(`"${field}"\\s*:\\s*(\\d+)`));
    return hit ? Number(hit[1]) : 0;
  };
  return {
    input_tokens: pluck('input_tokens'),
    output_tokens: pluck('output_tokens'),
    cache_read_input_tokens: pluck('cache_read_input_tokens'),
    cache_creation_input_tokens: pluck('cache_creation_input_tokens'),
  };
}

/**
 * Parse the model + token counts from a clud-bug-review job log dump.
 *
 * PR #104 fix (token double-count): the SDK's stream-output emits a
 * `"type": "result"` event at the end of a review with a CUMULATIVE
 * `usage` block. It ALSO emits per-turn `"type": "assistant"` events
 * (each with its own usage), AND the result event's usage contains an
 * `iterations` array of per-message breakdowns. Naively summing every
 * `"input_tokens"` occurrence in the log would triple-or-more count
 * the same tokens.
 *
 * Approach: locate the FINAL `"type": "result"` marker and the first
 * `"usage": {` after it. The usage object is cut out by brace matching
 * and parsed as JSON so only its TOP-LEVEL fields are read. That is
 * correct wherever `iterations` sits among the keys, and never reads
 * past the end of the usage object. If that object is not parseable, a
 * text scan bounded by `"iterations"` is used instead.
 *
 * If no result event exists, the review didn't complete successfully —
 * ok:false is returned so the caller skips the run rather than trusting
 * partial token data.
 *
 * @param {string} logText  Raw job log text.
 * @returns {{model: string|null, tokens: {input_tokens: number,
 *   output_tokens: number, cache_read_input_tokens: number,
 *   cache_creation_input_tokens: number}|null, ok: boolean}}
 *   `model` is the last `"model": "…"` value anywhere in the log (set
 *   even when ok is false); `tokens` is null and `ok` false when the log
 *   is empty or not a string, has no result event, has no usage block
 *   after it, or all four counts are zero.
 */
export function extractTokensFromLog(logText) {
  if (typeof logText !== 'string' || logText.length === 0) {
    return { model: null, tokens: null, ok: false };
  }

  // The LAST model field — final message wins (a multi-turn review
  // uses the same model throughout). Captured before the usage parse
  // so model is reported even when we can't find a result event.
  const modelMatches = [...logText.matchAll(/"model"\s*:\s*"([^"]+)"/g)];
  const model = modelMatches.length > 0
    ? modelMatches[modelMatches.length - 1][1]
    : null;
  const unusable = () => ({ model, tokens: null, ok: false });

  // Locate the final result event. There may be multiple over the
  // life of a long-running session — take the LAST one.
  const resultMarkerRe = /"type"\s*:\s*"result"/g;
  let lastResultIdx = -1;
  let marker;
  while ((marker = resultMarkerRe.exec(logText)) !== null) {
    lastResultIdx = marker.index;
  }
  if (lastResultIdx < 0) return unusable();

  const fromResult = logText.slice(lastResultIdx);
  const usageMatch = /"usage"\s*:\s*\{/.exec(fromResult);
  if (!usageMatch) return unusable();

  // The match ends on the usage object's opening brace.
  const openIdx = usageMatch.index + usageMatch[0].length - 1;
  const tokens = usageCountsFromJson(sliceJsonObject(fromResult, openIdx))
    ?? usageCountsFromText(fromResult.slice(usageMatch.index));

  const anyTokens = tokens.input_tokens + tokens.output_tokens
    + tokens.cache_read_input_tokens + tokens.cache_creation_input_tokens;
  if (anyTokens === 0) return unusable();

  return { model, tokens, ok: true };
}
|
|
205
|
+
|
|
206
|
+
/**
 * Aggregate per-review records into the dashboard summary.
 *
 * Fields read from each review record: `repo`, `pr`, `model`,
 * `modelObserved`, `unknownModel`, `createdAt` (via computeTrend),
 * `additions`, `deletions`, `cost` (USD), `costPerLOC`, `cacheRate`.
 *
 * Reviews whose costPerLOC is not greater than 0 (zero-LOC PRs) are
 * dropped before anything is computed, so callers need not pre-filter.
 *
 * Returns:
 *   {
 *     total:    { reviews, cost, loc, costPerLOC (median), cacheRate (median) },
 *     perRepo:  { [repo]:  { reviews, cost, loc, costPerLOC, cacheRate } },
 *     perModel: { [model]: { reviews, cost, loc, costPerLOC, cacheRate } },
 *     trend30d: whatever computeTrend() returns for the kept reviews,
 *     outliers: [{ repo, pr, costPerLOC, multiple, cost, reason }]
 *               — reviews above 2× the overall median $/LOC,
 *     unknownModelReviews: [{ repo, pr, modelObserved }]
 *               — reviews flagged unknownModel === true,
 *   }
 */
export function rollup(reviews) {
  const scored = reviews.filter((review) => review.costPerLOC > 0);
  const locOf = (review) => review.additions + review.deletions;
  const medianOf = (records, field) => median(records.map((record) => record[field]));

  // Sum cost and LOC over a list of reviews, in list order.
  const sumUp = (records) => {
    let cost = 0;
    let loc = 0;
    for (const record of records) {
      cost += record.cost;
      loc += locOf(record);
    }
    return { cost, loc };
  };

  const overall = sumUp(scored);
  const total = {
    reviews: scored.length,
    cost: overall.cost,
    loc: overall.loc,
    costPerLOC: medianOf(scored, 'costPerLOC'),
    cacheRate: medianOf(scored, 'cacheRate'),
  };

  // Bucket the kept reviews by one field, then summarize each bucket.
  const summarizeBy = (field) => {
    const grouped = {};
    for (const review of scored) {
      const name = review[field];
      if (!grouped[name]) grouped[name] = [];
      grouped[name].push(review);
    }
    const summary = {};
    for (const name of Object.keys(grouped)) {
      const members = grouped[name];
      const { cost, loc } = sumUp(members);
      summary[name] = {
        reviews: members.length,
        cost,
        loc,
        costPerLOC: medianOf(members, 'costPerLOC'),
        cacheRate: medianOf(members, 'cacheRate'),
      };
    }
    return summary;
  };

  const perRepo = summarizeBy('repo');
  const perModel = summarizeBy('model');

  // Outliers: anything above twice the overall median $/LOC.
  const outlierCutoff = total.costPerLOC * 2;
  const outliers = [];
  for (const review of scored) {
    if (review.costPerLOC > outlierCutoff) {
      outliers.push({
        repo: review.repo,
        pr: review.pr,
        costPerLOC: review.costPerLOC,
        multiple: review.costPerLOC / total.costPerLOC,
        cost: review.cost,
        reason: review.cacheRate < 0.3 ? 'low cache hit' : 'unknown',
      });
    }
  }

  // Month-over-month movement of the median $/LOC.
  const trend30d = computeTrend(scored);

  // PR #104 fix: list reviews whose model was not in PRICING. Their cost
  // was computed with fallback (Sonnet) rates and they are bucketed under
  // that model in perModel, so the renderer warns loudly about them.
  const unknownModelReviews = [];
  for (const review of scored) {
    if (review.unknownModel === true) {
      unknownModelReviews.push({ repo: review.repo, pr: review.pr, modelObserved: review.modelObserved });
    }
  }

  return { total, perRepo, perModel, trend30d, outliers, unknownModelReviews };
}
|
|
296
|
+
|
|
297
|
+
// Median of a list of numbers; 0 for an empty list. The input array is
// left untouched (a copy is sorted). For an even count the result is the
// mean of the two middle values.
function median(nums) {
  const count = nums.length;
  if (count === 0) return 0;

  const ordered = Array.from(nums).sort((x, y) => x - y);
  const upper = Math.floor(count / 2);
  if (count % 2 === 1) return ordered[upper];
  return (ordered[upper - 1] + ordered[upper]) / 2;
}
|
|
305
|
+
|
|
306
|
+
// Month-over-month movement of the median $/LOC.
//
// Reviews are split by age of `createdAt` relative to now: the current
// window is age <= 30 days, the previous window is 30 < age <= 60 days.
// Reviews with an unparseable createdAt fall in neither window.
//
// Returns { current, previous, slopePct }:
//   current  - median $/LOC of the current window (0 when it is empty).
//   previous - median $/LOC of the previous window, or null when that
//              window is empty.
//   slopePct - percent change from previous to current, or null when the
//              comparison is not meaningful.
//
// PR #104 fix: "no prior window" (slopePct null) is kept distinct from an
// exactly flat trend (slopePct 0), so a stable expensive trend is not
// rendered as if there were no comparison data.
//
// The same reasoning applies to an EMPTY CURRENT window: with no recent
// reviews `current` is median([]) === 0, and computing a slope from it
// would report a -100% "improvement" that is really just missing data.
// slopePct is null in that case too.
function computeTrend(reviews) {
  if (reviews.length === 0) {
    return { current: 0, previous: null, slopePct: null };
  }

  const now = Date.now();
  const WINDOW_MS = 30 * 24 * 60 * 60 * 1000;

  const recent = [];
  const prior = [];
  for (const review of reviews) {
    const age = now - new Date(review.createdAt).getTime();
    // A NaN age fails both comparisons and is skipped.
    if (age <= WINDOW_MS) recent.push(review.costPerLOC);
    else if (age <= 2 * WINDOW_MS) prior.push(review.costPerLOC);
  }

  const current = median(recent);
  if (prior.length === 0) {
    return { current, previous: null, slopePct: null };
  }

  const previous = median(prior);
  // The caller passes only reviews with costPerLOC > 0, so previous is
  // expected to be positive; the guard keeps a zero from dividing.
  const comparable = recent.length > 0 && previous > 0;
  const slopePct = comparable ? ((current - previous) / previous) * 100 : null;
  return { current, previous, slopePct };
}
|
|
333
|
+
|
|
334
|
+
/**
 * Render the rollup as a human-readable table. Mirrors the sample output
 * from the Phase 0.5 plan.
 *
 * Pass `{ json: true }` for the machine-readable form (the same data
 * the rollup() function returns).
 *
 * @param {object} rollup - Rollup data. Reads `total` (`reviews`,
 *   `costPerLOC`, `cacheRate`, `cost`, `loc`), `trend30d.slopePct`,
 *   `perRepo` (repo name → `{ costPerLOC, cacheRate, reviews }`),
 *   `outliers` and the optional `unknownModelReviews`.
 * @param {{ json?: boolean }} [opts] - `json: true` returns pretty-printed JSON.
 * @returns {string} JSON text, or the newline-terminated text report.
 */
export function formatRollup(rollup, opts = {}) {
  if (opts.json) {
    return JSON.stringify(rollup, null, 2);
  }
  const lines = [];
  const t = rollup.total;
  const trend = rollup.trend30d;
  // PR #104 fix: null slopePct = "no prior window" (prior 30d bucket
  // was empty). A REAL 0% slope (flat trend) renders as "→ 0% MoM",
  // not as "(no prior window)" — masking the latter was the bug.
  let trendStr;
  if (trend.slopePct === null) {
    trendStr = '(no prior window)';
  } else {
    // Pick the arrow from the SAME rounded value that gets printed, so a
    // slope that rounds to zero reads "→ 0% MoM" rather than "↑ 0% MoM"
    // or "↓ -0% MoM". toFixed() yields "-0" for small negatives; fold
    // that into plain "0".
    const rounded = trend.slopePct.toFixed(0);
    const pctLabel = rounded === '-0' ? '0' : rounded;
    const pctValue = Number(pctLabel);
    const trendArrow = pctValue < 0 ? '↓' : pctValue > 0 ? '↑' : '→';
    trendStr = `${trendArrow} ${pctLabel}% MoM`;
  }
  lines.push(`ok: ${t.reviews} reviews, 30-day $/LOC trend: ${trendStr}`);

  // Most expensive repo first.
  const perRepoEntries = Object.entries(rollup.perRepo)
    .sort((a, b) => b[1].costPerLOC - a[1].costPerLOC);
  if (perRepoEntries.length > 0) {
    lines.push(' per-repo $/LOC (most → least expensive):');
    for (const [repo, stats] of perRepoEntries) {
      const cache = `${(stats.cacheRate * 100).toFixed(0)}% cached`;
      lines.push(
        ` ${repo.padEnd(28)} ${`$${stats.costPerLOC.toFixed(4)}/LOC`.padEnd(16)} · ${String(stats.reviews).padStart(2)} reviews · ${cache}`
      );
    }
  }

  lines.push(
    ` org median $/LOC: $${t.costPerLOC.toFixed(4)} · org cache hit: ${(t.cacheRate * 100).toFixed(0)}%`
  );
  lines.push(` total spend: $${t.cost.toFixed(2)} across ${(t.loc).toLocaleString()} LOC`);

  if (rollup.outliers.length > 0) {
    lines.push(` outliers (>2× median):`);
    for (const o of rollup.outliers) {
      lines.push(
        ` ${o.repo}#${o.pr} ($${o.costPerLOC.toFixed(4)}/LOC, ${o.multiple.toFixed(1)}× median — ${o.reason})`
      );
    }
  }

  // PR #104 fix: loud warning when one or more reviews used a model
  // not in PRICING (we fell back to Sonnet rates — that can undercount
  // by ~5× if the real model was an Opus variant). Update PRICING and
  // re-run.
  if (rollup.unknownModelReviews && rollup.unknownModelReviews.length > 0) {
    lines.push(
      ` ⚠️ ${rollup.unknownModelReviews.length} review${rollup.unknownModelReviews.length === 1 ? '' : 's'} used model${rollup.unknownModelReviews.length === 1 ? '' : 's'} not in PRICING; cost may be undercounted:`
    );
    // De-duplicate so each unknown model name is listed once.
    const observed = new Set(rollup.unknownModelReviews.map((u) => u.modelObserved));
    for (const m of observed) {
      lines.push(` seen: "${m}" — add to lib/usage.js PRICING table`);
    }
  }

  return lines.join('\n') + '\n';
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "clud-bug",
|
|
3
|
-
"version": "0.6.12",
|
|
3
|
+
"version": "0.6.14",
|
|
4
4
|
"description": "Skill-driven Claude PR review. Ship a brand-voice skill, get brand reviews. Each finding cites the skill that motivated it. CLI installs the workflow + a baseline kit; add more from skills.sh.",
|
|
5
5
|
"homepage": "https://cludbug.dev",
|
|
6
6
|
"bugs": "https://github.com/thrillmade/clud-bug/issues",
|
|
@@ -6,7 +6,40 @@ on:
|
|
|
6
6
|
types: [opened, synchronize]
|
|
7
7
|
|
|
8
8
|
jobs:
|
|
9
|
+
# Pre-flight (v0.6.14 / 0.0.W) — see workflow.yml.tmpl for design notes.
paths-check:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    is_workflow_only: ${{ steps.classify.outputs.is_workflow_only }}
  steps:
    - name: Classify PR diff
      id: classify
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        REPO: ${{ github.repository }}
      run: |
        # The default `bash -e` shell would abort this step if `gh pr diff`
        # fails, failing this job and skipping the review job that needs it.
        # Treat a failed listing like an empty one: fall through to the
        # "run the review normally" path below.
        CHANGED=$(gh pr diff "$PR_NUMBER" -R "$REPO" --name-only) || {
          echo "::warning title=Clud Bug 🐛::Could not list changed files — running the review normally."
          CHANGED=""
        }
        if [ -z "$CHANGED" ]; then echo "is_workflow_only=false" >> "$GITHUB_OUTPUT"; exit 0; fi
        # Workflow-only means EVERY changed path matches the allow-list.
        IS_WORKFLOW_ONLY=true
        while IFS= read -r f; do
          case "$f" in
            .github/workflows/clud-bug-*.yml) ;;
            .github/actions/strict-mode-gate/*) ;;
            *) IS_WORKFLOW_ONLY=false; break ;;
          esac
        done <<< "$CHANGED"
        echo "is_workflow_only=$IS_WORKFLOW_ONLY" >> "$GITHUB_OUTPUT"
        if [ "$IS_WORKFLOW_ONLY" = "true" ]; then
          echo "::notice title=Clud Bug 🐛::Skipping LLM review — workflow-only PR."
        fi
|
|
39
|
+
|
|
9
40
|
clud-bug-review:
|
|
41
|
+
needs: paths-check
|
|
42
|
+
if: needs.paths-check.outputs.is_workflow_only != 'true'
|
|
10
43
|
runs-on: ubuntu-latest
|
|
11
44
|
permissions:
|
|
12
45
|
contents: read
|
|
@@ -85,6 +118,6 @@ jobs:
|
|
|
85
118
|
# Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
|
|
86
119
|
- name: Strict mode — fail check on critical findings
|
|
87
120
|
if: success()
|
|
88
|
-
uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.12
|
|
121
|
+
uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.14
|
|
89
122
|
with:
|
|
90
123
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
@@ -6,7 +6,40 @@ on:
|
|
|
6
6
|
types: [opened, synchronize]
|
|
7
7
|
|
|
8
8
|
jobs:
|
|
9
|
+
# Pre-flight (v0.6.14 / 0.0.W) — see workflow.yml.tmpl for design notes.
paths-check:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    is_workflow_only: ${{ steps.classify.outputs.is_workflow_only }}
  steps:
    - name: Classify PR diff
      id: classify
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        REPO: ${{ github.repository }}
      run: |
        # The default `bash -e` shell would abort this step if `gh pr diff`
        # fails, failing this job and skipping the review job that needs it.
        # Treat a failed listing like an empty one: fall through to the
        # "run the review normally" path below.
        CHANGED=$(gh pr diff "$PR_NUMBER" -R "$REPO" --name-only) || {
          echo "::warning title=Clud Bug 🐛::Could not list changed files — running the review normally."
          CHANGED=""
        }
        if [ -z "$CHANGED" ]; then echo "is_workflow_only=false" >> "$GITHUB_OUTPUT"; exit 0; fi
        # Workflow-only means EVERY changed path matches the allow-list.
        IS_WORKFLOW_ONLY=true
        while IFS= read -r f; do
          case "$f" in
            .github/workflows/clud-bug-*.yml) ;;
            .github/actions/strict-mode-gate/*) ;;
            *) IS_WORKFLOW_ONLY=false; break ;;
          esac
        done <<< "$CHANGED"
        echo "is_workflow_only=$IS_WORKFLOW_ONLY" >> "$GITHUB_OUTPUT"
        if [ "$IS_WORKFLOW_ONLY" = "true" ]; then
          echo "::notice title=Clud Bug 🐛::Skipping LLM review — workflow-only PR."
        fi
|
|
39
|
+
|
|
9
40
|
clud-bug-review:
|
|
41
|
+
needs: paths-check
|
|
42
|
+
if: needs.paths-check.outputs.is_workflow_only != 'true'
|
|
10
43
|
runs-on: ubuntu-latest
|
|
11
44
|
permissions:
|
|
12
45
|
contents: read
|
|
@@ -85,6 +118,6 @@ jobs:
|
|
|
85
118
|
# Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
|
|
86
119
|
- name: Strict mode — fail check on critical findings
|
|
87
120
|
if: success()
|
|
88
|
-
uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.12
|
|
121
|
+
uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.14
|
|
89
122
|
with:
|
|
90
123
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
@@ -6,7 +6,51 @@ on:
|
|
|
6
6
|
types: [opened, synchronize]
|
|
7
7
|
|
|
8
8
|
jobs:
|
|
9
|
+
# Pre-flight (v0.6.14 / 0.0.W): if the PR ONLY touches clud-bug workflow
# files or the strict-mode-gate composite action, skip the LLM review
# entirely. claude-code-action would refuse to run on such a PR anyway
# (self-modification guard — required for security), and a template
# re-render has no useful review surface. Skipping turns a previously
# required-admin-bypass merge into a normal one, AND saves the LLM
# call cost. Security: the classifier requires ALL changed files to
# match the allow-list — a mixed PR (workflow + code) still runs the
# review normally. If the changed-file listing itself fails, the review
# also runs normally.
paths-check:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    pull-requests: read
  outputs:
    is_workflow_only: ${{ steps.classify.outputs.is_workflow_only }}
  steps:
    - name: Classify PR diff
      id: classify
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        REPO: ${{ github.repository }}
      run: |
        # The default `bash -e` shell would abort this step if `gh pr diff`
        # fails, failing this job and skipping the review job that needs it.
        # Treat a failed listing like an empty one: fall through to the
        # "run the review normally" path below.
        CHANGED=$(gh pr diff "$PR_NUMBER" -R "$REPO" --name-only) || {
          echo "::warning title=Clud Bug 🐛::Could not list changed files — running the review normally."
          CHANGED=""
        }
        if [ -z "$CHANGED" ]; then
          echo "is_workflow_only=false" >> "$GITHUB_OUTPUT"
          exit 0
        fi
        # Workflow-only means EVERY changed path matches the allow-list.
        IS_WORKFLOW_ONLY=true
        while IFS= read -r f; do
          case "$f" in
            .github/workflows/clud-bug-*.yml) ;;
            .github/actions/strict-mode-gate/*) ;;
            *) IS_WORKFLOW_ONLY=false; break ;;
          esac
        done <<< "$CHANGED"
        echo "is_workflow_only=$IS_WORKFLOW_ONLY" >> "$GITHUB_OUTPUT"
        if [ "$IS_WORKFLOW_ONLY" = "true" ]; then
          echo "::notice title=Clud Bug 🐛::Skipping LLM review — PR only touches workflow files (claude-code-action would refuse anyway due to self-modification guard)."
        fi
|
|
50
|
+
|
|
9
51
|
clud-bug-review:
|
|
52
|
+
needs: paths-check
|
|
53
|
+
if: needs.paths-check.outputs.is_workflow_only != 'true'
|
|
10
54
|
runs-on: ubuntu-latest
|
|
11
55
|
permissions:
|
|
12
56
|
contents: read
|
|
@@ -145,6 +189,6 @@ jobs:
|
|
|
145
189
|
# Letting the action's own failure fail the check is louder and right.
|
|
146
190
|
- name: Strict mode — fail check on critical findings
|
|
147
191
|
if: success()
|
|
148
|
-
uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.12
|
|
192
|
+
uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.14
|
|
149
193
|
with:
|
|
150
194
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|