clud-bug 0.6.26 → 0.6.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -278,3 +278,16 @@ npm test # node:test, no runtime deps
278
278
  ## License
279
279
 
280
280
  MIT.
281
+
282
+ ---
283
+
284
+ ## Part of the thrillmade SkDD toolchain
285
+
286
+ [Skills-Driven Development](https://zakelfassi.com/skdd-skills-driven-development) (Zak Elfassi's methodology) gives you the loop; the thrillmade toolchain ships the parts:
287
+
288
+ - **[logmind](https://github.com/thrillmade/logmind)** — the *why* behind every change (decision logging as commit primitive); skill-creation + testing + auditing
289
+ - **[clud-bug](https://github.com/thrillmade/clud-bug)** — skill-driven PR review at gate time; every finding cites the skill that motivated it
290
+ - **[agent-skills](https://github.com/thrillmade/agent-skills)** — public catalog of reusable skills
291
+ - **[skills.sh](https://skills.sh)** — skill discovery + install
292
+
293
+ End-to-end agentic auto dev: write skills first → log the *why* → run them against PRs → iterate based on usage. The tools work independently; better together.
package/bin/clud-bug.js CHANGED
@@ -52,6 +52,7 @@ function parseArgs(argv) {
52
52
  else if (a === '--limit') args.limit = Number(argv[++i]);
53
53
  else if (a === '--json') args.json = true;
54
54
  else if (a === '--stdin') args.stdin = true;
55
+ else if (a === '--health') args.health = true;
55
56
  else args._.push(a);
56
57
  }
57
58
  return args;
@@ -79,6 +80,13 @@ Commands:
79
80
  rate, 30-day rolling \$/LOC trend, per-repo/per-model
80
81
  distributions, and outliers (> 2x org median).
81
82
  Use --pr / --repo / --since / --limit / --json to filter.
83
+ usage --health Deterministic skill-health dashboard (v0.6.28). Reads
84
+ \`.claude/skills/.clud-bug.json\` usage block + renders
85
+ archive-candidate / stale / new / healthy status per skill.
86
+ Read-only — no automation acts on the output. Humans
87
+ decide which skills to prune. Workflow integration ships
88
+ in v0.6.29; today this command surfaces whatever data
89
+ has been written manually or by future runs.
82
90
  eval Run the golden-set regression gate against the rendered review
83
91
  prompt (must-contain / must-not-contain / byte-budget). Same as
84
92
  \`node --test test/prompts.eval.test.js\` but works from any cwd.
@@ -807,6 +815,14 @@ async function runAudit(args) {
807
815
  // Default scope: 30 days, all repos with clud-bug-review.yml in the gh
808
816
  // user's auth scope. --repo / --pr / --since / --limit narrow.
809
817
  async function runUsage(args) {
818
+ // v0.6.28 — `clud-bug usage --health`: deterministic skill-health
819
+ // dashboard. Reads `.claude/skills/.clud-bug.json` usage block,
820
+ // applies thresholds, renders read-only table. No automation acts
821
+ // on the output. Per the pragmatic SkDD pivot (2026-05-30).
822
+ if (args.health) {
823
+ return runUsageHealth(args);
824
+ }
825
+
810
826
  const limit = args.limit ?? 50;
811
827
  const since = args.since ?? '30d';
812
828
 
@@ -861,6 +877,44 @@ async function runUsage(args) {
861
877
 
862
878
// v0.6.28 — `clud-bug usage --health` implementation. Reads the local
// .claude/skills/.clud-bug.json usage block, applies deterministic
// thresholds, renders a read-only dashboard. No I/O beyond the JSON
// read. Workflow integration that POPULATES the usage block ships in
// v0.6.29; today this command is the consumer half of the contract.
//
// Failure modes (each writes a message to stderr and exits with code 1):
//   - state file missing        → hint to run `npx clud-bug init`
//   - state file unreadable     → reported as a READ failure
//   - state file not valid JSON → reported as a PARSE failure
// Reading and parsing are handled separately so a permissions problem
// is not misreported as malformed JSON.
async function runUsageHealth(_args) {
  const fs = await import('node:fs/promises');
  const path = await import('node:path');
  const { assessSkillHealth, formatHealthDashboard } = await import('../lib/skill-usage.js');

  const jsonPath = path.resolve(process.cwd(), '.claude', 'skills', '.clud-bug.json');

  // Step 1: read the state file.
  let raw;
  try {
    raw = await fs.readFile(jsonPath, 'utf-8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      process.stderr.write(
        `clud-bug usage --health: no .claude/skills/.clud-bug.json found in ${process.cwd()}.\n` +
        `Run \`npx clud-bug init\` first to install the catalog state.\n`
      );
    } else {
      process.stderr.write(`clud-bug usage --health: failed to read .clud-bug.json: ${err.message}\n`);
    }
    process.exit(1);
  }

  // Step 2: parse it.
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    process.stderr.write(`clud-bug usage --health: failed to parse .clud-bug.json: ${err.message}\n`);
    process.exit(1);
  }

  // A missing / falsy `usage` field renders the "no usage data yet" dashboard.
  const usage = parsed?.usage || {};
  const rows = assessSkillHealth(usage, new Date());
  process.stdout.write(formatHealthDashboard(rows) + '\n');

  // Exit code semantics: 0 (informational). The dashboard is read-only;
  // archive-candidates being present is NOT a failure mode — humans
  // decide. CI gates should NOT block on this.
  // NOTE(review): `ok` is a helper defined elsewhere in this file —
  // presumably it prints a success line; confirm where it writes.
  ok(`skill health: ${rows.length} skill${rows.length === 1 ? '' : 's'} tracked`);
}

// `gh repo list` won't filter by workflow file content, so we iterate
// repos the user has access to and probe for clud-bug-review.yml. We
// limit to 100 to avoid pagination explosions.
865
919
  async function discoverConsumingRepos() {
866
920
  const list = await ghJson(['repo', 'list', '--limit', '100', '--json', 'nameWithOwner']);
package/lib/prompts.js CHANGED
@@ -201,6 +201,36 @@ Rules:
201
201
  every file's verdict so a maintainer can verify nothing was
202
202
  skipped.
203
203
 
204
+ Mid-review self-check-in (v0.6.27 / §5.5 Layer 3):
205
+ After every 5 tool_uses, write a single-line budget heartbeat as a
206
+ free-text "thinking" message (not a tool call — these don't cost a
207
+ turn) of the form:
208
+
209
+ [budget] files_reviewed=X/N, turns_used=Y/M, pace=ok|behind
210
+
211
+ Where:
212
+ - X / N is the count of files you've meaningfully looked at so far
213
+ over the total in this PR's diff.
214
+ - Y / M is your current turn count over max_turns.
215
+ - pace = "ok" when X / N >= Y / (M - 5). The denominator subtracts the
216
+ 5-turn emit reservation: over the (M - 5) turns available for file
217
+ review, your file-coverage rate must match where you actually are
218
+ in the budget. (Don't subtract from Y — that would be saying "I've
219
+ used Y minus 5 turns" which double-counts the reservation.)
220
+ pace = "behind" otherwise.
221
+
222
+ When pace = "behind", immediately pivot strategy:
223
+ 1. Stop deep-dive analysis on the current file.
224
+ 2. Switch to one-sentence verdicts for every remaining file.
225
+ 3. Keep going through the whole diff — silent skipping is
226
+ non-negotiable. Cover everything, even if some files only get
227
+ "no issues found in this file" as their verdict.
228
+
229
+ The heartbeat serves two purposes: (a) forces internal pacing — you
230
+ can't drift past budget without noticing; (b) lands in the action's
231
+ streaming output for post-hoc calibration of the per-line cost
232
+ coefficients used by paths-check's Layer 1 estimator.
233
+
204
234
  Incremental-diff handshake (v0.6.10+) — emit the SHA marker:
205
235
  At the very end of the summary (after the Skills-referenced footer,
206
236
  on its own line), append:
@@ -0,0 +1,261 @@
1
+ // lib/skill-usage.js — Component 1+2 of the pragmatic SkDD pivot.
2
+ //
3
+ // Pure functions for deterministic skill-usage tracking. Per the
4
+ // strategic pivot (2026-05-30): replace Zak Elfassi's speculative
5
+ // recursive-meta-skill direction with concrete usage data + human-gated
6
+ // approval. This module is the data layer.
7
+ //
8
+ // Three responsibilities:
9
+ //
10
+ // 1. computeSkillUsageDelta(reviewJson)
11
+ // Given the structured-output JSON from one clud-bug review,
12
+ // return the per-skill delta for that one review.
13
+ //
14
+ // 2. mergeSkillUsage(existing, delta, timestamp)
15
+ // Merge a delta into the persistent usage block (the `usage`
16
+ // field in `.claude/skills/.clud-bug.json`).
17
+ //
18
+ // 3. assessSkillHealth(usage, now)
19
+ // Apply the deterministic thresholds + return a row per skill
20
+ // that `clud-bug usage --health` renders as a table.
21
+ //
22
+ // All three are pure. Side effects (file I/O) live in bin/clud-bug.js
23
+ // and the workflow post-step (v0.6.29).
24
+ //
25
+ // Thresholds — concrete numbers per design (2026-05-30):
26
+ //
27
+ // - archive-candidate: citations == 0 and loads >= 5 (cumulative counts)
28
+ // - stale: has citations, but last cited > 60 days ago (or no usable timestamp)
29
+ // - healthy: has citations and was last cited within the past 60 days
30
+ // - new: loads < 5 (still bedding in; don't judge yet) — checked first
31
+ //
32
+ // No automation acts on this output. It's a READ-ONLY dashboard.
33
+ // Humans read; humans decide; humans act.
34
+
35
/**
 * Compute per-skill usage delta from a single review's structured JSON.
 *
 * @param {object} reviewJson - Parsed structured-output JSON from one
 *   clud-bug review. Fields read (all optional):
 *     - per_skill_scan:       [{ skill, outcome }, ...]
 *     - critical_findings:    [{ skill, ... }, ...]
 *     - minor_findings:       [{ skill, ... }, ...]
 *     - preexisting_findings: [{ skill, ... }, ...]
 *     - dedicated_sections:   [{ skill, findings: [...] }, ...]
 *
 * @returns {object} - Per-skill delta:
 *     { "<slug>": { loads: 0|1, citations: 0|1 } }
 *
 * Rules:
 *   - loads = 1 for every skill in per_skill_scan (the skill was in
 *     context for this review).
 *   - citations = 1 if the skill slug appears in ANY finding bucket
 *     (critical / minor / preexisting / dedicated). Multiple findings
 *     from the same skill on one review = 1 citation, not N. Citations
 *     count REVIEWS that cited the skill, not findings within a review.
 *   - A skill that is cited but absent from per_skill_scan is reported
 *     with loads = 0.
 *
 * Returns {} on missing / malformed input (defensive — never throws).
 * Buckets that are not arrays, and entries without a string `skill`,
 * are ignored.
 */
export function computeSkillUsageDelta(reviewJson) {
  if (!reviewJson || typeof reviewJson !== 'object') return {};

  // Accumulate in a Map rather than a plain object: slugs come from
  // review output, and a slug such as "__proto__" or "constructor"
  // would otherwise resolve to an inherited member, so the write would
  // land on Object.prototype / Object instead of on the delta.
  const perSkill = new Map();
  const entryFor = (slug) => {
    let entry = perSkill.get(slug);
    if (!entry) {
      entry = { loads: 0, citations: 0 };
      perSkill.set(slug, entry);
    }
    return entry;
  };

  // Anything that is not an array is treated as an empty bucket so a
  // malformed field can never make `for...of` throw.
  const asList = (value) => (Array.isArray(value) ? value : []);

  // Loads — one per skill that scanned.
  for (const scan of asList(reviewJson.per_skill_scan)) {
    if (scan && typeof scan.skill === 'string') {
      entryFor(scan.skill).loads = 1;
    }
  }

  // Citations — assigning (not incrementing) caps each skill at 1 per review.
  const buckets = [
    reviewJson.critical_findings,
    reviewJson.minor_findings,
    reviewJson.preexisting_findings,
    ...asList(reviewJson.dedicated_sections).map((section) => section?.findings),
  ];
  for (const bucket of buckets) {
    for (const finding of asList(bucket)) {
      if (finding && typeof finding.skill === 'string') {
        entryFor(finding.skill).citations = 1;
      }
    }
  }

  // Object.fromEntries defines own data properties, so even "__proto__"
  // comes out as an ordinary key on a plain object.
  return Object.fromEntries(perSkill);
}
92
+
93
/**
 * Merge a per-review delta into a persistent usage block.
 *
 * @param {object} existing - Current usage block (may be empty/missing).
 *   Shape: { "<slug>": { loads: int, citations: int, last_cited: string|null } }
 * @param {object} delta - Per-skill delta for one review.
 *   Shape: { "<slug>": { loads: int, citations: int } }
 * @param {string|null} timestamp - ISO 8601 timestamp of THIS review
 *   (e.g., "2026-05-30T16:22:26Z"). Used to update last_cited when the
 *   skill is cited in this review. Pass null to skip the timestamp
 *   update (rarely useful — tests primarily).
 *
 * @returns {object} - New merged usage block (does NOT mutate inputs).
 *
 * Semantics:
 *   - existing.loads + delta.loads → new.loads (accumulates forever)
 *   - existing.citations + delta.citations → new.citations
 *   - last_cited updates only when delta.citations > 0 (i.e., cited
 *     in THIS review) and a timestamp was supplied. Stays at the prior
 *     value otherwise.
 *   - New skills (not in existing) get initialized fresh.
 *   - Non-object entries in `existing` are dropped; non-numeric counts
 *     are treated as 0; a null/primitive delta entry contributes 0.
 */
export function mergeSkillUsage(existing, delta, timestamp) {
  const isRecord = (value) => value !== null && typeof value === 'object';
  const toCount = (value) => Number(value) || 0;

  // A Map keeps slug lookups free of inherited keys: with a plain
  // object, a slug named "constructor" looks "already present" and
  // assigning to "__proto__" swaps the prototype instead of adding a key.
  const merged = new Map();

  // Copy all existing skills first (preserve skills NOT in this delta).
  if (isRecord(existing)) {
    for (const [slug, entry] of Object.entries(existing)) {
      if (!isRecord(entry)) continue;
      merged.set(slug, {
        loads: toCount(entry.loads),
        citations: toCount(entry.citations),
        last_cited: entry.last_cited || null,
      });
    }
  }

  // Merge delta.
  if (isRecord(delta)) {
    for (const [slug, change] of Object.entries(delta)) {
      let target = merged.get(slug);
      if (!target) {
        target = { loads: 0, citations: 0, last_cited: null };
        merged.set(slug, target);
      }
      // Optional chaining: a null delta entry counts as "no change"
      // rather than throwing.
      const addedCitations = toCount(change?.citations);
      target.loads += toCount(change?.loads);
      target.citations += addedCitations;
      if (addedCitations > 0 && timestamp) {
        target.last_cited = timestamp;
      }
    }
  }

  // Own data properties only — safe for any slug, including "__proto__".
  return Object.fromEntries(merged);
}
142
+
143
/**
 * Apply deterministic skill-health thresholds to a usage block.
 *
 * @param {object} usage - Usage block of shape
 *   { "<slug>": { loads, citations, last_cited } }. Non-object entries
 *   are skipped; non-numeric counts are treated as 0.
 * @param {Date} now - The current time (injected for testability). If it
 *   is not a valid Date, the real current time is used instead.
 *
 * @returns {object[]} - Sorted array of:
 *     { slug, status, loads, citations, last_cited, days_since_cited }
 *   `days_since_cited` is a whole number of days (never negative) when
 *   `last_cited` parses as a date, otherwise null.
 *
 * Status values, evaluated in this order (first match wins):
 *   - "new":               loads < 5
 *       → still bedding in; don't judge yet
 *   - "archive-candidate": citations == 0 (and loads >= 5)
 *       → loaded enough to judge, never cited → propose for removal
 *   - "healthy":           last_cited within the past 60 days
 *       → still earning its place
 *   - "stale":             last_cited > 60 days ago, or citations exist
 *       but last_cited is missing / unparseable
 *       → was useful, hasn't fired recently
 *
 * Sorted by status priority (archive > stale > new > healthy), then
 * by loads desc within each group. Highest-noise skills surface first.
 */
export function assessSkillHealth(usage, now) {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const STALE_AFTER_DAYS = 60;
  const MIN_LOADS_TO_JUDGE = 5;

  const safeUsage = (usage && typeof usage === 'object') ? usage : {};
  // `new Date('garbage')` is still `instanceof Date`; without the NaN
  // check every comparison below would silently evaluate to false.
  const nowIsUsable = now instanceof Date && !Number.isNaN(now.getTime());
  const nowMs = (nowIsUsable ? now : new Date()).getTime();
  const staleCutoffMs = nowMs - STALE_AFTER_DAYS * DAY_MS;

  const rows = [];
  for (const [slug, entry] of Object.entries(safeUsage)) {
    if (!entry || typeof entry !== 'object') continue;

    const loads = Number(entry.loads) || 0;
    const citations = Number(entry.citations) || 0;
    const last_cited = entry.last_cited || null;

    // Parse once. Use a finiteness test instead of truthiness so the
    // epoch (0 ms) counts as a real timestamp and NaN counts as missing.
    const lastCitedMs = last_cited ? Date.parse(last_cited) : Number.NaN;
    const hasTimestamp = Number.isFinite(lastCitedMs);

    // Age is reported for every status (so a "new" skill that has been
    // cited does not look never-cited) and clamped at 0 so a timestamp
    // slightly in the future does not produce a negative age.
    const days_since_cited = hasTimestamp
      ? Math.max(0, Math.floor((nowMs - lastCitedMs) / DAY_MS))
      : null;

    let status;
    if (loads < MIN_LOADS_TO_JUDGE) {
      status = 'new';
    } else if (citations === 0) {
      status = 'archive-candidate';
    } else if (hasTimestamp && lastCitedMs >= staleCutoffMs) {
      status = 'healthy';
    } else {
      // Either cited too long ago, or a citation count with no usable
      // timestamp (legacy / corrupted) — both are treated as stale.
      status = 'stale';
    }

    rows.push({ slug, status, loads, citations, last_cited, days_since_cited });
  }

  // Sort: archive-candidates first, then stale, then new, then healthy.
  // Within each group, by loads descending (loudest first).
  const statusOrder = { 'archive-candidate': 0, 'stale': 1, 'new': 2, 'healthy': 3 };
  rows.sort((a, b) => {
    const rankA = statusOrder[a.status] ?? 99;
    const rankB = statusOrder[b.status] ?? 99;
    if (rankA !== rankB) return rankA - rankB;
    return b.loads - a.loads;
  });

  return rows;
}
215
+
216
+
217
/**
 * Render the health dashboard as a 5-column text table for the CLI
 * (STATUS, SLUG, LOADS, CITES, LAST CITED), followed by a legend of the
 * thresholds.
 *
 * @param {object[]} rows - Rows of shape
 *   { slug, status, loads, citations, days_since_cited }.
 *   A missing, non-array, or empty value renders the "no usage data
 *   yet" message instead of a table.
 * @returns {string} - Multi-line markdown-ish table for stdout (no
 *   trailing newline).
 *
 * LAST CITED shows "<n>d ago" when days_since_cited is known,
 * "(never)" when the skill has no citations, and "(unknown)" when it
 * has citations but no age could be computed.
 */
export function formatHealthDashboard(rows) {
  if (!Array.isArray(rows) || rows.length === 0) {
    return (
      'Skill health: no usage data yet.\n\n' +
      'Usage data accumulates after clud-bug reviews land in your repo.\n' +
      'Workflow integration ships in v0.6.29 — until then this command is\n' +
      'a structural placeholder.'
    );
  }

  // Column widths shared by the header, the rule and every data row so
  // the three can never drift out of alignment.
  const STATUS_WIDTH = 16;
  const SLUG_WIDTH = 32;
  const COUNT_WIDTH = 5;
  const LAST_CITED_WIDTH = 14;

  const STATUS_GLYPH = {
    'archive-candidate': '🟥 archive?',
    'stale': '🟨 stale',
    'new': '🟦 new',
    'healthy': '🟩 healthy',
  };

  const renderRow = (status, slug, loads, cites, lastCited) =>
    ` ${status.padEnd(STATUS_WIDTH)} ${slug.padEnd(SLUG_WIDTH)} ` +
    `${loads.padStart(COUNT_WIDTH)} ${cites.padStart(COUNT_WIDTH)} ${lastCited}`;

  const lastCitedLabel = (row) => {
    if (row.days_since_cited != null) return `${row.days_since_cited}d ago`;
    // A row with citations but no age is not "never cited"; say so.
    return Number(row.citations) > 0 ? '(unknown)' : '(never)';
  };

  const lines = [];
  lines.push('Skill health (deterministic — read-only; no automation acts on this)');
  lines.push('');
  lines.push(renderRow('STATUS', 'SLUG', 'LOADS', 'CITES', 'LAST CITED'));
  lines.push(renderRow(
    '-'.repeat(STATUS_WIDTH),
    '-'.repeat(SLUG_WIDTH),
    '-'.repeat(COUNT_WIDTH),
    '-'.repeat(COUNT_WIDTH),
    '-'.repeat(LAST_CITED_WIDTH)
  ));
  for (const r of rows) {
    const status = STATUS_GLYPH[r.status] || r.status;
    // Over-long slugs are cut to 29 chars + "..." to fit the column.
    const slug = r.slug.length > SLUG_WIDTH ? r.slug.slice(0, SLUG_WIDTH - 3) + '...' : r.slug;
    lines.push(renderRow(status, slug, String(r.loads), String(r.citations), lastCitedLabel(r)));
  }
  lines.push('');
  lines.push('Thresholds:');
  lines.push(' archive-candidate = citations==0 + loads>=5');
  lines.push(' stale = last cited >60 days ago');
  lines.push(' new = loads<5 (still bedding in)');
  lines.push(' healthy = cited within 60 days');
  return lines.join('\n');
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clud-bug",
3
- "version": "0.6.26",
3
+ "version": "0.6.28",
4
4
  "description": "Skill-driven Claude PR review. Ship a brand-voice skill, get brand reviews. Each finding cites the skill that motivated it. CLI installs the workflow + a baseline kit; add more from skills.sh.",
5
5
  "homepage": "https://cludbug.dev",
6
6
  "bugs": "https://github.com/thrillmade/clud-bug/issues",
@@ -339,7 +339,7 @@ jobs:
339
339
  # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
340
340
  - name: Strict mode — fail check on critical findings
341
341
  if: success()
342
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.26
342
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.28
343
343
  with:
344
344
  github-token: ${{ secrets.GITHUB_TOKEN }}
345
345
  # v0.6.22 / 0.0.O: summary now posted by github-actions[bot].
@@ -339,7 +339,7 @@ jobs:
339
339
  # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
340
340
  - name: Strict mode — fail check on critical findings
341
341
  if: success()
342
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.26
342
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.28
343
343
  with:
344
344
  github-token: ${{ secrets.GITHUB_TOKEN }}
345
345
  # v0.6.22 / 0.0.O: summary now posted by github-actions[bot].
@@ -589,7 +589,7 @@ jobs:
589
589
  # Letting the action's own failure fail the check is louder and right.
590
590
  - name: Strict mode — fail check on critical findings
591
591
  if: success()
592
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.26
592
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.28
593
593
  with:
594
594
  github-token: ${{ secrets.GITHUB_TOKEN }}
595
595
  # v0.6.22 / 0.0.O: the summary is now posted by the workflow