npm - clud-bug - Versions diffs - 0.6.26 → 0.6.28 - Mend

clud-bug 0.6.26 → 0.6.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +13 -0
package/bin/clud-bug.js +54 -0
package/lib/prompts.js +30 -0
package/lib/skill-usage.js +261 -0
package/package.json +1 -1
package/templates/workflow-py.yml.tmpl +1 -1
package/templates/workflow-ts.yml.tmpl +1 -1
package/templates/workflow.yml.tmpl +1 -1

package/README.md CHANGED Viewed

@@ -278,3 +278,16 @@ npm test          # node:test, no runtime deps
 ## License
 MIT.
+---
+## Part of the thrillmade SkDD toolchain
+[Skills-Driven Development](https://zakelfassi.com/skdd-skills-driven-development) (Zak Elfassi's methodology) gives you the loop; the thrillmade toolchain ships the parts:
+- **[logmind](https://github.com/thrillmade/logmind)** — the *why* behind every change (decision logging as commit primitive); skill-creation + testing + auditing
+- **[clud-bug](https://github.com/thrillmade/clud-bug)** — skill-driven PR review at gate time; every finding cites the skill that motivated it
+- **[agent-skills](https://github.com/thrillmade/agent-skills)** — public catalog of reusable skills
+- **[skills.sh](https://skills.sh)** — skill discovery + install
+End-to-end agentic auto dev: write skills first → log the *why* → run them against PRs → iterate based on usage. The tools work independently; better together.

package/bin/clud-bug.js CHANGED Viewed

@@ -52,6 +52,7 @@ function parseArgs(argv) {
     else if (a === '--limit') args.limit = Number(argv[++i]);
     else if (a === '--json') args.json = true;
     else if (a === '--stdin') args.stdin = true;
+    else if (a === '--health') args.health = true;
     else args._.push(a);
   }
   return args;
@@ -79,6 +80,13 @@ Commands:
                         rate, 30-day rolling \$/LOC trend, per-repo/per-model
                         distributions, and outliers (> 2x org median).
                         Use --pr / --repo / --since / --limit / --json to filter.
+  usage --health        Deterministic skill-health dashboard (v0.6.28). Reads
+                        \`.claude/skills/.clud-bug.json\` usage block + renders
+                        archive-candidate / stale / new / healthy status per skill.
+                        Read-only — no automation acts on the output. Humans
+                        decide which skills to prune. Workflow integration ships
+                        in v0.6.29; today this command surfaces whatever data
+                        has been written manually or by future runs.
   eval                  Run the golden-set regression gate against the rendered review
                         prompt (must-contain / must-not-contain / byte-budget). Same as
                         \`node --test test/prompts.eval.test.js\` but works from any cwd.
@@ -807,6 +815,14 @@ async function runAudit(args) {
 // Default scope: 30 days, all repos with clud-bug-review.yml in the gh
 // user's auth scope. --repo / --pr / --since / --limit narrow.
 async function runUsage(args) {
+  // v0.6.28 — `clud-bug usage --health`: deterministic skill-health
+  // dashboard. Reads `.claude/skills/.clud-bug.json` usage block,
+  // applies thresholds, renders read-only table. No automation acts
+  // on the output. Per the pragmatic SkDD pivot (2026-05-30).
+  if (args.health) {
+    return runUsageHealth(args);
+  }
   const limit = args.limit ?? 50;
   const since = args.since ?? '30d';
@@ -861,6 +877,44 @@ async function runUsage(args) {
 // `gh repo list` won't filter by workflow file content, so we iterate
 // repos the user has access to and probe for clud-bug-review.yml. We
// v0.6.28 — `clud-bug usage --health` implementation. Reads the local
// .claude/skills/.clud-bug.json usage block, applies deterministic
// thresholds, renders a read-only dashboard. No I/O beyond the JSON
// read. Workflow integration that POPULATES the usage block ships in
// v0.6.29; today this command is the consumer half of the contract.
//
// Failure modes (all exit 1 with a message on stderr):
//   - file missing (ENOENT)  → points the user at `npx clud-bug init`
//   - any other read error   → reported as a read failure
//   - invalid JSON           → reported as a parse failure
//
// NOTE(review): this definition sits between the two halves of the
// comment that documents discoverConsumingRepos (the `// limit to 100 …`
// line that follows this function is the tail of the comment that
// starts above it). Rejoin that comment when this block is next moved.
async function runUsageHealth(_args) {
  const fs = await import('node:fs/promises');
  const path = await import('node:path');
  const { assessSkillHealth, formatHealthDashboard } = await import('../lib/skill-usage.js');
  const jsonPath = path.resolve(process.cwd(), '.claude', 'skills', '.clud-bug.json');

  // Read and parse are separate steps so that a permissions / EISDIR
  // problem is not misreported as malformed JSON.
  let raw;
  try {
    raw = await fs.readFile(jsonPath, 'utf-8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      process.stderr.write(
        `clud-bug usage --health: no .claude/skills/.clud-bug.json found in ${process.cwd()}.\n` +
        `Run \`npx clud-bug init\` first to install the catalog state.\n`
      );
    } else {
      process.stderr.write(`clud-bug usage --health: failed to read .clud-bug.json: ${err.message}\n`);
    }
    process.exit(1);
    return; // only reached when process.exit is stubbed (tests)
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    process.stderr.write(`clud-bug usage --health: failed to parse .clud-bug.json: ${err.message}\n`);
    process.exit(1);
    return; // only reached when process.exit is stubbed (tests)
  }

  // A missing `usage` key is normal until something has written usage
  // data; assessSkillHealth treats any non-object as "no skills".
  const usage = parsed?.usage ?? {};
  const rows = assessSkillHealth(usage, new Date());
  process.stdout.write(formatHealthDashboard(rows) + '\n');
  // Exit code semantics: 0 (informational). The dashboard is read-only;
  // archive-candidates being present is NOT a failure mode — humans
  // decide. CI gates should NOT block on this.
  ok(`skill health: ${rows.length} skill${rows.length === 1 ? '' : 's'} tracked`);
}
 // limit to 100 to avoid pagination explosions.
 async function discoverConsumingRepos() {
   const list = await ghJson(['repo', 'list', '--limit', '100', '--json', 'nameWithOwner']);

package/lib/prompts.js CHANGED Viewed

@@ -201,6 +201,36 @@ Rules:
     every file's verdict so a maintainer can verify nothing was
     skipped.
+Mid-review self-check-in (v0.6.27 / §5.5 Layer 3):
+After every 5 tool_uses, write a single-line budget heartbeat as a
+free-text "thinking" message (not a tool call — these don't cost a
+turn) of the form:
+  [budget] files_reviewed=X/N, turns_used=Y/M, pace=ok|behind
+Where:
+  - X / N is the count of files you've meaningfully looked at so far
+    over the total in this PR's diff.
+  - Y / M is your current turn count over max_turns.
+  - pace = "ok" when X / N >= Y / (M - 5). The denominator subtracts the
+    5-turn emit reservation: over the (M - 5) turns available for file
+    review, your file-coverage rate must match where you actually are
+    in the budget. (Don't subtract from Y — that would be saying "I've
+    used Y minus 5 turns" which double-counts the reservation.)
+    pace = "behind" otherwise.
+When pace = "behind", immediately pivot strategy:
+  1. Stop deep-dive analysis on the current file.
+  2. Switch to one-sentence verdicts for every remaining file.
+  3. Keep going through the whole diff — silent skipping is
+     non-negotiable. Cover everything, even if some files only get
+     "no issues found in this file" as their verdict.
+The heartbeat serves two purposes: (a) forces internal pacing — you
+can't drift past budget without noticing; (b) lands in the action's
+streaming output for post-hoc calibration of the per-line cost
+coefficients used by paths-check's Layer 1 estimator.
 Incremental-diff handshake (v0.6.10+) — emit the SHA marker:
 At the very end of the summary (after the Skills-referenced footer,
 on its own line), append:

package/lib/skill-usage.js ADDED Viewed

@@ -0,0 +1,261 @@
+// lib/skill-usage.js — Component 1+2 of the pragmatic SkDD pivot.
+//
+// Pure functions for deterministic skill-usage tracking. Per the
+// strategic pivot (2026-05-30): replace Zak Elfassi's speculative
+// recursive-meta-skill direction with concrete usage data + human-gated
+// approval. This module is the data layer.
+//
+// Four responsibilities:
+//
+//   1. computeSkillUsageDelta(reviewJson)
+//      Given the structured-output JSON from one clud-bug review,
+//      return the per-skill delta for that one review.
+//
+//   2. mergeSkillUsage(existing, delta, timestamp)
+//      Merge a delta into the persistent usage block (the `usage`
+//      field in `.claude/skills/.clud-bug.json`).
+//
+//   3. assessSkillHealth(usage, now)
+//      Apply the deterministic thresholds + return a row per skill
+//      that `clud-bug usage --health` renders as a table.
+//
+//   4. formatHealthDashboard(rows)
+//      Render the rows from (3) as the plain-text table printed by
+//      `clud-bug usage --health`.
+//
+// All four are pure (assessSkillHealth only when `now` is supplied;
+// it falls back to the wall clock otherwise). Side effects (file I/O)
+// live in bin/clud-bug.js and the workflow post-step (v0.6.29).
+//
+// Thresholds — concrete numbers per design (2026-05-30), as applied by
+// assessSkillHealth (checked in this order):
+//
+//   - new:               loads < 5 (still bedding in; don't judge yet)
+//   - archive-candidate: loads >= 5 and citations == 0 (never cited)
+//   - healthy:           last cited within the past 60 days
+//   - stale:             last cited > 60 days ago, or no usable timestamp
+//
+// No automation acts on this output. It's a READ-ONLY dashboard.
+// Humans read; humans decide; humans act.
/**
 * Compute per-skill usage delta from a single review's structured JSON.
 *
 * @param {object} reviewJson - Parsed structured-output JSON from one
 *   clud-bug review. Fields read (subset of review-schema.js):
 *     - per_skill_scan: [{ skill, outcome }, ...]
 *     - critical_findings: [{ skill, ... }, ...]
 *     - minor_findings: [{ skill, ... }, ...]
 *     - preexisting_findings: [{ skill, ... }, ...]
 *     - dedicated_sections: [{ skill, findings: [...] }, ...]
 *
 * @returns {object} - Per-skill delta:
 *     { "<slug>": { loads: 0|1, citations: 0|1 } }
 *
 * Rules:
 *   - loads = 1 for every skill in per_skill_scan (the skill was in
 *     context for this review). A skill that is cited but absent from
 *     per_skill_scan gets loads = 0.
 *   - citations = 1 if the skill slug appears on a finding in ANY
 *     bucket (critical / minor / preexisting / dedicated-section
 *     findings). Multiple findings from the same skill on one review
 *     = 1 citation, not N. Citations count REVIEWS that cited the
 *     skill, not findings within a review.
 *
 * Returns {} on missing / malformed input (defensive — never throws).
 * Buckets that are not arrays are ignored, as are entries without a
 * string `skill`.
 */
export function computeSkillUsageDelta(reviewJson) {
  if (!reviewJson || typeof reviewJson !== 'object') return {};

  // Anything that is not an array is treated as an empty bucket so a
  // malformed review (e.g. `per_skill_scan: {}`) cannot make `for...of`
  // throw.
  const asList = (value) => (Array.isArray(value) ? value : []);

  // Accumulate in a Map rather than a plain object: slugs come from
  // model output, and indexing a plain object with "__proto__" would
  // write through to Object.prototype.
  const bySlug = new Map();
  const entryFor = (slug) => {
    let entry = bySlug.get(slug);
    if (!entry) {
      entry = { loads: 0, citations: 0 };
      bySlug.set(slug, entry);
    }
    return entry;
  };

  // Loads — one per skill that scanned, however often it is listed.
  for (const scan of asList(reviewJson.per_skill_scan)) {
    if (scan && typeof scan.skill === 'string') entryFor(scan.skill).loads = 1;
  }

  // Citations — collect unique skill slugs across all finding buckets.
  const cited = new Set();
  const collect = (findings) => {
    for (const finding of asList(findings)) {
      if (finding && typeof finding.skill === 'string') cited.add(finding.skill);
    }
  };
  collect(reviewJson.critical_findings);
  collect(reviewJson.minor_findings);
  collect(reviewJson.preexisting_findings);
  for (const section of asList(reviewJson.dedicated_sections)) {
    collect(section?.findings);
  }
  for (const slug of cited) {
    entryFor(slug).citations = 1;
  }

  // Object.fromEntries defines own data properties, so even a
  // "__proto__" slug lands as an ordinary key on the result.
  return Object.fromEntries(bySlug);
}
/**
 * Merge a per-review delta into a persistent usage block.
 *
 * @param {object} existing - Current usage block (may be empty/missing).
 *   Shape: { "<slug>": { loads: int, citations: int, last_cited: string|null } }
 * @param {object} delta - From computeSkillUsageDelta:
 *   { "<slug>": { loads: int, citations: int } }
 * @param {string|null} timestamp - ISO 8601 timestamp of THIS review
 *   (e.g., "2026-05-30T16:22:26Z"). Used to update last_cited when the
 *   skill is cited in this review. Pass null to skip the timestamp
 *   update (rarely useful — tests primarily).
 *
 * @returns {object} - New merged usage block (does NOT mutate inputs).
 *
 * Semantics:
 *   - existing.loads + delta.loads → new.loads (accumulates forever)
 *   - existing.citations + delta.citations → new.citations
 *   - last_cited updates only when delta.citations > 0 (i.e., cited
 *     in THIS review) and a timestamp was given. Stays at the prior
 *     value otherwise.
 *   - New skills (not in existing) get initialized fresh.
 *   - Entries that are not objects (in either input) are ignored;
 *     non-numeric counts are read as 0.
 */
export function mergeSkillUsage(existing, delta, timestamp) {
  const isRecord = (value) => value !== null && typeof value === 'object';
  const toCount = (value) => Number(value) || 0;

  // A Map keeps slug handling safe: a "__proto__" key in either input
  // (possible after JSON.parse) must not reach Object.prototype.
  const merged = new Map();

  // Copy all existing skills first (preserve skills NOT in this delta).
  if (isRecord(existing)) {
    for (const [slug, entry] of Object.entries(existing)) {
      if (!isRecord(entry)) continue;
      merged.set(slug, {
        loads: toCount(entry.loads),
        citations: toCount(entry.citations),
        last_cited: entry.last_cited || null,
      });
    }
  }

  // Merge delta.
  for (const [slug, change] of Object.entries(delta || {})) {
    if (!isRecord(change)) continue; // a null entry used to throw here
    let entry = merged.get(slug);
    if (!entry) {
      entry = { loads: 0, citations: 0, last_cited: null };
      merged.set(slug, entry);
    }
    const citedNow = toCount(change.citations);
    entry.loads += toCount(change.loads);
    entry.citations += citedNow;
    if (citedNow > 0 && timestamp) {
      entry.last_cited = timestamp;
    }
  }

  // Object.fromEntries creates own data properties for every slug.
  return Object.fromEntries(merged);
}
/**
 * Apply deterministic skill-health thresholds to a usage block.
 *
 * @param {object} usage - The usage block from mergeSkillUsage.
 * @param {Date} now - The current time (injected for testability).
 *   Anything that is not a valid Date falls back to the wall clock.
 *
 * @returns {object[]} - Sorted array of:
 *     { slug, status, loads, citations, last_cited, days_since_cited }
 *   days_since_cited is a whole number of days when the skill has at
 *   least one citation and last_cited parses as a date; null otherwise.
 *
 * Status values (checked in this order):
 *   - "new": loads < 5
 *     → still bedding in; don't judge yet
 *   - "archive-candidate": citations == 0 AND loads >= 5
 *     → loaded enough to judge, never cited → propose for removal
 *   - "healthy": cited within 60 days
 *     → still earning its place
 *   - "stale": last_cited > 60 days ago (even with citations history),
 *     or citations recorded without a usable timestamp
 *     → was useful, hasn't fired recently
 *
 * Sorted by status priority (archive > stale > new > healthy), then
 * by loads desc within each group. Highest-noise skills surface first.
 */
export function assessSkillHealth(usage, now) {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const STALE_AFTER_DAYS = 60;
  const MIN_LOADS_TO_JUDGE = 5;

  const safeUsage = (usage && typeof usage === 'object') ? usage : {};
  // An invalid Date would turn every comparison below into NaN.
  const nowIsUsable = now instanceof Date && !Number.isNaN(now.getTime());
  const nowMs = (nowIsUsable ? now : new Date()).getTime();
  const staleCutoffMs = nowMs - STALE_AFTER_DAYS * DAY_MS;

  const rows = [];
  for (const [slug, entry] of Object.entries(safeUsage)) {
    if (!entry || typeof entry !== 'object') continue;
    const loads = Number(entry.loads) || 0;
    const citations = Number(entry.citations) || 0;
    const last_cited = entry.last_cited || null;

    // Date.parse yields NaN for unparseable input; isFinite (rather than
    // truthiness) keeps a legitimate epoch-0 timestamp usable.
    const lastCitedMs = last_cited ? Date.parse(last_cited) : NaN;
    const hasTimestamp = Number.isFinite(lastCitedMs);

    // Recency is reported for every cited skill, including "new" ones,
    // so the dashboard does not show a cited skill as never cited.
    const days_since_cited = (citations > 0 && hasTimestamp)
      ? Math.floor((nowMs - lastCitedMs) / DAY_MS)
      : null;

    let status;
    if (loads < MIN_LOADS_TO_JUDGE) {
      status = 'new';
    } else if (citations === 0) {
      status = 'archive-candidate';
    } else if (hasTimestamp && lastCitedMs >= staleCutoffMs) {
      status = 'healthy';
    } else {
      // Either cited too long ago, or a citation count with no usable
      // timestamp (legacy / corrupted data) — both are treated as stale.
      status = 'stale';
    }
    rows.push({ slug, status, loads, citations, last_cited, days_since_cited });
  }

  // Sort: archive-candidates first, then stale, then new, then healthy.
  // Within each group, by loads descending (loudest first).
  const statusOrder = { 'archive-candidate': 0, 'stale': 1, 'new': 2, 'healthy': 3 };
  rows.sort((a, b) => {
    const rankA = statusOrder[a.status] ?? 99;
    const rankB = statusOrder[b.status] ?? 99;
    if (rankA !== rankB) return rankA - rankB;
    return b.loads - a.loads;
  });
  return rows;
}
/**
 * Render the health dashboard as a five-column table for the CLI
 * (STATUS, SLUG, LOADS, CITES, LAST CITED), followed by a legend of
 * the thresholds.
 *
 * @param {object[]} rows - Output of assessSkillHealth.
 * @returns {string} - Multi-line plain-text table for stdout (no
 *   trailing newline). When there are no rows, a short "no usage data
 *   yet" notice is returned instead.
 *
 * LAST CITED shows "<n>d ago" when days_since_cited is known,
 * "(unknown)" when the row has citations but no usable timestamp, and
 * "(never)" when the skill has no citations.
 */
export function formatHealthDashboard(rows) {
  if (!Array.isArray(rows) || rows.length === 0) {
    return (
      'Skill health: no usage data yet.\n\n' +
      'Usage data accumulates after clud-bug reviews land in your repo.\n' +
      'Workflow integration ships in v0.6.29 — until then this command is\n' +
      'a structural placeholder.'
    );
  }
  const STATUS_GLYPH = {
    'archive-candidate': '🟥 archive?',
    'stale': '🟨 stale',
    'new': '🟦 new',
    'healthy': '🟩 healthy',
  };
  const SLUG_WIDTH = 32;

  // Distinguish "cited, but we don't know when" from "never cited" so
  // the column cannot contradict a non-zero CITES count.
  const describeLastCited = (row) => {
    if (row.days_since_cited != null) return `${row.days_since_cited}d ago`;
    return row.citations > 0 ? '(unknown)' : '(never)';
  };

  const lines = [
    'Skill health (deterministic — read-only; no automation acts on this)',
    '',
    '  STATUS            SLUG                              LOADS  CITES  LAST CITED',
    '  ----------------  --------------------------------  -----  -----  --------------',
  ];
  for (const row of rows) {
    if (!row) continue;
    const status = STATUS_GLYPH[row.status] || String(row.status);
    const fullSlug = String(row.slug);
    // Over-long slugs are cut to fit the column, with "..." marking the cut.
    const slug = fullSlug.length > SLUG_WIDTH
      ? fullSlug.slice(0, SLUG_WIDTH - 3) + '...'
      : fullSlug;
    lines.push(
      `  ${status.padEnd(16)}  ${slug.padEnd(SLUG_WIDTH)}  ${String(row.loads).padStart(5)}  ` +
      `${String(row.citations).padStart(5)}  ${describeLastCited(row)}`
    );
  }
  lines.push('');
  lines.push('Thresholds:');
  lines.push('  archive-candidate = citations==0 + loads>=5');
  lines.push('  stale             = last cited >60 days ago');
  lines.push('  new               = loads<5 (still bedding in)');
  lines.push('  healthy           = cited within 60 days');
  return lines.join('\n');
}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clud-bug",
-  "version": "0.6.26",
+  "version": "0.6.28",
   "description": "Skill-driven Claude PR review. Ship a brand-voice skill, get brand reviews. Each finding cites the skill that motivated it. CLI installs the workflow + a baseline kit; add more from skills.sh.",
   "homepage": "https://cludbug.dev",
   "bugs": "https://github.com/thrillmade/clud-bug/issues",

package/templates/workflow-py.yml.tmpl CHANGED Viewed

@@ -339,7 +339,7 @@ jobs:
       # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
       - name: Strict mode — fail check on critical findings
         if: success()
-        uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.26
+        uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.28
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           # v0.6.22 / 0.0.O: summary now posted by github-actions[bot].

package/templates/workflow-ts.yml.tmpl CHANGED Viewed

@@ -339,7 +339,7 @@ jobs:
       # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
       - name: Strict mode — fail check on critical findings
         if: success()
-        uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.26
+        uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.28
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           # v0.6.22 / 0.0.O: summary now posted by github-actions[bot].

package/templates/workflow.yml.tmpl CHANGED Viewed

@@ -589,7 +589,7 @@ jobs:
       # Letting the action's own failure fail the check is louder and right.
       - name: Strict mode — fail check on critical findings
         if: success()
-        uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.26
+        uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.28
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           # v0.6.22 / 0.0.O: the summary is now posted by the workflow