clud-bug 0.6.16 → 0.6.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/clud-bug.js CHANGED
@@ -76,6 +76,9 @@ Commands:
76
76
  rate, 30-day rolling \$/LOC trend, per-repo/per-model
77
77
  distributions, and outliers (> 2x org median).
78
78
  Use --pr / --repo / --since / --limit / --json to filter.
79
+ eval Run the golden-set regression gate against the rendered review
80
+ prompt (must-contain / must-not-contain / byte-budget). Same as
81
+ \`node --test test/prompts.eval.test.js\` but works from any cwd.
79
82
 
80
83
  Options:
81
84
  --offline Skip skills.sh; pin only the bundled baseline specimens.
@@ -126,12 +129,30 @@ async function main() {
126
129
  case 'update': return runUpdateCmd(args);
127
130
  case 'edit-workflow': return runEditWorkflow(args);
128
131
  case 'usage': return runUsage(args);
132
+ case 'eval': return runEval();
129
133
  default:
130
134
  process.stderr.write(`Unknown command: ${cmd || '(none)'}\n\n${HELP}`);
131
135
  process.exit(2);
132
136
  }
133
137
  }
134
138
 
139
+ // 0.0.E (v0.6.17): thin wrapper around the golden-set test file. Devs
140
+ // who follow the README invoke `clud-bug eval` — this routes to the
141
+ // same `node --test` runner CI uses, so dev and CI verdicts match.
142
+ //
143
+ // Dev-only: runs against the prompt bundled in PKG_ROOT (the cloned
144
+ // clud-bug repo). `test/` is intentionally not in package.json `files`,
145
+ // so in a globally installed copy the test file is missing and the run
146
+ // fails. No args supported yet — the README does not advertise any.
147
+ async function runEval() {
148
+ const result = spawnSync(
149
+ 'node',
150
+ ['--test', join(PKG_ROOT, 'test/prompts.eval.test.js')],
151
+ { stdio: 'inherit' },
152
+ );
153
+ process.exit(result.status ?? 1); // null status (spawn error or signal kill) exits 1
154
+ }
155
+
135
156
  async function runInit(args) {
136
157
  const cwd = process.cwd();
137
158
  log(`🐛 Field season opens in ${cwd}.`);
package/lib/prompts.js CHANGED
@@ -119,8 +119,35 @@ size discipline on those fetches pays back directly.
119
119
  comments — the FIX-PUSH FLOW handles those via reviewThreads
120
120
  GraphQL instead.
121
121
 
122
- If you genuinely cannot review safely without the elided content,
123
- say so plainly in the summary comment instead of speculating.
122
+ Tee-hint on cap fire (v0.6.18, RTK-inspired):
123
+ When ANY \`head -c "$MAX_*"\` cap fires (last line cut mid-token, or
124
+ \`wc -c\` on the captured output equals the cap exactly), you MUST do
125
+ two things, in order:
126
+
127
+ 1. Attempt ONE targeted re-fetch with double the cap on the specific
128
+ truncated section. Example for diff: \`gh pr diff "$PR_NUMBER" |
129
+ head -c $((MAX_DIFF_BYTES * 2))\`. For skills: re-fetch the
130
+ specific \`.claude/skills/<name>/SKILL.md\` that hit the cap with
131
+ \`head -c $((MAX_SKILL_BYTES * 2))\` — name the file. For
132
+ comments: re-fetch with \`per_page=40\` AND \`head -c
133
+ $((MAX_COMMENT_BYTES * 2))\` — doubling per_page alone is wasted
134
+ work when the original truncation was byte-bound.
135
+
136
+ 2. Add a \`### Diagnostics\` block above the Skills-referenced
137
+ footer (the \`<!-- last-reviewed-sha: ... -->\` marker still goes
138
+ last on its own line — Diagnostics is not the last thing in the
139
+ comment). Each line names a cap that fired, the section affected,
140
+ and the outcome of the re-fetch (e.g. "still truncated",
141
+ "recovered with 2x cap", "finding deferred — content beyond 2x").
142
+
143
+ This makes truncation an auditable event in the review trail instead
144
+ of a silent confidence reduction. The pattern is the producer-side
145
+ half of RTK's \`force_tee_tail_hint\`: never elide without naming what
146
+ was elided.
147
+
148
+ If after the re-fetch you genuinely cannot review safely without the
149
+ still-elided content, say so plainly in the summary comment instead
150
+ of speculating.
124
151
 
125
152
  Skills are not background context — they are review rules with
126
153
  authority. Before flagging any finding, scan the loaded skills in
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clud-bug",
3
- "version": "0.6.16",
3
+ "version": "0.6.18",
4
4
  "description": "Skill-driven Claude PR review. Ship a brand-voice skill, get brand reviews. Each finding cites the skill that motivated it. CLI installs the workflow + a baseline kit; add more from skills.sh.",
5
5
  "homepage": "https://cludbug.dev",
6
6
  "bugs": "https://github.com/thrillmade/clud-bug/issues",
@@ -156,6 +156,6 @@ jobs:
156
156
  # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
157
157
  - name: Strict mode — fail check on critical findings
158
158
  if: success()
159
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.16
159
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.18
160
160
  with:
161
161
  github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -156,6 +156,6 @@ jobs:
156
156
  # Strict-mode gate — composite action; see workflow.yml.tmpl for design notes.
157
157
  - name: Strict mode — fail check on critical findings
158
158
  if: success()
159
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.16
159
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.18
160
160
  with:
161
161
  github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -247,6 +247,6 @@ jobs:
247
247
  # Letting the action's own failure fail the check is louder and right.
248
248
  - name: Strict mode — fail check on critical findings
249
249
  if: success()
250
- uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.16
250
+ uses: thrillmade/clud-bug/.github/actions/strict-mode-gate@v0.6.18
251
251
  with:
252
252
  github-token: ${{ secrets.GITHUB_TOKEN }}