@codexstar/bug-hunter 3.0.0 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +149 -83
- package/README.md +150 -15
- package/SKILL.md +94 -27
- package/agents/openai.yaml +4 -0
- package/bin/bug-hunter +9 -3
- package/docs/images/2026-03-12-fix-plan-rollout.png +0 -0
- package/docs/images/2026-03-12-hero-bug-hunter-overview.png +0 -0
- package/docs/images/2026-03-12-machine-readable-artifacts.png +0 -0
- package/docs/images/2026-03-12-pr-review-flow.png +0 -0
- package/docs/images/2026-03-12-security-pack.png +0 -0
- package/docs/images/adversarial-debate.png +0 -0
- package/docs/images/doc-verify-fix-plan.png +0 -0
- package/docs/images/hero.png +0 -0
- package/docs/images/pipeline-overview.png +0 -0
- package/docs/images/security-finding-card.png +0 -0
- package/docs/plans/2026-03-11-structured-output-migration-plan.md +288 -0
- package/docs/plans/2026-03-12-audit-bug-fixes-surgical-plan.md +193 -0
- package/docs/plans/2026-03-12-enterprise-security-pack-e2e-plan.md +59 -0
- package/docs/plans/2026-03-12-local-security-skills-integration-plan.md +39 -0
- package/docs/plans/2026-03-12-pr-review-strategic-fix-flow.md +78 -0
- package/evals/evals.json +366 -102
- package/modes/extended.md +2 -2
- package/modes/fix-loop.md +30 -30
- package/modes/fix-pipeline.md +32 -6
- package/modes/large-codebase.md +14 -15
- package/modes/local-sequential.md +44 -20
- package/modes/loop.md +56 -56
- package/modes/parallel.md +3 -3
- package/modes/scaled.md +2 -2
- package/modes/single-file.md +3 -3
- package/modes/small.md +11 -11
- package/package.json +11 -1
- package/prompts/fixer.md +37 -23
- package/prompts/hunter.md +39 -20
- package/prompts/referee.md +34 -20
- package/prompts/skeptic.md +25 -22
- package/schemas/coverage.schema.json +67 -0
- package/schemas/examples/findings.invalid.json +13 -0
- package/schemas/examples/findings.valid.json +17 -0
- package/schemas/findings.schema.json +76 -0
- package/schemas/fix-plan.schema.json +94 -0
- package/schemas/fix-report.schema.json +105 -0
- package/schemas/fix-strategy.schema.json +99 -0
- package/schemas/recon.schema.json +31 -0
- package/schemas/referee.schema.json +46 -0
- package/schemas/shared.schema.json +51 -0
- package/schemas/skeptic.schema.json +21 -0
- package/scripts/bug-hunter-state.cjs +35 -12
- package/scripts/code-index.cjs +11 -4
- package/scripts/fix-lock.cjs +95 -25
- package/scripts/payload-guard.cjs +24 -10
- package/scripts/pr-scope.cjs +181 -0
- package/scripts/prepublish-guard.cjs +82 -0
- package/scripts/render-report.cjs +346 -0
- package/scripts/run-bug-hunter.cjs +669 -33
- package/scripts/schema-runtime.cjs +273 -0
- package/scripts/schema-validate.cjs +40 -0
- package/scripts/tests/bug-hunter-state.test.cjs +68 -3
- package/scripts/tests/code-index.test.cjs +15 -0
- package/scripts/tests/fix-lock.test.cjs +60 -2
- package/scripts/tests/fixtures/flaky-worker.cjs +6 -1
- package/scripts/tests/fixtures/low-confidence-worker.cjs +8 -2
- package/scripts/tests/fixtures/success-worker.cjs +6 -1
- package/scripts/tests/payload-guard.test.cjs +154 -2
- package/scripts/tests/pr-scope.test.cjs +212 -0
- package/scripts/tests/render-report.test.cjs +180 -0
- package/scripts/tests/run-bug-hunter.test.cjs +686 -2
- package/scripts/tests/security-skills-integration.test.cjs +29 -0
- package/scripts/tests/skills-packaging.test.cjs +30 -0
- package/scripts/tests/worktree-harvest.test.cjs +67 -1
- package/scripts/worktree-harvest.cjs +62 -9
- package/skills/README.md +19 -0
- package/skills/commit-security-scan/SKILL.md +63 -0
- package/skills/security-review/SKILL.md +57 -0
- package/skills/threat-model-generation/SKILL.md +47 -0
- package/skills/vulnerability-validation/SKILL.md +59 -0
- package/templates/subagent-wrapper.md +12 -3
- package/modes/_dispatch.md +0 -121
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
/**
 * Read a UTF-8 text file and parse its content as JSON.
 *
 * A leading byte-order mark is stripped before parsing because `JSON.parse`
 * rejects it. Parse failures are re-thrown with the file path included so the
 * caller can tell which input file was malformed (the `report` command reads
 * two files).
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} Errors from `fs.readFileSync` propagate unchanged.
 * @throws {SyntaxError} When the file content is not valid JSON.
 */
function readJson(filePath) {
  const raw = fs.readFileSync(filePath, 'utf8');
  // 0xFEFF is the BOM some editors prepend to UTF-8 files.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  try {
    return JSON.parse(text);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`Invalid JSON in ${filePath}: ${detail}`);
  }
}
|
|
9
|
+
|
|
10
|
+
/**
 * Print the command-line synopsis to stderr, one line per supported
 * subcommand.
 */
function usage() {
  const synopsis = [
    'Usage:',
    ' render-report.cjs report <findings-json> <referee-json>',
    ' render-report.cjs coverage <coverage-json>',
    ' render-report.cjs skeptic <skeptic-json>',
    ' render-report.cjs referee <referee-json>',
    ' render-report.cjs fix-report <fix-report-json>',
    ' render-report.cjs fix-strategy <fix-strategy-json>'
  ];
  for (const line of synopsis) {
    console.error(line);
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Coerce a value to an array.
 *
 * @param {*} value - Any value.
 * @returns {Array} `value` itself when it is an array, otherwise a new empty array.
 */
function toArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Render the combined Markdown bug report from a findings file and a referee
 * verdicts file.
 *
 * Each verdict is matched to a finding by `bugId` and sorted into one of three
 * sections: `REAL_BUG` verdicts are confirmed, `MANUAL_REVIEW` verdicts need
 * manual review, and every other verdict value is treated as dismissed.
 *
 * Findings that have no verdict at all are listed in a trailing
 * "Unreviewed Findings" section so they are not silently dropped. That section
 * is only emitted when such findings exist, so the output for fully
 * adjudicated inputs is unchanged. Entries that are not objects (for example
 * `null`) are ignored instead of crashing the renderer.
 *
 * @param {{ findingsPath: string, refereePath: string }} options - Paths of
 *   the findings JSON and the referee verdicts JSON. Non-array file content is
 *   treated as an empty list.
 * @returns {string} The Markdown report, terminated by a newline.
 */
function renderReport({ findingsPath, refereePath }) {
  const isRecord = (value) => value !== null && typeof value === 'object';
  const findings = toArray(readJson(findingsPath)).filter(isRecord);
  const verdicts = toArray(readJson(refereePath)).filter(isRecord);
  const findingByBugId = new Map(findings.map((finding) => [finding.bugId, finding]));
  const confirmed = [];
  const dismissed = [];
  const manualReview = [];

  for (const verdict of verdicts) {
    const finding = findingByBugId.get(verdict.bugId) || null;
    const row = { verdict, finding };
    if (verdict.verdict === 'REAL_BUG') {
      confirmed.push(row);
    } else if (verdict.verdict === 'MANUAL_REVIEW') {
      manualReview.push(row);
    } else {
      dismissed.push(row);
    }
  }

  // Findings the referee never ruled on would otherwise vanish from the report.
  const ruledBugIds = new Set(verdicts.map((verdict) => verdict.bugId));
  const unreviewed = findings.filter((finding) => !ruledBugIds.has(finding.bugId));

  const fileOf = (finding) => (finding ? finding.file : 'unknown file');

  const lines = [
    '# Bug Hunter Report',
    '',
    `- Findings reviewed: ${findings.length}`,
    `- Confirmed: ${confirmed.length}`,
    `- Dismissed: ${dismissed.length}`,
    `- Manual review: ${manualReview.length}`,
    ''
  ];

  lines.push('## Confirmed Bugs');
  if (confirmed.length === 0) {
    lines.push('- None');
  } else {
    for (const { verdict, finding } of confirmed) {
      lines.push(`- ${verdict.bugId} | ${verdict.trueSeverity} | ${fileOf(finding)} | ${finding ? finding.claim : verdict.analysisSummary}`);
      lines.push(` Confidence: ${verdict.confidenceScore} (${verdict.confidenceLabel}) | ${verdict.verificationMode}`);
      lines.push(` Analysis: ${verdict.analysisSummary}`);
    }
  }

  lines.push('', '## Manual Review');
  if (manualReview.length === 0) {
    lines.push('- None');
  } else {
    for (const { verdict, finding } of manualReview) {
      lines.push(`- ${verdict.bugId} | ${fileOf(finding)} | ${finding ? finding.claim : verdict.analysisSummary}`);
      lines.push(` Confidence: ${verdict.confidenceScore} (${verdict.confidenceLabel})`);
      lines.push(` Analysis: ${verdict.analysisSummary}`);
    }
  }

  lines.push('', '## Dismissed Findings');
  if (dismissed.length === 0) {
    lines.push('- None');
  } else {
    for (const { verdict, finding } of dismissed) {
      lines.push(`- ${verdict.bugId} | ${fileOf(finding)} | ${finding ? finding.claim : 'No finding available'}`);
      lines.push(` Analysis: ${verdict.analysisSummary}`);
    }
  }

  if (unreviewed.length > 0) {
    lines.push('', '## Unreviewed Findings');
    for (const finding of unreviewed) {
      lines.push(`- ${finding.bugId} | ${finding.file} | ${finding.claim}`);
    }
  }

  return `${lines.join('\n')}\n`;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Render a coverage JSON file as Markdown: a header with status, iteration
 * and entry counts, followed by "Files", "Bugs" and "Fixes" bullet lists.
 * A list that is missing or empty is rendered as a single "- None" bullet.
 *
 * @param {{ coveragePath: string }} options - Path of the coverage JSON file.
 * @returns {string} The Markdown document, terminated by a newline.
 */
function renderCoverage({ coveragePath }) {
  const coverage = readJson(coveragePath);
  const fileEntries = toArray(coverage.files);
  const bugEntries = toArray(coverage.bugs);
  const fixEntries = toArray(coverage.fixes);

  // One bullet per entry, or the "- None" placeholder for an empty list.
  const bullets = (entries, format) => (
    entries.length === 0 ? ['- None'] : entries.map((entry) => format(entry))
  );

  const lines = [
    '# Bug Hunter Coverage',
    '',
    `- Status: ${coverage.status}`,
    `- Iteration: ${coverage.iteration}`,
    `- Files: ${fileEntries.length}`,
    `- Bugs: ${bugEntries.length}`,
    `- Fix entries: ${fixEntries.length}`,
    '',
    '## Files',
    ...bullets(fileEntries, (entry) => `- ${entry.status} | ${entry.path}`),
    '',
    '## Bugs',
    ...bullets(bugEntries, (bug) => `- ${bug.bugId} | ${bug.severity} | ${bug.file} | ${bug.claim}`),
    '',
    '## Fixes',
    ...bullets(fixEntries, (fix) => `- ${fix.bugId} | ${fix.status}`)
  ];

  return `${lines.join('\n')}\n`;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Render a skeptic review JSON file as a Markdown bullet list. Each entry
 * yields its bug id and response, its analysis summary, and an "Evidence"
 * line when `counterEvidence` is truthy. Non-array or empty content renders
 * as a single "- None" bullet.
 *
 * @param {{ skepticPath: string }} options - Path of the skeptic JSON file.
 * @returns {string} The Markdown document, terminated by a newline.
 */
function renderSkeptic({ skepticPath }) {
  const responses = toArray(readJson(skepticPath));
  const lines = ['# Skeptic Review', ''];

  if (responses.length === 0) {
    lines.push('- None');
  }

  responses.forEach((entry) => {
    lines.push(`- ${entry.bugId} | ${entry.response}`, ` ${entry.analysisSummary}`);
    if (entry.counterEvidence) {
      lines.push(` Evidence: ${entry.counterEvidence}`);
    }
  });

  return `${lines.join('\n')}\n`;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Render a referee verdicts JSON file as a Markdown bullet list. Each verdict
 * yields a headline (bug id, verdict, severity), a confidence line, an
 * analysis line, and a "Suggested fix" line when `suggestedFix` is truthy.
 * Non-array or empty content renders as a single "- None" bullet.
 *
 * @param {{ refereePath: string }} options - Path of the referee JSON file.
 * @returns {string} The Markdown document, terminated by a newline.
 */
function renderReferee({ refereePath }) {
  const rulings = toArray(readJson(refereePath));
  const lines = ['# Referee Verdicts', ''];

  if (rulings.length === 0) {
    lines.push('- None');
  }

  rulings.forEach((ruling) => {
    lines.push(
      `- ${ruling.bugId} | ${ruling.verdict} | ${ruling.trueSeverity}`,
      ` Confidence: ${ruling.confidenceScore} (${ruling.confidenceLabel}) | ${ruling.verificationMode}`,
      ` Analysis: ${ruling.analysisSummary}`
    );
    if (ruling.suggestedFix) {
      lines.push(` Suggested fix: ${ruling.suggestedFix}`);
    }
  });

  return `${lines.join('\n')}\n`;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Render a fix-report JSON file as Markdown with four parts: a run header
 * (branch, base commit, dry-run / circuit-breaker / phase-2-timeout flags),
 * the per-bug "Fixes" list, the "Verification" results, and the "Summary"
 * key/value pairs.
 *
 * A report without a `verification` object renders "- Not available" in that
 * section instead of throwing, matching the tolerance already applied to a
 * missing `summary`.
 *
 * @param {{ fixReportPath: string }} options - Path of the fix-report JSON file.
 * @returns {string} The Markdown document, terminated by a newline.
 */
function renderFixReport({ fixReportPath }) {
  const report = readJson(fixReportPath);
  const fixes = toArray(report.fixes);
  const verification = report.verification;
  const lines = [
    '# Fix Report',
    '',
    `- Branch: ${report.fix_branch}`,
    `- Base commit: ${report.base_commit}`,
    `- Dry run: ${report.dry_run ? 'yes' : 'no'}`,
    `- Circuit breaker: ${report.circuit_breaker_tripped ? 'tripped' : 'not tripped'}`,
    `- Phase 2 timeout: ${report.phase2_timeout_hit ? 'hit' : 'not hit'}`,
    '',
    '## Fixes'
  ];

  if (fixes.length === 0) {
    lines.push('- None');
  } else {
    for (const item of fixes) {
      lines.push(`- ${item.bugId} | ${item.status} | ${item.severity}`);
      lines.push(` Files: ${toArray(item.files).join(', ')}`);
      lines.push(` Lines: ${item.lines}`);
      // Optional details are only printed when present.
      if (item.description) {
        lines.push(` Description: ${item.description}`);
      }
      if (item.reason) {
        lines.push(` Reason: ${item.reason}`);
      }
      if (item.commit) {
        lines.push(` Commit: ${item.commit}`);
      }
    }
  }

  lines.push('', '## Verification');
  if (verification !== null && typeof verification === 'object') {
    lines.push(`- Baseline: ${verification.baseline_pass} pass / ${verification.baseline_fail} fail`);
    lines.push(`- Final: ${verification.final_pass} pass / ${verification.final_fail} fail`);
    lines.push(`- New failures: ${verification.new_failures}`);
    lines.push(`- Resolved failures: ${verification.resolved_failures}`);
    lines.push(`- Typecheck: ${verification.typecheck_pass ? 'pass' : 'fail'}`);
    lines.push(`- Build: ${verification.build_pass ? 'pass' : 'fail'}`);
    lines.push(`- Fixer bugs found: ${verification.fixer_bugs_found}`);
  } else {
    // Reading fields off a missing verification block would throw a TypeError.
    lines.push('- Not available');
  }

  lines.push('', '## Summary');
  for (const [key, value] of Object.entries(report.summary || {})) {
    lines.push(`- ${key}: ${value}`);
  }

  return `${lines.join('\n')}\n`;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Render a fix-strategy JSON file as Markdown: a header with the confidence
 * threshold and the summary counters, followed by one entry per cluster.
 *
 * A strategy without a `summary` object no longer throws; counters that are
 * missing are printed as "n/a". Output is unchanged when every counter is
 * present.
 *
 * @param {{ fixStrategyPath: string }} options - Path of the fix-strategy JSON file.
 * @returns {string} The Markdown document, terminated by a newline.
 */
function renderFixStrategy({ fixStrategyPath }) {
  const strategy = readJson(fixStrategyPath);
  const clusters = toArray(strategy.clusters);
  // Reading counters off a missing summary would throw a TypeError.
  const summary = strategy.summary !== null && typeof strategy.summary === 'object'
    ? strategy.summary
    : {};
  const count = (value) => (value === undefined || value === null ? 'n/a' : value);
  const lines = [
    '# Fix Strategy',
    '',
    `- Confidence threshold: ${strategy.confidenceThreshold}`,
    `- Confirmed findings: ${count(summary.confirmed)}`,
    `- Safe autofix: ${count(summary.safeAutofix)}`,
    `- Manual review: ${count(summary.manualReview)}`,
    `- Larger refactor: ${count(summary.largerRefactor)}`,
    `- Architectural remediation: ${count(summary.architecturalRemediation)}`,
    `- Canary candidates: ${count(summary.canaryCandidates)}`,
    `- Rollout candidates: ${count(summary.rolloutCandidates)}`,
    '',
    '## Clusters'
  ];

  if (clusters.length === 0) {
    lines.push('- None');
  } else {
    for (const cluster of clusters) {
      lines.push(`- ${cluster.clusterId} | ${cluster.strategy} | ${cluster.executionStage} | max severity ${cluster.maxSeverity}`);
      lines.push(` Bugs: ${toArray(cluster.bugIds).join(', ')}`);
      lines.push(` Files: ${toArray(cluster.files).join(', ')}`);
      lines.push(` Summary: ${cluster.summary}`);
      lines.push(` Action: ${cluster.recommendedAction}`);
      lines.push(` Reasons: ${toArray(cluster.reasons).join(' | ')}`);
    }
  }

  return `${lines.join('\n')}\n`;
}
|
|
255
|
+
|
|
256
|
+
/**
 * Command-line entry point. Reads the subcommand and its file arguments from
 * `process.argv`, resolves each file argument to an absolute path, and writes
 * the matching renderer's output to stdout.
 *
 * Prints the usage text and exits with status 1 when the subcommand is
 * missing or unknown, or when a required file argument is missing.
 */
function main() {
  const [command, ...args] = process.argv.slice(2);

  // Subcommand -> names of its positional path arguments and its renderer.
  const commands = new Map([
    ['report', { params: ['findingsPath', 'refereePath'], render: renderReport }],
    ['coverage', { params: ['coveragePath'], render: renderCoverage }],
    ['skeptic', { params: ['skepticPath'], render: renderSkeptic }],
    ['referee', { params: ['refereePath'], render: renderReferee }],
    ['fix-report', { params: ['fixReportPath'], render: renderFixReport }],
    ['fix-strategy', { params: ['fixStrategyPath'], render: renderFixStrategy }]
  ]);

  const spec = commands.get(command);
  if (!command || !spec) {
    usage();
    process.exit(1);
    return;
  }

  const supplied = spec.params.map((_, index) => args[index]);
  if (supplied.some((candidate) => !candidate)) {
    usage();
    process.exit(1);
    return;
  }

  const options = {};
  spec.params.forEach((name, index) => {
    options[name] = path.resolve(supplied[index]);
  });
  process.stdout.write(spec.render(options));
}
|
|
339
|
+
|
|
340
|
+
// Run the CLI. Any thrown value is reported on stderr (the message only for
// Error instances, the string form otherwise) and the process exits with
// status 1.
try {
  main();
} catch (error) {
  let message;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  console.error(message);
  process.exit(1);
}
|