@codexstar/bug-hunter 3.0.0 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/CHANGELOG.md +149 -83
  2. package/README.md +150 -15
  3. package/SKILL.md +94 -27
  4. package/agents/openai.yaml +4 -0
  5. package/bin/bug-hunter +9 -3
  6. package/docs/images/2026-03-12-fix-plan-rollout.png +0 -0
  7. package/docs/images/2026-03-12-hero-bug-hunter-overview.png +0 -0
  8. package/docs/images/2026-03-12-machine-readable-artifacts.png +0 -0
  9. package/docs/images/2026-03-12-pr-review-flow.png +0 -0
  10. package/docs/images/2026-03-12-security-pack.png +0 -0
  11. package/docs/images/adversarial-debate.png +0 -0
  12. package/docs/images/doc-verify-fix-plan.png +0 -0
  13. package/docs/images/hero.png +0 -0
  14. package/docs/images/pipeline-overview.png +0 -0
  15. package/docs/images/security-finding-card.png +0 -0
  16. package/docs/plans/2026-03-11-structured-output-migration-plan.md +288 -0
  17. package/docs/plans/2026-03-12-audit-bug-fixes-surgical-plan.md +193 -0
  18. package/docs/plans/2026-03-12-enterprise-security-pack-e2e-plan.md +59 -0
  19. package/docs/plans/2026-03-12-local-security-skills-integration-plan.md +39 -0
  20. package/docs/plans/2026-03-12-pr-review-strategic-fix-flow.md +78 -0
  21. package/evals/evals.json +366 -102
  22. package/modes/extended.md +2 -2
  23. package/modes/fix-loop.md +30 -30
  24. package/modes/fix-pipeline.md +32 -6
  25. package/modes/large-codebase.md +14 -15
  26. package/modes/local-sequential.md +44 -20
  27. package/modes/loop.md +56 -56
  28. package/modes/parallel.md +3 -3
  29. package/modes/scaled.md +2 -2
  30. package/modes/single-file.md +3 -3
  31. package/modes/small.md +11 -11
  32. package/package.json +11 -1
  33. package/prompts/fixer.md +37 -23
  34. package/prompts/hunter.md +39 -20
  35. package/prompts/referee.md +34 -20
  36. package/prompts/skeptic.md +25 -22
  37. package/schemas/coverage.schema.json +67 -0
  38. package/schemas/examples/findings.invalid.json +13 -0
  39. package/schemas/examples/findings.valid.json +17 -0
  40. package/schemas/findings.schema.json +76 -0
  41. package/schemas/fix-plan.schema.json +94 -0
  42. package/schemas/fix-report.schema.json +105 -0
  43. package/schemas/fix-strategy.schema.json +99 -0
  44. package/schemas/recon.schema.json +31 -0
  45. package/schemas/referee.schema.json +46 -0
  46. package/schemas/shared.schema.json +51 -0
  47. package/schemas/skeptic.schema.json +21 -0
  48. package/scripts/bug-hunter-state.cjs +35 -12
  49. package/scripts/code-index.cjs +11 -4
  50. package/scripts/fix-lock.cjs +95 -25
  51. package/scripts/payload-guard.cjs +24 -10
  52. package/scripts/pr-scope.cjs +181 -0
  53. package/scripts/prepublish-guard.cjs +82 -0
  54. package/scripts/render-report.cjs +346 -0
  55. package/scripts/run-bug-hunter.cjs +669 -33
  56. package/scripts/schema-runtime.cjs +273 -0
  57. package/scripts/schema-validate.cjs +40 -0
  58. package/scripts/tests/bug-hunter-state.test.cjs +68 -3
  59. package/scripts/tests/code-index.test.cjs +15 -0
  60. package/scripts/tests/fix-lock.test.cjs +60 -2
  61. package/scripts/tests/fixtures/flaky-worker.cjs +6 -1
  62. package/scripts/tests/fixtures/low-confidence-worker.cjs +8 -2
  63. package/scripts/tests/fixtures/success-worker.cjs +6 -1
  64. package/scripts/tests/payload-guard.test.cjs +154 -2
  65. package/scripts/tests/pr-scope.test.cjs +212 -0
  66. package/scripts/tests/render-report.test.cjs +180 -0
  67. package/scripts/tests/run-bug-hunter.test.cjs +686 -2
  68. package/scripts/tests/security-skills-integration.test.cjs +29 -0
  69. package/scripts/tests/skills-packaging.test.cjs +30 -0
  70. package/scripts/tests/worktree-harvest.test.cjs +67 -1
  71. package/scripts/worktree-harvest.cjs +62 -9
  72. package/skills/README.md +19 -0
  73. package/skills/commit-security-scan/SKILL.md +63 -0
  74. package/skills/security-review/SKILL.md +57 -0
  75. package/skills/threat-model-generation/SKILL.md +47 -0
  76. package/skills/vulnerability-validation/SKILL.md +59 -0
  77. package/templates/subagent-wrapper.md +12 -3
  78. package/modes/_dispatch.md +0 -121
package/CHANGELOG.md CHANGED
@@ -1,102 +1,158 @@
1
1
  # Changelog
2
2
 
3
- ## 3.0.0 — 2026-03-10
4
-
5
- ### npm package, worktree-isolated Fixer, and cross-IDE installation
6
-
7
- **npm global install and CLI:**
8
- - New `package.json` with `@codexstar/bug-hunter` package name
9
- - New `bin/bug-hunter` CLI entry point with `install`, `doctor`, and `info` commands
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [3.0.5] — 2026-03-11
9
+
10
+ ### Added
11
+ - `agents/openai.yaml` UI metadata for skill lists and quick-invoke prompts
12
+
13
+ ### Changed
14
+ - `SKILL.md` frontmatter now validates cleanly against the `skill-creator` validator
15
+ - `evals/evals.json` now matches the current `.bug-hunter/*` JSON-first pipeline, default loop/fix behavior, and modern flags like `--deps`, `--threat-model`, `--dry-run`, and `--autonomous`
16
+ - npm package files now include the `agents/` directory so `openai.yaml` ships with the published skill
17
+
18
+ ## [Unreleased]
19
+
20
+ ### Highlights
21
+ - PR review is now a first-class workflow with `--pr`, `--pr current`, `--pr recent`, `--pr 123`, `--last-pr`, and `--pr-security`.
22
+ - Bug Hunter now emits both `fix-strategy.json` and `fix-plan.json` before fix execution so remediation stays reviewable and confidence-gated.
23
+ - The enterprise security pack now ships inside the repository under `skills/`, making PR security review and full security audits portable.
24
+ - Fix execution is now safer through schema-validated planning, atomic lock handling, safer worktree cleanup, stash preservation, and shell-safe templating.
25
+
26
+ ### Added
27
+ - GitHub Actions npm publish workflow on release publish or manual dispatch, with version/tag verification before `npm publish`
28
+ - bundled local security skills under `skills/`: `commit-security-scan`, `security-review`, `threat-model-generation`, and `vulnerability-validation`
29
+ - enterprise security entrypoints: `--pr-security`, `--security-review`, and `--validate-security`
30
+ - regression tests and eval coverage for integrated local security-skill routing
31
+ - `schemas/fix-plan.schema.json` plus validation coverage for canonical fix-plan artifacts
32
+ - focused regressions for lock-token ownership, atomic lock acquisition, stale artifact clearing, shell-safe worker paths, failed-chunk fix-plan suppression, managed worktree cleanup, and stash-ref preservation
33
+
34
+ ### Changed
35
+ - portable security capabilities now live inside the repository under `skills/` instead of depending on external machine-specific skill paths
36
+ - package metadata now ships the `skills/` directory for self-contained distribution
37
+ - main Bug Hunter orchestration now routes into the bundled local security skills for PR security review, threat-model generation, enterprise security review, and vulnerability validation
38
+ - fix-lock now uses owner tokens for renew/release, atomic acquisition under contention, and safe recovery from corrupted lock files
39
+ - run-bug-hunter now shell-quotes templated command arguments, clears stale artifacts before retries, validates fix-plan artifacts, and skips fix-plan emission when chunks fail
40
+ - worktree cleanup/status now preserve unrelated directories, preserve stash metadata from defensive harvests, and avoid reporting manifest-only worktrees as dirty
41
+ - current-PR git fallback now diffs against the discovered `origin/<default-branch>` ref when the base branch comes from `origin/HEAD`
42
+ - README now opens with a short “New in This Update” and PR-first quick-start section
43
+ - `llms.txt` and `llms-full.txt` now describe the PR review flow, bundled local security pack, current fix artifacts, and the current regression-test coverage
44
+ - `skills/README.md` now explains how the bundled security skills map into Bug Hunter workflows
45
+
46
+ ## [3.0.4] — 2026-03-11
47
+
48
+ ### Added
49
+ - `schemas/*.schema.json` versioned contracts for recon, findings, skeptic, referee, coverage, fix-report, plus shared definitions and example findings fixtures
50
+ - `scripts/schema-runtime.cjs` lightweight schema runtime and `scripts/schema-validate.cjs` CLI for local artifact checks
51
+ - `scripts/render-report.cjs` Markdown renderer for report, coverage, skeptic, referee, and fix-report views from canonical JSON artifacts
52
+ - canonical `coverage.json` output with derived `coverage.md`
53
+ - `run-bug-hunter.cjs phase` command for schema-validated Skeptic, Referee, and Fixer phase execution with retry support
54
+ - runner tests for invalid Skeptic, Referee, and Fixer artifacts plus Markdown companion rendering
55
+
56
+ ### Changed
57
+ - Hunter, Skeptic, Referee, and Fixer prompts now describe JSON-first canonical artifacts
58
+ - `payload-guard.cjs` now emits real schema refs instead of placeholder format/version objects
59
+ - `bug-hunter-state.cjs` now rejects malformed findings and stores canonical `confidenceScore`, `category`, `evidence`, `runtimeTrigger`, and `crossReferences`
60
+ - `run-bug-hunter.cjs` now treats missing or invalid `findings.json` as a retriable chunk failure, validates phase artifacts, and checks all shipped schema assets during preflight
61
+ - loop, fix-loop, local-sequential, and major mode docs now point at `*.json` phase artifacts and `coverage.json`
62
+ - README, SKILL docs, evals, and the subagent wrapper now describe rendered Markdown as a companion to canonical JSON
63
+ - preflight now checks all shipped structured-output schemas, not just findings
64
+ - structured-output migration now enforces orchestrated outbound validation beyond the local/manual path
65
+
66
+ ## [3.0.1] — 2026-03-11
67
+
68
+ ### Changed
69
+ - Loop and fix-loop completion now require full queued source-file coverage, not just CRITICAL/HIGH coverage
70
+ - Autonomous runs now continue through remaining MEDIUM and LOW files after prioritized chunks finish unless the user interrupts
71
+ - Loop iteration guidance now scales `maxIterations` from queue size so large audits do not stop early
72
+ - Large-codebase mode now treats LOW domains as part of the default autonomous queue instead of optional skipped work
73
+
74
+ ## [3.0.0] — 2026-03-10
75
+
76
+ ### Added
77
+ - `package.json` with `@codexstar/bug-hunter` package name
78
+ - `bin/bug-hunter` CLI entry point with `install`, `doctor`, and `info` commands
10
79
  - `bug-hunter install` auto-detects Claude Code, Codex, Cursor, Kiro, and generic agents directories
11
80
  - `bug-hunter doctor` checks environment readiness (Node.js, Context Hub, Context7, git)
12
81
  - Install via: `npm install -g @codexstar/bug-hunter && bug-hunter install`
13
-
14
- **Cross-IDE installation via skills.sh:**
15
82
  - Compatible with `npx skills add codexstar69/bug-hunter` for Cursor, Windsurf, Copilot, Kiro, and Claude Code
16
- - No publish step required — auto-discovered from public GitHub repo with valid SKILL.md
17
-
18
- **Worktree-isolated Fixer dispatch (subagent/teams backends):**
19
- - New `scripts/worktree-harvest.cjs` — manages git worktrees for safe, isolated Fixer execution
20
- - 6 subcommands: `prepare`, `harvest`, `checkout-fix`, `cleanup`, `cleanup-all`, `status`
21
- - Fixer edits happen in an isolated worktree; commits land on the fix branch without touching the user's working tree
22
- - Crash recovery via `cleanup-all` with automatic stash preservation
23
- - Meta-file filtering prevents `.worktree-manifest.json` and `.harvest-result.json` from polluting dirty detection
83
+ - `scripts/worktree-harvest.cjs` — manages git worktrees for safe, isolated Fixer execution (6 subcommands: `prepare`, `harvest`, `checkout-fix`, `cleanup`, `cleanup-all`, `status`)
84
+ - 13 new tests in `scripts/tests/worktree-harvest.test.cjs` (full suite: 25/25 passing)
85
+ - 5 new error rows in SKILL.md for worktree failures: prepare, harvest dirty, harvest no-manifest, cleanup, and checkout-fix errors
86
+
87
+ ### Changed
24
88
  - `modes/fix-pipeline.md` updated with dual-path dispatch: worktree path (prepare → dispatch → harvest → cleanup) and direct path
25
89
  - `modes/_dispatch.md` updated with Fixer worktree lifecycle diagram and CRITICAL warning about Agent tool's built-in `isolation: "worktree"`
26
90
  - `templates/subagent-wrapper.md` updated with `{WORKTREE_RULES}` variable for Fixer isolation rules
27
- - 13 new tests in `scripts/tests/worktree-harvest.test.cjs` (full suite: 25/25 passing)
28
-
29
- **Context Hub preflight warning:**
30
- - SKILL.md Step 5b now shows a visible `⚠️` warning when `chub` is not installed, with install command
31
- - Previously was a silent suggestion — now impossible to miss
32
-
33
- **SKILL.md error table:**
34
- - 5 new error rows for worktree failures: prepare, harvest dirty, harvest no-manifest, cleanup, and checkout-fix errors
35
-
36
- ---
91
+ - SKILL.md Step 5b now shows a visible `⚠️` warning when `chub` is not installed (previously a silent suggestion)
37
92
 
38
- ## 2026-03-10 13:26
93
+ ## [2.4.1] — 2026-03-10
39
94
 
95
+ ### Fixed
40
96
  - `scripts/triage.cjs`: LOW-only repositories promoted into `scanOrder` so script-heavy codebases do not collapse to zero scannable files
41
97
  - `scripts/run-bug-hunter.cjs`: `teams` backend name aligned with the documented dispatch mode
42
98
  - `scripts/run-bug-hunter.cjs`: `code-index.cjs` treated as optional during preflight and gated only when index-backed flows are requested
43
99
  - `scripts/run-bug-hunter.cjs`: low-confidence delta expansion now reuses the caller's configured `--delta-hops` value
100
+
101
+ ### Added
44
102
  - `scripts/tests/run-bug-hunter.test.cjs`: regressions for LOW-only triage, optional `code-index`, `teams` backend selection, and delta-hop expansion
45
103
 
46
- ## 2.4.0 — 2026-03-10
104
+ ## [2.4.0] — 2026-03-10
47
105
 
48
- ### Context Hub integration — curated docs with Context7 fallback
106
+ ### Added
107
+ - `scripts/doc-lookup.cjs`: hybrid documentation lookup that tries [Context Hub](https://github.com/andrewyng/context-hub) (chub) first for curated, versioned, annotatable docs, then falls back to Context7 API when chub doesn't have the library
108
+ - Requires `@aisuite/chub` installed globally (`npm install -g @aisuite/chub`) — optional but recommended; pipeline works without it via Context7 fallback
49
109
 
50
- - New `scripts/doc-lookup.cjs`: hybrid documentation lookup that tries [Context Hub](https://github.com/andrewyng/context-hub) (chub) first for curated, versioned, annotatable docs, then falls back to Context7 API when chub doesn't have the library
110
+ ### Changed
51
111
  - All agent prompts (hunter, skeptic, fixer, doc-lookup) updated to use `doc-lookup.cjs` as primary with `context7-api.cjs` as explicit fallback
52
112
  - Preflight smoke test now checks `doc-lookup.cjs` first, falls back to `context7-api.cjs`
53
113
  - `run-bug-hunter.cjs` validates both scripts exist at startup
54
- - Requires `@aisuite/chub` installed globally (`npm install -g @aisuite/chub`) — optional but recommended; pipeline works without it via Context7 fallback
55
-
56
- ## 2.3.0 — 2026-03-10
57
114
 
58
- ### Loop mode is now on by default
115
+ ## [2.3.0] — 2026-03-10
59
116
 
117
+ ### Changed
60
118
  - `LOOP_MODE=true` is the new default — every `/bug-hunter` invocation iterates until full CRITICAL/HIGH coverage
61
- - Added `--no-loop` flag to opt out and get single-pass behavior
62
119
  - `--loop` flag still accepted for backwards compatibility (no-op)
63
120
  - Updated triage warnings, coverage enforcement, and all documentation to reflect the new default
64
- - `/bug-hunter src/` now finds bugs, fixes them, AND loops until full coverage — zero flags needed
65
121
 
66
- ## 2.2.1 — 2026-03-10
122
+ ### Added
123
+ - `--no-loop` flag to opt out and get single-pass behavior
67
124
 
68
- ### Fix: `--loop` mode now actually loops
125
+ ## [2.2.1] — 2026-03-10
69
126
 
70
- The `--loop` flag was broken — loop mode files described a "ralph-loop" system but never called `ralph_start`, so the pipeline ran once and stopped. Fixed:
71
-
72
- - **`modes/loop.md`**: added explicit `ralph_start` call instructions with correct `taskContent` and `maxIterations` parameters
73
- - **`modes/fix-loop.md`**: same fix for `--loop --fix` combined mode, plus removed manual state file creation (handled by `ralph_start`)
74
- - **`SKILL.md`**: added CRITICAL integration note requiring `ralph_start` call when `LOOP_MODE=true`
127
+ ### Fixed
128
+ - `modes/loop.md`: added explicit `ralph_start` call instructions with correct `taskContent` and `maxIterations` parameters
129
+ - `modes/fix-loop.md`: same fix for `--loop --fix` combined mode, plus removed manual state file creation (handled by `ralph_start`)
130
+ - `SKILL.md`: added CRITICAL integration note requiring `ralph_start` call when `LOOP_MODE=true`
75
131
  - Changed completion signal from `<promise>DONE</promise>` to `<promise>COMPLETE</promise>` (correct ralph-loop API)
76
132
  - Each iteration now calls `ralph_done` to proceed instead of relying on a non-existent hook
77
133
 
78
- ## 2.2.0 — 2026-03-10
79
-
80
- ### Fix pipeline hardening — 12 reliability and safety optimizations
134
+ ## [2.2.0] — 2026-03-10
81
135
 
82
- - **Rollback timeout guard**: `git revert` calls now timeout after 60 seconds; conflicts abort cleanly instead of hanging the pipeline indefinitely
83
- - **Dynamic lock TTL**: single-writer lock TTL scales with queue size (`max(1800, bugs * 600)`), preventing expiry on large fix runs
84
- - **Lock heartbeat renewal**: new `renew` command in `fix-lock.cjs` — fixer renews the lock after each bug fix to prevent mid-run TTL expiry
85
- - **Fixer context budget**: `MAX_BUGS_PER_FIXER = 5` — large fix queues are split into sequential batches to prevent context window overflow and hallucinated patches
86
- - **Cross-file dependency ordering**: when `code-index.cjs` is available, fixes are ordered by import graph (fix dependencies before dependents)
87
- - **Flaky test detection**: baseline tests run twice; tests that fail non-deterministically are excluded from revert decisions
88
- - **Per-bug revert granularity**: clarified one-commit-per-bug as mandatory; reverts target individual bugs, not clusters
89
- - **Dynamic canary sizing**: `max(1, min(3, ceil(eligible * 0.2)))` — canary group scales with queue size instead of hardcoded 1–3
90
- - **Post-fix re-scan severity floor**: fixer-introduced bugs below MEDIUM severity are logged but don't trigger `FIXER_BUG` status
91
- - **Dry-run mode** (`--dry-run`): preview planned fixes without editing files — Fixer reads code and outputs unified diff previews, no git commits
92
- - **Machine-readable fix report**: `.bug-hunter/fix-report.json` written alongside markdown report for CI/CD gating, dashboards, and ticket automation
93
- - **Circuit breaker**: if >50% of fix attempts fail/revert (min 3 attempts), remaining fixes are halted to prevent token waste on unstable codebases
94
- - **Global Phase 2 timeout**: 30-minute deadline for the entire fix execution phase; unprocessed bugs are marked SKIPPED
136
+ ### Added
137
+ - Rollback timeout guard: `git revert` calls now timeout after 60 seconds; conflicts abort cleanly instead of hanging
138
+ - Dynamic lock TTL: single-writer lock TTL scales with queue size (`max(1800, bugs * 600)`)
139
+ - Lock heartbeat renewal: new `renew` command in `fix-lock.cjs`
140
+ - Fixer context budget: `MAX_BUGS_PER_FIXER = 5` — large fix queues are split into sequential batches
141
+ - Cross-file dependency ordering: when `code-index.cjs` is available, fixes are ordered by import graph
142
+ - Flaky test detection: baseline tests run twice; non-deterministic failures excluded from revert decisions
143
+ - Dynamic canary sizing: `max(1, min(3, ceil(eligible * 0.2)))` — canary group scales with queue size
144
+ - Dry-run mode (`--dry-run`): preview planned fixes without editing files
145
+ - Machine-readable fix report: `.bug-hunter/fix-report.json` for CI/CD gating, dashboards, and ticket automation
146
+ - Circuit breaker: if >50% of fix attempts fail/revert (min 3 attempts), remaining fixes are halted
147
+ - Global Phase 2 timeout: 30-minute deadline for the entire fix execution phase
95
148
 
96
- ## 2.1.0 — 2026-03-10
149
+ ### Changed
150
+ - Per-bug revert granularity: clarified one-commit-per-bug as mandatory; reverts target individual bugs, not clusters
151
+ - Post-fix re-scan severity floor: fixer-introduced bugs below MEDIUM severity are logged but don't trigger `FIXER_BUG` status
97
152
 
98
- ### v3 security pipeline + dependency scanner reliability
153
+ ## [2.1.0] — 2026-03-10
99
154
 
155
+ ### Added
100
156
  - STRIDE/CWE fields in Hunter findings format, with CWE quick-reference mapping for security categories
101
157
  - Skeptic hard-exclusion fast path (15 false-positive classes) before deep review
102
158
  - Referee security enrichment: reachability, exploitability, CVSS 3.1, and PoC blocks for critical/high security bugs
@@ -104,48 +160,58 @@ The `--loop` flag was broken — loop mode files described a "ralph-loop" system
104
160
  - Dependency scan support: `--deps` flag and `scripts/dep-scan.cjs` output to `.bug-hunter/dep-findings.json`
105
161
  - JSON report contract: `.bug-hunter/findings.json` plus canonical `.bug-hunter/report.md`
106
162
  - Few-shot calibration examples for Hunter and Skeptic in `prompts/examples/`
107
- - `dep-scan.cjs` lockfile-aware audits (`npm`, `pnpm`, `yarn`, `bun`) and non-zero audit exit handling so vulnerability exits are not misreported as scanner failures
108
163
 
109
- ## 2.0.0 — 2026-03-10
164
+ ### Fixed
165
+ - `dep-scan.cjs` lockfile-aware audits (`npm`, `pnpm`, `yarn`, `bun`) and non-zero audit exit handling so vulnerability exits are not misreported as scanner failures
110
166
 
111
- ### Structural overhaul — triage pipeline + 36% token reduction
167
+ ## [2.0.0] — 2026-03-10
112
168
 
113
- **Pipeline restructure:**
169
+ ### Changed
114
170
  - Triage moved to Step 1 (after arg parse) — was running before target resolved
115
171
  - All mode files consume triage JSON — riskMap, scanOrder, fileBudget flow downstream
116
172
  - Recon demoted to enrichment — no longer does file classification when triage exists
117
- - Step 7.0 re-audit gate removed — duplicated Referee's work
118
-
119
- **Deduplication:**
120
- - `modes/_dispatch.md` — shared dispatch patterns (18 references across modes)
121
173
  - Mode files compressed: small 7.3→2.9KB, parallel 7.9→4.2KB, extended 7.1→3.3KB, scaled 7.3→2.7KB
122
174
  - Skip-file patterns consolidated — single authoritative list in SKILL.md
123
175
  - Error handling table updated with correct step references
124
-
125
- **Dead weight removed:**
126
- - FIX-PLAN.md deleted (26KB dead planning doc)
127
- - README.md compressed from 8.5KB to 3.7KB
128
- - code-index.cjs marked optional
129
-
130
- **Prompt compression:**
131
176
  - hunter.md: scope rules and security checklist compressed
132
177
  - recon.md: output format template and "What to map" sections compressed
133
178
  - referee.md: tiering rules, re-check section, output format compressed
134
179
  - skeptic.md: false-positive patterns compressed to inline format
135
-
136
- **Logic gaps fixed:**
137
180
  - Branch-diff/staged optimization note in Step 3
138
181
  - single-file.md: local-sequential backend support added
139
182
 
140
- **Size:** 187,964 → 119,825 bytes (36% reduction, ~30K tokens)
183
+ ### Added
184
+ - `modes/_dispatch.md` — shared dispatch patterns (18 references across modes)
185
+
186
+ ### Removed
187
+ - Step 7.0 re-audit gate removed — duplicated Referee's work
188
+ - FIX-PLAN.md deleted (26KB dead planning doc)
189
+ - README.md compressed from 8.5KB to 3.7KB
190
+ - code-index.cjs marked optional
141
191
 
142
- ## 1.0.0 — 2026-03-10
192
+ ## [1.0.0] — 2026-03-10
143
193
 
144
- ### Zero-token pre-recon triage (`triage.cjs`)
145
- - `scripts/triage.cjs` runs before any LLM agent — 0 tokens, <2s for 2,000+ files
194
+ ### Added
195
+ - `scripts/triage.cjs` — zero-token pre-recon triage, runs before any LLM agent (<2s for 2,000+ files)
146
196
  - FILE_BUDGET, strategy, and domain map decided by triage, not Recon
147
197
  - Writes `.bug-hunter/triage.json` with strategy, fileBudget, domains, riskMap, scanOrder
148
198
  - `local-sequential.md` with full phase-by-phase instructions
149
199
  - Subagent wrapper template in `templates/subagent-wrapper.md`
150
200
  - Coverage enforcement — partial audits produce explicit warnings
151
201
  - Large codebase strategy with domain-first tiered scanning
202
+
203
+ [Unreleased]: https://github.com/codexstar69/bug-hunter/compare/v3.0.5...HEAD
204
+ [3.0.5]: https://github.com/codexstar69/bug-hunter/compare/v3.0.4...v3.0.5
205
+ [3.0.4]: https://github.com/codexstar69/bug-hunter/compare/v3.0.3...v3.0.4
206
+ [3.0.3]: https://github.com/codexstar69/bug-hunter/compare/v3.0.2...v3.0.3
207
+ [3.0.2]: https://github.com/codexstar69/bug-hunter/compare/v3.0.1...v3.0.2
208
+ [3.0.1]: https://github.com/codexstar69/bug-hunter/compare/v3.0.0...v3.0.1
209
+ [3.0.0]: https://github.com/codexstar69/bug-hunter/compare/v2.4.1...v3.0.0
210
+ [2.4.1]: https://github.com/codexstar69/bug-hunter/compare/v2.4.0...v2.4.1
211
+ [2.4.0]: https://github.com/codexstar69/bug-hunter/compare/v2.3.0...v2.4.0
212
+ [2.3.0]: https://github.com/codexstar69/bug-hunter/compare/v2.2.1...v2.3.0
213
+ [2.2.1]: https://github.com/codexstar69/bug-hunter/compare/v2.2.0...v2.2.1
214
+ [2.2.0]: https://github.com/codexstar69/bug-hunter/compare/v2.1.0...v2.2.0
215
+ [2.1.0]: https://github.com/codexstar69/bug-hunter/compare/v2.0.0...v2.1.0
216
+ [2.0.0]: https://github.com/codexstar69/bug-hunter/compare/v1.0.0...v2.0.0
217
+ [1.0.0]: https://github.com/codexstar69/bug-hunter/releases/tag/v1.0.0
package/README.md CHANGED
@@ -1,11 +1,13 @@
1
1
  <p align="center">
2
- <img src="docs/images/hero.png" alt="Bug Hunter — AI-powered adversarial code security scanner with multi-agent pipeline for automated vulnerability detection, false-positive elimination, and safe auto-fix" width="720">
2
+ <img src="docs/images/2026-03-12-hero-bug-hunter-overview.png" alt="Bug Hunter product overview banner — code and pull requests flow through adversarial review, strategic fix planning, and verified patch delivery" width="720">
3
3
  </p>
4
4
 
5
5
  <h1 align="center">🐛 Bug Hunter</h1>
6
6
  <p align="center"><strong>AI-powered adversarial bug finding that argues with itself to surface real vulnerabilities — and auto-fixes them safely.</strong></p>
7
7
  <p align="center">
8
8
  <a href="#install">Install</a> ·
9
+ <a href="#new-in-this-update">New in This Update</a> ·
10
+ <a href="#start-here">Start Here</a> ·
9
11
  <a href="#usage">Usage</a> ·
10
12
  <a href="#how-the-adversarial-pipeline-works">How It Works</a> ·
11
13
  <a href="#features">Features</a> ·
@@ -47,6 +49,39 @@ npm install -g @aisuite/chub
47
49
 
48
50
  ---
49
51
 
52
+ ## New in This Update
53
+
54
+ This release makes Bug Hunter much better at PR-first auditing and safer at automated remediation.
55
+
56
+ - **PR review is now a first-class workflow.** Review the current PR, the most recent PR, or a specific PR number with `--pr`, `--pr current`, `--pr recent`, or `--pr 123`.
57
+ - **PR security review is now built in.** `--pr-security` runs a PR-scoped security audit with threat-model and dependency context, without editing code.
58
+ - **Strategic remediation is now explicit.** Bug Hunter writes `fix-strategy.json` and `fix-plan.json` before fixes run, so auto-fix decisions stay explainable and reviewable.
59
+ - **The security pack is now bundled locally.** `commit-security-scan`, `security-review`, `threat-model-generation`, and `vulnerability-validation` now ship inside the repo under `skills/`.
60
+ - **Fix execution is harder to break.** This update adds schema-validated fix plans, atomic lock handling, safer worktree cleanup, stash preservation, and shell-safe worker command templating.
61
+
62
+ <p align="center">
63
+ <img src="docs/images/2026-03-12-pr-review-flow.png" alt="PR review workflow banner — pull request scope, security checks, threat-model context, and final verdict in a clean product-style UI" width="100%">
64
+ </p>
65
+
66
+ ## Start Here
67
+
68
+ If you're evaluating the new PR flow, start with one of these:
69
+
70
+ ```bash
71
+ /bug-hunter --pr # review the current PR end to end
72
+ /bug-hunter --pr-security # PR-focused security review without editing code
73
+ /bug-hunter --last-pr --review # review the most recent PR without fixes
74
+ /bug-hunter --plan src/ # build fix-strategy.json + fix-plan.json only
75
+ ```
76
+
77
+ If you just want the default repo audit:
78
+
79
+ ```bash
80
+ /bug-hunter
81
+ ```
82
+
83
+ ---
84
+
50
85
  ## Usage
51
86
 
52
87
  ```bash
@@ -54,9 +89,22 @@ npm install -g @aisuite/chub
54
89
  /bug-hunter src/ # scan a specific directory
55
90
  /bug-hunter lib/auth.ts # scan a single file
56
91
  /bug-hunter --scan-only src/ # report only — no code changes
92
+ /bug-hunter --review src/ # easy alias for --scan-only
57
93
  /bug-hunter --fix --approve src/ # ask before each fix
94
+ /bug-hunter --safe src/ # easy alias for --fix --approve
58
95
  /bug-hunter -b feature-xyz # scan only files changed in branch (vs main)
96
+ /bug-hunter --pr # easy alias for --pr current
97
+ /bug-hunter --pr current # review the current PR end to end
98
+ /bug-hunter --pr recent # review the most recently updated open PR
99
+ /bug-hunter --pr 123 # review a specific PR number
100
+ /bug-hunter --pr-security # PR security review with threat model + CVE context
101
+ /bug-hunter --review-pr # easy alias for --pr current
102
+ /bug-hunter --last-pr --review # review the most recent PR without editing
59
103
  /bug-hunter --staged # scan staged files (pre-commit hook)
104
+ /bug-hunter --plan src/ # easy alias for --plan-only
105
+ /bug-hunter --preview src/ # easy alias for --fix --dry-run
106
+ /bug-hunter --security-review src/ # enterprise security workflow for a path or repo
107
+ /bug-hunter --validate-security src/ # force exploitability validation for security findings
60
108
  /bug-hunter --deps --threat-model # full audit: CVEs + STRIDE threat model
61
109
  ```
62
110
 
@@ -72,6 +120,8 @@ This eliminates the two biggest problems with AI code review: **false positive o
72
120
 
73
121
  ## Table of Contents
74
122
 
123
+ - [New in This Update](#new-in-this-update)
124
+ - [Start Here](#start-here)
75
125
  - [How the Adversarial Pipeline Works](#how-the-adversarial-pipeline-works)
76
126
  - [Features](#features)
77
127
  - [Security Classification — STRIDE, CWE, and CVSS](#security-classification-stride-cwe-cvss)
@@ -162,6 +212,28 @@ This scoring creates a **self-correcting equilibrium**. The Hunter doesn't flood
162
212
 
163
213
  ## Features
164
214
 
215
+ ### Bundled Local Security Skills
216
+
217
+ Bug Hunter now ships with a portable local security pack under `skills/`:
218
+ - `commit-security-scan`
219
+ - `security-review`
220
+ - `threat-model-generation`
221
+ - `vulnerability-validation`
222
+
223
+ These are bundled inside the repository so the system does not depend on external marketplace paths or machine-specific skill installs. They are adapted to Bug Hunter-native artifacts like `.bug-hunter/threat-model.md`, `.bug-hunter/security-config.json`, `.bug-hunter/findings.json`, and `.bug-hunter/referee.json`.
224
+
225
+ They are now wired into the main Bug Hunter flow:
226
+ - PR-focused security review routes into `commit-security-scan`
227
+ - `--threat-model` routes into `threat-model-generation`
228
+ - enterprise/full security review routes into `security-review`
229
+ - exploitability confirmation for security findings routes into `vulnerability-validation`
230
+
231
+ Bug Hunter remains the top-level orchestrator; the bundled skills are capability modules inside that orchestration.
232
+
233
+ <p align="center">
234
+ <img src="docs/images/2026-03-12-security-pack.png" alt="Bundled local security pack banner — Bug Hunter orchestrates commit security scan, security review, threat-model generation, and vulnerability validation" width="100%">
235
+ </p>
236
+
165
237
  ### Zero-Token Triage — Instant File Classification
166
238
 
167
239
  Before any AI agent runs, a lightweight Node.js script (`scripts/triage.cjs`) scans your entire codebase in **under 2 seconds**. It classifies every file by risk level — CRITICAL, HIGH, MEDIUM, LOW, or CONTEXT-ONLY — computes a token budget, and selects the optimal scanning strategy.
@@ -280,7 +352,7 @@ Bug Hunter automatically selects the optimal scanning strategy based on your cod
280
352
  | **120–180 files** | Scaled | State-driven chunks with resume capability |
281
353
  | **180+ files** | Large-codebase | Domain-scoped pipelines + boundary audits (loop mode, on by default) |
282
354
 
283
- Loop mode is **on by default** — the pipeline runs iteratively until every critical and high-risk file has been audited, with persistent state enabling stop-and-resume workflows. Use `--no-loop` for a single-pass scan.
355
+ Loop mode is **on by default** — the pipeline runs iteratively until every queued scannable source file has been audited and, in fix mode, every discovered fixable bug has been processed. The agent should keep descending through CRITICAL → HIGH → MEDIUM → LOW automatically unless the user interrupts. Use `--no-loop` for a single-pass scan.
284
356
 
285
357
  ---
286
358
 
@@ -385,6 +457,10 @@ Dependency findings are saved to `.bug-hunter/dep-findings.json` and cross-refer
385
457
 
386
458
  Bug Hunter doesn't throw uncoordinated patches at your codebase. After the Referee confirms real bugs, the system builds a **strategic fix plan** with safety gates at every step — the difference between "an AI that edits files" and "an AI that engineers patches."
387
459
 
460
+ <p align="center">
461
+ <img src="docs/images/2026-03-12-fix-plan-rollout.png" alt="Strategic fix planning banner — strategy, confidence gating, canary rollout, verification, and rollback safety" width="100%">
462
+ </p>
463
+
388
464
  ### Phase 1 — Safety Setup and Git Branching
389
465
 
390
466
  - Verifies you're in a git repository (warns if not — no rollback without version control)
@@ -400,14 +476,26 @@ Bug Hunter doesn't throw uncoordinated patches at your codebase. After the Refer
400
476
  - Runs the test suite once to record the **passing baseline**
401
477
  - This baseline is critical: if a fix causes a previously-passing test to fail, the fix is auto-reverted
402
478
 
403
- ### Phase 3 — Confidence-Gated Fix Queue
479
+ ### Phase 3 — Strategy Before Patching
480
+
481
+ Before the Fixer edits anything, Bug Hunter now writes a canonical `fix-strategy.json` artifact.
482
+ It clusters confirmed bugs and assigns each to one of four tracks:
483
+
484
+ - **safe-autofix** — localized enough for guarded patching
485
+ - **manual-review** — confidence too low for unattended edits
486
+ - **larger-refactor** — needs coordinated multi-file changes
487
+ - **architectural-remediation** — broad contract or design issue; report, don't auto-edit
488
+
489
+ This makes the remediation plan visible before execution. Users who want review without mutation can run `--plan-only` to stop after strategy + plan generation.
490
+
491
+ ### Phase 4 — Confidence-Gated Fix Queue
404
492
 
405
493
  - **75% confidence gate**: only bugs the Referee confirmed with ≥75% confidence are auto-fixed
406
494
  - Bugs below the threshold are marked `MANUAL_REVIEW` — reported but never auto-edited
407
495
  - **Conflict resolution**: same-file bugs are grouped and ordered to prevent overlapping edits
408
496
  - **Severity ordering**: Critical → High → Medium → Low
409
497
 
410
- ### Phase 4 — Canary Rollout Strategy
498
+ ### Phase 5 — Canary Rollout Strategy
411
499
 
412
500
  ```
413
501
  Fix Plan: 7 eligible bugs | canary: 2 | rollout: 5 | manual-review: 3
@@ -470,6 +558,10 @@ This prevents a common failure: the Fixer "fixing" a bug using an API pattern th
470
558
 
471
559
  ## Structured JSON Output for CI/CD Integration
472
560
 
561
+ <p align="center">
562
+ <img src="docs/images/2026-03-12-machine-readable-artifacts.png" alt="Machine-readable artifacts banner — findings, skeptic, referee, fix strategy, fix plan, and CI automation outputs" width="100%">
563
+ </p>
564
+
473
565
  Every run produces machine-readable output at `.bug-hunter/findings.json` for pipeline automation:
474
566
 
475
567
  ```json
@@ -523,12 +615,20 @@ Every run creates a `.bug-hunter/` directory (add to `.gitignore`) containing:
523
615
  |------|-----------|----------|
524
616
  | `report.md` | Always | Human-readable report: confirmed bugs, dismissed findings, coverage stats |
525
617
  | `findings.json` | Always | Machine-readable JSON for CI/CD and dashboards |
618
+ | `skeptic.json` | When findings exist | Canonical Skeptic challenge artifact |
619
+ | `referee.json` | When findings exist | Canonical Referee verdict artifact |
620
+ | `coverage.json` | Loop/autonomous runs | Canonical coverage and loop state |
526
621
  | `triage.json` | Always | File classification, risk map, strategy selection, token estimates |
527
622
  | `recon.md` | Always | Tech stack analysis, attack surface mapping, scan order |
528
- | `findings.md` | Always | Raw Hunter findings before Skeptic review |
529
- | `skeptic.md` | Always | Skeptic challenge decisions with evidence |
530
- | `referee.md` | Always | Referee final verdicts with enrichment |
531
- | `fix-report.md` | Fix mode | Per-bug fix status, verification results, git diff summary |
623
+ | `findings.md` | Optional | Markdown companion rendered from `findings.json` |
624
+ | `skeptic.md` | Optional | Markdown companion rendered from `skeptic.json` |
625
+ | `referee.md` | Optional | Markdown companion rendered from `referee.json` |
626
+ | `coverage.md` | Loop/autonomous runs | Markdown companion rendered from `coverage.json` |
627
+ | `fix-strategy.json` | When findings exist | Canonical remediation strategy: safe autofix vs manual review vs refactor vs architectural work |
628
+ | `fix-strategy.md` | When findings exist | Markdown companion rendered from `fix-strategy.json` |
629
+ | `fix-plan.json` | Plan/fix mode | Canonical execution plan for canary rollout, gating, and safe fix order |
630
+ | `fix-plan.md` | Plan/fix mode | Markdown companion rendered from `fix-plan.json` |
631
+ | `fix-report.md` | Fix mode | Markdown companion for fix results |
532
632
  | `fix-report.json` | Fix mode | Machine-readable fix results for CI/CD gating and dashboards |
533
633
  | `worktree-*/` | Worktree fix mode | Temporary isolated worktrees for Fixer subagents (auto-cleaned) |
534
634
  | `threat-model.md` | `--threat-model` | STRIDE threat model with trust boundaries and data flows |
@@ -555,16 +655,30 @@ The pipeline adapts to whatever it finds. Triage classifies files by extension a
555
655
  | `src/` or `file.ts` | Scan specific path |
556
656
  | `-b branch-name` | Scan files changed in branch (vs main) |
557
657
  | `-b branch --base dev` | Scan branch diff against specific base |
658
+ | `--pr` | Easy alias for `--pr current` |
659
+ | `--pr current` | Review the current PR using GitHub metadata when available, with git fallback on the current branch |
660
+ | `--pr recent` | Review the most recently updated open PR |
661
+ | `--pr 123` | Review a specific PR number |
662
+ | `--pr-security` | Enterprise PR security review: PR scope + threat model + dependency context |
663
+ | `--last-pr` | Easy alias for `--pr recent` |
664
+ | `--review-pr` | Easy alias for `--pr current` |
558
665
  | `--staged` | Scan git-staged files (pre-commit hook integration) |
559
666
  | `--scan-only` | Report only — no code changes |
667
+ | `--review` | Easy alias for `--scan-only` |
560
668
  | `--fix` | Find and auto-fix bugs (default behavior) |
669
+ | `--plan-only` | Build `fix-strategy.json` + fix plan, then stop before the Fixer edits code |
670
+ | `--plan` | Easy alias for `--plan-only` |
561
671
  | `--approve` | Interactive mode — ask before each fix |
672
+ | `--safe` | Easy alias for `--fix --approve` |
562
673
  | `--autonomous` | Full auto-fix with zero intervention |
563
- | `--loop` | Iterative mode runs until 100% critical file coverage **(on by default)** |
674
+ | `--dry-run` | Preview planned fixes without editing files — outputs diff previews and `fix-report.json` |
675
+ | `--preview` | Easy alias for `--fix --dry-run` |
676
+ | `--loop` | Iterative mode — runs until 100% queued source-file coverage **(on by default)** |
564
677
  | `--no-loop` | Disable loop mode — single-pass scan only |
565
678
  | `--deps` | Include dependency CVE scanning with reachability analysis |
566
679
  | `--threat-model` | Generate or use STRIDE threat model for targeted security analysis |
567
- | `--dry-run` | Preview planned fixes without editing files outputs diff previews and `fix-report.json` |
680
+ | `--security-review` | Run the bundled enterprise security-review workflow with threat model + CVE + validation context |
681
+ | `--validate-security` | Force vulnerability-validation for confirmed security findings |
568
682
 
569
683
  All flags compose: `/bug-hunter --deps --threat-model --fix src/`
570
684
 
@@ -574,6 +688,8 @@ All flags compose: `/bug-hunter --deps --threat-model --fix src/`
574
688
 
575
689
  Bug Hunter ships with a test fixture containing an Express app with **6 intentionally planted bugs** (2 Critical, 3 Medium, 1 Low):
576
690
 
691
+ The repository also ships with **60 Node.js regression tests** covering orchestration, schemas, PR scope resolution, fix-plan validation, lock behavior, worktree lifecycle, and the bundled local security-skill routing. To scan the planted-bug fixture, run:
692
+
577
693
  ```bash
578
694
  /bug-hunter test-fixture/
579
695
  ```
@@ -594,6 +710,8 @@ bug-hunter/
594
710
  ├── SKILL.md # Pipeline orchestration logic
595
711
  ├── README.md # This documentation
596
712
  ├── CHANGELOG.md # Version history
713
+ ├── llms.txt # Short LLM-facing summary
714
+ ├── llms-full.txt # Full LLM-facing reference
597
715
  ├── package.json # npm package config (@codexstar/bug-hunter)
598
716
 
599
717
  ├── bin/
@@ -601,11 +719,15 @@ bug-hunter/
601
719
 
602
720
  ├── docs/
603
721
  │ └── images/ # Documentation visuals
604
- │ ├── hero.png # Hero banner
605
- │ ├── pipeline-overview.png # 8-stage pipeline diagram
606
- │ ├── adversarial-debate.png # Hunter vs Skeptic vs Referee flow
607
- │ ├── doc-verify-fix-plan.png # Documentation verification + fix planning
608
- └── security-finding-card.png # Enriched finding card with CVSS
722
+ │ ├── 2026-03-12-hero-bug-hunter-overview.png # Product overview hero
723
+ │ ├── 2026-03-12-pr-review-flow.png # PR review + security workflow
724
+ │ ├── 2026-03-12-security-pack.png # Bundled local security pack
725
+ │ ├── 2026-03-12-fix-plan-rollout.png # Strategic fix planning + rollout
726
+ │ ├── 2026-03-12-machine-readable-artifacts.png # CI/CD artifact outputs
727
+ │ ├── pipeline-overview.png # 8-stage pipeline diagram
728
+ │ ├── adversarial-debate.png # Hunter vs Skeptic vs Referee flow
729
+ │ ├── doc-verify-fix-plan.png # Documentation verification + fix planning
730
+ │ └── security-finding-card.png # Enriched finding card with CVSS
609
731
 
610
732
  ├── modes/ # Execution strategies by codebase size
611
733
  │ ├── single-file.md # 1 file
@@ -632,6 +754,19 @@ bug-hunter/
632
754
  │ ├── hunter-examples.md # 3 real + 2 false positives
633
755
  │ └── skeptic-examples.md # 2 accepted + 2 disproved + 1 review
634
756
 
757
+ ├── schemas/ # Canonical JSON artifact contracts
758
+ │ ├── findings.schema.json # Hunter findings schema
759
+ │ ├── skeptic.schema.json # Skeptic artifact schema
760
+ │ ├── referee.schema.json # Referee artifact schema
761
+ │ ├── fix-strategy.schema.json # Strategic remediation schema
762
+ │ └── fix-plan.schema.json # Fix execution schema
763
+
764
+ ├── skills/ # Bundled local security pack
765
+ │ ├── commit-security-scan/
766
+ │ ├── security-review/
767
+ │ ├── threat-model-generation/
768
+ │ └── vulnerability-validation/
769
+
635
770
  ├── scripts/ # Node.js helpers (zero AI tokens)
636
771
  │ ├── triage.cjs # File classification (<2s)
637
772
  │ ├── dep-scan.cjs # Dependency CVE scanner