@codexstar/bug-hunter 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +151 -0
  2. package/LICENSE +21 -0
  3. package/README.md +665 -0
  4. package/SKILL.md +624 -0
  5. package/bin/bug-hunter +222 -0
  6. package/evals/evals.json +362 -0
  7. package/modes/_dispatch.md +121 -0
  8. package/modes/extended.md +94 -0
  9. package/modes/fix-loop.md +115 -0
  10. package/modes/fix-pipeline.md +384 -0
  11. package/modes/large-codebase.md +212 -0
  12. package/modes/local-sequential.md +143 -0
  13. package/modes/loop.md +125 -0
  14. package/modes/parallel.md +113 -0
  15. package/modes/scaled.md +76 -0
  16. package/modes/single-file.md +38 -0
  17. package/modes/small.md +86 -0
  18. package/package.json +56 -0
  19. package/prompts/doc-lookup.md +44 -0
  20. package/prompts/examples/hunter-examples.md +131 -0
  21. package/prompts/examples/skeptic-examples.md +87 -0
  22. package/prompts/fixer.md +103 -0
  23. package/prompts/hunter.md +146 -0
  24. package/prompts/recon.md +159 -0
  25. package/prompts/referee.md +122 -0
  26. package/prompts/skeptic.md +143 -0
  27. package/prompts/threat-model.md +122 -0
  28. package/scripts/bug-hunter-state.cjs +537 -0
  29. package/scripts/code-index.cjs +541 -0
  30. package/scripts/context7-api.cjs +133 -0
  31. package/scripts/delta-mode.cjs +219 -0
  32. package/scripts/dep-scan.cjs +343 -0
  33. package/scripts/doc-lookup.cjs +316 -0
  34. package/scripts/fix-lock.cjs +167 -0
  35. package/scripts/init-test-fixture.sh +19 -0
  36. package/scripts/payload-guard.cjs +197 -0
  37. package/scripts/run-bug-hunter.cjs +892 -0
  38. package/scripts/tests/bug-hunter-state.test.cjs +87 -0
  39. package/scripts/tests/code-index.test.cjs +57 -0
  40. package/scripts/tests/delta-mode.test.cjs +47 -0
  41. package/scripts/tests/fix-lock.test.cjs +36 -0
  42. package/scripts/tests/fixtures/flaky-worker.cjs +63 -0
  43. package/scripts/tests/fixtures/low-confidence-worker.cjs +73 -0
  44. package/scripts/tests/fixtures/success-worker.cjs +42 -0
  45. package/scripts/tests/payload-guard.test.cjs +41 -0
  46. package/scripts/tests/run-bug-hunter.test.cjs +403 -0
  47. package/scripts/tests/test-utils.cjs +59 -0
  48. package/scripts/tests/worktree-harvest.test.cjs +297 -0
  49. package/scripts/triage.cjs +528 -0
  50. package/scripts/worktree-harvest.cjs +516 -0
  51. package/templates/subagent-wrapper.md +109 -0
package/bin/bug-hunter ADDED
@@ -0,0 +1,222 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * bug-hunter CLI — installs the bug-hunter skill into your agent's skill directory.
5
+ *
6
+ * Usage:
7
+ * bug-hunter install # Auto-detect agent and install skill
8
+ * bug-hunter install --agent claude-code
9
+ * bug-hunter install --path ~/.custom/skills
10
+ * bug-hunter doctor # Check environment (node, chub, context7, git)
11
+ * bug-hunter info # Show skill metadata
12
+ */
13
+
14
+ const fs = require('fs');
15
+ const path = require('path');
16
+ const { execFileSync } = require('child_process');
17
+
18
// Root of the published package (the parent of this bin/ directory).
const SKILL_SRC = path.resolve(__dirname, '..');
// Folder name the skill is installed under inside an agent's skills directory.
const SKILL_NAME = 'bug-hunter';

// Agent skill directories in priority order.
// Each pair is [agent id, dot-directory under the user's home directory].
const AGENT_DIRS = [
  ['claude-code', '.claude'],
  ['codex', '.codex'],
  ['agents', '.agents'],
  ['cursor', '.cursor'],
  ['kiro', '.kiro'],
].map(([agent, dotDir]) => ({
  agent,
  dir: path.join(require('os').homedir(), dotDir, 'skills', SKILL_NAME),
}));
29
+
30
/**
 * Print the CLI help text (commands, options, examples) to stdout.
 * The help text is a single template literal; .trim() removes the newline
 * that follows the opening backtick and the one before the closing backtick.
 */
+ function usage() {
31
+ console.log(`
32
+ bug-hunter - Adversarial AI bug hunting skill
33
+
34
+ Commands:
35
+ install [--agent <name>] [--path <dir>] Install skill into agent directory
36
+ doctor Check environment readiness
37
+ info Show skill metadata
38
+
39
+ Options:
40
+ --agent <name> Target agent: claude-code, codex, cursor, kiro, agents
41
+ --path <dir> Custom install directory (overrides --agent)
42
+
43
+ Examples:
44
+ bug-hunter install # Auto-detect agent
45
+ bug-hunter install --agent claude-code # Specific agent
46
+ bug-hunter install --path ~/my-skills/bug-hunter
47
+ bug-hunter doctor # Check node, chub, context7
48
+ `.trim());
49
+ }
50
+
51
// Entry names that are never copied, at any depth: VCS data, dependencies,
// scratch space and bug-hunter's own output directory.
const COPY_SKIP_ENTRIES = new Set(['.git', 'node_modules', 'tmp', '.bug-hunter']);

/**
 * Recursively copy a file or directory tree from `src` to `dest`.
 *
 * Directories are created as needed; files are copied with fs.copyFileSync,
 * overwriting any existing file at the destination. Entries named in
 * COPY_SKIP_ENTRIES are skipped.
 *
 * The destination root itself is never descended into. Without that guard,
 * copying into a directory located inside `src` would make the walk pick up
 * its own freshly created output and recurse without end.
 *
 * @param {string} src - Source file or directory.
 * @param {string} dest - Destination path for this entry.
 * @param {string} [destRoot] - Absolute path of the top-level destination;
 *   defaults to `dest` of the outermost call. Callers normally omit it.
 */
function copyRecursive(src, dest, destRoot = path.resolve(dest)) {
  const stat = fs.statSync(src);
  if (!stat.isDirectory()) {
    fs.copyFileSync(src, dest);
    return;
  }

  fs.mkdirSync(dest, { recursive: true });
  for (const entry of fs.readdirSync(src)) {
    if (COPY_SKIP_ENTRIES.has(entry)) continue;

    const from = path.join(src, entry);
    // Skip the install target when it lives inside the source tree.
    if (path.resolve(from) === destRoot) continue;

    copyRecursive(from, path.join(dest, entry), destRoot);
  }
}
64
+
65
/**
 * Install the bug-hunter skill by copying this package into a skills directory.
 *
 * Target resolution order:
 *   1. `--path <dir>`: used directly, resolved to an absolute path.
 *   2. `--agent <name>`: looked up in AGENT_DIRS; an unknown name exits with code 1.
 *   3. Auto-detect: the first AGENT_DIRS entry whose agent home
 *      (two levels above the skill dir, e.g. ~/.claude) exists.
 *   4. Fallback: the 'agents' entry.
 *
 * A `--path` or `--agent` flag without a value exits with code 1 instead of
 * being ignored, so a typo cannot silently redirect the install to an
 * auto-detected directory. Other unrecognized arguments are ignored.
 * doctor() runs after a successful copy.
 *
 * @param {string[]} args - CLI arguments that follow the `install` command.
 */
function install(args) {
  let targetDir = null;
  let agentName = null;

  // Parse args
  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    if (flag !== '--path' && flag !== '--agent') continue;

    const value = args[i + 1];
    // A following token that is itself a flag means the value was omitted.
    if (!value || value.startsWith('--')) {
      console.error(`Missing value for ${flag}`);
      process.exit(1);
    }
    i++;

    if (flag === '--path') {
      targetDir = path.resolve(value);
    } else {
      agentName = value;
    }
  }

  // Resolve target directory
  if (!targetDir) {
    if (agentName) {
      const match = AGENT_DIRS.find(a => a.agent === agentName);
      if (!match) {
        console.error(`Unknown agent: ${agentName}`);
        console.error(`Available: ${AGENT_DIRS.map(a => a.agent).join(', ')}`);
        process.exit(1);
      }
      targetDir = match.dir;
    } else {
      // Auto-detect: use first agent whose parent dir exists
      for (const { agent, dir } of AGENT_DIRS) {
        const parent = path.dirname(path.dirname(dir)); // e.g. ~/.claude
        if (fs.existsSync(parent)) {
          targetDir = dir;
          agentName = agent;
          break;
        }
      }
      if (!targetDir) {
        // Default to ~/.agents/skills/bug-hunter
        targetDir = AGENT_DIRS.find(a => a.agent === 'agents').dir;
        agentName = 'agents';
      }
    }
  }

  console.log(`Installing bug-hunter skill...`);
  console.log(` Source: ${SKILL_SRC}`);
  console.log(` Target: ${targetDir}`);
  if (agentName) console.log(` Agent: ${agentName}`);

  // Copy skill files; report filesystem errors (permissions, disk full, ...)
  // as a short message instead of an uncaught stack trace.
  try {
    copyRecursive(SKILL_SRC, targetDir);
  } catch (err) {
    console.error(`\n Install failed: ${err.message}`);
    process.exit(1);
  }

  console.log(`\n Installed successfully.\n`);
  console.log(` Usage: /bug-hunter src/`);
  console.log(` Docs: https://github.com/codexstar69/bug-hunter`);

  // Run doctor automatically after install
  console.log('');
  doctor();
}
122
+
123
/**
 * Check the local environment and print one status line per dependency.
 *
 * Checks, in order:
 *   - Node.js (`node --version`): counted as an issue when it cannot be run.
 *   - Context Hub (`chub --help`): optional; absence is reported, not counted.
 *   - Context7 fallback script inside the package: optional.
 *   - Git (`git --version`): counted as an issue when it cannot be run.
 *
 * @returns {number} Number of required tools that were not found.
 */
function doctor() {
  console.log('Environment check:\n');
  let issues = 0;

  // Node.js
  try {
    const nodeVersion = execFileSync('node', ['--version'], { encoding: 'utf8' }).trim();
    console.log(` [ok] Node.js ${nodeVersion}`);
  } catch {
    console.log(' [!!] Node.js not found — required for doc verification');
    issues++;
  }

  // Context Hub (chub)
  try {
    const probeOptions = { stdio: 'ignore', timeout: 3000 };
    if (process.platform === 'win32') {
      // npm exposes globally installed CLIs as .cmd shims on Windows, and those
      // can only be launched through a shell. The command is a fixed string,
      // so no user input reaches the shell.
      execFileSync('chub --help', { ...probeOptions, shell: true });
    } else {
      execFileSync('chub', ['--help'], probeOptions);
    }
    console.log(' [ok] Context Hub (chub) installed — curated docs available');
  } catch {
    console.log(' [--] Context Hub (chub) not installed — will fall back to Context7');
    console.log(' Install for better doc verification: npm install -g @aisuite/chub');
    console.log(' More info: https://github.com/andrewyng/context-hub');
  }

  // Context7 API. fs.existsSync reports false rather than throwing, so this
  // check needs no try/catch.
  const c7Script = path.join(SKILL_SRC, 'scripts', 'context7-api.cjs');
  if (fs.existsSync(c7Script)) {
    console.log(' [ok] Context7 fallback available');
  } else {
    console.log(' [--] Context7 script not found (non-critical)');
  }

  // Git
  try {
    const gitVersion = execFileSync('git', ['--version'], { encoding: 'utf8' }).trim();
    console.log(` [ok] ${gitVersion}`);
  } catch {
    console.log(' [!!] Git not found — required for fix pipeline');
    issues++;
  }

  console.log('');
  if (issues === 0) {
    console.log(' Ready to hunt bugs.\n');
  } else {
    console.log(` ${issues} issue(s) found. Fix them for full functionality.\n`);
  }

  return issues;
}
174
+
175
/**
 * Print the skill metadata found in the front matter of SKILL.md, followed by
 * install instructions and the repository URL.
 *
 * The front matter is the text between a leading `---` line and the next
 * `---` line. LF and CRLF line endings are both accepted, and a leading
 * byte-order mark is ignored, so the metadata still prints when the file was
 * saved with Windows line endings. If no front matter is found, only the
 * install instructions are printed.
 *
 * Exits with code 1 when SKILL.md does not exist in the package root.
 */
function info() {
  const skillMd = path.join(SKILL_SRC, 'SKILL.md');
  if (!fs.existsSync(skillMd)) {
    console.error('SKILL.md not found');
    process.exit(1);
  }

  // Drop a UTF-8 BOM so the `^---` anchor can match at the start of the file.
  const content = fs.readFileSync(skillMd, 'utf8').replace(/^\uFEFF/, '');
  const frontmatterMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (frontmatterMatch) {
    console.log('\nbug-hunter skill metadata:\n');
    for (const line of frontmatterMatch[1].split(/\r?\n/)) {
      console.log(` ${line}`);
    }
    console.log('');
  }

  console.log(` Install: npx skills add codexstar69/bug-hunter`);
  console.log(` Or: npm install -g @codexstar/bug-hunter && bug-hunter install`);
  console.log(` Repo: https://github.com/codexstar69/bug-hunter`);
  console.log('');
}
198
+
199
+ // Main
200
// Entry point: the first CLI argument selects the command; the remaining
// arguments are forwarded to `install` only.
const args = process.argv.slice(2);
const command = args[0];

if (command === 'install') {
  install(args.slice(1));
} else if (command === 'doctor') {
  doctor();
} else if (command === 'info') {
  info();
} else if (command === '--help' || command === '-h' || command === undefined) {
  // No command at all behaves like an explicit help request.
  usage();
} else {
  console.error(`Unknown command: ${command}`);
  usage();
  process.exit(1);
}
package/evals/evals.json ADDED
@@ -0,0 +1,362 @@
1
+ {
2
+ "skill_name": "bug-hunter",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "/bug-hunter test-fixture/",
7
+ "expected_output": "Full pipeline execution on the included test fixture (Express app with 6 planted bugs). Should run Recon -> Hunter -> Skeptic -> Referee and produce a final report confirming at least 5 of 6 planted bugs with severity ratings, file paths, and suggested fixes.",
8
+ "files": [
9
+ "test-fixture/server.js",
10
+ "test-fixture/auth.js",
11
+ "test-fixture/users.js",
12
+ "test-fixture/db.js"
13
+ ],
14
+ "assertions": [
15
+ {
16
+ "text": "Pipeline runs all phases: Recon, Hunter, Skeptic, Referee",
17
+ "type": "content_check"
18
+ },
19
+ {
20
+ "text": "At least 5 of 6 planted bugs are confirmed in the final report",
21
+ "type": "content_check"
22
+ },
23
+ {
24
+ "text": "Each confirmed bug includes file path, line numbers, severity, and suggested fix",
25
+ "type": "content_check"
26
+ },
27
+ {
28
+ "text": "False positives are challenged and filtered by the Skeptic/Referee pipeline",
29
+ "type": "content_check"
30
+ },
31
+ {
32
+ "text": "Final report includes scan metadata (mode, files scanned, coverage)",
33
+ "type": "content_check"
34
+ },
35
+ {
36
+ "text": "Fix pipeline is triggered by default when confirmed bugs exist; only --scan-only disables fixes",
37
+ "type": "content_check"
38
+ }
39
+ ]
40
+ },
41
+ {
42
+ "id": 2,
43
+ "prompt": "/bug-hunter src/api/auth.ts",
44
+ "expected_output": "Single-file mode scan of an auth file. Should skip Recon (not needed for single file), run one Hunter, one Skeptic, and one Referee. Output should focus on security and logic bugs in the auth file specifically.",
45
+ "files": [],
46
+ "assertions": [
47
+ {
48
+ "text": "Selects single-file mode (1 file detected)",
49
+ "type": "content_check"
50
+ },
51
+ {
52
+ "text": "Skips Recon agent (not needed for single-file mode)",
53
+ "type": "content_check"
54
+ },
55
+ {
56
+ "text": "Hunter scans the target file and reports findings with BUG-ID format",
57
+ "type": "content_check"
58
+ },
59
+ {
60
+ "text": "Skeptic challenges the findings with code-based counter-arguments",
61
+ "type": "content_check"
62
+ },
63
+ {
64
+ "text": "Referee produces a final verdict (REAL BUG or NOT A BUG) for each finding",
65
+ "type": "content_check"
66
+ }
67
+ ]
68
+ },
69
+ {
70
+ "id": 3,
71
+ "prompt": "/bug-hunter -b feature-auth --base develop",
72
+ "expected_output": "Branch diff mode. Should run git diff to find changed files between feature-auth and develop branches, filter out non-source files, then run the full pipeline on the changed source files.",
73
+ "files": [],
74
+ "assertions": [
75
+ {
76
+ "text": "Runs git diff --name-only to extract changed files between branches",
77
+ "type": "content_check"
78
+ },
79
+ {
80
+ "text": "Filters out non-source files (configs, docs, assets, lockfiles)",
81
+ "type": "content_check"
82
+ },
83
+ {
84
+ "text": "Reports the number of source files to scan after filtering",
85
+ "type": "content_check"
86
+ },
87
+ {
88
+ "text": "Selects appropriate mode based on file count (small, parallel, extended, etc.)",
89
+ "type": "content_check"
90
+ }
91
+ ]
92
+ },
93
+ {
94
+ "id": 4,
95
+ "prompt": "/bug-hunter --staged",
96
+ "expected_output": "Staged file mode for pre-commit checking. Should run git diff --cached --name-only to get staged files, filter non-source files, then scan the staged source files.",
97
+ "files": [],
98
+ "assertions": [
99
+ {
100
+ "text": "Runs git diff --cached --name-only to get staged files",
101
+ "type": "content_check"
102
+ },
103
+ {
104
+ "text": "Filters out non-source files from the staged list",
105
+ "type": "content_check"
106
+ },
107
+ {
108
+ "text": "Scans full file contents of staged files (not just diffs)",
109
+ "type": "content_check"
110
+ }
111
+ ]
112
+ },
113
+ {
114
+ "id": 5,
115
+ "prompt": "/bug-hunter --fix src/",
116
+ "expected_output": "Full pipeline with auto-fix. After Phase 1 (find & verify), should proceed to Phase 2: create a git branch, acquire single-writer lock, detect test infrastructure, capture test baseline, run Fixer clusters sequentially with checkpoint commits, run post-fix tests, auto-revert regressions, and release lock.",
117
+ "files": [],
118
+ "assertions": [
119
+ {
120
+ "text": "Creates a git safety branch (bug-hunter-fix-*) before applying fixes",
121
+ "type": "content_check"
122
+ },
123
+ {
124
+ "text": "Detects test command from package.json or project config",
125
+ "type": "content_check"
126
+ },
127
+ {
128
+ "text": "Captures test baseline before applying fixes",
129
+ "type": "content_check"
130
+ },
131
+ {
132
+ "text": "Fixer agents implement minimal, surgical code changes",
133
+ "type": "content_check"
134
+ },
135
+ {
136
+ "text": "Each fix is a separate checkpoint commit with descriptive message",
137
+ "type": "content_check"
138
+ },
139
+ {
140
+ "text": "Post-fix test run compares against baseline (new failures vs pre-existing)",
141
+ "type": "content_check"
142
+ },
143
+ {
144
+ "text": "Fixes that cause new test failures are auto-reverted",
145
+ "type": "content_check"
146
+ },
147
+ {
148
+ "text": "Acquires and releases .claude/bug-hunter-fix.lock around fix phase",
149
+ "type": "content_check"
150
+ },
151
+ {
152
+ "text": "Auto-fixes only bugs that pass confidence eligibility threshold",
153
+ "type": "content_check"
154
+ }
155
+ ]
156
+ },
157
+ {
158
+ "id": 6,
159
+ "prompt": "/bug-hunter --loop src/",
160
+ "expected_output": "Loop mode for thorough coverage. Should create ralph-loop state files, iterate the pipeline until all CRITICAL and HIGH files are scanned, track coverage in .claude/bug-hunter-coverage.md, and mark ALL_TASKS_COMPLETE when done.",
161
+ "files": [],
162
+ "assertions": [
163
+ {
164
+ "text": "Creates .claude/ralph-loop.local.md state file for loop mode",
165
+ "type": "content_check"
166
+ },
167
+ {
168
+ "text": "Creates or updates .claude/bug-hunter-coverage.md with machine-parseable format",
169
+ "type": "content_check"
170
+ },
171
+ {
172
+ "text": "Tracks file coverage status (DONE, PARTIAL, SKIPPED) per iteration",
173
+ "type": "content_check"
174
+ },
175
+ {
176
+ "text": "Subsequent iterations only scan uncovered files (no re-scanning DONE files)",
177
+ "type": "content_check"
178
+ },
179
+ {
180
+ "text": "Marks ALL_TASKS_COMPLETE when all CRITICAL and HIGH files show DONE",
181
+ "type": "content_check"
182
+ }
183
+ ]
184
+ },
185
+ {
186
+ "id": 7,
187
+ "prompt": "Can you check my Express API for security vulnerabilities? The code is in src/",
188
+ "expected_output": "Should trigger the bug-hunter skill (even though the user didn't say /bug-hunter) and run a security-focused scan on the src/ directory. The deep Hunter should prioritize security findings, with optional triage hints when enabled.",
189
+ "files": [],
190
+ "assertions": [
191
+ {
192
+ "text": "Triggers bug-hunter skill from natural language (security audit request)",
193
+ "type": "content_check"
194
+ },
195
+ {
196
+ "text": "Runs Recon to map architecture and identify trust boundaries",
197
+ "type": "content_check"
198
+ },
199
+ {
200
+ "text": "Deep Hunter focuses on injection, auth bypass, input validation, and secrets exposure in security audit requests",
201
+ "type": "content_check"
202
+ },
203
+ {
204
+ "text": "Output includes severity ratings (Critical, Medium, Low) for each finding",
205
+ "type": "content_check"
206
+ },
207
+ {
208
+ "text": "Framework-specific protections are checked (Express middleware, helmet, etc.)",
209
+ "type": "content_check"
210
+ }
211
+ ]
212
+ },
213
+ {
214
+ "id": 8,
215
+ "prompt": "/bug-hunter --fix --approve src/auth/",
216
+ "expected_output": "Fix mode with approval. Should find bugs in auth directory, then fix them but prompt the user before each edit (approval mode). Fixer agents run in default mode rather than auto mode.",
217
+ "files": [],
218
+ "assertions": [
219
+ {
220
+ "text": "APPROVE_MODE is set to true from --approve flag",
221
+ "type": "content_check"
222
+ },
223
+ {
224
+ "text": "Fixer agents run in mode: default (user reviews each edit)",
225
+ "type": "content_check"
226
+ },
227
+ {
228
+ "text": "Reports 'Running in approval mode' to the user",
229
+ "type": "content_check"
230
+ },
231
+ {
232
+ "text": "Fixes are still committed as individual checkpoint commits",
233
+ "type": "content_check"
234
+ }
235
+ ]
236
+ },
237
+ {
238
+ "id": 9,
239
+ "prompt": "/bug-hunter huge-repo/",
240
+ "expected_output": "Large-repo run should initialize .claude/bug-hunter-state.json, split files into sequential chunks, and resume from state if interrupted.",
241
+ "files": [],
242
+ "assertions": [
243
+ {
244
+ "text": "Initializes bug-hunter-state.json with chunk metadata",
245
+ "type": "content_check"
246
+ },
247
+ {
248
+ "text": "Processes chunks sequentially and marks each chunk state",
249
+ "type": "content_check"
250
+ },
251
+ {
252
+ "text": "Can resume from existing state file without rescanning completed chunks",
253
+ "type": "content_check"
254
+ }
255
+ ]
256
+ },
257
+ {
258
+ "id": 10,
259
+ "prompt": "/bug-hunter src/ (second run with unchanged files)",
260
+ "expected_output": "Hash cache should skip unchanged files and focus scan effort on changed files only.",
261
+ "files": [],
262
+ "assertions": [
263
+ {
264
+ "text": "Runs hash-filter against bug-hunter-state.json before deep scan",
265
+ "type": "content_check"
266
+ },
267
+ {
268
+ "text": "Reports skipped unchanged files from cache",
269
+ "type": "content_check"
270
+ }
271
+ ]
272
+ },
273
+ {
274
+ "id": 11,
275
+ "prompt": "/bug-hunter src/ with malformed subagent payload",
276
+ "expected_output": "Pipeline should fail fast before spawning subagents when payload validation fails.",
277
+ "files": [],
278
+ "assertions": [
279
+ {
280
+ "text": "Validates payload via payload-guard.cjs before each subagent launch",
281
+ "type": "content_check"
282
+ },
283
+ {
284
+ "text": "Does not launch subagent when payload validation fails",
285
+ "type": "content_check"
286
+ }
287
+ ]
288
+ },
289
+ {
290
+ "id": 12,
291
+ "prompt": "/bug-hunter --fix src/ while another fix run is active",
292
+ "expected_output": "Fix phase should stop when single-writer lock cannot be acquired.",
293
+ "files": [],
294
+ "assertions": [
295
+ {
296
+ "text": "Attempts to acquire .claude/bug-hunter-fix.lock before any edits",
297
+ "type": "content_check"
298
+ },
299
+ {
300
+ "text": "Stops Phase 2 with clear lock-held message when lock is already held",
301
+ "type": "content_check"
302
+ }
303
+ ]
304
+ },
305
+ {
306
+ "id": 13,
307
+ "prompt": "/bug-hunter --fix src/ with mixed-confidence bugs",
308
+ "expected_output": "Auto-fix should run only on high-confidence bugs and leave low-confidence bugs as manual review.",
309
+ "files": [],
310
+ "assertions": [
311
+ {
312
+ "text": "Applies confidence threshold gating (>=75%) for auto-fix eligibility",
313
+ "type": "content_check"
314
+ },
315
+ {
316
+ "text": "Reports low-confidence bugs as manual-review and does not auto-edit them",
317
+ "type": "content_check"
318
+ }
319
+ ]
320
+ },
321
+ {
322
+ "id": 14,
323
+ "prompt": "/bug-hunter src/ on a CLI without spawn_agent",
324
+ "expected_output": "Pipeline should auto-select the available orchestration backend and continue. If remote orchestration is unavailable, it should fall back to local sequential execution.",
325
+ "files": [],
326
+ "assertions": [
327
+ {
328
+ "text": "Selects AGENT_BACKEND in preflight based on available runtime tools",
329
+ "type": "content_check"
330
+ },
331
+ {
332
+ "text": "Falls back to next backend when launch fails",
333
+ "type": "content_check"
334
+ },
335
+ {
336
+ "text": "Completes run with local-sequential fallback when no delegation backend is available",
337
+ "type": "content_check"
338
+ }
339
+ ]
340
+ },
341
+ {
342
+ "id": 15,
343
+ "prompt": "/bug-hunter huge-repo/ with flaky chunk worker",
344
+ "expected_output": "Orchestrator should enforce per-chunk timeout, retry failed chunk once with backoff, and persist attempt details in run journal.",
345
+ "files": [],
346
+ "assertions": [
347
+ {
348
+ "text": "Uses run-bug-hunter.cjs for autonomous chunk orchestration",
349
+ "type": "content_check"
350
+ },
351
+ {
352
+ "text": "Retries timed out/failed chunk according to max-retries and backoff policy",
353
+ "type": "content_check"
354
+ },
355
+ {
356
+ "text": "Writes attempt events to .claude/bug-hunter-run.log",
357
+ "type": "content_check"
358
+ }
359
+ ]
360
+ }
361
+ ]
362
+ }