@codexstar/bug-hunter 3.0.0 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/CHANGELOG.md +149 -83
  2. package/README.md +150 -15
  3. package/SKILL.md +94 -27
  4. package/agents/openai.yaml +4 -0
  5. package/bin/bug-hunter +9 -3
  6. package/docs/images/2026-03-12-fix-plan-rollout.png +0 -0
  7. package/docs/images/2026-03-12-hero-bug-hunter-overview.png +0 -0
  8. package/docs/images/2026-03-12-machine-readable-artifacts.png +0 -0
  9. package/docs/images/2026-03-12-pr-review-flow.png +0 -0
  10. package/docs/images/2026-03-12-security-pack.png +0 -0
  11. package/docs/images/adversarial-debate.png +0 -0
  12. package/docs/images/doc-verify-fix-plan.png +0 -0
  13. package/docs/images/hero.png +0 -0
  14. package/docs/images/pipeline-overview.png +0 -0
  15. package/docs/images/security-finding-card.png +0 -0
  16. package/docs/plans/2026-03-11-structured-output-migration-plan.md +288 -0
  17. package/docs/plans/2026-03-12-audit-bug-fixes-surgical-plan.md +193 -0
  18. package/docs/plans/2026-03-12-enterprise-security-pack-e2e-plan.md +59 -0
  19. package/docs/plans/2026-03-12-local-security-skills-integration-plan.md +39 -0
  20. package/docs/plans/2026-03-12-pr-review-strategic-fix-flow.md +78 -0
  21. package/evals/evals.json +366 -102
  22. package/modes/extended.md +2 -2
  23. package/modes/fix-loop.md +30 -30
  24. package/modes/fix-pipeline.md +32 -6
  25. package/modes/large-codebase.md +14 -15
  26. package/modes/local-sequential.md +44 -20
  27. package/modes/loop.md +56 -56
  28. package/modes/parallel.md +3 -3
  29. package/modes/scaled.md +2 -2
  30. package/modes/single-file.md +3 -3
  31. package/modes/small.md +11 -11
  32. package/package.json +10 -1
  33. package/prompts/fixer.md +37 -23
  34. package/prompts/hunter.md +39 -20
  35. package/prompts/referee.md +34 -20
  36. package/prompts/skeptic.md +25 -22
  37. package/schemas/coverage.schema.json +67 -0
  38. package/schemas/examples/findings.invalid.json +13 -0
  39. package/schemas/examples/findings.valid.json +17 -0
  40. package/schemas/findings.schema.json +76 -0
  41. package/schemas/fix-plan.schema.json +94 -0
  42. package/schemas/fix-report.schema.json +105 -0
  43. package/schemas/fix-strategy.schema.json +99 -0
  44. package/schemas/recon.schema.json +31 -0
  45. package/schemas/referee.schema.json +46 -0
  46. package/schemas/shared.schema.json +51 -0
  47. package/schemas/skeptic.schema.json +21 -0
  48. package/scripts/bug-hunter-state.cjs +35 -12
  49. package/scripts/code-index.cjs +11 -4
  50. package/scripts/fix-lock.cjs +95 -25
  51. package/scripts/payload-guard.cjs +24 -10
  52. package/scripts/pr-scope.cjs +181 -0
  53. package/scripts/render-report.cjs +346 -0
  54. package/scripts/run-bug-hunter.cjs +667 -32
  55. package/scripts/schema-runtime.cjs +273 -0
  56. package/scripts/schema-validate.cjs +40 -0
  57. package/scripts/tests/bug-hunter-state.test.cjs +68 -3
  58. package/scripts/tests/code-index.test.cjs +15 -0
  59. package/scripts/tests/fix-lock.test.cjs +60 -2
  60. package/scripts/tests/fixtures/flaky-worker.cjs +6 -1
  61. package/scripts/tests/fixtures/low-confidence-worker.cjs +8 -2
  62. package/scripts/tests/fixtures/success-worker.cjs +6 -1
  63. package/scripts/tests/payload-guard.test.cjs +154 -2
  64. package/scripts/tests/pr-scope.test.cjs +212 -0
  65. package/scripts/tests/render-report.test.cjs +180 -0
  66. package/scripts/tests/run-bug-hunter.test.cjs +686 -2
  67. package/scripts/tests/security-skills-integration.test.cjs +29 -0
  68. package/scripts/tests/skills-packaging.test.cjs +30 -0
  69. package/scripts/tests/worktree-harvest.test.cjs +66 -0
  70. package/scripts/worktree-harvest.cjs +62 -9
  71. package/skills/README.md +19 -0
  72. package/skills/commit-security-scan/SKILL.md +63 -0
  73. package/skills/security-review/SKILL.md +57 -0
  74. package/skills/threat-model-generation/SKILL.md +47 -0
  75. package/skills/vulnerability-validation/SKILL.md +59 -0
  76. package/templates/subagent-wrapper.md +12 -3
  77. package/modes/_dispatch.md +0 -121
package/modes/single-file.md CHANGED
@@ -13,7 +13,7 @@ Pass the single file path as the file list. No risk map needed — the file is i
13
13
 
14
14
  For `local-sequential`: read the prompt file and scan the single file yourself.
15
15
 
16
- After completion, read `.bug-hunter/findings.md`.
16
+ After completion, read `.bug-hunter/findings.json`.
17
17
 
18
18
  If `.bug-hunter/findings.json` is an empty array (zero findings), go to Step 7 (Final Report) in SKILL.md.
19
19
 
@@ -25,7 +25,7 @@ Dispatch Skeptic using the standard dispatch pattern (see `_dispatch.md`, role=`
25
25
 
26
26
  Inject the Hunter's findings.
27
27
 
28
- After completion, read `.bug-hunter/skeptic.md`.
28
+ After completion, read `.bug-hunter/skeptic.json`.
29
29
 
30
30
  ---
31
31
 
@@ -35,4 +35,4 @@ Dispatch Referee using the standard dispatch pattern (see `_dispatch.md`, role=`
35
35
 
36
36
  Inject Hunter + Skeptic reports.
37
37
 
38
- After completion, read `.bug-hunter/referee.md`. Go to Step 7 (Final Report) in SKILL.md.
38
+ After completion, read `.bug-hunter/referee.json`, render `.bug-hunter/report.md`, and go to Step 7 (Final Report) in SKILL.md.
package/modes/small.md CHANGED
@@ -33,12 +33,12 @@ Report architecture summary to user.
33
33
  Dispatch Hunter using the standard dispatch pattern (see `_dispatch.md`, role=`hunter`).
34
34
 
35
35
  Pass to the Hunter:
36
- - File list in risk-map order (CRITICAL → HIGH → MEDIUM). If triage exists, use `triage.scanOrder`.
36
+ - File list in risk-map order (CRITICAL → HIGH → MEDIUM → LOW). If triage exists, use `triage.scanOrder`.
37
37
  - Risk map from Recon (or triage).
38
38
  - Tech stack from Recon.
39
39
  - `doc-lookup.md` contents as phase-specific context.
40
40
 
41
- After completion, read `.bug-hunter/findings.md`.
41
+ After completion, read `.bug-hunter/findings.json`.
42
42
 
43
43
  If `.bug-hunter/findings.json` is an empty array (zero findings), skip Skeptic and Referee. Go to Step 7 (Final Report) in SKILL.md.
44
44
 
@@ -48,11 +48,11 @@ If TOTAL FINDINGS: 0, skip Skeptic and Referee. Go to Step 7 (Final Report) in S
48
48
 
49
49
  Compare the Hunter's FILES SCANNED list against the risk map.
50
50
 
51
- If any CRITICAL or HIGH files appear in FILES SKIPPED:
51
+ If any queued scannable files appear in FILES SKIPPED:
52
52
 
53
- **local-sequential:** Read the missed files yourself now and scan them for bugs. Append new findings to `.bug-hunter/findings.md`.
53
+ **local-sequential:** Read the missed files yourself now in priority order (CRITICAL → HIGH → MEDIUM → LOW) and scan them for bugs. Merge new findings into the JSON array in `.bug-hunter/findings.json`.
54
54
 
55
- **subagent/teams:** Launch a second Hunter on ONLY the missed files using the standard dispatch pattern. Merge gap findings into `.bug-hunter/findings.md`.
55
+ **subagent/teams:** Launch a second Hunter on ONLY the missed files using the standard dispatch pattern. Merge gap findings into `.bug-hunter/findings.json`.
56
56
 
57
57
  ---
58
58
 
@@ -61,11 +61,11 @@ If any CRITICAL or HIGH files appear in FILES SKIPPED:
61
61
  Dispatch Skeptic using the standard dispatch pattern (see `_dispatch.md`, role=`skeptic`).
62
62
 
63
63
  Pass to the Skeptic:
64
- - Hunter findings from `.bug-hunter/findings.md` (compact format: bugId, severity, file, lines, claim, evidence, runtimeTrigger).
64
+ - Hunter findings from `.bug-hunter/findings.json`.
65
65
  - Tech stack from Recon.
66
66
  - `doc-lookup.md` contents as phase-specific context.
67
67
 
68
- After completion, read `.bug-hunter/skeptic.md`.
68
+ After completion, read `.bug-hunter/skeptic.json`.
69
69
 
70
70
  ---
71
71
 
@@ -74,13 +74,13 @@ After completion, read `.bug-hunter/skeptic.md`.
74
74
  Dispatch Referee using the standard dispatch pattern (see `_dispatch.md`, role=`referee`).
75
75
 
76
76
  Pass to the Referee:
77
- - Hunter findings from `.bug-hunter/findings.md`.
78
- - Skeptic challenges from `.bug-hunter/skeptic.md`.
77
+ - Hunter findings from `.bug-hunter/findings.json`.
78
+ - Skeptic challenges from `.bug-hunter/skeptic.json`.
79
79
 
80
- After completion, read `.bug-hunter/referee.md`.
80
+ After completion, read `.bug-hunter/referee.json`.
81
81
 
82
82
  ---
83
83
 
84
84
  ## After Step 7
85
85
 
86
- Proceed to **Step 7** (Final Report) in SKILL.md. The Referee output in `.bug-hunter/referee.md` provides the confirmed bugs table, dismissed findings, and coverage stats needed for the final report.
86
+ Proceed to **Step 7** (Final Report) in SKILL.md. The Referee output in `.bug-hunter/referee.json` plus the rendered `.bug-hunter/report.md` provide the confirmed bugs table, dismissed findings, and coverage stats needed for the final report.
package/package.json CHANGED
@@ -1,8 +1,9 @@
1
1
  {
2
2
  "name": "@codexstar/bug-hunter",
3
- "version": "3.0.0",
3
+ "version": "3.0.5",
4
4
  "description": "Adversarial AI bug hunter — multi-agent pipeline finds security vulnerabilities, logic errors, and runtime bugs, then fixes them autonomously. Works with Claude Code, Cursor, Codex CLI, Copilot, Kiro, and more.",
5
5
  "license": "MIT",
6
+ "main": "bin/bug-hunter",
6
7
  "type": "commonjs",
7
8
  "bin": {
8
9
  "bug-hunter": "bin/bug-hunter"
@@ -29,12 +30,16 @@
29
30
  "static-analysis"
30
31
  ],
31
32
  "files": [
33
+ "agents/",
32
34
  "bin/",
33
35
  "scripts/",
36
+ "schemas/",
34
37
  "prompts/",
35
38
  "templates/",
36
39
  "modes/",
40
+ "skills/",
37
41
  "evals/",
42
+ "docs/",
38
43
  "SKILL.md",
39
44
  "README.md",
40
45
  "CHANGELOG.md",
@@ -48,7 +53,11 @@
48
53
  "url": "https://github.com/codexstar69/bug-hunter/issues"
49
54
  },
50
55
  "homepage": "https://github.com/codexstar69/bug-hunter#readme",
56
+ "publishConfig": {
57
+ "access": "public"
58
+ },
51
59
  "scripts": {
60
+ "prepack": "node --test scripts/tests/*.test.cjs",
52
61
  "test": "node --test scripts/tests/*.test.cjs",
53
62
  "doctor": "node bin/bug-hunter doctor",
54
63
  "postinstall": "node -e \"console.log('\\n Run: bug-hunter install to set up the skill\\n Run: bug-hunter doctor to check your environment\\n')\""
package/prompts/fixer.md CHANGED
@@ -2,17 +2,22 @@ You are a surgical code fixer. You will receive a list of verified bugs from a R
2
2
 
3
3
  ## Output Destination
4
4
 
5
- Write your fix report to the file path provided in your assignment (typically `.bug-hunter/fix-report.md`). If no path was provided, output to stdout. The report should list each fix applied, the before/after code, and verification results.
5
+ Write your structured fix report to the file path provided in your assignment
6
+ (typically `.bug-hunter/fix-report.json`). If no path was provided, output the
7
+ JSON to stdout. If a Markdown companion is requested, write it only after the
8
+ JSON artifact exists.
6
9
 
7
10
  ## Scope Rules
8
11
 
9
12
  - Only fix the bugs listed in your assignment. Do NOT fix other issues you notice.
13
+ - Respect the assigned strategy. If the cluster is marked `manual-review`, `larger-refactor`, or `architectural-remediation`, do not silently upgrade it into a surgical patch.
10
14
  - Do NOT refactor, add tests, or improve code style — surgical fixes only.
11
15
  - Each fix should change the minimum lines necessary to resolve the bug.
12
16
 
13
17
  ## What you receive
14
18
 
15
19
  - **Bug list**: Confirmed bugs with BUG-IDs, file paths, line numbers, severity, description, and suggested fix direction
20
+ - **Fix strategy context**: Whether the assigned cluster is `safe-autofix`, `manual-review`, `larger-refactor`, or `architectural-remediation`
16
21
  - **Tech stack context**: Framework, auth mechanism, database, key dependencies
17
22
  - **Directory scope**: You are assigned bugs grouped by directory — all bugs in files from the same directory subtree are yours. All bugs in the same file are guaranteed to be in your assignment.
18
23
 
@@ -79,25 +84,34 @@ Use only when you need the correct API pattern for a fix. One lookup per fix, ma
79
84
 
80
85
  ## Output format
81
86
 
82
- After completing all fixes:
83
-
84
- ---
85
- **FIX REPORT**
86
-
87
- **Bugs fixed:**
88
-
89
- For each bug:
90
- **BUG-[N]** | [severity]
91
- - **File(s) changed:** [list of files and line ranges modified]
92
- - **What was changed:** [one-sentence description of the actual code change]
93
- - **Confidence:** [High/Medium/Low — how confident you are this fully resolves the bug]
94
- - **Side effects:** [None / list any potential side effects or breaking changes]
95
- - **Notes:** [Any caveats or partial-fix details. "Requires larger refactor" if applicable.]
96
-
97
- **Summary:**
98
- - Bugs assigned: [N]
99
- - Bugs fixed: [N]
100
- - Bugs requiring larger refactor: [N] (minimal patches applied)
101
- - Bugs skipped: [N] (with reason for each)
102
- - Files modified: [list]
103
- ---
87
+ Write a JSON object with this shape:
88
+
89
+ ```json
90
+ {
91
+ "generatedAt": "2026-03-11T12:00:00.000Z",
92
+ "summary": {
93
+ "bugsAssigned": 2,
94
+ "bugsFixed": 1,
95
+ "bugsNeedingLargerRefactor": 1,
96
+ "bugsSkipped": 0,
97
+ "filesModified": ["src/api/users.ts"]
98
+ },
99
+ "fixes": [
100
+ {
101
+ "bugId": "BUG-1",
102
+ "severity": "Critical",
103
+ "filesChanged": ["src/api/users.ts:45-52"],
104
+ "whatChanged": "Replaced string interpolation with the parameterized query helper.",
105
+ "confidenceLabel": "high",
106
+ "sideEffects": ["None"],
107
+ "notes": "Minimal patch only."
108
+ }
109
+ ]
110
+ }
111
+ ```
112
+
113
+ Rules:
114
+ - Keep the output valid JSON.
115
+ - Use `confidenceLabel` values `high`, `medium`, or `low`.
116
+ - Keep `sideEffects` as an array, using `["None"]` when there are none.
117
+ - Do not add prose outside the JSON object.
package/prompts/hunter.md CHANGED
@@ -2,7 +2,11 @@ You are a code analysis agent. Your task is to thoroughly examine the provided c
2
2
 
3
3
  ## Output Destination
4
4
 
5
- Write your complete findings report to the file path provided in your assignment (typically `.bug-hunter/findings.md`). If no path was provided, output to stdout. The orchestrator reads this file to pass your findings to the Skeptic phase.
5
+ Write your canonical findings artifact as JSON to the file path provided in your
6
+ assignment (typically `.bug-hunter/findings.json`). If no path was provided,
7
+ output the JSON to stdout. If the assignment also asks for a Markdown companion,
8
+ write that separately as a derived human-readable summary; the JSON artifact is
9
+ the source of truth the Skeptic and Referee read.
6
10
 
7
11
  ## Scope Rules
8
12
 
@@ -90,25 +94,40 @@ Quality matters more than quantity. The downstream Skeptic agent will challenge
90
94
 
91
95
  ## Output format
92
96
 
93
- For each finding, use this exact format:
94
-
95
- ---
96
- **BUG-[number]** | Severity: [Low/Medium/Critical] | Points: [1/5/10]
97
- - **File:** [exact file path]
98
- - **Line(s):** [line number or range]
99
- - **Category:** [logic | security | error-handling | concurrency | edge-case | data-integrity | type-safety | resource-leak | api-contract | cross-file]
100
- - **STRIDE:** [Spoofing | Tampering | Repudiation | InfoDisclosure | DoS | ElevationOfPrivilege | N/A]
101
- - **CWE:** [CWE-NNN | N/A]
102
- - **Claim:** [One-sentence statement of what is wrong — no justification, just the claim]
103
- - **Evidence:** [Quote the EXACT code from the file, including the line number(s). Copy-paste — do not paraphrase or reconstruct from memory. The Referee will spot-check these quotes against the actual file. If the quote doesn't match, your finding is automatically dismissed.]
104
- - **Runtime trigger:** [Describe a concrete scenario — what input, API call, or sequence of events causes this bug to manifest. Be specific: "POST /api/users with body {name: null}" not "if the input is invalid"]
105
- - **Cross-references:** [If this bug involves multiple files, list the other files and line numbers involved. Otherwise write "Single file"]
106
- ---
107
-
108
- **STRIDE + CWE rules:**
109
- - `category: security` → STRIDE and CWE are REQUIRED. Choose the most specific match from the CWE Quick Reference below.
110
- - All other categories (logic, concurrency, etc.) → STRIDE=N/A, CWE=N/A.
111
- - If a logic bug has security implications (e.g., auth bypass via wrong comparison), reclassify as `category: security`.
97
+ Write a JSON array. Each item must match this contract:
98
+
99
+ ```json
100
+ [
101
+ {
102
+ "bugId": "BUG-1",
103
+ "severity": "Critical",
104
+ "category": "security",
105
+ "file": "src/api/users.ts",
106
+ "lines": "45-49",
107
+ "claim": "SQL is built from unsanitized user input.",
108
+ "evidence": "src/api/users.ts:45-49 const query = `...${term}...`",
109
+ "runtimeTrigger": "GET /api/users?term=' OR '1'='1",
110
+ "crossReferences": ["src/db/query.ts:10-18"],
111
+ "confidenceScore": 93,
112
+ "confidenceLabel": "high",
113
+ "stride": "Tampering",
114
+ "cwe": "CWE-89"
115
+ }
116
+ ]
117
+ ```
118
+
119
+ Rules:
120
+ - Return a valid empty array `[]` when you found no bugs.
121
+ - `confidenceScore` must be numeric on a `0-100` scale.
122
+ - `confidenceLabel` is optional, but if present it must be `high`, `medium`,
123
+ or `low`.
124
+ - `crossReferences` must always be an array. Use `["Single file"]` when no
125
+ extra file is involved.
126
+ - `category: security` requires specific `stride` and `cwe` values.
127
+ - Non-security findings must use `stride: "N/A"` and `cwe: "N/A"`.
128
+ - Do not append coverage summaries, totals, or prose outside the JSON array.
129
+ - If the assignment also requested a Markdown companion, render it from this
130
+ JSON after writing the canonical artifact.
112
131
 
113
132
  ## CWE Quick Reference (security findings only)
114
133
 
package/prompts/referee.md CHANGED
@@ -6,7 +6,10 @@ You will receive both the Hunter findings file and the Skeptic challenges file.
6
6
 
7
7
  ## Output Destination
8
8
 
9
- Write your complete Referee verdict report to the file path provided in your assignment (typically `.bug-hunter/referee.md`). If no path was provided, output to stdout. This is the FINAL phase — your verdicts determine which bugs are confirmed.
9
+ Write your canonical Referee verdict artifact as JSON to the file path provided
10
+ in your assignment (typically `.bug-hunter/referee.json`). If no path was
11
+ provided, output the JSON to stdout. If a Markdown report is requested, render
12
+ it from this JSON artifact after writing the canonical file.
10
13
 
11
14
  ## Scope Rules
12
15
 
@@ -56,20 +59,38 @@ Before final report: (1) Coverage — did you evaluate every BUG-ID from both re
56
59
 
57
60
  ## Output format
58
61
 
59
- Per bug:
60
- ```
61
- **BUG-N** | Verification: INDEPENDENTLY VERIFIED / EVIDENCE-BASED
62
- - **Hunter's claim:** [summary]
63
- - **Skeptic's response:** DISPROVE/ACCEPT [summary]
64
- - **My analysis:** [what you traced and found]
65
- - **VERDICT: REAL BUG / NOT A BUG** | Confidence: High/Medium/Low
66
- - **True severity:** [Critical/Medium/Low] (if changed, explain)
67
- - **Suggested fix:** [concrete: function name, check to add, line to change]
62
+ Write a JSON array. Each item must match this contract:
63
+
64
+ ```json
65
+ [
66
+ {
67
+ "bugId": "BUG-1",
68
+ "verdict": "REAL_BUG",
69
+ "trueSeverity": "Critical",
70
+ "confidenceScore": 94,
71
+ "confidenceLabel": "high",
72
+ "verificationMode": "INDEPENDENTLY_VERIFIED",
73
+ "analysisSummary": "Confirmed by tracing user-controlled input into an unsafe sink without validation.",
74
+ "suggestedFix": "Validate the input before building the query and use the parameterized helper."
75
+ }
76
+ ]
68
77
  ```
69
78
 
79
+ Rules:
80
+ - `verdict` must be one of `REAL_BUG`, `NOT_A_BUG`, or `MANUAL_REVIEW`.
81
+ - `confidenceScore` must be numeric on a `0-100` scale.
82
+ - `confidenceLabel` must be `high`, `medium`, or `low`.
83
+ - `verificationMode` must be `INDEPENDENTLY_VERIFIED` or `EVIDENCE_BASED`.
84
+ - Keep the reasoning in `analysisSummary`; do not emit free-form prose outside
85
+ the JSON array.
86
+ - Return `[]` only when there were no findings to referee.
87
+
70
88
  ### Security enrichment (confirmed security bugs only)
71
89
 
72
- For each finding with `category: security` that you confirm as REAL BUG, add these fields below the verdict:
90
+ For each finding with `category: security` that you confirm as `REAL_BUG`,
91
+ include the security enrichment details in `analysisSummary` and
92
+ `suggestedFix`. Until the schema grows extra typed security fields, do not emit
93
+ out-of-contract keys.
73
94
 
74
95
  **Reachability** (required for all security findings):
75
96
  - `EXTERNAL` — reachable from unauthenticated external input (public API, form, URL)
@@ -111,12 +132,5 @@ Non-security findings use the standard verdict format above (no enrichment neede
111
132
 
112
133
  ## Final Report
113
134
 
114
- **VERIFIED BUG REPORT**
115
-
116
- Stats: Total reported | Dismissed | Confirmed (Critical/Medium/Low) | Independently verified vs Evidence-based | Per-Hunter accuracy (if parallel) | Skeptic accuracy
117
-
118
- Confirmed bugs table: # | Severity | STRIDE | CWE | Reachability | File | Lines | Description | Fix | Verification
119
-
120
- Low-confidence items (flagged for manual review): file + one-line uncertainty reason.
121
-
122
- <details><summary>Dismissed findings</summary>Table: # | Claim | Skeptic Position | Reason</details>
135
+ If a human-readable report is requested, generate it from the final JSON array.
136
+ The JSON artifact remains canonical.
package/prompts/skeptic.md CHANGED
@@ -6,7 +6,10 @@ Read the Hunter findings file completely before starting. Each finding has BUG-I
6
6
 
7
7
  ## Output Destination
8
8
 
9
- Write your Skeptic challenge report to the file path in your assignment (typically `.bug-hunter/skeptic.md`). The Referee reads both Hunter findings and your challenges.
9
+ Write your canonical Skeptic artifact as JSON to the file path in your
10
+ assignment (typically `.bug-hunter/skeptic.json`). The Referee reads the JSON
11
+ artifact, not a free-form Markdown note. If the assignment also asks for a
12
+ Markdown companion, that Markdown must be derived from the JSON output.
10
13
 
11
14
  ## Scope Rules
12
15
 
@@ -91,28 +94,28 @@ Before writing your final summary, verify:
91
94
 
92
95
  ## Output format
93
96
 
94
- For each bug:
95
-
96
- ---
97
- **BUG-[number]** | Original: [points] pts
98
- - **Code reviewed:** [List the files and line ranges you actually read to evaluate this — must include all cross-referenced files]
99
- - **Runtime trigger test:** [Did you trace the Hunter's exact scenario? What actually happens at each step?]
100
- - **Counter-argument:** [Your specific technical argument, citing code]
101
- - **Evidence:** [Quote the actual code or behavior that supports your position]
102
- - **Confidence:** [0-100]%
103
- - **Risk calc:** EV = ([confidence]% x [points]) - ([100-confidence]% x [2 x points]) = [value]
104
- - **Decision:** DISPROVE / ACCEPT
105
- ---
106
-
107
- After all bugs, output:
108
-
109
- **SUMMARY:**
110
- - Bugs disproved: [count] (total points claimed: [sum])
111
- - Bugs accepted as real: [count]
112
- - Files read during review: [list of files you actually read]
97
+ Write a JSON array. Each item must match this contract:
98
+
99
+ ```json
100
+ [
101
+ {
102
+ "bugId": "BUG-1",
103
+ "response": "DISPROVE",
104
+ "analysisSummary": "The route is wrapped by auth middleware before this handler runs, so the claimed bypass is not reachable.",
105
+ "counterEvidence": "src/routes/api.ts:10-21 attaches requireAuth before the handler."
106
+ }
107
+ ]
108
+ ```
113
109
 
114
- **ACCEPTED BUG LIST:**
115
- [List only the BUG-IDs that you ACCEPTED, with their original severity, file path, and primary file cluster]
110
+ Rules:
111
+ - Use `response: "ACCEPT"` when the finding stands as a real bug.
112
+ - Use `response: "DISPROVE"` only when your challenge is strong enough to
113
+ survive Referee review.
114
+ - Use `response: "MANUAL_REVIEW"` when you cannot safely disprove or accept the
115
+ finding.
116
+ - Return `[]` when there were no findings to challenge.
117
+ - Keep all reasoning inside `analysisSummary` and optional `counterEvidence`.
118
+ - Do not append summary prose outside the JSON array.
116
119
 
117
120
  ## Doc Lookup Tool
118
121
 
package/schemas/coverage.schema.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "schemaVersion": 1,
4
+ "artifact": "coverage",
5
+ "title": "Bug Hunter Coverage Artifact",
6
+ "type": "object",
7
+ "required": ["schemaVersion", "iteration", "status", "files", "bugs", "fixes"],
8
+ "properties": {
9
+ "schemaVersion": {
10
+ "type": "integer",
11
+ "minimum": 1
12
+ },
13
+ "iteration": {
14
+ "type": "integer",
15
+ "minimum": 0
16
+ },
17
+ "status": {
18
+ "type": "string",
19
+ "enum": ["IN_PROGRESS", "COMPLETE"]
20
+ },
21
+ "files": {
22
+ "type": "array",
23
+ "items": {
24
+ "type": "object",
25
+ "required": ["path", "status"],
26
+ "properties": {
27
+ "path": { "type": "string", "minLength": 1 },
28
+ "status": {
29
+ "type": "string",
30
+ "enum": ["pending", "in_progress", "done", "failed"]
31
+ }
32
+ },
33
+ "additionalProperties": false
34
+ }
35
+ },
36
+ "bugs": {
37
+ "type": "array",
38
+ "items": {
39
+ "type": "object",
40
+ "required": ["bugId", "severity", "file", "claim"],
41
+ "properties": {
42
+ "bugId": { "type": "string", "minLength": 1 },
43
+ "severity": {
44
+ "type": "string",
45
+ "enum": ["Critical", "Medium", "Low"]
46
+ },
47
+ "file": { "type": "string", "minLength": 1 },
48
+ "claim": { "type": "string", "minLength": 1 }
49
+ },
50
+ "additionalProperties": false
51
+ }
52
+ },
53
+ "fixes": {
54
+ "type": "array",
55
+ "items": {
56
+ "type": "object",
57
+ "required": ["bugId", "status"],
58
+ "properties": {
59
+ "bugId": { "type": "string", "minLength": 1 },
60
+ "status": { "type": "string", "minLength": 1 }
61
+ },
62
+ "additionalProperties": false
63
+ }
64
+ }
65
+ },
66
+ "additionalProperties": false
67
+ }
package/schemas/examples/findings.invalid.json ADDED
@@ -0,0 +1,13 @@
1
+ [
2
+ {
3
+ "bugId": "BUG-1",
4
+ "severity": "Critical",
5
+ "category": "security",
6
+ "file": "src/example.ts",
7
+ "lines": "12-16",
8
+ "evidence": "src/example.ts:12-16 unvalidated body flows into exec().",
9
+ "runtimeTrigger": "POST /api/example with body {\"command\":\"rm -rf /\"}",
10
+ "crossReferences": ["src/router.ts:8-14"],
11
+ "confidenceScore": 92
12
+ }
13
+ ]
package/schemas/examples/findings.valid.json ADDED
@@ -0,0 +1,17 @@
1
+ [
2
+ {
3
+ "bugId": "BUG-1",
4
+ "severity": "Critical",
5
+ "category": "security",
6
+ "file": "src/example.ts",
7
+ "lines": "12-16",
8
+ "claim": "Request body reaches a dangerous sink without validation.",
9
+ "evidence": "src/example.ts:12-16 unvalidated body flows into exec().",
10
+ "runtimeTrigger": "POST /api/example with body {\"command\":\"rm -rf /\"}",
11
+ "crossReferences": ["src/router.ts:8-14"],
12
+ "confidenceScore": 92,
13
+ "confidenceLabel": "high",
14
+ "stride": "Tampering",
15
+ "cwe": "CWE-78"
16
+ }
17
+ ]
package/schemas/findings.schema.json ADDED
@@ -0,0 +1,76 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "schemaVersion": 1,
4
+ "artifact": "findings",
5
+ "title": "Bug Hunter Findings Artifact",
6
+ "type": "array",
7
+ "items": {
8
+ "type": "object",
9
+ "required": [
10
+ "bugId",
11
+ "severity",
12
+ "category",
13
+ "file",
14
+ "lines",
15
+ "claim",
16
+ "evidence",
17
+ "runtimeTrigger",
18
+ "crossReferences",
19
+ "confidenceScore"
20
+ ],
21
+ "properties": {
22
+ "bugId": { "type": "string", "minLength": 1 },
23
+ "severity": {
24
+ "type": "string",
25
+ "enum": ["Critical", "Medium", "Low"]
26
+ },
27
+ "category": {
28
+ "type": "string",
29
+ "enum": [
30
+ "logic",
31
+ "security",
32
+ "error-handling",
33
+ "concurrency",
34
+ "edge-case",
35
+ "data-integrity",
36
+ "type-safety",
37
+ "resource-leak",
38
+ "api-contract",
39
+ "cross-file"
40
+ ]
41
+ },
42
+ "file": { "type": "string", "minLength": 1 },
43
+ "lines": { "type": "string", "minLength": 1 },
44
+ "claim": { "type": "string", "minLength": 1 },
45
+ "evidence": { "type": "string", "minLength": 1 },
46
+ "runtimeTrigger": { "type": "string", "minLength": 1 },
47
+ "crossReferences": {
48
+ "type": "array",
49
+ "items": { "type": "string", "minLength": 1 }
50
+ },
51
+ "confidenceScore": {
52
+ "type": "number",
53
+ "minimum": 0,
54
+ "maximum": 100
55
+ },
56
+ "confidenceLabel": {
57
+ "type": "string",
58
+ "enum": ["high", "medium", "low"]
59
+ },
60
+ "stride": {
61
+ "type": "string",
62
+ "enum": [
63
+ "Spoofing",
64
+ "Tampering",
65
+ "Repudiation",
66
+ "InfoDisclosure",
67
+ "DoS",
68
+ "ElevationOfPrivilege",
69
+ "N/A"
70
+ ]
71
+ },
72
+ "cwe": { "type": "string", "minLength": 1 }
73
+ },
74
+ "additionalProperties": false
75
+ }
76
+ }