@codexstar/bug-hunter 3.0.0 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +149 -83
- package/README.md +150 -15
- package/SKILL.md +94 -27
- package/agents/openai.yaml +4 -0
- package/bin/bug-hunter +9 -3
- package/docs/images/2026-03-12-fix-plan-rollout.png +0 -0
- package/docs/images/2026-03-12-hero-bug-hunter-overview.png +0 -0
- package/docs/images/2026-03-12-machine-readable-artifacts.png +0 -0
- package/docs/images/2026-03-12-pr-review-flow.png +0 -0
- package/docs/images/2026-03-12-security-pack.png +0 -0
- package/docs/images/adversarial-debate.png +0 -0
- package/docs/images/doc-verify-fix-plan.png +0 -0
- package/docs/images/hero.png +0 -0
- package/docs/images/pipeline-overview.png +0 -0
- package/docs/images/security-finding-card.png +0 -0
- package/docs/plans/2026-03-11-structured-output-migration-plan.md +288 -0
- package/docs/plans/2026-03-12-audit-bug-fixes-surgical-plan.md +193 -0
- package/docs/plans/2026-03-12-enterprise-security-pack-e2e-plan.md +59 -0
- package/docs/plans/2026-03-12-local-security-skills-integration-plan.md +39 -0
- package/docs/plans/2026-03-12-pr-review-strategic-fix-flow.md +78 -0
- package/evals/evals.json +366 -102
- package/modes/extended.md +2 -2
- package/modes/fix-loop.md +30 -30
- package/modes/fix-pipeline.md +32 -6
- package/modes/large-codebase.md +14 -15
- package/modes/local-sequential.md +44 -20
- package/modes/loop.md +56 -56
- package/modes/parallel.md +3 -3
- package/modes/scaled.md +2 -2
- package/modes/single-file.md +3 -3
- package/modes/small.md +11 -11
- package/package.json +11 -1
- package/prompts/fixer.md +37 -23
- package/prompts/hunter.md +39 -20
- package/prompts/referee.md +34 -20
- package/prompts/skeptic.md +25 -22
- package/schemas/coverage.schema.json +67 -0
- package/schemas/examples/findings.invalid.json +13 -0
- package/schemas/examples/findings.valid.json +17 -0
- package/schemas/findings.schema.json +76 -0
- package/schemas/fix-plan.schema.json +94 -0
- package/schemas/fix-report.schema.json +105 -0
- package/schemas/fix-strategy.schema.json +99 -0
- package/schemas/recon.schema.json +31 -0
- package/schemas/referee.schema.json +46 -0
- package/schemas/shared.schema.json +51 -0
- package/schemas/skeptic.schema.json +21 -0
- package/scripts/bug-hunter-state.cjs +35 -12
- package/scripts/code-index.cjs +11 -4
- package/scripts/fix-lock.cjs +95 -25
- package/scripts/payload-guard.cjs +24 -10
- package/scripts/pr-scope.cjs +181 -0
- package/scripts/prepublish-guard.cjs +82 -0
- package/scripts/render-report.cjs +346 -0
- package/scripts/run-bug-hunter.cjs +669 -33
- package/scripts/schema-runtime.cjs +273 -0
- package/scripts/schema-validate.cjs +40 -0
- package/scripts/tests/bug-hunter-state.test.cjs +68 -3
- package/scripts/tests/code-index.test.cjs +15 -0
- package/scripts/tests/fix-lock.test.cjs +60 -2
- package/scripts/tests/fixtures/flaky-worker.cjs +6 -1
- package/scripts/tests/fixtures/low-confidence-worker.cjs +8 -2
- package/scripts/tests/fixtures/success-worker.cjs +6 -1
- package/scripts/tests/payload-guard.test.cjs +154 -2
- package/scripts/tests/pr-scope.test.cjs +212 -0
- package/scripts/tests/render-report.test.cjs +180 -0
- package/scripts/tests/run-bug-hunter.test.cjs +686 -2
- package/scripts/tests/security-skills-integration.test.cjs +29 -0
- package/scripts/tests/skills-packaging.test.cjs +30 -0
- package/scripts/tests/worktree-harvest.test.cjs +67 -1
- package/scripts/worktree-harvest.cjs +62 -9
- package/skills/README.md +19 -0
- package/skills/commit-security-scan/SKILL.md +63 -0
- package/skills/security-review/SKILL.md +57 -0
- package/skills/threat-model-generation/SKILL.md +47 -0
- package/skills/vulnerability-validation/SKILL.md +59 -0
- package/templates/subagent-wrapper.md +12 -3
- package/modes/_dispatch.md +0 -121
package/modes/single-file.md
CHANGED
|
@@ -13,7 +13,7 @@ Pass the single file path as the file list. No risk map needed — the file is i
|
|
|
13
13
|
|
|
14
14
|
For `local-sequential`: read the prompt file and scan the single file yourself.
|
|
15
15
|
|
|
16
|
-
After completion, read `.bug-hunter/findings.md`.
|
|
16
|
+
After completion, read `.bug-hunter/findings.json`.
|
|
17
17
|
|
|
18
18
|
If TOTAL FINDINGS: 0, go to Step 7 (Final Report) in SKILL.md.
|
|
19
19
|
|
|
@@ -25,7 +25,7 @@ Dispatch Skeptic using the standard dispatch pattern (see `_dispatch.md`, role=`
|
|
|
25
25
|
|
|
26
26
|
Inject the Hunter's findings.
|
|
27
27
|
|
|
28
|
-
After completion, read `.bug-hunter/skeptic.md`.
|
|
28
|
+
After completion, read `.bug-hunter/skeptic.json`.
|
|
29
29
|
|
|
30
30
|
---
|
|
31
31
|
|
|
@@ -35,4 +35,4 @@ Dispatch Referee using the standard dispatch pattern (see `_dispatch.md`, role=`
|
|
|
35
35
|
|
|
36
36
|
Inject Hunter + Skeptic reports.
|
|
37
37
|
|
|
38
|
-
After completion, read `.bug-hunter/referee.md
|
|
38
|
+
After completion, read `.bug-hunter/referee.json`, render `.bug-hunter/report.md`, and go to Step 7 (Final Report) in SKILL.md.
|
package/modes/small.md
CHANGED
|
@@ -33,12 +33,12 @@ Report architecture summary to user.
|
|
|
33
33
|
Dispatch Hunter using the standard dispatch pattern (see `_dispatch.md`, role=`hunter`).
|
|
34
34
|
|
|
35
35
|
Pass to the Hunter:
|
|
36
|
-
- File list in risk-map order (CRITICAL → HIGH → MEDIUM). If triage exists, use `triage.scanOrder`.
|
|
36
|
+
- File list in risk-map order (CRITICAL → HIGH → MEDIUM → LOW). If triage exists, use `triage.scanOrder`.
|
|
37
37
|
- Risk map from Recon (or triage).
|
|
38
38
|
- Tech stack from Recon.
|
|
39
39
|
- `doc-lookup.md` contents as phase-specific context.
|
|
40
40
|
|
|
41
|
-
After completion, read `.bug-hunter/findings.md`.
|
|
41
|
+
After completion, read `.bug-hunter/findings.json`.
|
|
42
42
|
|
|
43
43
|
If TOTAL FINDINGS: 0, skip Skeptic and Referee. Go to Step 7 (Final Report) in SKILL.md.
|
|
44
44
|
|
|
@@ -48,11 +48,11 @@ If TOTAL FINDINGS: 0, skip Skeptic and Referee. Go to Step 7 (Final Report) in S
|
|
|
48
48
|
|
|
49
49
|
Compare the Hunter's FILES SCANNED list against the risk map.
|
|
50
50
|
|
|
51
|
-
If any CRITICAL or HIGH files appear in FILES SKIPPED:
|
|
51
|
+
If any queued scannable files appear in FILES SKIPPED:
|
|
52
52
|
|
|
53
|
-
**local-sequential:** Read the missed files yourself now and scan them for bugs. Append new findings to `.bug-hunter/findings.md`.
|
|
53
|
+
**local-sequential:** Read the missed files yourself now in priority order (CRITICAL → HIGH → MEDIUM → LOW) and scan them for bugs. Append new findings to `.bug-hunter/findings.json`.
|
|
54
54
|
|
|
55
|
-
**subagent/teams:** Launch a second Hunter on ONLY the missed files using the standard dispatch pattern. Merge gap findings into `.bug-hunter/findings.md`.
|
|
55
|
+
**subagent/teams:** Launch a second Hunter on ONLY the missed files using the standard dispatch pattern. Merge gap findings into `.bug-hunter/findings.json`.
|
|
56
56
|
|
|
57
57
|
---
|
|
58
58
|
|
|
@@ -61,11 +61,11 @@ If any CRITICAL or HIGH files appear in FILES SKIPPED:
|
|
|
61
61
|
Dispatch Skeptic using the standard dispatch pattern (see `_dispatch.md`, role=`skeptic`).
|
|
62
62
|
|
|
63
63
|
Pass to the Skeptic:
|
|
64
|
-
- Hunter findings from `.bug-hunter/findings.md`.
|
|
64
|
+
- Hunter findings from `.bug-hunter/findings.json`.
|
|
65
65
|
- Tech stack from Recon.
|
|
66
66
|
- `doc-lookup.md` contents as phase-specific context.
|
|
67
67
|
|
|
68
|
-
After completion, read `.bug-hunter/skeptic.md`.
|
|
68
|
+
After completion, read `.bug-hunter/skeptic.json`.
|
|
69
69
|
|
|
70
70
|
---
|
|
71
71
|
|
|
@@ -74,13 +74,13 @@ After completion, read `.bug-hunter/skeptic.md`.
|
|
|
74
74
|
Dispatch Referee using the standard dispatch pattern (see `_dispatch.md`, role=`referee`).
|
|
75
75
|
|
|
76
76
|
Pass to the Referee:
|
|
77
|
-
- Hunter findings from `.bug-hunter/findings.md`.
|
|
78
|
-
- Skeptic challenges from `.bug-hunter/skeptic.md`.
|
|
77
|
+
- Hunter findings from `.bug-hunter/findings.json`.
|
|
78
|
+
- Skeptic challenges from `.bug-hunter/skeptic.json`.
|
|
79
79
|
|
|
80
|
-
After completion, read `.bug-hunter/referee.md`.
|
|
80
|
+
After completion, read `.bug-hunter/referee.json`.
|
|
81
81
|
|
|
82
82
|
---
|
|
83
83
|
|
|
84
84
|
## After Step 7
|
|
85
85
|
|
|
86
|
-
Proceed to **Step 7** (Final Report) in SKILL.md. The Referee output in `.bug-hunter/referee.md`
|
|
86
|
+
Proceed to **Step 7** (Final Report) in SKILL.md. The Referee output in `.bug-hunter/referee.json` plus the rendered `.bug-hunter/report.md` provide the confirmed bugs table, dismissed findings, and coverage stats needed for the final report.
|
package/package.json
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codexstar/bug-hunter",
|
|
3
|
-
"version": "3.0.0",
|
|
3
|
+
"version": "3.0.6",
|
|
4
4
|
"description": "Adversarial AI bug hunter — multi-agent pipeline finds security vulnerabilities, logic errors, and runtime bugs, then fixes them autonomously. Works with Claude Code, Cursor, Codex CLI, Copilot, Kiro, and more.",
|
|
5
5
|
"license": "MIT",
|
|
6
|
+
"main": "bin/bug-hunter",
|
|
6
7
|
"type": "commonjs",
|
|
7
8
|
"bin": {
|
|
8
9
|
"bug-hunter": "bin/bug-hunter"
|
|
@@ -31,10 +32,14 @@
|
|
|
31
32
|
"files": [
|
|
32
33
|
"bin/",
|
|
33
34
|
"scripts/",
|
|
35
|
+
"schemas/",
|
|
34
36
|
"prompts/",
|
|
35
37
|
"templates/",
|
|
36
38
|
"modes/",
|
|
37
39
|
"evals/",
|
|
40
|
+
"docs/",
|
|
41
|
+
"agents/",
|
|
42
|
+
"skills/",
|
|
38
43
|
"SKILL.md",
|
|
39
44
|
"README.md",
|
|
40
45
|
"CHANGELOG.md",
|
|
@@ -48,7 +53,12 @@
|
|
|
48
53
|
"url": "https://github.com/codexstar69/bug-hunter/issues"
|
|
49
54
|
},
|
|
50
55
|
"homepage": "https://github.com/codexstar69/bug-hunter#readme",
|
|
56
|
+
"publishConfig": {
|
|
57
|
+
"access": "public"
|
|
58
|
+
},
|
|
51
59
|
"scripts": {
|
|
60
|
+
"prepublishOnly": "node scripts/prepublish-guard.cjs",
|
|
61
|
+
"prepack": "node --test scripts/tests/*.test.cjs",
|
|
52
62
|
"test": "node --test scripts/tests/*.test.cjs",
|
|
53
63
|
"doctor": "node bin/bug-hunter doctor",
|
|
54
64
|
"postinstall": "node -e \"console.log('\\n Run: bug-hunter install to set up the skill\\n Run: bug-hunter doctor to check your environment\\n')\""
|
package/prompts/fixer.md
CHANGED
|
@@ -2,17 +2,22 @@ You are a surgical code fixer. You will receive a list of verified bugs from a R
|
|
|
2
2
|
|
|
3
3
|
## Output Destination
|
|
4
4
|
|
|
5
|
-
Write your fix report to the file path provided in your assignment
|
|
5
|
+
Write your structured fix report to the file path provided in your assignment
|
|
6
|
+
(typically `.bug-hunter/fix-report.json`). If no path was provided, output the
|
|
7
|
+
JSON to stdout. If a Markdown companion is requested, write it only after the
|
|
8
|
+
JSON artifact exists.
|
|
6
9
|
|
|
7
10
|
## Scope Rules
|
|
8
11
|
|
|
9
12
|
- Only fix the bugs listed in your assignment. Do NOT fix other issues you notice.
|
|
13
|
+
- Respect the assigned strategy. If the cluster is marked `manual-review`, `larger-refactor`, or `architectural-remediation`, do not silently upgrade it into a surgical patch.
|
|
10
14
|
- Do NOT refactor, add tests, or improve code style — surgical fixes only.
|
|
11
15
|
- Each fix should change the minimum lines necessary to resolve the bug.
|
|
12
16
|
|
|
13
17
|
## What you receive
|
|
14
18
|
|
|
15
19
|
- **Bug list**: Confirmed bugs with BUG-IDs, file paths, line numbers, severity, description, and suggested fix direction
|
|
20
|
+
- **Fix strategy context**: Whether the assigned cluster is `safe-autofix`, `manual-review`, `larger-refactor`, or `architectural-remediation`
|
|
16
21
|
- **Tech stack context**: Framework, auth mechanism, database, key dependencies
|
|
17
22
|
- **Directory scope**: You are assigned bugs grouped by directory — all bugs in files from the same directory subtree are yours. All bugs in the same file are guaranteed to be in your assignment.
|
|
18
23
|
|
|
@@ -79,25 +84,34 @@ Use only when you need the correct API pattern for a fix. One lookup per fix, ma
|
|
|
79
84
|
|
|
80
85
|
## Output format
|
|
81
86
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
87
|
+
Write a JSON object with this shape:
|
|
88
|
+
|
|
89
|
+
```json
|
|
90
|
+
{
|
|
91
|
+
"generatedAt": "2026-03-11T12:00:00.000Z",
|
|
92
|
+
"summary": {
|
|
93
|
+
"bugsAssigned": 2,
|
|
94
|
+
"bugsFixed": 1,
|
|
95
|
+
"bugsNeedingLargerRefactor": 1,
|
|
96
|
+
"bugsSkipped": 0,
|
|
97
|
+
"filesModified": ["src/api/users.ts"]
|
|
98
|
+
},
|
|
99
|
+
"fixes": [
|
|
100
|
+
{
|
|
101
|
+
"bugId": "BUG-1",
|
|
102
|
+
"severity": "Critical",
|
|
103
|
+
"filesChanged": ["src/api/users.ts:45-52"],
|
|
104
|
+
"whatChanged": "Replaced string interpolation with the parameterized query helper.",
|
|
105
|
+
"confidenceLabel": "high",
|
|
106
|
+
"sideEffects": ["None"],
|
|
107
|
+
"notes": "Minimal patch only."
|
|
108
|
+
}
|
|
109
|
+
]
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Rules:
|
|
114
|
+
- Keep the output valid JSON.
|
|
115
|
+
- Use `confidenceLabel` values `high`, `medium`, or `low`.
|
|
116
|
+
- Keep `sideEffects` as an array, using `["None"]` when there are none.
|
|
117
|
+
- Do not add prose outside the JSON object.
|
package/prompts/hunter.md
CHANGED
|
@@ -2,7 +2,11 @@ You are a code analysis agent. Your task is to thoroughly examine the provided c
|
|
|
2
2
|
|
|
3
3
|
## Output Destination
|
|
4
4
|
|
|
5
|
-
Write your
|
|
5
|
+
Write your canonical findings artifact as JSON to the file path provided in your
|
|
6
|
+
assignment (typically `.bug-hunter/findings.json`). If no path was provided,
|
|
7
|
+
output the JSON to stdout. If the assignment also asks for a Markdown companion,
|
|
8
|
+
write that separately as a derived human-readable summary; the JSON artifact is
|
|
9
|
+
the source of truth the Skeptic and Referee read.
|
|
6
10
|
|
|
7
11
|
## Scope Rules
|
|
8
12
|
|
|
@@ -90,25 +94,40 @@ Quality matters more than quantity. The downstream Skeptic agent will challenge
|
|
|
90
94
|
|
|
91
95
|
## Output format
|
|
92
96
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
97
|
+
Write a JSON array. Each item must match this contract:
|
|
98
|
+
|
|
99
|
+
```json
|
|
100
|
+
[
|
|
101
|
+
{
|
|
102
|
+
"bugId": "BUG-1",
|
|
103
|
+
"severity": "Critical",
|
|
104
|
+
"category": "security",
|
|
105
|
+
"file": "src/api/users.ts",
|
|
106
|
+
"lines": "45-49",
|
|
107
|
+
"claim": "SQL is built from unsanitized user input.",
|
|
108
|
+
"evidence": "src/api/users.ts:45-49 const query = `...${term}...`",
|
|
109
|
+
"runtimeTrigger": "GET /api/users?term=' OR '1'='1",
|
|
110
|
+
"crossReferences": ["src/db/query.ts:10-18"],
|
|
111
|
+
"confidenceScore": 93,
|
|
112
|
+
"confidenceLabel": "high",
|
|
113
|
+
"stride": "Tampering",
|
|
114
|
+
"cwe": "CWE-89"
|
|
115
|
+
}
|
|
116
|
+
]
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Rules:
|
|
120
|
+
- Return a valid empty array `[]` when you found no bugs.
|
|
121
|
+
- `confidenceScore` must be numeric on a `0-100` scale.
|
|
122
|
+
- `confidenceLabel` is optional, but if present it must be `high`, `medium`,
|
|
123
|
+
or `low`.
|
|
124
|
+
- `crossReferences` must always be an array. Use `["Single file"]` when no
|
|
125
|
+
extra file is involved.
|
|
126
|
+
- `category: security` requires specific `stride` and `cwe` values.
|
|
127
|
+
- Non-security findings must use `stride: "N/A"` and `cwe: "N/A"`.
|
|
128
|
+
- Do not append coverage summaries, totals, or prose outside the JSON array.
|
|
129
|
+
- If the assignment also requested a Markdown companion, render it from this
|
|
130
|
+
JSON after writing the canonical artifact.
|
|
112
131
|
|
|
113
132
|
## CWE Quick Reference (security findings only)
|
|
114
133
|
|
package/prompts/referee.md
CHANGED
|
@@ -6,7 +6,10 @@ You will receive both the Hunter findings file and the Skeptic challenges file.
|
|
|
6
6
|
|
|
7
7
|
## Output Destination
|
|
8
8
|
|
|
9
|
-
Write your
|
|
9
|
+
Write your canonical Referee verdict artifact as JSON to the file path provided
|
|
10
|
+
in your assignment (typically `.bug-hunter/referee.json`). If no path was
|
|
11
|
+
provided, output the JSON to stdout. If a Markdown report is requested, render
|
|
12
|
+
it from this JSON artifact after writing the canonical file.
|
|
10
13
|
|
|
11
14
|
## Scope Rules
|
|
12
15
|
|
|
@@ -56,20 +59,38 @@ Before final report: (1) Coverage — did you evaluate every BUG-ID from both re
|
|
|
56
59
|
|
|
57
60
|
## Output format
|
|
58
61
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
62
|
+
Write a JSON array. Each item must match this contract:
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
[
|
|
66
|
+
{
|
|
67
|
+
"bugId": "BUG-1",
|
|
68
|
+
"verdict": "REAL_BUG",
|
|
69
|
+
"trueSeverity": "Critical",
|
|
70
|
+
"confidenceScore": 94,
|
|
71
|
+
"confidenceLabel": "high",
|
|
72
|
+
"verificationMode": "INDEPENDENTLY_VERIFIED",
|
|
73
|
+
"analysisSummary": "Confirmed by tracing user-controlled input into an unsafe sink without validation.",
|
|
74
|
+
"suggestedFix": "Validate the input before building the query and use the parameterized helper."
|
|
75
|
+
}
|
|
76
|
+
]
|
|
68
77
|
```
|
|
69
78
|
|
|
79
|
+
Rules:
|
|
80
|
+
- `verdict` must be one of `REAL_BUG`, `NOT_A_BUG`, or `MANUAL_REVIEW`.
|
|
81
|
+
- `confidenceScore` must be numeric on a `0-100` scale.
|
|
82
|
+
- `confidenceLabel` must be `high`, `medium`, or `low`.
|
|
83
|
+
- `verificationMode` must be `INDEPENDENTLY_VERIFIED` or `EVIDENCE_BASED`.
|
|
84
|
+
- Keep the reasoning in `analysisSummary`; do not emit free-form prose outside
|
|
85
|
+
the JSON array.
|
|
86
|
+
- Return `[]` only when there were no findings to referee.
|
|
87
|
+
|
|
70
88
|
### Security enrichment (confirmed security bugs only)
|
|
71
89
|
|
|
72
|
-
For each finding with `category: security` that you confirm as
|
|
90
|
+
For each finding with `category: security` that you confirm as `REAL_BUG`,
|
|
91
|
+
include the security enrichment details in `analysisSummary` and
|
|
92
|
+
`suggestedFix`. Until the schema grows extra typed security fields, do not emit
|
|
93
|
+
out-of-contract keys.
|
|
73
94
|
|
|
74
95
|
**Reachability** (required for all security findings):
|
|
75
96
|
- `EXTERNAL` — reachable from unauthenticated external input (public API, form, URL)
|
|
@@ -111,12 +132,5 @@ Non-security findings use the standard verdict format above (no enrichment neede
|
|
|
111
132
|
|
|
112
133
|
## Final Report
|
|
113
134
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
Stats: Total reported | Dismissed | Confirmed (Critical/Medium/Low) | Independently verified vs Evidence-based | Per-Hunter accuracy (if parallel) | Skeptic accuracy
|
|
117
|
-
|
|
118
|
-
Confirmed bugs table: # | Severity | STRIDE | CWE | Reachability | File | Lines | Description | Fix | Verification
|
|
119
|
-
|
|
120
|
-
Low-confidence items (flagged for manual review): file + one-line uncertainty reason.
|
|
121
|
-
|
|
122
|
-
<details><summary>Dismissed findings</summary>Table: # | Claim | Skeptic Position | Reason</details>
|
|
135
|
+
If a human-readable report is requested, generate it from the final JSON array.
|
|
136
|
+
The JSON artifact remains canonical.
|
package/prompts/skeptic.md
CHANGED
|
@@ -6,7 +6,10 @@ Read the Hunter findings file completely before starting. Each finding has BUG-I
|
|
|
6
6
|
|
|
7
7
|
## Output Destination
|
|
8
8
|
|
|
9
|
-
Write your Skeptic
|
|
9
|
+
Write your canonical Skeptic artifact as JSON to the file path in your
|
|
10
|
+
assignment (typically `.bug-hunter/skeptic.json`). The Referee reads the JSON
|
|
11
|
+
artifact, not a free-form Markdown note. If the assignment also asks for a
|
|
12
|
+
Markdown companion, that Markdown must be derived from the JSON output.
|
|
10
13
|
|
|
11
14
|
## Scope Rules
|
|
12
15
|
|
|
@@ -91,28 +94,28 @@ Before writing your final summary, verify:
|
|
|
91
94
|
|
|
92
95
|
## Output format
|
|
93
96
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
After all bugs, output:
|
|
108
|
-
|
|
109
|
-
**SUMMARY:**
|
|
110
|
-
- Bugs disproved: [count] (total points claimed: [sum])
|
|
111
|
-
- Bugs accepted as real: [count]
|
|
112
|
-
- Files read during review: [list of files you actually read]
|
|
97
|
+
Write a JSON array. Each item must match this contract:
|
|
98
|
+
|
|
99
|
+
```json
|
|
100
|
+
[
|
|
101
|
+
{
|
|
102
|
+
"bugId": "BUG-1",
|
|
103
|
+
"response": "DISPROVE",
|
|
104
|
+
"analysisSummary": "The route is wrapped by auth middleware before this handler runs, so the claimed bypass is not reachable.",
|
|
105
|
+
"counterEvidence": "src/routes/api.ts:10-21 attaches requireAuth before the handler."
|
|
106
|
+
}
|
|
107
|
+
]
|
|
108
|
+
```
|
|
113
109
|
|
|
114
|
-
|
|
115
|
-
|
|
110
|
+
Rules:
|
|
111
|
+
- Use `response: "ACCEPT"` when the finding stands as a real bug.
|
|
112
|
+
- Use `response: "DISPROVE"` only when your challenge is strong enough to
|
|
113
|
+
survive Referee review.
|
|
114
|
+
- Use `response: "MANUAL_REVIEW"` when you cannot safely disprove or accept the
|
|
115
|
+
finding.
|
|
116
|
+
- Return `[]` when there were no findings to challenge.
|
|
117
|
+
- Keep all reasoning inside `analysisSummary` and optional `counterEvidence`.
|
|
118
|
+
- Do not append summary prose outside the JSON array.
|
|
116
119
|
|
|
117
120
|
## Doc Lookup Tool
|
|
118
121
|
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"schemaVersion": 1,
|
|
4
|
+
"artifact": "coverage",
|
|
5
|
+
"title": "Bug Hunter Coverage Artifact",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["schemaVersion", "iteration", "status", "files", "bugs", "fixes"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"schemaVersion": {
|
|
10
|
+
"type": "integer",
|
|
11
|
+
"minimum": 1
|
|
12
|
+
},
|
|
13
|
+
"iteration": {
|
|
14
|
+
"type": "integer",
|
|
15
|
+
"minimum": 0
|
|
16
|
+
},
|
|
17
|
+
"status": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"enum": ["IN_PROGRESS", "COMPLETE"]
|
|
20
|
+
},
|
|
21
|
+
"files": {
|
|
22
|
+
"type": "array",
|
|
23
|
+
"items": {
|
|
24
|
+
"type": "object",
|
|
25
|
+
"required": ["path", "status"],
|
|
26
|
+
"properties": {
|
|
27
|
+
"path": { "type": "string", "minLength": 1 },
|
|
28
|
+
"status": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"enum": ["pending", "in_progress", "done", "failed"]
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"additionalProperties": false
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
"bugs": {
|
|
37
|
+
"type": "array",
|
|
38
|
+
"items": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"required": ["bugId", "severity", "file", "claim"],
|
|
41
|
+
"properties": {
|
|
42
|
+
"bugId": { "type": "string", "minLength": 1 },
|
|
43
|
+
"severity": {
|
|
44
|
+
"type": "string",
|
|
45
|
+
"enum": ["Critical", "Medium", "Low"]
|
|
46
|
+
},
|
|
47
|
+
"file": { "type": "string", "minLength": 1 },
|
|
48
|
+
"claim": { "type": "string", "minLength": 1 }
|
|
49
|
+
},
|
|
50
|
+
"additionalProperties": false
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"fixes": {
|
|
54
|
+
"type": "array",
|
|
55
|
+
"items": {
|
|
56
|
+
"type": "object",
|
|
57
|
+
"required": ["bugId", "status"],
|
|
58
|
+
"properties": {
|
|
59
|
+
"bugId": { "type": "string", "minLength": 1 },
|
|
60
|
+
"status": { "type": "string", "minLength": 1 }
|
|
61
|
+
},
|
|
62
|
+
"additionalProperties": false
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"additionalProperties": false
|
|
67
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"bugId": "BUG-1",
|
|
4
|
+
"severity": "Critical",
|
|
5
|
+
"category": "security",
|
|
6
|
+
"file": "src/example.ts",
|
|
7
|
+
"lines": "12-16",
|
|
8
|
+
"evidence": "src/example.ts:12-16 unvalidated body flows into exec().",
|
|
9
|
+
"runtimeTrigger": "POST /api/example with body {\"command\":\"rm -rf /\"}",
|
|
10
|
+
"crossReferences": ["src/router.ts:8-14"],
|
|
11
|
+
"confidenceScore": 92
|
|
12
|
+
}
|
|
13
|
+
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"bugId": "BUG-1",
|
|
4
|
+
"severity": "Critical",
|
|
5
|
+
"category": "security",
|
|
6
|
+
"file": "src/example.ts",
|
|
7
|
+
"lines": "12-16",
|
|
8
|
+
"claim": "Request body reaches a dangerous sink without validation.",
|
|
9
|
+
"evidence": "src/example.ts:12-16 unvalidated body flows into exec().",
|
|
10
|
+
"runtimeTrigger": "POST /api/example with body {\"command\":\"rm -rf /\"}",
|
|
11
|
+
"crossReferences": ["src/router.ts:8-14"],
|
|
12
|
+
"confidenceScore": 92,
|
|
13
|
+
"confidenceLabel": "high",
|
|
14
|
+
"stride": "Tampering",
|
|
15
|
+
"cwe": "CWE-78"
|
|
16
|
+
}
|
|
17
|
+
]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"schemaVersion": 1,
|
|
4
|
+
"artifact": "findings",
|
|
5
|
+
"title": "Bug Hunter Findings Artifact",
|
|
6
|
+
"type": "array",
|
|
7
|
+
"items": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"required": [
|
|
10
|
+
"bugId",
|
|
11
|
+
"severity",
|
|
12
|
+
"category",
|
|
13
|
+
"file",
|
|
14
|
+
"lines",
|
|
15
|
+
"claim",
|
|
16
|
+
"evidence",
|
|
17
|
+
"runtimeTrigger",
|
|
18
|
+
"crossReferences",
|
|
19
|
+
"confidenceScore"
|
|
20
|
+
],
|
|
21
|
+
"properties": {
|
|
22
|
+
"bugId": { "type": "string", "minLength": 1 },
|
|
23
|
+
"severity": {
|
|
24
|
+
"type": "string",
|
|
25
|
+
"enum": ["Critical", "Medium", "Low"]
|
|
26
|
+
},
|
|
27
|
+
"category": {
|
|
28
|
+
"type": "string",
|
|
29
|
+
"enum": [
|
|
30
|
+
"logic",
|
|
31
|
+
"security",
|
|
32
|
+
"error-handling",
|
|
33
|
+
"concurrency",
|
|
34
|
+
"edge-case",
|
|
35
|
+
"data-integrity",
|
|
36
|
+
"type-safety",
|
|
37
|
+
"resource-leak",
|
|
38
|
+
"api-contract",
|
|
39
|
+
"cross-file"
|
|
40
|
+
]
|
|
41
|
+
},
|
|
42
|
+
"file": { "type": "string", "minLength": 1 },
|
|
43
|
+
"lines": { "type": "string", "minLength": 1 },
|
|
44
|
+
"claim": { "type": "string", "minLength": 1 },
|
|
45
|
+
"evidence": { "type": "string", "minLength": 1 },
|
|
46
|
+
"runtimeTrigger": { "type": "string", "minLength": 1 },
|
|
47
|
+
"crossReferences": {
|
|
48
|
+
"type": "array",
|
|
49
|
+
"items": { "type": "string", "minLength": 1 }
|
|
50
|
+
},
|
|
51
|
+
"confidenceScore": {
|
|
52
|
+
"type": "number",
|
|
53
|
+
"minimum": 0,
|
|
54
|
+
"maximum": 100
|
|
55
|
+
},
|
|
56
|
+
"confidenceLabel": {
|
|
57
|
+
"type": "string",
|
|
58
|
+
"enum": ["high", "medium", "low"]
|
|
59
|
+
},
|
|
60
|
+
"stride": {
|
|
61
|
+
"type": "string",
|
|
62
|
+
"enum": [
|
|
63
|
+
"Spoofing",
|
|
64
|
+
"Tampering",
|
|
65
|
+
"Repudiation",
|
|
66
|
+
"InfoDisclosure",
|
|
67
|
+
"DoS",
|
|
68
|
+
"ElevationOfPrivilege",
|
|
69
|
+
"N/A"
|
|
70
|
+
]
|
|
71
|
+
},
|
|
72
|
+
"cwe": { "type": "string", "minLength": 1 }
|
|
73
|
+
},
|
|
74
|
+
"additionalProperties": false
|
|
75
|
+
}
|
|
76
|
+
}
|