npm - @codexstar/bug-hunter - Versions diffs - 3.0.0 → 3.0.6 - Mend

@codexstar/bug-hunter 3.0.0 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/CHANGELOG.md +149 -83
package/README.md +150 -15
package/SKILL.md +94 -27
package/agents/openai.yaml +4 -0
package/bin/bug-hunter +9 -3
package/docs/images/2026-03-12-fix-plan-rollout.png +0 -0
package/docs/images/2026-03-12-hero-bug-hunter-overview.png +0 -0
package/docs/images/2026-03-12-machine-readable-artifacts.png +0 -0
package/docs/images/2026-03-12-pr-review-flow.png +0 -0
package/docs/images/2026-03-12-security-pack.png +0 -0
package/docs/images/adversarial-debate.png +0 -0
package/docs/images/doc-verify-fix-plan.png +0 -0
package/docs/images/hero.png +0 -0
package/docs/images/pipeline-overview.png +0 -0
package/docs/images/security-finding-card.png +0 -0
package/docs/plans/2026-03-11-structured-output-migration-plan.md +288 -0
package/docs/plans/2026-03-12-audit-bug-fixes-surgical-plan.md +193 -0
package/docs/plans/2026-03-12-enterprise-security-pack-e2e-plan.md +59 -0
package/docs/plans/2026-03-12-local-security-skills-integration-plan.md +39 -0
package/docs/plans/2026-03-12-pr-review-strategic-fix-flow.md +78 -0
package/evals/evals.json +366 -102
package/modes/extended.md +2 -2
package/modes/fix-loop.md +30 -30
package/modes/fix-pipeline.md +32 -6
package/modes/large-codebase.md +14 -15
package/modes/local-sequential.md +44 -20
package/modes/loop.md +56 -56
package/modes/parallel.md +3 -3
package/modes/scaled.md +2 -2
package/modes/single-file.md +3 -3
package/modes/small.md +11 -11
package/package.json +11 -1
package/prompts/fixer.md +37 -23
package/prompts/hunter.md +39 -20
package/prompts/referee.md +34 -20
package/prompts/skeptic.md +25 -22
package/schemas/coverage.schema.json +67 -0
package/schemas/examples/findings.invalid.json +13 -0
package/schemas/examples/findings.valid.json +17 -0
package/schemas/findings.schema.json +76 -0
package/schemas/fix-plan.schema.json +94 -0
package/schemas/fix-report.schema.json +105 -0
package/schemas/fix-strategy.schema.json +99 -0
package/schemas/recon.schema.json +31 -0
package/schemas/referee.schema.json +46 -0
package/schemas/shared.schema.json +51 -0
package/schemas/skeptic.schema.json +21 -0
package/scripts/bug-hunter-state.cjs +35 -12
package/scripts/code-index.cjs +11 -4
package/scripts/fix-lock.cjs +95 -25
package/scripts/payload-guard.cjs +24 -10
package/scripts/pr-scope.cjs +181 -0
package/scripts/prepublish-guard.cjs +82 -0
package/scripts/render-report.cjs +346 -0
package/scripts/run-bug-hunter.cjs +669 -33
package/scripts/schema-runtime.cjs +273 -0
package/scripts/schema-validate.cjs +40 -0
package/scripts/tests/bug-hunter-state.test.cjs +68 -3
package/scripts/tests/code-index.test.cjs +15 -0
package/scripts/tests/fix-lock.test.cjs +60 -2
package/scripts/tests/fixtures/flaky-worker.cjs +6 -1
package/scripts/tests/fixtures/low-confidence-worker.cjs +8 -2
package/scripts/tests/fixtures/success-worker.cjs +6 -1
package/scripts/tests/payload-guard.test.cjs +154 -2
package/scripts/tests/pr-scope.test.cjs +212 -0
package/scripts/tests/render-report.test.cjs +180 -0
package/scripts/tests/run-bug-hunter.test.cjs +686 -2
package/scripts/tests/security-skills-integration.test.cjs +29 -0
package/scripts/tests/skills-packaging.test.cjs +30 -0
package/scripts/tests/worktree-harvest.test.cjs +67 -1
package/scripts/worktree-harvest.cjs +62 -9
package/skills/README.md +19 -0
package/skills/commit-security-scan/SKILL.md +63 -0
package/skills/security-review/SKILL.md +57 -0
package/skills/threat-model-generation/SKILL.md +47 -0
package/skills/vulnerability-validation/SKILL.md +59 -0
package/templates/subagent-wrapper.md +12 -3
package/modes/_dispatch.md +0 -121

package/evals/evals.json (changed)

@@ -3,8 +3,8 @@
   "evals": [
     {
       "id": 1,
-      "prompt": "/bug-hunter test-fixture/",
-      "expected_output": "Full pipeline execution on the included test fixture (Express app with 6 planted bugs). Should run Recon -> Hunter -> Skeptic -> Referee and produce a final report confirming at least 5 of 6 planted bugs with severity ratings, file paths, and suggested fixes.",
+      "prompt": "/bug-hunter --scan-only test-fixture/",
+      "expected_output": "Scan-only self-test on the included Express fixture. Should run Recon -> Hunter -> Skeptic -> Referee, confirm most planted bugs, and write canonical JSON artifacts plus a rendered report.",
       "files": [
         "test-fixture/server.js",
         "test-fixture/auth.js",
@@ -13,347 +13,611 @@
       ],
       "assertions": [
         {
-          "text": "Pipeline runs all phases: Recon, Hunter, Skeptic, Referee",
+          "text": "Pipeline runs Recon, Hunter, Skeptic, and Referee",
           "type": "content_check"
         },
         {
-          "text": "At least 5 of 6 planted bugs are confirmed in the final report",
+          "text": "Writes .bug-hunter/findings.json, .bug-hunter/referee.json, and .bug-hunter/report.md",
           "type": "content_check"
         },
         {
-          "text": "Each confirmed bug includes file path, line numbers, severity, and suggested fix",
+          "text": "Confirms at least 5 of the 6 planted bugs in the fixture",
           "type": "content_check"
         },
         {
-          "text": "False positives are challenged and filtered by the Skeptic/Referee pipeline",
+          "text": "Rendered report includes mode, files scanned, and coverage metadata",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 2,
+      "prompt": "/bug-hunter src/api/auth.ts",
+      "expected_output": "Single-file scan should skip Recon, run Hunter -> Skeptic -> Referee, and keep the output scoped to the target file while still writing canonical JSON artifacts.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Selects single-file mode when one source file is targeted",
           "type": "content_check"
         },
         {
-          "text": "Final report includes scan metadata (mode, files scanned, coverage)",
+          "text": "Skips Recon for single-file mode",
           "type": "content_check"
         },
         {
-          "text": "Fix pipeline is triggered by default when confirmed bugs exist; only --scan-only disables fixes",
+          "text": "Writes .bug-hunter/findings.json and .bug-hunter/referee.json for the single-file run",
+          "type": "content_check"
+        },
+        {
+          "text": "Referee returns REAL_BUG, NOT_A_BUG, or MANUAL_REVIEW verdicts for the findings",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 2,
-      "prompt": "/bug-hunter src/api/auth.ts",
-      "expected_output": "Single-file mode scan of an auth file. Should skip Recon (not needed for single file), run one Hunter, one Skeptic, and one Referee. Output should focus on security and logic bugs in the auth file specifically.",
+      "id": 3,
+      "prompt": "/bug-hunter -b feature-auth --base develop",
+      "expected_output": "Branch diff mode should diff the branches, filter non-source files, report the resulting scan set, and choose the execution mode from the surviving source files.",
       "files": [],
       "assertions": [
         {
-          "text": "Selects single-file mode (1 file detected)",
+          "text": "Runs git diff --name-only develop...feature-auth to resolve changed files",
+          "type": "content_check"
+        },
+        {
+          "text": "Filters docs, configs, assets, lockfiles, and other non-source files before scanning",
           "type": "content_check"
         },
         {
-          "text": "Skips Recon agent (not needed for single-file mode)",
+          "text": "Reports the number of scannable source files after filtering",
           "type": "content_check"
         },
         {
-          "text": "Hunter scans the target file and reports findings with BUG-ID format",
+          "text": "Chooses small, parallel, extended, scaled, or large-codebase mode from the filtered file count",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 4,
+      "prompt": "/bug-hunter --staged",
+      "expected_output": "Staged mode should scan full contents of staged source files after resolving them through git diff --cached --name-only and filtering non-source files.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Runs git diff --cached --name-only to collect staged files",
           "type": "content_check"
         },
         {
-          "text": "Skeptic challenges the findings with code-based counter-arguments",
+          "text": "Filters non-source files from the staged list before scanning",
           "type": "content_check"
         },
         {
-          "text": "Referee produces a final verdict (REAL BUG or NOT A BUG) for each finding",
+          "text": "Scans full file contents of staged source files rather than scanning only the patch",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 3,
-      "prompt": "/bug-hunter -b feature-auth --base develop",
-      "expected_output": "Branch diff mode. Should run git diff to find changed files between feature-auth and develop branches, filter out non-source files, then run the full pipeline on the changed source files.",
+      "id": 5,
+      "prompt": "/bug-hunter --fix src/",
+      "expected_output": "Default fix mode should run Phase 1, then acquire the fix lock, capture verification baselines, apply eligible fixes, write a machine-readable fix report, and release the lock.",
       "files": [],
       "assertions": [
         {
-          "text": "Runs git diff --name-only to extract changed files between branches",
+          "text": "Creates a git safety branch before applying fixes when git safety is available",
           "type": "content_check"
         },
         {
-          "text": "Filters out non-source files (configs, docs, assets, lockfiles)",
+          "text": "Acquires and releases .bug-hunter/fix.lock around the fix phase",
           "type": "content_check"
         },
         {
-          "text": "Reports the number of source files to scan after filtering",
+          "text": "Captures verification baseline before applying fixes",
           "type": "content_check"
         },
         {
-          "text": "Selects appropriate mode based on file count (small, parallel, extended, etc.)",
+          "text": "Writes .bug-hunter/fix-report.json as the canonical fix artifact",
+          "type": "content_check"
+        },
+        {
+          "text": "Auto-fixes only bugs that pass the confidence eligibility threshold",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 4,
-      "prompt": "/bug-hunter --staged",
-      "expected_output": "Staged file mode for pre-commit checking. Should run git diff --cached --name-only to get staged files, filter non-source files, then scan the staged source files.",
+      "id": 6,
+      "prompt": "/bug-hunter src/",
+      "expected_output": "Loop mode is the default. A normal directory scan should create loop state, iterate until queued files are covered, and track canonical coverage in JSON with a rendered Markdown companion.",
       "files": [],
       "assertions": [
         {
-          "text": "Runs git diff --cached --name-only to get staged files",
+          "text": "Treats loop mode as the default without requiring an explicit --loop flag",
           "type": "content_check"
         },
         {
-          "text": "Filters out non-source files from the staged list",
+          "text": "Creates or updates .bug-hunter/coverage.json as canonical loop state and renders .bug-hunter/coverage.md from it",
           "type": "content_check"
         },
         {
-          "text": "Scans full file contents of staged files (not just diffs)",
+          "text": "Tracks per-file coverage state in coverage.json across iterations",
+          "type": "content_check"
+        },
+        {
+          "text": "Marks completion only when all queued scannable files are done",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 5,
-      "prompt": "/bug-hunter --fix src/",
-      "expected_output": "Full pipeline with auto-fix. After Phase 1 (find & verify), should proceed to Phase 2: create a git branch, acquire single-writer lock, detect test infrastructure, capture test baseline, run Fixer clusters sequentially with checkpoint commits, run post-fix tests, auto-revert regressions, and release lock.",
+      "id": 7,
+      "prompt": "Can you check my Express API for security vulnerabilities? The code is in src/",
+      "expected_output": "Natural-language trigger should invoke the bug-hunter skill and run a security-focused audit with trust-boundary mapping and security-oriented Hunter analysis.",
       "files": [],
       "assertions": [
         {
-          "text": "Creates a git safety branch (bug-hunter-fix-*) before applying fixes",
+          "text": "Triggers bug-hunter from natural language security-audit intent without requiring /bug-hunter",
+          "type": "content_check"
+        },
+        {
+          "text": "Runs Recon to identify architecture, trust boundaries, and high-risk areas",
           "type": "content_check"
         },
         {
-          "text": "Detects test command from package.json or project config",
+          "text": "Hunter prioritizes injection, auth bypass, input validation, and secrets exposure checks",
           "type": "content_check"
         },
         {
-          "text": "Captures test baseline before applying fixes",
+          "text": "Findings use severity labels and canonical JSON fields rather than free-form Markdown only",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 8,
+      "prompt": "/bug-hunter --fix --approve src/auth/",
+      "expected_output": "Approval mode should still run the fix pipeline, but Fixer agents should operate in reviewed mode and report that approval is required for edits.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Sets APPROVE_MODE=true from the --approve flag",
           "type": "content_check"
         },
         {
-          "text": "Fixer agents implement minimal, surgical code changes",
+          "text": "Runs Fixers in reviewed/default mode instead of unattended auto-edit mode",
           "type": "content_check"
         },
         {
-          "text": "Each fix is a separate checkpoint commit with descriptive message",
+          "text": "Tells the user it is running in approval mode",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 9,
+      "prompt": "/bug-hunter huge-repo/",
+      "expected_output": "Large-repo mode should initialize persistent chunk state, process chunks sequentially, and resume from .bug-hunter/state.json when interrupted.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Initializes .bug-hunter/state.json with chunk metadata",
           "type": "content_check"
         },
         {
-          "text": "Post-fix test run compares against baseline (new failures vs pre-existing)",
+          "text": "Processes large scans in sequential chunks and records chunk status",
           "type": "content_check"
         },
         {
-          "text": "Fixes that cause new test failures are auto-reverted",
+          "text": "Resumes from existing .bug-hunter/state.json without rescanning completed chunks",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 10,
+      "prompt": "/bug-hunter src/ (second run with unchanged files)",
+      "expected_output": "A repeat run should apply the hash cache through bug-hunter-state and skip unchanged files before deep scan work starts.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Runs hash-filter against .bug-hunter/state.json before deep scan work",
           "type": "content_check"
         },
         {
-          "text": "Acquires and releases .claude/bug-hunter-fix.lock around fix phase",
+          "text": "Reports skipped unchanged files from the hash cache",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 11,
+      "prompt": "/bug-hunter src/ with malformed subagent payload",
+      "expected_output": "Payload validation should fail before any subagent launch when the generated payload does not match the required contract.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Validates subagent payloads with payload-guard.cjs before launch",
           "type": "content_check"
         },
         {
-          "text": "Auto-fixes only bugs that pass confidence eligibility threshold",
+          "text": "Does not launch a subagent when payload validation fails",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 6,
-      "prompt": "/bug-hunter --loop src/",
-      "expected_output": "Loop mode for thorough coverage. Should create ralph-loop state files, iterate the pipeline until all CRITICAL and HIGH files are scanned, track coverage in .claude/bug-hunter-coverage.md, and mark ALL_TASKS_COMPLETE when done.",
+      "id": 12,
+      "prompt": "/bug-hunter --fix src/ while another fix run is active",
+      "expected_output": "The fix phase should stop cleanly when the single-writer lock cannot be acquired.",
       "files": [],
       "assertions": [
         {
-          "text": "Creates .claude/ralph-loop.local.md state file for loop mode",
+          "text": "Attempts to acquire .bug-hunter/fix.lock before any edits",
           "type": "content_check"
         },
         {
-          "text": "Creates or updates .claude/bug-hunter-coverage.md with machine-parseable format",
+          "text": "Stops Phase 2 with a clear lock-held message when the fix lock is already held",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 13,
+      "prompt": "/bug-hunter --fix src/ with mixed-confidence bugs",
+      "expected_output": "Auto-fix should edit only eligible high-confidence bugs and leave the rest in manual review.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Applies the >=75 confidence threshold for auto-fix eligibility",
           "type": "content_check"
         },
         {
-          "text": "Tracks file coverage status (DONE, PARTIAL, SKIPPED) per iteration",
+          "text": "Keeps low-confidence bugs in manual review instead of auto-editing them",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 14,
+      "prompt": "/bug-hunter src/ on a CLI without spawn_agent",
+      "expected_output": "The skill should select the best available orchestration backend at runtime and fall back to local-sequential execution when delegation backends are unavailable.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Chooses AGENT_BACKEND during preflight based on available runtime tools",
           "type": "content_check"
         },
         {
-          "text": "Subsequent iterations only scan uncovered files (no re-scanning DONE files)",
+          "text": "Falls back to the next backend when a preferred launch path fails",
           "type": "content_check"
         },
         {
-          "text": "Marks ALL_TASKS_COMPLETE when all CRITICAL and HIGH files show DONE",
+          "text": "Completes the run with local-sequential fallback when no delegation backend is available",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 7,
-      "prompt": "Can you check my Express API for security vulnerabilities? The code is in src/",
-      "expected_output": "Should trigger the bug-hunter skill (even though the user didn't say /bug-hunter) and run a security-focused scan on the src/ directory. The deep Hunter should prioritize security findings, with optional triage hints when enabled.",
+      "id": 15,
+      "prompt": "/bug-hunter huge-repo/ with flaky chunk worker",
+      "expected_output": "The chunk orchestrator should enforce retries with backoff and write attempt details to the canonical run journal.",
       "files": [],
       "assertions": [
         {
-          "text": "Triggers bug-hunter skill from natural language (security audit request)",
+          "text": "Uses run-bug-hunter.cjs for autonomous chunk orchestration",
           "type": "content_check"
         },
         {
-          "text": "Runs Recon to map architecture and identify trust boundaries",
+          "text": "Retries timed out or failed chunks according to max-retries and backoff policy",
           "type": "content_check"
         },
         {
-          "text": "Deep Hunter focuses on injection, auth bypass, input validation, and secrets exposure in security audit requests",
+          "text": "Writes attempt events to .bug-hunter/run.log",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 16,
+      "prompt": "/bug-hunter --deps src/",
+      "expected_output": "Dependency scan mode should run the dependency audit helper, write dep-findings output, and feed reachable dependency issues into Hunter context.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Runs scripts/dep-scan.cjs when --deps is supplied",
           "type": "content_check"
         },
         {
-          "text": "Output includes severity ratings (Critical, Medium, Low) for each finding",
+          "text": "Writes .bug-hunter/dep-findings.json for dependency scan output",
           "type": "content_check"
         },
         {
-          "text": "Framework-specific protections are checked (Express middleware, helmet, etc.)",
+          "text": "Includes reachable dependency findings in Hunter analysis context",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 8,
-      "prompt": "/bug-hunter --fix --approve src/auth/",
-      "expected_output": "Fix mode with approval. Should find bugs in auth directory, then fix them but prompt the user before each edit (approval mode). Fixer agents run in default mode rather than auto mode.",
+      "id": 17,
+      "prompt": "/bug-hunter --threat-model src/",
+      "expected_output": "Threat-model mode should load or generate a STRIDE threat model and feed it into Recon and Hunter.",
       "files": [],
       "assertions": [
         {
-          "text": "APPROVE_MODE is set to true from --approve flag",
+          "text": "Loads an existing .bug-hunter/threat-model.md or generates one when missing",
           "type": "content_check"
         },
         {
-          "text": "Fixer agents run in mode: default (user reviews each edit)",
+          "text": "Marks THREAT_MODEL_AVAILABLE and uses the threat model in Recon and Hunter context",
+          "type": "content_check"
+        },
+        {
+          "text": "Keeps threat-model generation non-blocking relative to the rest of the bug-hunt flow",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 18,
+      "prompt": "/bug-hunter --fix --dry-run src/",
+      "expected_output": "Dry-run fix mode should build the fix plan and produce machine-readable fix output without editing files, committing, or taking the lock.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Sets DRY_RUN_MODE=true and forces FIX_MODE=true when --dry-run is provided",
           "type": "content_check"
         },
         {
-          "text": "Reports 'Running in approval mode' to the user",
+          "text": "Produces .bug-hunter/fix-report.json with dry_run set to true",
           "type": "content_check"
         },
         {
-          "text": "Fixes are still committed as individual checkpoint commits",
+          "text": "Skips file edits, git commits, and fix lock acquisition in dry-run mode",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 9,
-      "prompt": "/bug-hunter huge-repo/",
-      "expected_output": "Large-repo run should initialize .claude/bug-hunter-state.json, split files into sequential chunks, and resume from state if interrupted.",
+      "id": 19,
+      "prompt": "/bug-hunter --autonomous src/",
+      "expected_output": "Autonomous mode should force fix mode and run canary-first, confidence-gated fixes without waiting for per-edit approval.",
       "files": [],
       "assertions": [
         {
-          "text": "Initializes bug-hunter-state.json with chunk metadata",
+          "text": "Sets AUTONOMOUS_MODE=true and forces FIX_MODE=true when --autonomous is supplied",
           "type": "content_check"
         },
         {
-          "text": "Processes chunks sequentially and marks each chunk state",
+          "text": "Runs canary-first, confidence-gated fix rollout in autonomous mode",
           "type": "content_check"
         },
         {
-          "text": "Can resume from existing state file without rescanning completed chunks",
+          "text": "Does not require approval-mode prompts for unattended autonomous fixes",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 10,
-      "prompt": "/bug-hunter src/ (second run with unchanged files)",
-      "expected_output": "Hash cache should skip unchanged files and focus scan effort on changed files only.",
+      "id": 20,
+      "prompt": "/bug-hunter --pr current",
+      "expected_output": "PR review mode should resolve the current PR scope, save PR metadata, and scan the resolved changed files rather than the whole repository.",
       "files": [],
       "assertions": [
         {
-          "text": "Runs hash-filter against bug-hunter-state.json before deep scan",
+          "text": "Uses scripts/pr-scope.cjs to resolve current PR metadata and changed files",
           "type": "content_check"
         },
         {
-          "text": "Reports skipped unchanged files from cache",
+          "text": "Writes .bug-hunter/pr-scope.json for later reporting",
+          "type": "content_check"
+        },
+        {
+          "text": "Scans the resolved changed files as the PR review scope",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 11,
-      "prompt": "/bug-hunter src/ with malformed subagent payload",
-      "expected_output": "Pipeline should fail fast before spawning subagents when payload validation fails.",
+      "id": 21,
+      "prompt": "/bug-hunter --pr recent --scan-only",
+      "expected_output": "Recent-PR review mode should resolve the most recent PR through GitHub metadata, limit analysis to its changed files, and stop after reporting.",
       "files": [],
       "assertions": [
         {
-          "text": "Validates payload via payload-guard.cjs before each subagent launch",
+          "text": "Resolves the most recent PR through pr-scope using GitHub metadata",
+          "type": "content_check"
+        },
+        {
+          "text": "Keeps FIX_MODE disabled because scan-only was requested",
           "type": "content_check"
         },
         {
-          "text": "Does not launch subagent when payload validation fails",
+          "text": "Produces the normal findings/referee/report artifacts for the PR-scoped review",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 12,
-      "prompt": "/bug-hunter --fix src/ while another fix run is active",
-      "expected_output": "Fix phase should stop when single-writer lock cannot be acquired.",
+      "id": 22,
+      "prompt": "/bug-hunter --plan-only src/",
+      "expected_output": "Plan-only mode should build a remediation strategy and fix plan but stop before the Fixer edits code.",
       "files": [],
       "assertions": [
         {
-          "text": "Attempts to acquire .claude/bug-hunter-fix.lock before any edits",
+          "text": "Builds .bug-hunter/fix-strategy.json and .bug-hunter/fix-strategy.md before fix execution",
+          "type": "content_check"
+        },
+        {
+          "text": "Builds .bug-hunter/fix-plan.json while PLAN_ONLY_MODE is active",
           "type": "content_check"
         },
         {
-          "text": "Stops Phase 2 with clear lock-held message when lock is already held",
+          "text": "Stops before the Fixer edits files when --plan-only is supplied",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 13,
-      "prompt": "/bug-hunter --fix src/ with mixed-confidence bugs",
-      "expected_output": "Auto-fix should run only on high-confidence bugs and leave low-confidence bugs as manual review.",
+      "id": 23,
+      "prompt": "/bug-hunter --plan src/ then /bug-hunter --preview src/ then /bug-hunter --safe src/ then /bug-hunter --last-pr --review",
+      "expected_output": "Shortcut aliases should map cleanly onto their canonical behaviors without changing the underlying execution semantics.",
       "files": [],
       "assertions": [
         {
-          "text": "Applies confidence threshold gating (>=75%) for auto-fix eligibility",
+          "text": "Treats --plan as an alias for --plan-only",
           "type": "content_check"
         },
         {
-          "text": "Reports low-confidence bugs as manual-review and does not auto-edit them",
+          "text": "Treats --preview as an alias for --fix --dry-run",
+          "type": "content_check"
+        },
+        {
+          "text": "Treats --safe as an alias for --fix --approve",
+          "type": "content_check"
+        },
+        {
+          "text": "Treats --last-pr and --review as aliases for --pr recent and --scan-only",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 14,
-      "prompt": "/bug-hunter src/ on a CLI without spawn_agent",
-      "expected_output": "Pipeline should auto-select the available orchestration backend and continue. If remote orchestration is unavailable, it should fall back to local sequential execution.",
+      "id": 24,
+      "prompt": "/bug-hunter --fix src/ with a high-confidence architectural-remediation finding",
+      "expected_output": "Execution gating should honor fix-strategy classifications so non-autofix findings never enter the executable canary or rollout queue.",
       "files": [],
       "assertions": [
         {
-          "text": "Selects AGENT_BACKEND in preflight based on available runtime tools",
+          "text": "Builds fix-strategy classifications before building the executable fix plan",
           "type": "content_check"
         },
         {
-          "text": "Falls back to next backend when launch fails",
+          "text": "Excludes manual-review, larger-refactor, and architectural-remediation findings from fixPlan canary/rollout",
           "type": "content_check"
         },
         {
-          "text": "Completes run with local-sequential fallback when no delegation backend is available",
+          "text": "Allows only autofixEligible safe-autofix findings into the executable fix queue",
           "type": "content_check"
         }
       ]
     },
     {
-      "id": 15,
-      "prompt": "/bug-hunter huge-repo/ with flaky chunk worker",
-      "expected_output": "Orchestrator should enforce per-chunk timeout, retry failed chunk once with backoff, and persist attempt details in run journal.",
+      "id": 25,
+      "prompt": "/bug-hunter --pr current with gh unavailable and no trustworthy default base branch",
+      "expected_output": "Current-PR fallback should fail explicitly when it cannot determine a trustworthy base branch instead of silently assuming main.",
       "files": [],
       "assertions": [
         {
-          "text": "Uses run-bug-hunter.cjs for autonomous chunk orchestration",
+          "text": "Uses the discovered default branch or explicit --base for current-branch git fallback",
+          "type": "content_check"
+        },
+        {
+          "text": "Fails explicitly when no trustworthy base branch can be determined for current PR fallback",
+          "type": "content_check"
+        },
+        {
+          "text": "Does not silently assume main for current-PR fallback scope resolution",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 26,
+      "prompt": "/bug-hunter concurrent query-bugs and expired live fix-lock scenarios",
+      "expected_output": "Utility helpers should preserve correctness under failure and concurrency pressure.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "query-bugs uses invocation-scoped temp seed files and cleans them up even on failure",
+          "type": "content_check"
+        },
+        {
+          "text": "fix-lock does not recover an expired lock when the recorded owner PID is still alive",
+          "type": "content_check"
+        },
+        {
+          "text": "Reports a live-owner lock conflict instead of allowing overlapping fixers",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 27,
+      "prompt": "/bug-hunter --pr-security",
+      "expected_output": "Enterprise PR security review should route through the bundled local commit-security-scan workflow, using PR scope, threat-model context, and dependency-awareness without editing code.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Treats --pr-security as PR-scoped security review with FIX_MODE disabled",
+          "type": "content_check"
+        },
+        {
+          "text": "Loads the bundled local skills/commit-security-scan/SKILL.md guidance for PR-focused security review",
+          "type": "content_check"
+        },
+        {
+          "text": "Combines PR scope resolution with threat-model and dependency-scan context",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 28,
+      "prompt": "/bug-hunter --security-review src/",
+      "expected_output": "Enterprise security-review mode should route through the bundled local security-review workflow and combine threat model, code review, dependency findings, and security validation semantics.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Treats --security-review as a bundled enterprise security workflow with FIX_MODE disabled",
+          "type": "content_check"
+        },
+        {
+          "text": "Loads the bundled local skills/security-review/SKILL.md guidance during execution",
+          "type": "content_check"
+        },
+        {
+          "text": "Runs with threat-model and dependency-scan context enabled",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 29,
+      "prompt": "/bug-hunter --threat-model src/ when no threat model exists yet",
+      "expected_output": "Threat-model mode should route through the bundled local threat-model-generation skill and produce Bug Hunter-native threat-model artifacts.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Loads the bundled local skills/threat-model-generation/SKILL.md before generating the threat model",
+          "type": "content_check"
+        },
+        {
+          "text": "Writes .bug-hunter/threat-model.md and .bug-hunter/security-config.json",
+          "type": "content_check"
+        },
+        {
+          "text": "Keeps all threat-model artifacts under .bug-hunter instead of external .factory paths",
+          "type": "content_check"
+        }
+      ]
+    },
+    {
+      "id": 30,
+      "prompt": "/bug-hunter --validate-security src/ with confirmed security findings",
+      "expected_output": "Security-validation mode should route through the bundled local vulnerability-validation skill and enrich confirmed security findings with exploitability-oriented reasoning.",
+      "files": [],
+      "assertions": [
+        {
+          "text": "Loads the bundled local skills/vulnerability-validation/SKILL.md when security validation is requested",
           "type": "content_check"
         },
         {
-          "text": "Retries timed out/failed chunk according to max-retries and backoff policy",
+          "text": "Re-checks reachability, exploitability, PoC quality, and CVSS details for confirmed security findings",
           "type": "content_check"
         },
         {
-          "text": "Writes attempt events to .claude/bug-hunter-run.log",
+          "text": "Uses Bug Hunter-native artifacts rather than a separate external validation pipeline",
           "type": "content_check"
         }
       ]