@vigolium/piolium 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271)
  1. package/LICENSE +21 -0
  2. package/README.md +117 -0
  3. package/agents/access-auditor.md +300 -0
  4. package/agents/assumption-breaker.md +154 -0
  5. package/agents/attack-designer.md +116 -0
  6. package/agents/code-scanner.md +139 -0
  7. package/agents/concurrency-auditor.md +238 -0
  8. package/agents/confirm-writer.md +257 -0
  9. package/agents/context-reviewer.md +274 -0
  10. package/agents/cross-verifier.md +165 -0
  11. package/agents/cve-scout.md +381 -0
  12. package/agents/env-builder.md +282 -0
  13. package/agents/env-profiler.md +205 -0
  14. package/agents/evidence-collector.md +140 -0
  15. package/agents/finding-grader.md +142 -0
  16. package/agents/finding-writer.md +148 -0
  17. package/agents/flow-tracer.md +106 -0
  18. package/agents/goal-backtracer.md +146 -0
  19. package/agents/history-miner.md +467 -0
  20. package/agents/independent-verifier.md +118 -0
  21. package/agents/intent-mapper.md +183 -0
  22. package/agents/longshot-collector.md +128 -0
  23. package/agents/longshot-prober.md +126 -0
  24. package/agents/patch-auditor.md +73 -0
  25. package/agents/poc-author.md +124 -0
  26. package/agents/poc-runner.md +194 -0
  27. package/agents/probe-lead.md +269 -0
  28. package/agents/red-challenger.md +101 -0
  29. package/agents/report-composer.md +208 -0
  30. package/agents/review-adjudicator.md +216 -0
  31. package/agents/spec-auditor.md +155 -0
  32. package/agents/taint-tracer.md +265 -0
  33. package/agents/test-locator.md +209 -0
  34. package/agents/threat-modeler.md +132 -0
  35. package/agents/variant-scanner.md +108 -0
  36. package/agents/variant-spotter.md +110 -0
  37. package/bin/piolium.mjs +376 -0
  38. package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
  39. package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
  40. package/extensions/piolium/agent-runner.ts +322 -0
  41. package/extensions/piolium/agents.ts +266 -0
  42. package/extensions/piolium/audit-state.ts +522 -0
  43. package/extensions/piolium/bundled-resources.ts +97 -0
  44. package/extensions/piolium/candidate-scan.ts +966 -0
  45. package/extensions/piolium/command-target.ts +177 -0
  46. package/extensions/piolium/console-stream.ts +57 -0
  47. package/extensions/piolium/export-results.ts +380 -0
  48. package/extensions/piolium/findings.ts +448 -0
  49. package/extensions/piolium/heartbeat.ts +182 -0
  50. package/extensions/piolium/help.ts +234 -0
  51. package/extensions/piolium/index.ts +1865 -0
  52. package/extensions/piolium/longshot.ts +530 -0
  53. package/extensions/piolium/matcher-suggestions.ts +196 -0
  54. package/extensions/piolium/matcher-utils.ts +83 -0
  55. package/extensions/piolium/modes/balanced.ts +750 -0
  56. package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
  57. package/extensions/piolium/modes/confirm.ts +697 -0
  58. package/extensions/piolium/modes/deep.ts +917 -0
  59. package/extensions/piolium/modes/diff.ts +177 -0
  60. package/extensions/piolium/modes/lite.ts +540 -0
  61. package/extensions/piolium/modes/longshot.ts +595 -0
  62. package/extensions/piolium/modes/merge.ts +204 -0
  63. package/extensions/piolium/modes/phase-runner.ts +267 -0
  64. package/extensions/piolium/modes/reinvest.ts +546 -0
  65. package/extensions/piolium/modes/revisit.ts +279 -0
  66. package/extensions/piolium/modes.ts +48 -0
  67. package/extensions/piolium/phase-labels.ts +123 -0
  68. package/extensions/piolium/phase-status-strip.ts +92 -0
  69. package/extensions/piolium/prompt-prefix-editor.ts +39 -0
  70. package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
  71. package/extensions/piolium/recon.ts +409 -0
  72. package/extensions/piolium/result-stats.ts +105 -0
  73. package/extensions/piolium/retry.ts +120 -0
  74. package/extensions/piolium/scheduler.ts +212 -0
  75. package/extensions/piolium/secrets.ts +368 -0
  76. package/extensions/piolium/tools/web-tools.ts +148 -0
  77. package/package.json +77 -0
  78. package/skills/agentic-actions-auditor/SKILL.md +327 -0
  79. package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
  80. package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
  81. package/skills/agentic-actions-auditor/references/foundations.md +94 -0
  82. package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
  83. package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
  84. package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
  85. package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
  86. package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
  87. package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
  88. package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
  89. package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
  90. package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
  91. package/skills/audit/SKILL.md +562 -0
  92. package/skills/audit/assets/icon.svg +7 -0
  93. package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
  94. package/skills/audit/references/adversarial-review.md +148 -0
  95. package/skills/audit/references/architecture-aware-sast.md +306 -0
  96. package/skills/audit/references/audit-workflow.md +737 -0
  97. package/skills/audit/references/chamber-protocol.md +384 -0
  98. package/skills/audit/references/creative-attack-modes.md +221 -0
  99. package/skills/audit/references/deep-analysis.md +273 -0
  100. package/skills/audit/references/domain-attack-playbooks.md +1129 -0
  101. package/skills/audit/references/knowledge-base-template.md +513 -0
  102. package/skills/audit/references/real-env-validation.md +191 -0
  103. package/skills/audit/references/report-templates.md +417 -0
  104. package/skills/audit/references/triage-and-prereqs.md +134 -0
  105. package/skills/audit/scripts/consolidate_drafts.py +554 -0
  106. package/skills/audit/scripts/partition_findings.py +152 -0
  107. package/skills/audit/scripts/rg-hotspots.sh +121 -0
  108. package/skills/audit/scripts/stamp_file_state.py +349 -0
  109. package/skills/code-reviewer/SKILL.md +65 -0
  110. package/skills/codeql/SKILL.md +281 -0
  111. package/skills/codeql/references/build-fixes.md +90 -0
  112. package/skills/codeql/references/diagnostic-query-templates.md +339 -0
  113. package/skills/codeql/references/extension-yaml-format.md +209 -0
  114. package/skills/codeql/references/important-only-suite.md +153 -0
  115. package/skills/codeql/references/language-details.md +207 -0
  116. package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
  117. package/skills/codeql/references/performance-tuning.md +111 -0
  118. package/skills/codeql/references/quality-assessment.md +172 -0
  119. package/skills/codeql/references/ruleset-catalog.md +63 -0
  120. package/skills/codeql/references/run-all-suite.md +92 -0
  121. package/skills/codeql/references/sarif-processing.md +79 -0
  122. package/skills/codeql/references/threat-models.md +51 -0
  123. package/skills/codeql/workflows/build-database.md +280 -0
  124. package/skills/codeql/workflows/create-data-extensions.md +261 -0
  125. package/skills/codeql/workflows/run-analysis.md +301 -0
  126. package/skills/differential-review/SKILL.md +220 -0
  127. package/skills/differential-review/adversarial.md +203 -0
  128. package/skills/differential-review/methodology.md +234 -0
  129. package/skills/differential-review/patterns.md +300 -0
  130. package/skills/differential-review/reporting.md +369 -0
  131. package/skills/fp-check/SKILL.md +125 -0
  132. package/skills/fp-check/references/bug-class-verification.md +114 -0
  133. package/skills/fp-check/references/deep-verification.md +143 -0
  134. package/skills/fp-check/references/evidence-templates.md +91 -0
  135. package/skills/fp-check/references/false-positive-patterns.md +115 -0
  136. package/skills/fp-check/references/gate-reviews.md +27 -0
  137. package/skills/fp-check/references/standard-verification.md +78 -0
  138. package/skills/insecure-defaults/SKILL.md +117 -0
  139. package/skills/insecure-defaults/references/examples.md +409 -0
  140. package/skills/last30days/SKILL.md +444 -0
  141. package/skills/sarif-parsing/SKILL.md +483 -0
  142. package/skills/sarif-parsing/resources/jq-queries.md +162 -0
  143. package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
  144. package/skills/security-threat-model/LICENSE.txt +201 -0
  145. package/skills/security-threat-model/SKILL.md +81 -0
  146. package/skills/security-threat-model/agents/openai.yaml +4 -0
  147. package/skills/security-threat-model/references/prompt-template.md +255 -0
  148. package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
  149. package/skills/semgrep/SKILL.md +212 -0
  150. package/skills/semgrep/references/rulesets.md +162 -0
  151. package/skills/semgrep/references/scan-modes.md +110 -0
  152. package/skills/semgrep/references/scanner-task-prompt.md +140 -0
  153. package/skills/semgrep/scripts/merge_sarif.py +203 -0
  154. package/skills/semgrep/workflows/scan-workflow.md +311 -0
  155. package/skills/semgrep-rule-creator/SKILL.md +168 -0
  156. package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
  157. package/skills/semgrep-rule-creator/references/workflow.md +240 -0
  158. package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
  159. package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
  160. package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
  161. package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
  162. package/skills/sharp-edges/SKILL.md +292 -0
  163. package/skills/sharp-edges/references/auth-patterns.md +252 -0
  164. package/skills/sharp-edges/references/case-studies.md +274 -0
  165. package/skills/sharp-edges/references/config-patterns.md +333 -0
  166. package/skills/sharp-edges/references/crypto-apis.md +190 -0
  167. package/skills/sharp-edges/references/lang-c.md +205 -0
  168. package/skills/sharp-edges/references/lang-csharp.md +285 -0
  169. package/skills/sharp-edges/references/lang-go.md +270 -0
  170. package/skills/sharp-edges/references/lang-java.md +263 -0
  171. package/skills/sharp-edges/references/lang-javascript.md +269 -0
  172. package/skills/sharp-edges/references/lang-kotlin.md +265 -0
  173. package/skills/sharp-edges/references/lang-php.md +245 -0
  174. package/skills/sharp-edges/references/lang-python.md +274 -0
  175. package/skills/sharp-edges/references/lang-ruby.md +273 -0
  176. package/skills/sharp-edges/references/lang-rust.md +272 -0
  177. package/skills/sharp-edges/references/lang-swift.md +287 -0
  178. package/skills/sharp-edges/references/language-specific.md +588 -0
  179. package/skills/spec-to-code-compliance/SKILL.md +357 -0
  180. package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
  181. package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
  182. package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
  183. package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
  184. package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
  185. package/skills/variant-analysis/METHODOLOGY.md +327 -0
  186. package/skills/variant-analysis/SKILL.md +142 -0
  187. package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
  188. package/skills/variant-analysis/resources/codeql/go.ql +69 -0
  189. package/skills/variant-analysis/resources/codeql/java.ql +71 -0
  190. package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
  191. package/skills/variant-analysis/resources/codeql/python.ql +80 -0
  192. package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
  193. package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
  194. package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
  195. package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
  196. package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
  197. package/skills/variant-analysis/resources/variant-report-template.md +75 -0
  198. package/skills/vuln-report/SKILL.md +137 -0
  199. package/skills/vuln-report/agents/openai.yaml +4 -0
  200. package/skills/vuln-report/references/report-template.md +135 -0
  201. package/skills/wooyun-legacy/SKILL.md +367 -0
  202. package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
  203. package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
  204. package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
  205. package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
  206. package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
  207. package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
  208. package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
  209. package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
  210. package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
  211. package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
  212. package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
  213. package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
  214. package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
  215. package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
  216. package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
  217. package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
  218. package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
  219. package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
  220. package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
  221. package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
  222. package/skills/wooyun-legacy/references/xss.md +746 -0
  223. package/skills/zeroize-audit/SKILL.md +371 -0
  224. package/skills/zeroize-audit/configs/c.yaml +21 -0
  225. package/skills/zeroize-audit/configs/default.yaml +128 -0
  226. package/skills/zeroize-audit/configs/rust.yaml +83 -0
  227. package/skills/zeroize-audit/prompts/report_template.md +238 -0
  228. package/skills/zeroize-audit/prompts/system.md +163 -0
  229. package/skills/zeroize-audit/prompts/task.md +97 -0
  230. package/skills/zeroize-audit/references/compile-commands.md +231 -0
  231. package/skills/zeroize-audit/references/detection-strategy.md +191 -0
  232. package/skills/zeroize-audit/references/ir-analysis.md +252 -0
  233. package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
  234. package/skills/zeroize-audit/references/poc-generation.md +470 -0
  235. package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
  236. package/skills/zeroize-audit/schemas/input.json +83 -0
  237. package/skills/zeroize-audit/schemas/output.json +140 -0
  238. package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
  239. package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
  240. package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
  241. package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
  242. package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
  243. package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
  244. package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
  245. package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
  246. package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
  247. package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
  248. package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
  249. package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
  250. package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
  251. package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
  252. package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
  253. package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
  254. package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
  255. package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
  256. package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
  257. package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
  258. package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
  259. package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
  260. package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
  261. package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
  262. package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
  263. package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
  264. package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
  265. package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
  266. package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
  267. package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
  268. package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
  269. package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
  270. package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
  271. package/themes/piolium-srcery.json +94 -0
@@ -0,0 +1,110 @@
1
+ # Scan Modes Reference
2
+
3
+ ## Mode: Run All
4
+
5
+ Full scan with all rulesets and all severity levels. This is the current default behavior: no filtering is applied, and every finding is reported and triaged.
6
+
7
+ ## Mode: Important Only
8
+
9
+ Focused on high-confidence security vulnerabilities. Excludes code quality, best practices, and low-confidence audit findings.
10
+
11
+ ### Pre-Filter: CLI Severity Flag
12
+
13
+ Add these flags to every `semgrep` command:
14
+
15
+ ```bash
16
+ --severity MEDIUM --severity HIGH --severity CRITICAL
17
+ ```
18
+
19
+ This excludes LOW/INFO severity findings at scan time, reducing output volume before post-filtering.
20
+
21
+ ### Post-Filter: Metadata Criteria
22
+
23
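+ After scanning, filter each JSON result file to keep only findings that satisfy ALL of the criteria below. A finding that lacks one of these metadata fields is treated as passing that criterion: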
24
+
25
+ | Metadata Field | Accepted Values | Rationale |
26
+ |---|---|---|
27
+ | `extra.metadata.category` | `"security"` | Excludes correctness, best-practice, maintainability, performance |
28
+ | `extra.metadata.confidence` | `"MEDIUM"`, `"HIGH"` | Excludes low-precision rules (high false positive rate) |
29
+ | `extra.metadata.impact` | `"MEDIUM"`, `"HIGH"` | Excludes low-impact informational findings |
30
+
31
+ **Third-party rules** (Trail of Bits, 0xdea, Decurity, etc.) may not have `confidence`/`impact`/`category` metadata. Findings **without** these metadata fields are **kept** — we cannot filter what is not annotated, and third-party rules are typically security-focused.
32
+
33
+ ### Semgrep Metadata Background
34
+
35
+ Semgrep security rules have these metadata fields (required for `category: security` in the official registry):
36
+
37
+ | Field | Purpose | Values |
38
+ |---|---|---|
39
+ | `severity` (top-level) | Overall rule severity, derived from likelihood × impact | `LOW`, `MEDIUM`, `HIGH`, `CRITICAL` |
40
+ | `category` | Rule category | `security`, `correctness`, `best-practice`, `maintainability`, `performance` |
41
+ | `confidence` | True positive rate of the rule (precision) | `LOW`, `MEDIUM`, `HIGH` |
42
+ | `impact` | Potential damage if vulnerability is exploited | `LOW`, `MEDIUM`, `HIGH` |
43
+ | `likelihood` | How likely the vulnerability is exploitable | `LOW`, `MEDIUM`, `HIGH` |
44
+ | `subcategory` | Finding type | `vuln`, `audit`, `secure default` |
45
+
46
+ Key relationship: `severity = f(likelihood, impact)` while `confidence` is independent (describes rule quality, not vulnerability severity).
47
+
48
+ ### Post-Filter jq Command
49
+
50
+ Apply to each JSON result file after scanning:
51
+
52
+ ```bash
53
+ # Filter a single result file
54
+ jq '{
55
+ results: [.results[] |
56
+ ((.extra.metadata.category // "security") | ascii_downcase) as $cat |
57
+ ((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf |
58
+ ((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp |
59
+ select(
60
+ ($cat == "security") and
61
+ ($conf == "MEDIUM" or $conf == "HIGH") and
62
+ ($imp == "MEDIUM" or $imp == "HIGH")
63
+ )
64
+ ],
65
+ errors: .errors,
66
+ paths: .paths
67
+ }' "$f" > "${f%.json}-important.json"
68
+ ```
69
+
70
+ The `//` fallbacks (`// "security"`, `// "HIGH"`) substitute a passing value for any metadata field that is missing, so findings from third-party rules without metadata pass every filter by default.
71
+
72
+ ### Filter All Result Files in a Directory
73
+
74
+ Raw scan output lives in `$OUTPUT_DIR/raw/`. The filter creates `*-important.json` files alongside the originals — the raw files are preserved unmodified.
75
+
76
+ ```bash
77
+ # Apply important-only filter to all scan result JSON files in raw/
78
+ for f in "$OUTPUT_DIR/raw"/*-*.json; do
79
+ [[ "$f" == *-triage.json || "$f" == *-important.json ]] && continue
80
+ jq '{
81
+ results: [.results[] |
82
+ ((.extra.metadata.category // "security") | ascii_downcase) as $cat |
83
+ ((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf |
84
+ ((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp |
85
+ select(
86
+ ($cat == "security") and
87
+ ($conf == "MEDIUM" or $conf == "HIGH") and
88
+ ($imp == "MEDIUM" or $imp == "HIGH")
89
+ )
90
+ ],
91
+ errors: .errors,
92
+ paths: .paths
93
+ }' "$f" > "${f%.json}-important.json"
94
+ BEFORE=$(jq '.results | length' "$f")
95
+ AFTER=$(jq '.results | length' "${f%.json}-important.json")
96
+ echo "$f: $BEFORE → $AFTER findings (filtered $(( BEFORE - AFTER )))"
97
+ done
98
+ ```
99
+
100
+ ### Scanner Task Modifications
101
+
102
+ The scanner command template contains a `[SEVERITY_FLAGS]` placeholder; in important-only mode, fill it with the severity flags:
103
+
104
+ ```bash
105
+ semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/raw/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/raw/[lang]-[ruleset].sarif [TARGET] &
106
+ ```
107
+
108
+ Where `[SEVERITY_FLAGS]` is:
109
+ - **Run all**: *(empty)*
110
+ - **Important only**: `--severity MEDIUM --severity HIGH --severity CRITICAL`
@@ -0,0 +1,140 @@
1
+ # Scanner Subagent Task Prompt
2
+
3
+ Use this prompt template when spawning scanner Tasks in Step 4. Use `subagent_type: static-analysis:semgrep-scanner`.
4
+
5
+ ## Template
6
+
7
+ ```
8
+ You are a Semgrep scanner for [LANGUAGE_CATEGORY].
9
+
10
+ ## Task
11
+ Run Semgrep scans for [LANGUAGE] files and save results to [OUTPUT_DIR]/raw.
12
+
13
+ ## Pro Engine Status: [PRO_AVAILABLE: true/false]
14
+
15
+ ## Scan Mode: [SCAN_MODE: run-all/important-only]
16
+
17
+ ## APPROVED RULESETS (from user-confirmed plan)
18
+ [LIST EXACT RULESETS USER APPROVED - DO NOT SUBSTITUTE]
19
+
20
+ Example:
21
+ - p/python
22
+ - p/django
23
+ - p/security-audit
24
+ - p/secrets
25
+ - https://github.com/trailofbits/semgrep-rules
26
+
27
+ ## Commands to Run (in parallel)
28
+
29
+ ### Clone GitHub URL rulesets first:
30
+ ```bash
31
+ mkdir -p [OUTPUT_DIR]/repos
32
+ # For each GitHub URL ruleset, clone into [OUTPUT_DIR]/repos/[name]:
33
+ git clone --depth 1 https://github.com/org/repo [OUTPUT_DIR]/repos/repo-name
34
+ ```
35
+
36
+ ### Generate commands for EACH approved ruleset:
37
+ ```bash
38
+ semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] [INCLUDE_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/raw/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/raw/[lang]-[ruleset].sarif [TARGET] &
39
+ ```
40
+
41
+ Wait for all to complete:
42
+ ```bash
43
+ wait
44
+ ```
45
+
46
+ ### Clean up cloned repos:
47
+ ```bash
48
+ [ -n "[OUTPUT_DIR]" ] && rm -rf [OUTPUT_DIR]/repos
49
+ ```
50
+
51
+ ## Critical Rules
52
+ - Use ONLY the rulesets listed above - do not add or remove any
53
+ - Always use --metrics=off (prevents sending telemetry to Semgrep servers)
54
+ - Use --pro when Pro is available (enables cross-file taint tracking)
55
+ - If scan mode is **important-only**, add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every command
56
+ - If scan mode is **run-all**, do NOT add severity flags
57
+ - Run all rulesets in parallel with & and wait
58
+ - For GitHub URL rulesets, always clone into [OUTPUT_DIR]/repos/ and use the local path as --config (do NOT pass URLs directly to semgrep — its URL handling is unreliable for repos with non-standard YAML)
59
+ - Add `--include` flags for language-specific rulesets (e.g., `--include="*.py"` for p/python). Do NOT add `--include` to cross-language rulesets like p/security-audit, p/secrets, or third-party repos
60
+ - After all scans complete, delete [OUTPUT_DIR]/repos/ to avoid leaving cloned repos behind
61
+
62
+ ## Output
63
+ Report:
64
+ - Number of findings per ruleset
65
+ - Any scan errors
66
+ - File paths of JSON results (in [OUTPUT_DIR]/raw/)
67
+ - [If Pro] Note any cross-file findings detected
68
+ ```
69
+
70
+ ## Variable Substitutions
71
+
72
+ | Variable | Description | Example |
73
+ |----------|-------------|---------|
74
+ | `[LANGUAGE_CATEGORY]` | Language group being scanned | Python, JavaScript, Docker |
75
+ | `[LANGUAGE]` | Specific language | Python, TypeScript, Go |
76
+ | `[OUTPUT_DIR]` | Output directory (absolute path, resolved in Step 1) | /path/to/static_analysis_semgrep_1 |
77
+ | `[PRO_AVAILABLE]` | Whether Pro engine is available | true, false |
78
+ | `[SEVERITY_FLAGS]` | Severity pre-filter flags | *(empty)* for run-all, `--severity MEDIUM --severity HIGH --severity CRITICAL` for important-only |
79
+ | `[INCLUDE_FLAGS]` | File extension filter for language-specific rulesets | `--include="*.py"` for Python rulesets, *(empty)* for cross-language rulesets like p/security-audit, p/secrets, or third-party repos |
80
+ | `[RULESET]` | Semgrep ruleset identifier or local clone path | p/python, [OUTPUT_DIR]/repos/semgrep-rules |
81
+ | `[TARGET]` | Absolute path to directory to scan | /path/to/codebase |
82
+
83
+ ## Example: Python Scanner Task
84
+
85
+ ```
86
+ You are a Semgrep scanner for Python.
87
+
88
+ ## Task
89
+ Run Semgrep scans for Python files and save results to /path/to/static_analysis_semgrep_1/raw.
90
+
91
+ ## Pro Engine Status: true
92
+
93
+ ## Scan Mode: run-all
94
+
95
+ ## APPROVED RULESETS (from user-confirmed plan)
96
+ - p/python
97
+ - p/django
98
+ - p/security-audit
99
+ - p/secrets
100
+ - https://github.com/trailofbits/semgrep-rules
101
+
102
+ ## Commands to Run (in parallel)
103
+
104
+ ### Clone GitHub URL rulesets first:
105
+ ```bash
106
+ mkdir -p /path/to/static_analysis_semgrep_1/repos
107
+ git clone --depth 1 https://github.com/trailofbits/semgrep-rules /path/to/static_analysis_semgrep_1/repos/trailofbits
108
+ ```
109
+
110
+ ### Run scans:
111
+ ```bash
112
+ semgrep --pro --metrics=off --include="*.py" --config p/python --json -o /path/to/static_analysis_semgrep_1/raw/python-python.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-python.sarif /path/to/codebase &
113
+ semgrep --pro --metrics=off --include="*.py" --config p/django --json -o /path/to/static_analysis_semgrep_1/raw/python-django.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-django.sarif /path/to/codebase &
114
+ semgrep --pro --metrics=off --config p/security-audit --json -o /path/to/static_analysis_semgrep_1/raw/python-security-audit.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-security-audit.sarif /path/to/codebase &
115
+ semgrep --pro --metrics=off --config p/secrets --json -o /path/to/static_analysis_semgrep_1/raw/python-secrets.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-secrets.sarif /path/to/codebase &
116
+ semgrep --pro --metrics=off --config /path/to/static_analysis_semgrep_1/repos/trailofbits --json -o /path/to/static_analysis_semgrep_1/raw/python-trailofbits.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-trailofbits.sarif /path/to/codebase &
117
+ wait
118
+ ```
119
+
120
+ ### Clean up cloned repos:
121
+ ```bash
122
+ rm -rf /path/to/static_analysis_semgrep_1/repos
123
+ ```
124
+
125
+ ## Critical Rules
126
+ - Use ONLY the rulesets listed above - do not add or remove any
127
+ - Always use --metrics=off
128
+ - Use --pro when Pro is available
129
+ - Run all rulesets in parallel with & and wait
130
+ - Clone GitHub URL rulesets into the output dir repos/ subfolder, use local path as --config
131
+ - Add --include="*.py" to language-specific rulesets (p/python, p/django) but NOT to p/security-audit, p/secrets, or third-party repos
132
+ - Delete repos/ after scanning
133
+
134
+ ## Output
135
+ Report:
136
+ - Number of findings per ruleset
137
+ - Any scan errors
138
+ - File paths of JSON results (in raw/ subdirectory)
139
+ - Note any cross-file findings detected
140
+ ```
@@ -0,0 +1,203 @@
1
+ # /// script
2
+ # requires-python = ">=3.11"
3
+ # dependencies = []
4
+ # ///
5
+ """Merge SARIF files into a single consolidated output.
6
+
7
+ Usage:
8
+ uv run merge_sarif.py RAW_DIR OUTPUT_FILE
9
+
10
+ Reads *.sarif files from RAW_DIR (e.g., $OUTPUT_DIR/raw), produces
11
+ OUTPUT_FILE (e.g., $OUTPUT_DIR/results/results.sarif) containing all
12
+ findings merged and deduplicated.
13
+
14
+ Attempts to use SARIF Multitool for merging if available; otherwise falls back to
15
+ a pure Python implementation.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import shutil
22
+ import subprocess
23
+ import sys
24
+ import tempfile
25
+ from pathlib import Path
26
+
27
+
28
def has_sarif_multitool() -> bool:
    """Report whether SARIF Multitool can be run through an existing npx install.

    Returns:
        True only when ``npx`` is on PATH and
        ``npx --no-install @microsoft/sarif-multitool --version`` exits with
        status 0. Any timeout or OS-level failure yields False.
    """
    if shutil.which("npx") is None:
        return False

    # --no-install makes npx fail instead of downloading the package.
    version_cmd = ["npx", "--no-install", "@microsoft/sarif-multitool", "--version"]
    try:
        probe = subprocess.run(version_cmd, capture_output=True, timeout=30)
    except subprocess.TimeoutExpired:
        print("Warning: SARIF Multitool version check timed out", file=sys.stderr)
        return False
    except FileNotFoundError:
        # npx vanished between the PATH lookup and the call; stay silent.
        return False
    except OSError as e:
        print(f"Warning: Failed to check SARIF Multitool: {e}", file=sys.stderr)
        return False
    else:
        return probe.returncode == 0
47
+
48
+
49
def merge_with_multitool(sarif_files: list[Path]) -> dict | None:
    """Merge SARIF files by invoking SARIF Multitool through npx.

    Args:
        sarif_files: Paths of the SARIF files to merge.

    Returns:
        The merged SARIF document parsed from the tool's output file, or
        ``None`` when ``sarif_files`` is empty, the tool fails or times out,
        or its output cannot be read as a JSON object.
    """
    if not sarif_files:
        return None

    # Only reserve a path here: the handle is closed before the external
    # process writes to it, and delete=False keeps the file until `finally`.
    with tempfile.NamedTemporaryFile(suffix=".sarif", delete=False) as tmp:
        tmp_path = Path(tmp.name)

    try:
        cmd = [
            "npx",
            "--no-install",
            "@microsoft/sarif-multitool",
            "merge",
            *[str(f) for f in sarif_files],
            "--output-file",
            str(tmp_path),
            "--force",
        ]
        result = subprocess.run(cmd, capture_output=True, timeout=120)
        if result.returncode != 0:
            # errors="replace": tool output in a non-UTF-8 encoding must not
            # turn a reported failure into an uncaught UnicodeDecodeError.
            stderr_text = result.stderr.decode(errors="replace")
            print(f"SARIF Multitool merge failed: {stderr_text}", file=sys.stderr)
            return None

        # Decode explicitly instead of relying on the locale encoding;
        # utf-8-sig also accepts output that starts with a UTF-8 BOM.
        merged = json.loads(tmp_path.read_text(encoding="utf-8-sig"))
        if not isinstance(merged, dict):
            print("SARIF Multitool produced a non-object JSON document", file=sys.stderr)
            return None
        return merged
    except subprocess.TimeoutExpired as e:
        print(f"SARIF Multitool timed out: {e}", file=sys.stderr)
        return None
    except json.JSONDecodeError as e:
        print(f"SARIF Multitool produced invalid JSON: {e}", file=sys.stderr)
        return None
    except UnicodeDecodeError as e:
        print(f"SARIF Multitool produced undecodable output: {e}", file=sys.stderr)
        return None
    except FileNotFoundError as e:
        print(f"SARIF Multitool not found: {e}", file=sys.stderr)
        return None
    except OSError as e:
        print(f"SARIF Multitool OS error ({type(e).__name__}): {e}", file=sys.stderr)
        return None
    finally:
        tmp_path.unlink(missing_ok=True)
88
+
89
+
90
def _sarif_result_key(result: dict) -> tuple[str, str, int]:
    """Return the (ruleId, artifact URI, start line) dedup key of one SARIF result."""
    rule_id = result.get("ruleId", "")
    uri = ""
    start_line = 0
    locations = result.get("locations") or []
    if locations:
        # Only the first location takes part in the key.
        phys = locations[0].get("physicalLocation") or {}
        uri = (phys.get("artifactLocation") or {}).get("uri", "")
        start_line = (phys.get("region") or {}).get("startLine", 0)
    return (rule_id, uri, start_line)


def merge_sarif_pure_python(sarif_files: list[Path]) -> dict:
    """Merge SARIF files without external tools (fallback path).

    Files are processed in sorted order. Rules are collected once per rule id
    (first occurrence wins). Results are deduplicated on
    (ruleId, first location URI, first location start line). The tool
    metadata of the first run that has any is reused for the merged run.

    Files that cannot be read, decoded, or parsed as a JSON object are
    skipped with a warning on stderr instead of aborting the merge.

    Args:
        sarif_files: Paths of the SARIF files to merge.

    Returns:
        A SARIF 2.1.0 document. ``runs`` holds a single merged run when at
        least one result was found, and is empty otherwise.
    """
    merged = {
        "version": "2.1.0",
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "runs": [],
    }

    seen_rules: dict[str, dict] = {}
    all_results: list[dict] = []
    seen_results: set[tuple[str, str, int]] = set()
    tool_info: dict | None = None
    skipped_files: list[str] = []

    for sarif_file in sorted(sarif_files):
        try:
            # utf-8-sig accepts UTF-8 with or without a BOM and avoids
            # depending on the platform's locale encoding.
            data = json.loads(sarif_file.read_text(encoding="utf-8-sig"))
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            print(f"Warning: Failed to parse {sarif_file}: {e}", file=sys.stderr)
            skipped_files.append(str(sarif_file))
            continue

        if not isinstance(data, dict):
            print(
                f"Warning: Failed to parse {sarif_file}: top-level JSON value is not an object",
                file=sys.stderr,
            )
            skipped_files.append(str(sarif_file))
            continue

        # `or []` / `or {}` tolerate properties that are present but null.
        for run in data.get("runs") or []:
            if not isinstance(run, dict):
                continue

            tool = run.get("tool") or {}
            if tool_info is None and tool:
                tool_info = tool

            driver = tool.get("driver") or {}
            for rule in driver.get("rules") or []:
                rule_id = rule.get("id", "")
                if rule_id and rule_id not in seen_rules:
                    seen_rules[rule_id] = rule

            for result in run.get("results") or []:
                dedup_key = _sarif_result_key(result)
                if dedup_key in seen_results:
                    continue
                seen_results.add(dedup_key)
                all_results.append(result)

    if all_results:
        # Copy before attaching rules so the parsed input is not mutated, and
        # supply a driver when the captured tool object lacks one.
        merged_tool = dict(tool_info) if tool_info else {}
        merged_driver = dict(merged_tool.get("driver") or {"name": "semgrep"})
        merged_driver["rules"] = list(seen_rules.values())
        merged_tool["driver"] = merged_driver
        merged["runs"].append({"tool": merged_tool, "results": all_results})

    if skipped_files:
        print(
            f"WARNING: {len(skipped_files)} of {len(sarif_files)} SARIF files "
            f"could not be parsed. Results may be incomplete.",
            file=sys.stderr,
        )
        for sf in skipped_files:
            print(f"  Skipped: {sf}", file=sys.stderr)

    return merged
155
+
156
+
157
def main() -> int:
    """Command-line entry point: merge RAW_DIR/*.sarif into OUTPUT_FILE.

    Reads ``RAW_DIR`` and ``OUTPUT_FILE`` from ``sys.argv``. Tries SARIF
    Multitool first and uses the pure Python merge when the tool is missing
    or returns nothing.

    Returns:
        0 after the merged file has been written; 1 on a usage error, when
        RAW_DIR is not a directory, or when it contains no ``*.sarif`` files.
    """
    argv = sys.argv
    if len(argv) != 3:
        print(f"Usage: {sys.argv[0]} RAW_DIR OUTPUT_FILE", file=sys.stderr)
        return 1

    raw_dir, output_file = Path(argv[1]), Path(argv[2])
    if not raw_dir.is_dir():
        print(f"Error: {raw_dir} is not a directory", file=sys.stderr)
        return 1

    # Non-recursive: only files directly inside RAW_DIR are considered.
    sarif_files = sorted(raw_dir.glob("*.sarif"))
    print(f"Found {len(sarif_files)} SARIF files to merge in {raw_dir}")
    if not sarif_files:
        print("No SARIF files found, nothing to merge", file=sys.stderr)
        return 1

    # Create the destination directory before any merging starts.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    merged: dict | None = None
    if has_sarif_multitool():
        print("Using SARIF Multitool for merge...")
        merged = merge_with_multitool(sarif_files)
        if merged:
            print("SARIF Multitool merge successful")

    # None means the tool was absent or failed; fall back to the built-in merge.
    if merged is None:
        print("Using pure Python merge (SARIF Multitool not available or failed)")
        merged = merge_sarif_pure_python(sarif_files)

    result_count = 0
    for run in merged.get("runs", []):
        result_count += len(run.get("results", []))
    print(f"Merged SARIF contains {result_count} findings")

    serialized = json.dumps(merged, indent=2)
    output_file.write_text(serialized)
    print(f"Written to {output_file}")

    return 0
200
+
201
+
202
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())