@vigolium/piolium 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +117 -0
  3. package/agents/access-auditor.md +300 -0
  4. package/agents/assumption-breaker.md +154 -0
  5. package/agents/attack-designer.md +116 -0
  6. package/agents/code-scanner.md +139 -0
  7. package/agents/concurrency-auditor.md +238 -0
  8. package/agents/confirm-writer.md +257 -0
  9. package/agents/context-reviewer.md +274 -0
  10. package/agents/cross-verifier.md +165 -0
  11. package/agents/cve-scout.md +381 -0
  12. package/agents/env-builder.md +282 -0
  13. package/agents/env-profiler.md +205 -0
  14. package/agents/evidence-collector.md +140 -0
  15. package/agents/finding-grader.md +142 -0
  16. package/agents/finding-writer.md +148 -0
  17. package/agents/flow-tracer.md +106 -0
  18. package/agents/goal-backtracer.md +146 -0
  19. package/agents/history-miner.md +467 -0
  20. package/agents/independent-verifier.md +118 -0
  21. package/agents/intent-mapper.md +183 -0
  22. package/agents/longshot-collector.md +128 -0
  23. package/agents/longshot-prober.md +126 -0
  24. package/agents/patch-auditor.md +73 -0
  25. package/agents/poc-author.md +124 -0
  26. package/agents/poc-runner.md +194 -0
  27. package/agents/probe-lead.md +269 -0
  28. package/agents/red-challenger.md +101 -0
  29. package/agents/report-composer.md +208 -0
  30. package/agents/review-adjudicator.md +216 -0
  31. package/agents/spec-auditor.md +155 -0
  32. package/agents/taint-tracer.md +265 -0
  33. package/agents/test-locator.md +209 -0
  34. package/agents/threat-modeler.md +132 -0
  35. package/agents/variant-scanner.md +108 -0
  36. package/agents/variant-spotter.md +110 -0
  37. package/bin/piolium.mjs +376 -0
  38. package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
  39. package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
  40. package/extensions/piolium/agent-runner.ts +322 -0
  41. package/extensions/piolium/agents.ts +266 -0
  42. package/extensions/piolium/audit-state.ts +522 -0
  43. package/extensions/piolium/bundled-resources.ts +97 -0
  44. package/extensions/piolium/candidate-scan.ts +966 -0
  45. package/extensions/piolium/command-target.ts +177 -0
  46. package/extensions/piolium/console-stream.ts +57 -0
  47. package/extensions/piolium/export-results.ts +380 -0
  48. package/extensions/piolium/findings.ts +448 -0
  49. package/extensions/piolium/heartbeat.ts +182 -0
  50. package/extensions/piolium/help.ts +234 -0
  51. package/extensions/piolium/index.ts +1865 -0
  52. package/extensions/piolium/longshot.ts +530 -0
  53. package/extensions/piolium/matcher-suggestions.ts +196 -0
  54. package/extensions/piolium/matcher-utils.ts +83 -0
  55. package/extensions/piolium/modes/balanced.ts +750 -0
  56. package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
  57. package/extensions/piolium/modes/confirm.ts +697 -0
  58. package/extensions/piolium/modes/deep.ts +917 -0
  59. package/extensions/piolium/modes/diff.ts +177 -0
  60. package/extensions/piolium/modes/lite.ts +540 -0
  61. package/extensions/piolium/modes/longshot.ts +595 -0
  62. package/extensions/piolium/modes/merge.ts +204 -0
  63. package/extensions/piolium/modes/phase-runner.ts +267 -0
  64. package/extensions/piolium/modes/reinvest.ts +546 -0
  65. package/extensions/piolium/modes/revisit.ts +279 -0
  66. package/extensions/piolium/modes.ts +48 -0
  67. package/extensions/piolium/phase-labels.ts +123 -0
  68. package/extensions/piolium/phase-status-strip.ts +92 -0
  69. package/extensions/piolium/prompt-prefix-editor.ts +39 -0
  70. package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
  71. package/extensions/piolium/recon.ts +409 -0
  72. package/extensions/piolium/result-stats.ts +105 -0
  73. package/extensions/piolium/retry.ts +120 -0
  74. package/extensions/piolium/scheduler.ts +212 -0
  75. package/extensions/piolium/secrets.ts +368 -0
  76. package/extensions/piolium/tools/web-tools.ts +148 -0
  77. package/package.json +77 -0
  78. package/skills/agentic-actions-auditor/SKILL.md +327 -0
  79. package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
  80. package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
  81. package/skills/agentic-actions-auditor/references/foundations.md +94 -0
  82. package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
  83. package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
  84. package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
  85. package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
  86. package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
  87. package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
  88. package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
  89. package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
  90. package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
  91. package/skills/audit/SKILL.md +562 -0
  92. package/skills/audit/assets/icon.svg +7 -0
  93. package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
  94. package/skills/audit/references/adversarial-review.md +148 -0
  95. package/skills/audit/references/architecture-aware-sast.md +306 -0
  96. package/skills/audit/references/audit-workflow.md +737 -0
  97. package/skills/audit/references/chamber-protocol.md +384 -0
  98. package/skills/audit/references/creative-attack-modes.md +221 -0
  99. package/skills/audit/references/deep-analysis.md +273 -0
  100. package/skills/audit/references/domain-attack-playbooks.md +1129 -0
  101. package/skills/audit/references/knowledge-base-template.md +513 -0
  102. package/skills/audit/references/real-env-validation.md +191 -0
  103. package/skills/audit/references/report-templates.md +417 -0
  104. package/skills/audit/references/triage-and-prereqs.md +134 -0
  105. package/skills/audit/scripts/consolidate_drafts.py +554 -0
  106. package/skills/audit/scripts/partition_findings.py +152 -0
  107. package/skills/audit/scripts/rg-hotspots.sh +121 -0
  108. package/skills/audit/scripts/stamp_file_state.py +349 -0
  109. package/skills/code-reviewer/SKILL.md +65 -0
  110. package/skills/codeql/SKILL.md +281 -0
  111. package/skills/codeql/references/build-fixes.md +90 -0
  112. package/skills/codeql/references/diagnostic-query-templates.md +339 -0
  113. package/skills/codeql/references/extension-yaml-format.md +209 -0
  114. package/skills/codeql/references/important-only-suite.md +153 -0
  115. package/skills/codeql/references/language-details.md +207 -0
  116. package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
  117. package/skills/codeql/references/performance-tuning.md +111 -0
  118. package/skills/codeql/references/quality-assessment.md +172 -0
  119. package/skills/codeql/references/ruleset-catalog.md +63 -0
  120. package/skills/codeql/references/run-all-suite.md +92 -0
  121. package/skills/codeql/references/sarif-processing.md +79 -0
  122. package/skills/codeql/references/threat-models.md +51 -0
  123. package/skills/codeql/workflows/build-database.md +280 -0
  124. package/skills/codeql/workflows/create-data-extensions.md +261 -0
  125. package/skills/codeql/workflows/run-analysis.md +301 -0
  126. package/skills/differential-review/SKILL.md +220 -0
  127. package/skills/differential-review/adversarial.md +203 -0
  128. package/skills/differential-review/methodology.md +234 -0
  129. package/skills/differential-review/patterns.md +300 -0
  130. package/skills/differential-review/reporting.md +369 -0
  131. package/skills/fp-check/SKILL.md +125 -0
  132. package/skills/fp-check/references/bug-class-verification.md +114 -0
  133. package/skills/fp-check/references/deep-verification.md +143 -0
  134. package/skills/fp-check/references/evidence-templates.md +91 -0
  135. package/skills/fp-check/references/false-positive-patterns.md +115 -0
  136. package/skills/fp-check/references/gate-reviews.md +27 -0
  137. package/skills/fp-check/references/standard-verification.md +78 -0
  138. package/skills/insecure-defaults/SKILL.md +117 -0
  139. package/skills/insecure-defaults/references/examples.md +409 -0
  140. package/skills/last30days/SKILL.md +444 -0
  141. package/skills/sarif-parsing/SKILL.md +483 -0
  142. package/skills/sarif-parsing/resources/jq-queries.md +162 -0
  143. package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
  144. package/skills/security-threat-model/LICENSE.txt +201 -0
  145. package/skills/security-threat-model/SKILL.md +81 -0
  146. package/skills/security-threat-model/agents/openai.yaml +4 -0
  147. package/skills/security-threat-model/references/prompt-template.md +255 -0
  148. package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
  149. package/skills/semgrep/SKILL.md +212 -0
  150. package/skills/semgrep/references/rulesets.md +162 -0
  151. package/skills/semgrep/references/scan-modes.md +110 -0
  152. package/skills/semgrep/references/scanner-task-prompt.md +140 -0
  153. package/skills/semgrep/scripts/merge_sarif.py +203 -0
  154. package/skills/semgrep/workflows/scan-workflow.md +311 -0
  155. package/skills/semgrep-rule-creator/SKILL.md +168 -0
  156. package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
  157. package/skills/semgrep-rule-creator/references/workflow.md +240 -0
  158. package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
  159. package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
  160. package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
  161. package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
  162. package/skills/sharp-edges/SKILL.md +292 -0
  163. package/skills/sharp-edges/references/auth-patterns.md +252 -0
  164. package/skills/sharp-edges/references/case-studies.md +274 -0
  165. package/skills/sharp-edges/references/config-patterns.md +333 -0
  166. package/skills/sharp-edges/references/crypto-apis.md +190 -0
  167. package/skills/sharp-edges/references/lang-c.md +205 -0
  168. package/skills/sharp-edges/references/lang-csharp.md +285 -0
  169. package/skills/sharp-edges/references/lang-go.md +270 -0
  170. package/skills/sharp-edges/references/lang-java.md +263 -0
  171. package/skills/sharp-edges/references/lang-javascript.md +269 -0
  172. package/skills/sharp-edges/references/lang-kotlin.md +265 -0
  173. package/skills/sharp-edges/references/lang-php.md +245 -0
  174. package/skills/sharp-edges/references/lang-python.md +274 -0
  175. package/skills/sharp-edges/references/lang-ruby.md +273 -0
  176. package/skills/sharp-edges/references/lang-rust.md +272 -0
  177. package/skills/sharp-edges/references/lang-swift.md +287 -0
  178. package/skills/sharp-edges/references/language-specific.md +588 -0
  179. package/skills/spec-to-code-compliance/SKILL.md +357 -0
  180. package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
  181. package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
  182. package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
  183. package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
  184. package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
  185. package/skills/variant-analysis/METHODOLOGY.md +327 -0
  186. package/skills/variant-analysis/SKILL.md +142 -0
  187. package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
  188. package/skills/variant-analysis/resources/codeql/go.ql +69 -0
  189. package/skills/variant-analysis/resources/codeql/java.ql +71 -0
  190. package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
  191. package/skills/variant-analysis/resources/codeql/python.ql +80 -0
  192. package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
  193. package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
  194. package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
  195. package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
  196. package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
  197. package/skills/variant-analysis/resources/variant-report-template.md +75 -0
  198. package/skills/vuln-report/SKILL.md +137 -0
  199. package/skills/vuln-report/agents/openai.yaml +4 -0
  200. package/skills/vuln-report/references/report-template.md +135 -0
  201. package/skills/wooyun-legacy/SKILL.md +367 -0
  202. package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
  203. package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
  204. package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
  205. package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
  206. package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
  207. package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
  208. package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
  209. package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
  210. package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
  211. package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
  212. package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
  213. package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
  214. package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
  215. package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
  216. package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
  217. package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
  218. package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
  219. package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
  220. package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
  221. package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
  222. package/skills/wooyun-legacy/references/xss.md +746 -0
  223. package/skills/zeroize-audit/SKILL.md +371 -0
  224. package/skills/zeroize-audit/configs/c.yaml +21 -0
  225. package/skills/zeroize-audit/configs/default.yaml +128 -0
  226. package/skills/zeroize-audit/configs/rust.yaml +83 -0
  227. package/skills/zeroize-audit/prompts/report_template.md +238 -0
  228. package/skills/zeroize-audit/prompts/system.md +163 -0
  229. package/skills/zeroize-audit/prompts/task.md +97 -0
  230. package/skills/zeroize-audit/references/compile-commands.md +231 -0
  231. package/skills/zeroize-audit/references/detection-strategy.md +191 -0
  232. package/skills/zeroize-audit/references/ir-analysis.md +252 -0
  233. package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
  234. package/skills/zeroize-audit/references/poc-generation.md +470 -0
  235. package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
  236. package/skills/zeroize-audit/schemas/input.json +83 -0
  237. package/skills/zeroize-audit/schemas/output.json +140 -0
  238. package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
  239. package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
  240. package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
  241. package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
  242. package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
  243. package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
  244. package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
  245. package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
  246. package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
  247. package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
  248. package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
  249. package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
  250. package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
  251. package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
  252. package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
  253. package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
  254. package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
  255. package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
  256. package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
  257. package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
  258. package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
  259. package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
  260. package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
  261. package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
  262. package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
  263. package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
  264. package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
  265. package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
  266. package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
  267. package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
  268. package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
  269. package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
  270. package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
  271. package/themes/piolium-srcery.json +94 -0
@@ -0,0 +1,311 @@
1
+ # Semgrep Scan Workflow
2
+
3
+ Complete 5-step scan execution process. Read from start to finish and follow each step in order.
4
+
5
+ ## Task System Enforcement
6
+
7
+ On invocation, create these tasks with dependencies:
8
+
9
+ ```
10
+ TaskCreate: "Detect languages and Pro availability" (Step 1)
11
+ TaskCreate: "Select scan mode and rulesets" (Step 2) - blockedBy: Step 1
12
+ TaskCreate: "Present plan with rulesets, get approval" (Step 3) - blockedBy: Step 2
13
+ TaskCreate: "Execute scans with approved rulesets and mode" (Step 4) - blockedBy: Step 3
14
+ TaskCreate: "Merge results and report" (Step 5) - blockedBy: Step 4
15
+ ```
16
+
17
+ ### Mandatory Gate
18
+
19
+ | Task | Gate Type | Cannot Proceed Until |
20
+ |------|-----------|---------------------|
21
+ | Step 3 | **HARD GATE** | User explicitly approves rulesets + plan |
22
+
23
+ Mark Step 3 as `completed` ONLY after user says "yes", "proceed", "approved", or equivalent.
24
+
25
+ ---
26
+
27
+ ## Step 1: Resolve Output Directory, Detect Languages and Pro Availability
28
+
29
+ > **Entry:** User has specified or confirmed the target directory.
30
+ > **Exit:** `OUTPUT_DIR` resolved and created; language list with file counts produced; Pro availability determined.
31
+
32
+ ### Resolve Output Directory
33
+
34
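+ If the user specified an output directory in their prompt, use it as `OUTPUT_DIR`. Otherwise, auto-increment: use `static_analysis_semgrep_N`, where `N` is the lowest positive integer for which that path does not already exist. In both cases, **always run `mkdir -p`** to ensure the directory and its `raw/` and `results/` subdirectories exist.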
35
+
36
+ ```bash
37
+ if [ -n "$USER_SPECIFIED_DIR" ]; then
38
+ OUTPUT_DIR="$USER_SPECIFIED_DIR"
39
+ else
40
+ BASE="static_analysis_semgrep"
41
+ N=1
42
+ while [ -e "${BASE}_${N}" ]; do
43
+ N=$((N + 1))
44
+ done
45
+ OUTPUT_DIR="${BASE}_${N}"
46
+ fi
47
+ mkdir -p "$OUTPUT_DIR/raw" "$OUTPUT_DIR/results"
48
+ echo "Output directory: $OUTPUT_DIR"
49
+ ```
50
+
51
+ `$OUTPUT_DIR` is used by all subsequent steps. Pass its **absolute path** to scanner subagents. Scanners write raw output to `$OUTPUT_DIR/raw/`; merged/filtered results go to `$OUTPUT_DIR/results/`.
52
+
53
+ **Detect Pro availability** (requires Bash):
54
+
55
+ ```bash
56
+ if ! command -v semgrep >/dev/null 2>&1; then
57
+ echo "ERROR: semgrep is not installed. Install from https://semgrep.dev/docs/getting-started/"
58
+ exit 1
59
+ fi
60
+ semgrep --version
61
+ semgrep --pro --validate --config p/default 2>/dev/null && echo "Pro: AVAILABLE" || echo "Pro: NOT AVAILABLE"
62
+ ```
63
+
64
+ **Detect languages** using Glob (not Bash). Run these patterns against the target directory and count matches:
65
+
66
+ `**/*.py`, `**/*.js`, `**/*.ts`, `**/*.tsx`, `**/*.jsx`, `**/*.go`, `**/*.rb`, `**/*.java`, `**/*.php`, `**/*.c`, `**/*.cpp`, `**/*.rs`, `**/Dockerfile`, `**/*.tf`
67
+
68
+ Also check for framework markers: `package.json`, `pyproject.toml`, `Gemfile`, `go.mod`, `Cargo.toml`, `pom.xml`. Use Read to inspect these files for framework dependencies (e.g., read `package.json` to detect React, Express, Next.js; read `pyproject.toml` for Django, Flask, FastAPI).
69
+
70
+ Map findings to categories:
71
+
72
+ | Detection | Category |
73
+ |-----------|----------|
74
+ | `.py`, `pyproject.toml` | Python |
75
+ | `.js`, `.jsx`, `.ts`, `.tsx`, `package.json` | JavaScript/TypeScript |
76
+ | `.go`, `go.mod` | Go |
77
+ | `.rb`, `Gemfile` | Ruby |
78
+ | `.java`, `pom.xml` | Java |
79
+ | `.php` | PHP |
80
+ | `.c`, `.cpp` | C/C++ |
81
+ | `.rs`, `Cargo.toml` | Rust |
82
+ | `Dockerfile` | Docker |
83
+ | `.tf` | Terraform |
84
+ | k8s manifests | Kubernetes |
85
+
86
+ ---
87
+
88
+ ## Step 2: Select Scan Mode and Rulesets
89
+
90
+ > **Entry:** Step 1 complete — languages detected, Pro status known.
91
+ > **Exit:** Scan mode selected; structured rulesets JSON compiled for all detected languages.
92
+
93
+ **First, select scan mode** using `AskUserQuestion`:
94
+
95
+ ```
96
+ header: "Scan Mode"
97
+ question: "Which scan mode should be used?"
98
+ multiSelect: false
99
+ options:
100
+ - label: "Run all (Recommended)"
101
+ description: "Full coverage — all rulesets, all severity levels"
102
+ - label: "Important only"
103
+ description: "Security vulnerabilities only — medium-high confidence and impact, no code quality"
104
+ ```
105
+
106
+ Record the selected mode. It affects Steps 4 and 5.
107
+
108
+ **Then, select rulesets.** Using the detected languages and frameworks from Step 1, follow the **Ruleset Selection Algorithm** in [rulesets.md](../references/rulesets.md).
109
+
110
+ The algorithm covers:
111
+ 1. Security baseline (always included)
112
+ 2. Language-specific rulesets
113
+ 3. Framework rulesets (if detected)
114
+ 4. Infrastructure rulesets
115
+ 5. **Required** third-party rulesets (Trail of Bits, 0xdea, Decurity — NOT optional)
116
+ 6. Registry verification
117
+
118
+ **Output:** Structured JSON passed to Step 3 for user review:
119
+
120
+ ```json
121
+ {
122
+ "baseline": ["p/security-audit", "p/secrets"],
123
+ "python": ["p/python", "p/django"],
124
+ "javascript": ["p/javascript", "p/react", "p/nodejs"],
125
+ "docker": ["p/dockerfile"],
126
+ "third_party": ["https://github.com/trailofbits/semgrep-rules"]
127
+ }
128
+ ```
129
+
130
+ ---
131
+
132
+ ## Step 3: CRITICAL GATE — Present Plan and Get Approval
133
+
134
+ > **Entry:** Step 2 complete — scan mode and rulesets selected.
135
+ > **Exit:** User has explicitly approved the plan (quoted confirmation).
136
+
137
+ > **⛔ MANDATORY CHECKPOINT — DO NOT SKIP**
138
+ >
139
+ > This step requires explicit user approval before proceeding.
140
+ > User may modify rulesets before approving.
141
+
142
+ Present plan to user with **explicit ruleset listing**:
143
+
144
+ ```
145
+ ## Semgrep Scan Plan
146
+
147
+ **Target:** /path/to/codebase
148
+ **Output directory:** $OUTPUT_DIR
149
+ **Engine:** Semgrep Pro (cross-file analysis) | Semgrep OSS (single-file)
150
+ **Scan mode:** Run all | Important only (security vulns, medium-high confidence/impact)
151
+
152
+ ### Detected Languages/Technologies:
153
+ - Python (1,234 files) - Django framework detected
154
+ - JavaScript (567 files) - React detected
155
+ - Dockerfile (3 files)
156
+
157
+ ### Rulesets to Run:
158
+
159
+ **Security Baseline (always included):**
160
+ - [x] `p/security-audit` - Comprehensive security rules
161
+ - [x] `p/secrets` - Hardcoded credentials, API keys
162
+
163
+ **Python (1,234 files):**
164
+ - [x] `p/python` - Python security patterns
165
+ - [x] `p/django` - Django-specific vulnerabilities
166
+
167
+ **JavaScript (567 files):**
168
+ - [x] `p/javascript` - JavaScript security patterns
169
+ - [x] `p/react` - React-specific issues
170
+ - [x] `p/nodejs` - Node.js server-side patterns
171
+
172
+ **Docker (3 files):**
173
+ - [x] `p/dockerfile` - Dockerfile best practices
174
+
175
+ **Third-party (auto-included for detected languages):**
176
+ - [x] Trail of Bits rules - https://github.com/trailofbits/semgrep-rules
177
+
178
+ **Want to modify rulesets?** Tell me which to add or remove.
179
+ **Ready to scan?** Say "proceed" or "yes".
180
+ ```
181
+
182
+ **⛔ STOP: Await explicit user approval.**
183
+
184
+ 1. **If user wants to modify rulesets:** Add/remove as requested, re-present the updated plan, return to waiting.
185
+ 2. **Use AskUserQuestion** if user hasn't responded:
186
+ ```
187
+ "I've prepared the scan plan with N rulesets (including Trail of Bits). Proceed with scanning?"
188
+ Options: ["Yes, run scan", "Modify rulesets first"]
189
+ ```
190
+ 3. **Valid approval:** "yes", "proceed", "approved", "go ahead", "looks good", "run it"
191
+ 4. **NOT approval:** User's original request ("scan this codebase"), silence, questions about the plan
192
+
193
+ ### Pre-Scan Checklist
194
+
195
+ Before marking Step 3 complete:
196
+ - [ ] Target directory shown to user
197
+ - [ ] Engine type (Pro/OSS) displayed
198
+ - [ ] Languages detected and listed
199
+ - [ ] **All rulesets explicitly listed with checkboxes**
200
+ - [ ] User given opportunity to modify rulesets
201
+ - [ ] User explicitly approved (quote their confirmation)
202
+ - [ ] **Final ruleset list captured for Step 4**
203
+ - [ ] Agent type listed: `static-analysis:semgrep-scanner`
204
+
205
+ ### Log Approved Rulesets
206
+
207
+ After approval, write the approved rulesets to `$OUTPUT_DIR/rulesets.txt`:
208
+
209
+ ```bash
210
+ cat > "$OUTPUT_DIR/rulesets.txt" << RULESETS
211
+ # Semgrep Scan — Approved Rulesets
212
+ # Generated: $(date -Iseconds)
213
+ # Scan mode: <run-all|important-only>
214
+
215
+ ## Rulesets:
216
+ <one ruleset per line, e.g.:>
217
+ p/security-audit
218
+ p/secrets
219
+ p/python
220
+ p/django
221
+ https://github.com/trailofbits/semgrep-rules
222
+ RULESETS
223
+ ```
224
+
225
+ ---
226
+
227
+ ## Step 4: Spawn Parallel Scan Tasks
228
+
229
+ > **Entry:** Step 3 approved — user explicitly confirmed the plan.
230
+ > **Exit:** All scan Tasks completed; result files exist in `$OUTPUT_DIR/raw/`.
231
+
232
+ **Use `$OUTPUT_DIR` resolved in Step 1.** It already exists; no need to create it again. Scanners write all output to `$OUTPUT_DIR/raw/`.
233
+
234
+ **Spawn N Tasks in a SINGLE message** (one per language category) using `subagent_type: static-analysis:semgrep-scanner`.
235
+
236
+ Use the scanner task prompt template from [scanner-task-prompt.md](../references/scanner-task-prompt.md).
237
+
238
+ **Mode-dependent scanner flags:**
239
+ - **Run all**: No additional flags
240
+ - **Important only**: Add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every `semgrep` command
241
+
242
+ **Example — 3 Language Scan (with approved rulesets):**
243
+
244
+ Spawn these 3 Tasks in a SINGLE message:
245
+
246
+ 1. **Task: Python Scanner** — Rulesets: p/python, p/django, p/security-audit, p/secrets, trailofbits → `$OUTPUT_DIR/raw/python-*.json`
247
+ 2. **Task: JavaScript Scanner** — Rulesets: p/javascript, p/react, p/nodejs, p/security-audit, p/secrets, trailofbits → `$OUTPUT_DIR/raw/js-*.json`
248
+ 3. **Task: Docker Scanner** — Rulesets: p/dockerfile → `$OUTPUT_DIR/raw/docker-*.json`
249
+
250
+ ### Operational Notes
251
+
252
+ - Always use **absolute paths** for `[TARGET]` — subagents can't resolve relative paths
253
+ - Clone GitHub URL rulesets into `$OUTPUT_DIR/repos/` — never pass URLs directly to `--config` (semgrep's URL handling fails on repos with non-standard YAML)
254
+ - Delete `$OUTPUT_DIR/repos/` after all scans complete
255
+ - Run rulesets in parallel with `&` and `wait`, not sequentially
256
+ - Use `--include="*.py"` for language-specific rulesets, but NOT for cross-language rulesets (p/security-audit, p/secrets, third-party repos)
257
+
258
+ ---
259
+
260
+ ## Step 5: Merge Results and Report
261
+
262
+ > **Entry:** Step 4 complete — all scan Tasks finished.
263
+ > **Exit:** `results.sarif` exists in `$OUTPUT_DIR/results/` and is valid JSON.
264
+
265
+ **Important-only mode: Post-filter before merge.** Apply the filter from [scan-modes.md](../references/scan-modes.md) ("Filter All Result Files in a Directory" section) to each result JSON in `$OUTPUT_DIR/raw/`. The filter creates `*-important.json` files alongside the originals — the originals are preserved unmodified.
266
+
267
+ **Generate merged SARIF** using the merge script. The resolved path is in SKILL.md's "Merge command" section — use that exact path:
268
+
269
+ ```bash
270
+ uv run {baseDir}/scripts/merge_sarif.py $OUTPUT_DIR/raw $OUTPUT_DIR/results/results.sarif
271
+ ```
272
+
273
+ - **Run-all mode:** The script merges all `*.sarif` files from `$OUTPUT_DIR/raw/`.
274
+ - **Important-only mode:** Run the post-filter first (creates `*-important.json` in `raw/`), then run the merge script. Raw SARIF files are unaffected by the JSON post-filter, so the merge operates on the unfiltered SARIF. For SARIF-level filtering, apply the jq post-filter from scan-modes.md to `$OUTPUT_DIR/results/results.sarif` after merge.
275
+
276
+ **Verify merged SARIF is valid:**
277
+
278
+ ```bash
279
+ python -c "import json; d=json.load(open('$OUTPUT_DIR/results/results.sarif')); print(f'{sum(len(r.get(\"results\",[]))for r in d.get(\"runs\",[]))} findings in merged SARIF')"
280
+ ```
281
+
282
+ If verification fails, the merge script produced invalid output — investigate before reporting.
283
+
284
+ **Report to user:**
285
+
286
+ ```
287
+ ## Semgrep Scan Complete
288
+
289
+ **Scanned:** 1,804 files
290
+ **Rulesets used:** 9 (including Trail of Bits)
291
+ **Total findings:** 32
292
+
293
+ ### By Severity:
294
+ - ERROR: 5
295
+ - WARNING: 18
296
+ - INFO: 9
297
+
298
+ ### By Category:
299
+ - SQL Injection: 3
300
+ - XSS: 7
301
+ - Hardcoded secrets: 2
302
+ - Insecure configuration: 12
303
+ - Code quality: 8
304
+
305
+ Results written to:
306
+ - $OUTPUT_DIR/results/results.sarif (merged SARIF)
307
+ - $OUTPUT_DIR/raw/ (per-scan raw results, unfiltered)
308
+ - $OUTPUT_DIR/rulesets.txt (approved rulesets)
309
+ ```
310
+
311
+ **Verify** before reporting: confirm `$OUTPUT_DIR/results/results.sarif` exists and is valid JSON (the verification command earlier in this step checks both).
@@ -0,0 +1,168 @@
1
+ ---
2
+ name: semgrep-rule-creator
3
+ description: Creates custom Semgrep rules for detecting security vulnerabilities, bug patterns, and code patterns. Use when writing Semgrep rules or building custom static analysis detections.
4
+ allowed-tools:
5
+ - Bash
6
+ - Read
7
+ - Write
8
+ - Edit
9
+ - Glob
10
+ - Grep
11
+ - WebFetch
12
+ ---
13
+
14
+ # Semgrep Rule Creator
15
+
16
+ Create production-quality Semgrep rules with proper testing and validation.
17
+
18
+ ## When to Use
19
+
20
+ **Ideal scenarios:**
21
+ - Writing Semgrep rules for specific bug patterns
22
+ - Writing rules to detect security vulnerabilities in your codebase
23
+ - Writing taint mode rules for data flow vulnerabilities
24
+ - Writing rules to enforce coding standards
25
+
26
+ ## When NOT to Use
27
+
28
+ Do NOT use this skill for:
29
+ - Running existing Semgrep rulesets
30
+ - General static analysis without custom rules (use `static-analysis` skill)
31
+
32
+ ## Rationalizations to Reject
33
+
34
+ When writing Semgrep rules, reject these common shortcuts:
35
+
36
+ - **"The pattern looks complete"** → Still run `semgrep --test --config <rule-id>.yaml <rule-id>.<ext>` to verify. Untested rules have hidden false positives/negatives.
37
+ - **"It matches the vulnerable case"** → Matching vulnerabilities is half the job. Verify safe cases don't match (false positives break trust).
38
+ - **"Taint mode is overkill for this"** → If data flows from user input to a dangerous sink, taint mode gives better precision than pattern matching.
39
+ - **"One test is enough"** → Include edge cases: different coding styles, sanitized inputs, safe alternatives, and boundary conditions.
40
+ - **"I'll optimize the patterns first"** → Write correct patterns first, optimize after all tests pass. Premature optimization causes regressions.
41
+ - **"The AST dump is too complex"** → The AST reveals exactly how Semgrep sees code. Skipping it leads to patterns that miss syntactic variations.
42
+
43
+ ## Anti-Patterns
44
+
45
+ **Too broad** - matches everything, useless for detection:
46
+ ```yaml
47
+ # BAD: Matches any function call
48
+ pattern: $FUNC(...)
49
+
50
+ # GOOD: Specific dangerous function
51
+ pattern: eval(...)
52
+ ```
53
+
54
+ **Missing safe cases in tests** - leads to undetected false positives:
55
+ ```python
56
+ # BAD: Only tests vulnerable case
57
+ # ruleid: my-rule
58
+ dangerous(user_input)
59
+
60
+ # GOOD: Include safe cases to verify no false positives
61
+ # ruleid: my-rule
62
+ dangerous(user_input)
63
+
64
+ # ok: my-rule
65
+ dangerous(sanitize(user_input))
66
+
67
+ # ok: my-rule
68
+ dangerous("hardcoded_safe_value")
69
+ ```
70
+
71
+ **Overly specific patterns** - misses variations:
72
+ ```yaml
73
+ # BAD: Only matches exact format
74
+ pattern: os.system("rm " + $VAR)
75
+
76
+ # GOOD: Matches all os.system calls with taint tracking
77
+ mode: taint
78
+ pattern-sinks:
79
+ - pattern: os.system(...)
80
+ ```
81
+
82
+ ## Strictness Level
83
+
84
+ This workflow is **strict** - do not skip steps:
85
+ - **Read documentation first**: See [Documentation](#documentation) before writing Semgrep rules
86
+ - **Test-first is mandatory**: Never write a rule without tests
87
+ - **100% test pass is required**: "Most tests pass" is not acceptable
88
+ - **Optimization comes last**: Only simplify patterns after all tests pass
89
+ - **Avoid generic patterns**: Rules must be specific, not match broad patterns
90
+ - **Prioritize taint mode**: For data flow vulnerabilities
91
+ - **One YAML file - one Semgrep rule**: Each YAML file must contain only one Semgrep rule; don't combine multiple rules in a single file
92
+ - **No generic rules**: When targeting a specific language for Semgrep rules - avoid generic pattern matching (`languages: generic`)
93
+ - **Forbidden `todook` and `todoruleid` test annotations**: `todoruleid: <rule-id>` and `todook: <rule-id>` annotations in test files for future rule improvements are forbidden
94
+
95
+ ## Overview
96
+
97
+ This skill guides creation of Semgrep rules that detect security vulnerabilities and code patterns. Rules are created iteratively: analyze the problem, write tests first, analyze AST structure, write the rule, iterate until all tests pass, optimize the rule.
98
+
99
+ **Approach selection:**
100
+ - **Taint mode** (prioritize): Data flow issues where untrusted input reaches dangerous sinks
101
+ - **Pattern matching**: Simple syntactic patterns without data flow requirements
102
+
103
+ **Why prioritize taint mode?** Pattern matching finds syntax but misses context. A pattern `eval($X)` matches both `eval(user_input)` (vulnerable) and `eval("safe_literal")` (safe). Taint mode tracks data flow, so it only alerts when untrusted data actually reaches the sink—dramatically reducing false positives for injection vulnerabilities.
104
+
105
+ **Iterating between approaches:** It's okay to experiment. If you start with taint mode and it's not working well (e.g., taint doesn't propagate as expected, too many false positives/negatives), switch to pattern matching. Conversely, if pattern matching produces too many false positives on safe cases, try taint mode instead. The goal is a working rule—not rigid adherence to one approach.
106
+
107
+ **Output structure** - exactly 2 files in a directory named after the rule-id:
108
+ ```
109
+ <rule-id>/
110
+ ├── <rule-id>.yaml # Semgrep rule
111
+ └── <rule-id>.<ext> # Test file with ruleid/ok annotations
112
+ ```
113
+
114
+ ## Quick Start
115
+
116
+ ```yaml
117
+ rules:
118
+ - id: insecure-eval
119
+ languages: [python]
120
+ severity: HIGH
121
+ message: User input passed to eval() allows code execution
122
+ mode: taint
123
+ pattern-sources:
124
+ - pattern: request.args.get(...)
125
+ pattern-sinks:
126
+ - pattern: eval(...)
127
+ ```
128
+
129
+ Test file (`insecure-eval.py`):
130
+ ```python
131
+ # ruleid: insecure-eval
132
+ eval(request.args.get('code'))
133
+
134
+ # ok: insecure-eval
135
+ eval("print('safe')")
136
+ ```
137
+
138
+ Run tests (from rule directory): `semgrep --test --config <rule-id>.yaml <rule-id>.<ext>`
139
+
140
+ ## Quick Reference
141
+
142
+ - For commands, pattern operators, and taint mode syntax, see [quick-reference.md]({baseDir}/references/quick-reference.md).
143
+ - For detailed workflow and examples, you MUST see [workflow.md]({baseDir}/references/workflow.md)
144
+
145
+ ## Workflow
146
+
147
+ Copy this checklist and track progress:
148
+
149
+ ```
150
+ Semgrep Rule Progress:
151
+ - [ ] Step 1: Analyze the Problem
152
+ - [ ] Step 2: Write Tests First
153
+ - [ ] Step 3: Analyze AST structure
154
+ - [ ] Step 4: Write the rule
155
+ - [ ] Step 5: Iterate until all tests pass (semgrep --test)
156
+ - [ ] Step 6: Optimize the rule (remove redundancies, re-test)
157
+ - [ ] Step 7: Final Run
158
+ ```
159
+
160
+ ## Documentation
161
+
162
+ **REQUIRED**: Before writing any rule, use WebFetch to read **all** of these 5 links with Semgrep documentation:
163
+
164
+ 1. [Rule Syntax](https://semgrep.dev/docs/writing-rules/rule-syntax)
165
+ 2. [Pattern Syntax](https://semgrep.dev/docs/writing-rules/pattern-syntax)
166
+ 3. [ToB Testing Handbook - Semgrep](https://appsec.guide/docs/static-analysis/semgrep/advanced/)
167
+ 4. [Constant propagation](https://semgrep.dev/docs/writing-rules/data-flow/constant-propagation)
168
+ 5. [Writing Rules Index](https://github.com/semgrep/semgrep-docs/tree/main/docs/writing-rules/)
@@ -0,0 +1,202 @@
1
+ # Semgrep Rule Quick Reference
2
+
3
+ ## Required Rule Fields
4
+
5
+ ```yaml
6
+ rules:
7
+ - id: rule-id # Unique identifier (lowercase, hyphens)
8
+ languages: # Target language(s)
9
+ - python
10
+ severity: HIGH # LOW, MEDIUM, HIGH, CRITICAL (ERROR/WARNING/INFO are legacy)
11
+ message: Description # Shown when rule matches
12
+ pattern: code(...) # OR use patterns/pattern-either/mode:taint
13
+ ```
14
+
15
+ ## Pattern Operators
16
+
17
+ ### Basic Matching
18
+ ```yaml
19
+ pattern: foo(...) # Match function call
20
+ patterns: # AND - all must match
21
+ - pattern: $X
22
+ - pattern-not: safe($X)
23
+ pattern-either: # OR - any can match
24
+ - pattern: foo(...)
25
+ - pattern: bar(...)
26
+ pattern-regex: ^foo.*bar$ # PCRE2 regex matching (multiline mode)
27
+ ```
28
+
29
+ ### Metavariables
30
+ - `$VAR` - Match any single expression
31
+ - **Must be uppercase**: `$X`, `$FUNC`, `$VAR_1` (NOT `$x`, `$var`)
32
+ - `$_` - Anonymous metavariable (matches but doesn't bind)
33
+ - `$...VAR` - Match zero or more arguments (ellipsis metavariable)
34
+ - `...` - Ellipsis, match anything in between
35
+
36
+ ### Typed Metavariables
37
+
38
+ Constrain metavariables to specific types (reduces false positives):
39
+
40
+ ```yaml
41
+ # C/C++ - match only int16_t parameters
42
+ pattern: (int16_t $X)
43
+
44
+ # C/C++ - match function with typed parameter
45
+ pattern: some_func((int $ARG))
46
+
47
+ # Java - match Logger type
48
+ pattern: (java.util.logging.Logger $LOGGER).log(...)
49
+
50
+ # Go - match pointer type (uses colon syntax)
51
+ pattern: ($READER : *zip.Reader).Open($INPUT)
52
+
53
+ # TypeScript - match specific type
54
+ pattern: ($X: DomSanitizer).sanitize(...)
55
+
56
+ # Use in taint mode to track only specific types as sources:
57
+ pattern-sources:
58
+ - pattern: (int $X) # Only int parameters are taint sources
59
+ - pattern: (int16_t $X) # Only int16_t parameters
60
+ - pattern: int $X = $INIT; # Local variable declarations
61
+ ```
62
+
63
+ ### Deep Expression Matching
64
+ ```yaml
65
+ <... $EXPR ...> # Recursively match pattern in nested expressions
66
+ ```
67
+
68
+ ### Scope Operators
69
+ ```yaml
70
+ pattern-inside: | # Must be inside this pattern
71
+ def $FUNC(...):
72
+ ...
73
+ pattern-not-inside: | # Must NOT be inside this pattern
74
+ with $CTX:
75
+ ...
76
+ ```
77
+
78
+ ### Negation
79
+ ```yaml
80
+ pattern-not: safe(...) # Exclude this pattern
81
+ pattern-not-regex: ^test_ # Exclude by regex
82
+ ```
83
+
84
+ ### Metavariable Filters
85
+ ```yaml
86
+ metavariable-regex:
87
+ metavariable: $FUNC
88
+ regex: (unsafe|dangerous).*
89
+
90
+ metavariable-pattern:
91
+ metavariable: $ARG
92
+ pattern: request.$X
93
+
94
+ metavariable-comparison:
95
+ metavariable: $NUM
96
+ comparison: $NUM > 1024
97
+ ```
98
+
99
+ ### Focus
100
+ ```yaml
101
+ focus-metavariable: $TARGET # Report finding on this metavariable only
102
+ ```
103
+
104
+ ## Taint Mode
105
+
106
+ ```yaml
107
+ rules:
108
+ - id: taint-rule
109
+ mode: taint
110
+ languages: [python]
111
+ severity: HIGH
112
+ message: Tainted data reaches sink
113
+ pattern-sources:
114
+ - pattern: user_input()
115
+ - pattern: request.args.get(...)
116
+ pattern-sinks:
117
+ - pattern: eval(...)
118
+ - pattern: os.system(...)
119
+ pattern-sanitizers: # Optional
120
+ - pattern: sanitize(...)
121
+ - pattern: escape(...)
122
+ ```
123
+
124
+ ### Taint Options
125
+ ```yaml
126
+ pattern-sources:
127
+ - pattern: source(...)
128
+ exact: true # Only exact match is source (default: false)
129
+ by-side-effect: true # Taints variable by side effect
130
+
131
+ pattern-sanitizers:
132
+ - pattern: sanitize($X)
133
+ exact: true # Only exact match (default: false)
134
+ by-side-effect: true # Sanitizes by side effect
135
+
136
+ pattern-sinks:
137
+ - pattern: sink(...)
138
+ exact: false # Subexpressions also sinks (default: true)
139
+ ```
140
+
141
+ ## Test File Annotations
142
+
143
+ Only allowed annotations are `ruleid: rule-id` and `ok: rule-id`.
144
+
145
+ ```python
146
+ # ruleid: rule-id
147
+ vulnerable_code() # This line MUST match
148
+
149
+ # ok: rule-id
150
+ safe_code() # This line MUST NOT match
151
+ ```
152
+
153
+ DO NOT use multi-line comments for test annotations, for example:
154
+ /* ruleid: ... */
155
+
156
+ ## Debugging Commands
157
+
158
+ ```bash
159
+ # Test rules
160
+ semgrep --test --config <rule-id>.yaml <rule-id>.<ext>
161
+
162
+ # Validate YAML syntax
163
+ semgrep --validate --config <rule-id>.yaml
164
+
165
+ # Run with dataflow traces (for taint mode rules)
166
+ semgrep --dataflow-traces -f <rule-id>.yaml <rule-id>.<ext>
167
+
168
+ # Dump AST to understand code structure
169
+ semgrep --dump-ast -l <language> <rule-id>.<ext>
170
+
171
+ # Run single rule
172
+ semgrep -f <rule-id>.yaml <rule-id>.<ext>
173
+ ```
174
+
175
+ ## Troubleshooting
176
+
177
+ ### Common Pitfalls
178
+
179
+ 1. **Wrong annotation line**: `ruleid:` must be on the line IMMEDIATELY BEFORE the finding. No other text or code
180
+ 2. **Too generic patterns**: Avoid `pattern: $X` without constraints
181
+ 3. **YAML syntax errors**: Validate with `semgrep --validate`
182
+
183
+ ### Pattern Not Matching
184
+
185
+ 1. Check AST structure: `semgrep --dump-ast -l <language> <rule-id>.<ext>`
186
+ 2. Verify metavariable binding
187
+ 3. Check for whitespace/formatting differences
188
+ 4. Try more general pattern first, then narrow down
189
+
190
+ ### Taint Not Propagating
191
+
192
+ 1. Use `--dataflow-traces` to see flow
193
+ 2. Check if sanitizer is too broad
194
+ 3. Verify source pattern matches
195
+ 4. Check sink focus-metavariable
196
+
197
+ ### Too Many False Positives
198
+
199
+ 1. Add `pattern-not` for safe cases
200
+ 2. Add sanitizers for validation functions
201
+ 3. Use `pattern-inside` to limit scope
202
+ 4. Use `metavariable-regex` to filter