@vigolium/piolium 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +117 -0
- package/agents/access-auditor.md +300 -0
- package/agents/assumption-breaker.md +154 -0
- package/agents/attack-designer.md +116 -0
- package/agents/code-scanner.md +139 -0
- package/agents/concurrency-auditor.md +238 -0
- package/agents/confirm-writer.md +257 -0
- package/agents/context-reviewer.md +274 -0
- package/agents/cross-verifier.md +165 -0
- package/agents/cve-scout.md +381 -0
- package/agents/env-builder.md +282 -0
- package/agents/env-profiler.md +205 -0
- package/agents/evidence-collector.md +140 -0
- package/agents/finding-grader.md +142 -0
- package/agents/finding-writer.md +148 -0
- package/agents/flow-tracer.md +106 -0
- package/agents/goal-backtracer.md +146 -0
- package/agents/history-miner.md +467 -0
- package/agents/independent-verifier.md +118 -0
- package/agents/intent-mapper.md +183 -0
- package/agents/longshot-collector.md +128 -0
- package/agents/longshot-prober.md +126 -0
- package/agents/patch-auditor.md +73 -0
- package/agents/poc-author.md +124 -0
- package/agents/poc-runner.md +194 -0
- package/agents/probe-lead.md +269 -0
- package/agents/red-challenger.md +101 -0
- package/agents/report-composer.md +208 -0
- package/agents/review-adjudicator.md +216 -0
- package/agents/spec-auditor.md +155 -0
- package/agents/taint-tracer.md +265 -0
- package/agents/test-locator.md +209 -0
- package/agents/threat-modeler.md +132 -0
- package/agents/variant-scanner.md +108 -0
- package/agents/variant-spotter.md +110 -0
- package/bin/piolium.mjs +376 -0
- package/extensions/piolium/_vendor/yaml.bundle.d.mts +6 -0
- package/extensions/piolium/_vendor/yaml.bundle.mjs +139 -0
- package/extensions/piolium/agent-runner.ts +322 -0
- package/extensions/piolium/agents.ts +266 -0
- package/extensions/piolium/audit-state.ts +522 -0
- package/extensions/piolium/bundled-resources.ts +97 -0
- package/extensions/piolium/candidate-scan.ts +966 -0
- package/extensions/piolium/command-target.ts +177 -0
- package/extensions/piolium/console-stream.ts +57 -0
- package/extensions/piolium/export-results.ts +380 -0
- package/extensions/piolium/findings.ts +448 -0
- package/extensions/piolium/heartbeat.ts +182 -0
- package/extensions/piolium/help.ts +234 -0
- package/extensions/piolium/index.ts +1865 -0
- package/extensions/piolium/longshot.ts +530 -0
- package/extensions/piolium/matcher-suggestions.ts +196 -0
- package/extensions/piolium/matcher-utils.ts +83 -0
- package/extensions/piolium/modes/balanced.ts +750 -0
- package/extensions/piolium/modes/confirm-bootstrap.ts +186 -0
- package/extensions/piolium/modes/confirm.ts +697 -0
- package/extensions/piolium/modes/deep.ts +917 -0
- package/extensions/piolium/modes/diff.ts +177 -0
- package/extensions/piolium/modes/lite.ts +540 -0
- package/extensions/piolium/modes/longshot.ts +595 -0
- package/extensions/piolium/modes/merge.ts +204 -0
- package/extensions/piolium/modes/phase-runner.ts +267 -0
- package/extensions/piolium/modes/reinvest.ts +546 -0
- package/extensions/piolium/modes/revisit.ts +279 -0
- package/extensions/piolium/modes.ts +48 -0
- package/extensions/piolium/phase-labels.ts +123 -0
- package/extensions/piolium/phase-status-strip.ts +92 -0
- package/extensions/piolium/prompt-prefix-editor.ts +39 -0
- package/extensions/piolium/providers/anthropic-vertex.ts +836 -0
- package/extensions/piolium/recon.ts +409 -0
- package/extensions/piolium/result-stats.ts +105 -0
- package/extensions/piolium/retry.ts +120 -0
- package/extensions/piolium/scheduler.ts +212 -0
- package/extensions/piolium/secrets.ts +368 -0
- package/extensions/piolium/tools/web-tools.ts +148 -0
- package/package.json +77 -0
- package/skills/agentic-actions-auditor/SKILL.md +327 -0
- package/skills/agentic-actions-auditor/references/action-profiles.md +186 -0
- package/skills/agentic-actions-auditor/references/cross-file-resolution.md +209 -0
- package/skills/agentic-actions-auditor/references/foundations.md +94 -0
- package/skills/agentic-actions-auditor/references/vector-a-env-var-intermediary.md +77 -0
- package/skills/agentic-actions-auditor/references/vector-b-direct-expression-injection.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-c-cli-data-fetch.md +83 -0
- package/skills/agentic-actions-auditor/references/vector-d-pr-target-checkout.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-e-error-log-injection.md +88 -0
- package/skills/agentic-actions-auditor/references/vector-f-subshell-expansion.md +82 -0
- package/skills/agentic-actions-auditor/references/vector-g-eval-of-ai-output.md +91 -0
- package/skills/agentic-actions-auditor/references/vector-h-dangerous-sandbox-configs.md +102 -0
- package/skills/agentic-actions-auditor/references/vector-i-wildcard-allowlists.md +88 -0
- package/skills/audit/SKILL.md +562 -0
- package/skills/audit/assets/icon.svg +7 -0
- package/skills/audit/hooks/scripts/validate_phase_output.py +550 -0
- package/skills/audit/references/adversarial-review.md +148 -0
- package/skills/audit/references/architecture-aware-sast.md +306 -0
- package/skills/audit/references/audit-workflow.md +737 -0
- package/skills/audit/references/chamber-protocol.md +384 -0
- package/skills/audit/references/creative-attack-modes.md +221 -0
- package/skills/audit/references/deep-analysis.md +273 -0
- package/skills/audit/references/domain-attack-playbooks.md +1129 -0
- package/skills/audit/references/knowledge-base-template.md +513 -0
- package/skills/audit/references/real-env-validation.md +191 -0
- package/skills/audit/references/report-templates.md +417 -0
- package/skills/audit/references/triage-and-prereqs.md +134 -0
- package/skills/audit/scripts/consolidate_drafts.py +554 -0
- package/skills/audit/scripts/partition_findings.py +152 -0
- package/skills/audit/scripts/rg-hotspots.sh +121 -0
- package/skills/audit/scripts/stamp_file_state.py +349 -0
- package/skills/code-reviewer/SKILL.md +65 -0
- package/skills/codeql/SKILL.md +281 -0
- package/skills/codeql/references/build-fixes.md +90 -0
- package/skills/codeql/references/diagnostic-query-templates.md +339 -0
- package/skills/codeql/references/extension-yaml-format.md +209 -0
- package/skills/codeql/references/important-only-suite.md +153 -0
- package/skills/codeql/references/language-details.md +207 -0
- package/skills/codeql/references/macos-arm64e-workaround.md +179 -0
- package/skills/codeql/references/performance-tuning.md +111 -0
- package/skills/codeql/references/quality-assessment.md +172 -0
- package/skills/codeql/references/ruleset-catalog.md +63 -0
- package/skills/codeql/references/run-all-suite.md +92 -0
- package/skills/codeql/references/sarif-processing.md +79 -0
- package/skills/codeql/references/threat-models.md +51 -0
- package/skills/codeql/workflows/build-database.md +280 -0
- package/skills/codeql/workflows/create-data-extensions.md +261 -0
- package/skills/codeql/workflows/run-analysis.md +301 -0
- package/skills/differential-review/SKILL.md +220 -0
- package/skills/differential-review/adversarial.md +203 -0
- package/skills/differential-review/methodology.md +234 -0
- package/skills/differential-review/patterns.md +300 -0
- package/skills/differential-review/reporting.md +369 -0
- package/skills/fp-check/SKILL.md +125 -0
- package/skills/fp-check/references/bug-class-verification.md +114 -0
- package/skills/fp-check/references/deep-verification.md +143 -0
- package/skills/fp-check/references/evidence-templates.md +91 -0
- package/skills/fp-check/references/false-positive-patterns.md +115 -0
- package/skills/fp-check/references/gate-reviews.md +27 -0
- package/skills/fp-check/references/standard-verification.md +78 -0
- package/skills/insecure-defaults/SKILL.md +117 -0
- package/skills/insecure-defaults/references/examples.md +409 -0
- package/skills/last30days/SKILL.md +444 -0
- package/skills/sarif-parsing/SKILL.md +483 -0
- package/skills/sarif-parsing/resources/jq-queries.md +162 -0
- package/skills/sarif-parsing/resources/sarif_helpers.py +331 -0
- package/skills/security-threat-model/LICENSE.txt +201 -0
- package/skills/security-threat-model/SKILL.md +81 -0
- package/skills/security-threat-model/agents/openai.yaml +4 -0
- package/skills/security-threat-model/references/prompt-template.md +255 -0
- package/skills/security-threat-model/references/security-controls-and-assets.md +32 -0
- package/skills/semgrep/SKILL.md +212 -0
- package/skills/semgrep/references/rulesets.md +162 -0
- package/skills/semgrep/references/scan-modes.md +110 -0
- package/skills/semgrep/references/scanner-task-prompt.md +140 -0
- package/skills/semgrep/scripts/merge_sarif.py +203 -0
- package/skills/semgrep/workflows/scan-workflow.md +311 -0
- package/skills/semgrep-rule-creator/SKILL.md +168 -0
- package/skills/semgrep-rule-creator/references/quick-reference.md +202 -0
- package/skills/semgrep-rule-creator/references/workflow.md +240 -0
- package/skills/semgrep-rule-variant-creator/SKILL.md +205 -0
- package/skills/semgrep-rule-variant-creator/references/applicability-analysis.md +250 -0
- package/skills/semgrep-rule-variant-creator/references/language-syntax-guide.md +324 -0
- package/skills/semgrep-rule-variant-creator/references/workflow.md +518 -0
- package/skills/sharp-edges/SKILL.md +292 -0
- package/skills/sharp-edges/references/auth-patterns.md +252 -0
- package/skills/sharp-edges/references/case-studies.md +274 -0
- package/skills/sharp-edges/references/config-patterns.md +333 -0
- package/skills/sharp-edges/references/crypto-apis.md +190 -0
- package/skills/sharp-edges/references/lang-c.md +205 -0
- package/skills/sharp-edges/references/lang-csharp.md +285 -0
- package/skills/sharp-edges/references/lang-go.md +270 -0
- package/skills/sharp-edges/references/lang-java.md +263 -0
- package/skills/sharp-edges/references/lang-javascript.md +269 -0
- package/skills/sharp-edges/references/lang-kotlin.md +265 -0
- package/skills/sharp-edges/references/lang-php.md +245 -0
- package/skills/sharp-edges/references/lang-python.md +274 -0
- package/skills/sharp-edges/references/lang-ruby.md +273 -0
- package/skills/sharp-edges/references/lang-rust.md +272 -0
- package/skills/sharp-edges/references/lang-swift.md +287 -0
- package/skills/sharp-edges/references/language-specific.md +588 -0
- package/skills/spec-to-code-compliance/SKILL.md +357 -0
- package/skills/spec-to-code-compliance/resources/COMPLETENESS_CHECKLIST.md +69 -0
- package/skills/spec-to-code-compliance/resources/IR_EXAMPLES.md +417 -0
- package/skills/spec-to-code-compliance/resources/OUTPUT_REQUIREMENTS.md +105 -0
- package/skills/supply-chain-risk-auditor/SKILL.md +67 -0
- package/skills/supply-chain-risk-auditor/resources/results-template.md +41 -0
- package/skills/variant-analysis/METHODOLOGY.md +327 -0
- package/skills/variant-analysis/SKILL.md +142 -0
- package/skills/variant-analysis/resources/codeql/cpp.ql +119 -0
- package/skills/variant-analysis/resources/codeql/go.ql +69 -0
- package/skills/variant-analysis/resources/codeql/java.ql +71 -0
- package/skills/variant-analysis/resources/codeql/javascript.ql +63 -0
- package/skills/variant-analysis/resources/codeql/python.ql +80 -0
- package/skills/variant-analysis/resources/semgrep/cpp.yaml +98 -0
- package/skills/variant-analysis/resources/semgrep/go.yaml +63 -0
- package/skills/variant-analysis/resources/semgrep/java.yaml +61 -0
- package/skills/variant-analysis/resources/semgrep/javascript.yaml +60 -0
- package/skills/variant-analysis/resources/semgrep/python.yaml +72 -0
- package/skills/variant-analysis/resources/variant-report-template.md +75 -0
- package/skills/vuln-report/SKILL.md +137 -0
- package/skills/vuln-report/agents/openai.yaml +4 -0
- package/skills/vuln-report/references/report-template.md +135 -0
- package/skills/wooyun-legacy/SKILL.md +367 -0
- package/skills/wooyun-legacy/references/bank-penetration.md +222 -0
- package/skills/wooyun-legacy/references/checklists/command-execution-checklist.md +119 -0
- package/skills/wooyun-legacy/references/checklists/csrf-checklist.md +74 -0
- package/skills/wooyun-legacy/references/checklists/file-upload-checklist.md +108 -0
- package/skills/wooyun-legacy/references/checklists/info-disclosure-checklist.md +114 -0
- package/skills/wooyun-legacy/references/checklists/logic-flaws-checklist.md +95 -0
- package/skills/wooyun-legacy/references/checklists/misconfig-checklist.md +124 -0
- package/skills/wooyun-legacy/references/checklists/path-traversal-checklist.md +87 -0
- package/skills/wooyun-legacy/references/checklists/rce-checklist.md +93 -0
- package/skills/wooyun-legacy/references/checklists/sql-injection-checklist.md +97 -0
- package/skills/wooyun-legacy/references/checklists/ssrf-checklist.md +99 -0
- package/skills/wooyun-legacy/references/checklists/unauthorized-access-checklist.md +89 -0
- package/skills/wooyun-legacy/references/checklists/weak-password-checklist.md +115 -0
- package/skills/wooyun-legacy/references/checklists/xss-checklist.md +103 -0
- package/skills/wooyun-legacy/references/checklists/xxe-checklist.md +130 -0
- package/skills/wooyun-legacy/references/info-disclosure.md +975 -0
- package/skills/wooyun-legacy/references/logic-flaws.md +721 -0
- package/skills/wooyun-legacy/references/path-traversal.md +1191 -0
- package/skills/wooyun-legacy/references/telecom-penetration.md +156 -0
- package/skills/wooyun-legacy/references/unauthorized-access.md +980 -0
- package/skills/wooyun-legacy/references/xss.md +746 -0
- package/skills/zeroize-audit/SKILL.md +371 -0
- package/skills/zeroize-audit/configs/c.yaml +21 -0
- package/skills/zeroize-audit/configs/default.yaml +128 -0
- package/skills/zeroize-audit/configs/rust.yaml +83 -0
- package/skills/zeroize-audit/prompts/report_template.md +238 -0
- package/skills/zeroize-audit/prompts/system.md +163 -0
- package/skills/zeroize-audit/prompts/task.md +97 -0
- package/skills/zeroize-audit/references/compile-commands.md +231 -0
- package/skills/zeroize-audit/references/detection-strategy.md +191 -0
- package/skills/zeroize-audit/references/ir-analysis.md +252 -0
- package/skills/zeroize-audit/references/mcp-analysis.md +221 -0
- package/skills/zeroize-audit/references/poc-generation.md +470 -0
- package/skills/zeroize-audit/references/rust-zeroization-patterns.md +867 -0
- package/skills/zeroize-audit/schemas/input.json +83 -0
- package/skills/zeroize-audit/schemas/output.json +140 -0
- package/skills/zeroize-audit/tools/analyze_asm.sh +202 -0
- package/skills/zeroize-audit/tools/analyze_cfg.py +381 -0
- package/skills/zeroize-audit/tools/analyze_heap.sh +211 -0
- package/skills/zeroize-audit/tools/analyze_ir_semantic.py +429 -0
- package/skills/zeroize-audit/tools/diff_ir.sh +135 -0
- package/skills/zeroize-audit/tools/diff_rust_mir.sh +189 -0
- package/skills/zeroize-audit/tools/emit_asm.sh +67 -0
- package/skills/zeroize-audit/tools/emit_ir.sh +77 -0
- package/skills/zeroize-audit/tools/emit_rust_asm.sh +178 -0
- package/skills/zeroize-audit/tools/emit_rust_ir.sh +150 -0
- package/skills/zeroize-audit/tools/emit_rust_mir.sh +158 -0
- package/skills/zeroize-audit/tools/extract_compile_flags.py +284 -0
- package/skills/zeroize-audit/tools/generate_poc.py +1329 -0
- package/skills/zeroize-audit/tools/mcp/apply_confidence_gates.py +113 -0
- package/skills/zeroize-audit/tools/mcp/check_mcp.sh +68 -0
- package/skills/zeroize-audit/tools/mcp/normalize_mcp_evidence.py +125 -0
- package/skills/zeroize-audit/tools/scripts/check_llvm_patterns.py +481 -0
- package/skills/zeroize-audit/tools/scripts/check_mir_patterns.py +554 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm.py +424 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_aarch64.py +300 -0
- package/skills/zeroize-audit/tools/scripts/check_rust_asm_x86.py +283 -0
- package/skills/zeroize-audit/tools/scripts/find_dangerous_apis.py +375 -0
- package/skills/zeroize-audit/tools/scripts/semantic_audit.py +923 -0
- package/skills/zeroize-audit/tools/track_dataflow.sh +196 -0
- package/skills/zeroize-audit/tools/validate_rust_toolchain.sh +298 -0
- package/skills/zeroize-audit/workflows/phase-0-preflight.md +150 -0
- package/skills/zeroize-audit/workflows/phase-1-source-analysis.md +144 -0
- package/skills/zeroize-audit/workflows/phase-2-compiler-analysis.md +139 -0
- package/skills/zeroize-audit/workflows/phase-3-interim-report.md +46 -0
- package/skills/zeroize-audit/workflows/phase-4-poc-generation.md +46 -0
- package/skills/zeroize-audit/workflows/phase-5-poc-validation.md +136 -0
- package/skills/zeroize-audit/workflows/phase-6-final-report.md +44 -0
- package/skills/zeroize-audit/workflows/phase-7-test-generation.md +42 -0
- package/themes/piolium-srcery.json +94 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Scan Modes Reference
|
|
2
|
+
|
|
3
|
+
## Mode: Run All
|
|
4
|
+
|
|
5
|
+
Full scan with all rulesets and severity levels. This is the current default behavior. No filtering is applied — all findings are reported and triaged.
|
|
6
|
+
|
|
7
|
+
## Mode: Important Only
|
|
8
|
+
|
|
9
|
+
Focused on high-confidence security vulnerabilities. Excludes code quality, best practices, and low-confidence audit findings.
|
|
10
|
+
|
|
11
|
+
### Pre-Filter: CLI Severity Flag
|
|
12
|
+
|
|
13
|
+
Add these flags to every `semgrep` command:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
--severity MEDIUM --severity HIGH --severity CRITICAL
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
This excludes LOW/INFO severity findings at scan time, reducing output volume before post-filtering.
|
|
20
|
+
|
|
21
|
+
### Post-Filter: Metadata Criteria
|
|
22
|
+
|
|
23
|
+
After scanning, filter each JSON result file to keep only findings matching ALL of:
|
|
24
|
+
|
|
25
|
+
| Metadata Field | Accepted Values | Rationale |
|
|
26
|
+
|---|---|---|
|
|
27
|
+
| `extra.metadata.category` | `"security"` | Excludes correctness, best-practice, maintainability, performance |
|
|
28
|
+
| `extra.metadata.confidence` | `"MEDIUM"`, `"HIGH"` | Excludes low-precision rules (high false positive rate) |
|
|
29
|
+
| `extra.metadata.impact` | `"MEDIUM"`, `"HIGH"` | Excludes low-impact informational findings |
|
|
30
|
+
|
|
31
|
+
**Third-party rules** (Trail of Bits, 0xdea, Decurity, etc.) may not have `confidence`/`impact`/`category` metadata. Findings **without** these metadata fields are **kept** — we cannot filter what is not annotated, and third-party rules are typically security-focused.
|
|
32
|
+
|
|
33
|
+
### Semgrep Metadata Background
|
|
34
|
+
|
|
35
|
+
Semgrep security rules have these metadata fields (required for `category: security` in the official registry):
|
|
36
|
+
|
|
37
|
+
| Field | Purpose | Values |
|
|
38
|
+
|---|---|---|
|
|
39
|
+
| `severity` (top-level) | Overall rule severity, derived from likelihood × impact | `LOW`, `MEDIUM`, `HIGH`, `CRITICAL` |
|
|
40
|
+
| `category` | Rule category | `security`, `correctness`, `best-practice`, `maintainability`, `performance` |
|
|
41
|
+
| `confidence` | True positive rate of the rule (precision) | `LOW`, `MEDIUM`, `HIGH` |
|
|
42
|
+
| `impact` | Potential damage if vulnerability is exploited | `LOW`, `MEDIUM`, `HIGH` |
|
|
43
|
+
| `likelihood` | How likely the vulnerability is exploitable | `LOW`, `MEDIUM`, `HIGH` |
|
|
44
|
+
| `subcategory` | Finding type | `vuln`, `audit`, `secure default` |
|
|
45
|
+
|
|
46
|
+
Key relationship: `severity = f(likelihood, impact)` while `confidence` is independent (describes rule quality, not vulnerability severity).
|
|
47
|
+
|
|
48
|
+
### Post-Filter jq Command
|
|
49
|
+
|
|
50
|
+
Apply to each JSON result file after scanning:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Filter a single result file
|
|
54
|
+
jq '{
|
|
55
|
+
results: [.results[] |
|
|
56
|
+
((.extra.metadata.category // "security") | ascii_downcase) as $cat |
|
|
57
|
+
((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf |
|
|
58
|
+
((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp |
|
|
59
|
+
select(
|
|
60
|
+
($cat == "security") and
|
|
61
|
+
($conf == "MEDIUM" or $conf == "HIGH") and
|
|
62
|
+
($imp == "MEDIUM" or $imp == "HIGH")
|
|
63
|
+
)
|
|
64
|
+
],
|
|
65
|
+
errors: .errors,
|
|
66
|
+
paths: .paths
|
|
67
|
+
}' "$f" > "${f%.json}-important.json"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
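The default values supplied through jq's alternative operator (`// "security"`, `// "HIGH"`) handle third-party rules without metadata: a missing field is treated as an accepted value, so those findings pass all filters by default.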
|
|
71
|
+
|
|
72
|
+
### Filter All Result Files in a Directory
|
|
73
|
+
|
|
74
|
+
Raw scan output lives in `$OUTPUT_DIR/raw/`. The filter creates `*-important.json` files alongside the originals — the raw files are preserved unmodified.
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# Apply important-only filter to all scan result JSON files in raw/
|
|
78
|
+
for f in "$OUTPUT_DIR/raw"/*-*.json; do
|
|
79
|
+
[[ "$f" == *-triage.json || "$f" == *-important.json ]] && continue
|
|
80
|
+
jq '{
|
|
81
|
+
results: [.results[] |
|
|
82
|
+
((.extra.metadata.category // "security") | ascii_downcase) as $cat |
|
|
83
|
+
((.extra.metadata.confidence // "HIGH") | ascii_upcase) as $conf |
|
|
84
|
+
((.extra.metadata.impact // "HIGH") | ascii_upcase) as $imp |
|
|
85
|
+
select(
|
|
86
|
+
($cat == "security") and
|
|
87
|
+
($conf == "MEDIUM" or $conf == "HIGH") and
|
|
88
|
+
($imp == "MEDIUM" or $imp == "HIGH")
|
|
89
|
+
)
|
|
90
|
+
],
|
|
91
|
+
errors: .errors,
|
|
92
|
+
paths: .paths
|
|
93
|
+
}' "$f" > "${f%.json}-important.json"
|
|
94
|
+
BEFORE=$(jq '.results | length' "$f")
|
|
95
|
+
AFTER=$(jq '.results | length' "${f%.json}-important.json")
|
|
96
|
+
echo "$f: $BEFORE → $AFTER findings (filtered $(( BEFORE - AFTER )))"
|
|
97
|
+
done
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Scanner Task Modifications
|
|
101
|
+
|
|
102
|
+
The scanner template includes a `[SEVERITY_FLAGS]` placeholder that is filled in only in important-only mode:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/raw/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/raw/[lang]-[ruleset].sarif [TARGET] &
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Where `[SEVERITY_FLAGS]` is:
|
|
109
|
+
- **Run all**: *(empty)*
|
|
110
|
+
- **Important only**: `--severity MEDIUM --severity HIGH --severity CRITICAL`
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Scanner Subagent Task Prompt
|
|
2
|
+
|
|
3
|
+
Use this prompt template when spawning scanner Tasks in Step 4. Use `subagent_type: static-analysis:semgrep-scanner`.
|
|
4
|
+
|
|
5
|
+
## Template
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
You are a Semgrep scanner for [LANGUAGE_CATEGORY].
|
|
9
|
+
|
|
10
|
+
## Task
|
|
11
|
+
Run Semgrep scans for [LANGUAGE] files and save results to [OUTPUT_DIR]/raw.
|
|
12
|
+
|
|
13
|
+
## Pro Engine Status: [PRO_AVAILABLE: true/false]
|
|
14
|
+
|
|
15
|
+
## Scan Mode: [SCAN_MODE: run-all/important-only]
|
|
16
|
+
|
|
17
|
+
## APPROVED RULESETS (from user-confirmed plan)
|
|
18
|
+
[LIST EXACT RULESETS USER APPROVED - DO NOT SUBSTITUTE]
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
- p/python
|
|
22
|
+
- p/django
|
|
23
|
+
- p/security-audit
|
|
24
|
+
- p/secrets
|
|
25
|
+
- https://github.com/trailofbits/semgrep-rules
|
|
26
|
+
|
|
27
|
+
## Commands to Run (in parallel)
|
|
28
|
+
|
|
29
|
+
### Clone GitHub URL rulesets first:
|
|
30
|
+
```bash
|
|
31
|
+
mkdir -p [OUTPUT_DIR]/repos
|
|
32
|
+
# For each GitHub URL ruleset, clone into [OUTPUT_DIR]/repos/[name]:
|
|
33
|
+
git clone --depth 1 https://github.com/org/repo [OUTPUT_DIR]/repos/repo-name
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Generate commands for EACH approved ruleset:
|
|
37
|
+
```bash
|
|
38
|
+
semgrep [--pro if available] --metrics=off [SEVERITY_FLAGS] [INCLUDE_FLAGS] --config [RULESET] --json -o [OUTPUT_DIR]/raw/[lang]-[ruleset].json --sarif-output=[OUTPUT_DIR]/raw/[lang]-[ruleset].sarif [TARGET] &
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Wait for all to complete:
|
|
42
|
+
```bash
|
|
43
|
+
wait
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Clean up cloned repos:
|
|
47
|
+
```bash
|
|
48
|
+
[ -n "[OUTPUT_DIR]" ] && rm -rf [OUTPUT_DIR]/repos
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Critical Rules
|
|
52
|
+
- Use ONLY the rulesets listed above - do not add or remove any
|
|
53
|
+
- Always use --metrics=off (prevents sending telemetry to Semgrep servers)
|
|
54
|
+
- Use --pro when Pro is available (enables cross-file taint tracking)
|
|
55
|
+
- If scan mode is **important-only**, add `--severity MEDIUM --severity HIGH --severity CRITICAL` to every command
|
|
56
|
+
- If scan mode is **run-all**, do NOT add severity flags
|
|
57
|
+
- Run all rulesets in parallel with & and wait
|
|
58
|
+
- For GitHub URL rulesets, always clone into [OUTPUT_DIR]/repos/ and use the local path as --config (do NOT pass URLs directly to semgrep — its URL handling is unreliable for repos with non-standard YAML)
|
|
59
|
+
- Add `--include` flags for language-specific rulesets (e.g., `--include="*.py"` for p/python). Do NOT add `--include` to cross-language rulesets like p/security-audit, p/secrets, or third-party repos
|
|
60
|
+
- After all scans complete, delete [OUTPUT_DIR]/repos/ to avoid leaving cloned repos behind
|
|
61
|
+
|
|
62
|
+
## Output
|
|
63
|
+
Report:
|
|
64
|
+
- Number of findings per ruleset
|
|
65
|
+
- Any scan errors
|
|
66
|
+
- File paths of JSON results (in [OUTPUT_DIR]/raw/)
|
|
67
|
+
- [If Pro] Note any cross-file findings detected
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Variable Substitutions
|
|
71
|
+
|
|
72
|
+
| Variable | Description | Example |
|
|
73
|
+
|----------|-------------|---------|
|
|
74
|
+
| `[LANGUAGE_CATEGORY]` | Language group being scanned | Python, JavaScript, Docker |
|
|
75
|
+
| `[LANGUAGE]` | Specific language | Python, TypeScript, Go |
|
|
76
|
+
| `[OUTPUT_DIR]` | Output directory (absolute path, resolved in Step 1) | /path/to/static_analysis_semgrep_1 |
|
|
77
|
+
| `[PRO_AVAILABLE]` | Whether Pro engine is available | true, false |
|
|
78
|
+
| `[SEVERITY_FLAGS]` | Severity pre-filter flags | *(empty)* for run-all, `--severity MEDIUM --severity HIGH --severity CRITICAL` for important-only |
|
|
79
|
+
| `[INCLUDE_FLAGS]` | File extension filter for language-specific rulesets | `--include="*.py"` for Python rulesets, *(empty)* for cross-language rulesets like p/security-audit, p/secrets, or third-party repos |
|
|
80
|
+
| `[RULESET]` | Semgrep ruleset identifier or local clone path | p/python, [OUTPUT_DIR]/repos/semgrep-rules |
|
|
81
|
+
| `[TARGET]` | Absolute path to directory to scan | /path/to/codebase |
|
|
82
|
+
|
|
83
|
+
## Example: Python Scanner Task
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
You are a Semgrep scanner for Python.
|
|
87
|
+
|
|
88
|
+
## Task
|
|
89
|
+
Run Semgrep scans for Python files and save results to /path/to/static_analysis_semgrep_1/raw.
|
|
90
|
+
|
|
91
|
+
## Pro Engine Status: true
|
|
92
|
+
|
|
93
|
+
## Scan Mode: run-all
|
|
94
|
+
|
|
95
|
+
## APPROVED RULESETS (from user-confirmed plan)
|
|
96
|
+
- p/python
|
|
97
|
+
- p/django
|
|
98
|
+
- p/security-audit
|
|
99
|
+
- p/secrets
|
|
100
|
+
- https://github.com/trailofbits/semgrep-rules
|
|
101
|
+
|
|
102
|
+
## Commands to Run (in parallel)
|
|
103
|
+
|
|
104
|
+
### Clone GitHub URL rulesets first:
|
|
105
|
+
```bash
|
|
106
|
+
mkdir -p /path/to/static_analysis_semgrep_1/repos
|
|
107
|
+
git clone --depth 1 https://github.com/trailofbits/semgrep-rules /path/to/static_analysis_semgrep_1/repos/trailofbits
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Run scans:
|
|
111
|
+
```bash
|
|
112
|
+
semgrep --pro --metrics=off --include="*.py" --config p/python --json -o /path/to/static_analysis_semgrep_1/raw/python-python.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-python.sarif /path/to/codebase &
|
|
113
|
+
semgrep --pro --metrics=off --include="*.py" --config p/django --json -o /path/to/static_analysis_semgrep_1/raw/python-django.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-django.sarif /path/to/codebase &
|
|
114
|
+
semgrep --pro --metrics=off --config p/security-audit --json -o /path/to/static_analysis_semgrep_1/raw/python-security-audit.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-security-audit.sarif /path/to/codebase &
|
|
115
|
+
semgrep --pro --metrics=off --config p/secrets --json -o /path/to/static_analysis_semgrep_1/raw/python-secrets.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-secrets.sarif /path/to/codebase &
|
|
116
|
+
semgrep --pro --metrics=off --config /path/to/static_analysis_semgrep_1/repos/trailofbits --json -o /path/to/static_analysis_semgrep_1/raw/python-trailofbits.json --sarif-output=/path/to/static_analysis_semgrep_1/raw/python-trailofbits.sarif /path/to/codebase &
|
|
117
|
+
wait
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Clean up cloned repos:
|
|
121
|
+
```bash
|
|
122
|
+
rm -rf /path/to/static_analysis_semgrep_1/repos
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Critical Rules
|
|
126
|
+
- Use ONLY the rulesets listed above - do not add or remove any
|
|
127
|
+
- Always use --metrics=off
|
|
128
|
+
- Use --pro when Pro is available
|
|
129
|
+
- Run all rulesets in parallel with & and wait
|
|
130
|
+
- Clone GitHub URL rulesets into the output dir repos/ subfolder, use local path as --config
|
|
131
|
+
- Add --include="*.py" to language-specific rulesets (p/python, p/django) but NOT to p/security-audit, p/secrets, or third-party repos
|
|
132
|
+
- Delete repos/ after scanning
|
|
133
|
+
|
|
134
|
+
## Output
|
|
135
|
+
Report:
|
|
136
|
+
- Number of findings per ruleset
|
|
137
|
+
- Any scan errors
|
|
138
|
+
- File paths of JSON results (in raw/ subdirectory)
|
|
139
|
+
- Note any cross-file findings detected
|
|
140
|
+
```
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# /// script
|
|
2
|
+
# requires-python = ">=3.11"
|
|
3
|
+
# dependencies = []
|
|
4
|
+
# ///
|
|
5
|
+
"""Merge SARIF files into a single consolidated output.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
uv run merge_sarif.py RAW_DIR OUTPUT_FILE
|
|
9
|
+
|
|
10
|
+
Reads *.sarif files from RAW_DIR (e.g., $OUTPUT_DIR/raw), produces
|
|
11
|
+
OUTPUT_FILE (e.g., $OUTPUT_DIR/results/results.sarif) containing all
|
|
12
|
+
findings merged and deduplicated.
|
|
13
|
+
|
|
14
|
+
Uses SARIF Multitool for merging when available; otherwise falls back to a
|
|
15
|
+
pure Python implementation.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import shutil
|
|
22
|
+
import subprocess
|
|
23
|
+
import sys
|
|
24
|
+
import tempfile
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def has_sarif_multitool() -> bool:
    """Report whether SARIF Multitool can be run through ``npx`` as-is.

    Returns:
        ``True`` only when ``npx`` is found on ``PATH`` and
        ``npx --no-install @microsoft/sarif-multitool --version`` exits with
        status 0. Every failure mode (no ``npx``, timeout, OS-level error,
        non-zero exit) yields ``False``; timeouts and OS errors other than a
        missing executable are also reported on stderr.
    """
    if not shutil.which("npx"):
        return False

    probe_cmd = ["npx", "--no-install", "@microsoft/sarif-multitool", "--version"]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, timeout=30)
    except subprocess.TimeoutExpired:
        print("Warning: SARIF Multitool version check timed out", file=sys.stderr)
        return False
    except FileNotFoundError:
        # Executable could not be launched: treated as "not available", silently.
        return False
    except OSError as exc:
        print(f"Warning: Failed to check SARIF Multitool: {exc}", file=sys.stderr)
        return False

    # The probe ran to completion; availability is decided by its exit status.
    return probe.returncode == 0
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def merge_with_multitool(sarif_files: list[Path]) -> dict | None:
    """Use SARIF Multitool to merge SARIF files.

    The tool is invoked through ``npx --no-install`` and writes its result to
    a temporary file, which is parsed and then always removed.

    Args:
        sarif_files: Paths of the SARIF files to merge.

    Returns:
        The merged SARIF log as a dict, or None when there is nothing to
        merge, the tool fails or times out (120 s), or its output is not a
        JSON object. Every failure is reported on stderr.
    """
    if not sarif_files:
        return None

    # Reserve a unique output path; the tool overwrites it because of --force.
    with tempfile.NamedTemporaryFile(suffix=".sarif", delete=False) as tmp:
        tmp_path = Path(tmp.name)

    try:
        cmd = [
            # Prefer the resolved path: on Windows npx is a ``.cmd`` shim that
            # a bare "npx" does not find without a shell.
            shutil.which("npx") or "npx",
            "--no-install",
            "@microsoft/sarif-multitool",
            "merge",
            *[str(f) for f in sarif_files],
            "--output-file",
            str(tmp_path),
            "--force",
        ]
        result = subprocess.run(cmd, capture_output=True, timeout=120)
        if result.returncode != 0:
            # Tool output is not guaranteed to be valid UTF-8; never let a
            # decoding problem mask the real failure.
            stderr_text = result.stderr.decode("utf-8", errors="replace")
            print(f"SARIF Multitool merge failed: {stderr_text}", file=sys.stderr)
            return None

        # SARIF is UTF-8; "utf-8-sig" also accepts a leading byte-order mark.
        merged = json.loads(tmp_path.read_text(encoding="utf-8-sig"))
    except subprocess.TimeoutExpired as e:
        print(f"SARIF Multitool timed out: {e}", file=sys.stderr)
        return None
    except json.JSONDecodeError as e:
        print(f"SARIF Multitool produced invalid JSON: {e}", file=sys.stderr)
        return None
    except UnicodeDecodeError as e:
        print(f"SARIF Multitool produced undecodable output: {e}", file=sys.stderr)
        return None
    except FileNotFoundError as e:
        print(f"SARIF Multitool not found: {e}", file=sys.stderr)
        return None
    except OSError as e:
        print(f"SARIF Multitool OS error ({type(e).__name__}): {e}", file=sys.stderr)
        return None
    finally:
        tmp_path.unlink(missing_ok=True)

    if not isinstance(merged, dict):
        # Callers treat the result as a SARIF log object (they call .get()).
        print("SARIF Multitool produced JSON that is not an object", file=sys.stderr)
        return None
    return merged
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _result_dedup_key(result: dict) -> tuple[str, str, int]:
    """Return the (ruleId, uri, startLine) identity of a SARIF result.

    Only the first location is inspected; missing or null parts fall back to
    an empty string / 0.
    """
    rule_id = result.get("ruleId", "")
    uri = ""
    start_line = 0
    locations = result.get("locations") or []
    if locations:
        phys = locations[0].get("physicalLocation") or {}
        uri = (phys.get("artifactLocation") or {}).get("uri", "")
        start_line = (phys.get("region") or {}).get("startLine", 0)
    return (rule_id, uri, start_line)


def merge_sarif_pure_python(sarif_files: list[Path]) -> dict:
    """Pure Python SARIF merge (fallback).

    Files are processed in sorted order and folded into at most one run:

    * the tool object of the first run that has one is reused;
    * rules are kept once per ``id`` (first occurrence wins);
    * results are kept once per (ruleId, first-location uri, startLine).

    Files that cannot be read, decoded or parsed, or whose top-level JSON
    value is not an object, are skipped and listed on stderr.

    Args:
        sarif_files: Paths of the SARIF files to merge.

    Returns:
        A SARIF 2.1.0 log dict. ``runs`` holds a single merged run, or is
        empty when no results were collected.
    """
    merged: dict = {
        "version": "2.1.0",
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "runs": [],
    }

    seen_rules: dict[str, dict] = {}
    all_results: list[dict] = []
    seen_results: set[tuple[str, str, int]] = set()
    tool_info: dict | None = None
    skipped_files: list[str] = []

    for sarif_file in sorted(sarif_files):
        try:
            # SARIF is UTF-8; "utf-8-sig" also accepts a leading BOM.
            data = json.loads(sarif_file.read_text(encoding="utf-8-sig"))
        except (OSError, ValueError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"Warning: Failed to parse {sarif_file}: {e}", file=sys.stderr)
            skipped_files.append(str(sarif_file))
            continue
        if not isinstance(data, dict):
            print(
                f"Warning: Failed to parse {sarif_file}: top-level JSON value is not an object",
                file=sys.stderr,
            )
            skipped_files.append(str(sarif_file))
            continue

        for run in data.get("runs") or []:
            if not isinstance(run, dict):
                continue
            tool = run.get("tool") or {}
            if tool_info is None and tool:
                tool_info = tool

            driver = tool.get("driver") or {}
            for rule in driver.get("rules") or []:
                rule_id = rule.get("id", "")
                if rule_id and rule_id not in seen_rules:
                    seen_rules[rule_id] = rule

            for result in run.get("results") or []:
                dedup_key = _result_dedup_key(result)
                if dedup_key in seen_results:
                    continue
                seen_results.add(dedup_key)
                all_results.append(result)

    if all_results:
        tool = tool_info or {}
        driver = tool.get("driver")
        if not isinstance(driver, dict):
            # The reused tool object may lack a driver; supply the default
            # one instead of failing on the rules assignment below.
            driver = {"name": "semgrep"}
            tool["driver"] = driver
        driver["rules"] = list(seen_rules.values())
        merged["runs"].append({"tool": tool, "results": all_results})

    if skipped_files:
        print(
            f"WARNING: {len(skipped_files)} of {len(sarif_files)} SARIF files "
            f"could not be parsed. Results may be incomplete.",
            file=sys.stderr,
        )
        for sf in skipped_files:
            print(f"  Skipped: {sf}", file=sys.stderr)

    return merged
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def main() -> int:
    """Command-line entry point: merge RAW_DIR/*.sarif into OUTPUT_FILE.

    Expects exactly two positional arguments in ``sys.argv``: the directory
    holding the raw ``*.sarif`` files and the path of the merged output.
    SARIF Multitool is tried first; the pure Python merge is used when the
    tool is unavailable, fails, or returns something unusable.

    Returns:
        0 on success; 1 on a usage error, when RAW_DIR is not a directory,
        when it contains no SARIF files, or when the output cannot be
        written.
    """
    if len(sys.argv) != 3:
        print(f"Usage: {sys.argv[0]} RAW_DIR OUTPUT_FILE", file=sys.stderr)
        return 1

    raw_dir = Path(sys.argv[1])
    output_file = Path(sys.argv[2])

    if not raw_dir.is_dir():
        print(f"Error: {raw_dir} is not a directory", file=sys.stderr)
        return 1

    # Collect SARIF files from raw directory only (non-recursive). Leave out
    # OUTPUT_FILE itself so a re-run never merges its own previous output.
    output_resolved = output_file.resolve()
    sarif_files = sorted(
        f for f in raw_dir.glob("*.sarif") if f.resolve() != output_resolved
    )
    print(f"Found {len(sarif_files)} SARIF files to merge in {raw_dir}")

    if not sarif_files:
        print("No SARIF files found, nothing to merge", file=sys.stderr)
        return 1

    # Ensure output directory exists
    try:
        output_file.parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Error: Failed to create {output_file.parent}: {e}", file=sys.stderr)
        return 1

    # Try SARIF Multitool first, fall back to pure Python
    merged: dict | None = None
    if has_sarif_multitool():
        print("Using SARIF Multitool for merge...")
        merged = merge_with_multitool(sarif_files)
        if isinstance(merged, dict) and "runs" in merged:
            print("SARIF Multitool merge successful")
        else:
            # Anything that is not a SARIF log object counts as a failure, so
            # the success message and the fallback can never disagree.
            merged = None

    if merged is None:
        print("Using pure Python merge (SARIF Multitool not available or failed)")
        merged = merge_sarif_pure_python(sarif_files)

    result_count = sum(len(run.get("results", [])) for run in merged.get("runs", []))
    print(f"Merged SARIF contains {result_count} findings")

    # Write output
    try:
        output_file.write_text(json.dumps(merged, indent=2), encoding="utf-8")
    except OSError as e:
        print(f"Error: Failed to write {output_file}: {e}", file=sys.stderr)
        return 1
    print(f"Written to {output_file}")

    return 0
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|