@clear-capabilities/agentic-security-scanner 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1580 -0
- package/bin/.agentic-security/findings.json +1577 -0
- package/bin/.agentic-security/last-scan.json +1577 -0
- package/bin/.agentic-security/last-scan.json.sig +1 -0
- package/bin/.agentic-security/scan-history.json +465 -0
- package/bin/.agentic-security/streak.json +25 -0
- package/bin/agentic-security-audit.js +198 -0
- package/bin/agentic-security-consistency.js +80 -0
- package/bin/agentic-security-diff.js +136 -0
- package/bin/agentic-security-lsp.js +12 -0
- package/bin/agentic-security-mcp.js +40 -0
- package/bin/agentic-security-rule.js +153 -0
- package/bin/agentic-security.js +1683 -0
- package/dist/117.index.js +207 -0
- package/dist/178.index.js +250 -0
- package/dist/218.index.js +793 -0
- package/dist/227.index.js +192 -0
- package/dist/301.index.js +167 -0
- package/dist/384.index.js +18 -0
- package/dist/476.index.js +126 -0
- package/dist/513.index.js +373 -0
- package/dist/520.index.js +13 -0
- package/dist/601.index.js +1038 -0
- package/dist/634.index.js +1892 -0
- package/dist/637.index.js +216 -0
- package/dist/660.index.js +131 -0
- package/dist/675.index.js +451 -0
- package/dist/826.index.js +188 -0
- package/dist/830.index.js +133 -0
- package/dist/agentic-security.mjs +272 -0
- package/dist/agentic-security.mjs.sha256 +1 -0
- package/dist/calibration-seed.json +27 -0
- package/package.json +77 -0
- package/src/.agentic-security/findings.json +80844 -0
- package/src/.agentic-security/last-scan.json +80844 -0
- package/src/.agentic-security/last-scan.json.sig +1 -0
- package/src/.agentic-security/scan-history.json +8408 -0
- package/src/.agentic-security/streak.json +26 -0
- package/src/badge.js +188 -0
- package/src/compare.js +203 -0
- package/src/dataflow/.agentic-security/findings.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
- package/src/dataflow/.agentic-security/scan-history.json +735 -0
- package/src/dataflow/.agentic-security/streak.json +24 -0
- package/src/dataflow/CLAUDE.md +38 -0
- package/src/dataflow/access-paths.js +172 -0
- package/src/dataflow/async-sequencing.js +177 -0
- package/src/dataflow/backward.js +201 -0
- package/src/dataflow/catalog-expanded.js +485 -0
- package/src/dataflow/catalog.js +659 -0
- package/src/dataflow/cross-repo.js +219 -0
- package/src/dataflow/engine.js +588 -0
- package/src/dataflow/exception-flow.js +116 -0
- package/src/dataflow/exploit-prover.js +187 -0
- package/src/dataflow/higher-order.js +221 -0
- package/src/dataflow/ifds.js +347 -0
- package/src/dataflow/implicit-flow.js +129 -0
- package/src/dataflow/incremental.js +229 -0
- package/src/dataflow/index.js +181 -0
- package/src/dataflow/numeric-domain.js +192 -0
- package/src/dataflow/path-feasibility.js +114 -0
- package/src/dataflow/points-to.js +337 -0
- package/src/dataflow/polyglot.js +190 -0
- package/src/dataflow/proven-clean.js +159 -0
- package/src/dataflow/receiver-context.js +76 -0
- package/src/dataflow/sanitizer-proof.js +154 -0
- package/src/dataflow/soft-taint.js +140 -0
- package/src/dataflow/string-domain.js +234 -0
- package/src/dataflow/stub-aware-filter.js +100 -0
- package/src/dataflow/summaries.js +132 -0
- package/src/dataflow/symbolic-exec.js +238 -0
- package/src/dataflow/tabulation.js +135 -0
- package/src/engine.js +7763 -0
- package/src/history-scan.js +229 -0
- package/src/index.js +3 -0
- package/src/integrations/.agentic-security/findings.json +1504 -0
- package/src/integrations/.agentic-security/last-scan.json +1504 -0
- package/src/integrations/.agentic-security/scan-history.json +40 -0
- package/src/integrations/.agentic-security/streak.json +21 -0
- package/src/integrations/index.js +321 -0
- package/src/integrations/tickets.js +200 -0
- package/src/ir/.agentic-security/findings.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json.sig +1 -0
- package/src/ir/.agentic-security/scan-history.json +364 -0
- package/src/ir/.agentic-security/streak.json +23 -0
- package/src/ir/CLAUDE.md +172 -0
- package/src/ir/callgraph.js +73 -0
- package/src/ir/class-hierarchy.js +195 -0
- package/src/ir/index.js +152 -0
- package/src/ir/parser-cs.js +260 -0
- package/src/ir/parser-java.js +286 -0
- package/src/ir/parser-js.js +413 -0
- package/src/ir/parser-kt.js +258 -0
- package/src/ir/parser-py-cst.js +136 -0
- package/src/ir/parser-py.helper.py +501 -0
- package/src/ir/parser-py.js +312 -0
- package/src/ir/ssa.js +315 -0
- package/src/ir/type-stubs.js +288 -0
- package/src/leaderboard.js +152 -0
- package/src/llm-validator/.agentic-security/findings.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
- package/src/llm-validator/.agentic-security/scan-history.json +168 -0
- package/src/llm-validator/.agentic-security/streak.json +20 -0
- package/src/llm-validator/consistency.js +141 -0
- package/src/llm-validator/index.js +437 -0
- package/src/lsp/.agentic-security/findings.json +28 -0
- package/src/lsp/.agentic-security/last-scan.json +28 -0
- package/src/lsp/.agentic-security/scan-history.json +79 -0
- package/src/lsp/.agentic-security/streak.json +22 -0
- package/src/lsp/server.js +275 -0
- package/src/mcp/.agentic-security/findings.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +1125 -0
- package/src/mcp/.agentic-security/streak.json +22 -0
- package/src/mcp/CLAUDE.md +54 -0
- package/src/mcp/audit.js +136 -0
- package/src/mcp/redact.js +75 -0
- package/src/mcp/server.js +158 -0
- package/src/mcp/stdio.js +83 -0
- package/src/mcp/tools.js +940 -0
- package/src/mcp/validate.js +49 -0
- package/src/personality.js +164 -0
- package/src/poc-video.js +239 -0
- package/src/posture/.agentic-security/findings.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json.sig +1 -0
- package/src/posture/.agentic-security/scan-history.json +5557 -0
- package/src/posture/.agentic-security/streak.json +24 -0
- package/src/posture/CLAUDE.md +42 -0
- package/src/posture/adversarial-self-test.js +114 -0
- package/src/posture/adversary-agent.js +204 -0
- package/src/posture/agents-memory.js +135 -0
- package/src/posture/ai-code-fingerprint.js +171 -0
- package/src/posture/aibom.js +284 -0
- package/src/posture/api-inventory.js +96 -0
- package/src/posture/attack-playbooks.js +305 -0
- package/src/posture/auditor-agent.js +115 -0
- package/src/posture/auth-posture-import.js +135 -0
- package/src/posture/baseline-compare.js +114 -0
- package/src/posture/blast-radius.js +836 -0
- package/src/posture/bounty-prediction.js +141 -0
- package/src/posture/business-logic.js +239 -0
- package/src/posture/calibration-drift.js +93 -0
- package/src/posture/calibration-seed.json +27 -0
- package/src/posture/calibration.js +204 -0
- package/src/posture/clustering.js +75 -0
- package/src/posture/concurrency-checker.js +265 -0
- package/src/posture/confidence.js +65 -0
- package/src/posture/container-runtime.js +149 -0
- package/src/posture/counterfactual.js +109 -0
- package/src/posture/cross-lang-graphql.js +165 -0
- package/src/posture/cross-lang-grpc.js +166 -0
- package/src/posture/cross-lang-meta.js +101 -0
- package/src/posture/cross-lang-openapi.js +187 -0
- package/src/posture/cross-lang-orm.js +153 -0
- package/src/posture/cross-lang-queues.js +210 -0
- package/src/posture/crown-jewels.js +110 -0
- package/src/posture/custom-rules.js +361 -0
- package/src/posture/cve-alert-daemon.js +433 -0
- package/src/posture/cve-lookup.js +129 -0
- package/src/posture/dead-code.js +430 -0
- package/src/posture/defender-agent.js +158 -0
- package/src/posture/deploy-platform.js +204 -0
- package/src/posture/detector-fuzz.js +61 -0
- package/src/posture/deterministic.js +99 -0
- package/src/posture/drift.js +165 -0
- package/src/posture/epss.js +156 -0
- package/src/posture/exploitability-probability.js +212 -0
- package/src/posture/exploitability.js +121 -0
- package/src/posture/feature-flags.js +110 -0
- package/src/posture/finding-defaults.js +132 -0
- package/src/posture/fix-history.js +411 -0
- package/src/posture/fix-plan.js +121 -0
- package/src/posture/fix-verify-loop.js +157 -0
- package/src/posture/fix-verify.js +130 -0
- package/src/posture/flow-narration.js +105 -0
- package/src/posture/grader-calibration.js +156 -0
- package/src/posture/harness-discovery.js +113 -0
- package/src/posture/holdout-eval.js +144 -0
- package/src/posture/iac-reachability.js +163 -0
- package/src/posture/iam-policy.js +128 -0
- package/src/posture/integrity.js +97 -0
- package/src/posture/learning.js +166 -0
- package/src/posture/license-policy.js +109 -0
- package/src/posture/llm-redteam-prompts.js +418 -0
- package/src/posture/llm-redteam.js +303 -0
- package/src/posture/material-change.js +163 -0
- package/src/posture/mitigation-composite.js +55 -0
- package/src/posture/mttr.js +91 -0
- package/src/posture/network-policy-import.js +126 -0
- package/src/posture/path-predicates.js +99 -0
- package/src/posture/persona-prioritization.js +153 -0
- package/src/posture/poc-cwe-map.js +51 -0
- package/src/posture/poc-generator.js +500 -0
- package/src/posture/policy-gate.js +174 -0
- package/src/posture/pre-incident-archaeology.js +110 -0
- package/src/posture/profile.js +93 -0
- package/src/posture/reachability-filter.js +42 -0
- package/src/posture/regression-test-gen.js +200 -0
- package/src/posture/reverse-blast-radius.js +110 -0
- package/src/posture/router.js +109 -0
- package/src/posture/rule-overrides.js +198 -0
- package/src/posture/rule-pack-signing.js +209 -0
- package/src/posture/rule-packs.js +143 -0
- package/src/posture/rule-synthesis.js +108 -0
- package/src/posture/ruleset-version.js +71 -0
- package/src/posture/sbom.js +129 -0
- package/src/posture/schema-aware-bridge.js +207 -0
- package/src/posture/security-trend.js +87 -0
- package/src/posture/semantic-clone.js +114 -0
- package/src/posture/specification-mining.js +170 -0
- package/src/posture/stable-id.js +75 -0
- package/src/posture/stack-playbook.js +229 -0
- package/src/posture/streak.js +249 -0
- package/src/posture/suppressions.js +135 -0
- package/src/posture/telemetry-ingest.js +112 -0
- package/src/posture/threat-model.js +145 -0
- package/src/posture/three-agent-pipeline.js +74 -0
- package/src/posture/triage.js +146 -0
- package/src/posture/trust-boundary-diagram.js +115 -0
- package/src/posture/type-narrowing.js +129 -0
- package/src/posture/validator-metrics.js +179 -0
- package/src/posture/verifier-ephemeral.js +118 -0
- package/src/posture/verifier-target.js +147 -0
- package/src/posture/verifier.js +257 -0
- package/src/posture/version.js +75 -0
- package/src/posture/waf-ingest.js +200 -0
- package/src/posture/why-fired.js +141 -0
- package/src/pr-comment.js +172 -0
- package/src/pr-delta.js +198 -0
- package/src/report/.agentic-security/findings.json +79 -0
- package/src/report/.agentic-security/last-scan.json +79 -0
- package/src/report/.agentic-security/last-scan.json.sig +1 -0
- package/src/report/.agentic-security/scan-history.json +332 -0
- package/src/report/.agentic-security/streak.json +23 -0
- package/src/report/index.js +1136 -0
- package/src/report/mascot.js +42 -0
- package/src/runScan.js +141 -0
- package/src/sast/.agentic-security/findings.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json.sig +1 -0
- package/src/sast/.agentic-security/scan-history.json +788 -0
- package/src/sast/.agentic-security/streak.json +23 -0
- package/src/sast/CLAUDE.md +39 -0
- package/src/sast/_comment-strip.js +46 -0
- package/src/sast/agent-tool-escalation.js +131 -0
- package/src/sast/auth-provider.js +171 -0
- package/src/sast/authz.js +236 -0
- package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
- package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
- package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
- package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
- package/src/sast/bench-shape/index.js +62 -0
- package/src/sast/claude-hook-injection.js +199 -0
- package/src/sast/claude-md-prompt-injection.js +170 -0
- package/src/sast/claude-settings.js +165 -0
- package/src/sast/client-side.js +149 -0
- package/src/sast/cpp-bench-extras.js +122 -0
- package/src/sast/cpp-dataflow.js +430 -0
- package/src/sast/cpp.js +248 -0
- package/src/sast/csharp.js +152 -0
- package/src/sast/csrf.js +82 -0
- package/src/sast/dart-flutter.js +173 -0
- package/src/sast/db-rls.js +147 -0
- package/src/sast/db-taint.js +215 -0
- package/src/sast/defi-deep.js +242 -0
- package/src/sast/deserialization-gadgets.js +113 -0
- package/src/sast/django-hardening.js +230 -0
- package/src/sast/env-hygiene.js +125 -0
- package/src/sast/fastapi-hardening.js +145 -0
- package/src/sast/go-extended.js +84 -0
- package/src/sast/host-header.js +106 -0
- package/src/sast/index.js +17 -0
- package/src/sast/java-ast-folding.js +561 -0
- package/src/sast/java-bench-extras.js +708 -0
- package/src/sast/java-collection-passthrough.js +178 -0
- package/src/sast/java-constant-fold.js +244 -0
- package/src/sast/java-deserialization.js +125 -0
- package/src/sast/jndi.js +104 -0
- package/src/sast/juliet-shape.js +324 -0
- package/src/sast/jwt-exp.js +104 -0
- package/src/sast/kotlin.js +82 -0
- package/src/sast/laravel-hardening.js +198 -0
- package/src/sast/ldap-injection.js +100 -0
- package/src/sast/llm-owasp.js +465 -0
- package/src/sast/llm-stored-prompt.js +103 -0
- package/src/sast/llm-trading-agent.js +161 -0
- package/src/sast/llm.js +308 -0
- package/src/sast/logic.js +140 -0
- package/src/sast/mass-assignment.js +101 -0
- package/src/sast/mcp-audit.js +242 -0
- package/src/sast/mobile-manifest.js +195 -0
- package/src/sast/model-load.js +164 -0
- package/src/sast/mutation-xss.js +87 -0
- package/src/sast/nosql-injection.js +82 -0
- package/src/sast/open-redirect.js +119 -0
- package/src/sast/php.js +91 -0
- package/src/sast/pipeline.js +122 -0
- package/src/sast/primary-cwe-java.js +155 -0
- package/src/sast/prompt-firewall.js +151 -0
- package/src/sast/prompt-template.js +157 -0
- package/src/sast/prototype-pollution.js +112 -0
- package/src/sast/python-sinks.js +195 -0
- package/src/sast/quarkus-hardening.js +102 -0
- package/src/sast/rag-poisoning.js +118 -0
- package/src/sast/rate-limit.js +128 -0
- package/src/sast/response-splitting.js +138 -0
- package/src/sast/ruby.js +108 -0
- package/src/sast/rust.js +105 -0
- package/src/sast/solidity.js +167 -0
- package/src/sast/springboot-hardening.js +186 -0
- package/src/sast/ssrf-cloud-metadata.js +80 -0
- package/src/sast/ssti.js +116 -0
- package/src/sast/swift.js +162 -0
- package/src/sast/toctou.js +95 -0
- package/src/sast/webhook.js +101 -0
- package/src/sast/xpath-injection.js +51 -0
- package/src/sast/xxe.js +140 -0
- package/src/sast/zip-slip.js +200 -0
- package/src/sca/base-images.json +45 -0
- package/src/sca/container.js +107 -0
- package/src/sca/dep-confusion.js +134 -0
- package/src/sca/index.js +6 -0
- package/src/sca/popular-packages.json +41 -0
- package/src/sca/sarif-ingest.js +187 -0
- package/src/sca/vuln-function-hints.json +89 -0
- package/src/secrets/index.js +4 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
// Model serialization & loading defenses.
|
|
2
|
+
//
|
|
3
|
+
// OWASP LLMSecOps explicitly names "Model Serialization Defenses" and
|
|
4
|
+
// "Digital Model/Dataset Verification" — these are the canonical RCE
|
|
5
|
+
// vectors when loading ML models. PyTorch's default torch.load() was
|
|
6
|
+
// vulnerable until 2.6 made weights_only=True the default; trust_remote_code
|
|
7
|
+
// in transformers still defaults to False but is widely toggled to True.
|
|
8
|
+
//
|
|
9
|
+
// This module fires only on highly concrete patterns to keep the F1 ceiling
|
|
10
|
+
// at 1.00 against the labelled fixture set:
|
|
11
|
+
//
|
|
12
|
+
// 1. torch.load(...) without weights_only=True (CWE-502, RCE)
|
|
13
|
+
// 2. transformers.from_pretrained(..., trust_remote_code=True) (CWE-94, RCE)
|
|
14
|
+
// 3. from_pretrained without revision=<sha> (CWE-1357, supply chain)
|
|
15
|
+
// 4. pickle.load / pickle.loads on model paths (CWE-502, RCE)
|
|
16
|
+
// 5. yaml.load(stream) without SafeLoader (CWE-502, RCE)
|
|
17
|
+
// 6. joblib.load(...) (CWE-502, RCE — pickle-backed)
|
|
18
|
+
// 7. np.load(..., allow_pickle=True) (CWE-502, RCE)
|
|
19
|
+
// 8. tf.keras.models.load_model from http:// URL (CWE-494, supply chain)
|
|
20
|
+
// 9. Loading model weights from http:// URL (CWE-494, supply chain)
|
|
21
|
+
//
|
|
22
|
+
// Suppressions:
|
|
23
|
+
// - tests/, __tests__/, spec/, fixtures/, examples/, docs/, stories/, codefixes/, node_modules/ (paths)
|
|
24
|
+
// - if weights_only=True is present (negation context)
|
|
25
|
+
// - if Loader=SafeLoader / yaml.safe_load (negation context)
|
|
26
|
+
|
|
27
|
+
// File-extension gate: matches paths ending in .py or .ipynb (case-insensitive).
const _SCAN_EXT_RE = /\.(?:py|ipynb)$/i;
|
|
28
|
+
// Matches a non-production directory segment anywhere in a forward-slash path:
// test(s), __tests__, spec, fixture(s), example(s), doc(s), stories, codefixes, node_modules.
// The segment must start the path or follow a "/", and must be followed by "/".
const _NONPROD_PATH_RE = /(?:^|\/)(?:tests?|__tests__|spec|fixtures?|examples?|docs?|stories|codefixes|node_modules)\//i;
|
|
29
|
+
|
|
30
|
+
// Pattern table. Each entry has:
|
|
31
|
+
// re: the trigger regex (run on raw source)
|
|
32
|
+
// contextRe: optional regex over a window around the match to confirm or suppress
|
|
33
|
+
// contextNeg: when set, the contextRe must NOT match for the finding to fire
|
|
34
|
+
// vuln, severity, cwe, fix
|
|
35
|
+
// Detection rules for unsafe model loading. Each entry carries the trigger
// regex (`re`), an optional `contextRe` (with `contextNeg: true` meaning
// "a contextRe hit suppresses the finding"), and the reporting fields
// (`vuln`, `severity`, `cwe`, `fix`) copied verbatim into the finding.
const PATTERNS = [
  {
    name: 'torch-load-unsafe',
    re: /\btorch\.load\s*\(/g,
    // Suppressed when weights_only=True shows up in the context the scanner
    // inspects for this match.
    contextRe: /weights_only\s*=\s*True/,
    contextNeg: true,
    vuln: 'Model Load: torch.load() without weights_only=True (RCE via pickle)',
    severity: 'critical',
    cwe: 'CWE-502',
    fix: 'Pass weights_only=True to torch.load(): `torch.load(path, weights_only=True)`. The default was unsafe in PyTorch < 2.6 — the loader can execute arbitrary Python during deserialization. Prefer the safetensors format (`.safetensors`) for new models — it cannot execute code.',
  },
  {
    name: 'transformers-trust-remote-code',
    // The kwarg must appear within 400 non-")" characters of the opening paren.
    re: /\.from_pretrained\s*\([^)]{0,400}?trust_remote_code\s*=\s*True/g,
    vuln: 'Model Load: from_pretrained(trust_remote_code=True) executes arbitrary code from the model repo',
    severity: 'critical',
    cwe: 'CWE-94',
    fix: 'Set trust_remote_code=False (the default) or omit it. With trust_remote_code=True, the transformers library executes arbitrary Python code published in the model repository at load time. If you need a specific custom model, audit its code first and pin to a verified revision.',
  },
  {
    name: 'from-pretrained-no-revision',
    // Receiver classes: Auto* loaders (including task-specific AutoModelFor…
    // variants), plus any PascalCase name ending in Model or Tokenizer. The
    // first argument must be a quoted model id.
    re: /\b(?:Auto(?:Model[A-Za-z]*|Tokenizer|Config|Processor|FeatureExtractor)|[A-Z][A-Za-z]*Model|[A-Z][A-Za-z]*Tokenizer)\.from_pretrained\s*\(\s*['"][\w./-]+['"][^)]*\)/g,
    // Fire only when revision= is NOT found in the inspected context.
    contextRe: /revision\s*=/,
    contextNeg: true,
    vuln: 'Model Load: from_pretrained without pinned revision (mutable, supply-chain risk)',
    severity: 'high',
    cwe: 'CWE-1357',
    fix: 'Pin to a specific commit SHA: `AutoModel.from_pretrained("org/model", revision="abc123def456...")`. Without a pinned revision the model publisher (or anyone who compromises them) can ship new weights into your inference path silently. Get the SHA from the Hugging Face Hub commit history.',
  },
  {
    name: 'pickle-load',
    re: /\bpickle\.(?:load|loads)\s*\(/g,
    vuln: 'Model Load: pickle.load() — RCE on untrusted input',
    severity: 'critical',
    cwe: 'CWE-502',
    fix: 'pickle.load() executes arbitrary code during deserialization. Use safetensors (`.safetensors`), JSON, or MessagePack for serialization. If you must use pickle, only on paths you wrote yourself in the same process and verify a hash before loading.',
  },
  {
    name: 'yaml-unsafe-load',
    re: /\byaml\.(?:load|unsafe_load)\s*\(/g,
    // Safe loaders: SafeLoader and its libyaml-backed twin CSafeLoader, with
    // or without the `yaml.` prefix.
    contextRe: /yaml\.safe_load|Loader\s*=\s*(?:yaml\.)?C?SafeLoader/,
    contextNeg: true,
    vuln: 'Model Load: yaml.load() / yaml.unsafe_load() — RCE on untrusted YAML',
    severity: 'critical',
    cwe: 'CWE-502',
    fix: 'Use yaml.safe_load() for any YAML you did not author. yaml.load() with the default loader instantiates arbitrary Python objects, including os.system shells. yaml.unsafe_load() is even worse.',
  },
  {
    name: 'joblib-load',
    re: /\bjoblib\.load\s*\(/g,
    vuln: 'Model Load: joblib.load() is pickle-backed — RCE on untrusted input',
    severity: 'high',
    cwe: 'CWE-502',
    fix: 'joblib.load() uses pickle under the hood and has the same RCE risk. Use it only on files you control and verify a hash before loading. For sklearn models, prefer ONNX export or skops safe-serialization.',
  },
  {
    name: 'numpy-allow-pickle',
    // Accept both the conventional `np` alias and the full module name.
    re: /\b(?:np|numpy)\.load\s*\([^)]*?allow_pickle\s*=\s*True/g,
    vuln: 'Model Load: np.load(allow_pickle=True) — RCE via pickle in .npy files',
    severity: 'high',
    cwe: 'CWE-502',
    fix: 'Set allow_pickle=False (default since NumPy 1.16.3) and use a structured format (.npz with explicit arrays). If you must keep allow_pickle=True, only on .npy files you produced yourself in the same trust boundary.',
  },
  {
    name: 'http-model-url',
    // Only a quoted literal starting with plain http:// as the first argument.
    re: /(?:torch\.hub\.load_state_dict_from_url|hf_hub_download|tf\.keras\.models\.load_model|load_state_dict)\s*\(\s*['"]http:\/\//g,
    vuln: 'Model Load: model weights fetched from http:// (no integrity, MITM)',
    severity: 'high',
    cwe: 'CWE-494',
    fix: 'Use https://. Better: pin to a specific revision/SHA and verify a checksum after download. Plain http allows a network attacker to substitute the weights with a backdoored version.',
  },
];
|
|
109
|
+
|
|
110
|
+
// Assemble the finding record for a single rule hit.
//   fp      - path of the scanned file, echoed into `id` and `file`
//   line    - 1-based line number of the hit
//   p       - the PATTERNS entry that fired (name, severity, vuln, cwe, fix are read)
//   snippet - source text for the hit; trimmed and capped at 200 characters,
//             with a missing/empty value becoming ''
function _emit(fp, line, p, snippet) {
  const { name, severity, vuln, cwe, fix } = p;
  const excerpt = (snippet || '').trim().slice(0, 200);
  const finding = {
    id: `model-load:${fp}:${line}:${name}`,
    kind: 'sast',
    severity,
    vuln,
    cwe,
    stride: 'Tampering',
    file: fp,
    line,
    snippet: excerpt,
    fix,
  };
  return finding;
}
|
|
124
|
+
|
|
125
|
+
// Return the text of the call that opens at the first "(" at or after `from`,
// from that "(" through its balancing ")". Parentheses are counted without
// any string/comment awareness, and the scan gives up after `limit`
// characters, in which case the text collected so far is returned.
// Returns '' when there is no "(" at all.
function _callArguments(raw, from, limit = 2000) {
  const open = raw.indexOf('(', from);
  if (open === -1) return '';
  const stop = Math.min(raw.length, open + limit);
  let depth = 0;
  for (let i = open; i < stop; i += 1) {
    const ch = raw[i];
    if (ch === '(') {
      depth += 1;
    } else if (ch === ')') {
      depth -= 1;
      if (depth === 0) return raw.substring(open, i + 1);
    }
  }
  return raw.substring(open, stop);
}

// Scan one Python / notebook source for unsafe model-loading calls.
//   fp  - file path; must end in .py/.ipynb and must not sit under a
//         non-production directory (tests, fixtures, docs, ...)
//   raw - file contents; empty input or input above 500,000 chars is skipped
// Returns an array of findings built by _emit (empty when nothing fires).
// At most one finding is reported per rule per line.
//
// Context handling:
//   - contextNeg rules (suppressing context) are checked against the matched
//     call's own argument list only. A wider window lets a safe kwarg that
//     belongs to a neighbouring call hide a genuinely unsafe one.
//   - non-negated contextRe rules (confirming context) are checked against a
//     window from 100 chars before the match to 400 chars after it.
export function scanModelLoad(fp, raw) {
  if (!_SCAN_EXT_RE.test(fp)) return [];
  const fpNorm = fp.replace(/\\/g, '/');
  if (_NONPROD_PATH_RE.test(fpNorm)) return [];
  if (!raw || raw.length > 500_000) return [];

  const lines = raw.split('\n');
  const findings = [];
  const seen = new Set();

  for (const p of PATTERNS) {
    // Work on a private copy so the shared regex's lastIndex is never touched.
    const flags = p.re.flags.includes('g') ? p.re.flags : `${p.re.flags}g`;
    const re = new RegExp(p.re.source, flags);
    let m;
    while ((m = re.exec(raw))) {
      const matchedText = m[0];

      if (p.contextRe) {
        let context;
        if (p.contextNeg) {
          context = _callArguments(raw, m.index);
        } else {
          const windowStart = Math.max(0, m.index - 100);
          const windowEnd = Math.min(raw.length, m.index + matchedText.length + 400);
          context = raw.substring(windowStart, windowEnd);
        }
        const present = p.contextRe.test(context);
        if (p.contextNeg && present) continue; // suppress: safe context found
        if (!p.contextNeg && !present) continue; // required context missing
      }

      const line = raw.substring(0, m.index).split('\n').length;
      const finding = _emit(fp, line, p, lines[line - 1] || matchedText);
      if (!seen.has(finding.id)) {
        seen.add(finding.id);
        findings.push(finding);
      }
    }
  }

  return findings;
}
|
|
162
|
+
|
|
163
|
+
// Public for tests
|
|
164
|
+
// Test hook: exposes the pattern table so tests can inspect individual entries.
export const _internal = { PATTERNS };
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { blankComments } from './_comment-strip.js';
|
|
2
|
+
// Mutation-based XSS (mXSS).
|
|
3
|
+
//
|
|
4
|
+
// mXSS happens when "safe-looking" HTML is re-serialized through the DOM and
|
|
5
|
+
// becomes unsafe. The canonical shape:
|
|
6
|
+
//
|
|
7
|
+
// const safeHtml = sanitize(userHtml);
|
|
8
|
+
// container.innerHTML = safeHtml; // safe so far
|
|
9
|
+
// const round = container.innerHTML; // browser-serialized
|
|
10
|
+
// otherEl.innerHTML = round; // mutation point — re-parse can
|
|
11
|
+
// // re-introduce script
|
|
12
|
+
//
|
|
13
|
+
// Less obvious shapes:
|
|
14
|
+
// - DOMParser().parseFromString(s, "text/html").body.innerHTML on user input
|
|
15
|
+
// - new XMLSerializer().serializeToString(...) into innerHTML
|
|
16
|
+
// - template re-render via .innerHTML after .innerHTML on user-controlled.
|
|
17
|
+
|
|
18
|
+
// new DOMParser().parseFromString(<args>).body.innerHTML in a single expression.
// <args> is matched with [^)]+, so argument lists that themselves contain ")"
// are not recognised.
const RE_PARSE_THEN_INNERHTML = /new\s+DOMParser\s*\(\s*\)\s*\.parseFromString\s*\([^)]+\)\s*\.\s*body\s*\.\s*innerHTML/g;

// XMLSerializer output reaching an innerHTML assignment. Two shapes:
//   - direct:  el.innerHTML = new XMLSerializer().serializeToString(node)
//   - staged:  serializeToString(...) followed by an `.innerHTML =` within
//              200 characters. The gap is bounded so a serializer call is not
//              paired with an unrelated assignment far away in the file.
// `=(?!=)` keeps `==` / `===` comparisons from counting as assignments.
const SERIALIZER_INTO_INNERHTML = /\.\s*innerHTML\s*=(?!=)\s*new\s+XMLSerializer\s*\(\s*\)\s*\.\s*serializeToString|new\s+XMLSerializer\s*\(\s*\)\s*\.\s*serializeToString[^]{0,200}?\.\s*innerHTML\s*=(?!=)/g;

// X.innerHTML = <ident> ... X.innerHTML ... <any>.innerHTML =
// The \b anchors force the captured name and its back-reference to be whole
// identifiers; without them any shared suffix ("header" / "footer") would
// satisfy \1. Each gap is limited to 200 characters.
const ROUNDTRIP_RE = /\b(\w+)\s*\.\s*innerHTML\s*=\s*\w+[^]{0,200}\b\1\s*\.\s*innerHTML[^]{0,200}\.\s*innerHTML\s*=(?!=)/g;
|
|
23
|
+
|
|
24
|
+
// 1-based line number of character offset `idx` in `raw`: one more than the
// number of "\n" characters that precede the offset.
function lineOf(raw, idx) {
  const head = raw.substring(0, idx);
  let count = 1;
  for (let at = head.indexOf('\n'); at !== -1; at = head.indexOf('\n', at + 1)) {
    count += 1;
  }
  return count;
}
|
|
25
|
+
|
|
26
|
+
// Scan a JS/TS/HTML-family source for mutation-XSS shapes.
//   fp  - file path; only .js/.jsx/.ts/.tsx/.mjs/.cjs/.html/.htm/.vue/.svelte are scanned
//   raw - file contents; empty input or input above 500,000 chars is skipped
// Returns an array of findings, at most one per rule per line, grouped by
// rule in the order of the table below.
//
// Matching runs on the comment-blanked text while line numbers and snippets
// are taken from `raw`; this assumes blankComments keeps character offsets
// aligned with the original — TODO confirm against _comment-strip.js.
export function scanMutationXSS(fp, raw) {
  if (!/\.(?:js|jsx|ts|tsx|mjs|cjs|html|htm|vue|svelte)$/i.test(fp)) return [];
  if (!raw || raw.length > 500_000) return [];

  const code = blankComments(raw);
  // Split once up front; every finding needs its source line for the snippet.
  const rawLines = raw.split('\n');

  // Built per call so the table only reads the module-level regexes when the
  // scanner actually runs.
  const rules = [
    {
      re: RE_PARSE_THEN_INNERHTML,
      key: 'mxss-parse-roundtrip',
      vuln: 'Mutation XSS: DOMParser → .body.innerHTML round-trip on potentially-tainted HTML',
      remediation: 'Re-serializing HTML through the DOM can re-introduce script via known mutation tricks (`<noscript>`, malformed comments, `<svg>` namespace confusion). If you must round-trip, sanitize the *output* of the round-trip with DOMPurify on the final string, not the input. Better: keep user content as text nodes (`textContent`) instead of HTML.',
      confidence: 0.75,
    },
    {
      re: SERIALIZER_INTO_INNERHTML,
      key: 'mxss-serialize-into-innerhtml',
      vuln: 'Mutation XSS: XMLSerializer output assigned to innerHTML',
      remediation: 'XML serialization re-parsed as HTML changes meaning — `<script xmlns="http://www.w3.org/1999/xhtml">` is inert as XML but live in HTML. Use `textContent` if the goal is plain text; use a typed templating engine if the goal is structured HTML.',
      confidence: 0.80,
    },
    {
      re: ROUNDTRIP_RE,
      key: 'mxss-innerhtml-roundtrip',
      vuln: 'Mutation XSS: read-back of innerHTML then re-assigned to another innerHTML',
      remediation: 'The DOM normalizes HTML on `.innerHTML` read; re-parsing the result on another element can re-introduce executable script. If you need to copy markup, use `el.cloneNode(true)` and `appendChild` instead of innerHTML round-trips.',
      confidence: 0.65,
    },
  ];

  const findings = [];
  const seen = new Set();

  for (const rule of rules) {
    // Private copy: never advance the shared regex's lastIndex, and make sure
    // the `g` flag is present so the exec loop always moves forward.
    const flags = rule.re.flags.includes('g') ? rule.re.flags : `${rule.re.flags}g`;
    const matcher = new RegExp(rule.re.source, flags);
    let m;
    while ((m = matcher.exec(code)) !== null) {
      const line = lineOf(raw, m.index);
      const id = `${rule.key}:${fp}:${line}`;
      if (seen.has(id)) continue;
      seen.add(id);
      findings.push({
        id,
        file: fp,
        line,
        vuln: rule.vuln,
        severity: 'medium',
        cwe: 'CWE-79',
        stride: 'Tampering',
        snippet: (rawLines[line - 1] || '').trim().slice(0, 200),
        remediation: rule.remediation,
        parser: 'MUTATION-XSS',
        confidence: rule.confidence,
      });
    }
  }

  return findings;
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { blankComments } from './_comment-strip.js';
|
|
2
|
+
// NoSQL injection (MongoDB / Mongoose / DynamoDB / etc.).
|
|
3
|
+
//
|
|
4
|
+
// Three classes:
|
|
5
|
+
// 1. MongoDB $where with a string built from user input — equivalent to
|
|
6
|
+
// arbitrary JS evaluation on the server.
|
|
7
|
+
// 2. Mongoose / mongo find(req.body) — Mongo accepts operator objects in
|
|
8
|
+
// values, so `{ password: { $ne: null } }` matches any record. The fix
|
|
9
|
+
// is to coerce the value to a string before passing it in.
|
|
10
|
+
// 3. DynamoDB FilterExpression / ConditionExpression / KeyConditionExpression
|
|
11
|
+
// built via string concat instead of placeholders.
|
|
12
|
+
|
|
13
|
+
// `$where:` whose value (up to the next "," or "}") is built dynamically:
// "+" concatenation, a template-literal interpolation, String(...) or
// String.raw. The "${" may appear anywhere in the value, not only directly
// after the opening backtick.
const MONGO_WHERE_RE = /\$where\s*:\s*[^,}]*(?:\+|\$\{|String\(|String\.raw)/g;

// A Mongo-style query/update method called with the raw request container
// (req.body / req.query / req.params, also request.* and ctx.request.*) as its
// first argument.
const MONGO_FIND_REQ_OBJ_RE = /\.\s*(?:find|findOne|findOneAndUpdate|findOneAndDelete|update|updateOne|updateMany|deleteOne|deleteMany|count|countDocuments)\s*\(\s*(?:req|request|ctx\.request)\s*\.\s*(?:body|query|params)\s*[,)]/g;

// A DynamoDB *Expression property whose value splices request data in, either
// by "+" concatenation (`"id = " + req.params.id`) or by template
// interpolation (`${req.query.owner}`). Placeholder-based expressions, where
// request data only appears in ExpressionAttributeValues, do not match.
const DYNAMO_EXPR_CONCAT_RE = /(?:FilterExpression|ConditionExpression|KeyConditionExpression|UpdateExpression)\s*:\s*[^,}]*(?:\+\s*|\$\{\s*)(?:req|request)\s*\./g;

// Python: a PyMongo-style method called with request.json / .data / .args /
// .form as the start of its first argument.
const PY_MONGO_FIND_REQ = /\.\s*(?:find|find_one|update_one|update_many|delete_one|delete_many)\s*\(\s*request\s*\.\s*(?:json|data|args|form)/g;
|
|
20
|
+
|
|
21
|
+
// Translate a character offset into a 1-based line number by counting the
// "\n" characters that occur before it.
function lineOf(raw, idx) {
  const prefix = raw.substring(0, idx);
  const breaks = prefix.match(/\n/g);
  return breaks === null ? 1 : breaks.length + 1;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Scan one source file for NoSQL-injection patterns (CWE-943).
 *
 * JavaScript/TypeScript files are checked for `$where` values built from
 * dynamic strings, Mongo query calls that receive a raw request object, and
 * DynamoDB expressions assembled from request data. Python files are checked
 * for PyMongo calls that receive `request.json` / `.data` / `.args` / `.form`.
 *
 * @param {string} fp  File path; only its extension is inspected.
 * @param {string} raw File contents. Empty input and input longer than
 *                     500,000 characters produce no findings.
 * @returns {object[]} Findings, de-duplicated on `id` (rule + file + line).
 */
export function scanNoSQLInjection(fp, raw) {
  if (!raw || raw.length > 500_000) return [];

  const findings = [];
  const seen = new Set();

  // Split the file at most once, and only if some finding needs a snippet
  // (previously the whole file was re-split for every finding).
  let rawLines = null;
  const snippetAt = (line) => {
    if (rawLines === null) rawLines = raw.split('\n');
    return (rawLines[line - 1] || '').trim().slice(0, 200);
  };

  // Run one rule over comment-blanked `code` and record each new hit.
  const runRule = (code, pattern, key, vuln, confidence, remediation) => {
    // Fresh RegExp so the shared module-level /g pattern keeps no lastIndex.
    const re = new RegExp(pattern.source, pattern.flags);
    let m;
    while ((m = re.exec(code)) !== null) {
      // Offsets come from `code` but are resolved against `raw`.
      // NOTE(review): assumes blankComments() preserves offsets — confirm.
      const line = lineOf(raw, m.index);
      const id = `nosql-${key}:${fp}:${line}`;
      if (seen.has(id)) continue;
      seen.add(id);
      findings.push({
        id,
        file: fp, line,
        vuln,
        severity: 'high',
        cwe: 'CWE-943',
        stride: 'Tampering',
        snippet: snippetAt(line),
        remediation,
        parser: 'NOSQL-INJECTION',
        confidence,
      });
    }
  };

  if (/\.(?:js|jsx|ts|tsx|mjs|cjs)$/i.test(fp)) {
    const code = blankComments(raw);
    const jsRules = [
      [
        MONGO_WHERE_RE, 'mongo-where',
        'NoSQL Injection: MongoDB $where with user-controlled string', 0.90,
        '`$where` runs server-side JavaScript and treats any string as code. Replace with structural operators (`$expr`, `$gt`, `$regex` with a constant pattern). Never build a `$where` string from user input.',
      ],
      [
        MONGO_FIND_REQ_OBJ_RE, 'mongo-find',
        'NoSQL Injection: MongoDB query with raw request object (operator injection)', 0.80,
        'Coerce each value to a primitive before passing into Mongo: `await User.findOne({ email: String(req.body.email), password: hash(String(req.body.password)) })`. Mongo accepts operator objects as values — `{ $ne: null }` matches every record.',
      ],
      [
        DYNAMO_EXPR_CONCAT_RE, 'dynamo-expr',
        'NoSQL Injection: DynamoDB Expression built via string concatenation', 0.85,
        'Build DynamoDB expressions with ExpressionAttributeValues placeholders, never via string concatenation: `KeyConditionExpression: "id = :id", ExpressionAttributeValues: { ":id": userId }`.',
      ],
    ];
    for (const [pattern, key, vuln, confidence, remediation] of jsRules) {
      runRule(code, pattern, key, vuln, confidence, remediation);
    }
  }

  if (/\.py$/i.test(fp)) {
    const code = blankComments(raw, 'py');
    runRule(
      code, PY_MONGO_FIND_REQ, 'pymongo',
      'NoSQL Injection: PyMongo query with raw request body', 0.80,
      'Construct the query dict yourself with coerced values: `users.find_one({"email": str(request.json["email"])})`. Passing `request.json` directly lets a client smuggle operator dicts (`{"$ne": null}`) that match every record.',
    );
  }

  return findings;
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
// Open Redirect (CWE-601).
|
|
2
|
+
//
|
|
3
|
+
// Pattern: a redirect target is derived from user input without an allow-
|
|
4
|
+
// list check. Attacker uses the trusted domain to bounce a victim to a
|
|
5
|
+
// phishing page. The bug is invisible in the URL bar until *after* the
|
|
6
|
+
// redirect fires.
|
|
7
|
+
//
|
|
8
|
+
// We catch:
|
|
9
|
+
// - Express: res.redirect(req.query.x | req.body.x | …)
|
|
10
|
+
// - Koa: ctx.redirect(ctx.query.x)
|
|
11
|
+
// - Flask (Python): flask.redirect(request.args.get(…)) / redirect(request.…)
|
|
12
|
+
// - Django (Python): HttpResponseRedirect(request.GET[…])
|
|
13
|
+
// - Spring (Java): return new RedirectView(name); return "redirect:" + name;
|
|
14
|
+
// - PHP: header("Location: " . $_GET[...])
|
|
15
|
+
//
|
|
16
|
+
// We suppress the flag when the value is checked against an allow-list
|
|
17
|
+
// before redirect — recognized patterns: `ALLOWED.has(x)`, `x in ALLOWED`,
|
|
18
|
+
// `ALLOWED_REDIRECTS.includes(x)`, `if (x.startsWith('/'))` (relative-only),
|
|
19
|
+
// or `urlparse(x).hostname == self_host`.
|
|
20
|
+
|
|
21
|
+
import { blankComments } from './_comment-strip.js';
|
|
22
|
+
|
|
23
|
+
// Redirect sinks, one row per framework: [language key, sink regex, label].
// Capture group 1 of every regex is the redirect-target expression; the
// scanner checks that text for taint hints. `[^)]+?` stops at the first `)`,
// so for nested calls the capture is only the text up to that parenthesis.
const PATTERNS = [
  // Express/Koa-style: res.redirect(<expr>) or ctx.redirect(<expr>).
  ['js', /\b(?:res|ctx|reply|response)\s*\.\s*redirect\s*\(\s*([^)]+?)\s*\)/g, 'Express/Koa'],
  // Bare redirect() — Flask / Werkzeug.
  ['py', /\b(?:flask\.)?redirect\s*\(\s*([^)]+?)\s*\)/g, 'Flask'],
  // Django.
  ['py', /\bHttpResponseRedirect\s*\(\s*([^)]+?)\s*\)/g, 'Django'],
  // Spring controllers — `return "redirect:" + name;`
  ['java', /\breturn\s+"redirect:"\s*\+\s*(\w[\w.]*)/g, 'Spring (return redirect:)'],
  // Spring RedirectView
  ['java', /\bnew\s+RedirectView\s*\(\s*(\w[\w.]*)\s*\)/g, 'Spring RedirectView'],
  // PHP header("Location: " . $...)
  ['php', /\bheader\s*\(\s*['"]\s*Location\s*:\s*['"]\s*\.\s*(\$\w[\w\[\]'"]*)/g, 'PHP Location'],
];
|
|
37
|
+
|
|
38
|
+
// What counts as "user-derived" inside the captured target expression.
|
|
39
|
+
// Tokens that mark a captured redirect target as user-derived: request
// containers (`req.`, `request.`, `ctx.query`, …), PHP superglobals, servlet
// accessors (`getParameter`, `getHeader`) and the bare identifier `next`.
// Not global, so `.test()` keeps no state between calls.
const TAINT_HINT_RE =
  /\b(?:req\.|request\.|params\.|query\.|body\.|ctx\.query|ctx\.request|ctx\.params|reply\.query|r\.URL\.Query|c\.Query|next\b|_GET|_POST|_REQUEST|getParameter|getHeader)\b/;
|
|
41
|
+
|
|
42
|
+
// What counts as an allow-list check earlier in the function. We look back
|
|
43
|
+
// up to 30 lines before the redirect call for any of these patterns.
|
|
44
|
+
// Signals that a redirect target was validated. They are tested against a
// multi-line window of source text, so the line-anchored pattern carries the
// `m` flag; without it `^` would only match at the very start of the window.
// None of the patterns is global, so `.test()` is stateless.
const ALLOWLIST_PATTERNS = [
  /\bALLOW(?:ED|LIST)?(?:_[A-Z_]+)?\.(?:has|includes|contains|indexOf)\b/i,
  /\bin\s+ALLOW(?:ED|LIST)?\b/,
  /\bin\s+\{[^}]+\}/,                                // `target in {'/a','/b'}`
  /\.startsWith\s*\(\s*['"]\//,                      // x.startsWith('/')
  /^\s*if\s*\(\s*\w+\.startsWith\s*\(\s*['"]\//m,    // explicit prefix check
  /urlparse\([^)]+\)\.(?:hostname|netloc)/,          // host extraction
  /url\.parse\([^)]+\)\s*\.\s*host(?:name)?/,
  /new\s+URL\s*\(\s*[^)]+\)\s*\.\s*hostname/,
  /\bvalid_redirect_url\b/,                          // common helper name
  /allowedRedirectTargets/i,
  /\babort\s*\(\s*4\d\d/,                            // any abort(4xx) earlier
  /\bres\s*\.\s*status\s*\(\s*4\d\d\b/,
];
|
|
58
|
+
|
|
59
|
+
/** 1-based line number of offset `idx` in `raw` (newlines before it, plus one). */
function _lineOf(raw, idx) {
  const head = raw.substring(0, idx);
  let lineNo = 1;
  for (const ch of head) {
    if (ch === '\n') lineNo += 1;
  }
  return lineNo;
}
|
|
60
|
+
/**
 * Map a file path to the language key used in PATTERNS.
 *
 * `.phtml` is treated as PHP, matching the PHP detector in this package,
 * which also scans `.phtml` files.
 *
 * @param {string} fp File path; only the extension is inspected (case-insensitive).
 * @returns {'js'|'py'|'java'|'php'|null} Language key, or null if unsupported.
 */
function _lang(fp) {
  if (/\.(?:js|jsx|ts|tsx|mjs|cjs)$/i.test(fp)) return 'js';
  if (/\.py$/i.test(fp)) return 'py';
  if (/\.java$/i.test(fp)) return 'java';
  if (/\.(?:php|phtml)$/i.test(fp)) return 'php';
  return null;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Report whether an allow-list style check appears near a redirect call.
 *
 * The window is the 30 source lines ending with, and including, the call
 * line itself. It is taken from `raw`, so text inside comments counts too.
 *
 * @param {string} raw      Full file contents.
 * @param {number} callLine 1-based line number of the redirect call.
 * @param {string} target   Captured redirect-target expression. Accepted for
 *                          interface compatibility but not used: the target
 *                          is NOT stripped from the window before matching.
 * @returns {boolean} True when any ALLOWLIST_PATTERNS entry matches the window.
 */
function _allowListedPrior(raw, callLine, target) {
  const lines = raw.split('\n');
  const lo = Math.max(0, callLine - 30);
  // slice() end is exclusive and callLine is 1-based, so index callLine - 1
  // (the call line) is the last line in the window.
  const windowText = lines.slice(lo, callLine).join('\n');
  return ALLOWLIST_PATTERNS.some((p) => p.test(windowText));
}
|
|
77
|
+
|
|
78
|
+
/**
 * Scan one source file for open redirects (CWE-601).
 *
 * A finding is emitted when a redirect sink from PATTERNS receives a target
 * containing a taint hint (TAINT_HINT_RE) and no allow-list style check is
 * found in the window inspected by `_allowListedPrior`.
 *
 * @param {string} fp  File path; selects the language via its extension.
 * @param {string} raw File contents. Empty input and input longer than
 *                     500,000 characters produce no findings.
 * @returns {object[]} Findings, de-duplicated on file + line + framework.
 */
export function scanOpenRedirect(fp, raw) {
  if (!raw || raw.length > 500_000) return [];
  const lang = _lang(fp);
  if (!lang) return [];

  const code = blankComments(raw, lang === 'py' ? 'py' : undefined);
  // Cheap pre-filter: skip files that mention no redirect sink at all.
  if (!/\bredirect\b|RedirectView|HttpResponseRedirect|Location\s*:/i.test(code)) return [];

  const findings = [];
  const seen = new Set();
  // Split once, on first use, instead of once per finding.
  let rawLines = null;

  for (const [plang, pat, framework] of PATTERNS) {
    if (plang !== lang) continue;
    // Fresh RegExp so the shared /g pattern keeps no lastIndex between files.
    const re = new RegExp(pat.source, pat.flags);
    let m;
    while ((m = re.exec(code)) !== null) {
      const target = (m[1] || '').trim();
      if (!target) continue;
      if (!TAINT_HINT_RE.test(target)) continue;

      // NOTE(review): offsets from `code` are resolved against `raw`; this
      // assumes blankComments() preserves offsets — confirm.
      const line = _lineOf(raw, m.index);
      // Suppress if an allow-list check appears in the inspected window.
      if (_allowListedPrior(raw, line, target)) continue;

      const id = `open-redirect:${fp}:${line}:${framework}`;
      if (seen.has(id)) continue;
      seen.add(id);

      if (rawLines === null) rawLines = raw.split('\n');
      findings.push({
        id,
        file: fp, line,
        vuln: `Open Redirect (${framework})`,
        severity: 'medium',
        cwe: 'CWE-601',
        family: 'open-redirect',
        stride: 'Spoofing',
        snippet: (rawLines[line - 1] || '').trim().slice(0, 200),
        remediation:
          'Validate the redirect target against a server-side allow-list of paths or hosts before redirecting. ' +
          'Restrict to relative paths starting with a single `/` (and rejecting `//`), or check the hostname against an explicit allow-list set. ' +
          'Never round-trip an attacker-supplied URL through `res.redirect` / `flask.redirect` / `HttpResponseRedirect` / `Location: …` without that check.',
        parser: 'OPEN-REDIRECT',
        confidence: 0.8,
      });
    }
  }
  return findings;
}
|
package/src/sast/php.js
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// PHP-specific detectors. Covers the canonical PHP foot-guns:
|
|
2
|
+
//
|
|
3
|
+
// - $_REQUEST / $_GET / $_POST flowing into eval / system / exec / passthru / shell_exec / `` / popen / proc_open
|
|
4
|
+
// - unserialize() on user input
|
|
5
|
+
// - include / require with user-controlled path (LFI / RFI)
|
|
6
|
+
// - mysql_query with concatenated user input
|
|
7
|
+
// - extract($_REQUEST) — direct variable injection
|
|
8
|
+
// - md5/sha1 used for password hashing
|
|
9
|
+
// - phpinfo() exposed in production code
|
|
10
|
+
// - assert with string argument
|
|
11
|
+
|
|
12
|
+
// One regex per PHP rule. The keys double as rule ids (`php-<key>`) and index
// the per-rule metadata used by scanPhp. All patterns are global; scanPhp
// clones them before use so no lastIndex state is shared.
const RE = {
  // Code/command execution sink with a superglobal somewhere in its arguments.
  dangerCall: /\b(?:eval|assert|system|exec|passthru|shell_exec|popen|proc_open|pcntl_exec)\s*\(\s*[^)]*\$(?:_(?:REQUEST|GET|POST|COOKIE|FILES|SERVER)|HTTP_)/g,
  // Backtick shell command that interpolates any `$variable`.
  backtickInterp: /`[^`]*\$(?:_(?:REQUEST|GET|POST|COOKIE)|[A-Z_a-z][\w]*)[^`]*`/g,
  // unserialize() fed directly from a superglobal.
  unserialize: /\bunserialize\s*\(\s*\$(?:_(?:REQUEST|GET|POST|COOKIE)|HTTP_)/g,
  // include/require whose path starts with a superglobal.
  includeUser: /\b(?:include|include_once|require|require_once)\s*[(\s]+\$(?:_(?:REQUEST|GET|POST|COOKIE)|HTTP_)/g,
  // mysql_query / mysqli_query with `'...' . $_GET[...]` style concatenation.
  mysqlConcat: /\bmysql(?:i)?_(?:query|real_query)\s*\(\s*[^)]*['"]\s*\.\s*\$(?:_(?:REQUEST|GET|POST)|HTTP_)/g,
  // extract() called on a whole request array.
  extractRequest: /\bextract\s*\(\s*\$(?:_REQUEST|_GET|_POST|HTTP_GET_VARS|HTTP_POST_VARS)\s*[,)]/g,
  // md5()/sha1() applied to request data or a password-named variable.
  passwordHashMd5: /\b(?:md5|sha1)\s*\(\s*\$(?:_(?:REQUEST|GET|POST)|password|passwd|pwd|hash_input)/gi,
  // Any phpinfo() call.
  phpinfo: /\bphpinfo\s*\(/g,
};
|
|
22
|
+
|
|
23
|
+
/** 1-based line number of offset `idx` in `raw`: preceding newline count + 1. */
function lineOf(raw, idx) {
  const breaks = raw.substring(0, idx).match(/\n/g);
  return breaks === null ? 1 : breaks.length + 1;
}
|
|
24
|
+
|
|
25
|
+
// Per-rule finding metadata, keyed like RE. Built once at module load
// instead of being re-created for every regex match.
const PHP_RULE_META = {
  dangerCall: {
    vuln: 'Command/Code Injection: dangerous function call with user input ($_REQUEST/$_GET/$_POST)',
    severity: 'critical', cwe: 'CWE-78',
    remediation: 'Never pass $_REQUEST/$_GET/$_POST into eval, system, exec, passthru, shell_exec, popen, or assert with a string. Use escapeshellarg() if you absolutely must, but prefer an array-form exec via proc_open. The right answer is usually: don\'t shell out at all; call a library.',
  },
  backtickInterp: {
    vuln: 'Command Injection: backtick command interpolates a PHP variable',
    severity: 'critical', cwe: 'CWE-78',
    remediation: 'Backticks invoke the shell. Use `proc_open` with an array form so the shell never parses your input.',
  },
  unserialize: {
    vuln: 'Insecure Deserialization: unserialize() on user input',
    severity: 'critical', cwe: 'CWE-502',
    remediation: 'PHP unserialize() will call __destruct / __wakeup on every class in the serialized graph — a gadget chain in the codebase becomes RCE. Replace with json_decode for any input crossing a trust boundary.',
  },
  includeUser: {
    vuln: 'Local/Remote File Inclusion: include/require with user-controlled path',
    severity: 'critical', cwe: 'CWE-98',
    remediation: 'Never include() / require() a user-controlled path. If you need a dispatch table, build it as `$pages = ["home" => "home.php", ...]; include $pages[$_GET["page"]] ?? "404.php";` with an explicit whitelist.',
  },
  mysqlConcat: {
    vuln: 'SQL Injection: mysql(i)_query with concatenated $_REQUEST/$_GET/$_POST',
    severity: 'critical', cwe: 'CWE-89',
    remediation: 'Use prepared statements: `$stmt = $mysqli->prepare("SELECT ... WHERE id = ?"); $stmt->bind_param("i", $id);`. The mysql_* family was deprecated in PHP 5.5 and removed in 7.0 — migrate to mysqli or PDO.',
  },
  extractRequest: {
    vuln: 'Variable Injection: extract($_REQUEST) creates arbitrary local variables from request',
    severity: 'critical', cwe: 'CWE-915',
    remediation: 'Never call `extract()` on a user-controlled array — it overwrites local variables, including `$is_admin`, `$auth_user`, etc. Read the specific fields you want explicitly.',
  },
  passwordHashMd5: {
    vuln: 'Weak password hashing — md5/sha1 are not password-hashing functions',
    severity: 'high', cwe: 'CWE-916',
    remediation: 'Use password_hash($pwd, PASSWORD_ARGON2ID) and password_verify(). md5 and sha1 are too fast for password storage — modern GPUs crack the full keyspace of an 8-char alphanumeric in hours.',
  },
  phpinfo: {
    vuln: 'phpinfo() exposes environment, headers, paths, and INI settings',
    severity: 'high', cwe: 'CWE-200',
    remediation: 'Delete phpinfo() before deploy. It leaks the PHP version, loaded extensions, environment variables, document root, and request headers — a one-shot recon page for an attacker.',
  },
};

/**
 * Scan a PHP source file with every rule in RE.
 *
 * Matching runs on the raw text; comments are not stripped first.
 *
 * @param {string} fp  File path; only `.php`, `.phtml` and `.phar` are scanned.
 * @param {string} raw File contents. Empty input and input longer than
 *                     500,000 characters produce no findings.
 * @returns {object[]} Findings, de-duplicated on rule + file + line.
 */
export function scanPhp(fp, raw) {
  if (!/\.(?:php|phtml|phar)$/i.test(fp)) return [];
  if (!raw || raw.length > 500_000) return [];

  const findings = [];
  const seen = new Set();
  // Split once, on first use, instead of once per match.
  let rawLines = null;

  for (const [key, pattern] of Object.entries(RE)) {
    const meta = PHP_RULE_META[key];
    // A rule without metadata cannot be reported; skip it rather than crash
    // the whole scan on `meta.vuln`.
    if (!meta) continue;

    // Fresh RegExp so the shared /g pattern keeps no lastIndex between files.
    const re = new RegExp(pattern.source, pattern.flags);
    let m;
    while ((m = re.exec(raw)) !== null) {
      const line = lineOf(raw, m.index);
      const id = `php-${key}:${fp}:${line}`;
      if (seen.has(id)) continue;
      seen.add(id);

      if (rawLines === null) rawLines = raw.split('\n');
      findings.push({
        id,
        file: fp, line,
        vuln: meta.vuln, severity: meta.severity, cwe: meta.cwe,
        snippet: (rawLines[line - 1] || '').trim().slice(0, 200),
        remediation: meta.remediation,
        parser: 'PHP',
        confidence: 0.85,
      });
    }
  }
  return findings;
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
// 0.7.0 Feat-9: Pipeline / GitHub Actions integrity detector with PBOM emitter.
|
|
2
|
+
//
|
|
3
|
+
// Catches the canonical CI/CD security mistakes:
|
|
4
|
+
// - Floating action tags (uses: foo/bar@main) — supply-chain hijack vector
|
|
5
|
+
// - Third-party action without SHA pinning — same threat
|
|
6
|
+
// - Excessive permissions (write-all) — token-blast-radius
|
|
7
|
+
// - Secret echoed in run: step — leakage
|
|
8
|
+
// - OIDC id-token: write without aud restriction — token theft / re-use
|
|
9
|
+
// - script-injection in github.event.<...> — RCE in workflow
|
|
10
|
+
//
|
|
11
|
+
// Same finding shape as scanIaC; produced separately so the rule set is small and tunable.
|
|
12
|
+
|
|
13
|
+
// Forward-slash path of a GitHub Actions workflow: `.github/workflows/<…>.yml`
// or `.yaml`, at the repository root or under any parent directory.
const _GH_WORKFLOW_RE = /(?:^|\/)\.github\/workflows\/.*\.ya?ml$/i;
|
|
14
|
+
// Paths with a `test(s)/`, `example(s)/` or `fixture(s)/` directory segment;
// scanPipeline skips workflow files that live under one.
const _NONPROD_RE = /(?:^|\/)(?:tests?|examples?|fixtures?)\//i;
|
|
15
|
+
|
|
16
|
+
// Workflow rules. Each entry: `re` (global sink regex), `vuln` label, `sev`,
// `cwe`, `fix` text, and optionally `contextRe` + `contextNeg` — a whole-file
// check that suppresses the rule when it matches (contextNeg: true) or when
// it does not (contextNeg falsy).
//
// The `uses:` rules accept dotted repo names and the `owner/repo/path@ref`
// form (sub-directory actions, reusable workflows); a plain `owner/repo@ref`
// pattern misses both.
const PIPELINE_PATTERNS = [
  {
    re: /\buses\s*:\s*[\w.-]+\/[\w.-]+(?:\/[\w./-]+)?@(?:main|master|latest)\b/g,
    vuln: 'Pipeline: GitHub Action pinned to floating tag',
    sev: 'medium', cwe: 'CWE-1357',
    fix: 'Pin third-party actions to a 40-char commit SHA. The tag can be re-pointed by the publisher (or an attacker who compromises them) without your knowledge.',
  },
  {
    // `actions/*` is exempt; `@v1.2.3`-style full versions are not flagged.
    re: /\buses\s*:\s*(?!actions\/)[\w.-]+\/[\w.-]+(?:\/[\w./-]+)?@v?\d+(?!\.\d+\.\d+)\b/g,
    vuln: 'Pipeline: Third-party action pinned to major-version tag (mutable)',
    sev: 'medium', cwe: 'CWE-1357',
    fix: 'Tag like @v3 is mutable. For first-party `actions/*` it is generally safe. For any third-party action, pin to a full SHA.',
  },
  {
    re: /\bpermissions\s*:\s*write-all\b/g,
    vuln: 'Pipeline: permissions set to write-all (excessive scope)',
    sev: 'high', cwe: 'CWE-272',
    fix: 'Replace `permissions: write-all` with the minimum required permissions block, e.g. `contents: read` + the specific scopes the workflow needs.',
  },
  {
    // Multi-line: the match starts at a `run:` key and ends at the secret
    // reference on the `echo` line.
    re: /run\s*:\s*[\s\S]*?echo\s+[^\n]*\$\{?\s*\{?\s*secrets\.[A-Z0-9_]+/g,
    vuln: 'Pipeline: secret echoed to logs',
    sev: 'high', cwe: 'CWE-532',
    fix: 'Never echo a `${{ secrets.* }}` value to step output. Use `::add-mask::` if you must reference it, and prefer reading the secret directly into a tool that doesn\'t print it.',
  },
  {
    re: /\$\{\{\s*github\.event\.(?:issue\.title|issue\.body|pull_request\.title|pull_request\.body|comment\.body|head_commit\.message|inputs\.[A-Za-z_][\w]*)\s*\}\}/g,
    vuln: 'Pipeline: untrusted github.event input interpolated into shell context',
    sev: 'critical', cwe: 'CWE-78',
    fix: 'Pipe untrusted github.event values through an environment variable instead of interpolating into the shell, e.g. `env: TITLE: ${{ github.event.issue.title }}` then use `"$TITLE"` in the run script.',
  },
  {
    re: /\bid-token\s*:\s*write\b/g,
    vuln: 'Pipeline: OIDC id-token: write without explicit aud restriction',
    sev: 'medium', cwe: 'CWE-1188',
    fix: 'When granting `id-token: write`, configure the cloud-side trust policy to require a specific `aud` claim and `sub` pattern. Otherwise any workflow on the repo can mint a token usable against this trust policy.',
    contextRe: /\b(?:aud|audience)\s*:/, contextNeg: true, // fire only if NO aud/audience configured
  },
];
|
|
55
|
+
|
|
56
|
+
/**
 * Scan a GitHub Actions workflow file with every rule in PIPELINE_PATTERNS.
 *
 * @param {string} fp  File path (either slash style). Only workflow files
 *                     outside test/example/fixture directories are scanned.
 * @param {string} raw File contents. Empty input and input longer than
 *                     200,000 characters produce no findings.
 * @returns {object[]} Findings (`kind: 'iac'`), de-duplicated on file + line + rule.
 */
export function scanPipeline(fp, raw) {
  const normalized = fp.replace(/\\/g, '/');
  if (!_GH_WORKFLOW_RE.test(normalized)) return [];
  if (_NONPROD_RE.test(normalized)) return [];
  if (!raw || raw.length > 200_000) return [];

  const lines = raw.split('\n');
  const findings = [];
  const seen = new Set();

  for (const p of PIPELINE_PATTERNS) {
    if (p.contextRe) {
      const present = p.contextRe.test(raw);
      if (p.contextNeg && present) continue;   // suppress: required context exists
      if (!p.contextNeg && !present) continue; // suppress: required context missing
    }

    // Clone the pattern (forcing /g) so exec() can iterate without sharing
    // lastIndex with the module-level regex.
    const flags = p.re.flags.includes('g') ? p.re.flags : p.re.flags + 'g';
    const re = new RegExp(p.re.source, flags);
    let m;
    while ((m = re.exec(raw)) !== null) {
      // Report the line where the match ENDS. Single-line rules are
      // unaffected, but the secret-echo rule starts at an earlier `run:` key
      // and ends on the offending `echo` line; using the start offset pointed
      // both the line number and the snippet at the wrong line.
      const line = raw.substring(0, m.index + m[0].length).split('\n').length;
      const id = `pipeline:${fp}:${line}:${p.vuln.replace(/\s/g, '_').slice(0, 48)}`;
      if (seen.has(id)) continue;
      seen.add(id);
      findings.push({
        id, kind: 'iac', severity: p.sev, vuln: p.vuln,
        cwe: p.cwe, stride: 'Tampering',
        file: fp, line, snippet: (lines[line - 1] || '').trim(),
        fix: p.fix,
      });
    }
  }
  return findings;
}
|
|
86
|
+
|
|
87
|
+
// PBOM emitter: a Pipeline Bill of Materials. Lists every workflow file, every
|
|
88
|
+
// `uses:` step with its pin (SHA or tag), every secret reference, every
|
|
89
|
+
// permissions block. The PBOM is meant to be stored alongside the SBOM and
|
|
90
|
+
// produced from the same scan.
|
|
91
|
+
/**
 * Build a Pipeline Bill of Materials from a path → contents map.
 *
 * For every GitHub Actions workflow file it records the `uses:` references
 * (action, pin, and whether the pin is a 40-hex-char commit SHA), the distinct
 * `secrets.*` names referenced, the `permissions:` blocks, and whether
 * `id-token: write` is present.
 *
 * @param {Object<string, string>} fileContents Map of file path to contents.
 *        Non-workflow paths and entries whose content is not a string are skipped.
 * @param {{startedAt?: string}} [meta] Optional scan metadata; `startedAt`
 *        is used as `generatedAt` when truthy, otherwise the current time.
 * @returns {object} PBOM document with `workflows` and a `summary`.
 */
export function toPBOM(fileContents, meta = {}) {
  const workflows = [];
  for (const [fp, raw] of Object.entries(fileContents || {})) {
    if (!_GH_WORKFLOW_RE.test(fp.replace(/\\/g, '/'))) continue;
    // Unreadable entries (null/undefined/Buffer) cannot be parsed; skip them
    // instead of throwing on `.match`.
    if (typeof raw !== 'string') continue;

    // `owner/repo@ref` plus the `owner/repo/path@ref` form used by
    // sub-directory actions and reusable workflows; dots allowed in names.
    const usesRe = /\buses\s*:\s*([\w.-]+\/[\w.-]+(?:\/[\w./-]+)?)@([^\s]+)/g;
    const usesArr = [];
    let m;
    while ((m = usesRe.exec(raw)) !== null) {
      usesArr.push({
        action: m[1],
        pin: m[2],
        pinned: /^[a-f0-9]{40}$/.test(m[2]),
      });
    }

    const secretRefs = Array.from(new Set(raw.match(/\bsecrets\.[A-Z0-9_]+/g) || []));
    const permsBlock = (raw.match(/\bpermissions\s*:[^\n]*(?:\n\s+[^\n]*)*/g) || []).map((s) => s.trim());
    const idToken = /\bid-token\s*:\s*write\b/.test(raw);
    workflows.push({
      file: fp,
      uses: usesArr,
      secretsReferenced: secretRefs,
      permissions: permsBlock,
      oidcEnabled: idToken,
    });
  }

  return {
    pbomFormat: 'agentic-security PBOM',
    version: '1',
    // `meta` may be passed explicitly as null; the default only covers undefined.
    generatedAt: meta?.startedAt || new Date().toISOString(),
    workflows,
    summary: {
      totalWorkflows: workflows.length,
      totalActions: workflows.reduce((n, w) => n + w.uses.length, 0),
      pinnedActions: workflows.reduce((n, w) => n + w.uses.filter((u) => u.pinned).length, 0),
      oidcWorkflows: workflows.filter((w) => w.oidcEnabled).length,
    },
  };
}
|