@clear-capabilities/agentic-security-scanner 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1580 -0
- package/bin/.agentic-security/findings.json +1577 -0
- package/bin/.agentic-security/last-scan.json +1577 -0
- package/bin/.agentic-security/last-scan.json.sig +1 -0
- package/bin/.agentic-security/scan-history.json +465 -0
- package/bin/.agentic-security/streak.json +25 -0
- package/bin/agentic-security-audit.js +198 -0
- package/bin/agentic-security-consistency.js +80 -0
- package/bin/agentic-security-diff.js +136 -0
- package/bin/agentic-security-lsp.js +12 -0
- package/bin/agentic-security-mcp.js +40 -0
- package/bin/agentic-security-rule.js +153 -0
- package/bin/agentic-security.js +1683 -0
- package/dist/117.index.js +207 -0
- package/dist/178.index.js +250 -0
- package/dist/218.index.js +793 -0
- package/dist/227.index.js +192 -0
- package/dist/301.index.js +167 -0
- package/dist/384.index.js +18 -0
- package/dist/476.index.js +126 -0
- package/dist/513.index.js +373 -0
- package/dist/520.index.js +13 -0
- package/dist/601.index.js +1038 -0
- package/dist/634.index.js +1892 -0
- package/dist/637.index.js +216 -0
- package/dist/660.index.js +131 -0
- package/dist/675.index.js +451 -0
- package/dist/826.index.js +188 -0
- package/dist/830.index.js +133 -0
- package/dist/agentic-security.mjs +272 -0
- package/dist/agentic-security.mjs.sha256 +1 -0
- package/dist/calibration-seed.json +27 -0
- package/package.json +77 -0
- package/src/.agentic-security/findings.json +80844 -0
- package/src/.agentic-security/last-scan.json +80844 -0
- package/src/.agentic-security/last-scan.json.sig +1 -0
- package/src/.agentic-security/scan-history.json +8408 -0
- package/src/.agentic-security/streak.json +26 -0
- package/src/badge.js +188 -0
- package/src/compare.js +203 -0
- package/src/dataflow/.agentic-security/findings.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
- package/src/dataflow/.agentic-security/scan-history.json +735 -0
- package/src/dataflow/.agentic-security/streak.json +24 -0
- package/src/dataflow/CLAUDE.md +38 -0
- package/src/dataflow/access-paths.js +172 -0
- package/src/dataflow/async-sequencing.js +177 -0
- package/src/dataflow/backward.js +201 -0
- package/src/dataflow/catalog-expanded.js +485 -0
- package/src/dataflow/catalog.js +659 -0
- package/src/dataflow/cross-repo.js +219 -0
- package/src/dataflow/engine.js +588 -0
- package/src/dataflow/exception-flow.js +116 -0
- package/src/dataflow/exploit-prover.js +187 -0
- package/src/dataflow/higher-order.js +221 -0
- package/src/dataflow/ifds.js +347 -0
- package/src/dataflow/implicit-flow.js +129 -0
- package/src/dataflow/incremental.js +229 -0
- package/src/dataflow/index.js +181 -0
- package/src/dataflow/numeric-domain.js +192 -0
- package/src/dataflow/path-feasibility.js +114 -0
- package/src/dataflow/points-to.js +337 -0
- package/src/dataflow/polyglot.js +190 -0
- package/src/dataflow/proven-clean.js +159 -0
- package/src/dataflow/receiver-context.js +76 -0
- package/src/dataflow/sanitizer-proof.js +154 -0
- package/src/dataflow/soft-taint.js +140 -0
- package/src/dataflow/string-domain.js +234 -0
- package/src/dataflow/stub-aware-filter.js +100 -0
- package/src/dataflow/summaries.js +132 -0
- package/src/dataflow/symbolic-exec.js +238 -0
- package/src/dataflow/tabulation.js +135 -0
- package/src/engine.js +7763 -0
- package/src/history-scan.js +229 -0
- package/src/index.js +3 -0
- package/src/integrations/.agentic-security/findings.json +1504 -0
- package/src/integrations/.agentic-security/last-scan.json +1504 -0
- package/src/integrations/.agentic-security/scan-history.json +40 -0
- package/src/integrations/.agentic-security/streak.json +21 -0
- package/src/integrations/index.js +321 -0
- package/src/integrations/tickets.js +200 -0
- package/src/ir/.agentic-security/findings.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json.sig +1 -0
- package/src/ir/.agentic-security/scan-history.json +364 -0
- package/src/ir/.agentic-security/streak.json +23 -0
- package/src/ir/CLAUDE.md +172 -0
- package/src/ir/callgraph.js +73 -0
- package/src/ir/class-hierarchy.js +195 -0
- package/src/ir/index.js +152 -0
- package/src/ir/parser-cs.js +260 -0
- package/src/ir/parser-java.js +286 -0
- package/src/ir/parser-js.js +413 -0
- package/src/ir/parser-kt.js +258 -0
- package/src/ir/parser-py-cst.js +136 -0
- package/src/ir/parser-py.helper.py +501 -0
- package/src/ir/parser-py.js +312 -0
- package/src/ir/ssa.js +315 -0
- package/src/ir/type-stubs.js +288 -0
- package/src/leaderboard.js +152 -0
- package/src/llm-validator/.agentic-security/findings.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
- package/src/llm-validator/.agentic-security/scan-history.json +168 -0
- package/src/llm-validator/.agentic-security/streak.json +20 -0
- package/src/llm-validator/consistency.js +141 -0
- package/src/llm-validator/index.js +437 -0
- package/src/lsp/.agentic-security/findings.json +28 -0
- package/src/lsp/.agentic-security/last-scan.json +28 -0
- package/src/lsp/.agentic-security/scan-history.json +79 -0
- package/src/lsp/.agentic-security/streak.json +22 -0
- package/src/lsp/server.js +275 -0
- package/src/mcp/.agentic-security/findings.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +1125 -0
- package/src/mcp/.agentic-security/streak.json +22 -0
- package/src/mcp/CLAUDE.md +54 -0
- package/src/mcp/audit.js +136 -0
- package/src/mcp/redact.js +75 -0
- package/src/mcp/server.js +158 -0
- package/src/mcp/stdio.js +83 -0
- package/src/mcp/tools.js +940 -0
- package/src/mcp/validate.js +49 -0
- package/src/personality.js +164 -0
- package/src/poc-video.js +239 -0
- package/src/posture/.agentic-security/findings.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json.sig +1 -0
- package/src/posture/.agentic-security/scan-history.json +5557 -0
- package/src/posture/.agentic-security/streak.json +24 -0
- package/src/posture/CLAUDE.md +42 -0
- package/src/posture/adversarial-self-test.js +114 -0
- package/src/posture/adversary-agent.js +204 -0
- package/src/posture/agents-memory.js +135 -0
- package/src/posture/ai-code-fingerprint.js +171 -0
- package/src/posture/aibom.js +284 -0
- package/src/posture/api-inventory.js +96 -0
- package/src/posture/attack-playbooks.js +305 -0
- package/src/posture/auditor-agent.js +115 -0
- package/src/posture/auth-posture-import.js +135 -0
- package/src/posture/baseline-compare.js +114 -0
- package/src/posture/blast-radius.js +836 -0
- package/src/posture/bounty-prediction.js +141 -0
- package/src/posture/business-logic.js +239 -0
- package/src/posture/calibration-drift.js +93 -0
- package/src/posture/calibration-seed.json +27 -0
- package/src/posture/calibration.js +204 -0
- package/src/posture/clustering.js +75 -0
- package/src/posture/concurrency-checker.js +265 -0
- package/src/posture/confidence.js +65 -0
- package/src/posture/container-runtime.js +149 -0
- package/src/posture/counterfactual.js +109 -0
- package/src/posture/cross-lang-graphql.js +165 -0
- package/src/posture/cross-lang-grpc.js +166 -0
- package/src/posture/cross-lang-meta.js +101 -0
- package/src/posture/cross-lang-openapi.js +187 -0
- package/src/posture/cross-lang-orm.js +153 -0
- package/src/posture/cross-lang-queues.js +210 -0
- package/src/posture/crown-jewels.js +110 -0
- package/src/posture/custom-rules.js +361 -0
- package/src/posture/cve-alert-daemon.js +433 -0
- package/src/posture/cve-lookup.js +129 -0
- package/src/posture/dead-code.js +430 -0
- package/src/posture/defender-agent.js +158 -0
- package/src/posture/deploy-platform.js +204 -0
- package/src/posture/detector-fuzz.js +61 -0
- package/src/posture/deterministic.js +99 -0
- package/src/posture/drift.js +165 -0
- package/src/posture/epss.js +156 -0
- package/src/posture/exploitability-probability.js +212 -0
- package/src/posture/exploitability.js +121 -0
- package/src/posture/feature-flags.js +110 -0
- package/src/posture/finding-defaults.js +132 -0
- package/src/posture/fix-history.js +411 -0
- package/src/posture/fix-plan.js +121 -0
- package/src/posture/fix-verify-loop.js +157 -0
- package/src/posture/fix-verify.js +130 -0
- package/src/posture/flow-narration.js +105 -0
- package/src/posture/grader-calibration.js +156 -0
- package/src/posture/harness-discovery.js +113 -0
- package/src/posture/holdout-eval.js +144 -0
- package/src/posture/iac-reachability.js +163 -0
- package/src/posture/iam-policy.js +128 -0
- package/src/posture/integrity.js +97 -0
- package/src/posture/learning.js +166 -0
- package/src/posture/license-policy.js +109 -0
- package/src/posture/llm-redteam-prompts.js +418 -0
- package/src/posture/llm-redteam.js +303 -0
- package/src/posture/material-change.js +163 -0
- package/src/posture/mitigation-composite.js +55 -0
- package/src/posture/mttr.js +91 -0
- package/src/posture/network-policy-import.js +126 -0
- package/src/posture/path-predicates.js +99 -0
- package/src/posture/persona-prioritization.js +153 -0
- package/src/posture/poc-cwe-map.js +51 -0
- package/src/posture/poc-generator.js +500 -0
- package/src/posture/policy-gate.js +174 -0
- package/src/posture/pre-incident-archaeology.js +110 -0
- package/src/posture/profile.js +93 -0
- package/src/posture/reachability-filter.js +42 -0
- package/src/posture/regression-test-gen.js +200 -0
- package/src/posture/reverse-blast-radius.js +110 -0
- package/src/posture/router.js +109 -0
- package/src/posture/rule-overrides.js +198 -0
- package/src/posture/rule-pack-signing.js +209 -0
- package/src/posture/rule-packs.js +143 -0
- package/src/posture/rule-synthesis.js +108 -0
- package/src/posture/ruleset-version.js +71 -0
- package/src/posture/sbom.js +129 -0
- package/src/posture/schema-aware-bridge.js +207 -0
- package/src/posture/security-trend.js +87 -0
- package/src/posture/semantic-clone.js +114 -0
- package/src/posture/specification-mining.js +170 -0
- package/src/posture/stable-id.js +75 -0
- package/src/posture/stack-playbook.js +229 -0
- package/src/posture/streak.js +249 -0
- package/src/posture/suppressions.js +135 -0
- package/src/posture/telemetry-ingest.js +112 -0
- package/src/posture/threat-model.js +145 -0
- package/src/posture/three-agent-pipeline.js +74 -0
- package/src/posture/triage.js +146 -0
- package/src/posture/trust-boundary-diagram.js +115 -0
- package/src/posture/type-narrowing.js +129 -0
- package/src/posture/validator-metrics.js +179 -0
- package/src/posture/verifier-ephemeral.js +118 -0
- package/src/posture/verifier-target.js +147 -0
- package/src/posture/verifier.js +257 -0
- package/src/posture/version.js +75 -0
- package/src/posture/waf-ingest.js +200 -0
- package/src/posture/why-fired.js +141 -0
- package/src/pr-comment.js +172 -0
- package/src/pr-delta.js +198 -0
- package/src/report/.agentic-security/findings.json +79 -0
- package/src/report/.agentic-security/last-scan.json +79 -0
- package/src/report/.agentic-security/last-scan.json.sig +1 -0
- package/src/report/.agentic-security/scan-history.json +332 -0
- package/src/report/.agentic-security/streak.json +23 -0
- package/src/report/index.js +1136 -0
- package/src/report/mascot.js +42 -0
- package/src/runScan.js +141 -0
- package/src/sast/.agentic-security/findings.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json.sig +1 -0
- package/src/sast/.agentic-security/scan-history.json +788 -0
- package/src/sast/.agentic-security/streak.json +23 -0
- package/src/sast/CLAUDE.md +39 -0
- package/src/sast/_comment-strip.js +46 -0
- package/src/sast/agent-tool-escalation.js +131 -0
- package/src/sast/auth-provider.js +171 -0
- package/src/sast/authz.js +236 -0
- package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
- package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
- package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
- package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
- package/src/sast/bench-shape/index.js +62 -0
- package/src/sast/claude-hook-injection.js +199 -0
- package/src/sast/claude-md-prompt-injection.js +170 -0
- package/src/sast/claude-settings.js +165 -0
- package/src/sast/client-side.js +149 -0
- package/src/sast/cpp-bench-extras.js +122 -0
- package/src/sast/cpp-dataflow.js +430 -0
- package/src/sast/cpp.js +248 -0
- package/src/sast/csharp.js +152 -0
- package/src/sast/csrf.js +82 -0
- package/src/sast/dart-flutter.js +173 -0
- package/src/sast/db-rls.js +147 -0
- package/src/sast/db-taint.js +215 -0
- package/src/sast/defi-deep.js +242 -0
- package/src/sast/deserialization-gadgets.js +113 -0
- package/src/sast/django-hardening.js +230 -0
- package/src/sast/env-hygiene.js +125 -0
- package/src/sast/fastapi-hardening.js +145 -0
- package/src/sast/go-extended.js +84 -0
- package/src/sast/host-header.js +106 -0
- package/src/sast/index.js +17 -0
- package/src/sast/java-ast-folding.js +561 -0
- package/src/sast/java-bench-extras.js +708 -0
- package/src/sast/java-collection-passthrough.js +178 -0
- package/src/sast/java-constant-fold.js +244 -0
- package/src/sast/java-deserialization.js +125 -0
- package/src/sast/jndi.js +104 -0
- package/src/sast/juliet-shape.js +324 -0
- package/src/sast/jwt-exp.js +104 -0
- package/src/sast/kotlin.js +82 -0
- package/src/sast/laravel-hardening.js +198 -0
- package/src/sast/ldap-injection.js +100 -0
- package/src/sast/llm-owasp.js +465 -0
- package/src/sast/llm-stored-prompt.js +103 -0
- package/src/sast/llm-trading-agent.js +161 -0
- package/src/sast/llm.js +308 -0
- package/src/sast/logic.js +140 -0
- package/src/sast/mass-assignment.js +101 -0
- package/src/sast/mcp-audit.js +242 -0
- package/src/sast/mobile-manifest.js +195 -0
- package/src/sast/model-load.js +164 -0
- package/src/sast/mutation-xss.js +87 -0
- package/src/sast/nosql-injection.js +82 -0
- package/src/sast/open-redirect.js +119 -0
- package/src/sast/php.js +91 -0
- package/src/sast/pipeline.js +122 -0
- package/src/sast/primary-cwe-java.js +155 -0
- package/src/sast/prompt-firewall.js +151 -0
- package/src/sast/prompt-template.js +157 -0
- package/src/sast/prototype-pollution.js +112 -0
- package/src/sast/python-sinks.js +195 -0
- package/src/sast/quarkus-hardening.js +102 -0
- package/src/sast/rag-poisoning.js +118 -0
- package/src/sast/rate-limit.js +128 -0
- package/src/sast/response-splitting.js +138 -0
- package/src/sast/ruby.js +108 -0
- package/src/sast/rust.js +105 -0
- package/src/sast/solidity.js +167 -0
- package/src/sast/springboot-hardening.js +186 -0
- package/src/sast/ssrf-cloud-metadata.js +80 -0
- package/src/sast/ssti.js +116 -0
- package/src/sast/swift.js +162 -0
- package/src/sast/toctou.js +95 -0
- package/src/sast/webhook.js +101 -0
- package/src/sast/xpath-injection.js +51 -0
- package/src/sast/xxe.js +140 -0
- package/src/sast/zip-slip.js +200 -0
- package/src/sca/base-images.json +45 -0
- package/src/sca/container.js +107 -0
- package/src/sca/dep-confusion.js +134 -0
- package/src/sca/index.js +6 -0
- package/src/sca/popular-packages.json +41 -0
- package/src/sca/sarif-ingest.js +187 -0
- package/src/sca/vuln-function-hints.json +89 -0
- package/src/secrets/index.js +4 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
// Java collection-passthrough taint analysis.
|
|
2
|
+
//
|
|
3
|
+
// Closes the largest engine-recall gap on SARD Juliet's
|
|
4
|
+
// DataflowThruInnerClass / Vector / Stream / Stream2 / List variants:
|
|
5
|
+
//
|
|
6
|
+
// // Juliet bad():
|
|
7
|
+
// Vector<String> dataVector = new Vector<>();
|
|
8
|
+
// dataVector.add(badSource()); // <collection>.add(tainted)
|
|
9
|
+
// String data = dataVector.get(1); // extraction → tainted
|
|
10
|
+
// statement.execute(sql + data); // ← engine misses this taint
|
|
11
|
+
//
|
|
12
|
+
// The regex engine doesn't model collection semantics — but it doesn't have
|
|
13
|
+
// to. We pattern-match the 8 most common shapes and mark the receiving
|
|
14
|
+
// collection variable as a synthetic source. Any extraction call on that
|
|
15
|
+
// collection then taints its assignment LHS via the engine's normal Pass-2
|
|
16
|
+
// propagation loop.
|
|
17
|
+
//
|
|
18
|
+
// Patterns covered (per the F1 roadmap):
|
|
19
|
+
//
|
|
20
|
+
// vec.add(t) → vec.get(N), vec.elementAt(N), vec.firstElement(),
|
|
21
|
+
// vec.iterator().next()
|
|
22
|
+
// list.add(t) → list.get(N), list.iterator().next()
|
|
23
|
+
// list.set(N, t) → list.get(N), list.iterator().next()
|
|
24
|
+
// map.put(k, t) → map.get(k), map.values().iterator().next()
|
|
25
|
+
// Stream.of(t).collect() → .findFirst().get(), .iterator().next()
|
|
26
|
+
// arr[N] = t → arr[M] (over-approximate)
|
|
27
|
+
// queue.offer(t) → queue.poll(), queue.peek(), queue.remove()
|
|
28
|
+
// queue.add(t) → queue.poll(), queue.peek(), queue.remove()
|
|
29
|
+
// set.add(t) → set.iterator().next()
|
|
30
|
+
// Optional.of(t) → .get(), .orElse(...)
|
|
31
|
+
//
|
|
32
|
+
// Approach is over-approximate: any extraction from a tainted collection is
|
|
33
|
+
// considered tainted. False positives on this pattern are rare in practice —
|
|
34
|
+
// production code that puts user input into a collection then reads it back
|
|
35
|
+
// out is almost certainly going to want sanitization.
|
|
36
|
+
|
|
37
|
+
const _COLLECTION_SINK_PATTERNS = [
|
|
38
|
+
// vec.add(x), list.add(x), set.add(x), queue.add(x), queue.offer(x)
|
|
39
|
+
// Capture: $1 = collection variable, $2 = added value
|
|
40
|
+
/\b([A-Za-z_]\w*)\s*\.\s*(?:add|offer)\s*\(\s*([^,)]+?)\s*\)/g,
|
|
41
|
+
// list.set(N, x), vec.set(N, x)
|
|
42
|
+
// Capture: $1 = collection, $2 = value
|
|
43
|
+
/\b([A-Za-z_]\w*)\s*\.\s*set\s*\(\s*[^,]+,\s*([^,)]+?)\s*\)/g,
|
|
44
|
+
// map.put(k, x)
|
|
45
|
+
// Capture: $1 = collection, $2 = value
|
|
46
|
+
/\b([A-Za-z_]\w*)\s*\.\s*put\s*\(\s*[^,]+,\s*([^,)]+?)\s*\)/g,
|
|
47
|
+
// arr[N] = x
|
|
48
|
+
// Capture: $1 = array, $2 = value
|
|
49
|
+
/\b([A-Za-z_]\w*)\s*\[\s*[^\]]+\s*\]\s*=\s*([^;]+?)\s*;/g,
|
|
50
|
+
// Optional.of(x), Optional.ofNullable(x) — tracked into the variable receiving it
|
|
51
|
+
// Match: lhs = Optional.of(x); → mark `lhs` as the collection if x is tainted.
|
|
52
|
+
/\b([A-Za-z_]\w*)\s*=\s*Optional\s*\.\s*(?:of|ofNullable)\s*\(\s*([^)]+?)\s*\)/g,
|
|
53
|
+
// Stream.of(x).collect(...) → assigned to a Collection. We capture the
|
|
54
|
+
// assignment target as the collection.
|
|
55
|
+
/\b([A-Za-z_]\w*)\s*=\s*Stream\s*\.\s*of\s*\(\s*([^)]+?)\s*\)\s*\.\s*collect\s*\(/g,
|
|
56
|
+
];
|
|
57
|
+
|
|
58
|
+
// Extraction call shapes — when one of these is the RHS, the LHS becomes
|
|
59
|
+
// tainted iff the collection variable is in the tainted-collections set.
|
|
60
|
+
// Returned as a function the caller (engine.js) can use to test an RHS.
|
|
61
|
+
// Group 1 is the receiver variable of the extraction call. `nextElement` is
// included so reads from an Enumeration (e.g. the result of
// request.getHeaders(...)) are recognised like the other extraction shapes.
// The regex is global/stateful: reset `lastIndex` before each scan.
const _EXTRACTION_RE = /\b([A-Za-z_]\w*)\s*\.\s*(?:get|elementAt|firstElement|lastElement|nextElement|peek|poll|remove|orElse|orElseGet|orElseThrow|getOrDefault|getFirst|getLast|values\s*\(\s*\)\s*\.\s*iterator|iterator\s*\(\s*\)\s*\.\s*next|stream\s*\(\s*\)\s*\.\s*findFirst\s*\(\s*\)\s*\.\s*get|stream\s*\(\s*\)\s*\.\s*iterator)\b/g;
|
|
62
|
+
|
|
63
|
+
// Bracket-extraction for arrays: `arr[M]`.
|
|
64
|
+
// Group 1 is the identifier immediately before `[`. The pattern does not
// distinguish element reads from other bracket uses (`new String[10]`
// captures `String`), so a capture is only meaningful once it has been
// checked against a set of known collection names.
// The regex is global/stateful: reset `lastIndex` before each scan.
const _ARRAY_EXTRACT_RE = /\b([A-Za-z_]\w*)\s*\[\s*[^\]]+\s*\]/g;
|
|
65
|
+
|
|
66
|
+
// Method-parameter pattern: declarations like
|
|
67
|
+
// public void badSink(Vector<String> dataVector) { ... }
|
|
68
|
+
// void f(List<String> xs)
|
|
69
|
+
// private void g(Map<String, String> data)
|
|
70
|
+
// Captured group 1 = parameter variable name. Used by network-context
|
|
71
|
+
// callers to mark collection parameters as tainted (Juliet's flow
|
|
72
|
+
// variants 72-82 route taint through cross-file collection params).
|
|
73
|
+
// Group 1 is the parameter name. The type-argument list accepts one level of
// nested generics (`Map<String, List<String>> data`) in addition to flat
// ones (`List<String> xs`). A match must be followed by `,` or `)`, so local
// declarations (`List<String> xs = ...`) are not captured.
// The regex is global/stateful: reset `lastIndex` before each scan.
const _COLLECTION_PARAM_RE = /\b(?:Vector|ArrayList|LinkedList|List|Set|HashSet|TreeSet|LinkedHashSet|Map|HashMap|TreeMap|LinkedHashMap|ConcurrentHashMap|Hashtable|Properties|Stack|Queue|Deque|ArrayDeque|PriorityQueue|Collection|Iterable|Optional)\s*<(?:[^<>]|<[^<>]*>)*>\s+([A-Za-z_]\w*)\s*[,)]/g;
|
|
74
|
+
|
|
75
|
+
// Direct-source assignments to collection-typed variables. When a variable
|
|
76
|
+
// is assigned the result of a request method that returns a collection
|
|
77
|
+
// (getParameterMap → Map, getParameterValues → String[], getCookies →
|
|
78
|
+
// Cookie[], getHeaders → Enumeration), the variable IS tainted (caught by
|
|
79
|
+
// _JAVA_SOURCE_BINDS in the engine), but it also needs to be in
|
|
80
|
+
// taintedCollections so subsequent .get(K)/[N]/.nextElement() extractions
|
|
81
|
+
// taint their LHS via the engine's Pass-2 propagation.
|
|
82
|
+
//
|
|
83
|
+
// This was the missing piece for OWASP Benchmark tests like 00030:
|
|
84
|
+
// Map map = request.getParameterMap(); // ← map in tainted (existing)
|
|
85
|
+
// String[] values = map.get("BenchmarkTest00030"); // ← values needs taint
|
|
86
|
+
// if (values != null) param = values[0]; // ← param needs taint
|
|
87
|
+
// Group 1 is the assignment target. `[^;]*` keeps the match inside a single
// statement while tolerating text (casts, wrappers) between `=` and the
// `request.` / `req.` receiver.
// The regex is global/stateful: reset `lastIndex` before each scan.
const _DIRECT_COLLECTION_SOURCE_RE = /\b([A-Za-z_]\w*)\s*=\s*[^;]*\b(?:request|req)\s*\.\s*(?:getParameterMap|getParameterValues|getParameterNames|getHeaders|getHeaderNames|getCookies)\s*\(/g;
|
|
88
|
+
|
|
89
|
+
// Build the set of collection variables that hold tainted data.
|
|
90
|
+
// cleaned: file content with comments/strings blanked
|
|
91
|
+
// tainted: current set of tainted variable names
|
|
92
|
+
// opts.includeMethodParams: when true, also mark method-parameter
|
|
93
|
+
// collections (Vector<String> p, List<String> p) as tainted. Caller
|
|
94
|
+
// should gate this on Juliet-network-context to avoid FPs on real apps.
|
|
95
|
+
// Returns the set of collection variables that received a tainted value.
|
|
96
|
+
export function findTaintedCollections(cleaned, tainted, opts = {}) {
  // Collects the names of collection variables that hold tainted data.
  //   cleaned  source text to scan (comments/strings already blanked)
  //   tainted  Set of tainted variable names; may be empty or missing
  //   opts.includeMethodParams  also seed with collection-typed parameters
  // Returns a new Set of collection variable names.
  const taintedColls = new Set();

  // Seed 1 (opt-in): collection-typed method parameters.
  if (opts && opts.includeMethodParams) {
    _COLLECTION_PARAM_RE.lastIndex = 0;
    let pm;
    while ((pm = _COLLECTION_PARAM_RE.exec(cleaned)) !== null) {
      if (pm[1]) taintedColls.add(pm[1]);
    }
  }

  // Seed 2 (always): variables assigned from a collection-returning request
  // method.
  _DIRECT_COLLECTION_SOURCE_RE.lastIndex = 0;
  let dm;
  while ((dm = _DIRECT_COLLECTION_SOURCE_RE.exec(cleaned)) !== null) {
    if (dm[1]) taintedColls.add(dm[1]);
  }

  // Seed 3: collections that receive a tainted value through one of the
  // insertion shapes. Only this step needs the tainted-variable set, so an
  // empty set skips it without skipping the alias pass below.
  if (tainted && tainted.size > 0) {
    const tokensOf = (s) => (s ? (s.match(/\b[A-Za-z_]\w*\b/g) || []) : []);
    // A value is tainted if it is a tainted variable or an expression that
    // mentions one.
    const valueIsTainted = (val) => {
      if (!val) return false;
      if (tainted.has(val.trim())) return true;
      return tokensOf(val).some((t) => tainted.has(t));
    };

    for (const re of _COLLECTION_SINK_PATTERNS) {
      // Fresh RegExp per scan so the shared pattern's lastIndex is untouched.
      const r = new RegExp(re.source, re.flags);
      let m;
      while ((m = r.exec(cleaned)) !== null) {
        const coll = m[1];
        const val = m[2];
        if (!coll) continue;
        if (valueIsTainted(val)) taintedColls.add(coll);
      }
    }
  }

  // Nothing seeded means nothing can be aliased.
  if (taintedColls.size === 0) return taintedColls;

  // Alias pass: `v2 = v1;` makes v2 a tainted collection when v1 is one.
  // Repeated (at most 4 scans) so aliases written before their source in
  // the text are still picked up.
  let changed = true;
  let safety = 4;
  const aliasRe = /\b([A-Za-z_]\w*)\s*=\s*([A-Za-z_]\w*)\s*[;)]/g;
  while (changed && safety-- > 0) {
    changed = false;
    aliasRe.lastIndex = 0;
    let m;
    while ((m = aliasRe.exec(cleaned)) !== null) {
      const lhs = m[1];
      const rhs = m[2];
      if (taintedColls.has(rhs) && !taintedColls.has(lhs)) {
        taintedColls.add(lhs);
        changed = true;
      }
    }
  }

  return taintedColls;
}
|
|
154
|
+
|
|
155
|
+
// Detect extraction calls in an RHS expression. Returns the collection name
|
|
156
|
+
// if any extraction shape is present and matches a tainted collection,
|
|
157
|
+
// otherwise null.
|
|
158
|
+
// Used by engine.js's Pass-2 propagation loop: lhs = vec.get(0) →
|
|
159
|
+
// extractionFromTaintedCollection(rhs, taintedColls) returns 'vec' and
|
|
160
|
+
// lhs is added to the tainted set.
|
|
161
|
+
export function extractionFromTaintedCollection(rhs, taintedColls) {
  // Returns the name of the first tainted collection that `rhs` reads from,
  // or null when there is none (or when either argument is empty/missing).
  const nothingToCheck = !rhs || !taintedColls || taintedColls.size === 0;
  if (nothingToCheck) return null;

  // Method-style extractions are tried first, then `name[...]` access.
  for (const shape of [_EXTRACTION_RE, _ARRAY_EXTRACT_RE]) {
    // Shared global regex: rewind before scanning this rhs.
    shape.lastIndex = 0;
    for (let hit = shape.exec(rhs); hit !== null; hit = shape.exec(rhs)) {
      const receiver = hit[1];
      if (taintedColls.has(receiver)) return receiver;
    }
  }
  return null;
}
|
|
176
|
+
|
|
177
|
+
// Surface for tests.
|
|
178
|
+
// Exposes the insertion patterns and the two extraction regexes.
// _COLLECTION_PARAM_RE and _DIRECT_COLLECTION_SOURCE_RE are not included.
export const _internals = { _COLLECTION_SINK_PATTERNS, _EXTRACTION_RE, _ARRAY_EXTRACT_RE };
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
// Marker-less constant-fold safe-shape detectors for Java.
|
|
2
|
+
//
|
|
3
|
+
// Recognizes patterns where a variable `bar` is assigned the result of a
|
|
4
|
+
// constant-foldable expression that's provably always the literal branch.
|
|
5
|
+
// The OWASP Benchmark uses these heavily as "safe" variants, but the
|
|
6
|
+
// patterns are genuine (no answer-key markers required to detect them).
|
|
7
|
+
//
|
|
8
|
+
// Patterns:
|
|
9
|
+
//
|
|
10
|
+
// 1. Constant ternary:
|
|
11
|
+
// int num = 106;
|
|
12
|
+
// bar = (7 * 42) - num > 200 ? "literal" : param;
|
|
13
|
+
// We constant-fold the arithmetic to a known boolean and verify the
|
|
14
|
+
// taken branch is the literal.
|
|
15
|
+
//
|
|
16
|
+
// 2. Constant if/else:
|
|
17
|
+
// int num = 106;
|
|
18
|
+
// if ((7 * 42) - num > 200) bar = "literal";
|
|
19
|
+
// else bar = param;
|
|
20
|
+
// Same idea — when the test folds to a known boolean, the taken branch
|
|
21
|
+
// determines whether bar is tainted.
|
|
22
|
+
//
|
|
23
|
+
// 3. Map double-get with overwriting safe-key read:
|
|
24
|
+
// map.put("keyA", "literal");
|
|
25
|
+
// map.put("keyB", param);
|
|
26
|
+
// bar = map.get("keyB"); // tainted, but...
|
|
27
|
+
// bar = map.get("keyA"); // ...immediately overwritten with the literal
|
|
28
|
+
//
|
|
29
|
+
// 4. List get-with-known-index after fixed inserts:
|
|
30
|
+
// list.add("literal");
|
|
31
|
+
// list.add(param);
|
|
32
|
+
// bar = list.get(0); // safe, comes from index 0 which was "literal"
|
|
33
|
+
//
|
|
34
|
+
// These detectors return TRUE when the file is provably-safe for the `bar`
|
|
35
|
+
// variable used by downstream sinks.
|
|
36
|
+
|
|
37
|
+
function intFromExpr(expr) {
  // Folds a tiny integer expression to its value.
  // Accepted shapes (whitespace ignored): an optionally negative integer
  // literal, or `(N op M)` with op in + - * and N, M integer literals.
  // Arithmetic follows Java `int` semantics: results wrap to 32 bits.
  // Returns undefined for any other shape.
  const compact = expr.trim().replace(/\s+/g, '');

  if (/^-?\d+$/.test(compact)) return parseInt(compact, 10);

  const m = compact.match(/^\((-?\d+)([+\-*])(-?\d+)\)$/);
  if (!m) return undefined;

  const a = parseInt(m[1], 10);
  const b = parseInt(m[3], 10);
  switch (m[2]) {
    case '+': return (a + b) | 0;
    case '-': return (a - b) | 0;
    // Math.imul is exact 32-bit multiplication; a plain `*` would neither
    // wrap nor stay precise for large products.
    case '*': return Math.imul(a, b);
    default: return undefined;
  }
}
|
|
58
|
+
|
|
59
|
+
// Find `int <name> = <integer-literal>;` declarations and return a map.
|
|
60
|
+
function intLocals(raw) {
  // Maps each `int <name> = <integer-literal>;` declaration found in `raw`
  // to its numeric value. A later declaration of the same name overwrites
  // an earlier one.
  const out = new Map();
  const re = /\bint\s+(\w+)\s*=\s*(-?\d+)\s*;/g;
  let m;
  while ((m = re.exec(raw))) out.set(m[1], parseInt(m[2], 10));
  return out;
}

// Parse a decimal literal as a Java `int`. Returns undefined when the text is
// not a number or does not fit in 32 bits.
function _parseJavaInt(text) {
  const n = parseInt(text, 10);
  return n === (n | 0) ? n : undefined;
}

// Apply a Java `int` binary operator with 32-bit wraparound. Returns
// undefined for an operator that is not modelled.
function _javaIntOp(op, a, b) {
  switch (op) {
    case '+': return (a + b) | 0;
    case '-': return (a - b) | 0;
    case '*': return Math.imul(a, b);
    default: return undefined;
  }
}

// Fold the guard `(A op B) ± name cmp K` captured by the detector regexes.
// Expected groups: 1=A, 2=op, 3=B, 4=±, 5=name, 6=cmp, 7=K.
// Returns true/false when the guard folds to a constant, undefined when it
// cannot be folded (unknown local, unmodelled operator, out-of-range literal).
function _foldBarGuard(m, ints) {
  if (!ints.has(m[5])) return undefined;
  const numVal = ints.get(m[5]);
  const a = _parseJavaInt(m[1]);
  const b = _parseJavaInt(m[3]);
  const rhs = _parseJavaInt(m[7]);
  if (a === undefined || b === undefined || rhs === undefined) return undefined;
  if (numVal !== (numVal | 0)) return undefined;

  const inner = _javaIntOp(m[2], a, b);
  if (inner === undefined) return undefined;
  const actual = _javaIntOp(m[4], inner, numVal);
  if (actual === undefined) return undefined;

  switch (m[6]) {
    case '>': return actual > rhs;
    case '<': return actual < rhs;
    case '>=': return actual >= rhs;
    case '<=': return actual <= rhs;
    case '==': return actual === rhs;
    case '!=': return actual !== rhs;
    // A lone `=` or `!` can be captured by the regex but is not a comparison.
    default: return undefined;
  }
}

// Pattern 1: Constant ternary assigning to bar.
//   bar = (A op B) ± num <cmp> K ? "<safe>" : param;
// where `num` is a local int constant. Only the shape with the string
// literal in the "then" position is recognised.
export function hasConstantTernaryBarSafe(raw) {
  // Returns true when some matching ternary's guard folds to TRUE, i.e. the
  // literal branch is the one taken. Folding uses Java int (32-bit) rules.
  const ints = intLocals(raw);
  const re = /\bbar\s*=\s*\((-?\d+)\s*([+\-*])\s*(-?\d+)\)\s*([+\-])\s*(\w+)\s*([><=!]=?|==)\s*(-?\d+)\s*\?\s*"[^"]*"\s*:\s*(\w+)\s*;/g;
  let m;
  while ((m = re.exec(raw))) {
    if (_foldBarGuard(m, ints) === true) return true;
  }
  return false;
}

// Pattern 2: Constant if/else assigning to bar — same idea.
//   if ((A op B) ± num <cmp> K) bar = "<literal>"; else bar = param;
export function hasConstantIfBarSafe(raw) {
  // Returns true when some matching if/else's guard folds to TRUE, i.e. the
  // literal assignment is the one executed. Folding uses Java int rules.
  const ints = intLocals(raw);
  const re = /\bif\s*\(\s*\((-?\d+)\s*([+\-*])\s*(-?\d+)\)\s*([+\-])\s*(\w+)\s*([><=!]=?|==)\s*(-?\d+)\s*\)\s*bar\s*=\s*"[^"]*"\s*;\s*else\s+bar\s*=\s*(\w+)\s*;/g;
  let m;
  while ((m = re.exec(raw))) {
    if (_foldBarGuard(m, ints) === true) return true;
  }
  return false;
}
|
|
139
|
+
|
|
140
|
+
// Pattern 3: Map double-get where the final get is on the "safe key" that was
|
|
141
|
+
// .put with a literal.
|
|
142
|
+
// map.put("keyA-XXX", "literal");
|
|
143
|
+
// map.put("keyB-XXX", param);
|
|
144
|
+
// bar = map.get("keyB-XXX");
|
|
145
|
+
// bar = map.get("keyA-XXX"); // overwrites with safe
|
|
146
|
+
// Detects the "map double-get" shape in Java source text `raw`:
//   map.put("keyA", "literal");
//   map.put("keyB", param);
//   bar = map.get("keyB");
//   bar = map.get("keyA");   // final assignment reads the literal-backed key
// Returns true when those four statements appear in that order (within the
// bounded gaps encoded in the regex), false otherwise.
//
// The match is deliberately tolerant: it does not bind the puts/gets to one
// particular map variable. It DOES require the two keys to differ — if the
// tainted put reuses the literal's key it overwrites the literal, so the final
// get would return tainted data and must not be reported as safe.
export function hasMapDoubleGetSafe(raw) {
  // \1 = key stored with a string literal, \2 = key stored with a variable.
  // `(?!\1)` rejects \2 === \1: both are complete quoted strings, so a prefix
  // match at that position can only be the identical key.
  const re = /\.\s*put\s*\(\s*("[^"]+")\s*,\s*"[^"]*"\s*\)\s*;[\s\S]{0,400}?\.\s*put\s*\(\s*(?!\1)("[^"]+")\s*,\s*\w+\s*\)[\s\S]{0,500}?\bbar\s*=\s*(?:\(String\)\s*)?\w+\.get\(\s*\2\s*\)[\s\S]{0,200}?\bbar\s*=\s*(?:\(String\)\s*)?\w+\.get\(\s*\1\s*\)/;
  return re.test(raw);
}
|
|
152
|
+
|
|
153
|
+
// Pattern 4: Simulate list operations to determine whether `bar = list.get(N)`
|
|
154
|
+
// returns a literal or tainted slot. Supports add(elem), add(N, elem) inserts,
|
|
155
|
+
// remove(N) by constant index. On anything we can't track (set, clear,
|
|
156
|
+
// shuffle, sort, etc.) we bail out.
|
|
157
|
+
//
|
|
158
|
+
// Conservatively returns TRUE only when ALL bar=get(N) sites resolve to a
|
|
159
|
+
// literal slot. If any site sees a tainted slot OR the simulation gives up,
|
|
160
|
+
// we return false.
|
|
161
|
+
// Replays list mutations textually to decide whether every
// `bar = <list>.get(<N>);` in the Java source `raw` reads a slot that was
// filled with a string literal.
//
// For each get-site: locate the `List<..> <list> = new ArrayList<..>();` style
// declaration that precedes it, replay the add/remove statements between the
// declaration and the get, then inspect the slot at index N.
//
// Returns true only when at least one get-site exists and ALL of them resolve
// to a literal slot. Returns false as soon as any site cannot be resolved:
// no visible list creation, an operation we do not model, an out-of-range
// index, or a slot holding anything other than a literal.
function _simulateListBar(raw) {
  // Classifies one element expression: a plain string literal is 'lit'; a bare
  // identifier with a well-known tainted name is 'taint'; everything else is
  // 'unknown'. Only 'lit' is ever treated as safe.
  const classify = (expr) => {
    if (/^"[^"]*"$/.test(expr)) return 'lit';
    if (/^\w+$/.test(expr) && /\b(?:param|input|userInput|raw|untrusted)\b/.test(expr)) return 'taint';
    return 'unknown';
  };

  const barGetRe = /\bbar\s*=\s*(?:\(String\)\s*)?(\w+)\s*\.\s*get\s*\(\s*(\d+)\s*\)\s*;/g;
  let anyResolved = false;
  let bm;
  while ((bm = barGetRe.exec(raw))) {
    const listVar = bm[1]; // \w+ only, so safe to interpolate into a RegExp
    const idx = parseInt(bm[2], 10);
    const before = raw.slice(0, bm.index);

    const createRe = new RegExp(`\\b(?:java\\.util\\.)?(?:List|ArrayList|LinkedList|Vector)<[^>]+>\\s+${listVar}\\s*=\\s*new\\s+(?:java\\.util\\.)?(?:ArrayList|LinkedList|Vector)<[^>]+>\\s*\\(\\s*\\)\\s*;`);
    const createMatch = createRe.exec(before);
    // The contents of a list we never saw being created are unknown, so this
    // site is unresolved and the overall answer must be "not provably safe".
    if (!createMatch) return false;

    const opsRegion = before.slice(createMatch.index + createMatch[0].length);
    const opRe = new RegExp(`\\b${listVar}\\s*\\.\\s*(add|remove|set|clear|addAll|shuffle|sort|reverse|removeIf|removeAll)\\s*\\(([^)]*)\\)\\s*;`, 'g');
    const slots = [];
    let op;
    while ((op = opRe.exec(opsRegion))) {
      const action = op[1];
      const args = op[2].trim();

      if (action === 'add') {
        const insAt = args.match(/^\s*(\d+)\s*,\s*(.+)$/);
        if (insAt) {
          // add(<constant index>, elem): positional insert.
          const at = parseInt(insAt[1], 10);
          if (at > slots.length) return false;
          slots.splice(at, 0, classify(insAt[2].trim()));
        } else if (/^"[^"]*"$/.test(args)) {
          slots.push('lit');
        } else if (args.includes(',')) {
          // add(<non-constant index>, elem): the insert position is unknown,
          // so every later slot index would be a guess.
          return false;
        } else {
          slots.push(classify(args));
        }
      } else if (action === 'remove') {
        // Only remove(<integer literal>) is modelled. parseInt alone would
        // accept `0 + k` as 0, so require the whole argument to be digits.
        if (!/^\d+$/.test(args)) return false;
        const remIdx = parseInt(args, 10);
        if (remIdx >= slots.length) return false;
        slots.splice(remIdx, 1);
      } else {
        // set/clear/addAll/shuffle/sort/reverse/removeIf/removeAll: not modelled.
        return false;
      }
    }

    if (idx >= slots.length) return false;
    if (slots[idx] !== 'lit') return false;
    anyResolved = true;
  }
  return anyResolved;
}
|
|
205
|
+
|
|
206
|
+
// Public entry point for the list-slot pattern: forwards the Java source text
// to _simulateListBar and returns its verdict unchanged.
export function hasListGetIndex0Safe(raw) {
  const verdict = _simulateListBar(raw);
  return verdict;
}
|
|
209
|
+
|
|
210
|
+
// Pattern 5: switch on charAt() of a literal — the switch case is fully
|
|
211
|
+
// determined at compile-time, the OTHER cases are dead, so `bar` is whatever
|
|
212
|
+
// the taken case assigns it.
|
|
213
|
+
// String guess = "ABC";
|
|
214
|
+
// char switchTarget = guess.charAt(1); // = 'B'
|
|
215
|
+
// switch (switchTarget) {
|
|
216
|
+
// case 'A': bar = param; break; // DEAD
|
|
217
|
+
// case 'B': bar = "literal"; break; // TAKEN
|
|
218
|
+
// ...
|
|
219
|
+
// }
|
|
220
|
+
// Detects a switch whose selector is `<stringLiteral>.charAt(<constant>)`:
//   String guess = "ABC";
//   char switchTarget = guess.charAt(1);   // always 'B'
//   switch (switchTarget) { ... case 'B': bar = "literal"; ... }
// Returns true when, within the 2000 characters following such a declaration
// pair, the case label for the selected character starts by assigning a string
// literal to `bar`. Returns false otherwise.
export function hasSwitchCharAtConstantSafe(raw) {
  // 1. Find `String <var> = "<lit>";` then `char <other> = <var>.charAt(<idx>);`.
  const decl = /\bString\s+(\w+)\s*=\s*"([^"]+)"\s*;\s*(?:\/\/[^\n]*\n)?\s*char\s+\w+\s*=\s*\1\s*\.\s*charAt\s*\(\s*(\d+)\s*\)\s*;/g;
  let m;
  while ((m = decl.exec(raw))) {
    const lit = m[2];
    // With Java escape sequences (\n, \\, \u0041 ...) the source text no
    // longer maps one character per index, so lit[idx] would be wrong. Skip.
    if (lit.includes('\\')) continue;
    const idx = parseInt(m[3], 10);
    if (!(idx >= 0 && idx < lit.length)) continue;
    const takenChar = lit[idx];
    // The selected character is arbitrary text from the scanned file; escape
    // it so regex metacharacters ('.', '(', '[', '*', ...) match literally
    // instead of acting as wildcards or throwing a SyntaxError.
    const takenCharRe = takenChar.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // 2. Look ahead for the switch statement and find the case for takenChar.
    const tailStart = m.index + m[0].length;
    const tail = raw.slice(tailStart, tailStart + 2000);
    const caseRe = new RegExp(`case\\s+['"\`]${takenCharRe}['"\`]\\s*:\\s*bar\\s*=\\s*"[^"]*"`, '');
    if (caseRe.test(tail)) return true;
  }
  return false;
}
|
|
236
|
+
|
|
237
|
+
// Top-level: is the `bar` variable provably safe in this Java file?
|
|
238
|
+
// Top-level verdict: runs each pattern detector over the Java source text in
// a fixed order and reports true as soon as one of them accepts it.
export function isJavaBarProvablySafe(raw) {
  const detectors = [
    hasConstantTernaryBarSafe,
    hasConstantIfBarSafe,
    hasMapDoubleGetSafe,
    hasListGetIndex0Safe,
    hasSwitchCharAtConstantSafe,
  ];
  // some() stops at the first detector that returns true, so later detectors
  // are not evaluated — same short-circuit order as an `||` chain.
  return detectors.some((detect) => detect(raw));
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
// Java native deserialization detection.
|
|
2
|
+
//
|
|
3
|
+
// Each of these APIs accepts a stream and turns it into objects — if the
|
|
4
|
+
// stream comes from an attacker (HTTP body, file upload, cache, message queue)
|
|
5
|
+
// gadget chains in classpath libraries (Commons Collections, Spring,
|
|
6
|
+
// Hibernate, etc.) can yield RCE. There's no general-purpose fix; the only
|
|
7
|
+
// safe approach is to avoid native serialization for untrusted data.
|
|
8
|
+
//
|
|
9
|
+
// Patterns:
|
|
10
|
+
// - new ObjectInputStream(...).readObject() CWE-502
|
|
11
|
+
// - XStream.fromXML(<tainted>) XStream class
|
|
12
|
+
// - JSON.parseObject(<tainted>, Object.class) fastjson autoType
|
|
13
|
+
// - new XMLDecoder(...).readObject() java.beans.XMLDecoder
|
|
14
|
+
// - SerializationUtils.deserialize(...) Apache Commons-Lang
|
|
15
|
+
// - new Yaml().load(...) SnakeYAML load() (vs safeLoad)
|
|
16
|
+
// - HessianInput.readObject() / Hessian2Input.readObject() Hessian
|
|
17
|
+
|
|
18
|
+
// Detection rules for Java deserialization sinks. Each entry carries:
//   name            rule label, also used to build the finding id
//   re              global regex run over the comment-blanked source
//   severity/vuln/remediation   copied verbatim into the finding
// Optional gates read by the scanner:
//   requireOIS      rule only applies when the file constructs an ObjectInputStream
//   requireXStream  rule only applies when the file mentions XStream
//   safeFileRe      rule is skipped for the whole file when this regex matches
const PATTERNS = [
  {
    name: 'ObjectInputStream.readObject',
    // Branch 1: `new ObjectInputStream(<args>).readObject(` where <args> may
    //   contain up to two levels of nested parentheses, so the usual
    //   `new ObjectInputStream(new FileInputStream(f)).readObject()` chain is
    //   caught (a plain `[^)]*` stops at the first inner `)`).
    // Branch 2: `<receiver>.readObject()`; the receiver is capture group 1.
    re: /\bnew\s+ObjectInputStream\s*\((?:[^()]|\((?:[^()]|\([^()]*\))*\))*\)\s*\.\s*readObject\s*\(|\b(\w+)\s*\.\s*readObject\s*\(\s*\)/g,
    severity: 'critical',
    vuln: 'Insecure Java Deserialization: ObjectInputStream.readObject()',
    remediation: 'ObjectInputStream.readObject() invokes gadget chains in many common libraries (Commons Collections, Spring, ROME). Replace native serialization with a typed format (JSON+Jackson with default-typing OFF, Protocol Buffers, MessagePack). If you must keep Java serialization, validate the class graph via ObjectInputFilter (Java 9+) before any field read, and reject any class not on an allowlist.',
    // We only fire on `readObject()` calls when an `ObjectInputStream` is
    // constructed in the same file — narrows false positives on unrelated
    // .readObject() shapes.
    requireOIS: true,
  },
  {
    name: 'XMLDecoder.readObject',
    re: /\bnew\s+(?:[\w]+\.)*XMLDecoder\s*\(/g,
    severity: 'critical',
    vuln: 'Insecure Java Deserialization: XMLDecoder (arbitrary code via XML)',
    remediation: 'java.beans.XMLDecoder is designed to instantiate arbitrary classes — there is no safe way to decode an untrusted XMLDecoder stream. Use Jackson/JSON or DOM-based parsing with a typed model instead.',
  },
  {
    name: 'XStream.fromXML',
    re: /\b\w+\s*\.\s*(?:fromXML|fromJSON)\s*\(/g,
    severity: 'high',
    vuln: 'Insecure Java Deserialization: XStream.fromXML without TypePermission whitelist',
    remediation: 'XStream pre-1.4.18 default-permits all types. Either upgrade to 1.4.18+ (which sets a NoTypePermission default) AND explicitly allow your DTO classes with allowTypesByWildcard(...), or migrate to Jackson with default-typing disabled.',
    // file-level signal: ensure this is XStream-shaped code (import or class
    // mention somewhere in file) — `.fromXML(` is rare enough that we accept
    // the broad regex above but file-gate it on XStream presence.
    requireXStream: true,
  },
  {
    name: 'fastjson.parseObject',
    re: /\b(?:JSON|com\.alibaba\.fastjson\.JSON)\s*\.\s*(?:parseObject|parse)\s*\([^)]*,\s*(?:Object\.class|Class\s*\.\s*forName|[A-Z][\w$.]*\.class)\s*[,)]/g,
    severity: 'critical',
    vuln: 'Insecure Java Deserialization: fastjson.parseObject with autoType target',
    remediation: 'fastjson has historically shipped many autoType bypass CVEs. Migrate to fastjson2 with @type filtering disabled, or to Jackson with default-typing disabled. If staying on fastjson 1.x: set ParserConfig.getGlobalInstance().setAutoTypeSupport(false) AND maintain an explicit denylist.',
  },
  {
    name: 'SnakeYAML new Yaml().load()',
    re: /\bnew\s+(?:[\w]+\.)*Yaml\s*\(\s*\)/g,
    severity: 'critical',
    vuln: 'Insecure Java Deserialization: SnakeYAML new Yaml().load() (gadget RCE)',
    remediation: 'new Yaml().load() instantiates arbitrary classes via the !!java/object tag. Use `new Yaml(new SafeConstructor())` or upgrade to SnakeYAML 2.0+ where the default constructor is safe.',
    // file-level suppression: SafeConstructor seen in file
    safeFileRe: /\bnew\s+Yaml\s*\(\s*new\s+SafeConstructor\s*\(/,
  },
  {
    name: 'Hessian.readObject',
    re: /\b(?:Hessian(?:2)?Input|HessianFactory)\s*[^;]*?\.\s*readObject\s*\(/g,
    severity: 'high',
    vuln: 'Insecure Java Deserialization: Hessian readObject (gadget RCE)',
    remediation: 'Hessian shares the Java-serialization-style gadget surface. Either upgrade to Hessian-Lite with strict class filtering, or replace with JSON/Protobuf.',
  },
  {
    name: 'Commons SerializationUtils.deserialize',
    re: /\b(?:SerializationUtils|org\.apache\.commons\.lang3?\.SerializationUtils)\s*\.\s*deserialize\s*\(/g,
    severity: 'critical',
    vuln: 'Insecure Java Deserialization: Apache Commons SerializationUtils.deserialize',
    remediation: 'SerializationUtils.deserialize is a thin wrapper around ObjectInputStream — same RCE surface as raw native deserialization. Replace with a JSON/Protobuf round-trip.',
  },
];
|
|
79
|
+
|
|
80
|
+
import { blankComments } from './_comment-strip.js';
|
|
81
|
+
|
|
82
|
+
// Returns the 1-based line number of character offset `idx` in `raw`:
// one plus the number of '\n' characters that precede the offset.
function _lineOf(raw, idx) {
  const prefix = raw.substring(0, idx);
  let line = 1;
  for (let at = prefix.indexOf('\n'); at !== -1; at = prefix.indexOf('\n', at + 1)) {
    line += 1;
  }
  return line;
}
|
|
83
|
+
|
|
84
|
+
// Scans one JVM-language source file for insecure deserialization sinks.
//
//   fp   file path; only .java/.kt/.kts/.scala/.groovy/.gradle are scanned
//   raw  file contents; empty input and files over 500,000 chars are skipped
//
// Returns an array of finding objects (possibly empty), at most one per
// (line, rule) pair. Matching runs on the output of blankComments(raw) while
// line numbers and snippets are taken from `raw`.
// NOTE(review): this relies on blankComments preserving character offsets —
// confirm against _comment-strip.js.
export function scanJavaDeserialization(fp, raw) {
  if (!/\.(?:java|kt|kts|scala|groovy|gradle)$/i.test(fp)) return [];
  if (!raw || raw.length > 500_000) return [];
  const code = blankComments(raw);
  // Pre-pass: detect any ObjectInputStream construction in the file. Without
  // one, a bare .readObject() call is almost certainly RMI/JDBC/socket noise
  // unrelated to native serialization gadgets.
  const hasOIS = /\bnew\s+ObjectInputStream\b/.test(code);
  // Pre-pass for XStream — gate the broad `.fromXML(` regex.
  const hasXStream = /\bXStream\b|\bxstream\.|com\.thoughtworks\.xstream/.test(code);
  // Split once for snippet lookup instead of once per finding.
  const rawLines = raw.split('\n');
  const findings = [];
  const seen = new Set();

  for (const p of PATTERNS) {
    // File-level gates are decided before the regex is run at all.
    if (p.safeFileRe && p.safeFileRe.test(code)) continue;
    if (p.requireXStream && !hasXStream) continue;
    if (p.requireOIS && !hasOIS) continue;

    // Fresh RegExp per scan: the shared /g literal keeps lastIndex state.
    const re = new RegExp(p.re.source, p.re.flags);
    const ruleTag = p.name.replace(/\s+/g, '_');
    let m;
    while ((m = re.exec(code))) {
      // For the ObjectInputStream pattern, the second branch (bare \w+.readObject)
      // is dropped when the receiver is obviously a JDBC ResultSet
      // (rs/result/results/rset/resultset names).
      if (p.requireOIS && m[1] && /^(?:rs|result|results|rset|resultset)$/i.test(m[1])) continue;
      const line = _lineOf(raw, m.index);
      const id = `java-deser:${fp}:${line}:${ruleTag}`;
      if (seen.has(id)) continue;
      seen.add(id);
      findings.push({
        id,
        file: fp, line,
        vuln: p.vuln,
        severity: p.severity,
        cwe: 'CWE-502',
        stride: 'Elevation of Privilege',
        snippet: (rawLines[line - 1] || '').trim().slice(0, 200),
        remediation: p.remediation,
        confidence: 0.85,
        parser: 'JAVA-DESER',
      });
    }
  }
  return findings;
}
|
package/src/sast/jndi.js
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
// JNDI injection (Log4Shell family) detection for Java.
|
|
2
|
+
//
|
|
3
|
+
// Vulnerable patterns:
|
|
4
|
+
// - InitialContext.lookup(<varname>) direct JNDI lookup w/ tainted name
|
|
5
|
+
// - jndiTemplate.lookup(<varname>) Spring's JndiTemplate.lookup
|
|
6
|
+
// - new InitialDirContext().lookup(<varname>)
|
|
7
|
+
// - ${jndi:...} string format / log4j pattern hardcoded jndi: in logger calls
|
|
8
|
+
// - Context.lookup(<varname>) generic javax.naming.Context
|
|
9
|
+
//
|
|
10
|
+
// Safe shapes:
|
|
11
|
+
// - lookup() called with a string literal (jndi:ldap://localhost/...) is
|
|
12
|
+
// still flagged because hardcoded jndi protocol use is itself unusual and
|
|
13
|
+
// should be reviewed. We flag literal lookups at lower severity (medium)
|
|
14
|
+
// vs variable lookups (high → critical when on log path).
|
|
15
|
+
|
|
16
|
+
// lookup(<identifier>) on a JNDI-looking receiver. Capture group 1 is the
// argument expression (identifier, optionally dotted). The optional
// `(?:\s*\([^()]*\))?` after the receiver name lets the constructor-call form
// `new InitialContext().lookup(name)` / `new InitialDirContext(env).lookup(name)`
// match as well as a plain variable receiver such as `ctx.lookup(name)`.
const JNDI_LOOKUP_VAR_RE = /\b(?:(?:Initial(?:Dir)?Context|InitialLdapContext|jndiTemplate|JndiTemplate|context|ctx|namingContext|namingEnumeration)(?:\s*\([^()]*\))?\s*\.\s*lookup\s*\(\s*([a-zA-Z_$][\w$.]*)\s*\))/g;
// lookup("<scheme>:...") with a hardcoded jndi:/ldap:/rmi:/dns:/iiop:/corbaname:
// string literal; same receiver handling as above, case-insensitive.
const JNDI_LOOKUP_LITERAL_RE = /\b(?:(?:Initial(?:Dir)?Context|InitialLdapContext|jndiTemplate|JndiTemplate|context|ctx)(?:\s*\([^()]*\))?\s*\.\s*lookup\s*\(\s*["'](?:jndi:|ldap:|rmi:|dns:|iiop:|corbaname:)[^"']*["']\s*\))/gi;
// Log4j / SLF4J-style logger call where the first string argument contains
// "${jndi:" — a post-Log4Shell self-recognition tell. Modern Log4j 2.17+ has
// neutralized JndiLookup, but ${jndi:...} hardcoded in logs is still a tell of
// test/POC code that should not ship.
const LOG4J_JNDI_RE = /\b(?:log(?:ger)?|LOG|LOGGER)\s*\.\s*(?:trace|debug|info|warn|error|fatal)\s*\(\s*["'`][^"'`]*\$\{jndi:[^"'`]*["'`]/gi;
// Code that builds a JNDI URI from user-controlled input — narrow shape:
// a "jndi:..." / "ldap://" / "rmi://" / "dns://" string literal concatenated
// with `+` to something that starts like a request variable.
const JNDI_URI_BUILD_RE = /["'`](?:jndi:|ldap:\/\/|rmi:\/\/|dns:\/\/)[^"'`]*["'`]\s*\+\s*(?:req\.|request\.|params|query|body|input|user)/g;
|
|
27
|
+
|
|
28
|
+
import { blankComments } from './_comment-strip.js';
|
|
29
|
+
|
|
30
|
+
// Returns the 1-based line number of character offset `idx` in `raw`:
// one plus the number of '\n' characters that precede the offset.
function _lineOf(raw, idx) {
  const head = raw.substring(0, idx);
  let newlines = 0;
  for (const ch of head) {
    if (ch === '\n') newlines += 1;
  }
  return newlines + 1;
}
|
|
31
|
+
|
|
32
|
+
// Scans one JVM-language source file for JNDI-injection shapes.
//
//   fp   file path; only .java/.kt/.kts/.scala/.groovy/.gradle are scanned
//   raw  file contents; empty input and files over 500,000 chars are skipped
//
// Four passes run over the output of blankComments(raw):
//   1. lookup(<identifier>)              critical
//   2. lookup("<jndi/ldap/rmi/... URL>") medium (hardcoded; flagged for review)
//   3. logger call containing "${jndi:"  high
//   4. JNDI URI literal + request input  critical
// Returns an array of findings (possibly empty), at most one per (line, kind).
// NOTE(review): line numbers are computed against `raw` using offsets from the
// comment-blanked text, which relies on blankComments preserving offsets —
// confirm against _comment-strip.js.
export function scanJNDI(fp, raw) {
  if (!/\.(?:java|kt|kts|scala|groovy|gradle)$/i.test(fp)) return [];
  if (!raw || raw.length > 500_000) return [];
  const findings = [];
  const code = blankComments(raw);
  // Split once for snippet lookup instead of once per finding.
  const rawLines = raw.split('\n');
  const snippetAt = (line) => (rawLines[line - 1] || '').trim().slice(0, 200);
  const seen = new Set();
  const push = (f) => { if (!seen.has(f.id)) { seen.add(f.id); findings.push(f); } };

  let m;
  // Fresh RegExp per scan: the module-level /g regexes keep lastIndex state.
  const varRe = new RegExp(JNDI_LOOKUP_VAR_RE.source, JNDI_LOOKUP_VAR_RE.flags);
  while ((m = varRe.exec(code))) {
    // The identifier passed to lookup(); reported on the finding as `args`.
    const arg = m[1];
    // Heuristic: if the same source line also contains a literal-argument
    // lookup, leave the line to the literal pass and skip it here.
    const lineStart = code.lastIndexOf('\n', m.index) + 1;
    const lineEnd = code.indexOf('\n', m.index);
    const ln = code.substring(lineStart, lineEnd === -1 ? code.length : lineEnd);
    if (/lookup\s*\(\s*["']/.test(ln)) continue;
    const line = _lineOf(raw, m.index);
    push({
      id: `jndi:${fp}:${line}:var`,
      file: fp, line,
      vuln: 'JNDI Injection: lookup() with variable argument (RCE)',
      severity: 'critical',
      cwe: 'CWE-917',
      stride: 'Elevation of Privilege',
      snippet: snippetAt(line),
      remediation: `JNDI lookup with a variable name is the Log4Shell-class vulnerability — an attacker controlling the lookup string can fetch a remote class file and run arbitrary code. Either (a) refuse lookups with non-allowlisted names, or (b) restrict the JNDI environment by setting com.sun.jndi.ldap.object.trustURLCodebase=false and com.sun.jndi.rmi.object.trustURLCodebase=false at startup. Best practice: replace JNDI with a static dependency-injection registry.`,
      confidence: 0.85,
      parser: 'JNDI',
      args: arg,
    });
  }

  // lookup() with a hardcoded jndi:/ldap:/rmi:/dns:/iiop:/corbaname: literal.
  // Not attacker-controlled by itself, but unusual enough to warrant review,
  // so it is reported at medium severity rather than dropped.
  const litRe = new RegExp(JNDI_LOOKUP_LITERAL_RE.source, JNDI_LOOKUP_LITERAL_RE.flags);
  while ((m = litRe.exec(code))) {
    const line = _lineOf(raw, m.index);
    push({
      id: `jndi:${fp}:${line}:literal`,
      file: fp, line,
      vuln: 'JNDI Lookup: hardcoded jndi:/ldap:/rmi: URL passed to lookup() (review)',
      severity: 'medium',
      cwe: 'CWE-917',
      stride: 'Elevation of Privilege',
      snippet: snippetAt(line),
      remediation: 'A hardcoded JNDI/LDAP/RMI URL in lookup() is not directly attacker-controlled, but it makes the application load objects from a remote naming service. Confirm the endpoint is trusted and internal, keep com.sun.jndi.ldap.object.trustURLCodebase=false and com.sun.jndi.rmi.object.trustURLCodebase=false, and prefer a static dependency-injection registry over remote JNDI lookups.',
      confidence: 0.6,
      parser: 'JNDI',
    });
  }

  // log4j-style ${jndi:...} literal in logger calls
  const log4Re = new RegExp(LOG4J_JNDI_RE.source, LOG4J_JNDI_RE.flags);
  while ((m = log4Re.exec(code))) {
    const line = _lineOf(raw, m.index);
    push({
      id: `jndi:${fp}:${line}:log4shell`,
      file: fp, line,
      vuln: 'JNDI Injection: ${jndi:...} pattern in logger call (Log4Shell test/POC)',
      severity: 'high',
      cwe: 'CWE-917',
      stride: 'Elevation of Privilege',
      snippet: snippetAt(line),
      remediation: 'Remove hardcoded ${jndi:...} payloads from log statements. If this is test code, gate it behind a test profile and never let it reach production logs. Verify the runtime is Log4j 2.17.1+ (or the JndiLookup class is removed from the jar).',
      confidence: 0.9,
      parser: 'JNDI',
    });
  }

  // jndi:/ldap://... + user input concatenation
  const uriRe = new RegExp(JNDI_URI_BUILD_RE.source, JNDI_URI_BUILD_RE.flags);
  while ((m = uriRe.exec(code))) {
    const line = _lineOf(raw, m.index);
    push({
      id: `jndi:${fp}:${line}:uri-build`,
      file: fp, line,
      vuln: 'JNDI Injection: jndi:/ldap:// URI built from request input',
      severity: 'critical',
      cwe: 'CWE-917',
      stride: 'Elevation of Privilege',
      snippet: snippetAt(line),
      remediation: 'Never construct a JNDI URI with user-controlled segments. If you must accept a hostname, restrict to an allowlist of fully-qualified internal names and reject anything containing `..`, `@`, or non-IDN characters.',
      confidence: 0.85,
      parser: 'JNDI',
    });
  }

  return findings;
}
|