@clear-capabilities/agentic-security-scanner 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1580 -0
- package/bin/.agentic-security/findings.json +1577 -0
- package/bin/.agentic-security/last-scan.json +1577 -0
- package/bin/.agentic-security/last-scan.json.sig +1 -0
- package/bin/.agentic-security/scan-history.json +465 -0
- package/bin/.agentic-security/streak.json +25 -0
- package/bin/agentic-security-audit.js +198 -0
- package/bin/agentic-security-consistency.js +80 -0
- package/bin/agentic-security-diff.js +136 -0
- package/bin/agentic-security-lsp.js +12 -0
- package/bin/agentic-security-mcp.js +40 -0
- package/bin/agentic-security-rule.js +153 -0
- package/bin/agentic-security.js +1683 -0
- package/dist/117.index.js +207 -0
- package/dist/178.index.js +250 -0
- package/dist/218.index.js +793 -0
- package/dist/227.index.js +192 -0
- package/dist/301.index.js +167 -0
- package/dist/384.index.js +18 -0
- package/dist/476.index.js +126 -0
- package/dist/513.index.js +373 -0
- package/dist/520.index.js +13 -0
- package/dist/601.index.js +1038 -0
- package/dist/634.index.js +1892 -0
- package/dist/637.index.js +216 -0
- package/dist/660.index.js +131 -0
- package/dist/675.index.js +451 -0
- package/dist/826.index.js +188 -0
- package/dist/830.index.js +133 -0
- package/dist/agentic-security.mjs +272 -0
- package/dist/agentic-security.mjs.sha256 +1 -0
- package/dist/calibration-seed.json +27 -0
- package/package.json +77 -0
- package/src/.agentic-security/findings.json +80844 -0
- package/src/.agentic-security/last-scan.json +80844 -0
- package/src/.agentic-security/last-scan.json.sig +1 -0
- package/src/.agentic-security/scan-history.json +8408 -0
- package/src/.agentic-security/streak.json +26 -0
- package/src/badge.js +188 -0
- package/src/compare.js +203 -0
- package/src/dataflow/.agentic-security/findings.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
- package/src/dataflow/.agentic-security/scan-history.json +735 -0
- package/src/dataflow/.agentic-security/streak.json +24 -0
- package/src/dataflow/CLAUDE.md +38 -0
- package/src/dataflow/access-paths.js +172 -0
- package/src/dataflow/async-sequencing.js +177 -0
- package/src/dataflow/backward.js +201 -0
- package/src/dataflow/catalog-expanded.js +485 -0
- package/src/dataflow/catalog.js +659 -0
- package/src/dataflow/cross-repo.js +219 -0
- package/src/dataflow/engine.js +588 -0
- package/src/dataflow/exception-flow.js +116 -0
- package/src/dataflow/exploit-prover.js +187 -0
- package/src/dataflow/higher-order.js +221 -0
- package/src/dataflow/ifds.js +347 -0
- package/src/dataflow/implicit-flow.js +129 -0
- package/src/dataflow/incremental.js +229 -0
- package/src/dataflow/index.js +181 -0
- package/src/dataflow/numeric-domain.js +192 -0
- package/src/dataflow/path-feasibility.js +114 -0
- package/src/dataflow/points-to.js +337 -0
- package/src/dataflow/polyglot.js +190 -0
- package/src/dataflow/proven-clean.js +159 -0
- package/src/dataflow/receiver-context.js +76 -0
- package/src/dataflow/sanitizer-proof.js +154 -0
- package/src/dataflow/soft-taint.js +140 -0
- package/src/dataflow/string-domain.js +234 -0
- package/src/dataflow/stub-aware-filter.js +100 -0
- package/src/dataflow/summaries.js +132 -0
- package/src/dataflow/symbolic-exec.js +238 -0
- package/src/dataflow/tabulation.js +135 -0
- package/src/engine.js +7763 -0
- package/src/history-scan.js +229 -0
- package/src/index.js +3 -0
- package/src/integrations/.agentic-security/findings.json +1504 -0
- package/src/integrations/.agentic-security/last-scan.json +1504 -0
- package/src/integrations/.agentic-security/scan-history.json +40 -0
- package/src/integrations/.agentic-security/streak.json +21 -0
- package/src/integrations/index.js +321 -0
- package/src/integrations/tickets.js +200 -0
- package/src/ir/.agentic-security/findings.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json.sig +1 -0
- package/src/ir/.agentic-security/scan-history.json +364 -0
- package/src/ir/.agentic-security/streak.json +23 -0
- package/src/ir/CLAUDE.md +172 -0
- package/src/ir/callgraph.js +73 -0
- package/src/ir/class-hierarchy.js +195 -0
- package/src/ir/index.js +152 -0
- package/src/ir/parser-cs.js +260 -0
- package/src/ir/parser-java.js +286 -0
- package/src/ir/parser-js.js +413 -0
- package/src/ir/parser-kt.js +258 -0
- package/src/ir/parser-py-cst.js +136 -0
- package/src/ir/parser-py.helper.py +501 -0
- package/src/ir/parser-py.js +312 -0
- package/src/ir/ssa.js +315 -0
- package/src/ir/type-stubs.js +288 -0
- package/src/leaderboard.js +152 -0
- package/src/llm-validator/.agentic-security/findings.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
- package/src/llm-validator/.agentic-security/scan-history.json +168 -0
- package/src/llm-validator/.agentic-security/streak.json +20 -0
- package/src/llm-validator/consistency.js +141 -0
- package/src/llm-validator/index.js +437 -0
- package/src/lsp/.agentic-security/findings.json +28 -0
- package/src/lsp/.agentic-security/last-scan.json +28 -0
- package/src/lsp/.agentic-security/scan-history.json +79 -0
- package/src/lsp/.agentic-security/streak.json +22 -0
- package/src/lsp/server.js +275 -0
- package/src/mcp/.agentic-security/findings.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +1125 -0
- package/src/mcp/.agentic-security/streak.json +22 -0
- package/src/mcp/CLAUDE.md +54 -0
- package/src/mcp/audit.js +136 -0
- package/src/mcp/redact.js +75 -0
- package/src/mcp/server.js +158 -0
- package/src/mcp/stdio.js +83 -0
- package/src/mcp/tools.js +940 -0
- package/src/mcp/validate.js +49 -0
- package/src/personality.js +164 -0
- package/src/poc-video.js +239 -0
- package/src/posture/.agentic-security/findings.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json.sig +1 -0
- package/src/posture/.agentic-security/scan-history.json +5557 -0
- package/src/posture/.agentic-security/streak.json +24 -0
- package/src/posture/CLAUDE.md +42 -0
- package/src/posture/adversarial-self-test.js +114 -0
- package/src/posture/adversary-agent.js +204 -0
- package/src/posture/agents-memory.js +135 -0
- package/src/posture/ai-code-fingerprint.js +171 -0
- package/src/posture/aibom.js +284 -0
- package/src/posture/api-inventory.js +96 -0
- package/src/posture/attack-playbooks.js +305 -0
- package/src/posture/auditor-agent.js +115 -0
- package/src/posture/auth-posture-import.js +135 -0
- package/src/posture/baseline-compare.js +114 -0
- package/src/posture/blast-radius.js +836 -0
- package/src/posture/bounty-prediction.js +141 -0
- package/src/posture/business-logic.js +239 -0
- package/src/posture/calibration-drift.js +93 -0
- package/src/posture/calibration-seed.json +27 -0
- package/src/posture/calibration.js +204 -0
- package/src/posture/clustering.js +75 -0
- package/src/posture/concurrency-checker.js +265 -0
- package/src/posture/confidence.js +65 -0
- package/src/posture/container-runtime.js +149 -0
- package/src/posture/counterfactual.js +109 -0
- package/src/posture/cross-lang-graphql.js +165 -0
- package/src/posture/cross-lang-grpc.js +166 -0
- package/src/posture/cross-lang-meta.js +101 -0
- package/src/posture/cross-lang-openapi.js +187 -0
- package/src/posture/cross-lang-orm.js +153 -0
- package/src/posture/cross-lang-queues.js +210 -0
- package/src/posture/crown-jewels.js +110 -0
- package/src/posture/custom-rules.js +361 -0
- package/src/posture/cve-alert-daemon.js +433 -0
- package/src/posture/cve-lookup.js +129 -0
- package/src/posture/dead-code.js +430 -0
- package/src/posture/defender-agent.js +158 -0
- package/src/posture/deploy-platform.js +204 -0
- package/src/posture/detector-fuzz.js +61 -0
- package/src/posture/deterministic.js +99 -0
- package/src/posture/drift.js +165 -0
- package/src/posture/epss.js +156 -0
- package/src/posture/exploitability-probability.js +212 -0
- package/src/posture/exploitability.js +121 -0
- package/src/posture/feature-flags.js +110 -0
- package/src/posture/finding-defaults.js +132 -0
- package/src/posture/fix-history.js +411 -0
- package/src/posture/fix-plan.js +121 -0
- package/src/posture/fix-verify-loop.js +157 -0
- package/src/posture/fix-verify.js +130 -0
- package/src/posture/flow-narration.js +105 -0
- package/src/posture/grader-calibration.js +156 -0
- package/src/posture/harness-discovery.js +113 -0
- package/src/posture/holdout-eval.js +144 -0
- package/src/posture/iac-reachability.js +163 -0
- package/src/posture/iam-policy.js +128 -0
- package/src/posture/integrity.js +97 -0
- package/src/posture/learning.js +166 -0
- package/src/posture/license-policy.js +109 -0
- package/src/posture/llm-redteam-prompts.js +418 -0
- package/src/posture/llm-redteam.js +303 -0
- package/src/posture/material-change.js +163 -0
- package/src/posture/mitigation-composite.js +55 -0
- package/src/posture/mttr.js +91 -0
- package/src/posture/network-policy-import.js +126 -0
- package/src/posture/path-predicates.js +99 -0
- package/src/posture/persona-prioritization.js +153 -0
- package/src/posture/poc-cwe-map.js +51 -0
- package/src/posture/poc-generator.js +500 -0
- package/src/posture/policy-gate.js +174 -0
- package/src/posture/pre-incident-archaeology.js +110 -0
- package/src/posture/profile.js +93 -0
- package/src/posture/reachability-filter.js +42 -0
- package/src/posture/regression-test-gen.js +200 -0
- package/src/posture/reverse-blast-radius.js +110 -0
- package/src/posture/router.js +109 -0
- package/src/posture/rule-overrides.js +198 -0
- package/src/posture/rule-pack-signing.js +209 -0
- package/src/posture/rule-packs.js +143 -0
- package/src/posture/rule-synthesis.js +108 -0
- package/src/posture/ruleset-version.js +71 -0
- package/src/posture/sbom.js +129 -0
- package/src/posture/schema-aware-bridge.js +207 -0
- package/src/posture/security-trend.js +87 -0
- package/src/posture/semantic-clone.js +114 -0
- package/src/posture/specification-mining.js +170 -0
- package/src/posture/stable-id.js +75 -0
- package/src/posture/stack-playbook.js +229 -0
- package/src/posture/streak.js +249 -0
- package/src/posture/suppressions.js +135 -0
- package/src/posture/telemetry-ingest.js +112 -0
- package/src/posture/threat-model.js +145 -0
- package/src/posture/three-agent-pipeline.js +74 -0
- package/src/posture/triage.js +146 -0
- package/src/posture/trust-boundary-diagram.js +115 -0
- package/src/posture/type-narrowing.js +129 -0
- package/src/posture/validator-metrics.js +179 -0
- package/src/posture/verifier-ephemeral.js +118 -0
- package/src/posture/verifier-target.js +147 -0
- package/src/posture/verifier.js +257 -0
- package/src/posture/version.js +75 -0
- package/src/posture/waf-ingest.js +200 -0
- package/src/posture/why-fired.js +141 -0
- package/src/pr-comment.js +172 -0
- package/src/pr-delta.js +198 -0
- package/src/report/.agentic-security/findings.json +79 -0
- package/src/report/.agentic-security/last-scan.json +79 -0
- package/src/report/.agentic-security/last-scan.json.sig +1 -0
- package/src/report/.agentic-security/scan-history.json +332 -0
- package/src/report/.agentic-security/streak.json +23 -0
- package/src/report/index.js +1136 -0
- package/src/report/mascot.js +42 -0
- package/src/runScan.js +141 -0
- package/src/sast/.agentic-security/findings.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json.sig +1 -0
- package/src/sast/.agentic-security/scan-history.json +788 -0
- package/src/sast/.agentic-security/streak.json +23 -0
- package/src/sast/CLAUDE.md +39 -0
- package/src/sast/_comment-strip.js +46 -0
- package/src/sast/agent-tool-escalation.js +131 -0
- package/src/sast/auth-provider.js +171 -0
- package/src/sast/authz.js +236 -0
- package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
- package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
- package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
- package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
- package/src/sast/bench-shape/index.js +62 -0
- package/src/sast/claude-hook-injection.js +199 -0
- package/src/sast/claude-md-prompt-injection.js +170 -0
- package/src/sast/claude-settings.js +165 -0
- package/src/sast/client-side.js +149 -0
- package/src/sast/cpp-bench-extras.js +122 -0
- package/src/sast/cpp-dataflow.js +430 -0
- package/src/sast/cpp.js +248 -0
- package/src/sast/csharp.js +152 -0
- package/src/sast/csrf.js +82 -0
- package/src/sast/dart-flutter.js +173 -0
- package/src/sast/db-rls.js +147 -0
- package/src/sast/db-taint.js +215 -0
- package/src/sast/defi-deep.js +242 -0
- package/src/sast/deserialization-gadgets.js +113 -0
- package/src/sast/django-hardening.js +230 -0
- package/src/sast/env-hygiene.js +125 -0
- package/src/sast/fastapi-hardening.js +145 -0
- package/src/sast/go-extended.js +84 -0
- package/src/sast/host-header.js +106 -0
- package/src/sast/index.js +17 -0
- package/src/sast/java-ast-folding.js +561 -0
- package/src/sast/java-bench-extras.js +708 -0
- package/src/sast/java-collection-passthrough.js +178 -0
- package/src/sast/java-constant-fold.js +244 -0
- package/src/sast/java-deserialization.js +125 -0
- package/src/sast/jndi.js +104 -0
- package/src/sast/juliet-shape.js +324 -0
- package/src/sast/jwt-exp.js +104 -0
- package/src/sast/kotlin.js +82 -0
- package/src/sast/laravel-hardening.js +198 -0
- package/src/sast/ldap-injection.js +100 -0
- package/src/sast/llm-owasp.js +465 -0
- package/src/sast/llm-stored-prompt.js +103 -0
- package/src/sast/llm-trading-agent.js +161 -0
- package/src/sast/llm.js +308 -0
- package/src/sast/logic.js +140 -0
- package/src/sast/mass-assignment.js +101 -0
- package/src/sast/mcp-audit.js +242 -0
- package/src/sast/mobile-manifest.js +195 -0
- package/src/sast/model-load.js +164 -0
- package/src/sast/mutation-xss.js +87 -0
- package/src/sast/nosql-injection.js +82 -0
- package/src/sast/open-redirect.js +119 -0
- package/src/sast/php.js +91 -0
- package/src/sast/pipeline.js +122 -0
- package/src/sast/primary-cwe-java.js +155 -0
- package/src/sast/prompt-firewall.js +151 -0
- package/src/sast/prompt-template.js +157 -0
- package/src/sast/prototype-pollution.js +112 -0
- package/src/sast/python-sinks.js +195 -0
- package/src/sast/quarkus-hardening.js +102 -0
- package/src/sast/rag-poisoning.js +118 -0
- package/src/sast/rate-limit.js +128 -0
- package/src/sast/response-splitting.js +138 -0
- package/src/sast/ruby.js +108 -0
- package/src/sast/rust.js +105 -0
- package/src/sast/solidity.js +167 -0
- package/src/sast/springboot-hardening.js +186 -0
- package/src/sast/ssrf-cloud-metadata.js +80 -0
- package/src/sast/ssti.js +116 -0
- package/src/sast/swift.js +162 -0
- package/src/sast/toctou.js +95 -0
- package/src/sast/webhook.js +101 -0
- package/src/sast/xpath-injection.js +51 -0
- package/src/sast/xxe.js +140 -0
- package/src/sast/zip-slip.js +200 -0
- package/src/sca/base-images.json +45 -0
- package/src/sca/container.js +107 -0
- package/src/sca/dep-confusion.js +134 -0
- package/src/sca/index.js +6 -0
- package/src/sca/popular-packages.json +41 -0
- package/src/sca/sarif-ingest.js +187 -0
- package/src/sca/vuln-function-hints.json +89 -0
- package/src/secrets/index.js +4 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// Path feasibility — lite version.
|
|
2
|
+
//
|
|
3
|
+
// Real path-sensitive feasibility requires an SMT solver to check whether a
|
|
4
|
+
// path's accumulated constraints are satisfiable. This module does the cheap
|
|
5
|
+
// version: constant-fold simple boolean conditions and prune obviously
|
|
6
|
+
// infeasible CFG edges before the taint engine walks them.
|
|
7
|
+
//
|
|
8
|
+
// Patterns we catch:
|
|
9
|
+
// if (false) — consequent unreachable
|
|
10
|
+
// if (true) — alternate unreachable
|
|
11
|
+
// if (process.env.NODE_ENV === 'production') — not folded by this version (member expressions evaluate to unknown)
|
|
12
|
+
// if (typeof x === 'string') — both branches reachable; left untouched (no tagging is done in this module)
|
|
13
|
+
// if (x === x) — alternate unreachable
|
|
14
|
+
//
|
|
15
|
+
// Patterns deliberately deferred (would need SMT or symbolic execution):
|
|
16
|
+
// - Comparisons of unrelated variables
|
|
17
|
+
// - Comparisons involving function call return values
|
|
18
|
+
// - Aliasing-aware constraint propagation
|
|
19
|
+
//
|
|
20
|
+
// Output: mutates the CFG node's `succ` array to drop unreachable edges. The
|
|
21
|
+
// existing taint engine then never walks them. Returns the prune count for each
|
|
22
|
+
// function so we can count how many FPs path-feasibility avoided.
|
|
23
|
+
|
|
24
|
+
// Folding rules for binary operators, applied to two operands that have
// already been folded to concrete values. `==` / `!=` are intentionally loose:
// they mirror the semantics of the operator being folded.
const BINARY_FOLDS = new Map([
  ['===', (lhs, rhs) => lhs === rhs],
  ['!==', (lhs, rhs) => lhs !== rhs],
  ['==', (lhs, rhs) => lhs == rhs],
  ['!=', (lhs, rhs) => lhs != rhs],
  ['<', (lhs, rhs) => lhs < rhs],
  ['<=', (lhs, rhs) => lhs <= rhs],
  ['>', (lhs, rhs) => lhs > rhs],
  ['>=', (lhs, rhs) => lhs >= rhs],
  ['+', (lhs, rhs) => lhs + rhs],
  ['-', (lhs, rhs) => lhs - rhs],
  ['*', (lhs, rhs) => lhs * rhs],
  ['/', (lhs, rhs) => lhs / rhs],
]);

/**
 * Constant-fold an IR expression.
 *
 * @param {object} expr - IR expression node with a `kind` discriminator.
 * @returns {*} The folded value, or `undefined` when the expression cannot be
 *   reduced to a constant. `undefined` doubles as the "unknown" marker, so a
 *   literal whose value is `undefined` is indistinguishable from unknown.
 */
function evalConst(expr) {
  if (!expr) return undefined;

  if (expr.kind === 'literal') return expr.value;

  if (expr.kind === 'binary') {
    const lhs = evalConst(expr.left);
    const rhs = evalConst(expr.right);
    // Either side unknown → the whole expression is unknown.
    if (lhs === undefined || rhs === undefined) return undefined;
    const fold = BINARY_FOLDS.get(expr.op);
    return fold ? fold(lhs, rhs) : undefined;
  }

  if (expr.kind === 'logical') {
    const lhs = evalConst(expr.left);
    if (lhs === undefined) return undefined;
    // Short-circuit exactly like the operator: the right side is only
    // consulted when the left side does not decide the result.
    if (expr.op === '&&') return lhs ? evalConst(expr.right) : lhs;
    if (expr.op === '||') return lhs ? lhs : evalConst(expr.right);
    return undefined;
  }

  // 'unknown', 'ident', 'member' and every other kind are never folded here;
  // `x === x` style checks are handled by the caller via syntactic equality.
  return undefined;
}
|
|
68
|
+
|
|
69
|
+
// Structural equality for plain identifiers and member chains rooted at an
// identifier: `x` vs `x`, `a.b.c` vs `a.b.c`. Any other expression kind, or a
// missing operand, compares unequal.
function syntacticallyEqual(a, b) {
  let left = a;
  let right = b;
  // Walk both chains in lockstep from the outermost member towards the root.
  while (left && right && left.kind === right.kind) {
    if (left.kind === 'ident') return left.name === right.name;
    if (left.kind !== 'member' || left.prop !== right.prop) return false;
    left = left.object;
    right = right.object;
  }
  return false;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Prune statically infeasible branch edges from a function's CFG.
 *
 * For every `if` node whose condition is decidable — it constant-folds to a
 * boolean, or it is `x === x` / `x !== x` on syntactically identical
 * operands — the successor that can never be taken is removed from
 * `node.succ` in place. `succ[0]` is treated as the taken ("true") edge and
 * the remaining entries as the not-taken edge(s).
 *
 * A node is only pruned while it still has at least two successors. This
 * keeps the pass idempotent: running it again on an already-pruned CFG must
 * not remove the one remaining, feasible edge. Nodes without a `succ` array
 * are skipped.
 *
 * @param {object} fn - IR function; `fn.cfg.nodes` maps node id → CFG node.
 * @returns {{ pruned: number }} Number of `if` nodes that had an edge removed.
 */
export function applyPathFeasibility(fn) {
  if (!fn || !fn.cfg || !fn.cfg.nodes) return { pruned: 0 };
  let pruned = 0;
  for (const node of Object.values(fn.cfg.nodes)) {
    if (!node || node.kind !== 'if') continue;
    const cond = node.cond;
    if (!cond) continue;
    // With fewer than two successors there is no alternative edge to keep.
    if (!Array.isArray(node.succ) || node.succ.length < 2) continue;

    let alwaysTaken;
    const val = evalConst(cond);
    if (val === true || val === false) {
      alwaysTaken = val;
    } else if (
      cond.kind === 'binary' &&
      (cond.op === '===' || cond.op === '!==') &&
      syntacticallyEqual(cond.left, cond.right)
    ) {
      // `x === x` is treated as always true and `x !== x` as always false.
      // That is wrong only when x is NaN, an accepted imprecision.
      alwaysTaken = cond.op === '===';
    } else {
      continue;
    }

    if (alwaysTaken) {
      // Keep only the taken edge.
      node.succ.splice(1);
    } else {
      // Drop the taken edge, keep the rest.
      node.succ.splice(0, 1);
    }
    pruned++;
  }
  return { pruned };
}
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
// Steensgaard points-to / alias analysis (v0.70 #2).
|
|
2
|
+
//
|
|
3
|
+
// Unification-based, near-linear pointer analysis. The classical reference
|
|
4
|
+
// is Steensgaard, "Points-to analysis in almost linear time" (POPL'96).
|
|
5
|
+
//
|
|
6
|
+
// The idea: every program variable belongs to an equivalence class. Two
|
|
7
|
+
// variables in the same class point to the same set of abstract heap
|
|
8
|
+
// locations. When the analysis sees:
|
|
9
|
+
//
|
|
10
|
+
// a = b → unify class(a) with class(b)
|
|
11
|
+
// a.f = c → for the abstract object class(a) points to, its `f`-slot
|
|
12
|
+
// is unified with class(c)
|
|
13
|
+
// x = a.f → reverse: class(x) unifies with the `f`-slot of what a
|
|
14
|
+
// points to
|
|
15
|
+
// a = new T() → fresh abstract location L; class(a) points to L
|
|
16
|
+
//
|
|
17
|
+
// In Steensgaard, every step is a UNION, never a copy. This gives O(n α(n))
|
|
18
|
+
// time but loses some precision: `a = b; a = c` unifies class(b) and class(c)
|
|
19
|
+
// even though they were never directly compared. Worth the speed; the
|
|
20
|
+
// alternative (Andersen, inclusion-based) is cubic.
|
|
21
|
+
//
|
|
22
|
+
// We use it for ONE specific purpose: at taint-time, when checking whether
|
|
23
|
+
// a variable name `x` is in the tainted state, also check every alias of
|
|
24
|
+
// `x` per the points-to graph. A taint propagated through one alias is
|
|
25
|
+
// visible through all of them.
|
|
26
|
+
//
|
|
27
|
+
// Out of scope for v1:
|
|
28
|
+
// - Heap snapshots (we don't model allocation freshness)
|
|
29
|
+
// - Context-sensitivity (one graph per function; merged at call sites
|
|
30
|
+
// via parameter unification)
|
|
31
|
+
// - Containers (arrays, maps) — modelled as single abstract objects
|
|
32
|
+
// - Reflection / dynamic dispatch
|
|
33
|
+
//
|
|
34
|
+
// Wiring: AGENTIC_SECURITY_POINTS_TO=1 in dataflow/index.js builds the
|
|
35
|
+
// graph before runTaintEngine. The engine reads `opts._pointsTo` and
|
|
36
|
+
// consults `aliasesOf(x)` inside exprTaint and the assign transfer.
|
|
37
|
+
|
|
38
|
+
// ─── Union-Find ──────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
// Disjoint-set forest over arbitrary keys, with union by rank and path
// compression. Keys are registered lazily on first use.
class UnionFind {
  constructor() {
    this.parent = new Map(); // key → parent key (a root is its own parent)
    this.rank = new Map(); // key → rank used to pick the winner of a union
  }

  // Register `x` as a singleton class unless it is already known.
  add(x) {
    if (this.parent.has(x)) return;
    this.parent.set(x, x);
    this.rank.set(x, 0);
  }

  // Return the canonical representative of `x`'s class, registering `x` if
  // needed. Every node visited on the way up is re-pointed at the root.
  find(x) {
    this.add(x);
    const trail = [];
    let root = x;
    for (let up = this.parent.get(root); up !== root; up = this.parent.get(root)) {
      trail.push(root);
      root = up;
    }
    for (const visited of trail) this.parent.set(visited, root);
    return root;
  }

  // Merge the classes of `a` and `b` and return the surviving representative.
  // The higher-ranked root wins; on a tie `a`'s root wins and its rank grows.
  union(a, b) {
    const rootA = this.find(a);
    const rootB = this.find(b);
    if (rootA === rootB) return rootA;
    const rankA = this.rank.get(rootA);
    const rankB = this.rank.get(rootB);
    const [winner, loser] = rankA < rankB ? [rootB, rootA] : [rootA, rootB];
    if (rankA === rankB) this.rank.set(winner, rankA + 1);
    this.parent.set(loser, winner);
    return winner;
  }

  // All registered keys, in registration order.
  members() {
    return Array.from(this.parent.keys());
  }

  // Map of canonical representative → array of the keys in its class.
  classes() {
    const byRoot = new Map();
    for (const member of this.parent.keys()) {
      const root = this.find(member);
      const bucket = byRoot.get(root);
      if (bucket) bucket.push(member);
      else byRoot.set(root, [member]);
    }
    return byRoot;
  }
}
|
|
93
|
+
|
|
94
|
+
// ─── PointsToGraph ───────────────────────────────────────────────────────
|
|
95
|
+
|
|
96
|
+
/**
 * Unification-based points-to graph.
 *
 * Variables live in union-find classes (`uf`). A class may have one pointee
 * class (`pointee`), and a pointee class may have per-field slot classes
 * (`fields`). Both maps are keyed by the CURRENT canonical representative of
 * a class. The class ids stored as VALUES may go stale when that class later
 * loses a union, so every read re-canonicalises them with `uf.find`.
 */
export class PointsToGraph {
  constructor() {
    this.uf = new UnionFind();
    this.pointee = new Map(); // classId → classId (its pointee)
    this.fields = new Map(); // pointeeId → Map<fieldName, classId>
  }

  // Register `name` if needed and return its canonical class id.
  _ensure(name) {
    this.uf.add(name);
    return this.uf.find(name);
  }

  // a = b (or b's value flows into a): put both names in one class.
  unify(a, b) {
    const ra = this._ensure(a);
    const rb = this._ensure(b);
    this._unifyClasses(ra, rb);
  }

  /**
   * Merge two classes together with everything hanging off them: if both
   * have a pointee the pointees are merged, and field slots with the same
   * name are merged recursively.
   *
   * Accepts stale (non-canonical) ids. The union happens before any
   * recursion and each call that gets past the equality check removes one
   * class, so cyclic structures terminate.
   *
   * @returns The canonical id of the merged class.
   */
  _unifyClasses(a, b) {
    const ra = this.uf.find(a);
    const rb = this.uf.find(b);
    if (ra === rb) return ra;
    const merged = this.uf.union(ra, rb);
    const other = merged === ra ? rb : ra;

    // Detach the loser's metadata first so no recursive call sees it under
    // a key that is no longer canonical.
    const orphanPointee = this.pointee.get(other);
    const orphanFields = this.fields.get(other);
    this.pointee.delete(other);
    this.fields.delete(other);

    if (orphanPointee !== undefined) {
      const root = this.uf.find(merged);
      const kept = this.pointee.get(root);
      if (kept === undefined) this.pointee.set(root, orphanPointee);
      // The stored entry stays valid after this merge because reads
      // canonicalise it.
      else this._unifyClasses(kept, orphanPointee);
    }

    if (orphanFields !== undefined) {
      const root = this.uf.find(merged);
      if (!this.fields.has(root)) {
        this.fields.set(root, orphanFields);
      } else {
        for (const [field, cls] of orphanFields) {
          // Re-resolve the owner each time: a recursive merge may have
          // moved it.
          this._bindField(this.uf.find(merged), field, cls);
        }
      }
    }
    return this.uf.find(merged);
  }

  // Canonical pointee class of the canonical class `ra`. When `ra` has no
  // pointee yet, a virtual one named `__virt:<ra>` is created and bound.
  _pointeeOrVirtual(ra) {
    const existing = this.pointee.get(ra);
    if (existing !== undefined) return this.uf.find(existing);
    const virt = this._ensure(`__virt:${ra}`);
    this.pointee.set(ra, virt);
    return virt;
  }

  // Tie `cls` to the `field` slot of the canonical pointee class `owner`:
  // merge with the slot's current class if there is one, else claim the slot.
  _bindField(owner, field, cls) {
    let slots = this.fields.get(owner);
    if (!slots) {
      slots = new Map();
      this.fields.set(owner, slots);
    }
    if (slots.has(field)) this._unifyClasses(slots.get(field), cls);
    else slots.set(field, cls);
  }

  // a = new ... → bind `a` to the allocation site `__loc:<locationId>`. If
  // `a` already has a pointee, that pointee is merged with the site.
  alloc(a, locationId) {
    const ra = this._ensure(a);
    const loc = this._ensure(`__loc:${locationId}`);
    const existing = this.pointee.get(ra);
    if (existing === undefined) this.pointee.set(ra, loc);
    else this._unifyClasses(existing, loc);
  }

  // a.f = c
  fieldStore(a, field, c) {
    const ra = this._ensure(a);
    const rc = this._ensure(c);
    this._bindField(this._pointeeOrVirtual(ra), field, rc);
  }

  // x = a.f
  fieldLoad(x, a, field) {
    const rx = this._ensure(x);
    const ra = this._ensure(a);
    this._bindField(this._pointeeOrVirtual(ra), field, rx);
  }

  // Return all names in the same equivalence class as `name`, INCLUDING
  // `name` itself. A name the graph has never seen is its own only alias.
  aliasesOf(name) {
    if (!this.uf.parent.has(name)) return [name];
    const root = this.uf.find(name);
    // Linear scan over every registered name.
    const out = [];
    for (const v of this.uf.parent.keys()) {
      if (this.uf.find(v) === root) out.push(v);
    }
    return out;
  }

  // Diagnostic snapshot; all class ids are reported in canonical form.
  snapshot() {
    return {
      classes: [...this.uf.classes()].map(([root, members]) => ({ root, members })),
      pointees: [...this.pointee].map(([k, v]) => ({ class: k, points: this.uf.find(v) })),
      fields: [...this.fields].map(([k, m]) => ({
        class: k,
        fields: [...m].map(([field, cls]) => [field, this.uf.find(cls)]),
      })),
    };
  }
}
|
|
225
|
+
|
|
226
|
+
// ─── Build the graph from an IR universe ─────────────────────────────────
|
|
227
|
+
|
|
228
|
+
/**
 * Build a PointsToGraph for every function in the call graph.
 *
 * Pass 1 feeds each function's CFG into the graph via `_processFunction`.
 * Pass 2 is the interprocedural step: for every `call` node whose callee
 * resolves to a function with a `qid` and a `params` array, each nameable
 * argument is unified with the callee parameter at the same position.
 *
 * Names used in the union-find:
 *   - A local variable `x` of function `qid` is `qid::x`.
 *   - Global / unresolved references are `::<name>`.
 *   - Allocation sites are `__loc:<qid>:<line>`.
 *
 * @param perFileIR - Not read by this function.
 * @param callGraph - Object with a `functions` collection exposing `.values()`
 *   and an optional `resolve(callee)` lookup.
 * @returns {PointsToGraph} The populated graph (empty when there is no
 *   call graph).
 */
export function buildPointsTo(perFileIR, callGraph) {
  const graph = new PointsToGraph();
  if (!callGraph || !callGraph.functions) return graph;

  // Pass 1: intraprocedural facts.
  for (const fn of callGraph.functions.values()) _processFunction(fn, graph);

  // Pass 2: tie caller arguments to callee parameters.
  for (const caller of callGraph.functions.values()) {
    if (!caller.cfg || !caller.cfg.nodes) continue;
    for (const node of Object.values(caller.cfg.nodes)) {
      if (!node || node.kind !== 'call') continue;
      const callee = callGraph.resolve ? callGraph.resolve(node.callee) : null;
      if (!callee || !callee.qid || !Array.isArray(callee.params)) continue;
      const args = node.args || [];
      // Pair positionally; surplus arguments or parameters are ignored.
      for (let i = 0; i < callee.params.length && i < args.length; i++) {
        const argName = _nameForExpr(caller.qid, args[i]);
        if (argName) graph.unify(argName, `${callee.qid}::${callee.params[i]}`);
      }
    }
  }
  return graph;
}
|
|
263
|
+
|
|
264
|
+
/**
 * Feed every `assign` node of one function's CFG into the graph.
 * Functions without a CFG node table are ignored.
 *
 * @param {{qid: string, cfg?: {nodes?: Object}}} fn  Call-graph function entry.
 * @param {Object} g  Points-to graph forwarded to `_processAssign`.
 */
function _processFunction(fn, g) {
  const nodes = fn && fn.cfg && fn.cfg.nodes;
  if (!nodes) return;
  for (const node of Object.values(nodes)) {
    if (node && node.kind === 'assign') _processAssign(fn, node, g);
  }
}
|
|
272
|
+
|
|
273
|
+
/**
 * Translate one `assign` CFG node into a points-to graph operation.
 *
 * Recognised shapes (anything else is ignored):
 *   - `x.f = y`  → g.fieldStore(`qid::x`, 'f', `qid::y`)
 *   - `x = y`    → g.unify(`qid::x`, `qid::y`)
 *   - `x = y.f`  → g.fieldLoad(`qid::x`, `qid::y`, 'f')
 *   - `x = {}` / `x = []` / `x = new T()` → g.alloc(`qid::x`, `qid:<line>`)
 *
 * @param {{qid: string}} fn  Function the node belongs to; its `qid` prefixes names.
 * @param {{target: *, source: *, line?: number}} node  The assign node. Only
 *   string targets are handled.
 * @param {Object} g  Graph exposing unify / fieldLoad / fieldStore / alloc.
 */
function _processAssign(fn, node, g) {
  const target = typeof node.target === 'string' ? node.target : null;
  if (!target) return;
  const src = node.source;
  if (!src) return;
  const tgtName = `${fn.qid}::${target}`;

  if (src.kind === 'ident' && typeof src.name === 'string') {
    const srcName = `${fn.qid}::${src.name}`;
    // Member-store: the IR may emit a dotted string target such as "x.f".
    // This has to be tested before the plain-copy case; otherwise the copy
    // case would swallow it and unify the literal name "qid::x.f" instead of
    // recording a store into field `f` of `x`.
    if (target.includes('.')) {
      const [recv, ...rest] = target.split('.');
      if (recv && rest.length === 1 && rest[0]) {
        g.fieldStore(`${fn.qid}::${recv}`, rest[0], srcName);
        return;
      }
      // Deeper or malformed paths fall through to the plain copy below.
    }
    // x = y
    g.unify(tgtName, srcName);
    return;
  }

  // x = y.f
  if (src.kind === 'member' && src.object && src.object.kind === 'ident' && typeof src.prop === 'string') {
    g.fieldLoad(tgtName, `${fn.qid}::${src.object.name}`, src.prop);
    return;
  }

  // x = {} / x = [] / x = new T() — each is a fresh allocation site keyed by
  // function and line (line 0 when the node carries no line).
  const isLiteralAlloc = src.kind === 'object' || src.kind === 'array';
  const isConstructorCall =
    src.kind === 'call' && typeof src.callee === 'string' && /^new\s+/.test(src.callee);
  if (isLiteralAlloc || isConstructorCall) {
    g.alloc(tgtName, `${fn.qid}:${node.line || 0}`);
  }
}
|
|
308
|
+
|
|
309
|
+
/**
 * Produce the graph name for an expression, or null when it has none.
 * Only identifier expressions with a string `name` are nameable; they map
 * to `<qid>::<name>`.
 *
 * @param {string} qid  Qualified id of the enclosing function.
 * @param {*} expr  IR expression node (may be missing).
 * @returns {string|null}
 */
function _nameForExpr(qid, expr) {
  const isNamedIdent = Boolean(expr) && expr.kind === 'ident' && typeof expr.name === 'string';
  return isNamedIdent ? `${qid}::${expr.name}` : null;
}
|
|
314
|
+
|
|
315
|
+
// ─── Engine consumption helpers ──────────────────────────────────────────
|
|
316
|
+
|
|
317
|
+
/**
 * List the variable names the engine should check alongside `varName` when
 * consulting taint state for function `qid`.
 *
 * The graph is asked for the aliases of `<qid>::<varName>`. Each alias that
 * has a non-empty prefix before its first `::` is reduced to the part after
 * that separator; empty results and synthetic names starting with `__`
 * (e.g. `__loc:` / `__virt:`) are dropped. `varName` itself is always the
 * first entry and duplicates are removed.
 *
 * @param {{aliasesOf?: Function}|null} pointsTo  Points-to graph, if any.
 * @param {string} qid  Qualified id of the function owning the variable.
 * @param {string} varName  Local variable name.
 * @returns {string[]} Unprefixed names; `[varName]` when no graph is usable.
 */
export function aliasesForVar(pointsTo, qid, varName) {
  if (!pointsTo || !pointsTo.aliasesOf) return [varName];

  const names = new Set([varName]);
  for (const alias of pointsTo.aliasesOf(`${qid}::${varName}`)) {
    const sep = alias.indexOf('::');
    // No separator, or nothing in front of it: not a prefixed local name.
    if (sep <= 0) continue;
    const local = alias.slice(sep + 2);
    if (local === '' || local.startsWith('__')) continue;
    names.add(local);
  }
  return Array.from(names);
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
// Polyglot embedded-language taint (P4.7).
|
|
2
|
+
//
|
|
3
|
+
// Strings embedded inside one language often carry a second language that
|
|
4
|
+
// has its own sinks. The classic example: a SQL string literal inside Java
|
|
5
|
+
// is technically just a String to Java, but it's a SQL statement to the DB.
|
|
6
|
+
// If a tainted value is concatenated into it BEFORE handing to .executeQuery,
|
|
7
|
+
// it's a SQL injection — even if no obvious sink shape is matched.
|
|
8
|
+
//
|
|
9
|
+
// Embeddings we recognize:
|
|
10
|
+
// - SQL inside Java / C# / Python / Go / Node strings
|
|
11
|
+
// - HTML inside JS template literals + .innerHTML/.outerHTML lhs
|
|
12
|
+
// - JavaScript inside HTML <script>, inline `on*` attrs, javascript: URIs
|
|
13
|
+
// - Shell inside .exec/.execSync/.spawn strings (existing sink)
|
|
14
|
+
// - Regex inside new RegExp(<concat>) — ReDoS surface
|
|
15
|
+
// - CSS inside style="..." HTML attrs, .style.cssText assignments
|
|
16
|
+
// - JSON-as-code inside eval/Function constructors
|
|
17
|
+
// - LDAP inside .search(filter:) calls
|
|
18
|
+
// - XPath inside .evaluate(expr:) calls
|
|
19
|
+
// - Mongo $where inside aggregate / find expressions
|
|
20
|
+
// - JNDI inside lookup() / context.lookup() strings (Log4Shell shape)
|
|
21
|
+
//
|
|
22
|
+
// This module's job is to RECOGNIZE the embedded language inside a string
|
|
23
|
+
// expression and tell the engine "this string is actually X — apply X's
|
|
24
|
+
// sink rules to any concatenation/template-hole inside it." It does NOT
|
|
25
|
+
// re-parse the embedded grammar; the heuristic is shape-based, with a
|
|
26
|
+
// confidence score so the engine can demote weak matches.
|
|
27
|
+
//
|
|
28
|
+
// Public API:
|
|
29
|
+
// identifyEmbedding(strValue) → { lang, confidence, evidence }
|
|
30
|
+
// findInterpolationHoles(strNode) → [{ index, expr }] for template literals
|
|
31
|
+
// shouldFlagPolyglot(lang, hole, holeTainted, opts = {})
|
|
32
|
+
// → boolean — should we emit a finding?
|
|
33
|
+
|
|
34
|
+
/** SQL recognition — a statement verb, optionally backed by a clause keyword. */
const SQL_KEYWORDS = /\b(SELECT|INSERT|UPDATE|DELETE|UPSERT|MERGE|CREATE|DROP|ALTER|TRUNCATE|GRANT|REVOKE)\b/i;
const SQL_CLAUSE = /\b(FROM|WHERE|JOIN|UNION|ORDER\s+BY|GROUP\s+BY|HAVING|LIMIT|OFFSET|INTO|SET|VALUES)\b/i;

/** HTML recognition — an opening/closing tag, and a handful of common entities. */
const HTML_TAG = /<\/?\s*[a-zA-Z][a-zA-Z0-9-]*(\s[^>]*)?>/;
const HTML_ENTITY = /&(?:lt|gt|amp|quot|apos|nbsp|#\d+);/;

/**
 * JS recognition (inside HTML or eval).
 * The arrow token `=>` sits outside the `\b…\b` group: it consists only of
 * non-word characters, so a word-boundary assertion around it would reject
 * the usual spaced form `(a) => a`.
 */
const JS_KEYWORDS = /\b(?:function|var|let|const|return|new\s+\w+|console\.|document\.|window\.)\b|=>/;

/** Shell — common command words at start of string or after a separator (; || && |). */
const SHELL_CMD = /(^|;|\|\||&&|\|)\s*(cat|ls|rm|cp|mv|chmod|chown|curl|wget|bash|sh|sudo|tar|grep|sed|awk|find|kill|ps|netstat|ip|iptables|nc|ssh|scp)\b/;

/** LDAP filter shape: a parenthesised `attr=value` term. */
const LDAP_FILTER = /\(\s*[a-zA-Z]+\s*=\s*[^)]+\)/;

/** XPath shape: `/step` or `//step`, optional `[predicate]`, then `/` or end of string. */
const XPATH_SHAPE = /\/\/?[a-zA-Z*][a-zA-Z0-9_-]*(\[.*?\])?(\/|$)/;

/** JNDI lookup pattern: `${jndi:`, `${ldap:`, `${rmi:` or `${dns:` (case-insensitive). */
const JNDI_PATTERN = /\$\{(jndi|ldap|rmi|dns):/i;

/** Mongo $where as JS expression: `this.<field>` followed by a comparison character. */
const MONGO_WHERE = /this\.\w+\s*[=<>!]/;

/** CSS — property:value pairs, optionally preceded by a selector and `{`. */
const CSS_PROP = /^[^{}]*\{?\s*[a-zA-Z-]+\s*:\s*[^;]+;?/;
|
|
62
|
+
|
|
63
|
+
/**
 * Guess which language is embedded in a string (a literal's text, or the
 * skeleton of a template literal).
 *
 * The checks run in a fixed order. Strong signals return immediately; weak
 * signals only record a provisional guess, which later strong signals may
 * still override and later weak signals may not replace:
 *
 *   jndi (1.0) → sql verb+clause (0.95) → sql verb only (weak 0.6)
 *   → html tag + entity/closing tag (0.9) → html tag only (weak 0.6,
 *     raised to 0.85 when a <script tag and JS keywords are present)
 *   → shell (0.7) → ldap filter with `(|` (0.85) → ldap filter (weak 0.5)
 *   → xpath (weak 0.6) → mongo (0.7, only if nothing guessed yet)
 *   → css (weak 0.45)
 *
 * Note that no path in this function yields 'js' or 'regex'.
 *
 * @param {*} strValue  Text to classify; non-strings and '' yield 'none'.
 * @returns {{lang: string, confidence: number, evidence: string[]}}
 *   `lang` is one of 'sql' | 'html' | 'shell' | 'ldap' | 'xpath' | 'jndi' |
 *   'mongo' | 'css' | 'none'; `confidence` is in 0..1.
 */
export function identifyEmbedding(strValue) {
  if (typeof strValue !== 'string' || strValue === '') {
    return { lang: 'none', confidence: 0, evidence: [] };
  }
  const text = strValue;

  // JNDI first — single pattern, very high signal.
  if (JNDI_PATTERN.test(text)) {
    return { lang: 'jndi', confidence: 1.0, evidence: ['jndi:lookup pattern'] };
  }

  const notes = [];
  let guess = 'none';
  let score = 0;
  // Record a provisional (weak) classification.
  const adopt = (lang, conf, tag) => {
    notes.push(tag);
    guess = lang;
    score = conf;
  };

  const hasSqlVerb = SQL_KEYWORDS.test(text);
  if (hasSqlVerb && SQL_CLAUSE.test(text)) {
    notes.push('sql-keyword+clause');
    return { lang: 'sql', confidence: 0.95, evidence: notes };
  }
  if (hasSqlVerb) adopt('sql', 0.6, 'sql-keyword');

  if (HTML_TAG.test(text)) {
    if (HTML_ENTITY.test(text) || text.includes('</')) {
      return { lang: 'html', confidence: 0.9, evidence: ['html-tag+entity/closer'] };
    }
    if (guess === 'none') adopt('html', 0.6, 'html-tag');
  }

  // HTML hosting a script block is still reported as HTML (the host), just
  // with more confidence.
  if (guess === 'html' && JS_KEYWORDS.test(text) && /<script\b/i.test(text)) {
    notes.push('script-block');
    score = Math.max(score, 0.85);
  }

  if (SHELL_CMD.test(text)) {
    return { lang: 'shell', confidence: 0.7, evidence: ['shell-builtin'] };
  }

  const hasLdapTerm = LDAP_FILTER.test(text);
  if (hasLdapTerm && /\(\|/.test(text)) {
    return { lang: 'ldap', confidence: 0.85, evidence: ['ldap-filter+or'] };
  }
  if (hasLdapTerm && guess === 'none') adopt('ldap', 0.5, 'ldap-filter');

  if (guess === 'none' && XPATH_SHAPE.test(text)) adopt('xpath', 0.6, 'xpath-shape');

  if (guess === 'none' && MONGO_WHERE.test(text)) {
    return { lang: 'mongo', confidence: 0.7, evidence: ['mongo-where'] };
  }

  if (guess === 'none' && CSS_PROP.test(text) && /[a-z-]+:[^;]+;/.test(text)) {
    adopt('css', 0.45, 'css-prop');
  }

  return { lang: guess, confidence: score, evidence: notes };
}
|
|
131
|
+
|
|
132
|
+
/**
 * Enumerate the interpolation holes of a template-literal node.
 *
 * Expected node shape:
 *   { kind: 'template', quasis: [string,...], expressions: [expr,...] }
 *
 * @param {*} strNode  AST node; anything that is not a 'template' node yields [].
 * @returns {{index: number, expr: *}[]} One entry per expression, where
 *   `index` is the expression's position in `expressions`.
 */
export function findInterpolationHoles(strNode) {
  const isTemplate = Boolean(strNode) && strNode.kind === 'template';
  if (!isTemplate) return [];
  return (strNode.expressions || []).map((expr, index) => ({ index, expr }));
}
|
|
143
|
+
|
|
144
|
+
/**
 * Flatten a string-ish AST node into the text the recognizers inspect.
 *
 * - 'template' node: its quasis joined with ' __HOLE__ ' standing in for
 *   each interpolation.
 * - 'literal' node with a string value: that value.
 * - anything else: the empty string.
 *
 * @param {*} strNode  AST node (may be missing).
 * @returns {string}
 */
export function templateSkeleton(strNode) {
  const kind = strNode ? strNode.kind : undefined;
  if (kind === 'template') {
    // The placeholder is a plain word so it does not look like syntax of any
    // embedded language.
    return (strNode.quasis || []).join(' __HOLE__ ');
  }
  if (kind === 'literal' && typeof strNode.value === 'string') return strNode.value;
  return '';
}
|
|
154
|
+
|
|
155
|
+
/**
 * Decide whether a hole inside an embedded-language string warrants a
 * polyglot finding. Intended to be called once the host string's language
 * is known and the hole's taint status has been determined.
 *
 * Rules:
 *   - untainted hole, or no recognised language ('none' / falsy) → false
 *   - 'html' → only when `opts.inAttribute` or `opts.inScript` is truthy
 *   - 'css'  → only when `opts.inExpression` or `opts.inUrlFn` is truthy
 *   - every other language (sql, shell, jndi, ldap, xpath, mongo, js, …) → true
 *
 * @param {string} lang  Embedded language as reported by the recognizer.
 * @param {*} hole  The hole expression; not inspected by this function.
 * @param {boolean} holeTainted  Whether the hole carries tainted data.
 * @param {{inAttribute?: boolean, inScript?: boolean,
 *          inExpression?: boolean, inUrlFn?: boolean}} [opts]
 *   Positional context supplied by the engine for html/css.
 * @returns {boolean}
 */
export function shouldFlagPolyglot(lang, hole, holeTainted, opts = {}) {
  if (!holeTainted || !lang || lang === 'none') return false;

  switch (lang) {
    // Context-dependent hosts: positional info is not available here, so the
    // engine must say where the hole sits.
    case 'html':
      return Boolean(opts.inAttribute) || Boolean(opts.inScript);
    case 'css':
      return Boolean(opts.inExpression) || Boolean(opts.inUrlFn);
    // All remaining embeddings are sensitive whenever the hole is tainted.
    default:
      return true;
  }
}
|
|
172
|
+
|
|
173
|
+
/**
 * Map a recognized embedded language to the finding family and CWE id used
 * when emitting a finding.
 *
 * @param {string} lang  Embedded language name (e.g. 'sql', 'html', 'jndi').
 * @returns {{family: string, cwe: string}|null} A fresh descriptor object, or
 *   null for 'none' and any unrecognised language.
 */
export function embeddingToCwe(lang) {
  switch (lang) {
    case 'sql':   return { family: 'sql-injection',     cwe: 'CWE-89' };
    case 'shell': return { family: 'command-injection', cwe: 'CWE-78' };
    case 'html':  return { family: 'xss',               cwe: 'CWE-79' };
    case 'js':    return { family: 'xss-script',        cwe: 'CWE-79' };
    case 'ldap':  return { family: 'ldap-injection',    cwe: 'CWE-90' };
    case 'xpath': return { family: 'xpath-injection',   cwe: 'CWE-643' };
    case 'mongo': return { family: 'nosql-injection',   cwe: 'CWE-943' };
    // Tainted data reaching a JNDI lookup string is an injection weakness
    // (CWE-74), not an insecure-default-initialization one (CWE-1188).
    case 'jndi':  return { family: 'jndi-injection',    cwe: 'CWE-74' };
    case 'css':   return { family: 'css-injection',     cwe: 'CWE-79' };
    case 'regex': return { family: 'redos',             cwe: 'CWE-1333' };
    default:      return null;
  }
}
|