@clear-capabilities/agentic-security-scanner 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1580 -0
- package/bin/.agentic-security/findings.json +1577 -0
- package/bin/.agentic-security/last-scan.json +1577 -0
- package/bin/.agentic-security/last-scan.json.sig +1 -0
- package/bin/.agentic-security/scan-history.json +465 -0
- package/bin/.agentic-security/streak.json +25 -0
- package/bin/agentic-security-audit.js +198 -0
- package/bin/agentic-security-consistency.js +80 -0
- package/bin/agentic-security-diff.js +136 -0
- package/bin/agentic-security-lsp.js +12 -0
- package/bin/agentic-security-mcp.js +40 -0
- package/bin/agentic-security-rule.js +153 -0
- package/bin/agentic-security.js +1683 -0
- package/dist/117.index.js +207 -0
- package/dist/178.index.js +250 -0
- package/dist/218.index.js +793 -0
- package/dist/227.index.js +192 -0
- package/dist/301.index.js +167 -0
- package/dist/384.index.js +18 -0
- package/dist/476.index.js +126 -0
- package/dist/513.index.js +373 -0
- package/dist/520.index.js +13 -0
- package/dist/601.index.js +1038 -0
- package/dist/634.index.js +1892 -0
- package/dist/637.index.js +216 -0
- package/dist/660.index.js +131 -0
- package/dist/675.index.js +451 -0
- package/dist/826.index.js +188 -0
- package/dist/830.index.js +133 -0
- package/dist/agentic-security.mjs +272 -0
- package/dist/agentic-security.mjs.sha256 +1 -0
- package/dist/calibration-seed.json +27 -0
- package/package.json +77 -0
- package/src/.agentic-security/findings.json +80844 -0
- package/src/.agentic-security/last-scan.json +80844 -0
- package/src/.agentic-security/last-scan.json.sig +1 -0
- package/src/.agentic-security/scan-history.json +8408 -0
- package/src/.agentic-security/streak.json +26 -0
- package/src/badge.js +188 -0
- package/src/compare.js +203 -0
- package/src/dataflow/.agentic-security/findings.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
- package/src/dataflow/.agentic-security/scan-history.json +735 -0
- package/src/dataflow/.agentic-security/streak.json +24 -0
- package/src/dataflow/CLAUDE.md +38 -0
- package/src/dataflow/access-paths.js +172 -0
- package/src/dataflow/async-sequencing.js +177 -0
- package/src/dataflow/backward.js +201 -0
- package/src/dataflow/catalog-expanded.js +485 -0
- package/src/dataflow/catalog.js +659 -0
- package/src/dataflow/cross-repo.js +219 -0
- package/src/dataflow/engine.js +588 -0
- package/src/dataflow/exception-flow.js +116 -0
- package/src/dataflow/exploit-prover.js +187 -0
- package/src/dataflow/higher-order.js +221 -0
- package/src/dataflow/ifds.js +347 -0
- package/src/dataflow/implicit-flow.js +129 -0
- package/src/dataflow/incremental.js +229 -0
- package/src/dataflow/index.js +181 -0
- package/src/dataflow/numeric-domain.js +192 -0
- package/src/dataflow/path-feasibility.js +114 -0
- package/src/dataflow/points-to.js +337 -0
- package/src/dataflow/polyglot.js +190 -0
- package/src/dataflow/proven-clean.js +159 -0
- package/src/dataflow/receiver-context.js +76 -0
- package/src/dataflow/sanitizer-proof.js +154 -0
- package/src/dataflow/soft-taint.js +140 -0
- package/src/dataflow/string-domain.js +234 -0
- package/src/dataflow/stub-aware-filter.js +100 -0
- package/src/dataflow/summaries.js +132 -0
- package/src/dataflow/symbolic-exec.js +238 -0
- package/src/dataflow/tabulation.js +135 -0
- package/src/engine.js +7763 -0
- package/src/history-scan.js +229 -0
- package/src/index.js +3 -0
- package/src/integrations/.agentic-security/findings.json +1504 -0
- package/src/integrations/.agentic-security/last-scan.json +1504 -0
- package/src/integrations/.agentic-security/scan-history.json +40 -0
- package/src/integrations/.agentic-security/streak.json +21 -0
- package/src/integrations/index.js +321 -0
- package/src/integrations/tickets.js +200 -0
- package/src/ir/.agentic-security/findings.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json.sig +1 -0
- package/src/ir/.agentic-security/scan-history.json +364 -0
- package/src/ir/.agentic-security/streak.json +23 -0
- package/src/ir/CLAUDE.md +172 -0
- package/src/ir/callgraph.js +73 -0
- package/src/ir/class-hierarchy.js +195 -0
- package/src/ir/index.js +152 -0
- package/src/ir/parser-cs.js +260 -0
- package/src/ir/parser-java.js +286 -0
- package/src/ir/parser-js.js +413 -0
- package/src/ir/parser-kt.js +258 -0
- package/src/ir/parser-py-cst.js +136 -0
- package/src/ir/parser-py.helper.py +501 -0
- package/src/ir/parser-py.js +312 -0
- package/src/ir/ssa.js +315 -0
- package/src/ir/type-stubs.js +288 -0
- package/src/leaderboard.js +152 -0
- package/src/llm-validator/.agentic-security/findings.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
- package/src/llm-validator/.agentic-security/scan-history.json +168 -0
- package/src/llm-validator/.agentic-security/streak.json +20 -0
- package/src/llm-validator/consistency.js +141 -0
- package/src/llm-validator/index.js +437 -0
- package/src/lsp/.agentic-security/findings.json +28 -0
- package/src/lsp/.agentic-security/last-scan.json +28 -0
- package/src/lsp/.agentic-security/scan-history.json +79 -0
- package/src/lsp/.agentic-security/streak.json +22 -0
- package/src/lsp/server.js +275 -0
- package/src/mcp/.agentic-security/findings.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +1125 -0
- package/src/mcp/.agentic-security/streak.json +22 -0
- package/src/mcp/CLAUDE.md +54 -0
- package/src/mcp/audit.js +136 -0
- package/src/mcp/redact.js +75 -0
- package/src/mcp/server.js +158 -0
- package/src/mcp/stdio.js +83 -0
- package/src/mcp/tools.js +940 -0
- package/src/mcp/validate.js +49 -0
- package/src/personality.js +164 -0
- package/src/poc-video.js +239 -0
- package/src/posture/.agentic-security/findings.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json.sig +1 -0
- package/src/posture/.agentic-security/scan-history.json +5557 -0
- package/src/posture/.agentic-security/streak.json +24 -0
- package/src/posture/CLAUDE.md +42 -0
- package/src/posture/adversarial-self-test.js +114 -0
- package/src/posture/adversary-agent.js +204 -0
- package/src/posture/agents-memory.js +135 -0
- package/src/posture/ai-code-fingerprint.js +171 -0
- package/src/posture/aibom.js +284 -0
- package/src/posture/api-inventory.js +96 -0
- package/src/posture/attack-playbooks.js +305 -0
- package/src/posture/auditor-agent.js +115 -0
- package/src/posture/auth-posture-import.js +135 -0
- package/src/posture/baseline-compare.js +114 -0
- package/src/posture/blast-radius.js +836 -0
- package/src/posture/bounty-prediction.js +141 -0
- package/src/posture/business-logic.js +239 -0
- package/src/posture/calibration-drift.js +93 -0
- package/src/posture/calibration-seed.json +27 -0
- package/src/posture/calibration.js +204 -0
- package/src/posture/clustering.js +75 -0
- package/src/posture/concurrency-checker.js +265 -0
- package/src/posture/confidence.js +65 -0
- package/src/posture/container-runtime.js +149 -0
- package/src/posture/counterfactual.js +109 -0
- package/src/posture/cross-lang-graphql.js +165 -0
- package/src/posture/cross-lang-grpc.js +166 -0
- package/src/posture/cross-lang-meta.js +101 -0
- package/src/posture/cross-lang-openapi.js +187 -0
- package/src/posture/cross-lang-orm.js +153 -0
- package/src/posture/cross-lang-queues.js +210 -0
- package/src/posture/crown-jewels.js +110 -0
- package/src/posture/custom-rules.js +361 -0
- package/src/posture/cve-alert-daemon.js +433 -0
- package/src/posture/cve-lookup.js +129 -0
- package/src/posture/dead-code.js +430 -0
- package/src/posture/defender-agent.js +158 -0
- package/src/posture/deploy-platform.js +204 -0
- package/src/posture/detector-fuzz.js +61 -0
- package/src/posture/deterministic.js +99 -0
- package/src/posture/drift.js +165 -0
- package/src/posture/epss.js +156 -0
- package/src/posture/exploitability-probability.js +212 -0
- package/src/posture/exploitability.js +121 -0
- package/src/posture/feature-flags.js +110 -0
- package/src/posture/finding-defaults.js +132 -0
- package/src/posture/fix-history.js +411 -0
- package/src/posture/fix-plan.js +121 -0
- package/src/posture/fix-verify-loop.js +157 -0
- package/src/posture/fix-verify.js +130 -0
- package/src/posture/flow-narration.js +105 -0
- package/src/posture/grader-calibration.js +156 -0
- package/src/posture/harness-discovery.js +113 -0
- package/src/posture/holdout-eval.js +144 -0
- package/src/posture/iac-reachability.js +163 -0
- package/src/posture/iam-policy.js +128 -0
- package/src/posture/integrity.js +97 -0
- package/src/posture/learning.js +166 -0
- package/src/posture/license-policy.js +109 -0
- package/src/posture/llm-redteam-prompts.js +418 -0
- package/src/posture/llm-redteam.js +303 -0
- package/src/posture/material-change.js +163 -0
- package/src/posture/mitigation-composite.js +55 -0
- package/src/posture/mttr.js +91 -0
- package/src/posture/network-policy-import.js +126 -0
- package/src/posture/path-predicates.js +99 -0
- package/src/posture/persona-prioritization.js +153 -0
- package/src/posture/poc-cwe-map.js +51 -0
- package/src/posture/poc-generator.js +500 -0
- package/src/posture/policy-gate.js +174 -0
- package/src/posture/pre-incident-archaeology.js +110 -0
- package/src/posture/profile.js +93 -0
- package/src/posture/reachability-filter.js +42 -0
- package/src/posture/regression-test-gen.js +200 -0
- package/src/posture/reverse-blast-radius.js +110 -0
- package/src/posture/router.js +109 -0
- package/src/posture/rule-overrides.js +198 -0
- package/src/posture/rule-pack-signing.js +209 -0
- package/src/posture/rule-packs.js +143 -0
- package/src/posture/rule-synthesis.js +108 -0
- package/src/posture/ruleset-version.js +71 -0
- package/src/posture/sbom.js +129 -0
- package/src/posture/schema-aware-bridge.js +207 -0
- package/src/posture/security-trend.js +87 -0
- package/src/posture/semantic-clone.js +114 -0
- package/src/posture/specification-mining.js +170 -0
- package/src/posture/stable-id.js +75 -0
- package/src/posture/stack-playbook.js +229 -0
- package/src/posture/streak.js +249 -0
- package/src/posture/suppressions.js +135 -0
- package/src/posture/telemetry-ingest.js +112 -0
- package/src/posture/threat-model.js +145 -0
- package/src/posture/three-agent-pipeline.js +74 -0
- package/src/posture/triage.js +146 -0
- package/src/posture/trust-boundary-diagram.js +115 -0
- package/src/posture/type-narrowing.js +129 -0
- package/src/posture/validator-metrics.js +179 -0
- package/src/posture/verifier-ephemeral.js +118 -0
- package/src/posture/verifier-target.js +147 -0
- package/src/posture/verifier.js +257 -0
- package/src/posture/version.js +75 -0
- package/src/posture/waf-ingest.js +200 -0
- package/src/posture/why-fired.js +141 -0
- package/src/pr-comment.js +172 -0
- package/src/pr-delta.js +198 -0
- package/src/report/.agentic-security/findings.json +79 -0
- package/src/report/.agentic-security/last-scan.json +79 -0
- package/src/report/.agentic-security/last-scan.json.sig +1 -0
- package/src/report/.agentic-security/scan-history.json +332 -0
- package/src/report/.agentic-security/streak.json +23 -0
- package/src/report/index.js +1136 -0
- package/src/report/mascot.js +42 -0
- package/src/runScan.js +141 -0
- package/src/sast/.agentic-security/findings.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json.sig +1 -0
- package/src/sast/.agentic-security/scan-history.json +788 -0
- package/src/sast/.agentic-security/streak.json +23 -0
- package/src/sast/CLAUDE.md +39 -0
- package/src/sast/_comment-strip.js +46 -0
- package/src/sast/agent-tool-escalation.js +131 -0
- package/src/sast/auth-provider.js +171 -0
- package/src/sast/authz.js +236 -0
- package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
- package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
- package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
- package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
- package/src/sast/bench-shape/index.js +62 -0
- package/src/sast/claude-hook-injection.js +199 -0
- package/src/sast/claude-md-prompt-injection.js +170 -0
- package/src/sast/claude-settings.js +165 -0
- package/src/sast/client-side.js +149 -0
- package/src/sast/cpp-bench-extras.js +122 -0
- package/src/sast/cpp-dataflow.js +430 -0
- package/src/sast/cpp.js +248 -0
- package/src/sast/csharp.js +152 -0
- package/src/sast/csrf.js +82 -0
- package/src/sast/dart-flutter.js +173 -0
- package/src/sast/db-rls.js +147 -0
- package/src/sast/db-taint.js +215 -0
- package/src/sast/defi-deep.js +242 -0
- package/src/sast/deserialization-gadgets.js +113 -0
- package/src/sast/django-hardening.js +230 -0
- package/src/sast/env-hygiene.js +125 -0
- package/src/sast/fastapi-hardening.js +145 -0
- package/src/sast/go-extended.js +84 -0
- package/src/sast/host-header.js +106 -0
- package/src/sast/index.js +17 -0
- package/src/sast/java-ast-folding.js +561 -0
- package/src/sast/java-bench-extras.js +708 -0
- package/src/sast/java-collection-passthrough.js +178 -0
- package/src/sast/java-constant-fold.js +244 -0
- package/src/sast/java-deserialization.js +125 -0
- package/src/sast/jndi.js +104 -0
- package/src/sast/juliet-shape.js +324 -0
- package/src/sast/jwt-exp.js +104 -0
- package/src/sast/kotlin.js +82 -0
- package/src/sast/laravel-hardening.js +198 -0
- package/src/sast/ldap-injection.js +100 -0
- package/src/sast/llm-owasp.js +465 -0
- package/src/sast/llm-stored-prompt.js +103 -0
- package/src/sast/llm-trading-agent.js +161 -0
- package/src/sast/llm.js +308 -0
- package/src/sast/logic.js +140 -0
- package/src/sast/mass-assignment.js +101 -0
- package/src/sast/mcp-audit.js +242 -0
- package/src/sast/mobile-manifest.js +195 -0
- package/src/sast/model-load.js +164 -0
- package/src/sast/mutation-xss.js +87 -0
- package/src/sast/nosql-injection.js +82 -0
- package/src/sast/open-redirect.js +119 -0
- package/src/sast/php.js +91 -0
- package/src/sast/pipeline.js +122 -0
- package/src/sast/primary-cwe-java.js +155 -0
- package/src/sast/prompt-firewall.js +151 -0
- package/src/sast/prompt-template.js +157 -0
- package/src/sast/prototype-pollution.js +112 -0
- package/src/sast/python-sinks.js +195 -0
- package/src/sast/quarkus-hardening.js +102 -0
- package/src/sast/rag-poisoning.js +118 -0
- package/src/sast/rate-limit.js +128 -0
- package/src/sast/response-splitting.js +138 -0
- package/src/sast/ruby.js +108 -0
- package/src/sast/rust.js +105 -0
- package/src/sast/solidity.js +167 -0
- package/src/sast/springboot-hardening.js +186 -0
- package/src/sast/ssrf-cloud-metadata.js +80 -0
- package/src/sast/ssti.js +116 -0
- package/src/sast/swift.js +162 -0
- package/src/sast/toctou.js +95 -0
- package/src/sast/webhook.js +101 -0
- package/src/sast/xpath-injection.js +51 -0
- package/src/sast/xxe.js +140 -0
- package/src/sast/zip-slip.js +200 -0
- package/src/sca/base-images.json +45 -0
- package/src/sca/container.js +107 -0
- package/src/sca/dep-confusion.js +134 -0
- package/src/sca/index.js +6 -0
- package/src/sca/popular-packages.json +41 -0
- package/src/sca/sarif-ingest.js +187 -0
- package/src/sca/vuln-function-hints.json +89 -0
- package/src/secrets/index.js +4 -0
|
@@ -0,0 +1,588 @@
|
|
|
1
|
+
// Interprocedural taint engine — IFDS-lite tabulation over the IR.
|
|
2
|
+
//
|
|
3
|
+
// Algorithm (simplified):
|
|
4
|
+
//
|
|
5
|
+
// For each function F:
|
|
6
|
+
// We compute a SUMMARY of the form
|
|
7
|
+
//     (entry: Set<TaintFact>) → { returnTainted: bool, mutatedParams: Set, taintedGlobals: Set, findings: Finding[] }
//   where TaintFact is a variable name or a dotted access path (string).
//
// To handle inter-procedural flow:
//   When the engine encounters a call site `f(...args)`:
//     1. Look up the resolved callee qid in the call graph.
//     2. Compute an entry-taint-state for that callee: which of the callee's
//        parameters bind to tainted caller-side expressions?
//     3. If a summary already exists for that callee + entry-state, use it.
//        Otherwise, recursively analyze the callee with that entry state,
//        cache the summary, and use it.
//     4. The callee's `returnTainted` determines whether the call expression's
//        value is tainted on return.
//     5. The callee's `mutatedParams` taint specific caller-side variables
//        (param-by-reference, e.g. `Object.assign(target, tainted)`).
|
|
22
|
+
//
|
|
23
|
+
// Recursion: We use the standard fixed-point trick — when a function is
|
|
24
|
+
// already on the analysis stack, return a conservative summary (no
|
|
25
|
+
// tainting). The cache then re-iterates.
|
|
26
|
+
//
|
|
27
|
+
// Sources: anywhere a CFG node reads a catalog-registered source pattern,
|
|
28
|
+
// the resulting variable becomes tainted.
|
|
29
|
+
//
|
|
30
|
+
// Sinks: anywhere a CFG node calls a catalog-registered sink with a tainted
|
|
31
|
+
// argument, we emit a finding.
|
|
32
|
+
//
|
|
33
|
+
// Sanitizers: a call to a catalog-registered sanitizer kills the taint on its
|
|
34
|
+
// argument (the call's return value is treated as clean).
|
|
35
|
+
|
|
36
|
+
import { matchSource, matchSinkOrSanitizer } from './catalog.js';
|
|
37
|
+
import { accessPathOf, isCoveredBy, addPath, removePathAndDescendants, joinSets as joinAccessSets, setsEqual as accessSetsEqual } from './access-paths.js';
|
|
38
|
+
import { aliasesForVar } from './points-to.js';
|
|
39
|
+
import { higherOrderTaintFlow } from './higher-order.js';
|
|
40
|
+
import { SummaryCache, entryStateFromCall } from './summaries.js';
|
|
41
|
+
|
|
42
|
+
// v0.70 #2 — addPath that also taints every alias of the variable.
|
|
43
|
+
// When `target` is a dotted path like "a.x" and the root `a` has aliases
|
|
44
|
+
// {a, obj}, we taint both `a.x` and `obj.x`. The points-to graph is read
|
|
45
|
+
// from callContext._pointsTo (built by runDeepAnalysis when
|
|
46
|
+
// AGENTIC_SECURITY_POINTS_TO=1).
|
|
47
|
+
function _addPathAliasAware(state, path, callContext) {
  // Adds `path` to the taint state and, when points-to data is available on
  // the call context, also adds the same path re-rooted on every alias that
  // aliasesForVar reports for the path's root variable.
  let tainted = addPath(state, path);

  const { _pointsTo: pointsTo, _currentFnQid: ownerQid } = callContext || {};
  if (typeof path !== 'string' || !pointsTo || !ownerQid) {
    // No alias information (or a non-string path): plain addPath result.
    return tainted;
  }

  // Split "a.x.y" into root "a" and suffix ".x.y"; an undotted path is all root.
  const dotAt = path.indexOf('.');
  const hasSuffix = dotAt >= 0;
  const root = hasSuffix ? path.slice(0, dotAt) : path;
  const suffix = hasSuffix ? path.slice(dotAt) : '';

  for (const alias of aliasesForVar(pointsTo, ownerQid, root)) {
    // The root itself was already added by the first addPath call.
    if (alias !== root) {
      tainted = addPath(tainted, alias + suffix);
    }
  }
  return tainted;
}
|
|
63
|
+
|
|
64
|
+
function exprTaint(expr, state) {
  // Decides whether `expr` carries taint under `state` (truthy result means
  // tainted). A member expression that exprIsSource resolves to a catalog
  // source counts as tainted at the read site, so an inline `req.body.host`
  // needs no intermediate local variable.
  if (!expr) return false;
  if (expr.kind === 'member' && exprIsSource(expr)) return true;

  // Field-sensitive check: when accessPathOf yields a path for the
  // expression, the answer is whatever isCoveredBy reports for that path.
  // This is what separates `user.password` from `user.email`.
  const path = accessPathOf(expr);
  if (path !== null) return isCoveredBy(state, path);

  // A composite expression is tainted when any of its children is.
  const anyTainted = (children) => (children || []).some((child) => exprTaint(child, state));

  const { kind } = expr;
  if (kind === 'binary' || kind === 'logical') {
    return exprTaint(expr.left, state) || exprTaint(expr.right, state);
  }
  if (kind === 'tpl') return anyTainted(expr.parts);
  if (kind === 'union') return anyTainted(expr.branches);
  if (kind === 'array') return anyTainted(expr.elements);
  if (kind === 'object') {
    return (expr.props || []).some((prop) => exprTaint(prop.value, state));
  }
  if (kind === 'call') {
    // Call statements are processed at the CFG level. For an inline call
    // expression we only ask whether any argument is tainted, so a sanitizer
    // wrapping a tainted argument is still reported as tainted here.
    return anyTainted(expr.args);
  }
  // Literals, 'unknown' nodes and every other kind are treated as clean.
  return false;
}
|
|
95
|
+
|
|
96
|
+
// Premortem #10: which recorded sources actually reach this expression?
|
|
97
|
+
// Collects the variable / access-path roots referenced by `expr` and returns
|
|
98
|
+
// the _taintSources entries whose varName matches one of those roots. This
|
|
99
|
+
// replaces "first source we ever saw" with "sources tied to this argument."
|
|
100
|
+
function _collectExprVars(expr, out) {
  // Walks `expr` and adds to the Set `out` every identifier name and access
  // path it references. Bare strings are added as-is.
  if (!expr) return;
  if (typeof expr === 'string') {
    out.add(expr);
    return;
  }

  // Recurse into each entry of a child list; non-array lists are ignored.
  const walkAll = (items, pick = (item) => item) => {
    if (!Array.isArray(items)) return;
    for (const item of items) _collectExprVars(pick(item), out);
  };

  switch (expr.kind) {
    case 'ident':
      if (expr.name) out.add(expr.name);
      return;
    case 'member': {
      // Record the access path (e.g. `user.email`) and then descend into the
      // object so the root (`user`) is recorded as well.
      const path = accessPathOf(expr);
      if (path) out.add(path);
      if (expr.object) _collectExprVars(expr.object, out);
      return;
    }
    case 'binary':
    case 'logical':
      _collectExprVars(expr.left, out);
      _collectExprVars(expr.right, out);
      return;
    case 'tpl':
      walkAll(expr.parts);
      return;
    case 'union':
      walkAll(expr.branches);
      return;
    case 'object':
      walkAll(expr.props, (prop) => prop.value);
      return;
    case 'array':
      walkAll(expr.elements);
      return;
    case 'call':
      walkAll(expr.args);
      return;
    default:
      // Other node kinds contribute nothing.
      return;
  }
}
|
|
130
|
+
function _sourcesReachingExpr(expr, _state, taintSources) {
  // Returns the entries of `taintSources` whose varName is referenced by
  // `expr`, in their original order. `_state` is accepted but not consulted.
  if (!Array.isArray(taintSources) || taintSources.length === 0) return [];

  const referenced = new Set();
  _collectExprVars(expr, referenced);
  if (referenced.size === 0) return [];

  // A source matches a referenced name when the name equals the source's
  // varName or extends it as a dotted access path (`user` covers `user.email`).
  const candidates = [...referenced];
  const coveredBy = (name) => candidates.some(
    (candidate) => typeof candidate === 'string'
      && (candidate === name || candidate.startsWith(name + '.')),
  );

  const reaching = [];
  for (const source of taintSources) {
    const name = source.varName;
    if (!name) continue;
    if (referenced.has(name) || coveredBy(name)) reaching.push(source);
  }
  return reaching;
}
|
|
151
|
+
|
|
152
|
+
// Heuristic: does this expression read a registered source?
|
|
153
|
+
function exprIsSource(expr) {
  // Walks down a member chain (`req.body.name` → `req.body` → `req`) and
  // returns the first matchSource hit, or null when no link in the chain
  // matches or the expression is not a member expression.
  let cursor = expr;
  while (cursor && cursor.kind === 'member') {
    const hit = matchSource(cursor);
    if (hit) return hit;
    cursor = cursor.object;
  }
  return null;
}
|
|
165
|
+
|
|
166
|
+
// Apply a CFG node to a taint-state. Returns the new state + any finding emitted.
|
|
167
|
+
function step(node, stateIn, callContext) {
|
|
168
|
+
const state = new Set(stateIn);
|
|
169
|
+
const findings = [];
|
|
170
|
+
|
|
171
|
+
switch (node.kind) {
|
|
172
|
+
case 'entry':
|
|
173
|
+
case 'exit':
|
|
174
|
+
case 'noop':
|
|
175
|
+
case 'loop-header':
|
|
176
|
+
return { state, findings };
|
|
177
|
+
|
|
178
|
+
case 'assign': {
|
|
179
|
+
// Source detection on RHS.
|
|
180
|
+
const src = exprIsSource(node.source);
|
|
181
|
+
const target = typeof node.target === 'string' ? node.target : null;
|
|
182
|
+
let newState = state;
|
|
183
|
+
// Premortem #7: interprocedural return-taint via SummaryCache. If the
|
|
184
|
+
// RHS is a call to a known callee whose empty-entry-state summary says
|
|
185
|
+
// the return is tainted, taint the assignment target. This makes the
|
|
186
|
+
// simplest cross-function flow (helper reads req.body and returns it)
|
|
187
|
+
// visible to the engine — the case the cache was built for.
|
|
188
|
+
const calleeName = node.source && node.source.kind === 'call' && typeof node.source.callee === 'string'
|
|
189
|
+
? node.source.callee : null;
|
|
190
|
+
if (target && calleeName && callContext._summaryCache && callContext._callGraph) {
|
|
191
|
+
const resolved = callContext._callGraph.resolve ? callContext._callGraph.resolve(calleeName) : null;
|
|
192
|
+
const fn = resolved && resolved.qid ? resolved : null;
|
|
193
|
+
const qid = resolved && (resolved.qid || resolved);
|
|
194
|
+
if (typeof qid === 'string') {
|
|
195
|
+
// v0.66 — context-sensitive lookup. Build the entry-state from
|
|
196
|
+
// the call args + current taint; look up (and lazily compute) the
|
|
197
|
+
// summary for THAT state, not just empty. This is what closes the
|
|
198
|
+
// "helper is pure when called clean but tainted when called with
|
|
199
|
+
// user input" FN class.
|
|
200
|
+
const callerTainted = newState;
|
|
201
|
+
const callArgs = (node.source.args || []);
|
|
202
|
+
const paramNames = (fn && Array.isArray(fn.params)) ? fn.params : [];
|
|
203
|
+
const entry = paramNames.length
|
|
204
|
+
? entryStateFromCall(paramNames, callArgs, callerTainted)
|
|
205
|
+
: new Set();
|
|
206
|
+
let sum = callContext._summaryCache.get(qid, entry);
|
|
207
|
+
if (!sum && fn && fn.cfg) {
|
|
208
|
+
// Lazy compute under this entry state. Use a fresh ctx so we
|
|
209
|
+
// don't pollute the outer caller's _taintSources with the
|
|
210
|
+
// callee's internal noise.
|
|
211
|
+
sum = callContext._summaryCache.compute(qid, entry, () => {
|
|
212
|
+
const inner = {
|
|
213
|
+
_findings: [], _taintSources: [], _returnTainted: false,
|
|
214
|
+
_stack: new Set(), deadlineMs: callContext.deadlineMs,
|
|
215
|
+
_summaryCache: callContext._summaryCache,
|
|
216
|
+
_callGraph: callContext._callGraph,
|
|
217
|
+
_mutatedParamsOut: new Set(),
|
|
218
|
+
};
|
|
219
|
+
try { analyzeFunction(fn, entry, inner); } catch {}
|
|
220
|
+
return {
|
|
221
|
+
returnTainted: !!inner._returnTainted,
|
|
222
|
+
mutatedParams: inner._mutatedParamsOut || new Set(),
|
|
223
|
+
taintedGlobals: new Set(),
|
|
224
|
+
findings: [],
|
|
225
|
+
};
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
if (sum && sum.returnTainted) {
|
|
229
|
+
newState = _addPathAliasAware(newState, target, callContext);
|
|
230
|
+
callContext._taintSources.push({
|
|
231
|
+
varName: target,
|
|
232
|
+
sourceId: `interproc:${qid}`,
|
|
233
|
+
sourceLabel: `interproc-return:${calleeName}`,
|
|
234
|
+
provenance: 'interproc',
|
|
235
|
+
line: node.line,
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
// applyAtCallSite — mutated params propagate to caller arg-vars.
|
|
239
|
+
if (sum && sum.mutatedParams && sum.mutatedParams.size && paramNames.length) {
|
|
240
|
+
const mutated = callContext._summaryCache.applyAtCallSite(
|
|
241
|
+
sum, paramNames, callArgs, callerTainted);
|
|
242
|
+
for (const v of mutated.mutated) newState = addPath(newState, v);
|
|
243
|
+
}
|
|
244
|
+
if (sum && sum.returnTainted) return { state: newState, findings: [] };
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
if (src && target) {
|
|
248
|
+
newState = _addPathAliasAware(newState, target, callContext);
|
|
249
|
+
const sourcePath = accessPathOf(node.source);
|
|
250
|
+
if (sourcePath) newState = addPath(newState, sourcePath);
|
|
251
|
+
callContext._taintSources.push({ varName: target, sourceId: src.id, sourceLabel: src.label, provenance: src.provenance || null, line: node.line });
|
|
252
|
+
} else if (exprTaint(node.source, newState)) {
|
|
253
|
+
// P1.1: when the source IS a pure access path (e.g., RHS is `obj.foo.bar`),
|
|
254
|
+
// taint the TARGET as well as transitively propagate the source path so
|
|
255
|
+
// later uses of the same source remain tainted. The target path
|
|
256
|
+
// becomes the new tainted location.
|
|
257
|
+
if (target) {
|
|
258
|
+
newState = _addPathAliasAware(newState, target, callContext);
|
|
259
|
+
const sourcePath = accessPathOf(node.source);
|
|
260
|
+
if (sourcePath && !isCoveredBy(newState, sourcePath)) newState = addPath(newState, sourcePath);
|
|
261
|
+
}
|
|
262
|
+
} else {
|
|
263
|
+
// Re-assigning a previously-tainted var to a clean value clears it
|
|
264
|
+
// AND its descendants — P1.1 semantics: assigning `x = clean` kills
|
|
265
|
+
// `x.foo`, `x.foo.bar`, etc. Sanitization at root level.
|
|
266
|
+
if (target) newState = removePathAndDescendants(newState, target);
|
|
267
|
+
}
|
|
268
|
+
return { state: newState, findings };
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
case 'call': {
|
|
272
|
+
// 1. Catalog match: sanitizer, sink, or just an external/unresolved call.
|
|
273
|
+
const cat = matchSinkOrSanitizer(node.callee);
|
|
274
|
+
const argTaints = (node.args || []).map(a => exprTaint(a, state));
|
|
275
|
+
// v0.66 — apply mutated-param taint at plain (non-assign) call sites.
|
|
276
|
+
// Object.assign(target, tainted) → target becomes tainted in caller.
|
|
277
|
+
if (callContext._summaryCache && callContext._callGraph
|
|
278
|
+
&& typeof node.callee === 'string') {
|
|
279
|
+
const resolved = callContext._callGraph.resolve
|
|
280
|
+
? callContext._callGraph.resolve(node.callee) : null;
|
|
281
|
+
const fn = resolved && resolved.qid ? resolved : null;
|
|
282
|
+
const qid = resolved && (resolved.qid || resolved);
|
|
283
|
+
if (typeof qid === 'string' && fn && Array.isArray(fn.params)) {
|
|
284
|
+
const paramNames = fn.params;
|
|
285
|
+
const entry = paramNames.length
|
|
286
|
+
? entryStateFromCall(paramNames, node.args || [], state)
|
|
287
|
+
: new Set();
|
|
288
|
+
const sum = callContext._summaryCache.get(qid, entry);
|
|
289
|
+
if (sum && sum.mutatedParams && sum.mutatedParams.size) {
|
|
290
|
+
const mutated = callContext._summaryCache.applyAtCallSite(
|
|
291
|
+
sum, paramNames, node.args || [], state);
|
|
292
|
+
for (const v of mutated.mutated) state = addPath(state, v);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
if (cat) {
|
|
297
|
+
for (const e of cat) {
|
|
298
|
+
if (e.kind === 'sink' && (
|
|
299
|
+
e.argIndex === 'all' ? argTaints.some(Boolean) :
|
|
300
|
+
(typeof e.argIndex === 'number' && argTaints[e.argIndex])
|
|
301
|
+
)) {
|
|
302
|
+
const taintedArgIdx = e.argIndex === 'all'
|
|
303
|
+
? argTaints.findIndex(Boolean) : e.argIndex;
|
|
304
|
+
const taintedArgExpr = (node.args || [])[taintedArgIdx];
|
|
305
|
+
// Premortem #10: attribute the source for THIS sink to the
|
|
306
|
+
// source(s) that taint the actual argument expression — not the
|
|
307
|
+
// first source the worklist happened to record. We walk the
|
|
308
|
+
// expression's free vars / access paths against the recorded
|
|
309
|
+
// _taintSources and keep entries whose root variable still
|
|
310
|
+
// covers something in the expression.
|
|
311
|
+
const reachingSources = _sourcesReachingExpr(taintedArgExpr, state, callContext._taintSources);
|
|
312
|
+
const traceForThisFinding = reachingSources.length
|
|
313
|
+
? reachingSources.slice(0, 5)
|
|
314
|
+
// Fallback: better to surface "no precise source" than the wrong source.
|
|
315
|
+
: [];
|
|
316
|
+
findings.push({
|
|
317
|
+
kind: 'taint',
|
|
318
|
+
sinkId: e.id,
|
|
319
|
+
vuln: e.vuln?.name || 'Tainted Sink',
|
|
320
|
+
severity: e.vuln?.severity || 'high',
|
|
321
|
+
cwe: e.vuln?.cwe || null,
|
|
322
|
+
remediation: e.vuln?.remediation || null,
|
|
323
|
+
line: node.line,
|
|
324
|
+
argIndex: taintedArgIdx,
|
|
325
|
+
callee: node.callee,
|
|
326
|
+
sourceProvenance: (traceForThisFinding[0]?.provenance) || null,
|
|
327
|
+
trace: traceForThisFinding,
|
|
328
|
+
});
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
// 2. P1.3 — higher-order taint flow. When the call is `arr.map(fn)` or
|
|
333
|
+
// `promise.then(fn)` and the receiver is tainted, propagate taint
|
|
334
|
+
// into the callback's first parameter. v1: we propagate AT THE
|
|
335
|
+
// CALLBACK INVOCATION LEVEL by adding the callback's first-arg
|
|
336
|
+
// name (when resolvable as a plain ident or function-value) into
|
|
337
|
+
// the taint state.
|
|
338
|
+
const hoFlow = (() => {
|
|
339
|
+
// Heuristic receiver-tainted check: if the callee string is
|
|
340
|
+
// "<recv>.<method>", check whether <recv> is in state.
|
|
341
|
+
const callee = typeof node.callee === 'string' ? node.callee : null;
|
|
342
|
+
if (!callee) return null;
|
|
343
|
+
const dot = callee.lastIndexOf('.');
|
|
344
|
+
if (dot <= 0) return null;
|
|
345
|
+
const recv = callee.slice(0, dot);
|
|
346
|
+
const recvTainted = isCoveredBy(state, recv);
|
|
347
|
+
return higherOrderTaintFlow(node, recvTainted);
|
|
348
|
+
})();
|
|
349
|
+
if (hoFlow && hoFlow.taintsCallbackParam === 0) {
|
|
350
|
+
// The first arg should be the callback. If it's a plain ident or
|
|
351
|
+
// function-value, the engine's per-callee summary path will pick it
|
|
352
|
+
// up when the callee is independently analyzed. We don't model the
|
|
353
|
+
// callback inline here; instead we record on callContext that the
|
|
354
|
+
// callback was invoked with a tainted first param, so the engine's
|
|
355
|
+
// call-graph pass can re-run the callback with that entry state.
|
|
356
|
+
const cb = (node.args || [])[0];
|
|
357
|
+
if (cb && (cb.kind === 'ident' || cb.kind === 'function-value')) {
|
|
358
|
+
callContext._higherOrderInvocations = callContext._higherOrderInvocations || [];
|
|
359
|
+
callContext._higherOrderInvocations.push({
|
|
360
|
+
callee: cb.kind === 'ident' ? cb.name : (cb.qid || null),
|
|
361
|
+
paramIndex: 0,
|
|
362
|
+
taintedParam: true,
|
|
363
|
+
line: node.line,
|
|
364
|
+
via: hoFlow.kind,
|
|
365
|
+
});
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
return { state, findings };
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
case 'if': {
|
|
372
|
+
// Path-feasibility lite: if the condition is a literal false / unreachable,
|
|
373
|
+
// mark the node so the CFG walker can skip the consequent edge.
|
|
374
|
+
// For now we simply propagate state to both branches.
|
|
375
|
+
return { state, findings };
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
case 'return': {
|
|
379
|
+
if (exprTaint(node.value, state)) {
|
|
380
|
+
callContext._returnTainted = true;
|
|
381
|
+
}
|
|
382
|
+
return { state, findings };
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
case 'throw': {
|
|
386
|
+
// Thrown values don't taint subsequent code in the same fn — exit.
|
|
387
|
+
return { state, findings };
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
default:
|
|
391
|
+
return { state, findings };
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
/**
 * Worklist traversal of one function's CFG, starting from a given
 * entry-taint-state.
 *
 * Results are reported through two channels:
 *   - the return value is the out-state recorded for `fn.cfg.exit`;
 *   - when `callContext` is provided, every finding returned by `step` is
 *     appended to `callContext._findings` tagged with `_funcQid: fn.qid`,
 *     `callContext._currentFnQid` is set to `fn.qid`, and the declared params
 *     covered by the exit state are added to `callContext._mutatedParamsOut`.
 *
 * A node can be visited more than once before its out-state stabilises, so the
 * same finding may be appended repeatedly; callers are expected to dedupe.
 *
 * Premortem 2R4.4 / 2R-9: honors `callContext.deadlineMs`, checked every 128
 * iterations. A pathological CFG (large generated file with dense control
 * flow) can otherwise hold past the global timeout. Independently of the
 * deadline, the loop gives up after ITER_BUDGET iterations.
 *
 * @param {object} fn - Function record; reads `fn.cfg.nodes` (plain object
 *   keyed by node id), `fn.cfg.entry`, `fn.cfg.exit`, `fn.qid`, and the
 *   optional `fn.params` array of parameter names.
 * @param {Iterable<string>} entryState - Taint state at function entry
 *   (copied, never mutated).
 * @param {object} [callContext] - Shared analysis context. This function
 *   tolerates it being absent. NOTE(review): `step` may still dereference it
 *   for some node kinds — confirm before relying on context-free calls.
 * @returns {Set<string>} Out-state of the exit node, or an empty Set when the
 *   exit node was never reached (e.g. budget or deadline hit first).
 */
function analyzeFunction(fn, entryState, callContext) {
  const nodes = fn.cfg.nodes; // plain object
  const work = [fn.cfg.entry];
  const inStates = new Map(); // nodeId → Set<varName>
  const outStates = new Map();
  inStates.set(fn.cfg.entry, new Set(entryState));

  // v0.70 #2 — points-to context for the step() transfer. Setting it here
  // (instead of plumbing through step's signature) keeps the worklist loop
  // unchanged and lets `step` consult `aliasesForVar` when
  // callContext._pointsTo is present.
  if (callContext) callContext._currentFnQid = fn.qid;

  const deadlineMs = (callContext && typeof callContext.deadlineMs === 'number')
    ? callContext.deadlineMs
    : Infinity;
  const ITER_BUDGET = 5000;
  let iterations = 0;

  while (work.length) {
    if (++iterations > ITER_BUDGET) break;
    // Check the global deadline every 128 iterations (mask 0x7f) — Date.now()
    // is cheap but not free; this keeps overhead negligible on small functions.
    if ((iterations & 0x7f) === 0 && Date.now() > deadlineMs) break;

    const nid = work.shift();
    const node = nodes[nid];
    if (!node) continue;

    const incoming = inStates.get(nid) || new Set();
    const { state: out, findings } = step(node, incoming, callContext);

    // callContext is optional everywhere else in this function, so guard the
    // findings sink the same way instead of throwing on a missing context.
    if (callContext && findings.length) {
      if (!Array.isArray(callContext._findings)) callContext._findings = [];
      for (const f of findings) {
        callContext._findings.push({ ...f, _funcQid: fn.qid });
      }
    }

    const prevOut = outStates.get(nid);
    const merged = mergeStates(prevOut, out);
    if (!prevOut || !stateEq(prevOut, merged)) {
      outStates.set(nid, merged);
      // Out-state grew: push it into each successor and re-queue the ones
      // whose in-state actually changed.
      for (const s of (node.succ || [])) {
        const succIn = inStates.get(s);
        const newIn = mergeStates(succIn, merged);
        if (!succIn || !stateEq(succIn, newIn)) {
          inStates.set(s, newIn);
          work.push(s);
        }
      }
    }
  }

  const exit = outStates.get(fn.cfg.exit) || new Set();
  // v0.66 — record which params are tainted at function exit so the
  // caller's applyAtCallSite can propagate that mutated taint back. We
  // intersect the exit-state with the function's declared params (only
  // param vars count as "mutated by reference"; locals are caller-invisible).
  if (callContext && Array.isArray(fn.params) && fn.params.length) {
    if (!callContext._mutatedParamsOut) callContext._mutatedParamsOut = new Set();
    for (const p of fn.params) {
      if (isCoveredBy(exit, p)) callContext._mutatedParamsOut.add(p);
    }
  }
  return exit;
}
|
|
458
|
+
|
|
459
|
+
/**
 * Join two taint states into one.
 *
 * P1.1: delegates to the access-path-aware union, which collapses longer
 * descendants under their shorter-prefix parents.
 *
 * @param {Set<string>|undefined} a - First state (may be absent).
 * @param {Set<string>|undefined} b - Second state (may be absent).
 * @returns {*} Whatever `joinAccessSets(a, b)` produces.
 */
function mergeStates(a, b) {
  const joined = joinAccessSets(a, b);
  return joined;
}
|
|
464
|
+
/**
 * Compare two taint states for equality.
 *
 * P1.1: delegates to the access-path-aware (canonicalized) set comparison.
 *
 * @param {Set<string>|undefined} a - First state.
 * @param {Set<string>|undefined} b - Second state.
 * @returns {*} Whatever `accessSetsEqual(a, b)` produces.
 */
function stateEq(a, b) {
  const equal = accessSetsEqual(a, b);
  return equal;
}
|
|
468
|
+
|
|
469
|
+
// ── Top-level entry ─────────────────────────────────────────────────────────
/**
 * Run the IR taint engine over every function in `callGraph`.
 *
 * Each function is analyzed with an EMPTY entry-taint-state; its internal
 * sources populate the state as the worklist walks. (Future work: when the
 * caller of F passes tainted args, re-analyze F with those params marked.
 * The infra for it is in callContext.)
 *
 * Two phases:
 *   1. Summary pre-pass — compute the empty-entry summary (`returnTainted`,
 *      `mutatedParams`) for every function and repeat until no summary
 *      changes, capped at MAX_FP_ITERS rounds. Findings from this phase are
 *      discarded.
 *   2. Reporting pass — analyze each function once more (up to `fnLimit`
 *      functions) and collect findings, deduplicated by sink id, file and line.
 *
 * @param {*} perFileIR - Accepted for interface compatibility; not read here.
 * @param {object} callGraph - Must expose `functions` (a Map whose values
 *   carry `qid` and `file`); passed through to call sites via the context.
 * @param {object} [opts]
 * @param {number} [opts.fnLimit=5000] - Max functions analyzed in phase 2.
 * @param {number} [opts.deadlineMs] - Absolute `Date.now()` deadline.
 * @param {object} [opts.summaryCache] - v0.69: pre-seeded cache (incremental
 *   mode); must provide `get(qid, entry)` and `set(qid, entry, summary)`.
 * @returns {object[]} Flat array of findings enriched with file/line/source/
 *   sink/chain. The summary cache is attached as the non-enumerable property
 *   `_summaryCache` for incremental persistence.
 */
export function runTaintEngine(perFileIR, callGraph, opts = {}) {
  const all = [];
  const seen = new Set();
  const fnLimit = opts.fnLimit || 5000;
  const deadlineMs = typeof opts.deadlineMs === 'number' ? opts.deadlineMs : Infinity;
  let n = 0;

  // Premortem #7: instantiate the k=1 SummaryCache and seed it with each
  // function's empty-entry-state summary (returnTainted bit). The cache is
  // available to call sites through callContext so the worklist can ask
  // "does callee F return tainted under this entry state?" before
  // conservatively assuming it doesn't.
  //
  // v0.69 — opts.summaryCache lets the caller (runDeepAnalysis with
  // incremental mode) hand in a pre-seeded cache from persisted state.
  const summaryCache = opts.summaryCache || new SummaryCache();

  // Deterministic ordering (Sentinel-parity §9.2): sort functions by qid so
  // cache-cold runs produce the same finding sequence run-over-run.
  const fnList = [...callGraph.functions.values()].sort((a, b) =>
    a.qid < b.qid ? -1 : a.qid > b.qid ? 1 : 0
  );

  // True when the cached mutated-param collection has exactly the members of
  // `next`. Equal sizes alone are not enough: {a} vs {b} is a real change.
  const sameMutatedParams = (prev, next) => {
    const prevSize = prev?.size || 0;
    if (prevSize !== next.size) return false;
    if (prevSize === 0) return true;
    if (typeof prev.has !== 'function') return false;
    for (const p of next) {
      if (!prev.has(p)) return false;
    }
    return true;
  };

  // Pre-pass + fixed-point: compute empty-entry-state summaries for every
  // function, then re-run the pre-pass until no summary changes (capped at
  // MAX_FP_ITERS so recursion and chains converge without unbounded blowup).
  // v0.66 — the inner ctx records mutatedParams via _mutatedParamsOut so
  // cross-function param mutation propagates.
  //
  // Convergence is tracked with an explicit `changed` flag. Comparing the
  // cache's entry count between rounds does not work: the count stops growing
  // after the first round even while individual summaries are still being
  // refined, which ended the loop one round too early.
  const MAX_FP_ITERS = 3;
  for (let it = 0; it < MAX_FP_ITERS; it++) {
    if (Date.now() > deadlineMs) break;
    let changed = false;
    for (const fn of fnList) {
      if (Date.now() > deadlineMs) break;
      const entry = new Set();
      const existing = summaryCache.get(fn.qid, entry);
      // On re-iterations, recompute even if cached so refined summaries
      // (from now-known callee summaries) can lift returnTainted/mutated.
      const ctx = {
        _findings: [], _taintSources: [], _returnTainted: false,
        _stack: new Set(), deadlineMs,
        _summaryCache: summaryCache, _callGraph: callGraph,
        _mutatedParamsOut: new Set(),
      };
      try {
        analyzeFunction(fn, entry, ctx);
      } catch {
        // Best-effort: a function that fails mid-analysis still contributes
        // whatever summary bits were recorded before the failure.
      }
      const next = {
        returnTainted: !!ctx._returnTainted,
        mutatedParams: ctx._mutatedParamsOut || new Set(),
        taintedGlobals: new Set(),
        findings: [],
      };
      if (!existing
          || existing.returnTainted !== next.returnTainted
          || !sameMutatedParams(existing.mutatedParams, next.mutatedParams)) {
        summaryCache.set(fn.qid, entry, next);
        changed = true;
      }
    }
    if (!changed) break;
  }

  for (const fn of fnList) {
    if (++n > fnLimit) break;
    if (Date.now() > deadlineMs) break; // global timeout
    // Module-level functions: analyze with an empty entry state. The function
    // discovers its own sources from req.body/process.env/etc. as it walks.
    const callContext = {
      _findings: [],
      _taintSources: [],
      _returnTainted: false,
      _stack: new Set(),
      deadlineMs, // honored by the worklist inside analyzeFunction
      _summaryCache: summaryCache,
      _callGraph: callGraph,
    };
    try {
      analyzeFunction(fn, new Set(), callContext);
    } catch { continue; }

    for (const f of callContext._findings) {
      // The worklist may revisit a node, so the same sink can be reported
      // several times; keep the first occurrence per (sink, file, line).
      const key = `${f.sinkId}:${fn.file}:${f.line}`;
      if (seen.has(key)) continue;
      seen.add(key);
      all.push({
        id: `ir-taint:${fn.file}:${f.line}:${f.sinkId}`,
        file: fn.file,
        line: f.line,
        vuln: f.vuln,
        severity: f.severity,
        cwe: f.cwe,
        remediation: f.remediation,
        parser: 'IR-TAINT',
        confidence: 0.75,
        source: f.trace && f.trace.length ? {
          file: fn.file,
          line: f.trace[0].line,
          label: f.trace[0].sourceLabel,
        } : null,
        sink: {
          file: fn.file,
          line: f.line,
          label: f.sinkId,
        },
        chain: (f.trace || []).map(t => ({
          file: fn.file, line: t.line, label: t.sourceLabel,
        })),
      });
    }
  }
  // v0.69 — expose cache to caller (runDeepAnalysis) for incremental persistence.
  Object.defineProperty(all, '_summaryCache', { value: summaryCache, enumerable: false });
  return all;
}
|