@clear-capabilities/agentic-security-scanner 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1580 -0
- package/bin/.agentic-security/findings.json +1577 -0
- package/bin/.agentic-security/last-scan.json +1577 -0
- package/bin/.agentic-security/last-scan.json.sig +1 -0
- package/bin/.agentic-security/scan-history.json +465 -0
- package/bin/.agentic-security/streak.json +25 -0
- package/bin/agentic-security-audit.js +198 -0
- package/bin/agentic-security-consistency.js +80 -0
- package/bin/agentic-security-diff.js +136 -0
- package/bin/agentic-security-lsp.js +12 -0
- package/bin/agentic-security-mcp.js +40 -0
- package/bin/agentic-security-rule.js +153 -0
- package/bin/agentic-security.js +1683 -0
- package/dist/117.index.js +207 -0
- package/dist/178.index.js +250 -0
- package/dist/218.index.js +793 -0
- package/dist/227.index.js +192 -0
- package/dist/301.index.js +167 -0
- package/dist/384.index.js +18 -0
- package/dist/476.index.js +126 -0
- package/dist/513.index.js +373 -0
- package/dist/520.index.js +13 -0
- package/dist/601.index.js +1038 -0
- package/dist/634.index.js +1892 -0
- package/dist/637.index.js +216 -0
- package/dist/660.index.js +131 -0
- package/dist/675.index.js +451 -0
- package/dist/826.index.js +188 -0
- package/dist/830.index.js +133 -0
- package/dist/agentic-security.mjs +272 -0
- package/dist/agentic-security.mjs.sha256 +1 -0
- package/dist/calibration-seed.json +27 -0
- package/package.json +77 -0
- package/src/.agentic-security/findings.json +80844 -0
- package/src/.agentic-security/last-scan.json +80844 -0
- package/src/.agentic-security/last-scan.json.sig +1 -0
- package/src/.agentic-security/scan-history.json +8408 -0
- package/src/.agentic-security/streak.json +26 -0
- package/src/badge.js +188 -0
- package/src/compare.js +203 -0
- package/src/dataflow/.agentic-security/findings.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
- package/src/dataflow/.agentic-security/scan-history.json +735 -0
- package/src/dataflow/.agentic-security/streak.json +24 -0
- package/src/dataflow/CLAUDE.md +38 -0
- package/src/dataflow/access-paths.js +172 -0
- package/src/dataflow/async-sequencing.js +177 -0
- package/src/dataflow/backward.js +201 -0
- package/src/dataflow/catalog-expanded.js +485 -0
- package/src/dataflow/catalog.js +659 -0
- package/src/dataflow/cross-repo.js +219 -0
- package/src/dataflow/engine.js +588 -0
- package/src/dataflow/exception-flow.js +116 -0
- package/src/dataflow/exploit-prover.js +187 -0
- package/src/dataflow/higher-order.js +221 -0
- package/src/dataflow/ifds.js +347 -0
- package/src/dataflow/implicit-flow.js +129 -0
- package/src/dataflow/incremental.js +229 -0
- package/src/dataflow/index.js +181 -0
- package/src/dataflow/numeric-domain.js +192 -0
- package/src/dataflow/path-feasibility.js +114 -0
- package/src/dataflow/points-to.js +337 -0
- package/src/dataflow/polyglot.js +190 -0
- package/src/dataflow/proven-clean.js +159 -0
- package/src/dataflow/receiver-context.js +76 -0
- package/src/dataflow/sanitizer-proof.js +154 -0
- package/src/dataflow/soft-taint.js +140 -0
- package/src/dataflow/string-domain.js +234 -0
- package/src/dataflow/stub-aware-filter.js +100 -0
- package/src/dataflow/summaries.js +132 -0
- package/src/dataflow/symbolic-exec.js +238 -0
- package/src/dataflow/tabulation.js +135 -0
- package/src/engine.js +7763 -0
- package/src/history-scan.js +229 -0
- package/src/index.js +3 -0
- package/src/integrations/.agentic-security/findings.json +1504 -0
- package/src/integrations/.agentic-security/last-scan.json +1504 -0
- package/src/integrations/.agentic-security/scan-history.json +40 -0
- package/src/integrations/.agentic-security/streak.json +21 -0
- package/src/integrations/index.js +321 -0
- package/src/integrations/tickets.js +200 -0
- package/src/ir/.agentic-security/findings.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json.sig +1 -0
- package/src/ir/.agentic-security/scan-history.json +364 -0
- package/src/ir/.agentic-security/streak.json +23 -0
- package/src/ir/CLAUDE.md +172 -0
- package/src/ir/callgraph.js +73 -0
- package/src/ir/class-hierarchy.js +195 -0
- package/src/ir/index.js +152 -0
- package/src/ir/parser-cs.js +260 -0
- package/src/ir/parser-java.js +286 -0
- package/src/ir/parser-js.js +413 -0
- package/src/ir/parser-kt.js +258 -0
- package/src/ir/parser-py-cst.js +136 -0
- package/src/ir/parser-py.helper.py +501 -0
- package/src/ir/parser-py.js +312 -0
- package/src/ir/ssa.js +315 -0
- package/src/ir/type-stubs.js +288 -0
- package/src/leaderboard.js +152 -0
- package/src/llm-validator/.agentic-security/findings.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
- package/src/llm-validator/.agentic-security/scan-history.json +168 -0
- package/src/llm-validator/.agentic-security/streak.json +20 -0
- package/src/llm-validator/consistency.js +141 -0
- package/src/llm-validator/index.js +437 -0
- package/src/lsp/.agentic-security/findings.json +28 -0
- package/src/lsp/.agentic-security/last-scan.json +28 -0
- package/src/lsp/.agentic-security/scan-history.json +79 -0
- package/src/lsp/.agentic-security/streak.json +22 -0
- package/src/lsp/server.js +275 -0
- package/src/mcp/.agentic-security/findings.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +1125 -0
- package/src/mcp/.agentic-security/streak.json +22 -0
- package/src/mcp/CLAUDE.md +54 -0
- package/src/mcp/audit.js +136 -0
- package/src/mcp/redact.js +75 -0
- package/src/mcp/server.js +158 -0
- package/src/mcp/stdio.js +83 -0
- package/src/mcp/tools.js +940 -0
- package/src/mcp/validate.js +49 -0
- package/src/personality.js +164 -0
- package/src/poc-video.js +239 -0
- package/src/posture/.agentic-security/findings.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json.sig +1 -0
- package/src/posture/.agentic-security/scan-history.json +5557 -0
- package/src/posture/.agentic-security/streak.json +24 -0
- package/src/posture/CLAUDE.md +42 -0
- package/src/posture/adversarial-self-test.js +114 -0
- package/src/posture/adversary-agent.js +204 -0
- package/src/posture/agents-memory.js +135 -0
- package/src/posture/ai-code-fingerprint.js +171 -0
- package/src/posture/aibom.js +284 -0
- package/src/posture/api-inventory.js +96 -0
- package/src/posture/attack-playbooks.js +305 -0
- package/src/posture/auditor-agent.js +115 -0
- package/src/posture/auth-posture-import.js +135 -0
- package/src/posture/baseline-compare.js +114 -0
- package/src/posture/blast-radius.js +836 -0
- package/src/posture/bounty-prediction.js +141 -0
- package/src/posture/business-logic.js +239 -0
- package/src/posture/calibration-drift.js +93 -0
- package/src/posture/calibration-seed.json +27 -0
- package/src/posture/calibration.js +204 -0
- package/src/posture/clustering.js +75 -0
- package/src/posture/concurrency-checker.js +265 -0
- package/src/posture/confidence.js +65 -0
- package/src/posture/container-runtime.js +149 -0
- package/src/posture/counterfactual.js +109 -0
- package/src/posture/cross-lang-graphql.js +165 -0
- package/src/posture/cross-lang-grpc.js +166 -0
- package/src/posture/cross-lang-meta.js +101 -0
- package/src/posture/cross-lang-openapi.js +187 -0
- package/src/posture/cross-lang-orm.js +153 -0
- package/src/posture/cross-lang-queues.js +210 -0
- package/src/posture/crown-jewels.js +110 -0
- package/src/posture/custom-rules.js +361 -0
- package/src/posture/cve-alert-daemon.js +433 -0
- package/src/posture/cve-lookup.js +129 -0
- package/src/posture/dead-code.js +430 -0
- package/src/posture/defender-agent.js +158 -0
- package/src/posture/deploy-platform.js +204 -0
- package/src/posture/detector-fuzz.js +61 -0
- package/src/posture/deterministic.js +99 -0
- package/src/posture/drift.js +165 -0
- package/src/posture/epss.js +156 -0
- package/src/posture/exploitability-probability.js +212 -0
- package/src/posture/exploitability.js +121 -0
- package/src/posture/feature-flags.js +110 -0
- package/src/posture/finding-defaults.js +132 -0
- package/src/posture/fix-history.js +411 -0
- package/src/posture/fix-plan.js +121 -0
- package/src/posture/fix-verify-loop.js +157 -0
- package/src/posture/fix-verify.js +130 -0
- package/src/posture/flow-narration.js +105 -0
- package/src/posture/grader-calibration.js +156 -0
- package/src/posture/harness-discovery.js +113 -0
- package/src/posture/holdout-eval.js +144 -0
- package/src/posture/iac-reachability.js +163 -0
- package/src/posture/iam-policy.js +128 -0
- package/src/posture/integrity.js +97 -0
- package/src/posture/learning.js +166 -0
- package/src/posture/license-policy.js +109 -0
- package/src/posture/llm-redteam-prompts.js +418 -0
- package/src/posture/llm-redteam.js +303 -0
- package/src/posture/material-change.js +163 -0
- package/src/posture/mitigation-composite.js +55 -0
- package/src/posture/mttr.js +91 -0
- package/src/posture/network-policy-import.js +126 -0
- package/src/posture/path-predicates.js +99 -0
- package/src/posture/persona-prioritization.js +153 -0
- package/src/posture/poc-cwe-map.js +51 -0
- package/src/posture/poc-generator.js +500 -0
- package/src/posture/policy-gate.js +174 -0
- package/src/posture/pre-incident-archaeology.js +110 -0
- package/src/posture/profile.js +93 -0
- package/src/posture/reachability-filter.js +42 -0
- package/src/posture/regression-test-gen.js +200 -0
- package/src/posture/reverse-blast-radius.js +110 -0
- package/src/posture/router.js +109 -0
- package/src/posture/rule-overrides.js +198 -0
- package/src/posture/rule-pack-signing.js +209 -0
- package/src/posture/rule-packs.js +143 -0
- package/src/posture/rule-synthesis.js +108 -0
- package/src/posture/ruleset-version.js +71 -0
- package/src/posture/sbom.js +129 -0
- package/src/posture/schema-aware-bridge.js +207 -0
- package/src/posture/security-trend.js +87 -0
- package/src/posture/semantic-clone.js +114 -0
- package/src/posture/specification-mining.js +170 -0
- package/src/posture/stable-id.js +75 -0
- package/src/posture/stack-playbook.js +229 -0
- package/src/posture/streak.js +249 -0
- package/src/posture/suppressions.js +135 -0
- package/src/posture/telemetry-ingest.js +112 -0
- package/src/posture/threat-model.js +145 -0
- package/src/posture/three-agent-pipeline.js +74 -0
- package/src/posture/triage.js +146 -0
- package/src/posture/trust-boundary-diagram.js +115 -0
- package/src/posture/type-narrowing.js +129 -0
- package/src/posture/validator-metrics.js +179 -0
- package/src/posture/verifier-ephemeral.js +118 -0
- package/src/posture/verifier-target.js +147 -0
- package/src/posture/verifier.js +257 -0
- package/src/posture/version.js +75 -0
- package/src/posture/waf-ingest.js +200 -0
- package/src/posture/why-fired.js +141 -0
- package/src/pr-comment.js +172 -0
- package/src/pr-delta.js +198 -0
- package/src/report/.agentic-security/findings.json +79 -0
- package/src/report/.agentic-security/last-scan.json +79 -0
- package/src/report/.agentic-security/last-scan.json.sig +1 -0
- package/src/report/.agentic-security/scan-history.json +332 -0
- package/src/report/.agentic-security/streak.json +23 -0
- package/src/report/index.js +1136 -0
- package/src/report/mascot.js +42 -0
- package/src/runScan.js +141 -0
- package/src/sast/.agentic-security/findings.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json.sig +1 -0
- package/src/sast/.agentic-security/scan-history.json +788 -0
- package/src/sast/.agentic-security/streak.json +23 -0
- package/src/sast/CLAUDE.md +39 -0
- package/src/sast/_comment-strip.js +46 -0
- package/src/sast/agent-tool-escalation.js +131 -0
- package/src/sast/auth-provider.js +171 -0
- package/src/sast/authz.js +236 -0
- package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
- package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
- package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
- package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
- package/src/sast/bench-shape/index.js +62 -0
- package/src/sast/claude-hook-injection.js +199 -0
- package/src/sast/claude-md-prompt-injection.js +170 -0
- package/src/sast/claude-settings.js +165 -0
- package/src/sast/client-side.js +149 -0
- package/src/sast/cpp-bench-extras.js +122 -0
- package/src/sast/cpp-dataflow.js +430 -0
- package/src/sast/cpp.js +248 -0
- package/src/sast/csharp.js +152 -0
- package/src/sast/csrf.js +82 -0
- package/src/sast/dart-flutter.js +173 -0
- package/src/sast/db-rls.js +147 -0
- package/src/sast/db-taint.js +215 -0
- package/src/sast/defi-deep.js +242 -0
- package/src/sast/deserialization-gadgets.js +113 -0
- package/src/sast/django-hardening.js +230 -0
- package/src/sast/env-hygiene.js +125 -0
- package/src/sast/fastapi-hardening.js +145 -0
- package/src/sast/go-extended.js +84 -0
- package/src/sast/host-header.js +106 -0
- package/src/sast/index.js +17 -0
- package/src/sast/java-ast-folding.js +561 -0
- package/src/sast/java-bench-extras.js +708 -0
- package/src/sast/java-collection-passthrough.js +178 -0
- package/src/sast/java-constant-fold.js +244 -0
- package/src/sast/java-deserialization.js +125 -0
- package/src/sast/jndi.js +104 -0
- package/src/sast/juliet-shape.js +324 -0
- package/src/sast/jwt-exp.js +104 -0
- package/src/sast/kotlin.js +82 -0
- package/src/sast/laravel-hardening.js +198 -0
- package/src/sast/ldap-injection.js +100 -0
- package/src/sast/llm-owasp.js +465 -0
- package/src/sast/llm-stored-prompt.js +103 -0
- package/src/sast/llm-trading-agent.js +161 -0
- package/src/sast/llm.js +308 -0
- package/src/sast/logic.js +140 -0
- package/src/sast/mass-assignment.js +101 -0
- package/src/sast/mcp-audit.js +242 -0
- package/src/sast/mobile-manifest.js +195 -0
- package/src/sast/model-load.js +164 -0
- package/src/sast/mutation-xss.js +87 -0
- package/src/sast/nosql-injection.js +82 -0
- package/src/sast/open-redirect.js +119 -0
- package/src/sast/php.js +91 -0
- package/src/sast/pipeline.js +122 -0
- package/src/sast/primary-cwe-java.js +155 -0
- package/src/sast/prompt-firewall.js +151 -0
- package/src/sast/prompt-template.js +157 -0
- package/src/sast/prototype-pollution.js +112 -0
- package/src/sast/python-sinks.js +195 -0
- package/src/sast/quarkus-hardening.js +102 -0
- package/src/sast/rag-poisoning.js +118 -0
- package/src/sast/rate-limit.js +128 -0
- package/src/sast/response-splitting.js +138 -0
- package/src/sast/ruby.js +108 -0
- package/src/sast/rust.js +105 -0
- package/src/sast/solidity.js +167 -0
- package/src/sast/springboot-hardening.js +186 -0
- package/src/sast/ssrf-cloud-metadata.js +80 -0
- package/src/sast/ssti.js +116 -0
- package/src/sast/swift.js +162 -0
- package/src/sast/toctou.js +95 -0
- package/src/sast/webhook.js +101 -0
- package/src/sast/xpath-injection.js +51 -0
- package/src/sast/xxe.js +140 -0
- package/src/sast/zip-slip.js +200 -0
- package/src/sca/base-images.json +45 -0
- package/src/sca/container.js +107 -0
- package/src/sca/dep-confusion.js +134 -0
- package/src/sca/index.js +6 -0
- package/src/sca/popular-packages.json +41 -0
- package/src/sca/sarif-ingest.js +187 -0
- package/src/sca/vuln-function-hints.json +89 -0
- package/src/secrets/index.js +4 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"firstScanDate": "2026-05-18T18:08:52.292Z",
|
|
3
|
+
"lastScanDate": "2026-05-20T21:19:05.322Z",
|
|
4
|
+
"totalScans": 105,
|
|
5
|
+
"daysCleanCritical": 3,
|
|
6
|
+
"lastCleanDate": "2026-05-20",
|
|
7
|
+
"lastCriticalDate": null,
|
|
8
|
+
"hasEverHadCritical": false,
|
|
9
|
+
"bestDaysCleanCritical": 3,
|
|
10
|
+
"totalFindingsAtFirstScan": 0,
|
|
11
|
+
"totalFindingsAtLastScan": 17,
|
|
12
|
+
"totalFixesInferred": 0,
|
|
13
|
+
"lastGrade": "A",
|
|
14
|
+
"bestGrade": "A+",
|
|
15
|
+
"launchCheckPassedAt": null,
|
|
16
|
+
"achievements": [
|
|
17
|
+
"first-scan",
|
|
18
|
+
"grade-a",
|
|
19
|
+
"grade-a-plus",
|
|
20
|
+
"scan-veteran-100",
|
|
21
|
+
"scan-veteran-25"
|
|
22
|
+
],
|
|
23
|
+
"previousGrade": "A"
|
|
24
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# scanner/src/dataflow/
|
|
2
|
+
|
|
3
|
+
Layer-2 taint engine. Walks the Layer-1 IR (`../ir/`) with field-sensitive forward taint, consults a 200+ entry sources/sinks/sanitizers catalog, and emits findings tagged `parser: 'IR-TAINT'`.
|
|
4
|
+
|
|
5
|
+
## Scope — what we actually model
|
|
6
|
+
|
|
7
|
+
- **Intra-procedural field-sensitive taint** with access-path lattice (`access-paths.js`). `user.email` is distinguishable from `user.password`.
|
|
8
|
+
- **k=1 monovariant interprocedural return-taint.** `SummaryCache` (`summaries.js`) holds one summary per function under empty entry state. At an assign-from-call site, if the resolved callee's summary says `returnTainted`, the LHS becomes tainted. Premortem-derived; was previously dead code.
|
|
9
|
+
- **Catalog-driven source/sink/sanitizer matching.** Add entries in `catalog.js`. Each entry: `kind` ∈ {source, sink, sanitizer}, plus language + framework + match shape. 200+ entries spanning Express/Flask/FastAPI/Django/Rails/PHP/Go-net-http/Gin/Echo.
|
|
10
|
+
- **Path feasibility.** Constant-folds `if` conditions to prune unreachable branches.
|
|
11
|
+
- **Per-flow source attribution.** Sources reported on a finding are the ones actually reaching the sink argument (via free-var matching in the sink expression), NOT the first source the worklist happened to see. Premortem-derived.
|
|
12
|
+
|
|
13
|
+
## Scope — what we do NOT model (today)
|
|
14
|
+
|
|
15
|
+
- **Arbitrary entry-taint-state context-sensitivity.** Each function gets ONE summary, computed under empty entry. A function that's pure when called with clean args but vulnerable when called with tainted args is modelled as the empty-state result. Track FR-SEM-2 to lift this.
|
|
16
|
+
- **Mutated-parameter taint at call sites.** The `SummaryCache.applyAtCallSite` helper exists for it; the engine doesn't consult it yet. If you want a helper that mutates its argument (`Object.assign(target, tainted)`) to taint the caller's `target`, this is the modelling gap.
|
|
17
|
+
- **Higher-order taint flow** — partial. `higher-order.js` propagates taint into `arr.map(fn)` / `promise.then(fn)` callbacks at the IR level, but the recorded `_higherOrderInvocations` aren't fed back into the worklist yet.
|
|
18
|
+
- **Implicit flow.** `implicit-flow.js` exists for `if (tainted) { x = "yes" }` propagation but is conservative-by-default.
|
|
19
|
+
|
|
20
|
+
## Entry points
|
|
21
|
+
|
|
22
|
+
- `runTaintEngine(perFileIR, callGraph, opts)` — the public entry. Called from `engine.js` when `AGENTIC_SECURITY_DEEP=1` (or auto-enabled outside CI).
|
|
23
|
+
- `applyPathFeasibility` — constant-fold pass that runs before the worklist.
|
|
24
|
+
- `annotateBackwardSlices` — backward-slice annotation for already-emitted findings.
|
|
25
|
+
|
|
26
|
+
## Configuration / opt-in
|
|
27
|
+
|
|
28
|
+
- `AGENTIC_SECURITY_DEEP=1` — enable the deep engine.
|
|
29
|
+
- `AGENTIC_SECURITY_DEEP_TIMEOUT_MS` — global walltime budget (default 300_000).
|
|
30
|
+
- `AGENTIC_SECURITY_DEEP_FN_LIMIT` — function-count budget (default 5000).
|
|
31
|
+
- `AGENTIC_SECURITY_DEEP_IN_CI=1` — also enable in CI (off by default; CI runs are time-bounded).
|
|
32
|
+
|
|
33
|
+
## Gotchas
|
|
34
|
+
|
|
35
|
+
- **Path attribution.** If you're adding a sink to the catalog, set `argIndex` carefully. `'all'` means "any tainted arg fires"; a numeric index pinpoints THE arg whose taint matters. Wrong here → noisy findings with confused traces.
|
|
36
|
+
- **Cache invalidation.** `SummaryCache` is in-memory per-scan. Cross-scan persistence lives in `incremental.js` (FR-incremental) but it's behind a separate flag. Don't conflate the two.
|
|
37
|
+
- **Recursion.** The cache returns a bottom summary (`_recursive: true`) when it hits a function already on the stack. Refining that bottom summary would require fixed-point iteration, but `runTaintEngine` does only ONE pass today, so recursive cycles will under-approximate.
|
|
38
|
+
- **`AGENTIC_SECURITY_BLIND_BENCH=1` disables the deep engine** along with everything else bench-shape. If you're trying to bench taint quality, run with both `AGENTIC_SECURITY_DEEP=1` and `AGENTIC_SECURITY_BLIND_BENCH=0` (the default).
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
// Field-sensitive access-path lattice (P1.1).
|
|
2
|
+
//
|
|
3
|
+
// Replaces the engine's flat Set<varName> with Set<accessPath>, where an
|
|
4
|
+
// access path is a string of the shape "base.prop.prop.prop" (any depth).
|
|
5
|
+
// The lattice operations are:
|
|
6
|
+
//
|
|
7
|
+
// - prefixCovers(haveSet, query)
|
|
8
|
+
// True iff some path in haveSet is a prefix of `query` (e.g. "x" covers
|
|
9
|
+
// "x.y.z"). Models the "if x is tainted, x.y and x.y.z are tainted too"
|
|
10
|
+
// contamination rule we already use today.
|
|
11
|
+
//
|
|
12
|
+
// - longestCommonPrefixJoin(a, b)
|
|
13
|
+
// At a branch-join point, given two access-path sets, compute the LCP
|
|
14
|
+
// of every pair (a_path, b_path) that share a common prefix. The join
|
|
15
|
+
// keeps:
|
|
16
|
+
// (i) paths present in BOTH a and b unchanged
|
|
17
|
+
// (ii) for paths present in only ONE branch, KEEP them (over-
|
|
18
|
+
// approximate — the path may have been mutated in that branch
|
|
19
|
+
// and stayed clean in the other; we treat the union as the
|
|
20
|
+
// conservative state).
|
|
21
|
+
// This is the canonical lattice for forward dataflow over a powerset.
|
|
22
|
+
//
|
|
23
|
+
// The path string format is intentionally simple — dot-separated, with no
|
|
24
|
+
// support for [i] / [*] / function-call notation. Index sensitivity is a
|
|
25
|
+
// follow-on (P3 work).
|
|
26
|
+
//
|
|
27
|
+
// Public API:
|
|
28
|
+
// accessPathOf(expr) → string | null
|
|
29
|
+
// isCoveredBy(set, path) → bool — is `path` covered by some path in `set`?
|
|
30
|
+
// joinSets(a, b) → new Set
|
|
31
|
+
// addPath(set, path) → new Set (returns a new set with `path` added,
|
|
32
|
+
// collapsing redundant longer paths)
|
|
33
|
+
// removePathAndDescendants(set, path) → new Set
|
|
34
|
+
// canonicalize(set) → new Set (removes paths covered by shorter prefixes)
|
|
35
|
+
|
|
36
|
+
/**
 * Convert an exprDesc (ident / member chain) into a dot-separated access path.
 *
 * Only two node shapes are understood:
 *   { kind: 'ident', name }            → "name"
 *   { kind: 'member', object, prop }   → "<path of object>.prop"
 *
 * @param {object|null|undefined} expr - IR expression descriptor.
 * @returns {string|null} The access path (e.g. "user.profile.email"), or null
 *   when the expression is not a pure ident/member chain (it contains a call,
 *   binary, non-string property, etc.) or when an identifier / property name
 *   is missing or empty. The result is never `undefined` and never a
 *   malformed path such as "x.".
 */
export function accessPathOf(expr) {
  // Walk from the outermost member node down to the base identifier,
  // collecting property names right-to-left.
  const segments = [];
  let node = expr;
  while (node && node.kind === 'member') {
    if (typeof node.prop !== 'string' || node.prop === '') return null;
    segments.push(node.prop);
    node = node.object;
  }
  // The chain must bottom out in a well-formed identifier.
  if (!node || node.kind !== 'ident') return null;
  if (typeof node.name !== 'string' || node.name === '') return null;
  segments.push(node.name);
  return segments.reverse().join('.');
}
|
|
52
|
+
|
|
53
|
+
/**
 * Prefix test on dot-separated access paths.
 *
 * @param {string} path - Candidate path, e.g. "x.y.z".
 * @param {string} prefix - Candidate ancestor, e.g. "x.y".
 * @returns {boolean} True iff `path` equals `prefix` or begins with
 *   `prefix` followed by a '.' separator. Non-string arguments yield false.
 */
export function pathIsCoveredByPrefix(path, prefix) {
  const bothStrings = typeof path === 'string' && typeof prefix === 'string';
  if (!bothStrings) return false;
  if (path === prefix) return true;
  // Requiring the separator right after the prefix keeps "xy" from being
  // treated as a descendant of "x".
  return path.startsWith(`${prefix}.`);
}
|
|
61
|
+
|
|
62
|
+
/**
 * The "covers" relation that decides whether `path` is tainted: true iff
 * `path` itself, or one of its dot-delimited prefixes, is a member of `set`.
 *
 *   set = {"x"}      covers "x.y.z"
 *   set = {"x.y"}    covers "x.y.z"
 *   set = {"x.z"}    does NOT cover "x.y"
 *   set = {"x.y.z"}  does NOT cover "x.y"   (taint never propagates UP)
 *
 * @param {Set<string>|null|undefined} set - Access-path set to search.
 * @param {string} path - Path being queried.
 * @returns {boolean} False when `set` is falsy or `path` is not a string.
 */
export function isCoveredBy(set, path) {
  if (!set || typeof path !== 'string') return false;
  // Probe the full path first, then peel one trailing segment per iteration.
  let candidate = path;
  for (;;) {
    if (set.has(candidate)) return true;
    const cut = candidate.lastIndexOf('.');
    if (cut <= 0) return false;
    candidate = candidate.slice(0, cut);
  }
}
|
|
81
|
+
|
|
82
|
+
/**
 * Return a copy of `set` with `path` added, keeping the result canonical.
 *
 * - If a strictly-shorter prefix of `path` is already present, the copy is
 *   returned unchanged (the prefix already covers `path`).
 * - Otherwise every existing descendant of `path` is dropped (taint at the
 *   shorter prefix subsumes taint at the longer descendant) and `path` is
 *   inserted.
 *
 * The input is never mutated and the result is always a fresh Set — also
 * when `path` is rejected or `set` is null/undefined — so callers can
 * freely mutate the returned value without aliasing their previous state.
 *
 * @param {Iterable<string>|null|undefined} set - Existing access paths.
 * @param {string} path - Path to add; non-strings and '' are ignored.
 * @returns {Set<string>} New set.
 */
export function addPath(set, path) {
  const out = new Set(set || []);
  if (typeof path !== 'string' || path === '') return out;

  // A strict prefix already in the set? Then `path` is redundant.
  for (let cut = path.lastIndexOf('.'); cut > 0; cut = path.lastIndexOf('.', cut - 1)) {
    if (out.has(path.slice(0, cut))) return out;
  }

  // Remove descendants made redundant by the new, shorter path. The trailing
  // '.' keeps siblings such as "xy" from being mistaken for children of "x".
  const descendantPrefix = `${path}.`;
  for (const existing of [...out]) {
    if (typeof existing === 'string' && existing.startsWith(descendantPrefix)) {
      out.delete(existing);
    }
  }
  out.add(path);
  return out;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Return a copy of `set` without `path` and without any of its descendants
 * (re-assignment of `x` clears `x`, `x.y`, `x.y.z`, ...).
 *
 * The input is never mutated and the result is always a fresh Set — also
 * when `path` is rejected or `set` is null/undefined — so the return value
 * never aliases the caller's previous state.
 *
 * @param {Iterable<string>|null|undefined} set - Existing access paths.
 * @param {string} path - Path to clear; non-strings and '' remove nothing.
 * @returns {Set<string>} New set.
 */
export function removePathAndDescendants(set, path) {
  const source = set || [];
  if (typeof path !== 'string' || path === '') return new Set(source);

  // The trailing '.' keeps siblings such as "xy" from being mistaken for
  // children of "x".
  const descendantPrefix = `${path}.`;
  const out = new Set();
  for (const existing of source) {
    if (existing === path) continue;
    if (typeof existing === 'string' && existing.startsWith(descendantPrefix)) continue;
    out.add(existing);
  }
  return out;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Branch-join of two access-path sets: the conservative union of both
 * sides, with longer paths collapsed under any shorter prefix present in
 * the union.
 *
 * @param {Set<string>|null|undefined} a - State from one branch.
 * @param {Set<string>|null|undefined} b - State from the other branch.
 * @returns {Set<string>} New canonical set; empty when both inputs are falsy.
 */
export function joinSets(a, b) {
  // A missing side contributes nothing to the union.
  const union = new Set([...(a || []), ...(b || [])]);
  return canonicalize(union);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Return a copy of `set` with every path removed that is covered by some
 * strictly-shorter prefix in the same set. Idempotent; the input is never
 * mutated.
 *
 * Paths are visited shortest-first, so by the time a path is examined all
 * of its possible ancestors have already been kept or dropped. Each path
 * then only needs its own dot-delimited prefixes looked up in the kept set,
 * rather than a comparison against every kept path.
 *
 * @param {Set<string>|null|undefined} set - Access paths to canonicalize.
 * @returns {Set<string>} New set, ordered by length and then lexicographically.
 */
export function canonicalize(set) {
  if (!set || set.size <= 1) return new Set(set || []);
  const byLength = [...set].sort(
    (a, b) => a.length - b.length || (a < b ? -1 : a > b ? 1 : 0),
  );
  const out = new Set();
  for (const path of byLength) {
    let covered = false;
    if (typeof path === 'string') {
      // Probe each strict prefix of `path`, longest first.
      for (let cut = path.lastIndexOf('.'); cut > 0; cut = path.lastIndexOf('.', cut - 1)) {
        if (out.has(path.slice(0, cut))) {
          covered = true;
          break;
        }
      }
    }
    if (!covered) out.add(path);
  }
  return out;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Produce a string key for an access-path set, suitable for cache keying:
 * the canonical paths, sorted, joined by '|'. A falsy or empty set maps to
 * the literal 'empty'.
 *
 * NOTE(review): a set whose only canonical path is the identifier "empty"
 * yields the same key as the empty set — confirm that callers can tolerate
 * this collision before relying on the key for exact equality.
 *
 * @param {Set<string>|null|undefined} set - Access paths to hash.
 * @returns {string} Cache key.
 */
export function hashSet(set) {
  const isEmpty = !set || set.size === 0;
  if (isEmpty) return 'empty';
  const canonicalPaths = Array.from(canonicalize(set));
  canonicalPaths.sort();
  return canonicalPaths.join('|');
}
|
|
161
|
+
|
|
162
|
+
/**
 * Compare two access-path sets for equality after canonicalization.
 *
 * @param {Set<string>|null|undefined} a - First set.
 * @param {Set<string>|null|undefined} b - Second set.
 * @returns {boolean} True when both are the same reference, or when their
 *   canonical forms contain exactly the same paths. A falsy operand only
 *   equals the identical falsy value.
 */
export function setsEqual(a, b) {
  if (a === b) return true;
  if (!a || !b) return false;
  const left = canonicalize(a);
  const right = canonicalize(b);
  return left.size === right.size && [...left].every((path) => right.has(path));
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
// Async / Promise sequencing (P3.3).
|
|
2
|
+
//
|
|
3
|
+
// Today's engine treats every await/then/catch as a synchronous call. That's
|
|
4
|
+
// CORRECT for most flows — taint propagates through resolved values just
|
|
5
|
+
// like return values. But several real patterns break:
|
|
6
|
+
//
|
|
7
|
+
// const data = await fetch(url).then(r => r.json());
|
|
8
|
+
// ^^^^^^^^^^^^^^ ← need first-arg taint
|
|
9
|
+
// p.then(onFulfilled).catch(onRejected)
|
|
10
|
+
// ^^^^^^^^^^^^ ← rejected branch carries error taint
|
|
11
|
+
// Promise.all([fetchA(req.body), fetchB(req.body)])
|
|
12
|
+
// .then(([a, b]) => use(a, b)) ← destructured array elements
|
|
13
|
+
// for await (const chunk of req) ← async iter — body chunks are sources
|
|
14
|
+
// const stream = req.body ← Node 18+ web streams
|
|
15
|
+
//
|
|
16
|
+
// This module captures the SHAPES of these chains and tells the engine
|
|
17
|
+
// which callbacks to walk and how the result of the chain inherits taint.
|
|
18
|
+
// It's a structural helper consumed by the IR-driven dataflow engine.
|
|
19
|
+
//
|
|
20
|
+
// Public API:
|
|
21
|
+
// describeChain(callExpr)
|
|
22
|
+
// → returns a normalized AsyncChain descriptor:
|
|
23
|
+
// { ops: [{ kind, callback?, argIndex? }], rootCallee, isPromise }
|
|
24
|
+
// resultTaintFor(chain, sourceTainted)
|
|
25
|
+
// → returns true iff the chain's resolved value is tainted given the
|
|
26
|
+
// root callee returned a tainted promise.
|
|
27
|
+
// awaitedTaint(state, varName)
|
|
28
|
+
// → adapts the engine's taint state at `await x` (no-op for typed
|
|
29
|
+
// values; for `await req.body.text()` we lift the call's result).
|
|
30
|
+
//
|
|
31
|
+
// Identification heuristic (no types in JS): a callee is considered
|
|
32
|
+
// "promise-shaped" iff its name matches a known async source/sink in the
|
|
33
|
+
// catalog, OR it's awaited at least once in the analyzed function. The
|
|
34
|
+
// engine threads `isPromise` based on the catalog hit.
|
|
35
|
+
|
|
36
|
+
const PROMISE_CHAIN_METHODS = new Set([
|
|
37
|
+
'then', 'catch', 'finally', 'allSettled',
|
|
38
|
+
]);
|
|
39
|
+
|
|
40
|
+
const PROMISE_STATIC_METHODS = new Set([
|
|
41
|
+
'all', 'allSettled', 'race', 'any',
|
|
42
|
+
]);
|
|
43
|
+
|
|
44
|
+
const ASYNC_ITER_BODY_SOURCES = new Set([
|
|
45
|
+
// req.body (Node 18+ / Fetch API web streams) — `for await` over it yields
|
|
46
|
+
// raw user-controlled chunks.
|
|
47
|
+
'body', 'stream',
|
|
48
|
+
]);
|
|
49
|
+
|
|
50
|
+
/**
 * Break a promise chain into its individual links.
 *
 * Pass the OUTERMOST call of the chain: for `fetch(url).then(r).catch(e)`
 * that is the `.catch(...)` call. The walk moves leftward through every call
 * whose callee is a member access with a property name found in
 * PROMISE_CHAIN_METHODS, and stops at the first expression that is not.
 *
 * Expected (parser-neutral) node shape:
 *   { kind: 'call', callee: { kind: 'member', object: <expr>, prop: <string> }, args: [...] }
 *
 * @param {object} callExpr - outermost call expression of the chain.
 * @returns {{ops: Array<{kind: string, callback: object|null, argIndex: number}>,
 *            rootCallee: *, isPromise: boolean}|null}
 *   `null` when `callExpr` is not a call node. Otherwise `ops` lists the links
 *   in source order (innermost first), `rootCallee` is the expression the walk
 *   stopped on, and `isPromise` is `isPromiseRoot(rootCallee)`.
 */
export function describeChain(callExpr) {
  if (callExpr?.kind !== 'call') return null;

  // Only these first-argument node kinds are reported as a callback.
  const callbackKinds = ['ident', 'arrow', 'function'];
  const outermostFirst = [];
  let node = callExpr;

  for (;;) {
    const callee = node?.kind === 'call' ? node.callee : null;
    if (!callee || callee.kind !== 'member' || !PROMISE_CHAIN_METHODS.has(callee.prop)) break;

    const firstArg = (node.args || [])[0];
    const isCallback = Boolean(firstArg) && callbackKinds.includes(firstArg.kind);
    outermostFirst.push({
      kind: callee.prop,
      callback: isCallback ? firstArg : null,
      argIndex: 0,
    });
    node = callee.object;
  }

  // Links were collected from the outside in; callers get them innermost-first.
  const isPromise = isPromiseRoot(node);
  return { ops: outermostFirst.reverse(), rootCallee: node, isPromise };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Name-based guess at whether `expr` is a call that produces a promise.
 *
 * Accepted shapes:
 *   - a call to a bare identifier named `fetch`, `axios`, `request` or `got`;
 *   - `Promise.<m>(...)` where `<m>` is in PROMISE_STATIC_METHODS;
 *   - any member call whose property ends in `Async`, or is exactly one of
 *     `fetch`, `json`, `text`, `blob`, `formData`.
 *
 * @param {object} expr - candidate root expression of a chain.
 * @returns {boolean} true when the call matches one of the shapes above.
 */
function isPromiseRoot(expr) {
  const callee = expr && expr.kind === 'call' ? expr.callee : null;
  if (!callee) return false;

  switch (callee.kind) {
    case 'ident':
      return /^(fetch|axios|request|got)$/.test(callee.name);

    case 'member': {
      const receiver = callee.object;
      const onPromiseGlobal = Boolean(receiver) && receiver.kind === 'ident' && receiver.name === 'Promise';
      if (onPromiseGlobal && PROMISE_STATIC_METHODS.has(callee.prop)) return true;
      // No type information is available, so fall back to the method name.
      return /Async$/.test(callee.prop) || /^(fetch|json|text|blob|formData)$/.test(callee.prop);
    }

    default:
      return false;
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Decide, for every callback in a described promise chain, whether its input
 * is tainted, and whether the chain's resolved value is tainted.
 *
 * Per-link rules:
 *   - `then`    — the callback input is tainted iff the root value is tainted.
 *   - `catch`   — the callback receives the rejection; it is considered
 *                 tainted unless `opts.assumeRejectionTainted === false`
 *                 (an error message can carry user input, e.g.
 *                 `new Error(req.body)`).
 *   - `finally` — the callback takes no input, so it is never tainted.
 *   - any other link kind is ignored.
 *
 * Callback bodies are not analysed here, so the approximation is
 * tainted-in → tainted-out: `finalTainted` always equals the truthiness of
 * `sourceTainted`, whatever links the chain contains.
 *
 * @param {{ops: Array<{kind: string, callback: *}>}|null} chain - descriptor
 *   in the shape returned by `describeChain`; a falsy chain yields no callbacks.
 * @param {*} sourceTainted - truthy when the root call's resolved value is tainted.
 * @param {{assumeRejectionTainted?: boolean}} [opts]
 * @returns {{callbacks: Array<{callback: *, taintedInput: boolean}>, finalTainted: boolean}}
 */
export function resultTaintFor(chain, sourceTainted, opts = {}) {
  const rejectionTainted = opts.assumeRejectionTainted !== false;
  const rootTainted = Boolean(sourceTainted);
  if (!chain) return { callbacks: [], finalTainted: rootTainted };

  // Input taint handed to the callback of each recognised link kind.
  const inputTaintByKind = new Map([
    ['then', rootTainted],
    ['catch', rejectionTainted],
    ['finally', false],
  ]);

  const callbacks = [];
  for (const { kind, callback } of chain.ops) {
    if (!inputTaintByKind.has(kind)) continue;
    callbacks.push({ callback, taintedInput: inputTaintByKind.get(kind) });
  }
  return { callbacks, finalTainted: rootTainted };
}
|
|
136
|
+
|
|
137
|
+
/**
 * For `for await (const x of obj)` — decide whether `x` should inherit taint
 * from the shape of `obj`. The check is name-based because no type
 * information is available.
 *
 *   `for await (const chunk of req.body)`       → chunk tainted
 *   `for await (const chunk of req)`            → chunk tainted
 *   `for await (const item of someInternal)`    → not tainted
 *
 * Rules, in order:
 *   - a bare identifier is tainted when it is named `req` / `request`
 *     (the same names the member rule treats as the request object) or when
 *     it is in `knownTaintedVars`;
 *   - `<ident>.<prop>` is tainted when `<ident>` is `req` / `request` and
 *     `<prop>` is in ASYNC_ITER_BODY_SOURCES, or when `<ident>` is in
 *     `knownTaintedVars` (any property of a tainted object counts);
 *   - every other expression shape is not tainted.
 *
 * @param {object} iterableExpr - AST node of the iterated expression.
 * @param {{has(name: string): boolean}} [knownTaintedVars] - names the engine
 *   currently considers tainted; may be omitted.
 * @returns {boolean} always a real boolean, never `undefined`/`null`.
 */
export function asyncIterYieldsTaint(iterableExpr, knownTaintedVars) {
  if (!iterableExpr) return false;

  const isRequestName = (name) => name === 'req' || name === 'request';
  const isKnownTainted = (name) => Boolean(knownTaintedVars && knownTaintedVars.has(name));

  if (iterableExpr.kind === 'ident') {
    // Iterating the request object itself yields raw body chunks.
    return isRequestName(iterableExpr.name) || isKnownTainted(iterableExpr.name);
  }

  if (iterableExpr.kind === 'member' && iterableExpr.object && iterableExpr.object.kind === 'ident') {
    const owner = iterableExpr.object.name;
    // req.body / req.stream → tainted
    if (isRequestName(owner) && ASYNC_ITER_BODY_SOURCES.has(iterableExpr.prop)) return true;
    // Field-collapsing approximation: a tainted object taints all its properties.
    return isKnownTainted(owner);
  }

  return false;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Promise.all / Promise.race / Promise.any aggregate flow.
 *
 *   Promise.all([p1, p2, p3]).then(([a, b, c]) => …)
 *
 * Maps each element of the array literal to a taint flag, position by
 * position, so the caller can tell which destructured names inherit taint.
 *
 * @param {{elements: Array}|null} args - the array literal passed to the
 *   combinator (AST node). Anything without an `elements` array yields `[]`.
 * @param {(argExpr: *) => boolean} [eachTaintedFn] - the engine's per-expression
 *   predicate. It is called with the element ONLY (no index/array), so a
 *   predicate with extra optional parameters is not fed `Array.prototype.map`'s
 *   bookkeeping arguments. When it is not a function every element is
 *   reported as untainted.
 * @returns {boolean[]} one boolean per element, in order.
 */
export function promiseAggregateTaint(args, eachTaintedFn) {
  if (!args || !Array.isArray(args.elements)) return [];
  const isTainted = typeof eachTaintedFn === 'function' ? eachTaintedFn : () => false;
  return args.elements.map((element) => Boolean(isTainted(element)));
}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
// Backward taint slicing (P1.4).
|
|
2
|
+
//
|
|
3
|
+
// Forward analysis answers: "given these sources, what flows reach the sinks?"
|
|
4
|
+
// Backward slicing answers: "given this sink, walk back along def-use to
|
|
5
|
+
// find the source(s)." The two combined give precise source→sink paths for
|
|
6
|
+
// every emitted finding — the "show me the work" explainability layer.
|
|
7
|
+
//
|
|
8
|
+
// Algorithm (intraprocedural for v1):
|
|
9
|
+
//
|
|
10
|
+
// slice(fn, sinkNode, sinkArgPath):
|
|
11
|
+
// work = [{ node: sinkNode, path: sinkArgPath }]
|
|
12
|
+
// visited = set
|
|
13
|
+
// trail = []
|
|
14
|
+
// while work non-empty:
|
|
15
|
+
// n = work.pop()
|
|
16
|
+
// if visited.has(n.node + ':' + n.path): continue
|
|
17
|
+
// visited.add(...)
|
|
18
|
+
// if n.node is 'assign' and target subsumes n.path:
|
|
19
|
+
// trail.push(n)
|
|
20
|
+
// enqueue every read in n.node.source as a new query
|
|
21
|
+
// follow CFG predecessor edges and continue
|
|
22
|
+
// return trail (oldest first)
|
|
23
|
+
//
|
|
24
|
+
// We use the IR CFG's `succ` arrays — predecessors are not directly stored
|
|
25
|
+
// but we precompute the reverse edges for each function on demand.
|
|
26
|
+
//
|
|
27
|
+
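// Interprocedural (planned — not implemented by the code in this module):
// when the sink's argument is bound to a function parameter, ascend to
// caller(s) by consulting the call graph, visiting up to
// SLICE_BUDGET_CALLERS (5) callers (BFS-bounded) to keep the slicer fast.
// `sliceBackward` below currently stays within a single function's CFG.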
|
|
30
|
+
|
|
31
|
+
import { accessPathOf, pathIsCoveredByPrefix } from './access-paths.js';
|
|
32
|
+
|
|
33
|
+
// Upper bound on the number of work-list items `sliceBackward` dequeues for
// one slice before it stops and returns what it has collected so far.
const SLICE_BUDGET_NODES = 200;
|
|
34
|
+
// Intended cap on how many callers an interprocedural slice may ascend to.
// NOTE(review): nothing in this module reads this constant yet — confirm
// whether the caller-ascent step is still planned.
const SLICE_BUDGET_CALLERS = 5;
|
|
35
|
+
|
|
36
|
+
/**
 * Build the predecessor map of a CFG from its per-node `succ` arrays.
 *
 * Node ids come from `Object.keys(cfg.nodes)` and are therefore always
 * strings. Successor ids are normalised with `String(...)` before being used
 * as map keys, so a CFG that stores numeric successor ids still produces a
 * map that can be queried with the string ids used everywhere else.
 *
 * @param {{nodes: Object<string, {succ?: Array<string|number>}>}|null} cfg
 * @returns {Map<string, string[]>} successor id → ids of the nodes that list
 *   it in `succ`; empty when `cfg` or `cfg.nodes` is missing.
 */
function _reverseEdges(cfg) {
  const rev = new Map();
  if (!cfg || !cfg.nodes) return rev;
  for (const [id, node] of Object.entries(cfg.nodes)) {
    for (const successor of (node?.succ || [])) {
      const key = String(successor);
      const preds = rev.get(key);
      if (preds) {
        preds.push(id);
      } else {
        rev.set(key, [id]);
      }
    }
  }
  return rev;
}
|
|
48
|
+
|
|
49
|
+
// Access paths that START with one of these prefixes are treated as taint
// origins. The whole alternation sits inside one anchored group: without it
// `^` would bind to the first alternative only and a path such as
// `cfg.process.env.HOME` would be classified as a source.
const _SOURCE_PATH_PREFIX = /^(?:req\.(?:body|query|params|headers|cookies)|process\.env|window\.location|document\.URL)/;

/**
 * Build a backward slice from a finding's sink site to its source(s),
 * staying inside `fn`'s control-flow graph.
 *
 *   fn:          the function the sink lives in (must carry `cfg.nodes`)
 *   sinkNode:    the IR node where the sink fires; must be one of the values
 *                of `fn.cfg.nodes` (matched by identity)
 *   sinkArgPath: the access path of the tainted argument (string)
 *
 * Starting at the sink, the CFG is walked breadth-first along predecessor
 * edges. Whenever an `assign` node's string target covers the path being
 * tracked (per `pathIsCoveredByPrefix`), the assignment is recorded and the
 * access path of its right-hand side is tracked as well. A right-hand side
 * matching a well-known source prefix ends that branch with a `source` step.
 * At most SLICE_BUDGET_NODES work items are dequeued (duplicates included).
 *
 * Returns the recorded steps in reverse discovery order, so sources come
 * first and the sink last; `[]` when the inputs are unusable or the sink node
 * is not part of the CFG. Step shapes emitted:
 *   { line, kind: 'source', label, path }
 *   { line, kind: 'assign', to, from, path }
 *   { line, kind: 'sink',   callee, path }
 */
export function sliceBackward(fn, sinkNode, sinkArgPath) {
  const out = [];
  if (!fn || !sinkNode) return out;
  const cfg = fn.cfg;
  if (!cfg || !cfg.nodes) return out;
  const nodes = cfg.nodes;

  // The caller hands over a node reference, not its id; recover the id by a
  // linear identity search (per-function CFGs are small).
  const sinkNid = Object.keys(nodes).find((id) => nodes[id] === sinkNode);
  if (sinkNid === undefined) return out;

  const rev = _reverseEdges(cfg);

  out.push({
    line: sinkNode.line || 0,
    kind: 'sink',
    callee: sinkNode.callee || null,
    path: sinkArgPath,
  });

  const work = [{ nid: sinkNid, queryPath: sinkArgPath }];
  const visited = new Set();
  let dequeued = 0;

  while (work.length) {
    if (++dequeued > SLICE_BUDGET_NODES) break;
    const { nid, queryPath } = work.shift();
    const key = `${nid}::${queryPath}`;
    if (visited.has(key)) continue;
    visited.add(key);

    const node = nodes[nid];
    if (!node) continue;

    // Does this node define the path we are tracking? Record it, chase the RHS.
    if (node.kind === 'assign' && typeof node.target === 'string' && pathIsCoveredByPrefix(queryPath, node.target)) {
      const srcAp = accessPathOf(node.source);
      out.push({
        line: node.line || 0,
        kind: 'assign',
        to: node.target,
        from: srcAp,
        path: queryPath,
      });
      if (srcAp) {
        // Heuristic source detection without importing the catalog.
        if (_SOURCE_PATH_PREFIX.test(srcAp)) {
          out.push({
            line: node.line || 0,
            kind: 'source',
            label: srcAp,
            path: queryPath,
          });
          // Origin reached: this branch of the walk ends here.
          continue;
        }
        // Otherwise track the definition of the RHS path from this node upward.
        work.push({ nid, queryPath: srcAp });
      }
    }

    // Keep walking predecessors with the current query regardless — earlier
    // definitions of the same path may exist on prior nodes.
    for (const p of (rev.get(nid) || [])) {
      work.push({ nid: p, queryPath });
    }
  }

  // Steps were collected sink-first; callers expect source-first.
  return out.reverse();
}
|
|
142
|
+
|
|
143
|
+
/**
 * Helper: annotate every finding in a list with its backward slice.
 *
 *   findings:  produced by the engine; each is expected to carry `_funcQid`,
 *              `line`, `callee` and `argIndex`.
 *   perFileIR: accepted for signature parity with the dataflow engine; not
 *              read by this function.
 *   callGraph: object whose `functions` collection exposes `.values()`
 *              yielding functions with `qid` and `cfg`.
 *
 * For each finding the sink is the first `call` node of the owning function
 * whose `line` and `callee` equal the finding's. When a non-empty slice is
 * produced it is stored on `finding.backwardSlice` and its steps are appended
 * to `finding.pathSteps` as `{ type, label, line }`.
 *
 * Walltime-bounded: total annotation work is capped by
 * AGENTIC_SECURITY_BACKWARD_SLICE_BUDGET_MS (default 30_000; values that are
 * not a positive number fall back to the default). When the budget is
 * exhausted, remaining findings are left without slices — earlier findings
 * keep their annotations.
 *
 * Returns the (mutated) findings array, with a non-enumerable
 * `_annotateBackwardSlicesStats` scratch property on the array containing
 * { annotated, skipped, exhausted, budgetMs }. The property is configurable
 * and writable so the same array can be annotated again without
 * `Object.defineProperty` throwing "Cannot redefine property".
 * Non-array input is returned untouched.
 */
export function annotateBackwardSlices(findings, perFileIR, callGraph) {
  if (!Array.isArray(findings)) return findings;

  const envBudget = Number(process.env.AGENTIC_SECURITY_BACKWARD_SLICE_BUDGET_MS);
  // NaN, 0 and negative values are not usable budgets.
  const budgetMs = envBudget > 0 ? envBudget : 30_000;
  const deadline = Date.now() + budgetMs;

  // Build a qid → fn map for O(1) lookup.
  const fnByQid = new Map();
  if (callGraph && callGraph.functions) {
    for (const fn of callGraph.functions.values()) fnByQid.set(fn.qid, fn);
  }

  let annotated = 0;
  let skipped = 0;
  let exhausted = false;

  for (const f of findings) {
    if (Date.now() > deadline) { exhausted = true; skipped++; continue; }
    if (!f || !f._funcQid) { skipped++; continue; }
    const fn = fnByQid.get(f._funcQid);
    if (!fn) { skipped++; continue; }

    // Find the sink node in fn by line + callee match.
    const sinkNode = Object.values(fn.cfg?.nodes || {}).find(
      (n) => n && n.kind === 'call' && n.line === f.line && n.callee === f.callee,
    );
    if (!sinkNode) { skipped++; continue; }

    // The tainted arg path — derived from the tainted argument's expression,
    // with a positional placeholder when no access path can be computed.
    const taintedArg = (sinkNode.args || [])[f.argIndex];
    const argPath = accessPathOf(taintedArg) || `arg[${f.argIndex}]`;
    const slice = sliceBackward(fn, sinkNode, argPath);
    if (!slice || !slice.length) { skipped++; continue; }

    f.backwardSlice = slice;
    f.pathSteps = (f.pathSteps || []).concat(slice.map((s) => ({
      type: s.kind,
      label: s.label || s.callee || s.path || '',
      line: s.line,
    })));
    annotated++;
  }

  Object.defineProperty(findings, '_annotateBackwardSlicesStats', {
    value: { annotated, skipped, exhausted, budgetMs },
    enumerable: false,
    // Without these two flags a second call on the same array would throw.
    configurable: true,
    writable: true,
  });
  return findings;
}
|