@clear-capabilities/agentic-security-scanner 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1580 -0
- package/bin/.agentic-security/findings.json +1577 -0
- package/bin/.agentic-security/last-scan.json +1577 -0
- package/bin/.agentic-security/last-scan.json.sig +1 -0
- package/bin/.agentic-security/scan-history.json +465 -0
- package/bin/.agentic-security/streak.json +25 -0
- package/bin/agentic-security-audit.js +198 -0
- package/bin/agentic-security-consistency.js +80 -0
- package/bin/agentic-security-diff.js +136 -0
- package/bin/agentic-security-lsp.js +12 -0
- package/bin/agentic-security-mcp.js +40 -0
- package/bin/agentic-security-rule.js +153 -0
- package/bin/agentic-security.js +1683 -0
- package/dist/117.index.js +207 -0
- package/dist/178.index.js +250 -0
- package/dist/218.index.js +793 -0
- package/dist/227.index.js +192 -0
- package/dist/301.index.js +167 -0
- package/dist/384.index.js +18 -0
- package/dist/476.index.js +126 -0
- package/dist/513.index.js +373 -0
- package/dist/520.index.js +13 -0
- package/dist/601.index.js +1038 -0
- package/dist/634.index.js +1892 -0
- package/dist/637.index.js +216 -0
- package/dist/660.index.js +131 -0
- package/dist/675.index.js +451 -0
- package/dist/826.index.js +188 -0
- package/dist/830.index.js +133 -0
- package/dist/agentic-security.mjs +272 -0
- package/dist/agentic-security.mjs.sha256 +1 -0
- package/dist/calibration-seed.json +27 -0
- package/package.json +77 -0
- package/src/.agentic-security/findings.json +80844 -0
- package/src/.agentic-security/last-scan.json +80844 -0
- package/src/.agentic-security/last-scan.json.sig +1 -0
- package/src/.agentic-security/scan-history.json +8408 -0
- package/src/.agentic-security/streak.json +26 -0
- package/src/badge.js +188 -0
- package/src/compare.js +203 -0
- package/src/dataflow/.agentic-security/findings.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json +3487 -0
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
- package/src/dataflow/.agentic-security/scan-history.json +735 -0
- package/src/dataflow/.agentic-security/streak.json +24 -0
- package/src/dataflow/CLAUDE.md +38 -0
- package/src/dataflow/access-paths.js +172 -0
- package/src/dataflow/async-sequencing.js +177 -0
- package/src/dataflow/backward.js +201 -0
- package/src/dataflow/catalog-expanded.js +485 -0
- package/src/dataflow/catalog.js +659 -0
- package/src/dataflow/cross-repo.js +219 -0
- package/src/dataflow/engine.js +588 -0
- package/src/dataflow/exception-flow.js +116 -0
- package/src/dataflow/exploit-prover.js +187 -0
- package/src/dataflow/higher-order.js +221 -0
- package/src/dataflow/ifds.js +347 -0
- package/src/dataflow/implicit-flow.js +129 -0
- package/src/dataflow/incremental.js +229 -0
- package/src/dataflow/index.js +181 -0
- package/src/dataflow/numeric-domain.js +192 -0
- package/src/dataflow/path-feasibility.js +114 -0
- package/src/dataflow/points-to.js +337 -0
- package/src/dataflow/polyglot.js +190 -0
- package/src/dataflow/proven-clean.js +159 -0
- package/src/dataflow/receiver-context.js +76 -0
- package/src/dataflow/sanitizer-proof.js +154 -0
- package/src/dataflow/soft-taint.js +140 -0
- package/src/dataflow/string-domain.js +234 -0
- package/src/dataflow/stub-aware-filter.js +100 -0
- package/src/dataflow/summaries.js +132 -0
- package/src/dataflow/symbolic-exec.js +238 -0
- package/src/dataflow/tabulation.js +135 -0
- package/src/engine.js +7763 -0
- package/src/history-scan.js +229 -0
- package/src/index.js +3 -0
- package/src/integrations/.agentic-security/findings.json +1504 -0
- package/src/integrations/.agentic-security/last-scan.json +1504 -0
- package/src/integrations/.agentic-security/scan-history.json +40 -0
- package/src/integrations/.agentic-security/streak.json +21 -0
- package/src/integrations/index.js +321 -0
- package/src/integrations/tickets.js +200 -0
- package/src/ir/.agentic-security/findings.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json +3036 -0
- package/src/ir/.agentic-security/last-scan.json.sig +1 -0
- package/src/ir/.agentic-security/scan-history.json +364 -0
- package/src/ir/.agentic-security/streak.json +23 -0
- package/src/ir/CLAUDE.md +172 -0
- package/src/ir/callgraph.js +73 -0
- package/src/ir/class-hierarchy.js +195 -0
- package/src/ir/index.js +152 -0
- package/src/ir/parser-cs.js +260 -0
- package/src/ir/parser-java.js +286 -0
- package/src/ir/parser-js.js +413 -0
- package/src/ir/parser-kt.js +258 -0
- package/src/ir/parser-py-cst.js +136 -0
- package/src/ir/parser-py.helper.py +501 -0
- package/src/ir/parser-py.js +312 -0
- package/src/ir/ssa.js +315 -0
- package/src/ir/type-stubs.js +288 -0
- package/src/leaderboard.js +152 -0
- package/src/llm-validator/.agentic-security/findings.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
- package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
- package/src/llm-validator/.agentic-security/scan-history.json +168 -0
- package/src/llm-validator/.agentic-security/streak.json +20 -0
- package/src/llm-validator/consistency.js +141 -0
- package/src/llm-validator/index.js +437 -0
- package/src/lsp/.agentic-security/findings.json +28 -0
- package/src/lsp/.agentic-security/last-scan.json +28 -0
- package/src/lsp/.agentic-security/scan-history.json +79 -0
- package/src/lsp/.agentic-security/streak.json +22 -0
- package/src/lsp/server.js +275 -0
- package/src/mcp/.agentic-security/findings.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json +8358 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +1125 -0
- package/src/mcp/.agentic-security/streak.json +22 -0
- package/src/mcp/CLAUDE.md +54 -0
- package/src/mcp/audit.js +136 -0
- package/src/mcp/redact.js +75 -0
- package/src/mcp/server.js +158 -0
- package/src/mcp/stdio.js +83 -0
- package/src/mcp/tools.js +940 -0
- package/src/mcp/validate.js +49 -0
- package/src/personality.js +164 -0
- package/src/poc-video.js +239 -0
- package/src/posture/.agentic-security/findings.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json +51239 -0
- package/src/posture/.agentic-security/last-scan.json.sig +1 -0
- package/src/posture/.agentic-security/scan-history.json +5557 -0
- package/src/posture/.agentic-security/streak.json +24 -0
- package/src/posture/CLAUDE.md +42 -0
- package/src/posture/adversarial-self-test.js +114 -0
- package/src/posture/adversary-agent.js +204 -0
- package/src/posture/agents-memory.js +135 -0
- package/src/posture/ai-code-fingerprint.js +171 -0
- package/src/posture/aibom.js +284 -0
- package/src/posture/api-inventory.js +96 -0
- package/src/posture/attack-playbooks.js +305 -0
- package/src/posture/auditor-agent.js +115 -0
- package/src/posture/auth-posture-import.js +135 -0
- package/src/posture/baseline-compare.js +114 -0
- package/src/posture/blast-radius.js +836 -0
- package/src/posture/bounty-prediction.js +141 -0
- package/src/posture/business-logic.js +239 -0
- package/src/posture/calibration-drift.js +93 -0
- package/src/posture/calibration-seed.json +27 -0
- package/src/posture/calibration.js +204 -0
- package/src/posture/clustering.js +75 -0
- package/src/posture/concurrency-checker.js +265 -0
- package/src/posture/confidence.js +65 -0
- package/src/posture/container-runtime.js +149 -0
- package/src/posture/counterfactual.js +109 -0
- package/src/posture/cross-lang-graphql.js +165 -0
- package/src/posture/cross-lang-grpc.js +166 -0
- package/src/posture/cross-lang-meta.js +101 -0
- package/src/posture/cross-lang-openapi.js +187 -0
- package/src/posture/cross-lang-orm.js +153 -0
- package/src/posture/cross-lang-queues.js +210 -0
- package/src/posture/crown-jewels.js +110 -0
- package/src/posture/custom-rules.js +361 -0
- package/src/posture/cve-alert-daemon.js +433 -0
- package/src/posture/cve-lookup.js +129 -0
- package/src/posture/dead-code.js +430 -0
- package/src/posture/defender-agent.js +158 -0
- package/src/posture/deploy-platform.js +204 -0
- package/src/posture/detector-fuzz.js +61 -0
- package/src/posture/deterministic.js +99 -0
- package/src/posture/drift.js +165 -0
- package/src/posture/epss.js +156 -0
- package/src/posture/exploitability-probability.js +212 -0
- package/src/posture/exploitability.js +121 -0
- package/src/posture/feature-flags.js +110 -0
- package/src/posture/finding-defaults.js +132 -0
- package/src/posture/fix-history.js +411 -0
- package/src/posture/fix-plan.js +121 -0
- package/src/posture/fix-verify-loop.js +157 -0
- package/src/posture/fix-verify.js +130 -0
- package/src/posture/flow-narration.js +105 -0
- package/src/posture/grader-calibration.js +156 -0
- package/src/posture/harness-discovery.js +113 -0
- package/src/posture/holdout-eval.js +144 -0
- package/src/posture/iac-reachability.js +163 -0
- package/src/posture/iam-policy.js +128 -0
- package/src/posture/integrity.js +97 -0
- package/src/posture/learning.js +166 -0
- package/src/posture/license-policy.js +109 -0
- package/src/posture/llm-redteam-prompts.js +418 -0
- package/src/posture/llm-redteam.js +303 -0
- package/src/posture/material-change.js +163 -0
- package/src/posture/mitigation-composite.js +55 -0
- package/src/posture/mttr.js +91 -0
- package/src/posture/network-policy-import.js +126 -0
- package/src/posture/path-predicates.js +99 -0
- package/src/posture/persona-prioritization.js +153 -0
- package/src/posture/poc-cwe-map.js +51 -0
- package/src/posture/poc-generator.js +500 -0
- package/src/posture/policy-gate.js +174 -0
- package/src/posture/pre-incident-archaeology.js +110 -0
- package/src/posture/profile.js +93 -0
- package/src/posture/reachability-filter.js +42 -0
- package/src/posture/regression-test-gen.js +200 -0
- package/src/posture/reverse-blast-radius.js +110 -0
- package/src/posture/router.js +109 -0
- package/src/posture/rule-overrides.js +198 -0
- package/src/posture/rule-pack-signing.js +209 -0
- package/src/posture/rule-packs.js +143 -0
- package/src/posture/rule-synthesis.js +108 -0
- package/src/posture/ruleset-version.js +71 -0
- package/src/posture/sbom.js +129 -0
- package/src/posture/schema-aware-bridge.js +207 -0
- package/src/posture/security-trend.js +87 -0
- package/src/posture/semantic-clone.js +114 -0
- package/src/posture/specification-mining.js +170 -0
- package/src/posture/stable-id.js +75 -0
- package/src/posture/stack-playbook.js +229 -0
- package/src/posture/streak.js +249 -0
- package/src/posture/suppressions.js +135 -0
- package/src/posture/telemetry-ingest.js +112 -0
- package/src/posture/threat-model.js +145 -0
- package/src/posture/three-agent-pipeline.js +74 -0
- package/src/posture/triage.js +146 -0
- package/src/posture/trust-boundary-diagram.js +115 -0
- package/src/posture/type-narrowing.js +129 -0
- package/src/posture/validator-metrics.js +179 -0
- package/src/posture/verifier-ephemeral.js +118 -0
- package/src/posture/verifier-target.js +147 -0
- package/src/posture/verifier.js +257 -0
- package/src/posture/version.js +75 -0
- package/src/posture/waf-ingest.js +200 -0
- package/src/posture/why-fired.js +141 -0
- package/src/pr-comment.js +172 -0
- package/src/pr-delta.js +198 -0
- package/src/report/.agentic-security/findings.json +79 -0
- package/src/report/.agentic-security/last-scan.json +79 -0
- package/src/report/.agentic-security/last-scan.json.sig +1 -0
- package/src/report/.agentic-security/scan-history.json +332 -0
- package/src/report/.agentic-security/streak.json +23 -0
- package/src/report/index.js +1136 -0
- package/src/report/mascot.js +42 -0
- package/src/runScan.js +141 -0
- package/src/sast/.agentic-security/findings.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json +5051 -0
- package/src/sast/.agentic-security/last-scan.json.sig +1 -0
- package/src/sast/.agentic-security/scan-history.json +788 -0
- package/src/sast/.agentic-security/streak.json +23 -0
- package/src/sast/CLAUDE.md +39 -0
- package/src/sast/_comment-strip.js +46 -0
- package/src/sast/agent-tool-escalation.js +131 -0
- package/src/sast/auth-provider.js +171 -0
- package/src/sast/authz.js +236 -0
- package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
- package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
- package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
- package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
- package/src/sast/bench-shape/index.js +62 -0
- package/src/sast/claude-hook-injection.js +199 -0
- package/src/sast/claude-md-prompt-injection.js +170 -0
- package/src/sast/claude-settings.js +165 -0
- package/src/sast/client-side.js +149 -0
- package/src/sast/cpp-bench-extras.js +122 -0
- package/src/sast/cpp-dataflow.js +430 -0
- package/src/sast/cpp.js +248 -0
- package/src/sast/csharp.js +152 -0
- package/src/sast/csrf.js +82 -0
- package/src/sast/dart-flutter.js +173 -0
- package/src/sast/db-rls.js +147 -0
- package/src/sast/db-taint.js +215 -0
- package/src/sast/defi-deep.js +242 -0
- package/src/sast/deserialization-gadgets.js +113 -0
- package/src/sast/django-hardening.js +230 -0
- package/src/sast/env-hygiene.js +125 -0
- package/src/sast/fastapi-hardening.js +145 -0
- package/src/sast/go-extended.js +84 -0
- package/src/sast/host-header.js +106 -0
- package/src/sast/index.js +17 -0
- package/src/sast/java-ast-folding.js +561 -0
- package/src/sast/java-bench-extras.js +708 -0
- package/src/sast/java-collection-passthrough.js +178 -0
- package/src/sast/java-constant-fold.js +244 -0
- package/src/sast/java-deserialization.js +125 -0
- package/src/sast/jndi.js +104 -0
- package/src/sast/juliet-shape.js +324 -0
- package/src/sast/jwt-exp.js +104 -0
- package/src/sast/kotlin.js +82 -0
- package/src/sast/laravel-hardening.js +198 -0
- package/src/sast/ldap-injection.js +100 -0
- package/src/sast/llm-owasp.js +465 -0
- package/src/sast/llm-stored-prompt.js +103 -0
- package/src/sast/llm-trading-agent.js +161 -0
- package/src/sast/llm.js +308 -0
- package/src/sast/logic.js +140 -0
- package/src/sast/mass-assignment.js +101 -0
- package/src/sast/mcp-audit.js +242 -0
- package/src/sast/mobile-manifest.js +195 -0
- package/src/sast/model-load.js +164 -0
- package/src/sast/mutation-xss.js +87 -0
- package/src/sast/nosql-injection.js +82 -0
- package/src/sast/open-redirect.js +119 -0
- package/src/sast/php.js +91 -0
- package/src/sast/pipeline.js +122 -0
- package/src/sast/primary-cwe-java.js +155 -0
- package/src/sast/prompt-firewall.js +151 -0
- package/src/sast/prompt-template.js +157 -0
- package/src/sast/prototype-pollution.js +112 -0
- package/src/sast/python-sinks.js +195 -0
- package/src/sast/quarkus-hardening.js +102 -0
- package/src/sast/rag-poisoning.js +118 -0
- package/src/sast/rate-limit.js +128 -0
- package/src/sast/response-splitting.js +138 -0
- package/src/sast/ruby.js +108 -0
- package/src/sast/rust.js +105 -0
- package/src/sast/solidity.js +167 -0
- package/src/sast/springboot-hardening.js +186 -0
- package/src/sast/ssrf-cloud-metadata.js +80 -0
- package/src/sast/ssti.js +116 -0
- package/src/sast/swift.js +162 -0
- package/src/sast/toctou.js +95 -0
- package/src/sast/webhook.js +101 -0
- package/src/sast/xpath-injection.js +51 -0
- package/src/sast/xxe.js +140 -0
- package/src/sast/zip-slip.js +200 -0
- package/src/sca/base-images.json +45 -0
- package/src/sca/container.js +107 -0
- package/src/sca/dep-confusion.js +134 -0
- package/src/sca/index.js +6 -0
- package/src/sca/popular-packages.json +41 -0
- package/src/sca/sarif-ingest.js +187 -0
- package/src/sca/vuln-function-hints.json +89 -0
- package/src/secrets/index.js +4 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
// Cross-language chain metadata (FR-CHAIN-FILTER + FR-FAMILY-REGISTRY).
|
|
2
|
+
//
|
|
3
|
+
// Phase-1 polyglot bench revealed two issues with the cross-language chain
|
|
4
|
+
// detectors:
|
|
5
|
+
//
|
|
6
|
+
// 1. Chains fired on ANY high-severity finding in the linked file. That
|
|
7
|
+
// included CSRF, header-hardening, body-parser DoS — incidental issues
|
|
8
|
+
// that have nothing to do with what flows across the language boundary.
|
|
9
|
+
// The chain was semantically wrong.
|
|
10
|
+
//
|
|
11
|
+
// 2. Chain findings got auto-slugged family names like
|
|
12
|
+
// `cross-language-taint-client-call-post-us` (truncated to 40 chars).
|
|
13
|
+
// Ugly, brittle, and useless for filtering downstream.
|
|
14
|
+
//
|
|
15
|
+
// Both fixed here. The cross-lang-* modules import these helpers; the
|
|
16
|
+
// helpers are tested in isolation so the contract is auditable.
|
|
17
|
+
|
|
18
|
+
// ─── FR-CHAIN-FILTER ────────────────────────────────────────────────────────
|
|
19
|
+
//
|
|
20
|
+
// Only emit a cross-language chain when the linked finding is in a family
|
|
21
|
+
// that propagates meaningfully across a service boundary. CSRF on the OTHER
|
|
22
|
+
// side of a queue tells you nothing useful; SQL injection does.
|
|
23
|
+
|
|
24
|
+
const CHAIN_WORTHY_FAMILIES = new Set([
  'sql-injection',
  'command-injection',
  'xss',
  'ssrf',
  'code-injection',
  'insecure-deserialization',
  'xxe',
  'path-traversal',
  'jndi-injection',
  'ldap-injection',
  'xpath-injection',
  'nosql-injection',
  'ssti',
  'idor',                // ownership flows across language boundary
  'mass-assignment',     // request-body taint flows
  'prototype-pollution', // pollution flows through JSON
]);

// Fallback matchers, applied to `finding.vuln` only when `finding.family`
// is absent. This keeps minimal finding objects (unit tests in particular)
// usable before the dedupe pipeline has stamped a family on them.
const CHAIN_WORTHY_VULN_PATTERNS = [
  /\bSQL Injection\b/i,
  /\bCommand Injection\b/i,
  /\bXSS\b/i,
  /\bSSRF\b/i,
  /\bCode Injection\b/i,
  /\bDeserialization\b/i,
  /\bXXE\b/i,
  /\bPath Traversal\b/i,
  /\bJNDI\b/i,
  /\bLDAP Injection\b/i,
  /\bXPath Injection\b/i,
  /\bNoSQL Injection\b/i,
  /\bSSTI\b/i,
  /\bIDOR\b/i,
  /\bMass Assignment\b/i,
  /\bPrototype Pollution\b/i,
];

/**
 * Decide whether a finding may serve as the "tail" of a cross-language chain.
 *
 * A stamped `family` is authoritative: the answer is simply whether that
 * family is in CHAIN_WORTHY_FAMILIES, and `vuln` is not consulted. Only when
 * no family is set does the function fall back to pattern-matching the
 * `vuln` text.
 *
 * @param {object} finding - Finding-like object; `family` and `vuln` are read.
 * @returns {boolean} true when the finding's taint propagates across a
 *   service boundary; false for non-objects and everything else.
 */
export function isChainWorthy(finding) {
  if (finding === null || typeof finding !== 'object') {
    return false;
  }

  const stampedFamily = finding.family;
  if (stampedFamily) {
    return CHAIN_WORTHY_FAMILIES.has(stampedFamily);
  }

  const description = finding.vuln;
  if (typeof description === 'string') {
    for (const pattern of CHAIN_WORTHY_VULN_PATTERNS) {
      if (pattern.test(description)) return true;
    }
  }
  return false;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Keep only the findings that qualify as the tail of a cross-language chain.
 *
 * @param {Array<object>} findings - Candidate findings (expected to be the
 *   high-severity ones already).
 * @returns {Array<object>} New array with the chain-worthy entries, in their
 *   original order; an empty array when `findings` is not an array.
 */
export function chainWorthyFindings(findings) {
  return Array.isArray(findings)
    ? findings.filter((candidate) => isChainWorthy(candidate))
    : [];
}
|
|
76
|
+
|
|
77
|
+
// ─── FR-FAMILY-REGISTRY ─────────────────────────────────────────────────────
|
|
78
|
+
//
|
|
79
|
+
// Each cross-language detector has a canonical family name. Reports filter
|
|
80
|
+
// by these stable strings instead of an auto-slug of the chain's vuln text.
|
|
81
|
+
|
|
82
|
+
export const XLANG_FAMILIES = Object.freeze({
  openapi: 'xlang-openapi',
  grpc: 'xlang-grpc',
  graphql: 'xlang-graphql',
  queue: 'xlang-queue',
  orm: 'xlang-orm',
  iac: 'xlang-iac',
});

/**
 * Resolve the canonical family for a cross-language chain from the boundary
 * type that produced it. Detectors call this when emitting chain findings.
 *
 * @param {string} boundary - Boundary key, e.g. 'openapi', 'grpc', 'queue'.
 * @returns {string} The matching value from XLANG_FAMILIES, or
 *   'xlang-unknown' for non-strings and unregistered keys.
 */
export function familyForBoundary(boundary) {
  if (typeof boundary !== 'string') return 'xlang-unknown';
  // Own-property check: a plain `XLANG_FAMILIES[boundary]` lookup would walk
  // the prototype chain and hand back Object.prototype members for keys such
  // as 'constructor', 'toString' or '__proto__' instead of a family string.
  if (!Object.prototype.hasOwnProperty.call(XLANG_FAMILIES, boundary)) {
    return 'xlang-unknown';
  }
  return XLANG_FAMILIES[boundary];
}
|
|
99
|
+
|
|
100
|
+
// For tests + the no-dead-modules check.
|
|
101
|
+
// CHAIN_WORTHY_FAMILIES is declared without `export`; this wrapper object is
// how code outside the module (tests, per the note above) can reach the set.
export const _internals = { CHAIN_WORTHY_FAMILIES };
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { isChainWorthy, familyForBoundary } from './cross-lang-meta.js';
|
|
2
|
+
|
|
3
|
+
// OpenAPI-aware cross-language taint propagation (Sentinel-parity FR-X-1).
|
|
4
|
+
//
|
|
5
|
+
// First-cut implementation: when an openapi.json / openapi.yaml is present in
|
|
6
|
+
// the scan root, build a map from (method, path) → endpoint description. For
|
|
7
|
+
// any client-side fetch/axios/requests call whose URL matches a known
|
|
8
|
+
// endpoint AND whose response is then passed to a sink (SQL, exec, write,
|
|
9
|
+
// innerHTML), emit a `cross_language: true` finding that ties the client
|
|
10
|
+
// site to the server route as a chain.
|
|
11
|
+
//
|
|
12
|
+
// Conservative on purpose: only flow taint when BOTH endpoints are
|
|
13
|
+
// unambiguously mapped. Ambiguous matches produce zero findings rather than
|
|
14
|
+
// false positives.
|
|
15
|
+
//
|
|
16
|
+
// Out of scope (deferred to a follow-up): gRPC .proto introspection, GraphQL
|
|
17
|
+
// resolver-to-resolver tracking, SQL/ORM round-trip, message queues.
|
|
18
|
+
|
|
19
|
+
import * as yaml from 'js-yaml';
|
|
20
|
+
|
|
21
|
+
/**
 * Locate and parse the first usable OpenAPI / Swagger document in the scan.
 *
 * A file qualifies when its base name (case-insensitive) ends in
 * openapi.{json,yaml,yml} or swagger.{json,yaml,yml}. JSON files go through
 * JSON.parse, everything else through yaml.load. Files that fail to parse, or
 * that parse to something without a `paths` member, are skipped silently —
 * this is a best-effort probe, not a validator.
 *
 * NOTE(review): yaml.load runs on repository content; confirm the pinned
 * js-yaml version uses a safe default schema.
 *
 * @param {Object<string, string>} fileContents - Map of file path → text.
 * @returns {{doc: object, file: string} | null} Parsed document and the path
 *   it came from, or null when no candidate file yields a document.
 */
function loadOpenAPI(fileContents) {
  const specName = /openapi\.(?:ya?ml|json)$|swagger\.(?:ya?ml|json)$/;
  const sources = fileContents || {};

  for (const filePath of Object.keys(sources)) {
    const text = sources[filePath];
    const segments = filePath.split('/');
    const fileName = segments[segments.length - 1].toLowerCase();

    if (specName.test(fileName)) {
      try {
        const parsed = /\.json$/i.test(fileName) ? JSON.parse(text) : yaml.load(text);
        if (parsed && parsed.paths) {
          return { doc: parsed, file: filePath };
        }
      } catch {
        // Unparseable candidate: move on to the next file.
      }
    }
  }
  return null;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Flatten an OpenAPI document's `paths` object into endpoint descriptors.
 *
 * Each descriptor carries the upper-cased HTTP method, the raw path template,
 * the literal prefix before the first `{param}` / `:param`, and a regex that
 * matches concrete URLs for the template (each parameter becomes one
 * non-empty path segment).
 *
 * @param {object} doc - Parsed OpenAPI document.
 * @returns {Array<{method: string, path: string, staticPrefix: string, urlRegex: RegExp}>}
 *   One entry per (path, HTTP method) pair; empty when `doc.paths` is absent.
 */
function endpoints(doc) {
  const out = [];
  if (!doc || !doc.paths) return out;

  const httpVerb = /^(?:get|post|put|patch|delete|options|head)$/i;

  for (const [p, methods] of Object.entries(doc.paths)) {
    // A path item may be null (e.g. a YAML key with no value); Object.keys
    // would throw on it and abort the whole scan, so skip it instead.
    if (!methods || typeof methods !== 'object') continue;

    const verbs = Object.keys(methods).filter((m) => httpVerb.test(m));
    if (verbs.length === 0) continue;

    // staticPrefix = the literal prefix before the first {param} or :param.
    // Used to match client URLs that look like '/users/' + id where the only
    // static piece is the prefix.
    const staticPrefix = p.split(/\{|:/)[0];

    // Escape every regex metacharacter except braces (handled next as
    // parameters). `*` and `?` must be escaped too: left raw, a path such as
    // '/files/**' makes `new RegExp` throw and '/a*' matches the wrong URLs.
    const pattern = p
      .replace(/[.*+?^$()|[\]\\]/g, '\\$&')
      .replace(/\{[^}]+\}/g, '[^/?#]+')
      .replace(/:[A-Za-z_][\w]*/g, '[^/?#]+');

    for (const m of verbs) {
      out.push({
        method: m.toUpperCase(),
        path: p,
        staticPrefix,
        urlRegex: new RegExp(`^${pattern}$`),
      });
    }
  }
  return out;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Test whether a URL literal found in source code refers to an endpoint.
 *
 * The scheme+host and the query string are dropped first. The remainder
 * matches when any of these holds:
 *   - it satisfies the endpoint's `urlRegex`;
 *   - it equals the endpoint's `staticPrefix` (at least 3 characters long),
 *     which covers concatenations like '/users/' + id where only the prefix
 *     is visible as a literal;
 *   - it equals the raw path template, placeholders included.
 *
 * @param {string} url - URL or path literal from the source.
 * @param {{urlRegex: RegExp, staticPrefix: string, path: string}} ep
 * @returns {boolean}
 */
function urlMatchesEndpoint(url, ep) {
  const withoutOrigin = url.replace(/^https?:\/\/[^/]+/, '');
  const [pathOnly] = withoutOrigin.split('?');

  return (
    ep.urlRegex.test(pathOnly) ||
    (Boolean(ep.staticPrefix) && ep.staticPrefix.length >= 3 && pathOnly === ep.staticPrefix) ||
    pathOnly === ep.path
  );
}
|
|
70
|
+
|
|
71
|
+
// Find client-side HTTP calls that match an OpenAPI endpoint.
|
|
72
|
+
// Returns Array<{ file, line, method, path, snippet }>
|
|
73
|
+
/**
 * Find client-side HTTP calls whose URL literal matches a known endpoint.
 *
 * Recognised call shapes: fetch(...), axios(...), axios.<verb>(...),
 * requests.<verb>(...), http.request(...), urllib.request.urlopen(...), each
 * with a quoted/backticked string as first argument. Files that are empty,
 * not strings, or longer than 500 000 characters are skipped.
 *
 * When the call names its HTTP verb (axios.post, requests.delete, ...), only
 * endpoints with that method are considered, so a POST call is never
 * attributed to the GET operation that shares its path. Calls without an
 * explicit verb (fetch, axios(...), http.request, urlopen) fall back to the
 * first endpoint whose URL matches.
 *
 * @param {Object<string, string>} fileContents - Map of file path → text.
 * @param {Array<{method: string, path: string}>} eps - Endpoint descriptors.
 * @returns {Array<{file: string, line: number, method: string, path: string, snippet: string}>}
 */
function clientCalls(fileContents, eps) {
  // Groups: 1 = axios verb, 2 = requests verb, 3 = opening quote, 4 = URL.
  const CALL_RE = /\b(?:fetch|axios(?:\.(get|post|put|patch|delete))?|requests\.(get|post|put|patch|delete)|http\.request|urllib\.request\.urlopen)\s*\(\s*([`'"])([^`'"]+)\3/g;
  const out = [];
  for (const [fp, c] of Object.entries(fileContents || {})) {
    if (!c || typeof c !== 'string') continue;
    if (c.length > 500_000) continue;
    // Split lazily and only once per file; most files contain no match.
    let lines = null;
    for (const m of c.matchAll(CALL_RE)) {
      const verb = (m[1] || m[2] || '').toUpperCase();
      const url = m[4];
      const ep = eps.find(
        (e) => (verb === '' || e.method === verb) && urlMatchesEndpoint(url, e),
      );
      if (!ep) continue;
      const line = c.substring(0, m.index).split('\n').length;
      if (lines === null) lines = c.split('\n');
      out.push({
        file: fp, line,
        method: ep.method, path: ep.path,
        snippet: (lines[line - 1] || '').trim().slice(0, 200),
      });
    }
  }
  return out;
}
|
|
95
|
+
|
|
96
|
+
// Match an endpoint to its server-side route handler. Looks for express's
|
|
97
|
+
// app.METHOD(path, ...) or fastapi's @app.METHOD(path) or Flask's @app.route.
|
|
98
|
+
/**
 * Find server-side route registrations that correspond to known endpoints.
 *
 * Three shapes are recognised: `app|router|server|fastify.<verb>(path` in
 * JS/TS files, `@<name>.<verb>(path` decorators in .py files, and Flask's
 * `@app|bp|blueprint.route(path[, methods=[...]])` in .py files. A Flask
 * route with no `methods=` argument is treated as GET. Files that are empty,
 * not strings, or longer than 500 000 characters are skipped.
 *
 * Known limit: for Flask only the first method in the `methods` list is
 * captured.
 *
 * @param {Object<string, string>} fileContents - Map of file path → text.
 * @param {Array<{method: string, path: string}>} eps - Endpoint descriptors.
 * @returns {Array<{file: string, line: number, method: string, path: string}>}
 */
function serverRoutes(fileContents, eps) {
  const ROUTE_RE = [
    // Express / Fastify / Koa
    { lang: 'js', flask: false, re: /\b(?:app|router|server|fastify)\s*\.\s*(get|post|put|patch|delete)\s*\(\s*([`'"])([^`'"]+)\2/gi },
    // FastAPI
    { lang: 'py', flask: false, re: /@\w+\s*\.\s*(get|post|put|patch|delete)\s*\(\s*([`'"])([^`'"]+)\2/gi },
    // Flask. The whole `methods=[...]` clause is optional so that a bare
    // `@app.route('/x')` is still detected (and defaults to GET below). When
    // present, the capture anchors on the opening quote of the first method
    // literal; `[^)]*?` is lazy so it cannot run past the decorator's `)`.
    { lang: 'py', flask: true, re: /@(?:app|bp|blueprint)\s*\.\s*route\s*\(\s*([`'"])([^`'"]+)\1(?:[^)]*?methods\s*=\s*\[\s*['"]([A-Z]+))?/gi },
  ];
  const out = [];
  for (const [fp, c] of Object.entries(fileContents || {})) {
    if (!c || typeof c !== 'string') continue;
    if (c.length > 500_000) continue;
    for (const { lang, flask, re } of ROUTE_RE) {
      if (lang === 'js' && !/\.(?:js|jsx|ts|tsx|mjs|cjs)$/i.test(fp)) continue;
      if (lang === 'py' && !/\.py$/i.test(fp)) continue;
      for (const m of c.matchAll(re)) {
        // Flask captures (quote, path, method?); the other two (method, quote, path).
        const method = (flask ? (m[3] || 'GET') : (m[1] || '')).toUpperCase();
        const urlPattern = flask ? m[2] : m[3];
        const ep = eps.find(
          (e) => e.method === method && urlMatchesEndpoint(urlPattern, e),
        );
        if (!ep) continue;
        const line = c.substring(0, m.index).split('\n').length;
        out.push({ file: fp, line, method, path: ep.path });
      }
    }
  }
  return out;
}
|
|
132
|
+
|
|
133
|
+
// Top-level: returns Finding[] describing client-side calls whose response is
|
|
134
|
+
// returned from a server-side handler that itself has tainted-input findings.
|
|
135
|
+
/**
 * Emit cross-language chain findings that tie a client HTTP call to the
 * server handler for the same OpenAPI operation.
 *
 * Returns an empty list unless all of these exist: an OpenAPI document with
 * at least one endpoint, at least one matching client call, and at least one
 * matching server route. A handler only produces a chain when its file holds
 * a critical/high finding that passes isChainWorthy (FR-CHAIN-FILTER); the
 * first such finding in that file is used as the chain's seed.
 *
 * @param {Object<string, string>} fileContents - Map of file path → text.
 * @param {Array<object>} existingFindings - Findings from earlier detectors.
 * @returns {Array<object>} One finding per (client call, matching handler).
 */
export function scanCrossLangOpenAPI(fileContents, existingFindings) {
  const spec = loadOpenAPI(fileContents);
  if (!spec) return [];

  const eps = endpoints(spec.doc);
  if (eps.length === 0) return [];

  const callers = clientCalls(fileContents, eps);
  if (callers.length === 0) return [];

  const handlers = serverRoutes(fileContents, eps);
  if (handlers.length === 0) return [];

  // Bucket the eligible seed findings by the file they sit in.
  const seedsByFile = new Map();
  for (const f of existingFindings || []) {
    const eligible =
      Boolean(f.file) &&
      /critical|high/i.test(f.severity || '') &&
      isChainWorthy(f);
    if (!eligible) continue;

    const bucket = seedsByFile.get(f.file);
    if (bucket) {
      bucket.push(f);
    } else {
      seedsByFile.set(f.file, [f]);
    }
  }

  const findings = [];
  for (const call of callers) {
    for (const handler of handlers) {
      if (handler.method !== call.method || handler.path !== call.path) continue;

      const seeds = seedsByFile.get(handler.file) || [];
      if (seeds.length > 0) {
        findings.push(buildOpenApiChainFinding(call, handler, seeds[0]));
      }
    }
  }
  return findings;
}

// Assemble the finding object for one client call → handler → seed chain.
function buildOpenApiChainFinding(c, h, seed) {
  return {
    id: `xlang-openapi:${c.file}:${c.line}:${h.method}-${h.path}`,
    file: c.file,
    line: c.line,
    vuln: `Cross-Language Taint: client call → ${h.method} ${h.path} (server handler in ${h.file}:${h.line} has a ${seed.severity} finding)`,
    severity: 'high',
    cwe: seed.cwe || 'CWE-862',
    stride: 'Information Disclosure',
    snippet: c.snippet,
    remediation: `The server-side handler for ${h.method} ${h.path} (${h.file}:${h.line}) has unaddressed ${seed.severity}-severity findings — specifically "${seed.vuln}". A response from that handler that flows into a client-side sink (innerHTML, eval, exec) propagates the underlying issue. Fix the server-side finding first.`,
    parser: 'XLANG-OPENAPI',
    family: familyForBoundary('openapi'), // FR-FAMILY-REGISTRY
    confidence: 0.65,
    cross_language: true,
    chain: [
      { file: c.file, line: c.line, label: 'client-call' },
      { file: h.file, line: h.line, label: `${h.method} ${h.path}` },
      { file: seed.file, line: seed.line, label: seed.vuln },
    ],
  };
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { isChainWorthy, familyForBoundary } from './cross-lang-meta.js';
|
|
2
|
+
|
|
3
|
+
// SQL / ORM round-trip taint (Sentinel-parity FR-DET-3).
|
|
4
|
+
//
|
|
5
|
+
// When a tainted value is written to column C of table T via an ORM `create`
|
|
6
|
+
// or `update`, subsequent reads of T.C are tainted — the database is just a
|
|
7
|
+
// persistence layer, not a sanitizer. This module builds a table.column→
|
|
8
|
+
// tainted-source registry and emits chains.
|
|
9
|
+
//
|
|
10
|
+
// Coverage:
|
|
11
|
+
// - JS/TS: Mongoose (.create / .save / .findOne), Sequelize, Prisma
|
|
12
|
+
// - Python: SQLAlchemy session.add, Django ORM .objects.create / .filter
|
|
13
|
+
// - Ruby: ActiveRecord Model.create / Model.where
|
|
14
|
+
// - Go: GORM .Create / .Where
|
|
15
|
+
// - PHP: Eloquent ::create / ::where
|
|
16
|
+
//
|
|
17
|
+
// The detector is necessarily heuristic; we name table+column by best-effort.
|
|
18
|
+
|
|
19
|
+
// Heuristic taint marker: matches when an expression contains one of these
// request-style identifiers as a whole word (`\b` on both sides). The regex
// has no `g` flag, so repeated `.test()` calls carry no `lastIndex` state.
const TAINT_HINTS = /\b(req|request|ctx\.request|params|input|userInput|body|query|cookies|headers)\b/;
|
|
20
|
+
|
|
21
|
+
// Identify ORM writes that bind a literal field name to a tainted value.
|
|
22
|
+
// Returns [{file, line, model, field, taintHint}].
|
|
23
|
+
function findOrmWrites(fileContents) {
|
|
24
|
+
const out = [];
|
|
25
|
+
for (const [fp, c] of Object.entries(fileContents || {})) {
|
|
26
|
+
if (!c || typeof c !== 'string') continue;
|
|
27
|
+
if (c.length > 500_000) continue;
|
|
28
|
+
const lang = (fp.match(/\.([a-z]+)$/i) || [])[1] || '';
|
|
29
|
+
if (!/^(?:js|jsx|ts|tsx|mjs|cjs|py|rb|go|php)$/i.test(lang)) continue;
|
|
30
|
+
|
|
31
|
+
// Match patterns like Model.create({ <field>: <expr-with-taint> })
|
|
32
|
+
// or await Model.create({ data: { <field>: <expr> } }) (Prisma)
|
|
33
|
+
const reJsPyRb = /\b([A-Z]\w+)\s*\.\s*(?:create|save|update|build|insert|upsert)\s*\(\s*\{([^}]{0,500})\}/g;
|
|
34
|
+
let m;
|
|
35
|
+
while ((m = reJsPyRb.exec(c))) {
|
|
36
|
+
const model = m[1];
|
|
37
|
+
const body = m[2];
|
|
38
|
+
// Prisma wraps under `data: { ... }`
|
|
39
|
+
const prismaInner = body.match(/data\s*:\s*\{([^}]{0,400})\}/);
|
|
40
|
+
const fields = prismaInner ? prismaInner[1] : body;
|
|
41
|
+
const fieldRe = /\b(\w+)\s*:\s*([^,}\n]+)/g;
|
|
42
|
+
let fm;
|
|
43
|
+
while ((fm = fieldRe.exec(fields))) {
|
|
44
|
+
const field = fm[1];
|
|
45
|
+
const val = fm[2].trim();
|
|
46
|
+
if (TAINT_HINTS.test(val)) {
|
|
47
|
+
const line = c.substring(0, m.index).split('\n').length;
|
|
48
|
+
out.push({ file: fp, line, model, field, val: val.slice(0, 60) });
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
// Python kwargs: Model.objects.create(field1=value, field2=value)
|
|
53
|
+
const rePyKw = /\b([A-Z]\w+)\.objects\.create\s*\(([^)]{0,500})\)/g;
|
|
54
|
+
while ((m = rePyKw.exec(c))) {
|
|
55
|
+
const model = m[1];
|
|
56
|
+
const body = m[2];
|
|
57
|
+
const kwRe = /\b(\w+)\s*=\s*([^,)]+)/g;
|
|
58
|
+
let km;
|
|
59
|
+
while ((km = kwRe.exec(body))) {
|
|
60
|
+
const field = km[1];
|
|
61
|
+
const val = km[2].trim();
|
|
62
|
+
if (TAINT_HINTS.test(val)) {
|
|
63
|
+
const line = c.substring(0, m.index).split('\n').length;
|
|
64
|
+
out.push({ file: fp, line, model, field, val: val.slice(0, 60) });
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
// GORM: db.Create(&user) or db.Model(&User{}).Where(...).Update("col", val)
|
|
69
|
+
const reGormUpdate = /db\s*\.\s*(?:Model\([^)]*\)\s*\.\s*)?(?:Where[^.]*\.\s*)?Update\s*\(\s*"(\w+)"\s*,\s*([^)]+)\)/g;
|
|
70
|
+
while ((m = reGormUpdate.exec(c))) {
|
|
71
|
+
const field = m[1];
|
|
72
|
+
const val = m[2].trim();
|
|
73
|
+
if (TAINT_HINTS.test(val)) {
|
|
74
|
+
const line = c.substring(0, m.index).split('\n').length;
|
|
75
|
+
out.push({ file: fp, line, model: '<gorm>', field, val: val.slice(0, 60) });
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
return out;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Locate candidate ORM read call sites: a capitalised identifier followed by
// one of the query methods below (Model.findOne(...), Model.where(...),
// Model.objects.filter(...), ...).
//
// `fileContents` maps file path -> file text; non-string / empty entries and
// files longer than 500,000 characters are skipped.
// Returns [{file, line, model}] with `line` being the 1-based line on which
// the call starts. No column is extracted here — only the model name.
function findOrmReads(fileContents) {
  const readCall = /\b([A-Z]\w+)\s*\.\s*(?:findOne|findAll|findBy\w*|find|findById|first|last|where|filter|objects\.get|objects\.filter|objects\.all)\s*\(/g;
  const sites = [];
  for (const [path, text] of Object.entries(fileContents || {})) {
    const scannable = typeof text === 'string' && text.length > 0 && text.length <= 500_000;
    if (!scannable) continue;
    // matchAll works on a private copy of the regex, so `readCall` can be
    // shared across files without carrying lastIndex state.
    for (const hit of text.matchAll(readCall)) {
      const precedingLines = text.substring(0, hit.index).split('\n');
      sites.push({ file: path, line: precedingLines.length, model: hit[1] });
    }
  }
  return sites;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Emit ORM round-trip chain findings: a tainted write to Model.field, a read
 * of the same Model anywhere in the project, and an existing finding within
 * 20 lines of that read in the same file.
 *
 * @param {Object<string, string>} fileContents  file path -> file text
 * @param {Array<object>} existingFindings  findings already produced by other
 *   detectors; only critical/high/medium ones accepted by `isChainWorthy` are
 *   used as sinks. Each may carry its location on `f.sink` or on `f` itself.
 * @returns {Array<object>} chain findings (severity 'medium', parser
 *   'XLANG-ORM'); empty when there are no tainted writes or no reads.
 */
export function scanCrossLangOrm(fileContents, existingFindings) {
  const writes = findOrmWrites(fileContents);
  if (writes.length === 0) return [];
  const reads = findOrmReads(fileContents);
  if (reads.length === 0) return [];

  // Index sinks: collect sink lines + their labels from existing findings.
  const sinksByFile = new Map();
  for (const f of existingFindings || []) {
    // Tolerate holes / non-object entries instead of throwing on `f.sink`.
    if (!f || typeof f !== 'object') continue;
    const sink = f.sink || f;
    const file = sink.file || f.file;
    const line = sink.line || f.line;
    if (!file || !line) continue;
    if (!/critical|high|medium/i.test(f.severity || '')) continue;
    if (!isChainWorthy(f)) continue; // FR-CHAIN-FILTER
    if (!sinksByFile.has(file)) sinksByFile.set(file, []);
    sinksByFile.get(file).push({ line, vuln: f.vuln, severity: f.severity });
  }

  // Group reads by model once so each write does a lookup, not a full scan.
  const readsByModel = new Map();
  for (const r of reads) {
    if (!readsByModel.has(r.model)) readsByModel.set(r.model, []);
    readsByModel.get(r.model).push(r);
  }

  const out = [];
  const reported = new Set(); // (write site, read site, field) already emitted
  const usedIds = new Set();  // ids already handed out
  for (const w of writes) {
    // Find any READ of the same Model — anywhere in the project.
    const readers = readsByModel.get(w.model);
    if (!readers) continue;
    for (const r of readers) {
      // Check if there's a sink in the reader's file near the read line.
      const sinksInReadFile = sinksByFile.get(r.file) || [];
      const nearby = sinksInReadFile.filter(s => Math.abs((s.line || 0) - r.line) <= 20);
      if (!nearby.length) continue;
      const seed = nearby[0];

      // Several tainted fields written by one call share file:line on both
      // ends. Drop exact repeats, and keep ids unique by suffixing the field
      // name on every finding after the first for a given site pair.
      const baseId = `xlang-orm:${w.file}:${w.line}->${r.file}:${r.line}`;
      const dedupeKey = `${baseId}|${w.field}`;
      if (reported.has(dedupeKey)) continue;
      reported.add(dedupeKey);
      const id = usedIds.has(baseId) ? `${baseId}:${w.field}` : baseId;
      usedIds.add(id);

      out.push({
        id,
        file: r.file, line: r.line,
        vuln: `Cross-Language Taint (ORM round-trip): ${w.model}.${w.field} written tainted at ${w.file}:${w.line} → read at ${r.file}:${r.line} → reaches ${seed.severity} sink`,
        severity: 'medium',
        cwe: 'CWE-89',
        snippet: `(round-trip via ${w.model}.${w.field})`,
        remediation: `A tainted value is written to ${w.model}.${w.field} at ${w.file}:${w.line} and read at ${r.file}:${r.line}, then flows into "${seed.vuln}". The DB doesn't sanitize — coerce/validate the value on write OR on read, ideally both. For Mongo, ensure the value is a primitive (String(...)) before write; for SQL, parameterize the downstream sink.`,
        parser: 'XLANG-ORM',
        family: familyForBoundary('orm'), // FR-FAMILY-REGISTRY
        confidence: 0.55,
        cross_language: true,
        chain: [
          { file: w.file, line: w.line, label: `write ${w.model}.${w.field}` },
          { file: r.file, line: r.line, label: `read ${w.model}` },
          { file: r.file, line: seed.line, label: seed.vuln },
        ],
      });
    }
  }
  return out;
}
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import { isChainWorthy, familyForBoundary } from './cross-lang-meta.js';
|
|
2
|
+
|
|
3
|
+
// Cross-language message-queue taint propagation (FR-XSAT-4 — P1.5).
|
|
4
|
+
//
|
|
5
|
+
// When a project ships producer and consumer code for the same message
|
|
6
|
+
// queue (Kafka topic, AWS SQS queue, RabbitMQ exchange, Redis stream,
|
|
7
|
+
// Google Pub/Sub topic), tainted data flowing into the producer carries
|
|
8
|
+
// through the queue and emerges in the consumer's hand. The engine pairs
|
|
9
|
+
// producer call sites with consumer handlers by topic name; when either
|
|
10
|
+
// end has a high+ finding, we emit a `cross_language: true` chain finding
|
|
11
|
+
// at the OTHER end so engineers see the transitive flow.
|
|
12
|
+
//
|
|
13
|
+
// This module is deliberately conservative: we only emit chains when the
|
|
14
|
+
// producer and consumer agree on the topic name (string literal match).
|
|
15
|
+
// Constant-folded topic names (variables, env vars) get a `topic: 'inferred'`
|
|
16
|
+
// tag and lower confidence rather than dropped — that's the precision/recall
|
|
17
|
+
// trade-off documented in the parent PRD's Pillar-6 honesty commitments.
|
|
18
|
+
//
|
|
19
|
+
// Detectors per queue tech:
|
|
20
|
+
// Kafka — kafkajs (Node), confluent-kafka (Python), kafka-clients (Java), sarama (Go)
|
|
21
|
+
// AWS SQS — aws-sdk (Node), boto3 (Python), aws-sdk-java
|
|
22
|
+
// RabbitMQ — amqplib (Node), pika (Python), RabbitTemplate (Spring)
|
|
23
|
+
// Redis Streams — redis (xadd/xread) — Node/Python/Java/Go
|
|
24
|
+
// Google Pub/Sub — @google-cloud/pubsub (Node), google-cloud-pubsub (Python)
|
|
25
|
+
|
|
26
|
+
// ─── Topic extraction ──────────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
// Each regex finds either a producer-write site or a consumer-handler site.
|
|
29
|
+
// Group 1 = topic name (string-literal contents, or the bare stream key for the redis-cli XADD/XREAD forms).
|
|
30
|
+
// Producer-side call shapes, one entry per (queue tech, client library form).
// Every regex carries the `g` flag and captures the topic / queue / stream
// name in group 1.
const PRODUCER_PATTERNS = [
  // Kafka
  { tech: 'kafka', re: /\bproducer\s*\.\s*send\s*\(\s*\{[^}]*?topic\s*:\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /\bsendMessage\s*\(\s*['"]([^'"]+)['"]/g },                                     // kafka-clients (Java)
  // Java: the topic is the FIRST constructor argument. Optional generic
  // parameters (`<String, String>`, `<>`) are skipped so the capture lands on
  // the topic literal rather than on the key/value that follows the comma.
  { tech: 'kafka', re: /producer\.send\s*\(\s*new\s+ProducerRecord\s*(?:<[^()]*>)?\s*\(\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /(?:Producer|producer)\.produce\s*\(\s*['"]([^'"]+)['"]/g },                   // confluent-kafka Python
  // SQS
  { tech: 'sqs', re: /\bsendMessage\s*\(\s*\{[^}]*?QueueUrl\s*:\s*['"][^'"]*?\/([^'"\/]+)['"]/g },     // aws-sdk node, queue URL ends with name
  { tech: 'sqs', re: /\bsend_message\s*\(\s*QueueUrl\s*=\s*['"][^'"]*?\/([^'"\/]+)['"]/g },            // boto3
  // RabbitMQ
  { tech: 'rabbit', re: /\bpublish\s*\(\s*['"]([^'"]+)['"]/g },                                         // amqplib & pika common
  { tech: 'rabbit', re: /rabbitTemplate\.convertAndSend\s*\(\s*['"]([^'"]+)['"]/g },                   // Spring
  // Redis streams (multi-language XADD shapes)
  { tech: 'redis', re: /\.\s*xadd\s*\(\s*['"]([^'"]+)['"]/gi },                                         // node/ioredis: xadd('key', ...)
  { tech: 'redis', re: /\bXADD\s+([\w:.-]+)/g },                                                        // redis-cli style in code strings
  { tech: 'redis', re: /XAddArgs\s*\{\s*Stream\s*:\s*['"]([^'"]+)['"]/g },                             // go-redis: &redis.XAddArgs{Stream: "..."}
  { tech: 'redis', re: /\.\s*xadd\s*\(\s*name\s*=\s*['"]([^'"]+)['"]/g },                              // python redis-py: xadd(name="...")
  // Google Pub/Sub
  { tech: 'pubsub', re: /\btopic\s*\(\s*['"]([^'"]+)['"]\s*\)\s*\.publish/g },                          // @google-cloud/pubsub
  { tech: 'pubsub', re: /publisher\.publish\s*\(\s*topic_path\s*\([^,]+,\s*['"]([^'"]+)['"]/g },       // python
];
|
|
51
|
+
|
|
52
|
+
// Consumer-side call shapes, one entry per (queue tech, client library form).
// Every regex carries the `g` flag and captures the topic / queue / stream
// name in group 1.
const CONSUMER_PATTERNS = [
  // Kafka
  { tech: 'kafka', re: /\bconsumer\s*\.\s*subscribe\s*\(\s*\{[^}]*?topics?\s*:\s*\[?\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /\.\s*subscribe\s*\(\s*\[\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /@KafkaListener\s*\(\s*topics\s*=\s*\{?\s*['"]([^'"]+)['"]/g },                 // Spring Boot
  // SQS — matched on the method name alone (any receiver), mirroring the
  // producer-side `sendMessage({ QueueUrl: ... })` pattern; previously only a
  // variable literally named `sqsClient` was recognised.
  { tech: 'sqs', re: /\breceiveMessage\s*\(\s*\{[^}]*?QueueUrl\s*:\s*['"][^'"]*?\/([^'"\/]+)['"]/g },
  { tech: 'sqs', re: /\breceive_message\s*\(\s*QueueUrl\s*=\s*['"][^'"]*?\/([^'"\/]+)['"]/g },
  // RabbitMQ
  { tech: 'rabbit', re: /\.\s*consume\s*\(\s*['"]([^'"]+)['"]/g },                                      // amqplib
  { tech: 'rabbit', re: /\.\s*basic_consume\s*\(\s*[^,]*,\s*queue\s*=\s*['"]([^'"]+)['"]/g },          // pika
  { tech: 'rabbit', re: /@RabbitListener\s*\(\s*queues\s*=\s*['"]([^'"]+)['"]/g },                     // Spring
  // Redis streams (multi-language XREAD shapes)
  { tech: 'redis', re: /\.\s*xread(?:group)?\s*\(\s*[^)]*?streams\s*:\s*\{?\s*['"]([^'"]+)['"]/gi },
  { tech: 'redis', re: /\bXREAD(?:GROUP)?\s+(?:GROUP\s+\S+\s+\S+\s+)?(?:COUNT\s+\d+\s+)?STREAMS\s+([\w:.-]+)/gi },
  { tech: 'redis', re: /\.\s*xread\s*\(\s*\{[^}]*?key\s*:\s*['"]([^'"]+)['"]/gi },                     // node-redis v4: xread({key:'...'})
  // Google Pub/Sub
  { tech: 'pubsub', re: /\bsubscription\s*\(\s*['"]([^'"]+)['"]\s*\)\s*\.on/g },
  { tech: 'pubsub', re: /\bsubscriber\.subscribe\s*\(\s*subscription_path\s*\([^,]+,\s*['"]([^'"]+)['"]/g },
];
|
|
72
|
+
|
|
73
|
+
// 1-based line number of character offset `idx` within `raw`: one plus the
// number of newline characters that precede the offset.
function lineOf(raw, idx) {
  const prefix = raw.substring(0, idx);
  let lineNumber = 1;
  for (let at = prefix.indexOf('\n'); at !== -1; at = prefix.indexOf('\n', at + 1)) {
    lineNumber += 1;
  }
  return lineNumber;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Scan every code file for queue producer and consumer call sites.
 *
 * Entries that are not strings, exceed 500,000 characters, or whose path does
 * not pass `_looksLikeCodeFile` are ignored. Each pattern is run on a fresh
 * RegExp copy so the shared pattern objects never carry match state.
 *
 * @param {Object<string, string>} fileContents  file path -> file text
 * @returns {{producers: Map<string, Array<{file, line, tech}>>,
 *            consumers: Map<string, Array<{file, line, tech}>>}}
 *   call sites grouped by topic as normalized by `_normTopic`; matches whose
 *   normalized topic is empty are dropped.
 */
function indexQueueSites(fileContents) {
  const producers = new Map();
  const consumers = new Map();

  // Run one pattern table over one file and file every hit under its topic.
  const collect = (patterns, bucket, path, text) => {
    for (const { tech, re } of patterns) {
      const fresh = new RegExp(re.source, re.flags);
      for (const hit of text.matchAll(fresh)) {
        const topic = _normTopic(hit[1]);
        if (!topic) continue;
        if (!bucket.has(topic)) bucket.set(topic, []);
        bucket.get(topic).push({ file: path, line: lineOf(text, hit.index), tech });
      }
    }
  };

  for (const [path, text] of Object.entries(fileContents || {})) {
    const scannable = typeof text === 'string' && text.length <= 500_000;
    if (!scannable) continue;
    if (!_looksLikeCodeFile(path)) continue;
    collect(PRODUCER_PATTERNS, producers, path, text);
    collect(CONSUMER_PATTERNS, consumers, path, text);
  }
  return { producers, consumers };
}
|
|
119
|
+
|
|
120
|
+
// Canonical topic key: trimmed, lower-cased, with any leading slashes
// removed. Anything that is not a non-empty string normalizes to ''.
function _normTopic(s) {
  const usable = typeof s === 'string' && s.length > 0;
  if (!usable) return '';
  const lowered = s.trim().toLowerCase();
  return lowered.replace(/^\/+/, '');
}
|
|
124
|
+
|
|
125
|
+
// True when the path ends in one of the source-file extensions scanned for
// queue call sites (matched case-insensitively).
function _looksLikeCodeFile(fp) {
  const codeExtension = /\.(js|jsx|ts|tsx|mjs|cjs|py|java|kt|go|rb|cs|rs|php|scala|swift)$/i;
  const hit = codeExtension.exec(fp);
  return hit !== null;
}
|
|
128
|
+
|
|
129
|
+
// ─── Chain emission ─────────────────────────────────────────────────────────
|
|
130
|
+
|
|
131
|
+
/**
 * Pair every producer site with every consumer site that shares a normalized
 * topic, and emit one chain finding per qualifying finding located in the
 * file on the opposite end of the pair.
 *
 * A finding qualifies when its severity contains "critical" or "high" and
 * `isChainWorthy` accepts it (FR-CHAIN-FILTER: classes such as CSRF or
 * header hardening do not carry taint across a queue). Findings are looked
 * up by file, not by exact line.
 *
 * @param {Object<string, string>} fileContents  file path -> file text
 * @param {Array<object>} findings  findings produced by other detectors
 * @returns {Array<object>} chain findings ready to splice into finalFindings;
 *   empty when no producer or no consumer site was found.
 */
export function scanCrossLangQueues(fileContents, findings) {
  const sites = indexQueueSites(fileContents);
  if (sites.producers.size === 0 || sites.consumers.size === 0) return [];

  // Bucket the qualifying findings by the file they belong to.
  const severeByFile = new Map();
  for (const candidate of findings || []) {
    const isObject = Boolean(candidate) && typeof candidate === 'object';
    if (!isObject) continue;
    if (!/critical|high/.test(candidate.severity || '')) continue;
    if (!isChainWorthy(candidate)) continue;
    const owner = candidate.file || candidate.sink?.file;
    if (!owner) continue;
    if (!severeByFile.has(owner)) severeByFile.set(owner, []);
    severeByFile.get(owner).push(candidate);
  }
  const findingsIn = (path) => severeByFile.get(path) || [];

  const emitted = [];
  for (const [topic, producerSites] of sites.producers) {
    const consumerSites = sites.consumers.get(topic);
    if (!consumerSites) continue;
    for (const producerSite of producerSites) {
      for (const consumerSite of consumerSites) {
        // Findings in the consumer's file surface at the producer site …
        for (const hit of findingsIn(consumerSite.file)) {
          emitted.push(_chainFinding({
            origin: producerSite,
            target: consumerSite,
            topic,
            sourceFinding: hit,
            dir: 'producer->consumer',
          }));
        }
        // … and findings in the producer's file surface at the consumer site.
        for (const hit of findingsIn(producerSite.file)) {
          emitted.push(_chainFinding({
            origin: consumerSite,
            target: producerSite,
            topic,
            sourceFinding: hit,
            dir: 'consumer->producer',
          }));
        }
      }
    }
  }
  return emitted;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Build one cross-language queue chain finding.
 *
 * The finding is reported at `origin` (the queue site on the opposite end
 * from `sourceFinding`); `target` is the queue site in the file that holds
 * `sourceFinding`.
 *
 * @param {object} args
 * @param {{file, line, tech}} args.origin  site the chain finding is filed at
 * @param {{file, line, tech}} args.target  site on the other end of the queue
 * @param {string} args.topic  normalized topic name shared by both sites
 * @param {object} args.sourceFinding  existing finding the chain points to
 * @param {string} args.dir  'producer->consumer' when `origin` is the
 *   producer, 'consumer->producer' when `origin` is the consumer
 * @returns {object} chain finding with severity one tier below the source's
 */
function _chainFinding({ origin, target, topic, sourceFinding, dir }) {
  // `origin` is the consumer only in the reversed direction; label each end
  // with the role it actually plays instead of always calling the origin the
  // producer and the target the consumer.
  const originIsConsumer = dir === 'consumer->producer';
  const originRole = originIsConsumer ? 'consumer' : 'producer';
  const targetRole = originIsConsumer ? 'producer' : 'consumer';
  return {
    id: `xlang-queue:${origin.file}:${origin.line}->${target.file}:${target.line}:${topic}`,
    file: origin.file,
    line: origin.line,
    vuln: `Cross-language taint via ${target.tech} topic '${topic}' — ${dir} — reaches ${sourceFinding.vuln}`,
    severity: _downgradeSeverity(sourceFinding.severity),
    cwe: sourceFinding.cwe || null,
    parser: 'XLANG-QUEUE',
    family: familyForBoundary('queue'),  // FR-FAMILY-REGISTRY: canonical name
    cross_language: true,
    boundary: 'queue',
    topic,
    tech: target.tech,
    confidence: 0.6,
    source: { file: origin.file, line: origin.line, label: `${target.tech} ${originRole} (topic ${topic})` },
    sink: { file: target.file, line: target.line, label: `${target.tech} ${targetRole} reaches ${sourceFinding.vuln}` },
    remediation: `A tainted message written to '${topic}' is read by a handler with a high-severity finding (${sourceFinding.cwe || sourceFinding.vuln}). Validate the payload at both ends: producer should not forward unsanitized request data; consumer should treat the queue body as untrusted.`,
    snippet: `// taint flows: ${origin.file}:${origin.line} → ${target.tech}/${topic} → ${target.file}:${target.line}`,
  };
}
|
|
201
|
+
|
|
202
|
+
/**
 * Demote a severity by one tier.
 *
 * The chain finding is informational alongside the source finding — it is
 * demoted so it doesn't double-count in severity bucketing.
 *
 * @param {string} [sev]  source severity; matched case-insensitively after
 *   trimming. A missing / empty value is treated as 'high'.
 * @returns {string} 'high', 'medium' or 'low'; unrecognised values map to 'low'.
 */
function _downgradeSeverity(sev) {
  // A Map (rather than an object literal) so that names inherited from
  // Object.prototype such as 'constructor' are not mistaken for tiers.
  const next = new Map([
    ['critical', 'high'],
    ['high', 'medium'],
    ['medium', 'low'],
    ['low', 'low'],
  ]);
  const key = sev ? String(sev).trim().toLowerCase() : 'high';
  return next.get(key) ?? 'low';
}
|
|
208
|
+
|
|
209
|
+
/**
 * Exported pass-through to `indexQueueSites`, for tests + bench tooling.
 *
 * @param {Object<string, string>} fileContents  file path -> file text
 * @returns {{producers: Map, consumers: Map}} whatever `indexQueueSites` returns
 */
export function _indexQueueSites(fileContents) {
  const sites = indexQueueSites(fileContents);
  return sites;
}
|