@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,101 @@
1
+ // Cross-language chain metadata (FR-CHAIN-FILTER + FR-FAMILY-REGISTRY).
2
+ //
3
+ // Phase-1 polyglot bench revealed two issues with the cross-language chain
4
+ // detectors:
5
+ //
6
+ // 1. Chains fired on ANY high-severity finding in the linked file. That
7
+ // included CSRF, header-hardening, body-parser DoS — incidental issues
8
+ // that have nothing to do with what flows across the language boundary.
9
+ // The chain was semantically wrong.
10
+ //
11
+ // 2. Chain findings got auto-slugged family names like
12
+ // `cross-language-taint-client-call-post-us` (truncated to 40 chars).
13
+ // Ugly, brittle, and useless for filtering downstream.
14
+ //
15
+ // Both fixed here. The cross-lang-* modules import these helpers; the
16
+ // helpers are tested in isolation so the contract is auditable.
17
+
18
+ // ─── FR-CHAIN-FILTER ────────────────────────────────────────────────────────
19
+ //
20
+ // Only emit a cross-language chain when the linked finding is in a family
21
+ // that propagates meaningfully across a service boundary. CSRF on the OTHER
22
+ // side of a queue tells you nothing useful; SQL injection does.
23
+
24
// Families whose taint is treated as surviving a hop across a service
// boundary. Consulted whenever a finding already carries a `family`; any
// family that is not listed here is considered incidental to the chain.
const CHAIN_WORTHY_FAMILIES = new Set([
  // Injection-style sinks.
  'sql-injection', 'command-injection', 'xss', 'ssrf', 'code-injection',
  // Parser abuse and filesystem escapes.
  'insecure-deserialization', 'xxe', 'path-traversal',
  // Directory, query-language and template injection.
  'jndi-injection', 'ldap-injection', 'xpath-injection', 'nosql-injection', 'ssti',
  // Ownership flows across the language boundary.
  'idor',
  // Request-body taint flows.
  'mass-assignment',
  // Pollution flows through JSON.
  'prototype-pollution',
]);
42
+
43
// Fallback matcher for findings that have no `family` stamped yet (for
// example minimal objects built in unit tests). Each phrase is matched as a
// whole word, case-insensitively, against the finding's `vuln` text.
const CHAIN_WORTHY_VULN_PATTERNS = [
  'SQL Injection',
  'Command Injection',
  'XSS',
  'SSRF',
  'Code Injection',
  'Deserialization',
  'XXE',
  'Path Traversal',
  'JNDI',
  'LDAP Injection',
  'XPath Injection',
  'NoSQL Injection',
  'SSTI',
  'IDOR',
  'Mass Assignment',
  'Prototype Pollution',
].map((phrase) => new RegExp(`\\b${phrase}\\b`, 'i'));
53
+
54
/**
 * Decide whether a finding may act as the "tail" of a cross-language chain.
 *
 * When the finding carries a truthy `family`, the answer is membership in
 * CHAIN_WORTHY_FAMILIES and the `vuln` text is not consulted. Otherwise the
 * `vuln` string is tested against CHAIN_WORTHY_VULN_PATTERNS.
 *
 * @param {object} finding - Finding-like object; `family` and `vuln` are read.
 * @returns {boolean} true when the finding belongs to a chain-worthy family.
 */
export function isChainWorthy(finding) {
  const usable = Boolean(finding) && typeof finding === 'object';
  if (!usable) return false;

  const family = finding.family;
  if (family) return CHAIN_WORTHY_FAMILIES.has(family);

  const text = finding.vuln;
  if (typeof text !== 'string') return false;
  for (const pattern of CHAIN_WORTHY_VULN_PATTERNS) {
    if (pattern.test(text)) return true;
  }
  return false;
}
68
+
69
/**
 * Keep only the findings that isChainWorthy() accepts.
 *
 * @param {Array<object>} findings - Candidate findings; any non-array input
 *   yields an empty list.
 * @returns {Array<object>} A new array holding the chain-worthy entries.
 */
export function chainWorthyFindings(findings) {
  return Array.isArray(findings)
    ? findings.filter((finding) => isChainWorthy(finding))
    : [];
}
76
+
77
+ // ─── FR-FAMILY-REGISTRY ─────────────────────────────────────────────────────
78
+ //
79
+ // Each cross-language detector has a canonical family name. Reports filter
80
+ // by these stable strings instead of an auto-slug of the chain's vuln text.
81
+
82
// Canonical family string for each cross-language boundary type. Frozen so
// that consumers cannot add or rewrite entries at runtime.
export const XLANG_FAMILIES = Object.freeze({
  openapi: 'xlang-openapi',
  grpc: 'xlang-grpc',
  graphql: 'xlang-graphql',
  queue: 'xlang-queue',
  orm: 'xlang-orm',
  iac: 'xlang-iac',
});

/**
 * Resolve the canonical family for a cross-language chain from the boundary
 * type that produced it.
 *
 * @param {string} boundary - Boundary key such as 'openapi' or 'grpc'.
 * @returns {string} The registered family, or 'xlang-unknown' when the key
 *   is not a string or is not registered in XLANG_FAMILIES.
 */
export function familyForBoundary(boundary) {
  const UNKNOWN = 'xlang-unknown';
  if (typeof boundary !== 'string') return UNKNOWN;
  // Own-property check: a plain lookup walks the prototype chain, so keys
  // like 'constructor' or '__proto__' would return a function/object
  // instead of a family string.
  if (!Object.prototype.hasOwnProperty.call(XLANG_FAMILIES, boundary)) return UNKNOWN;
  return XLANG_FAMILIES[boundary] || UNKNOWN;
}
99
+
100
// Exposed for unit tests and for the no-dead-modules check; not part of the
// detector-facing API.
export const _internals = {
  CHAIN_WORTHY_FAMILIES: CHAIN_WORTHY_FAMILIES,
};
@@ -0,0 +1,187 @@
1
+ import { isChainWorthy, familyForBoundary } from './cross-lang-meta.js';
2
+
3
+ // OpenAPI-aware cross-language taint propagation (Sentinel-parity FR-X-1).
4
+ //
5
+ // First-cut implementation: when an openapi.json / openapi.yaml is present in
6
+ // the scan root, build a map from (method, path) → endpoint description. For
7
+ // any client-side fetch/axios/requests call whose URL matches a known
8
+ // endpoint AND whose server-side handler file already carries a chain-worthy
9
+ // critical/high finding, emit a `cross_language: true` finding that ties the
10
+ // client site to the server route as a chain. Client-side sinks are not checked.
11
+ //
12
+ // Conservative on purpose: only flow taint when BOTH endpoints are
13
+ // unambiguously mapped. Ambiguous matches produce zero findings rather than
14
+ // false positives.
15
+ //
16
+ // Out of scope (deferred to a follow-up): gRPC .proto introspection, GraphQL
17
+ // resolver-to-resolver tracking, SQL/ORM round-trip, message queues.
18
+
19
+ import * as yaml from 'js-yaml';
20
+
21
// Locate the first OpenAPI/Swagger document among the scanned files.
//
// A file qualifies when its lower-cased basename ends in openapi.(yaml|yml|json)
// or swagger.(yaml|yml|json) and the parsed document has a truthy `paths`.
// Files that fail to parse are skipped on purpose (best-effort discovery).
// Returns { doc, file } for the first hit, or null when nothing qualifies.
function loadOpenAPI(fileContents) {
  const SPEC_NAME = /openapi\.(?:ya?ml|json)$|swagger\.(?:ya?ml|json)$/;
  const files = fileContents || {};

  for (const fp of Object.keys(files)) {
    const base = fp.split('/').pop().toLowerCase();
    if (!SPEC_NAME.test(base)) continue;

    let doc;
    try {
      // NOTE(review): yaml.load runs on repository-supplied content; confirm
      // the bundled js-yaml version uses a safe default schema.
      doc = base.endsWith('.json') ? JSON.parse(files[fp]) : yaml.load(files[fp]);
    } catch {
      continue; // unparseable spec: keep looking at the remaining files
    }
    if (doc && doc.paths) return { doc, file: fp };
  }
  return null;
}
32
+
33
// Flatten an OpenAPI document's `paths` object into one record per
// (HTTP method, path) pair.
//
// Each record is { method, path, staticPrefix, urlRegex }:
//   - method: upper-cased HTTP verb
//   - path: the path template exactly as written in the document
//   - staticPrefix: the literal text before the first `{param}` or `:param`,
//     used to match client URLs such as '/users/' + id
//   - urlRegex: anchored regex in which every `{param}` / `:param` segment
//     matches one or more characters other than '/', '?' and '#'
//
// Non-operation keys of a path item (e.g. `parameters`, `summary`) are
// ignored. Returns [] when the document has no usable `paths`.
function endpoints(doc) {
  const out = [];
  if (!doc || !doc.paths || typeof doc.paths !== 'object') return out;

  const HTTP_VERB = /^(?:get|post|put|patch|delete|options|head)$/i;

  for (const [p, methods] of Object.entries(doc.paths)) {
    // A path item can be null (e.g. an empty YAML key); Object.keys(null)
    // would throw and abort the whole scan.
    if (!methods || typeof methods !== 'object') continue;

    const verbs = Object.keys(methods).filter((m) => HTTP_VERB.test(m));
    if (verbs.length === 0) continue;

    const staticPrefix = p.split(/\{|:/)[0];
    // Escape every regex metacharacter except the braces that delimit
    // `{param}` placeholders. Leaving `*` or `?` unescaped either breaks the
    // RegExp constructor ('/files/**') or silently changes what matches.
    const source = p
      .replace(/[.*+?^$()|[\]\\]/g, '\\$&')
      .replace(/\{[^}]+\}/g, '[^/?#]+')
      .replace(/:[A-Za-z_][\w]*/g, '[^/?#]+');

    for (const m of verbs) {
      out.push({
        method: m.toUpperCase(),
        path: p,
        staticPrefix,
        urlRegex: new RegExp(`^${source}$`),
      });
    }
  }
  return out;
}
59
+
60
// Decide whether a URL literal found in source code refers to endpoint `ep`
// (a record produced by endpoints()).
//
// The scheme+host prefix, query string and fragment are dropped first; the
// fragment is never sent to the server, so it must not affect matching.
// The remaining path then matches when any of these holds:
//   1. it satisfies the endpoint's anchored urlRegex;
//   2. it equals the endpoint's static prefix (client built the URL by
//      concatenation, e.g. '/users/' + id) and that prefix is >= 3 chars;
//   3. it equals the path template verbatim (client wrote the placeholder).
// Returns false for a non-string url or a missing endpoint.
function urlMatchesEndpoint(url, ep) {
  if (typeof url !== 'string' || !ep) return false;
  const clean = url.replace(/^https?:\/\/[^/]+/i, '').split(/[?#]/)[0];
  if (ep.urlRegex.test(clean)) return true;
  // Soft match: the literal we see is only the static prefix that precedes a
  // path parameter. Very short prefixes (e.g. '/') are too ambiguous.
  if (ep.staticPrefix && ep.staticPrefix.length >= 3 && clean === ep.staticPrefix) return true;
  return clean === ep.path;
}
70
+
71
// Find client-side HTTP calls whose first argument is a string literal that
// matches an OpenAPI endpoint.
//
// Recognised call shapes: fetch(...), axios(...), axios.<verb>(...),
// requests.<verb>(...), http.request(...), urllib.request.urlopen(...).
// When the call names its verb (axios.post, requests.delete, ...) only
// endpoints with that HTTP method are considered, so a POST call is never
// attributed to the GET operation that shares its path. Calls whose verb is
// not visible in the callee (fetch, axios(...)) fall back to the first
// endpoint whose URL matches.
//
// Files that are not non-empty strings, or are larger than 500,000
// characters, are skipped.
// Returns Array<{ file, line, method, path, snippet }>.
function clientCalls(fileContents, eps) {
  // Groups: 1 = axios verb, 2 = requests verb, 3 = opening quote, 4 = URL.
  const CALL_RE = /\b(?:fetch|axios(?:\.(get|post|put|patch|delete))?|requests\.(get|post|put|patch|delete)|http\.request|urllib\.request\.urlopen)\s*\(\s*([`'"])([^`'"]+)\3/g;
  const out = [];
  for (const [fp, c] of Object.entries(fileContents || {})) {
    if (!c || typeof c !== 'string') continue;
    if (c.length > 500_000) continue;

    // Fresh regex per file so lastIndex state never leaks between files.
    const r = new RegExp(CALL_RE.source, CALL_RE.flags);
    let lines = null; // split lazily, once per file, only if something matches
    let lineNo = 1; // 1-based line of offset `counted`
    let counted = 0;
    let m;
    while ((m = r.exec(c))) {
      const verb = (m[1] || m[2] || '').toUpperCase();
      const url = m[4];
      const ep = eps.find(e => (!verb || e.method === verb) && urlMatchesEndpoint(url, e));
      if (!ep) continue;

      // Matches arrive in increasing offset order, so newlines can be
      // counted incrementally instead of re-scanning from the file start.
      for (let i = counted; i < m.index; i++) {
        if (c.charCodeAt(i) === 10) lineNo++;
      }
      counted = m.index;
      if (!lines) lines = c.split('\n');

      out.push({
        file: fp, line: lineNo,
        method: ep.method, path: ep.path,
        snippet: (lines[lineNo - 1] || '').trim().slice(0, 200),
      });
    }
  }
  return out;
}
95
+
96
// Match OpenAPI endpoints to their server-side route registrations.
//
// Recognised shapes:
//   - JS/TS files: app|router|server|fastify .get/.post/.put/.patch/.delete('path', ...)
//   - Python files: @<name>.get/.post/.put/.patch/.delete('path')   (FastAPI style)
//   - Python files: @app|bp|blueprint .route('path'[, methods=[...]]) (Flask style)
//
// For Flask-style routes every quoted method in the `methods` list (or
// tuple) is considered; a decorator with no `methods` argument is treated as
// GET. When `methods` is present but holds no string literal, or the
// argument list contains a nested call that hides the rest of the
// arguments, the route is skipped rather than guessed.
//
// Files that are not non-empty strings, or are larger than 500,000
// characters, are skipped.
// Returns Array<{ file, line, method, path }>.
function serverRoutes(fileContents, eps) {
  const ROUTE_RE = [
    // Express / Fastify / Koa. Groups: 1 = verb, 2 = quote, 3 = path.
    { lang: 'js', flask: false, re: /\b(?:app|router|server|fastify)\s*\.\s*(get|post|put|patch|delete)\s*\(\s*([`'"])([^`'"]+)\2/gi },
    // FastAPI. Groups: 1 = verb, 2 = quote, 3 = path.
    { lang: 'py', flask: false, re: /@\w+\s*\.\s*(get|post|put|patch|delete)\s*\(\s*([`'"])([^`'"]+)\2/gi },
    // Flask. Groups: 1 = quote, 2 = path, 3 = remaining decorator arguments
    // up to the first ')'. The method list is parsed from group 3 below so
    // that `methods` is optional and every listed method is seen.
    { lang: 'py', flask: true, re: /@(?:app|bp|blueprint)\s*\.\s*route\s*\(\s*([`'"])([^`'"]+)\1([^)]*)/gi },
  ];

  // Resolve the HTTP methods declared by a Flask decorator's argument tail.
  const flaskMethods = (tail) => {
    const list = /\bmethods\s*=\s*[[(]([^\])]*)/i.exec(tail);
    if (list) {
      return (list[1].match(/['"][A-Za-z]+['"]/g) || [])
        .map((quoted) => quoted.slice(1, -1).toUpperCase());
    }
    // `methods=SOME_CONSTANT`, or a nested call truncated the tail: unknown.
    if (/\bmethods\s*=/i.test(tail) || tail.includes('(')) return [];
    return ['GET']; // Flask's default when `methods` is omitted
  };

  const out = [];
  for (const [fp, c] of Object.entries(fileContents || {})) {
    if (!c || typeof c !== 'string') continue;
    if (c.length > 500_000) continue;
    for (const { lang, flask, re } of ROUTE_RE) {
      if (lang === 'js' && !/\.(?:js|jsx|ts|tsx|mjs|cjs)$/i.test(fp)) continue;
      if (lang === 'py' && !/\.py$/i.test(fp)) continue;
      // Fresh regex per file so lastIndex state never leaks between files.
      const r = new RegExp(re.source, re.flags);
      let m;
      while ((m = r.exec(c))) {
        const urlPattern = flask ? m[2] : m[3];
        const methods = flask ? flaskMethods(m[3]) : [(m[1] || '').toUpperCase()];
        let line = 0; // computed lazily, only when a method actually matches
        for (const method of methods) {
          const ep = eps.find(e =>
            e.method === method && urlMatchesEndpoint(urlPattern, e));
          if (!ep) continue;
          if (!line) line = c.substring(0, m.index).split('\n').length;
          out.push({ file: fp, line, method, path: ep.path });
        }
      }
    }
  }
  return out;
}
132
+
133
// Top-level entry point for the OpenAPI cross-language detector.
//
// Pipeline: locate the OpenAPI document, derive its endpoints, find client
// calls and server route registrations that map to those endpoints, then
// pair each client call with every handler that shares its (method, path).
// A pair yields a finding only when the handler's file already has at least
// one critical/high finding that isChainWorthy() accepts; the first such
// finding in that file becomes the chain's seed.
//
// Returns Finding[] (empty whenever any pipeline stage comes up empty).
export function scanCrossLangOpenAPI(fileContents, existingFindings) {
  const spec = loadOpenAPI(fileContents);
  if (!spec) return [];
  const eps = endpoints(spec.doc);
  if (!eps.length) return [];
  const callers = clientCalls(fileContents, eps);
  if (!callers.length) return [];
  const handlers = serverRoutes(fileContents, eps);
  if (!handlers.length) return [];

  // Bucket the eligible seed findings by file (FR-CHAIN-FILTER): CSRF,
  // header-hardening and similar issues do not propagate across a service
  // boundary in a useful way, so they never seed a chain.
  const seedsByFile = new Map();
  for (const f of existingFindings || []) {
    const eligible = f.file
      && /critical|high/i.test(f.severity || '')
      && isChainWorthy(f);
    if (!eligible) continue;
    const bucket = seedsByFile.get(f.file);
    if (bucket) bucket.push(f);
    else seedsByFile.set(f.file, [f]);
  }

  const findings = [];
  for (const call of callers) {
    for (const h of handlers) {
      if (h.method !== call.method || h.path !== call.path) continue;
      const seeds = seedsByFile.get(h.file);
      if (!seeds || seeds.length === 0) continue;

      const [seed] = seeds;
      const route = `${h.method} ${h.path}`;
      const handlerSite = `${h.file}:${h.line}`;
      findings.push({
        id: `xlang-openapi:${call.file}:${call.line}:${h.method}-${h.path}`,
        file: call.file,
        line: call.line,
        vuln: `Cross-Language Taint: client call → ${route} (server handler in ${handlerSite} has a ${seed.severity} finding)`,
        severity: 'high',
        cwe: seed.cwe || 'CWE-862',
        stride: 'Information Disclosure',
        snippet: call.snippet,
        remediation: `The server-side handler for ${route} (${handlerSite}) has unaddressed ${seed.severity}-severity findings — specifically "${seed.vuln}". A response from that handler that flows into a client-side sink (innerHTML, eval, exec) propagates the underlying issue. Fix the server-side finding first.`,
        parser: 'XLANG-OPENAPI',
        family: familyForBoundary('openapi'), // FR-FAMILY-REGISTRY
        confidence: 0.65,
        cross_language: true,
        chain: [
          { file: call.file, line: call.line, label: 'client-call' },
          { file: h.file, line: h.line, label: route },
          { file: seed.file, line: seed.line, label: seed.vuln },
        ],
      });
    }
  }
  return findings;
}
@@ -0,0 +1,153 @@
1
+ import { isChainWorthy, familyForBoundary } from './cross-lang-meta.js';
2
+
3
+ // SQL / ORM round-trip taint (Sentinel-parity FR-DET-3).
4
+ //
5
+ // When a tainted value is written to column C of table T via an ORM `create`
6
+ // or `update`, subsequent reads of T.C are tainted — the database is just a
7
+ // persistence layer, not a sanitizer. This module builds a table.column→
8
+ // tainted-source registry and emits chains.
9
+ //
10
+ // Coverage:
11
+ // - JS/TS: Mongoose (.create / .save / .findOne), Sequelize, Prisma
12
+ // - Python: SQLAlchemy session.add, Django ORM .objects.create / .filter
13
+ // - Ruby: ActiveRecord Model.create / Model.where
14
+ // - Go: GORM .Create / .Where
15
+ // - PHP: Eloquent ::create / ::where
16
+ //
17
+ // The detector is necessarily heuristic; we name table+column by best-effort.
18
+
19
// Identifier fragments that suggest a value originates from request / user input.
const TAINT_HINTS = /\b(req|request|ctx\.request|params|input|userInput|body|query|cookies|headers)\b/;

/**
 * Identify ORM writes that bind a literal field name to a tainted-looking value.
 *
 * Purely regex-based and heuristic: a value is considered tainted when its
 * source text matches TAINT_HINTS.
 *
 * @param {Object<string, string>} fileContents - map of file path → file text.
 * @returns {Array<{file: string, line: number, model: string, field: string, val: string}>}
 *   one entry per tainted field; `line` is the 1-based line of the write call,
 *   `val` is the value expression truncated to 60 characters. GORM updates use
 *   the placeholder model name '<gorm>'.
 */
function findOrmWrites(fileContents) {
  const out = [];
  // 1-based line number of character offset `idx` in `text`.
  const lineAt = (text, idx) => text.substring(0, idx).split('\n').length;

  for (const [fp, c] of Object.entries(fileContents || {})) {
    if (!c || typeof c !== 'string') continue;
    if (c.length > 500_000) continue; // skip huge / generated files
    const lang = (fp.match(/\.([a-z]+)$/i) || [])[1] || '';
    if (!/^(?:js|jsx|ts|tsx|mjs|cjs|py|rb|go|php)$/i.test(lang)) continue;

    // Match patterns like Model.create({ <field>: <expr-with-taint> })
    // or await Model.create({ data: { <field>: <expr> } }) (Prisma)
    const reJsPyRb = /\b([A-Z]\w+)\s*\.\s*(?:create|save|update|build|insert|upsert)\s*\(\s*\{([^}]{0,500})\}/g;
    let m;
    while ((m = reJsPyRb.exec(c))) {
      const model = m[1];
      const body = m[2];
      // Prisma wraps the columns under `data: { ... }`. `body` stops at the
      // first `}`, so it can never contain the wrapper's closing brace; strip
      // just the opening `data: {` so the nested keys are scanned as fields
      // (instead of reporting the wrapper key `data` as the column).
      const fields = body.replace(/\bdata\s*:\s*\{/, '');
      const fieldRe = /\b(\w+)\s*:\s*([^,}\n]+)/g;
      let fm;
      while ((fm = fieldRe.exec(fields))) {
        const field = fm[1];
        const val = fm[2].trim();
        if (TAINT_HINTS.test(val)) {
          out.push({ file: fp, line: lineAt(c, m.index), model, field, val: val.slice(0, 60) });
        }
      }
    }

    // Python kwargs: Model.objects.create(field1=value, field2=value)
    const rePyKw = /\b([A-Z]\w+)\.objects\.create\s*\(([^)]{0,500})\)/g;
    while ((m = rePyKw.exec(c))) {
      const model = m[1];
      const body = m[2];
      const kwRe = /\b(\w+)\s*=\s*([^,)]+)/g;
      let km;
      while ((km = kwRe.exec(body))) {
        const field = km[1];
        const val = km[2].trim();
        if (TAINT_HINTS.test(val)) {
          out.push({ file: fp, line: lineAt(c, m.index), model, field, val: val.slice(0, 60) });
        }
      }
    }

    // GORM: db.Create(&user) or db.Model(&User{}).Where(...).Update("col", val)
    const reGormUpdate = /db\s*\.\s*(?:Model\([^)]*\)\s*\.\s*)?(?:Where[^.]*\.\s*)?Update\s*\(\s*"(\w+)"\s*,\s*([^)]+)\)/g;
    while ((m = reGormUpdate.exec(c))) {
      const field = m[1];
      const val = m[2].trim();
      if (TAINT_HINTS.test(val)) {
        out.push({ file: fp, line: lineAt(c, m.index), model: '<gorm>', field, val: val.slice(0, 60) });
      }
    }
  }
  return out;
}
81
+
82
/**
 * Collect candidate ORM read sites: any `Model.findX(` / `Model.where(` /
 * `Model.filter(` / `Model.objects.get|filter|all(` call on a capitalized
 * identifier. Only the model name is captured; no column is extracted.
 *
 * @param {Object<string, string>} fileContents - map of file path → file text.
 * @returns {Array<{file: string, line: number, model: string}>} read sites in
 *   file order, with 1-based line numbers.
 */
function findOrmReads(fileContents) {
  // Match Model.findX(...) / Model.objects.filter(...) / Model.where(...)
  const READ_CALL = /\b([A-Z]\w+)\s*\.\s*(?:findOne|findAll|findBy\w*|find|findById|first|last|where|filter|objects\.get|objects\.filter|objects\.all)\s*\(/g;
  const sites = [];
  Object.entries(fileContents || {}).forEach(([path, text]) => {
    // Only non-empty strings of a sane size are scanned.
    const scannable = typeof text === 'string' && text !== '' && text.length <= 500_000;
    if (!scannable) return;
    for (const hit of text.matchAll(READ_CALL)) {
      const lineNo = text.substring(0, hit.index).split('\n').length;
      sites.push({ file: path, line: lineNo, model: hit[1] });
    }
  });
  return sites;
}
101
+
102
/**
 * Emit ORM round-trip chain findings: a tainted write to `Model.field`
 * paired with a read of the same `Model` that sits within 20 lines of an
 * existing chain-worthy critical/high/medium finding in the reader's file.
 *
 * @param {Object<string, string>} fileContents - map of file path → file text.
 * @param {Array<object>} existingFindings - findings already produced by other
 *   detectors; null / non-object entries are ignored.
 * @returns {Array<object>} chain findings (empty when there are no writes,
 *   no reads, or no nearby sinks).
 */
export function scanCrossLangOrm(fileContents, existingFindings) {
  const writes = findOrmWrites(fileContents);
  if (writes.length === 0) return [];
  const reads = findOrmReads(fileContents);
  if (reads.length === 0) return [];

  // Index sinks: collect sink lines + their snippets from existing findings.
  const sinksByFile = new Map();
  for (const f of existingFindings || []) {
    // Tolerate holes / malformed entries, as scanCrossLangQueues does.
    if (!f || typeof f !== 'object') continue;
    const sink = f.sink || f;
    const file = sink.file || f.file;
    const line = sink.line || f.line;
    if (!file || !line) continue;
    if (!/critical|high|medium/i.test(f.severity || '')) continue;
    if (!isChainWorthy(f)) continue; // FR-CHAIN-FILTER
    if (!sinksByFile.has(file)) sinksByFile.set(file, []);
    sinksByFile.get(file).push({ line, vuln: f.vuln, severity: f.severity });
  }

  // Group reads by model once so each write is a single lookup instead of a
  // full scan of `reads`. Insertion order keeps the original output order.
  const readsByModel = new Map();
  for (const r of reads) {
    if (!readsByModel.has(r.model)) readsByModel.set(r.model, []);
    readsByModel.get(r.model).push(r);
  }

  const out = [];
  for (const w of writes) {
    // Find any READ of the same Model — anywhere in the project.
    const readers = readsByModel.get(w.model);
    if (!readers) continue;
    for (const r of readers) {
      // Check if there's a sink in the reader's file near the read line.
      const sinksInReadFile = sinksByFile.get(r.file) || [];
      const nearby = sinksInReadFile.filter(s => Math.abs((s.line || 0) - r.line) <= 20);
      if (!nearby.length) continue;
      // The first nearby sink (in findings order) describes the chain's end.
      const seed = nearby[0];
      out.push({
        id: `xlang-orm:${w.file}:${w.line}->${r.file}:${r.line}`,
        file: r.file, line: r.line,
        vuln: `Cross-Language Taint (ORM round-trip): ${w.model}.${w.field} written tainted at ${w.file}:${w.line} → read at ${r.file}:${r.line} → reaches ${seed.severity} sink`,
        severity: 'medium',
        cwe: 'CWE-89',
        snippet: `(round-trip via ${w.model}.${w.field})`,
        remediation: `A tainted value is written to ${w.model}.${w.field} at ${w.file}:${w.line} and read at ${r.file}:${r.line}, then flows into "${seed.vuln}". The DB doesn't sanitize — coerce/validate the value on write OR on read, ideally both. For Mongo, ensure the value is a primitive (String(...)) before write; for SQL, parameterize the downstream sink.`,
        parser: 'XLANG-ORM',
        family: familyForBoundary('orm'), // FR-FAMILY-REGISTRY
        confidence: 0.55,
        cross_language: true,
        chain: [
          { file: w.file, line: w.line, label: `write ${w.model}.${w.field}` },
          { file: r.file, line: r.line, label: `read ${w.model}` },
          { file: r.file, line: seed.line, label: seed.vuln },
        ],
      });
    }
  }
  return out;
}
@@ -0,0 +1,210 @@
1
+ import { isChainWorthy, familyForBoundary } from './cross-lang-meta.js';
2
+
3
+ // Cross-language message-queue taint propagation (FR-XSAT-4 — P1.5).
4
+ //
5
+ // When a project ships producer and consumer code for the same message
6
+ // queue (Kafka topic, AWS SQS queue, RabbitMQ exchange, Redis stream,
7
+ // Google Pub/Sub topic), tainted data flowing into the producer carries
8
+ // through the queue and emerges in the consumer's hand. The engine pairs
9
+ // producer call sites with consumer handlers by topic name; when either
10
+ // end has a high+ finding, we emit a `cross_language: true` chain finding
11
+ // at the OTHER end so engineers see the transitive flow.
12
+ //
13
+ // This module is deliberately conservative: we only emit chains when the
14
+ // producer and consumer agree on the topic name (string literal match).
15
+ // Constant-folded topic names (variables, env vars) get a `topic: 'inferred'`
16
+ // tag and lower confidence rather than dropped — that's the precision/recall
17
+ // trade-off documented in the parent PRD's Pillar-6 honesty commitments.
18
+ //
19
+ // Detectors per queue tech:
20
+ // Kafka — kafkajs (Node), confluent-kafka (Python), kafka-clients (Java), sarama (Go)
21
+ // AWS SQS — aws-sdk (Node), boto3 (Python), aws-sdk-java
22
+ // RabbitMQ — amqplib (Node), pika (Python), RabbitTemplate (Spring)
23
+ // Redis Streams — redis (xadd/xread) — Node/Python/Java/Go
24
+ // Google Pub/Sub — @google-cloud/pubsub (Node), google-cloud-pubsub (Python)
25
+
26
+ // ─── Topic extraction ──────────────────────────────────────────────────────
27
+
28
+ // Each regex finds either a producer-write site or a consumer-handler site.
29
+ // Group 1 = topic name (literal or expression text).
30
// Producer-write call sites. Each entry pairs a queue technology tag with a
// global regex whose capture group 1 is the topic / queue / stream name.
const PRODUCER_PATTERNS = [
  // Kafka
  { tech: 'kafka', re: /\bproducer\s*\.\s*send\s*\(\s*\{[^}]*?topic\s*:\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /\bsendMessage\s*\(\s*['"]([^'"]+)['"]/g }, // kafka-clients (Java)
  // Java: the topic is always the FIRST ProducerRecord constructor argument;
  // optional generic parameters (<K, V> or <>) may precede the parenthesis.
  { tech: 'kafka', re: /producer\.send\s*\(\s*new\s+ProducerRecord\s*(?:<[^()]*>)?\s*\(\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /(?:Producer|producer)\.produce\s*\(\s*['"]([^'"]+)['"]/g }, // confluent-kafka Python
  // SQS
  { tech: 'sqs', re: /\bsendMessage\s*\(\s*\{[^}]*?QueueUrl\s*:\s*['"][^'"]*?\/([^'"\/]+)['"]/g }, // aws-sdk node, queue URL ends with name
  { tech: 'sqs', re: /\bsend_message\s*\(\s*QueueUrl\s*=\s*['"][^'"]*?\/([^'"\/]+)['"]/g }, // boto3
  // RabbitMQ
  { tech: 'rabbit', re: /\bpublish\s*\(\s*['"]([^'"]+)['"]/g }, // amqplib & pika common
  { tech: 'rabbit', re: /rabbitTemplate\.convertAndSend\s*\(\s*['"]([^'"]+)['"]/g }, // Spring
  // Redis streams (multi-language XADD shapes)
  { tech: 'redis', re: /\.\s*xadd\s*\(\s*['"]([^'"]+)['"]/gi }, // node/ioredis: xadd('key', ...)
  { tech: 'redis', re: /\bXADD\s+([\w:.-]+)/g }, // redis-cli style in code strings
  { tech: 'redis', re: /XAddArgs\s*\{\s*Stream\s*:\s*['"]([^'"]+)['"]/g }, // go-redis: &redis.XAddArgs{Stream: "..."}
  { tech: 'redis', re: /\.\s*xadd\s*\(\s*name\s*=\s*['"]([^'"]+)['"]/g }, // python redis-py: xadd(name="...")
  // Google Pub/Sub
  { tech: 'pubsub', re: /\btopic\s*\(\s*['"]([^'"]+)['"]\s*\)\s*\.publish/g }, // @google-cloud/pubsub
  { tech: 'pubsub', re: /publisher\.publish\s*\(\s*topic_path\s*\([^,]+,\s*['"]([^'"]+)['"]/g }, // python
];
51
+
52
// Consumer-handler call sites. Each entry pairs a queue technology tag with a
// global regex whose capture group 1 is the topic / queue / stream name.
const CONSUMER_PATTERNS = [
  // Kafka
  { tech: 'kafka', re: /\bconsumer\s*\.\s*subscribe\s*\(\s*\{[^}]*?topics?\s*:\s*\[?\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /\.\s*subscribe\s*\(\s*\[\s*['"]([^'"]+)['"]/g },
  { tech: 'kafka', re: /@KafkaListener\s*\(\s*topics\s*=\s*\{?\s*['"]([^'"]+)['"]/g }, // Spring Boot
  // SQS
  { tech: 'sqs', re: /\bsqsClient\.receiveMessage\s*\(\s*\{[^}]*?QueueUrl\s*:\s*['"][^'"]*?\/([^'"\/]+)['"]/g },
  { tech: 'sqs', re: /\breceive_message\s*\(\s*QueueUrl\s*=\s*['"][^'"]*?\/([^'"\/]+)['"]/g },
  // RabbitMQ
  { tech: 'rabbit', re: /\.\s*consume\s*\(\s*['"]([^'"]+)['"]/g }, // amqplib
  // pika: `queue=` may be the first keyword argument (pika 1.x:
  // basic_consume(queue='q', on_message_callback=cb)) or follow a leading
  // positional callback (pika 0.x: basic_consume(cb, queue='q')).
  { tech: 'rabbit', re: /\.\s*basic_consume\s*\(\s*(?:[^,]*,\s*)?queue\s*=\s*['"]([^'"]+)['"]/g },
  { tech: 'rabbit', re: /@RabbitListener\s*\(\s*queues\s*=\s*['"]([^'"]+)['"]/g }, // Spring
  // Redis streams (multi-language XREAD shapes)
  { tech: 'redis', re: /\.\s*xread(?:group)?\s*\(\s*[^)]*?streams\s*:\s*\{?\s*['"]([^'"]+)['"]/gi },
  { tech: 'redis', re: /\bXREAD(?:GROUP)?\s+(?:GROUP\s+\S+\s+\S+\s+)?(?:COUNT\s+\d+\s+)?STREAMS\s+([\w:.-]+)/gi },
  { tech: 'redis', re: /\.\s*xread\s*\(\s*\{[^}]*?key\s*:\s*['"]([^'"]+)['"]/gi }, // node-redis v4: xread({key:'...'})
  // Google Pub/Sub
  { tech: 'pubsub', re: /\bsubscription\s*\(\s*['"]([^'"]+)['"]\s*\)\s*\.on/g },
  { tech: 'pubsub', re: /\bsubscriber\.subscribe\s*\(\s*subscription_path\s*\([^,]+,\s*['"]([^'"]+)['"]/g },
];
72
+
73
/**
 * 1-based line number of character offset `idx` within `raw`
 * (number of newlines before the offset, plus one).
 */
function lineOf(raw, idx) {
  const prefix = raw.substring(0, idx);
  let lineNo = 1;
  for (const ch of prefix) {
    if (ch === '\n') lineNo += 1;
  }
  return lineNo;
}
74
+
75
/**
 * Scan every code file for message-queue producer and consumer call sites.
 *
 * Files that are not strings, exceed 500,000 characters, or do not pass
 * `_looksLikeCodeFile` are skipped. Topic names are normalized with
 * `_normTopic`; matches whose normalized topic is empty are dropped.
 *
 * @param {Object<string, string>} fileContents - map of file path → file text.
 * @returns {{
 *   producers: Map<string, Array<{file: string, line: number, tech: string}>>,
 *   consumers: Map<string, Array<{file: string, line: number, tech: string}>>
 * }} call sites grouped by normalized topic.
 */
function indexQueueSites(fileContents) {
  const producers = new Map();
  const consumers = new Map();

  // Record every site that `patterns` matches in `text` under its topic.
  const collect = (patterns, bucket, path, text) => {
    patterns.forEach(({ tech, re }) => {
      // Work on a private copy so the shared /g pattern's lastIndex is never touched.
      const scanner = new RegExp(re.source, re.flags);
      for (let hit = scanner.exec(text); hit; hit = scanner.exec(text)) {
        const topic = _normTopic(hit[1]);
        if (!topic) continue;
        const sites = bucket.get(topic) || [];
        sites.push({ file: path, line: lineOf(text, hit.index), tech });
        bucket.set(topic, sites);
      }
    });
  };

  Object.entries(fileContents || {}).forEach(([path, text]) => {
    const scannable = typeof text === 'string' && text.length <= 500_000;
    if (!scannable || !_looksLikeCodeFile(path)) return;
    collect(PRODUCER_PATTERNS, producers, path, text);
    collect(CONSUMER_PATTERNS, consumers, path, text);
  });

  return { producers, consumers };
}
119
+
120
/**
 * Canonical topic key: trimmed, lowercased, leading slashes removed.
 * Anything that is not a non-empty string yields ''.
 */
function _normTopic(s) {
  const usable = typeof s === 'string' && s.length > 0;
  if (!usable) return '';
  const lowered = s.trim().toLowerCase();
  return lowered.replace(/^\/+/, '');
}
124
+
125
/**
 * True when the path ends in one of the source-code extensions scanned for
 * queue call sites (case-insensitive).
 */
function _looksLikeCodeFile(fp) {
  const codeExtension = /\.(js|jsx|ts|tsx|mjs|cjs|py|java|kt|go|rb|cs|rs|php|scala|swift)$/i;
  const isCode = codeExtension.test(fp);
  return isCode;
}
128
+
129
+ // ─── Chain emission ─────────────────────────────────────────────────────────
130
+
131
/**
 * For each (producer, consumer) pair on the same topic, look up critical/high
 * findings in either site's file and emit a chain finding at the OTHER side.
 *
 * @param {Object<string, string>} fileContents - map of file path → file text.
 * @param {Array<object>} findings - findings already produced by other
 *   detectors; null / non-object entries are ignored.
 * @returns {Array<object>} chain findings ready to splice into finalFindings
 *   (empty when no topic has both a producer and a consumer).
 */
export function scanCrossLangQueues(fileContents, findings) {
  const { producers, consumers } = indexQueueSites(fileContents);
  if (!producers.size || !consumers.size) return [];
  // Index existing findings by file for fast lookup. Only include
  // chain-worthy families (FR-CHAIN-FILTER): chaining to a CSRF or
  // header-hardening finding on the other side of a queue is semantically
  // meaningless — taint doesn't propagate through those classes.
  const findingsByFile = new Map();
  for (const f of findings || []) {
    if (!f || typeof f !== 'object') continue;
    // Case-insensitive, matching the severity gates of the sibling
    // OpenAPI / ORM cross-language scanners.
    if (!/critical|high/i.test(f.severity || '')) continue;
    if (!isChainWorthy(f)) continue;
    const file = f.file || f.sink?.file;
    if (!file) continue;
    const list = findingsByFile.get(file) || [];
    list.push(f);
    findingsByFile.set(file, list);
  }
  const chains = [];
  for (const [topic, prodList] of producers) {
    const consList = consumers.get(topic);
    if (!consList) continue;
    for (const prod of prodList) {
      // Loop-invariant for every consumer paired with this producer.
      const prodFindings = findingsByFile.get(prod.file) || [];
      for (const cons of consList) {
        // Consumer file has high+ findings → surface them at the producer.
        const consFindings = findingsByFile.get(cons.file) || [];
        for (const consF of consFindings) {
          chains.push(_chainFinding({
            origin: prod, target: cons, topic, sourceFinding: consF, dir: 'producer->consumer',
          }));
        }
        // Producer file has high+ findings → surface them at the consumer.
        for (const prodF of prodFindings) {
          chains.push(_chainFinding({
            origin: cons, target: prod, topic, sourceFinding: prodF, dir: 'consumer->producer',
          }));
        }
      }
    }
  }
  return chains;
}
179
+
180
/**
 * Build one cross-language queue chain finding.
 *
 * The finding is anchored at `origin` (the end WITHOUT the underlying
 * finding) and points at `target` (the end whose file holds `sourceFinding`).
 *
 * @param {object} args
 * @param {{file: string, line: number, tech: string}} args.origin - site the chain finding is reported at.
 * @param {{file: string, line: number, tech: string}} args.target - site on the other end of the queue.
 * @param {string} args.topic - normalized topic / queue name shared by both sites.
 * @param {object} args.sourceFinding - existing finding in the target's file.
 * @param {string} args.dir - 'producer->consumer' (origin is the producer) or
 *   'consumer->producer' (origin is the consumer).
 * @returns {object} the chain finding.
 */
function _chainFinding({ origin, target, topic, sourceFinding, dir }) {
  // In the reverse direction the origin is the consumer, so role-dependent
  // text must be flipped; forward-direction output is unchanged.
  const reversed = dir === 'consumer->producer';
  const originRole = reversed ? 'consumer' : 'producer';
  const targetRole = reversed ? 'producer' : 'consumer';
  // Data physically flows producer → queue → consumer regardless of `dir`.
  const producerSite = reversed ? target : origin;
  const consumerSite = reversed ? origin : target;
  const cause = sourceFinding.cwe || sourceFinding.vuln;
  const lead = reversed
    ? `Messages read from '${topic}' are written by a producer with a high-severity finding (${cause}).`
    : `A tainted message written to '${topic}' is read by a handler with a high-severity finding (${cause}).`;
  return {
    id: `xlang-queue:${origin.file}:${origin.line}->${target.file}:${target.line}:${topic}`,
    file: origin.file,
    line: origin.line,
    vuln: `Cross-language taint via ${target.tech} topic '${topic}' — ${dir} — reaches ${sourceFinding.vuln}`,
    severity: _downgradeSeverity(sourceFinding.severity),
    cwe: sourceFinding.cwe || null,
    parser: 'XLANG-QUEUE',
    family: familyForBoundary('queue'), // FR-FAMILY-REGISTRY: canonical name
    cross_language: true,
    boundary: 'queue',
    topic,
    tech: target.tech,
    confidence: 0.6,
    source: { file: origin.file, line: origin.line, label: `${target.tech} ${originRole} (topic ${topic})` },
    sink: { file: target.file, line: target.line, label: `${target.tech} ${targetRole} reaches ${sourceFinding.vuln}` },
    remediation: `${lead} Validate the payload at both ends: producer should not forward unsanitized request data; consumer should treat the queue body as untrusted.`,
    snippet: `// taint flows: ${producerSite.file}:${producerSite.line} → ${target.tech}/${topic} → ${consumerSite.file}:${consumerSite.line}`,
  };
}
201
+
202
/**
 * Demote a severity by one tier.
 *
 * The chain finding is informational alongside the source finding — we
 * demote one tier so it doesn't double-count in severity bucketing.
 *
 * @param {string} [sev] - severity label, matched case-insensitively; a
 *   missing / empty value is treated as 'high'.
 * @returns {string} 'high' | 'medium' | 'low'; unknown labels map to 'low'.
 */
function _downgradeSeverity(sev) {
  // A Map (not a plain object) so labels such as 'constructor' cannot
  // resolve to inherited Object.prototype members.
  const next = new Map([
    ['critical', 'high'],
    ['high', 'medium'],
    ['medium', 'low'],
    ['low', 'low'],
  ]);
  const key = sev ? String(sev).trim().toLowerCase() : 'high';
  return next.get(key) || 'low';
}
208
+
209
/**
 * Test / bench-tooling hook: exposes the module-private queue site indexer.
 * Returns exactly what `indexQueueSites(fileContents)` returns.
 */
export function _indexQueueSites(fileContents) {
  const index = indexQueueSites(fileContents);
  return index;
}