@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,588 @@
1
+ // Interprocedural taint engine — IFDS-lite tabulation over the IR.
2
+ //
3
+ // Algorithm (simplified):
4
+ //
5
+ // For each function F:
6
+ // We compute a SUMMARY of the form
7
+ // (entry: Set<TaintFact>) → { returnTaint: bool, paramMutations: { paramName: bool }, sideEffectFindings: Finding[] }
8
+ // where TaintFact is currently a variable name (string).
9
+ //
10
+ // To handle inter-procedural flow:
11
+ // When the engine encounters a call site `f(...args)`:
12
+ // 1. Look up the resolved callee qid in the call graph.
13
+ // 2. Compute an entry-taint-state for that callee: which of the callee's
14
+ // parameters bind to tainted caller-side expressions?
15
+ // 3. If a summary already exists for that callee + entry-state, use it.
16
+ // Otherwise, recursively analyze the callee with that entry state,
17
+ // cache the summary, and use it.
18
+ // 4. The callee's `returnTaint` determines whether the call expression's
19
+ // value is tainted on return.
20
+ // 5. The callee's `paramMutations` taint specific caller-side variables
21
+ // (param-by-reference, e.g. `Object.assign(target, tainted)`).
22
+ //
23
+ // Recursion: We use the standard fixed-point trick — when a function is
24
+ // already on the analysis stack, return a conservative summary (no
25
+ // tainting). The cache then re-iterates.
26
+ //
27
+ // Sources: anywhere a CFG node reads a catalog-registered source pattern,
28
+ // the resulting variable becomes tainted.
29
+ //
30
+ // Sinks: anywhere a CFG node calls a catalog-registered sink with a tainted
31
+ // argument, we emit a finding.
32
+ //
33
+ // Sanitizers: a call to a catalog-registered sanitizer kills the taint on its
34
+ // argument (the call's return value is treated as clean).
35
+
36
+ import { matchSource, matchSinkOrSanitizer } from './catalog.js';
37
+ import { accessPathOf, isCoveredBy, addPath, removePathAndDescendants, joinSets as joinAccessSets, setsEqual as accessSetsEqual } from './access-paths.js';
38
+ import { aliasesForVar } from './points-to.js';
39
+ import { higherOrderTaintFlow } from './higher-order.js';
40
+ import { SummaryCache, entryStateFromCall } from './summaries.js';
41
+
42
// v0.70 #2 — alias-aware variant of addPath.
//
// Taints `path` and, when points-to information is available, the same path
// re-rooted on every alias of its root variable. Example: for "a.x" where the
// root `a` has aliases {a, obj}, both `a.x` and `obj.x` end up tainted.
//
// The points-to graph is taken from callContext._pointsTo (built by
// runDeepAnalysis when AGENTIC_SECURITY_POINTS_TO=1) and the enclosing function
// from callContext._currentFnQid. If either is missing, or `path` is not a
// string, the result is simply addPath(state, path).
function _addPathAliasAware(state, path, callContext) {
  const tainted = addPath(state, path);
  const pointsTo = callContext && callContext._pointsTo;
  const currentFn = callContext && callContext._currentFnQid;
  const canExpand = Boolean(pointsTo) && Boolean(currentFn) && typeof path === 'string';
  if (!canExpand) return tainted;

  // Split "root.rest.of.path" into the root variable and the (dot-led) suffix.
  const splitAt = path.indexOf('.');
  const hasSuffix = splitAt >= 0;
  const rootVar = hasSuffix ? path.slice(0, splitAt) : path;
  const suffix = hasSuffix ? path.slice(splitAt) : '';

  let result = tainted;
  for (const alias of aliasesForVar(pointsTo, currentFn, rootVar)) {
    // The root itself was already tainted by the first addPath call.
    if (alias !== rootVar) result = addPath(result, alias + suffix);
  }
  return result;
}
63
+
64
// Decide whether `expr` evaluates to a tainted value under taint state `state`.
//
// Resolution order:
//   1. A missing expression is never tainted.
//   2. A member expression that reads a catalog-registered source is tainted
//      at the read site, so `req.body.host` used inline (no intermediate
//      local) still counts.
//   3. P1.1 — a pure ident/member chain ("x.y.z") is answered by the
//      access-path lattice: tainted iff the state covers that path. This is
//      what keeps `user.password` distinguishable from `user.email`.
//   4. Composite expressions are tainted when any sub-expression is tainted.
function exprTaint(expr, state) {
  if (!expr) return false;
  if (expr.kind === 'member' && exprIsSource(expr)) return true;

  const path = accessPathOf(expr);
  if (path !== null) return isCoveredBy(state, path);

  // True when at least one entry of `items` (optionally unwrapped) is tainted.
  const anyTainted = (items, unwrap) =>
    (items || []).some(item => exprTaint(unwrap ? unwrap(item) : item, state));

  const { kind } = expr;
  if (kind === 'binary' || kind === 'logical') {
    return exprTaint(expr.left, state) || exprTaint(expr.right, state);
  }
  if (kind === 'tpl') return anyTainted(expr.parts);
  if (kind === 'union') return anyTainted(expr.branches);
  if (kind === 'object') return anyTainted(expr.props, prop => prop.value);
  if (kind === 'array') return anyTainted(expr.elements);
  if (kind === 'call') {
    // Calls are handled at the CFG level (the call has already been
    // processed). For an inline call expression, conservatively report whether
    // any argument is tainted; this loses the sanitizer effect but is safe.
    return anyTainted(expr.args);
  }
  // 'literal', 'unknown', and any unrecognised kind are treated as clean.
  return false;
}
95
+
96
// Premortem #10: which recorded sources actually reach an expression?
// Source attribution is based on the variables / access paths an expression
// references, replacing "first source we ever saw" with "sources tied to this
// argument". This helper does the collection step.
//
// Walks `expr` and adds to the Set `out`:
//   - a bare string expression, as-is;
//   - the name of every named identifier;
//   - for member expressions, the access path (e.g. `user.email`) when one
//     exists, plus whatever the object side contributes (e.g. `user`);
//   - everything referenced by the operands / parts / branches / property
//     values / elements / arguments of composite expressions.
// Other expression kinds contribute nothing.
function _collectExprVars(expr, out) {
  if (!expr) return;
  if (typeof expr === 'string') {
    out.add(expr);
    return;
  }

  // Recurse into each entry of `items`; anything that is not an array is ignored.
  const walkEach = (items, unwrap) => {
    if (!Array.isArray(items)) return;
    for (const item of items) _collectExprVars(unwrap ? unwrap(item) : item, out);
  };

  switch (expr.kind) {
    case 'ident':
      if (expr.name) out.add(expr.name);
      break;
    case 'member': {
      // Capture the access path AND descend towards its root.
      const path = accessPathOf(expr);
      if (path) out.add(path);
      if (expr.object) _collectExprVars(expr.object, out);
      break;
    }
    case 'binary':
    case 'logical':
      _collectExprVars(expr.left, out);
      _collectExprVars(expr.right, out);
      break;
    case 'tpl':
      walkEach(expr.parts);
      break;
    case 'union':
      walkEach(expr.branches);
      break;
    case 'object':
      walkEach(expr.props, prop => prop.value);
      break;
    case 'array':
      walkEach(expr.elements);
      break;
    case 'call':
      walkEach(expr.args);
      break;
    default:
      break;
  }
}
130
// Return the entries of `taintSources` whose varName is referenced by `expr`.
//
// A source matches when its varName is exactly one of the variables / access
// paths collected from `expr`, or is a dotted prefix of one (a source recorded
// for `user` covers `user.email`; a source recorded for `user.email` covers
// the literal expression `user.email`). Entries without a varName are
// skipped. Matches keep the order of `taintSources`, each entry at most once.
// `_state` is accepted for signature compatibility and is not consulted.
function _sourcesReachingExpr(expr, _state, taintSources) {
  const haveSources = Array.isArray(taintSources) && taintSources.length > 0;
  if (!haveSources) return [];

  const referenced = new Set();
  _collectExprVars(expr, referenced);
  if (referenced.size === 0) return [];

  // Does `varName` equal, or form a dotted prefix of, a referenced path?
  const coversReference = (varName) => {
    if (referenced.has(varName)) return true;
    for (const ref of referenced) {
      if (typeof ref !== 'string') continue;
      if (ref === varName || ref.startsWith(varName + '.')) return true;
    }
    return false;
  };

  const reaching = [];
  for (const source of taintSources) {
    const { varName } = source;
    if (varName && coversReference(varName)) reaching.push(source);
  }
  return reaching;
}
151
+
152
// Heuristic: does this expression read a registered source?
//
// Walks a member chain from the outermost access inwards (`req.body.name` →
// `req.body` → ...) and returns the first catalog hit reported by matchSource,
// so `req.body.name` still resolves to the `req.body` source. Returns null for
// a missing expression, for a non-member expression, or when no link of the
// chain matches.
function exprIsSource(expr) {
  let current = expr;
  while (current && current.kind === 'member') {
    const match = matchSource(current);
    if (match) return match;
    current = current.object;
  }
  return null;
}
165
+
166
+ // Apply a CFG node to a taint-state. Returns the new state + any finding emitted.
167
+ function step(node, stateIn, callContext) {
168
+ const state = new Set(stateIn);
169
+ const findings = [];
170
+
171
+ switch (node.kind) {
172
+ case 'entry':
173
+ case 'exit':
174
+ case 'noop':
175
+ case 'loop-header':
176
+ return { state, findings };
177
+
178
+ case 'assign': {
179
+ // Source detection on RHS.
180
+ const src = exprIsSource(node.source);
181
+ const target = typeof node.target === 'string' ? node.target : null;
182
+ let newState = state;
183
+ // Premortem #7: interprocedural return-taint via SummaryCache. If the
184
+ // RHS is a call to a known callee whose empty-entry-state summary says
185
+ // the return is tainted, taint the assignment target. This makes the
186
+ // simplest cross-function flow (helper reads req.body and returns it)
187
+ // visible to the engine — the case the cache was built for.
188
+ const calleeName = node.source && node.source.kind === 'call' && typeof node.source.callee === 'string'
189
+ ? node.source.callee : null;
190
+ if (target && calleeName && callContext._summaryCache && callContext._callGraph) {
191
+ const resolved = callContext._callGraph.resolve ? callContext._callGraph.resolve(calleeName) : null;
192
+ const fn = resolved && resolved.qid ? resolved : null;
193
+ const qid = resolved && (resolved.qid || resolved);
194
+ if (typeof qid === 'string') {
195
+ // v0.66 — context-sensitive lookup. Build the entry-state from
196
+ // the call args + current taint; look up (and lazily compute) the
197
+ // summary for THAT state, not just empty. This is what closes the
198
+ // "helper is pure when called clean but tainted when called with
199
+ // user input" FN class.
200
+ const callerTainted = newState;
201
+ const callArgs = (node.source.args || []);
202
+ const paramNames = (fn && Array.isArray(fn.params)) ? fn.params : [];
203
+ const entry = paramNames.length
204
+ ? entryStateFromCall(paramNames, callArgs, callerTainted)
205
+ : new Set();
206
+ let sum = callContext._summaryCache.get(qid, entry);
207
+ if (!sum && fn && fn.cfg) {
208
+ // Lazy compute under this entry state. Use a fresh ctx so we
209
+ // don't pollute the outer caller's _taintSources with the
210
+ // callee's internal noise.
211
+ sum = callContext._summaryCache.compute(qid, entry, () => {
212
+ const inner = {
213
+ _findings: [], _taintSources: [], _returnTainted: false,
214
+ _stack: new Set(), deadlineMs: callContext.deadlineMs,
215
+ _summaryCache: callContext._summaryCache,
216
+ _callGraph: callContext._callGraph,
217
+ _mutatedParamsOut: new Set(),
218
+ };
219
+ try { analyzeFunction(fn, entry, inner); } catch {}
220
+ return {
221
+ returnTainted: !!inner._returnTainted,
222
+ mutatedParams: inner._mutatedParamsOut || new Set(),
223
+ taintedGlobals: new Set(),
224
+ findings: [],
225
+ };
226
+ });
227
+ }
228
+ if (sum && sum.returnTainted) {
229
+ newState = _addPathAliasAware(newState, target, callContext);
230
+ callContext._taintSources.push({
231
+ varName: target,
232
+ sourceId: `interproc:${qid}`,
233
+ sourceLabel: `interproc-return:${calleeName}`,
234
+ provenance: 'interproc',
235
+ line: node.line,
236
+ });
237
+ }
238
+ // applyAtCallSite — mutated params propagate to caller arg-vars.
239
+ if (sum && sum.mutatedParams && sum.mutatedParams.size && paramNames.length) {
240
+ const mutated = callContext._summaryCache.applyAtCallSite(
241
+ sum, paramNames, callArgs, callerTainted);
242
+ for (const v of mutated.mutated) newState = addPath(newState, v);
243
+ }
244
+ if (sum && sum.returnTainted) return { state: newState, findings: [] };
245
+ }
246
+ }
247
+ if (src && target) {
248
+ newState = _addPathAliasAware(newState, target, callContext);
249
+ const sourcePath = accessPathOf(node.source);
250
+ if (sourcePath) newState = addPath(newState, sourcePath);
251
+ callContext._taintSources.push({ varName: target, sourceId: src.id, sourceLabel: src.label, provenance: src.provenance || null, line: node.line });
252
+ } else if (exprTaint(node.source, newState)) {
253
+ // P1.1: when the source IS a pure access path (e.g., RHS is `obj.foo.bar`),
254
+ // taint the TARGET as well as transitively propagate the source path so
255
+ // later uses of the same source remain tainted. The target path
256
+ // becomes the new tainted location.
257
+ if (target) {
258
+ newState = _addPathAliasAware(newState, target, callContext);
259
+ const sourcePath = accessPathOf(node.source);
260
+ if (sourcePath && !isCoveredBy(newState, sourcePath)) newState = addPath(newState, sourcePath);
261
+ }
262
+ } else {
263
+ // Re-assigning a previously-tainted var to a clean value clears it
264
+ // AND its descendants — P1.1 semantics: assigning `x = clean` kills
265
+ // `x.foo`, `x.foo.bar`, etc. Sanitization at root level.
266
+ if (target) newState = removePathAndDescendants(newState, target);
267
+ }
268
+ return { state: newState, findings };
269
+ }
270
+
271
+ case 'call': {
272
+ // 1. Catalog match: sanitizer, sink, or just an external/unresolved call.
273
+ const cat = matchSinkOrSanitizer(node.callee);
274
+ const argTaints = (node.args || []).map(a => exprTaint(a, state));
275
+ // v0.66 — apply mutated-param taint at plain (non-assign) call sites.
276
+ // Object.assign(target, tainted) → target becomes tainted in caller.
277
+ if (callContext._summaryCache && callContext._callGraph
278
+ && typeof node.callee === 'string') {
279
+ const resolved = callContext._callGraph.resolve
280
+ ? callContext._callGraph.resolve(node.callee) : null;
281
+ const fn = resolved && resolved.qid ? resolved : null;
282
+ const qid = resolved && (resolved.qid || resolved);
283
+ if (typeof qid === 'string' && fn && Array.isArray(fn.params)) {
284
+ const paramNames = fn.params;
285
+ const entry = paramNames.length
286
+ ? entryStateFromCall(paramNames, node.args || [], state)
287
+ : new Set();
288
+ const sum = callContext._summaryCache.get(qid, entry);
289
+ if (sum && sum.mutatedParams && sum.mutatedParams.size) {
290
+ const mutated = callContext._summaryCache.applyAtCallSite(
291
+ sum, paramNames, node.args || [], state);
292
+ for (const v of mutated.mutated) state = addPath(state, v);
293
+ }
294
+ }
295
+ }
296
+ if (cat) {
297
+ for (const e of cat) {
298
+ if (e.kind === 'sink' && (
299
+ e.argIndex === 'all' ? argTaints.some(Boolean) :
300
+ (typeof e.argIndex === 'number' && argTaints[e.argIndex])
301
+ )) {
302
+ const taintedArgIdx = e.argIndex === 'all'
303
+ ? argTaints.findIndex(Boolean) : e.argIndex;
304
+ const taintedArgExpr = (node.args || [])[taintedArgIdx];
305
+ // Premortem #10: attribute the source for THIS sink to the
306
+ // source(s) that taint the actual argument expression — not the
307
+ // first source the worklist happened to record. We walk the
308
+ // expression's free vars / access paths against the recorded
309
+ // _taintSources and keep entries whose root variable still
310
+ // covers something in the expression.
311
+ const reachingSources = _sourcesReachingExpr(taintedArgExpr, state, callContext._taintSources);
312
+ const traceForThisFinding = reachingSources.length
313
+ ? reachingSources.slice(0, 5)
314
+ // Fallback: better to surface "no precise source" than the wrong source.
315
+ : [];
316
+ findings.push({
317
+ kind: 'taint',
318
+ sinkId: e.id,
319
+ vuln: e.vuln?.name || 'Tainted Sink',
320
+ severity: e.vuln?.severity || 'high',
321
+ cwe: e.vuln?.cwe || null,
322
+ remediation: e.vuln?.remediation || null,
323
+ line: node.line,
324
+ argIndex: taintedArgIdx,
325
+ callee: node.callee,
326
+ sourceProvenance: (traceForThisFinding[0]?.provenance) || null,
327
+ trace: traceForThisFinding,
328
+ });
329
+ }
330
+ }
331
+ }
332
+ // 2. P1.3 — higher-order taint flow. When the call is `arr.map(fn)` or
333
+ // `promise.then(fn)` and the receiver is tainted, propagate taint
334
+ // into the callback's first parameter. v1: we propagate AT THE
335
+ // CALLBACK INVOCATION LEVEL by adding the callback's first-arg
336
+ // name (when resolvable as a plain ident or function-value) into
337
+ // the taint state.
338
+ const hoFlow = (() => {
339
+ // Heuristic receiver-tainted check: if the callee string is
340
+ // "<recv>.<method>", check whether <recv> is in state.
341
+ const callee = typeof node.callee === 'string' ? node.callee : null;
342
+ if (!callee) return null;
343
+ const dot = callee.lastIndexOf('.');
344
+ if (dot <= 0) return null;
345
+ const recv = callee.slice(0, dot);
346
+ const recvTainted = isCoveredBy(state, recv);
347
+ return higherOrderTaintFlow(node, recvTainted);
348
+ })();
349
+ if (hoFlow && hoFlow.taintsCallbackParam === 0) {
350
+ // The first arg should be the callback. If it's a plain ident or
351
+ // function-value, the engine's per-callee summary path will pick it
352
+ // up when the callee is independently analyzed. We don't model the
353
+ // callback inline here; instead we record on callContext that the
354
+ // callback was invoked with a tainted first param, so the engine's
355
+ // call-graph pass can re-run the callback with that entry state.
356
+ const cb = (node.args || [])[0];
357
+ if (cb && (cb.kind === 'ident' || cb.kind === 'function-value')) {
358
+ callContext._higherOrderInvocations = callContext._higherOrderInvocations || [];
359
+ callContext._higherOrderInvocations.push({
360
+ callee: cb.kind === 'ident' ? cb.name : (cb.qid || null),
361
+ paramIndex: 0,
362
+ taintedParam: true,
363
+ line: node.line,
364
+ via: hoFlow.kind,
365
+ });
366
+ }
367
+ }
368
+ return { state, findings };
369
+ }
370
+
371
+ case 'if': {
372
+ // Path-feasibility lite: if the condition is a literal false / unreachable,
373
+ // mark the node so the CFG walker can skip the consequent edge.
374
+ // For now we simply propagate state to both branches.
375
+ return { state, findings };
376
+ }
377
+
378
+ case 'return': {
379
+ if (exprTaint(node.value, state)) {
380
+ callContext._returnTainted = true;
381
+ }
382
+ return { state, findings };
383
+ }
384
+
385
+ case 'throw': {
386
+ // Thrown values don't taint subsequent code in the same fn — exit.
387
+ return { state, findings };
388
+ }
389
+
390
+ default:
391
+ return { state, findings };
392
+ }
393
+ }
394
+
395
/**
 * Worklist traversal of one function's CFG with a given entry taint state.
 *
 * Every dequeued node is run through `step`; its out-state is joined with the
 * node's previous out-state, and successors are re-queued only when their
 * in-state actually grows.
 *
 * Side effects on `callContext` (when one is supplied):
 *   - `_currentFnQid` is set to `fn.qid` before the walk so `step` can read it
 *     without a signature change (v0.70 #2; the original note says `step` uses
 *     it for points-to lookups — that code is outside this block).
 *   - every finding returned by `step` is appended to `_findings`, tagged with
 *     `_funcQid: fn.qid`. A node that is re-visited re-emits its findings;
 *     de-duplication is left to the caller.
 *   - `_mutatedParamsOut` receives each declared param still covered by the
 *     exit state (v0.66), so a caller can propagate by-reference taint.
 *
 * Termination (Premortem 2R4.4 / 2R-9): the loop stops after ITER_BUDGET
 * dequeues, or once `callContext.deadlineMs` has passed. The deadline is only
 * sampled every 128 iterations (mask 0x7f) to keep Date.now() overhead low.
 * In both cases the state computed so far is returned.
 *
 * @param {{qid: string, params?: string[], cfg: {entry: *, exit: *, nodes: Object}}} fn
 *   Function IR; `cfg.nodes` is a plain object keyed by node id, each node
 *   optionally carrying a `succ` array of successor ids.
 * @param {Iterable<string>} entryState  Taint paths live on entry (copied).
 * @param {Object} [callContext]  Shared per-analysis context (see above).
 * @returns {Set<string>} Out-state recorded for `fn.cfg.exit`, or an empty Set
 *   when the exit node was never processed.
 */
function analyzeFunction(fn, entryState, callContext) {
  const nodes = fn.cfg.nodes; // plain object
  const inStates = new Map(); // nodeId → Set<varName>
  const outStates = new Map();
  inStates.set(fn.cfg.entry, new Set(entryState));

  // FIFO worklist. A moving head index replaces Array#shift so each dequeue
  // is O(1); processing order is identical.
  const work = [fn.cfg.entry];
  let head = 0;

  if (callContext) callContext._currentFnQid = fn.qid;
  const deadlineMs = (callContext && typeof callContext.deadlineMs === 'number')
    ? callContext.deadlineMs
    : Infinity;
  const ITER_BUDGET = 5000;
  let iterations = 0;

  while (head < work.length) {
    if (++iterations > ITER_BUDGET) break;
    // Sample the global deadline every 128 iterations — Date.now() is cheap
    // but not free; this keeps overhead negligible on small functions.
    if ((iterations & 0x7f) === 0 && Date.now() > deadlineMs) break;

    const nid = work[head++];
    const node = nodes[nid];
    if (!node) continue;

    const incoming = inStates.get(nid) || new Set();
    const { state: out, findings } = step(node, incoming, callContext);

    // Guarded like every other callContext access in this function, so a
    // missing context no longer throws here.
    if (callContext && findings.length) {
      if (!callContext._findings) callContext._findings = [];
      for (const f of findings) {
        callContext._findings.push({ ...f, _funcQid: fn.qid });
      }
    }

    const prevOut = outStates.get(nid);
    const merged = mergeStates(prevOut, out);
    if (prevOut && stateEq(prevOut, merged)) continue; // nothing new to propagate

    outStates.set(nid, merged);
    for (const s of (node.succ || [])) {
      const succIn = inStates.get(s);
      const newIn = mergeStates(succIn, merged);
      if (!succIn || !stateEq(succIn, newIn)) {
        inStates.set(s, newIn);
        work.push(s);
      }
    }
  }

  const exit = outStates.get(fn.cfg.exit) || new Set();
  // v0.66 — record which params are tainted at function exit so the caller's
  // applyAtCallSite can propagate that mutated taint back. Only declared
  // params are considered; locals are invisible to the caller.
  if (callContext && Array.isArray(fn.params) && fn.params.length) {
    if (!callContext._mutatedParamsOut) callContext._mutatedParamsOut = new Set();
    for (const p of fn.params) {
      if (isCoveredBy(exit, p)) callContext._mutatedParamsOut.add(p);
    }
  }
  return exit;
}
458
+
459
/**
 * Join two taint states at a CFG merge point.
 *
 * Thin wrapper over `joinAccessSets` (P1.1). Per the original note, that
 * helper performs an access-path-aware union which folds longer descendant
 * paths under their shorter-prefix parents.
 *
 * @param {Set<string>|undefined} a  Previously recorded state, if any.
 * @param {Set<string>|undefined} b  Newly computed state.
 * @returns {*} Whatever `joinAccessSets(a, b)` yields.
 */
function mergeStates(a, b) {
  const joined = joinAccessSets(a, b);
  return joined;
}
464
/**
 * Decide whether two taint states are equal, for worklist convergence checks.
 *
 * Thin wrapper over `accessSetsEqual` (P1.1), which the original note
 * describes as access-path-aware, canonicalized set equality.
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {*} Whatever `accessSetsEqual(a, b)` yields.
 */
function stateEq(a, b) {
  const same = accessSetsEqual(a, b);
  return same;
}
468
+
469
// ── Top-level entry ─────────────────────────────────────────────────────────

/**
 * Membership equality between a previously cached `mutatedParams` value and a
 * freshly computed Set. The cached value may be missing or, when handed in
 * from persisted state, may not be a Set; any non-iterable is treated as empty.
 *
 * @param {Iterable<string>|undefined|null} prev
 * @param {Set<string>} next
 * @returns {boolean}
 */
function _mutatedParamsEqual(prev, next) {
  let prevSet = prev;
  if (!(prevSet instanceof Set)) {
    const iterable = prev != null && typeof prev[Symbol.iterator] === 'function';
    prevSet = new Set(iterable ? prev : []);
  }
  if (prevSet.size !== next.size) return false;
  for (const name of prevSet) {
    if (!next.has(name)) return false;
  }
  return true;
}

/**
 * Run the IR taint engine over every function in the call graph.
 *
 * Phase 1 — summary pre-pass (Premortem #7, v0.66): each function is analyzed
 * with an EMPTY entry state and its `{ returnTainted, mutatedParams }` summary
 * is stored in the k=1 SummaryCache. The pass is repeated until a full pass
 * changes no summary, capped at MAX_FP_ITERS so recursion and long call
 * chains cannot blow up. Findings produced during the pre-pass are discarded.
 *
 * Phase 2 — reporting pass: each function (up to `fnLimit`) is analyzed again
 * with an empty entry state; the function discovers its own sources as it
 * walks. Call sites can consult the now-populated cache through
 * `callContext._summaryCache`. Findings are de-duplicated on
 * `sinkId:file:line` and enriched with file/source/sink/chain metadata.
 *
 * Functions are processed in qid order (Sentinel-parity §9.2) so cache-cold
 * runs produce the same finding sequence run-over-run.
 *
 * @param {*} perFileIR  Not read by this function; kept for interface
 *   compatibility.
 * @param {{functions: Map<*, Object>}} callGraph  `functions.values()` must
 *   yield objects carrying at least `qid` and `file`.
 * @param {Object} [opts]
 * @param {number} [opts.fnLimit=5000]  Max functions in the reporting pass
 *   (a falsy value, including 0, falls back to 5000).
 * @param {number} [opts.deadlineMs]  Absolute epoch-ms deadline; both phases
 *   stop early once it has passed.
 * @param {Object} [opts.summaryCache]  v0.69 — pre-seeded cache to reuse
 *   (incremental mode). Must provide `get(qid, entry)` and
 *   `set(qid, entry, summary)`.
 * @returns {Array<Object>} Flat findings array. The cache is attached as a
 *   non-enumerable `_summaryCache` property so the caller can persist it.
 */
export function runTaintEngine(perFileIR, callGraph, opts = {}) {
  const all = [];
  const seen = new Set();
  const fnLimit = opts.fnLimit || 5000;
  const deadlineMs = typeof opts.deadlineMs === 'number' ? opts.deadlineMs : Infinity;
  const summaryCache = opts.summaryCache || new SummaryCache();

  const fnList = [...callGraph.functions.values()].sort((a, b) =>
    a.qid < b.qid ? -1 : a.qid > b.qid ? 1 : 0
  );

  // ── Phase 1: summary fixed point ──────────────────────────────────────────
  // Convergence is tracked with an explicit per-pass `changed` flag. The cache
  // SIZE is not a usable signal: refining an existing summary does not change
  // it, so a size check stops one pass too early on deep call chains and runs
  // one pass too many on a pre-seeded, already-stable cache.
  const MAX_FP_ITERS = 3;
  for (let it = 0; it < MAX_FP_ITERS; it++) {
    if (Date.now() > deadlineMs) break;
    let changed = false;
    for (const fn of fnList) {
      if (Date.now() > deadlineMs) break;
      const entry = new Set();
      const existing = summaryCache.get(fn.qid, entry);
      // Always recompute, even when cached, so summaries can be lifted by
      // callee summaries that became known during the previous pass.
      const ctx = {
        _findings: [], _taintSources: [], _returnTainted: false,
        _stack: new Set(), deadlineMs,
        _summaryCache: summaryCache, _callGraph: callGraph,
        _mutatedParamsOut: new Set(),
      };
      try {
        analyzeFunction(fn, entry, ctx);
      } catch {
        // Deliberate best-effort: one malformed function must not abort the
        // scan. Whatever ctx captured before the failure is still summarized.
      }
      const next = {
        returnTainted: !!ctx._returnTainted,
        mutatedParams: ctx._mutatedParamsOut || new Set(),
        taintedGlobals: new Set(),
        findings: [],
      };
      if (!existing
          || existing.returnTainted !== next.returnTainted
          || !_mutatedParamsEqual(existing.mutatedParams, next.mutatedParams)) {
        summaryCache.set(fn.qid, entry, next);
        changed = true;
      }
    }
    if (!changed) break; // stable — another pass would see the same cache
  }

  // ── Phase 2: reporting pass ───────────────────────────────────────────────
  let n = 0;
  for (const fn of fnList) {
    if (++n > fnLimit) break;
    if (Date.now() > deadlineMs) break; // global timeout
    const callContext = {
      _findings: [],
      _taintSources: [],
      _returnTainted: false,
      _stack: new Set(),
      deadlineMs, // honored by the worklist inside analyzeFunction
      _summaryCache: summaryCache,
      _callGraph: callGraph,
    };
    try {
      analyzeFunction(fn, new Set(), callContext);
    } catch {
      // Best-effort: drop this function's (possibly partial) findings and
      // move on to the next one.
      continue;
    }
    for (const f of callContext._findings) {
      const key = `${f.sinkId}:${fn.file}:${f.line}`;
      if (seen.has(key)) continue;
      seen.add(key);
      const trace = f.trace || [];
      all.push({
        id: `ir-taint:${fn.file}:${f.line}:${f.sinkId}`,
        file: fn.file,
        line: f.line,
        vuln: f.vuln,
        severity: f.severity,
        cwe: f.cwe,
        remediation: f.remediation,
        parser: 'IR-TAINT',
        confidence: 0.75,
        source: trace.length ? {
          file: fn.file,
          line: trace[0].line,
          label: trace[0].sourceLabel,
        } : null,
        sink: {
          file: fn.file,
          line: f.line,
          label: f.sinkId,
        },
        chain: trace.map(t => ({
          file: fn.file, line: t.line, label: t.sourceLabel,
        })),
      });
    }
  }
  // v0.69 — expose cache to caller (runDeepAnalysis) for incremental persistence.
  Object.defineProperty(all, '_summaryCache', { value: summaryCache, enumerable: false });
  return all;
}