@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,312 @@
1
+ // Python IR frontend (P2.2).
2
+ //
3
+ // Emits the same IR node shape as parser-js.js so the existing dataflow
4
+ // engine consumes both transparently.
5
+ //
6
+ // v1: a pragmatic indentation-aware parser. We avoid bundling tree-sitter
7
+ // (a 4–10MB WASM blob) and instead recognize the core Python shapes we
8
+ // actually need for taint analysis:
9
+ //
10
+ // def f(args): → function header (new fn, CFG)
11
+ // x = expr → assign node
12
+ // call(args) → call node
13
+ // return expr → return node
14
+ // if cond: → if node
15
+ // while cond: → loop-header node
16
+ // for v in expr: → assign node (v ← expr); no separate loop-header node is emitted in v1
17
+ // raise expr → throw node
18
+ // try / except / finally → exception-flow scaffolding (P3.4 will model)
19
+ //
20
+ // Out of scope for v1: comprehensions (treated as opaque), decorators
21
+ // (parsed but not used for taint), match statements, async / await
22
+ // modeled as transparent unwrap. lambda bodies collapsed to expr.
23
+ //
24
+ // The output matches the parser-js.js shape:
25
+ //
26
+ // {
27
+ // file, functions: [{
28
+ // qid, name, line, params, cfg: { entry, exit, nodes }
29
+ // }],
30
+ // topLevel
31
+ // }
32
+
33
+ import { blankComments } from '../sast/_comment-strip.js';
34
+
35
// Module-wide counter behind nextNodeId(); it only ever increases.
let _nodeIdSeq = 0;

/**
 * Return a fresh node id of the form `pyn<N>`, where N starts at 1 and
 * grows by one on every call.
 *
 * @returns {string} the newly allocated id
 */
function nextNodeId() {
  _nodeIdSeq += 1;
  return `pyn${_nodeIdSeq}`;
}
37
+
38
+ // ── Lightweight expression parser ───────────────────────────────────────
39
+
40
const _IDENT = /^[A-Za-z_][A-Za-z_0-9]*/;

// Optional Python string prefix (r, b, u, f or a two-letter combination)
// followed by the opening quote character.
const _STR_OPEN = /^([rRbBuUfF]{0,2})(['"])/;

// A text that is, in its entirety, one Python numeric literal (decimal, float,
// exponent, hex/octal/binary, optional `_` separators, optional imaginary `j`).
const _NUMBER = /^-?(?:0[xX][\da-fA-F_]+|0[oO][0-7_]+|0[bB][01_]+|\d[\d_]*(?:\.[\d_]*)?(?:[eE][+-]?\d+)?[jJ]?)$/;

/**
 * True when `text` is exactly one quoted string whose opening quote sits at
 * `quoteIdx` — i.e. the matching close quote is the last character.
 * Handles both single- and triple-quoted forms and backslash escapes.
 */
function _isWholeStringLiteral(text, quoteIdx) {
  const quote = text[quoteIdx];
  const triple = quote.repeat(3);
  if (text.startsWith(triple, quoteIdx)) {
    const close = text.indexOf(triple, quoteIdx + 3);
    return close !== -1 && close + 3 === text.length;
  }
  for (let i = quoteIdx + 1; i < text.length; i++) {
    if (text[i] === '\\') { i++; continue; }
    if (text[i] === quote) return i === text.length - 1;
  }
  return false;
}

/**
 * Build the IR node for a string-like text: a `tpl` node (one part per
 * `{...}` placeholder) when the prefix contains f/F, otherwise a `literal`
 * node carrying the raw source text.
 */
function _stringNode(text, prefix) {
  if (!/f/i.test(prefix)) return { kind: 'literal', value: text };
  const inner = text.slice(prefix.length + 1, -1);
  const parts = [];
  const placeholder = /\{([^{}]+)\}/g;
  let m;
  while ((m = placeholder.exec(inner))) {
    parts.push(parseExpr(m[1]));
  }
  return { kind: 'tpl', parts };
}

/**
 * Reduce one call-argument text to the expression that carries the value:
 * drops a leading `*` / `**` unpack marker and a `name=` keyword prefix.
 */
function _argValue(arg) {
  return arg.replace(/^\*{1,2}\s*/, '').replace(/^[A-Za-z_]\w*\s*=(?!=)\s*/, '');
}

/**
 * Parse a single-line Python expression into a lightweight IR expression node.
 *
 * Produced kinds: `literal`, `tpl`, `call`, `logical`, `binary`, `member`,
 * `ident`, `array`, `object`, `unknown`.
 *
 * Differences from a naive prefix match, all of which matter for taint:
 *  - A text is a string/number literal only when the WHOLE text is that
 *    literal. `'SELECT ' + user` and `1 + x` fall through to the binary-op
 *    branch so the identifier operand is kept.
 *  - String prefixes (r/b/u/f and combinations) are recognised; any prefix
 *    containing f/F yields a `tpl` node.
 *  - Numeric literals convert to numbers (including 0 and `1_000`); forms
 *    JavaScript cannot convert (e.g. `1j`) keep their source text.
 *  - Keyword arguments (`f(query=q)`) and unpacking (`f(*xs)`) parse to the
 *    value expression, not the keyword name.
 *  - A leading `await` is unwrapped transparently.
 *
 * @param {string} text expression source text
 * @returns {object} IR expression node; `{ kind: 'unknown' }` when the text
 *   is empty or has no recognised shape
 */
function parseExpr(text) {
  if (!text) return { kind: 'unknown' };
  let expr = text.trim();
  if (!expr) return { kind: 'unknown' };
  // `await e` is modelled as `e`.
  expr = expr.replace(/^await\s+/, '');

  // String literal / f-string — only when the whole text is one string.
  const strOpen = _STR_OPEN.exec(expr);
  if (strOpen && _isWholeStringLiteral(expr, strOpen[1].length)) {
    return _stringNode(expr, strOpen[1]);
  }
  // Number literal — only when the whole text is one number.
  const digitLed = /^-?\d/.test(expr);
  if (digitLed && _NUMBER.test(expr)) {
    const n = Number(expr.replace(/_/g, ''));
    return { kind: 'literal', value: Number.isNaN(n) ? expr : n };
  }
  // True / False / None
  if (/^(?:True|False|None)$/.test(expr)) {
    return { kind: 'literal', value: expr };
  }

  // Call: name(args) or name.method(args), with arbitrary nesting of
  // parens/brackets/braces in the argument list.
  const calleeMatch = /^([A-Za-z_][\w.]*)\s*\(/.exec(expr);
  if (calleeMatch) {
    const callee = calleeMatch[1];
    const openIdx = calleeMatch[0].length - 1;
    // Walk forward tracking balanced (), [], {} with quote awareness so a
    // `)` inside a string does not close the call.
    let depth = 1;
    let quote = '';
    let i = openIdx + 1;
    for (; i < expr.length; i++) {
      const c = expr[i];
      if (quote) {
        if (c === '\\') i++;
        else if (c === quote) quote = '';
        continue;
      }
      if (c === '"' || c === "'") { quote = c; continue; }
      if (c === '(' || c === '[' || c === '{') depth++;
      else if (c === ')' || c === ']' || c === '}') {
        depth--;
        if (depth === 0) break;
      }
    }
    // The text must end exactly at the matching close paren to be a plain
    // call; otherwise it is a larger expression handled below.
    if (depth === 0 && i === expr.length - 1) {
      const argsText = expr.slice(openIdx + 1, i);
      const args = _splitArgs(argsText).map((a) => parseExpr(_argValue(a)));
      return { kind: 'call', callee, args };
    }
  }

  // Binary / logical operators, lowest-precedence spellings first.
  for (const op of [' or ', ' and ', '==', '!=', '<=', '>=', '<', '>', '+', '-', '*', '/', '%']) {
    const idx = _findTopLevel(expr, op);
    if (idx > 0) {
      const left = parseExpr(expr.slice(0, idx));
      const right = parseExpr(expr.slice(idx + op.length));
      const kind = op === ' or ' || op === ' and ' ? 'logical' : 'binary';
      return { kind, op: op.trim(), left, right };
    }
  }

  // String- or digit-led text with no recognised operator (e.g. a method
  // call on a string literal): keep the previous classification rather than
  // letting a string prefix letter be mistaken for an identifier below.
  if (strOpen) return _stringNode(expr, strOpen[1]);
  if (digitLed) return { kind: 'literal', value: expr };

  // Member access x.y.z
  if (/\./.test(expr) && _IDENT.test(expr.split('.')[0])) {
    const segments = expr.split('.');
    let cur = { kind: 'ident', name: segments[0] };
    for (let k = 1; k < segments.length; k++) {
      cur = { kind: 'member', object: cur, prop: segments[k].replace(/\(.*\)$/, '') };
    }
    return cur;
  }
  // Plain ident
  if (_IDENT.test(expr)) {
    return { kind: 'ident', name: expr.match(_IDENT)[0] };
  }
  // List literal
  if (/^\[/.test(expr)) {
    const elements = _splitArgs(expr.slice(1, -1)).map((e) => parseExpr(e));
    return { kind: 'array', elements };
  }
  // Dict literal — only `key: value` entries are kept, and only the value.
  if (/^\{/.test(expr)) {
    const props = _splitArgs(expr.slice(1, -1))
      .map((entry) => {
        const colon = _findTopLevel(entry, ':');
        if (colon < 0) return null;
        return { value: parseExpr(entry.slice(colon + 1)) };
      })
      .filter(Boolean);
    return { kind: 'object', props };
  }
  return { kind: 'unknown' };
}
141
+
142
/**
 * Split a comma-separated list (call arguments, list elements, dict entries)
 * on its top-level commas.
 *
 * Commas nested inside (), [] or {} are ignored, and so is everything inside
 * a single- or double-quoted string — a `,` or bracket in a string literal
 * must neither split the list nor unbalance the nesting depth.
 *
 * @param {string} s the text between the enclosing delimiters
 * @returns {string[]} trimmed pieces; an empty trailing piece (trailing
 *   comma) is dropped
 */
function _splitArgs(s) {
  if (!s) return [];
  const out = [];
  let depth = 0;
  let last = 0;
  let quote = ''; // the open quote character while inside a string, else ''
  for (let i = 0; i < s.length; i++) {
    const c = s[i];
    if (quote) {
      if (c === '\\') i++; // skip the escaped character
      else if (c === quote) quote = '';
      continue;
    }
    if (c === '"' || c === "'") quote = c;
    else if (c === '(' || c === '[' || c === '{') depth++;
    else if (c === ')' || c === ']' || c === '}') depth--;
    else if (c === ',' && depth === 0) {
      out.push(s.slice(last, i).trim());
      last = i + 1;
    }
  }
  const tail = s.slice(last).trim();
  if (tail) out.push(tail);
  return out;
}
156
+
157
/**
 * Find the first occurrence of `sep` in `s` that is at nesting depth zero,
 * i.e. not inside (), [] or {}, and not inside a quoted string.
 *
 * Brackets inside string literals do not affect the depth, and a separator
 * inside a string literal is never reported.
 *
 * @param {string} s text to search
 * @param {string} sep separator / operator to look for
 * @returns {number} index of the match, or -1 when there is none
 */
function _findTopLevel(s, sep) {
  let depth = 0;
  let quote = ''; // the open quote character while inside a string, else ''
  const lastStart = s.length - sep.length;
  for (let i = 0; i <= lastStart; i++) {
    const c = s[i];
    if (quote) {
      if (c === '\\') i++; // skip the escaped character
      else if (c === quote) quote = '';
      continue;
    }
    if (c === '"' || c === "'") quote = c;
    else if (c === '(' || c === '[' || c === '{') depth++;
    else if (c === ')' || c === ']' || c === '}') depth--;
    if (depth === 0 && s.startsWith(sep, i)) return i;
  }
  return -1;
}
167
+
168
+ // ── Indentation-aware function extraction ───────────────────────────────
169
+
170
/**
 * Scan Python source for single-line `def` / `async def` headers and collect
 * each function's parameters and body lines.
 *
 * Comments are blanked first via `blankComments(text, 'py')`. A header is
 * accepted only when its parenthesised parameter list closes on the same line
 * and is followed by `:` (optionally after a `-> annotation`).
 *
 * Parameter names are normalised to bare identifiers: annotations and
 * defaults are dropped, `*args` / `**kw` lose their stars, and the bare `*`
 * and `/` markers are omitted.
 *
 * Body lines are every following line indented deeper than the `def`, up to
 * the first non-blank line at the same or shallower indent. The indentation
 * stripped from each body line is that of the first non-blank body line, so
 * 2-space and tab-indented files are not truncated; a line indented less than
 * that only loses its own leading whitespace.
 *
 * @param {string} text raw file contents
 * @param {string} file path used to build each function's `qid`
 * @returns {Array<{qid: string, name: string, line: number, params: string[],
 *   body: Array<{line: number, text: string}>}>} one record per function,
 *   with 1-based line numbers
 */
function extractFunctions(text, file) {
  const lines = blankComments(text, 'py').split('\n');
  const fns = [];
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const head = /^(\s*)(?:async\s+)?def\s+(\w+)\s*\(/.exec(line);
    if (!head) continue;
    const indent = head[1].length;
    const name = head[2];

    // Balanced, quote-aware scan for the paren that closes the signature, so
    // defaults such as `x=Foo(1, 2)` or `sep=")"` do not end it early.
    let p = head[0].length;
    let depth = 1;
    let quote = '';
    for (; p < line.length; p++) {
      const c = line[p];
      if (quote) {
        if (c === '\\') p++;
        else if (c === quote) quote = '';
        continue;
      }
      if (c === '"' || c === "'") { quote = c; continue; }
      if (c === '(' || c === '[' || c === '{') depth++;
      else if (c === ')' || c === ']' || c === '}') {
        depth--;
        if (depth === 0) break;
      }
    }
    if (depth !== 0) continue; // signature continues on a later line

    const paramsText = line.slice(head[0].length, p);
    // After the close paren, require a `:` (possibly via `-> X:`).
    const after = line.slice(p + 1);
    if (!/^\s*(?:->\s*[^:]+)?:\s*(?:#.*)?$/.test(after)) continue;

    const params = _splitArgs(paramsText)
      .map((s) => s.trim().split(/[:=]/)[0].trim().replace(/^\*+/, '').trim())
      .filter((param) => param && param !== '/');

    const body = [];
    let bodyIndent = -1; // indentation of the first non-blank body line
    let j = i + 1;
    while (j < lines.length) {
      const l = lines[j];
      if (l.trim() === '') {
        body.push({ line: j + 1, text: '' });
        j++;
        continue;
      }
      const li = l.match(/^(\s*)/)[1].length;
      if (li <= indent) break;
      if (bodyIndent < 0) bodyIndent = li;
      // Strip one body-level indent, never more than the line actually has.
      body.push({ line: j + 1, text: l.slice(Math.min(li, bodyIndent)) });
      j++;
    }
    fns.push({
      qid: `${file}::module::${name}`,
      name,
      line: i + 1,
      params,
      body,
    });
  }
  return fns;
}
219
+
220
+ // ── Build CFG from a function's body lines ──────────────────────────────
221
+
222
/**
 * Turn a function record's body lines into a straight-line CFG.
 *
 * An `entry` and an `exit` node are allocated first. Every non-blank body
 * line that `_classifyLine` turns into a node is appended to a single chain
 * entry → n1 → n2 → … → exit, in source order. No branch or back edges are
 * created here.
 *
 * @param {{body: Iterable<{line: number, text: string}>}} fn function record
 * @returns {{entry: string, exit: string, nodes: Object<string, object>}}
 */
function buildCfg(fn) {
  const entry = nextNodeId();
  const exit = nextNodeId();
  const nodes = {
    [entry]: { id: entry, kind: 'entry', succ: [] },
    [exit]: { id: exit, kind: 'exit', succ: [] },
  };

  // Append `toId` to the successor list of `fromId`, creating the list if
  // the node came back without one.
  const link = (fromId, toId) => {
    const from = nodes[fromId];
    if (!from) return;
    if (!from.succ) from.succ = [];
    from.succ.push(toId);
  };

  let tail = entry;
  for (const stmt of fn.body) {
    if (stmt.text.trim() === '') continue;
    const node = _classifyLine(stmt.text, stmt.line);
    if (!node) continue;
    const id = nextNodeId();
    node.id = id;
    nodes[id] = node;
    link(tail, id);
    tail = id;
  }
  link(tail, exit);
  return { entry, exit, nodes };
}
249
+
250
/**
 * Classify one body line into a CFG node (without an id).
 *
 * Recognised shapes, checked in this order:
 *   return [expr]          → `return`
 *   raise [expr]           → `throw`
 *   if / elif cond:        → `if`
 *   for v in expr:         → `assign` (v ← expr)
 *   while cond:            → `loop-header`
 *   x <op>= expr           → `assign` with source `x <op> expr`, so whatever
 *                            x held before still flows into the new value
 *   x = expr               → `assign`
 *   call(args)             → `call`, for any text `parseExpr` recognises as
 *                            a call, including nested calls in the arguments
 *   anything else          → `noop`
 *
 * `return` and `raise` must be whole words: `returned = x` is an assignment
 * and `raise_alarm(x)` is a call.
 *
 * @param {string} text one source line (leading/trailing space is ignored)
 * @param {number} line 1-based line number recorded on the node
 * @returns {object|null} the node, or null for a blank line
 */
function _classifyLine(text, line) {
  const stmt = text.trim();
  if (!stmt) return null;
  let m;
  // return expr
  if ((m = /^return\b\s*(.*)$/.exec(stmt))) {
    return { kind: 'return', value: m[1] ? parseExpr(m[1]) : null, line, succ: [] };
  }
  // raise expr
  if ((m = /^raise\b\s*(.*)$/.exec(stmt))) {
    return { kind: 'throw', value: m[1] ? parseExpr(m[1]) : null, line, succ: [] };
  }
  // if cond: / elif cond:
  if ((m = /^(?:el)?if\s+(.+):\s*$/.exec(stmt))) {
    return { kind: 'if', cond: parseExpr(m[1]), line, succ: [] };
  }
  // for v in expr:
  if ((m = /^for\s+(\w+)\s+in\s+(.+):\s*$/.exec(stmt))) {
    return { kind: 'assign', target: m[1], source: parseExpr(m[2]), line, succ: [] };
  }
  // while cond:
  if ((m = /^while\s+(.+):\s*$/.exec(stmt))) {
    return { kind: 'loop-header', cond: parseExpr(m[1]), line, succ: [] };
  }
  // x += expr and friends: model as x = x <op> expr.
  if ((m = /^([A-Za-z_][\w.]*)\s*(\*\*|\/\/|<<|>>|[-+*\/%&|^@])=(?!=)\s*(.+)$/.exec(stmt))) {
    const source = { kind: 'binary', op: m[2], left: parseExpr(m[1]), right: parseExpr(m[3]) };
    return { kind: 'assign', target: m[1], source, line, succ: [] };
  }
  // x = expr (avoid matching `==`)
  if ((m = /^([A-Za-z_][\w.]*)\s*=(?!=)\s*(.+)$/.exec(stmt))) {
    return { kind: 'assign', target: m[1], source: parseExpr(m[2]), line, succ: [] };
  }
  // bare call: reuse parseExpr's balanced, quote-aware call recognition so
  // nested calls such as `db.execute(sanitize(q))` are not dropped.
  const expr = parseExpr(stmt);
  if (expr && expr.kind === 'call') {
    return { kind: 'call', callee: expr.callee, args: expr.args, line, succ: [] };
  }
  // unhandled: noop
  return { kind: 'noop', line, succ: [] };
}
287
+
288
/**
 * Public entry point: parse one Python source file into the IR shape
 * `{ file, functions, topLevel }`.
 *
 * Returns null (nothing parsed) when `file` is falsy, `raw` is not a
 * non-empty string, `file` does not end in `.py` (case-insensitive), or `raw`
 * is longer than 1,000,000 characters. `topLevel` is always null.
 *
 * @param {string} file repo-relative .py path
 * @param {string} raw file contents
 * @returns {{file: string, functions: object[], topLevel: null} | null}
 */
export function parsePythonFile(file, raw) {
  const hasInput = Boolean(file) && typeof raw === 'string' && raw.length > 0;
  if (!hasInput) return null;
  if (!/\.py$/i.test(file)) return null;
  if (raw.length > 1_000_000) return null;

  const functions = [];
  for (const rec of extractFunctions(raw, file)) {
    const { qid, name, line, params } = rec;
    functions.push({ qid, name, line, params, cfg: buildCfg(rec), file });
  }
  return { file, functions, topLevel: null };
}
package/src/ir/ssa.js ADDED
@@ -0,0 +1,315 @@
1
+ // Static Single Assignment transform (P2.4).
2
+ //
3
+ // Renames every variable in the IR so each definition gets a unique name
4
+ // (`x_0`, `x_1`, `x_2`, ...). At control-flow joins, places a φ-node that
5
+ // merges the incoming definitions.
6
+ //
7
+ // Why this matters for taint:
8
+ // let x = req.body; // x_0 := tainted
9
+ // x = sanitize(x); // x_1 := clean
10
+ // doSink(x); // sink reads x_1 → clean
11
+ //
12
+ // Without SSA, the engine sees one variable `x`. The sanitize-then-use
13
+ // pattern looks like a "still tainted" finding because the same name was
14
+ // tainted earlier in the function. SSA turns this into two separate
15
+ // variables, eliminating the conflation.
16
+ //
17
+ // Algorithm (Cytron-Ferrante 1991):
18
+ //
19
+ // 1. Compute the dominator tree of the CFG.
20
+ // 2. Compute dominance frontiers for each node.
21
+ // 3. For each variable `v` defined in node `n`, place a φ(v) at every
22
+ // node in DF(n) — these are the join points where v's def reaches.
23
+ // 4. Rename each definition `v` to `v_<count>` (increment per def).
24
+ // Each use of `v` is rewritten to the most-recent dominating def.
25
+ //
26
+ // v1 in this module: we expose the SSA transform as a standalone pass that
27
+ // the engine can opt into via AGENTIC_SECURITY_SSA=1. Default-off because
28
+ // it changes IR shape and the existing engine must consume the new shape.
29
+ //
30
+ // Public API:
31
+ // computeSSA(cfg) → mutates cfg in place by attaching `cfg.ssa`: per-node SSA
32
+ // names (x_0/x_1/x_2...) and φ records at join points; IR nodes are not rewritten.
33
+ // isSSAEnabled() → bool from env
34
+
35
+ import { accessPathOf } from '../dataflow/access-paths.js';
36
+
37
/**
 * Report whether the opt-in SSA pass is switched on.
 *
 * @returns {boolean} true only when the AGENTIC_SECURITY_SSA environment
 *   variable is exactly the string '1'.
 */
export function isSSAEnabled() {
  const { AGENTIC_SECURITY_SSA: flag } = process.env;
  return flag === '1';
}
40
+
41
/**
 * Iterative dominator computation over a CFG.
 *
 * Every non-entry node starts out "dominated by everything"; the sets are
 * then repeatedly shrunk to {n} ∪ ⋂ dom(pred) until nothing changes (or
 * the fixed 1000-pass budget runs out).
 *
 * @param {{entry: string, nodes: Object<string, {succ?: string[]}>}} cfg
 * @returns {Map<string, Set<string>>} dom[n] = set of nodes that dominate n.
 *   Non-entry nodes with no predecessors are never refined and keep the
 *   initial "all nodes" set.
 */
function computeDominators(cfg) {
  const ids = Object.keys(cfg.nodes || {});
  const root = cfg.entry;

  // Initial state: full set everywhere, singleton at the entry.
  const dom = new Map(ids.map((id) => [id, new Set(ids)]));
  dom.set(root, new Set([root]));

  // Predecessor lists, derived from the successor edges. Edges that point
  // at ids missing from cfg.nodes are ignored.
  const preds = new Map(ids.map((id) => [id, []]));
  for (const from of ids) {
    const outs = cfg.nodes[from]?.succ || [];
    for (const to of outs) {
      const bucket = preds.get(to);
      if (bucket) bucket.push(from);
    }
  }

  // True when `b` has the same size as `a` and contains all of b's members in a.
  const sameMembers = (a, b) => {
    if (a.size !== b.size) return false;
    for (const x of b) {
      if (!a.has(x)) return false;
    }
    return true;
  };

  let dirty = true;
  for (let budget = 1000; dirty && budget > 0; budget--) {
    dirty = false;
    for (const id of ids) {
      if (id === root) continue;
      const incoming = preds.get(id) || [];
      if (incoming.length === 0) continue;

      // Intersect the dominator sets of all predecessors.
      let merged = null;
      for (const p of incoming) {
        const pDom = dom.get(p);
        if (!pDom) continue;
        merged = merged === null
          ? new Set(pDom)
          : new Set([...merged].filter((x) => pDom.has(x)));
      }
      if (!merged) merged = new Set();
      merged.add(id); // a node always dominates itself

      const previous = dom.get(id);
      if (!previous || !sameMembers(previous, merged)) {
        dom.set(id, merged);
        dirty = true;
      }
    }
  }
  return dom;
}
89
+
90
/**
 * Derive immediate dominators from full dominator sets.
 *
 * For each node n, the strict dominators are dom[n] minus n itself. Among
 * those, the one whose own dominator set is largest is chosen; on a tie the
 * first candidate in set order wins.
 *
 * @param {Map<string, Set<string>>} dom - output of computeDominators.
 * @returns {Map<string, string | null>} idom[n], or null when n has no
 *   strict dominator.
 */
function computeImmediateDominators(dom) {
  const idom = new Map();
  dom.forEach((dominators, node) => {
    let closest = null;
    let closestDepth = -1;
    dominators.forEach((candidate) => {
      if (candidate === node) return; // skip self: only strict dominators count
      const candDom = dom.get(candidate);
      const depth = candDom ? candDom.size : 0;
      if (depth > closestDepth) {
        closest = candidate;
        closestDepth = depth;
      }
    });
    idom.set(node, closest);
  });
  return idom;
}
114
+
115
/**
 * Dominance frontiers via the "walk up from each predecessor" scheme:
 *
 *   for every join node j (two or more predecessors)
 *     for every predecessor p of j
 *       climb the idom chain from p, adding j to DF of each node passed,
 *       and stop on reaching idom(j)
 *
 * Each climb is capped at (node count + 5) steps so a malformed idom map
 * cannot spin forever.
 *
 * @param {{nodes: Object<string, {succ?: string[]}>}} cfg
 * @param {Map<string, string | null>} idom - immediate dominators.
 * @returns {Map<string, Set<string>>} DF[n] for every node id in cfg.nodes.
 */
function computeDominanceFrontiers(cfg, idom) {
  const ids = Object.keys(cfg.nodes || {});
  const DF = new Map(ids.map((id) => [id, new Set()]));

  // Predecessor lists, derived from successor edges between known nodes.
  const preds = new Map(ids.map((id) => [id, []]));
  ids.forEach((from) => {
    const outs = cfg.nodes[from]?.succ || [];
    for (const to of outs) {
      const bucket = preds.get(to);
      if (bucket) bucket.push(from);
    }
  });

  const climbLimit = ids.length + 5;
  for (const join of ids) {
    const incoming = preds.get(join) || [];
    if (incoming.length < 2) continue; // only join points contribute
    const stopAt = idom.get(join);
    for (const start of incoming) {
      for (
        let runner = start, budget = climbLimit;
        runner && runner !== stopAt && budget > 0;
        budget--
      ) {
        DF.get(runner).add(join);
        runner = idom.get(runner) || null;
      }
    }
  }
  return DF;
}
153
+
154
/**
 * Map every CFG node id to the set of variable names it defines.
 *
 * Only `assign` nodes with a string `target` define anything, and what they
 * define is the root identifier of the target — the text before the first
 * `.` — so an assignment to `obj.a.b` counts as a definition of `obj`.
 * All other nodes get an empty set.
 *
 * @param {{nodes: Object<string, object>}} cfg
 * @returns {Map<string, Set<string>>}
 */
function defsPerNode(cfg) {
  const defs = new Map();
  for (const [id, node] of Object.entries(cfg.nodes || {})) {
    const written = new Set();
    const isAssign = Boolean(node) && node.kind === 'assign' && typeof node.target === 'string';
    if (isAssign) {
      const firstDot = node.target.indexOf('.');
      const rootName = firstDot === -1 ? node.target : node.target.slice(0, firstDot);
      written.add(rootName);
    }
    defs.set(id, written);
  }
  return defs;
}
172
+
173
/**
 * Decide where φ-functions are needed.
 *
 * For each variable, start from the nodes that define it and propagate
 * through dominance frontiers: every frontier node gets a φ for the
 * variable and, being a new definition site itself, is queued so its own
 * frontier is processed too.
 *
 * @param {{nodes: Object<string, object>}} cfg
 * @param {Map<string, Set<string>>} defs - variables defined per node.
 * @param {Map<string, Set<string>>} DF   - dominance frontier per node.
 * @returns {Map<string, Set<string>>} node id → variables needing a φ there.
 */
function placePhis(cfg, defs, DF) {
  // Invert defs: variable → nodes that define it (first-seen order).
  const defSites = new Map();
  for (const [nid, vars] of defs) {
    for (const v of vars) {
      let sites = defSites.get(v);
      if (!sites) {
        sites = new Set();
        defSites.set(v, sites);
      }
      sites.add(nid);
    }
  }

  const phis = new Map(Object.keys(cfg.nodes || {}).map((id) => [id, new Set()]));

  for (const [v, sites] of defSites) {
    // FIFO worklist driven by a cursor; `enqueued` stops re-queuing a node.
    const queue = [...sites];
    const enqueued = new Set(queue);
    for (let cursor = 0; cursor < queue.length; cursor++) {
      const frontier = DF.get(queue[cursor]) || new Set();
      for (const j of frontier) {
        const slot = phis.get(j);
        if (slot.has(v)) continue;
        slot.add(v);
        if (!enqueued.has(j)) {
          enqueued.add(j);
          queue.push(j);
        }
      }
    }
  }
  return phis;
}
205
+
206
/**
 * Compute SSA naming for a CFG (dominance-frontier φ placement followed by
 * a dominator-tree renaming walk) and attach the result as `cfg.ssa`.
 *
 * The IR nodes themselves are NOT rewritten; all SSA information lives on
 * the side structure so engines that ignore SSA keep working:
 *
 *   cfg.ssa.versions    Map<nid, Map<var, ssaName>>
 *       Names visible at the node once its own φ's and its own assignment
 *       (if any) have been applied. For a variable assigned at the node
 *       this is therefore the NEW name, not the one its right-hand side reads.
 *   cfg.ssa.phis        Map<nid, Array<{ var, ssaName, incoming }>>
 *       One record per φ at the node. `incoming` holds one
 *       `{ predNid, ssaName }` entry per visited CFG predecessor, where
 *       `ssaName` is the version leaving that predecessor (null when no
 *       definition of the variable reaches along that edge).
 *   cfg.ssa.nextVersion Map<var, number>
 *       Next unused version index per variable.
 *
 * Nodes that are not reached by the dominator-tree walk from `cfg.entry`
 * get no `versions` entry and contribute no φ operands.
 *
 * @param {{entry: string, nodes: Object<string, object>}} cfg
 * @returns {object} the same `cfg` object (returned untouched when `cfg`,
 *   `cfg.nodes` or `cfg.entry` is falsy).
 */
export function computeSSA(cfg) {
  if (!cfg || !cfg.nodes || !cfg.entry) return cfg;
  const dom = computeDominators(cfg);
  const idom = computeImmediateDominators(dom);
  const DF = computeDominanceFrontiers(cfg, idom);
  const defs = defsPerNode(cfg);
  const phis = placePhis(cfg, defs, DF);

  const ssaInfo = {
    versions: new Map(), // nid -> Map<var, current ssa name>
    phis: new Map(), // nid -> [{ var, ssaName, incoming: [{predNid, ssaName}] }]
    nextVersion: new Map(), // var -> next index
  };

  // Dominator tree as parent -> children lists.
  const idomChildren = new Map();
  for (const [n, p] of idom) {
    if (!p) continue;
    if (!idomChildren.has(p)) idomChildren.set(p, []);
    idomChildren.get(p).push(n);
  }

  const counter = ssaInfo.nextVersion;
  const stacks = new Map(); // var -> stack of live SSA names
  // φ operand lists, keyed by (join node, var). They are created on first
  // touch so a predecessor visited BEFORE its join node can already record
  // its operand; the φ record later shares the very same array.
  const operandLists = new Map();

  function freshName(v) {
    const i = counter.get(v) || 0;
    counter.set(v, i + 1);
    return `${v}_${i}`;
  }
  function topOf(v) {
    const s = stacks.get(v);
    if (!s || !s.length) return null;
    return s[s.length - 1];
  }
  function pushVersion(v, name) {
    const s = stacks.get(v) || [];
    s.push(name);
    stacks.set(v, s);
  }
  function operandsFor(nid, v) {
    let byVar = operandLists.get(nid);
    if (!byVar) {
      byVar = new Map();
      operandLists.set(nid, byVar);
    }
    let list = byVar.get(v);
    if (!list) {
      list = [];
      byVar.set(v, list);
    }
    return list;
  }

  // Process one node on the way DOWN the dominator tree. Returns the list
  // of variables whose stacks were pushed (to be popped on the way up), or
  // null when the node id has no entry in cfg.nodes.
  function enter(nid) {
    const n = cfg.nodes[nid];
    if (!n) return null;
    const pushed = [];

    // Materialize φ-functions at this node: each phi-var gets a fresh name.
    const phiList = [];
    for (const v of phis.get(nid) || []) {
      const name = freshName(v);
      pushVersion(v, name);
      pushed.push(v);
      phiList.push({ var: v, ssaName: name, incoming: operandsFor(nid, v) });
    }
    if (phiList.length) ssaInfo.phis.set(nid, phiList);

    // Snapshot every variable that currently has a live version.
    const vmap = new Map();
    for (const [v, s] of stacks) {
      if (s.length) vmap.set(v, s[s.length - 1]);
    }

    // This node's own definition overrides the snapshot.
    for (const v of defs.get(nid) || []) {
      const name = freshName(v);
      pushVersion(v, name);
      pushed.push(v);
      vmap.set(v, name);
    }
    ssaInfo.versions.set(nid, vmap);

    // Feed the φ-functions of every CFG successor with the version that
    // leaves this node. Duplicate edges to one successor count once.
    for (const succ of new Set(n.succ || [])) {
      for (const v of phis.get(succ) || []) {
        operandsFor(succ, v).push({ predNid: nid, ssaName: topOf(v) });
      }
    }
    return pushed;
  }

  // Pre-order walk of the dominator tree with an explicit stack, so a long
  // dominator chain (e.g. a very long straight-line function) cannot
  // overflow the JS call stack. Frames with `pushed` set are "leave" frames.
  const seen = new Set();
  const work = [{ nid: cfg.entry, pushed: null }];
  while (work.length) {
    const frame = work.pop();
    if (frame.pushed) {
      for (const v of frame.pushed) stacks.get(v).pop();
      continue;
    }
    if (seen.has(frame.nid)) continue; // guards against a malformed idom map
    seen.add(frame.nid);

    const pushed = enter(frame.nid);
    if (!pushed) continue;
    if (pushed.length) work.push({ nid: frame.nid, pushed });
    const kids = idomChildren.get(frame.nid) || [];
    // Reverse push so children are visited in their original order.
    for (let i = kids.length - 1; i >= 0; i--) {
      work.push({ nid: kids[i], pushed: null });
    }
  }

  cfg.ssa = ssaInfo;
  return cfg;
}
305
+
306
/**
 * Look up the SSA name recorded for `varName` at `nodeId`.
 *
 * Reads `cfg.ssa.versions` as populated by computeSSA. Note that for a
 * variable assigned at `nodeId` itself, computeSSA stores the name created
 * by that assignment — not the name its right-hand side would read.
 *
 * @param {object} cfg      - CFG, expected to carry `cfg.ssa`.
 * @param {string} nodeId   - key into `cfg.ssa.versions`.
 * @param {string} varName  - un-suffixed variable name.
 * @returns {string | null} the SSA name, or null when `cfg`/`cfg.ssa` is
 *   missing, the node has no entry, or the variable has no recorded name.
 */
export function ssaNameAt(cfg, nodeId, varName) {
  const ssa = cfg ? cfg.ssa : null;
  if (!ssa) {
    return null;
  }
  const visible = ssa.versions.get(nodeId);
  if (!visible) {
    return null;
  }
  const name = visible.get(varName);
  return name ? name : null;
}