@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,195 @@
1
+ // Class Hierarchy Analysis (CHA) — JS/TS (P1.2).
2
+ //
3
+ // Walks the Babel ASTs across the project to build:
4
+ //
5
+ // classes: Map<className, { name, file, line, methods, extends }> (fields / implements are not populated yet)
6
+ // methodOwners: Map<methodQid, className>
7
+ // typeOfVar: Map<file::scope::varName, className> — assignment-time type
8
+ // inference (simple, no
9
+ // flow analysis)
10
+ //
11
+ // The output is consumed by the dataflow engine's receiver-sensitivity layer
12
+ // (`receiver-context.js`) and by `callgraph.js` to refine virtual-call resolution.
13
+ //
14
+ // Scope of this v1: shallow analysis. We DON'T resolve:
15
+ // - polymorphic types (T<U>),
16
+ // - cross-file class inheritance via dynamic imports,
17
+ // - mixins (Object.assign / class factories),
18
+ // - prototype-based assignments outside `class` declarations.
19
+ //
20
+ // What we DO catch:
21
+ // - `class Foo {}` declarations + their method signatures.
22
+ // - `class Bar extends Foo {}` extends relationships.
23
+ // - `let x = new Foo()` typed-LHS inference.
24
+ // - `const x: Foo = ...` TS-annotated-LHS inference.
25
+ // - `function buildFoo(): Foo { ... }` typed-return inference.
26
+
27
+ const _AST_CACHE = new WeakMap();
28
+
29
/**
 * Build a shallow class hierarchy from a per-file IR map.
 *
 * Only the IR's own structural hints are used; no raw AST is consulted:
 *   - A function whose qid ends in `<Class>.<method>` (last `::` segment,
 *     split at the first dot) registers `<method>` on `<Class>`.
 *   - An `assign` CFG node whose source is a call to an identifier callee
 *     records the assignment target as having that class, provided the callee
 *     is an already-known class or starts with an upper-case letter. The
 *     `isNew` flag is NOT checked, so PascalCase factory calls are typed too.
 *
 * `extends` is initialised to `null` for every class and is never filled in
 * by this pass. When the same class name appears in several files, the first
 * file seen supplies `file` / `line`.
 *
 * Malformed entries (nullish functions, non-string qids) are skipped instead
 * of aborting the whole build.
 *
 * @param {Object<string, {functions?: Array<object>}>} perFileIR file → IR
 * @returns {{ classes: Map<string, object>, methodOwners: Map<string, string>, typeOfVar: Map<string, string> }}
 *   `classes`      className → { name, file, line, methods: Set, extends }
 *   `methodOwners` function qid → className
 *   `typeOfVar`    `${file}::${fn.qid}::${target}` → className
 */
export function buildClassHierarchy(perFileIR) {
  const classes = new Map();
  const methodOwners = new Map();
  const typeOfVar = new Map();
  const result = { classes, methodOwners, typeOfVar };

  if (!perFileIR || typeof perFileIR !== 'object') return result;

  for (const [file, ir] of Object.entries(perFileIR)) {
    if (!ir || !Array.isArray(ir.functions)) continue;

    // Pass 1: recover classes and their methods from `Class.method` qids.
    for (const fn of ir.functions) {
      if (!fn || typeof fn.qid !== 'string' || !fn.qid) continue;
      const tail = fn.qid.split('::').pop() || '';
      const dotIdx = tail.indexOf('.');
      // dotIdx === 0 would mean an empty class name; -1 means a plain function.
      if (dotIdx <= 0) continue;
      const className = tail.slice(0, dotIdx);
      const methodName = tail.slice(dotIdx + 1);

      methodOwners.set(fn.qid, className);
      let cls = classes.get(className);
      if (!cls) {
        cls = { name: className, file, line: fn.line || 0, methods: new Set(), extends: null };
        classes.set(className, cls);
      }
      cls.methods.add(methodName);
    }

    // Pass 2: infer variable types from `x = Foo(...)` / `x = new Foo(...)`.
    // Runs after pass 1 so every class of this file is already registered.
    for (const fn of ir.functions) {
      if (!fn) continue;
      const nodes = fn.cfg ? fn.cfg.nodes : null;
      if (!nodes) continue;
      for (const node of Object.values(nodes)) {
        if (!node || node.kind !== 'assign') continue;
        const src = node.source;
        if (!src || src.kind !== 'call') continue;
        const callee = src.callee;
        const className = callee?.kind === 'ident' ? callee.name : null;
        if (!className) continue;
        // Convention: PascalCase callees are treated as constructors.
        if (!classes.has(className) && !/^[A-Z]/.test(className)) continue;
        // Only plain string targets are tracked (no member/destructuring targets).
        if (typeof node.target === 'string' && node.target) {
          typeOfVar.set(`${file}::${fn.qid}::${node.target}`, className);
        }
      }
    }
  }

  return result;
}
88
+
89
/**
 * Look up the class inferred for a variable.
 *
 * The lookup key is `${file}::${fnQid}::${varName}`, the same shape
 * `buildClassHierarchy` writes into `typeOfVar`.
 *
 * @param {{ typeOfVar?: Map<string, string> }} cha class hierarchy
 * @param {string} file file the variable lives in
 * @param {string} fnQid qid of the enclosing function
 * @param {string} varName variable name
 * @returns {string|null} inferred class name, or null when unknown
 */
export function classOfVar(cha, file, fnQid, varName) {
  const table = cha ? cha.typeOfVar : null;
  if (!table || !varName) {
    return null;
  }
  const inferred = table.get(`${file}::${fnQid}::${varName}`);
  return inferred ? inferred : null;
}
97
+
98
/**
 * Find which class provides `methodName` for a receiver of type `className`.
 *
 * Starts at `className` and follows each class's `extends` link upward until
 * a class whose `methods` set contains `methodName` is found. The walk stops
 * at an unknown class, at the end of the chain, or when a cycle is detected.
 *
 * @param {{ classes?: Map<string, object> }} cha class hierarchy
 * @param {string} className receiver class to start from
 * @param {string} methodName method being looked up
 * @returns {{ className: string, methodName: string }|null} the defining
 *   class paired with the method name (a synthetic handle, not a qid), or
 *   null when nothing matches
 */
export function resolveMethod(cha, className, methodName) {
  const ready = cha && cha.classes && className && methodName;
  if (!ready) return null;

  const visited = new Set();
  for (let name = className; name && !visited.has(name);) {
    visited.add(name);
    const entry = cha.classes.get(name);
    if (!entry) return null;
    if (entry.methods && entry.methods.has(methodName)) {
      return { className: name, methodName };
    }
    name = entry.extends || null;
  }
  return null;
}
119
+
120
+ // ════════════════════════════════════════════════════════════════════════════
121
+ // P4.5 — Rapid Type Analysis (RTA)
122
+ // ════════════════════════════════════════════════════════════════════════════
123
+ //
124
+ // CHA over-approximates virtual dispatch: a call on a receiver of type
125
+ // Animal resolves to EVERY method named `speak` on EVERY subclass — even
126
+ // subclasses that are never instantiated. RTA narrows this by tracking
127
+ // which classes are actually instantiated in the program.
128
+
129
/**
 * Extract the constructed class name from a `call` IR node flagged `isNew`.
 * The callee may be a plain string or an `{ kind: 'ident', name }` object.
 * Returns null for anything else, including empty or missing names.
 */
function _constructedClassName(callNode) {
  if (!callNode || callNode.kind !== 'call' || !callNode.isNew) return null;
  const callee = callNode.callee;
  if (typeof callee === 'string') return callee || null;
  if (callee && typeof callee === 'object' && callee.kind === 'ident' && typeof callee.name === 'string') {
    return callee.name || null;
  }
  return null;
}

/**
 * Collect the names of all classes instantiated anywhere in the IR.
 *
 * Two node shapes are recognised in each function's CFG:
 *   - `assign` nodes whose `source` is a `new` call (`x = new Foo()`), and
 *   - bare `call` nodes flagged `isNew` (`new Foo()` as a statement).
 * Both callee encodings (string and `{ kind: 'ident', name }`) are accepted
 * in both positions, so a class is never missed merely because of which
 * encoding appears where. Missing a class would make RTA drop valid targets.
 *
 * Nullish IR entries and functions are skipped rather than throwing.
 *
 * @param {Object<string, {functions?: Array<object>}>} perFileIR file → IR
 * @returns {Set<string>} names of instantiated ("live") classes
 */
export function collectInstantiatedClasses(perFileIR) {
  const live = new Set();
  if (!perFileIR || typeof perFileIR !== 'object') return live;

  for (const ir of Object.values(perFileIR)) {
    if (!ir || !Array.isArray(ir.functions)) continue;
    for (const fn of ir.functions) {
      const nodes = fn?.cfg?.nodes;
      if (!nodes) continue;
      for (const node of Object.values(nodes)) {
        if (!node) continue;
        // For assignments the interesting call is the right-hand side.
        const call = node.kind === 'assign' ? node.source : node;
        const className = _constructedClassName(call);
        if (className) live.add(className);
      }
    }
  }
  return live;
}
154
+
155
/**
 * RTA-refined virtual-call resolution: list every instantiated class that
 * defines `methodName`, optionally restricted to `rootClass` and its
 * transitive subclasses (following each class's `extends` link).
 *
 * Results come back in the iteration order of `cha.classes`.
 *
 * @param {{ classes?: Map<string, object> }} cha class hierarchy
 * @param {string} methodName the method being dispatched
 * @param {Set<string>} liveClasses output of collectInstantiatedClasses
 * @param {string|null} [rootClass] declared/inferred receiver type;
 *   a falsy value means "any class"
 * @returns {Array<{ className: string, methodName: string }>} candidate
 *   targets; empty when any required input is missing
 */
export function resolveMethodRTA(cha, methodName, liveClasses, rootClass) {
  // `cha.classes` is checked as well so a partially-built hierarchy yields
  // "no candidates" instead of a TypeError, matching resolveMethod.
  if (!cha || !cha.classes || !methodName || !liveClasses) return [];

  // True when `start` is `rootClass` or inherits from it; cycle-safe.
  const isWithinRoot = (start) => {
    const seen = new Set();
    let cur = start;
    while (cur && !seen.has(cur)) {
      if (cur === rootClass) return true;
      seen.add(cur);
      cur = cha.classes.get(cur)?.extends || null;
    }
    return false;
  };

  const out = [];
  for (const [cn, cls] of cha.classes) {
    if (!liveClasses.has(cn)) continue;
    if (!cls || !cls.methods || !cls.methods.has(methodName)) continue;
    if (rootClass && !isWithinRoot(cn)) continue;
    out.push({ className: cn, methodName });
  }
  return out;
}
186
+
187
/**
 * Attach the set of instantiated classes to a class hierarchy as
 * `cha.liveClasses`, so callers can reuse it instead of recomputing.
 *
 * The hierarchy object is mutated in place and also returned. A falsy `cha`
 * is handed back untouched.
 *
 * @param {object} cha class hierarchy to annotate
 * @param {object} perFileIR file → IR map scanned for `new` expressions
 * @returns {object} the same `cha` that was passed in
 */
export function annotateRTA(cha, perFileIR) {
  if (cha) {
    cha.liveClasses = collectInstantiatedClasses(perFileIR);
  }
  return cha;
}
@@ -0,0 +1,152 @@
1
+ // IR entry point.
2
+ //
3
+ // Build per-file IR for every JS/TS, Python, C# and Kotlin file in a project
4
+ // (plus Java via the async entry point), then build the cross-file call graph and class hierarchy on top.
5
+
6
+ import { parseJsFile } from './parser-js.js';
7
+ import { parseCSharpFile } from './parser-cs.js';
8
+ import { parseKotlinFile } from './parser-kt.js';
9
+ import { parsePythonFile as parsePythonFileRegex } from './parser-py.js';
10
+ import {
11
+ parsePythonFile as parsePythonFileCst,
12
+ parsePythonFilesBatch as parsePythonFilesBatchCst,
13
+ probePythonAvailable,
14
+ } from './parser-py-cst.js';
15
+ import { parseJavaFile } from './parser-java.js';
16
+ import { buildCallGraph } from './callgraph.js';
17
+ import { buildClassHierarchy } from './class-hierarchy.js';
18
+ import { computeSSA, isSSAEnabled } from './ssa.js';
19
+
20
// Decide which Python parser to use, from the environment plus a capability
// probe.
//   AGENTIC_SECURITY_PY_PARSER=cst   — require the CST parser; throw if Python
//                                      is unavailable
//   AGENTIC_SECURITY_PY_PARSER=regex — force the legacy regex parser (the
//                                      probe is not run)
//   AGENTIC_SECURITY_PY_PARSER=auto  — (default, and any other value) use CST
//                                      when the probe succeeds, else regex
//
// The value is matched case-insensitively. `auto` stays the default for one
// minor release so the CST path can be validated in real-world deployments
// without regressing customers who don't have python3 on PATH. Flip the
// default to `cst` once the equivalence corpus has run clean for two
// consecutive releases.
//
// Returns `{ parser: 'cst' | 'regex' }`.
function _chooseParser() {
  const requested = (process.env.AGENTIC_SECURITY_PY_PARSER || 'auto').toLowerCase();
  if (requested === 'regex') {
    return { parser: 'regex' };
  }

  const capability = probePythonAvailable();
  if (capability.ok) {
    return { parser: 'cst' };
  }
  // Python is unavailable: an explicit `cst` request is an error, while
  // everything else quietly degrades to the regex parser.
  if (requested === 'cst') {
    throw new Error(`AGENTIC_SECURITY_PY_PARSER=cst but Python is unavailable: ${capability.reason}`);
  }
  return { parser: 'regex' };
}
43
+
44
// Parse a batch of Python files and return their IRs.
//
// `pyEntries` is an array of `{ file, content }`. When the CST parser is
// selected, the whole batch goes through `parsePythonFilesBatchCst`; a `null`
// result from it is treated as a failed batch and the regex parser takes over
// for every file. That fallback is silent unless
// AGENTIC_SECURITY_PY_PARSER_DEBUG=1, in which case a line is written to
// stderr. Falsy results from the regex parser are dropped.
function _parsePythonFiles(pyEntries) {
  const { parser } = _chooseParser();

  if (parser === 'cst') {
    const cstResults = parsePythonFilesBatchCst(pyEntries);
    if (cstResults !== null) {
      return cstResults;
    }
    const debugEnabled = process.env.AGENTIC_SECURITY_PY_PARSER_DEBUG === '1';
    if (debugEnabled) {
      process.stderr.write('parser-py-cst: batch failed; falling back to regex parser\n');
    }
  }

  // Per-file regex parse — the pre-CST behaviour.
  return pyEntries.flatMap(({ file, content }) => {
    const ir = parsePythonFileRegex(file, content);
    return ir ? [ir] : [];
  });
}
59
+
60
// File-extension matchers shared by the sync and async IR builders. None of
// them is global/sticky, so `.test()` carries no state between calls.
const _JS_FILE_RE = /\.(?:js|jsx|ts|tsx|mjs|cjs)$/i;
const _PY_FILE_RE = /\.py$/i;
const _CS_FILE_RE = /\.cs$/i;
const _KT_FILE_RE = /\.kt$/i;
const _JAVA_FILE_RE = /\.java$/i;

// Parse one file with whichever synchronous frontend matches its extension
// (JS/TS, C#, Kotlin). Returns the parser's result, or null when no
// synchronous frontend handles the file.
function _parseSyncSourceFile(file, code) {
  if (_JS_FILE_RE.test(file)) return parseJsFile(file, code);
  if (_CS_FILE_RE.test(file)) return parseCSharpFile(file, code);
  if (_KT_FILE_RE.test(file)) return parseKotlinFile(file, code);
  return null;
}

// Shared tail of both builders: parse the deferred Python batch, optionally
// run SSA over every function's CFG, then build the call graph and the class
// hierarchy over the completed per-file map.
function _finishProjectIR(perFile, pyBatch) {
  if (pyBatch.length) {
    for (const ir of _parsePythonFiles(pyBatch)) {
      // Python results are keyed by the file name the parser reports.
      if (ir && ir.file) perFile[ir.file] = ir;
    }
  }
  if (isSSAEnabled()) {
    for (const ir of Object.values(perFile)) {
      for (const fn of (ir.functions || [])) {
        // SSA is best-effort: a failure on one function must not abort the
        // whole project build, so errors are deliberately ignored here.
        try { computeSSA(fn.cfg); } catch {}
      }
    }
  }
  const callGraph = buildCallGraph(perFile);
  const cha = buildClassHierarchy(perFile);
  return { perFile, callGraph, cha };
}

// Synchronous default — JS/TS, Python, C# and Kotlin. Engine.js calls this
// directly. Java files are ignored here because Java IR requires an async
// import of java-parser; callers who want it use buildProjectIRAsync instead.
//
// `fileContents` maps file path → source text (nullish is treated as empty).
// Returns `{ perFile, callGraph, cha }`. Files whose parser returns a falsy
// result, and files with unrecognised extensions, are left out of `perFile`.
export function buildProjectIR(fileContents) {
  const perFile = {};
  const pyBatch = [];
  for (const [file, code] of Object.entries(fileContents || {})) {
    if (_PY_FILE_RE.test(file)) {
      // Defer Python files to a single batched subprocess call.
      pyBatch.push({ file, content: code });
      continue;
    }
    const ir = _parseSyncSourceFile(file, code);
    if (ir) perFile[file] = ir;
  }
  return _finishProjectIR(perFile, pyBatch);
}

// Async variant — same as buildProjectIR, plus Java IR via java-parser.
// Java files are parsed one at a time in input order; a Java file whose
// parse throws is skipped so it cannot fail the whole build.
export async function buildProjectIRAsync(fileContents) {
  const perFile = {};
  const pyBatch = [];
  for (const [file, code] of Object.entries(fileContents || {})) {
    if (_PY_FILE_RE.test(file)) {
      pyBatch.push({ file, content: code });
      continue;
    }
    if (_JAVA_FILE_RE.test(file)) {
      try {
        const ir = await parseJavaFile(file, code);
        if (ir) perFile[file] = ir;
      } catch { /* skip unparseable Java file */ }
      continue;
    }
    const ir = _parseSyncSourceFile(file, code);
    if (ir) perFile[file] = ir;
  }
  return _finishProjectIR(perFile, pyBatch);
}
137
+
138
// Single-file Python entry point, re-exported so existing imports (e.g. tests
// that import { parsePythonFile } from './ir/index.js') keep working.
//
// Parser selection follows the same rules as the batch path: when the CST
// parser is chosen and yields a truthy result, that result is returned;
// otherwise the regex parser's result is returned. A falsy `file` or a
// non-string `code` yields null without any parser being consulted.
export function parsePythonFile(file, code) {
  const usable = Boolean(file) && typeof code === 'string';
  if (!usable) {
    return null;
  }
  const { parser } = _chooseParser();
  const cstResult = parser === 'cst' ? parsePythonFileCst(file, code) : null;
  return cstResult || parsePythonFileRegex(file, code);
}
151
+
152
+ export { parseJsFile, parseJavaFile, parseCSharpFile, parseKotlinFile, buildCallGraph, buildClassHierarchy, computeSSA, isSSAEnabled, probePythonAvailable };
@@ -0,0 +1,260 @@
1
+ // C# IR frontend (v0.66).
2
+ //
3
+ // Regex-based, pragmatic, focused on ASP.NET / Entity Framework / Dapper /
4
+ // System.IO surface area. Parallels parser-py.js (the legacy Python regex
5
+ // parser) in approach: extract method bodies, lower assignments and calls
6
+ // to the canonical IR shape, build a linear CFG.
7
+ //
8
+ // What we model:
9
+ // - method declarations: `[modifiers] returnType Name(params) { body }`
10
+ // - simple assignments: `var x = ...;` `Type x = ...;` `x = ...;`
11
+ // - method calls (statement-form): `obj.Method(args);` / `Method(args);`
12
+ // - return: `return expr;`
13
+ // - ASP.NET source-like access: `Request.Form["x"]`, `Request.QueryString[...]`
14
+ //
15
+ // What we do NOT model (regex-fallback class limits):
16
+ // - LINQ expressions (treated as opaque expression)
17
+ // - lambdas (body collapsed)
18
+ // - async/await (transparent)
19
+ // - generics on declarations beyond Type<...> name
20
+ // - attributes (skipped)
21
+ // - destructuring / tuples
22
+ // - control flow (if/for/while/switch) — body is treated as straight-line;
23
+ // this is enough for the source-reaches-sink shapes we care about.
24
+ //
25
+ // This is a v1. It will be promoted to a Roslyn-backed CST parser (analogous to
26
+ // parser-py-cst.js) once we have a dotnet capability probe.
27
+
28
+ import * as crypto from 'node:crypto';
29
+
30
// Global regex that finds C# method declarations which open a body:
//   <modifier> [<modifier>...] <returnType> <Name>(<params>) {
// Capture groups: 1 = return type, 2 = method name, 3 = raw parameter text.
// At least one modifier keyword is required, and the declaration must be
// preceded by start-of-input, whitespace, `;`, `{` or `}`.
// The regex is stateful (`g` flag): reset `lastIndex` before reusing it.
const METHOD_RE = (() => {
  const keywords = [
    'public', 'private', 'protected', 'internal', 'static', 'virtual', 'override',
    'async', 'sealed', 'abstract', 'new', 'readonly', 'partial',
  ];
  const modifier = `(?:${keywords.join('|')})`;
  const pieces = [
    `(?:^|[\\s;{}])${modifier}`,                      // first modifier
    `(?:\\s+${modifier})*`,                           // any further modifiers
    '\\s+([A-Za-z_][A-Za-z0-9_<>?\\[\\],\\s]*?)',     // return type (group 1)
    '\\s+([A-Za-z_][A-Za-z0-9_]*)',                   // method name (group 2)
    '\\s*\\(([^)]*)\\)',                              // params (group 3)
    '\\s*\\{',                                        // opening brace of the body
  ];
  return new RegExp(pieces.join(''), 'g');
})();
37
+
38
// Splits a method body into statements. A `;` ends a statement only when it
// sits outside every (), [] and {} pair, so `for (...)` headers and lambda
// bodies passed as arguments stay inside the statement that contains them.
//
// String literals, char literals and comments are copied through verbatim:
// quotes, brackets and semicolons inside them never influence the split.
//   - regular literals honour backslash escapes for both `"` and `'`
//   - verbatim literals (@"..." / $@"..." / @$"...") treat `\` as a plain
//     character and `""` as an embedded quote
//   - `// ...` runs to end of line, `/* ... */` to its terminator
//
// @param {string} body  Text between a method's outer braces.
// @returns {string[]}   Trimmed statement texts, without the terminating `;`.
//   Empty statements between two `;` are kept as ''; a trailing fragment with
//   no `;` is kept only when it is non-blank.
function _splitStatements(body) {
  const out = [];
  let buf = '';
  let depth = 0;
  let quote = null;      // delimiter of the literal being scanned, or null
  let verbatim = false;  // true while inside a verbatim ("@") string
  let i = 0;
  while (i < body.length) {
    const c = body[i];
    const next = body[i + 1];

    if (quote) {
      buf += c;
      if (verbatim) {
        if (c === '"') {
          // `""` is an embedded quote, a lone `"` ends the literal.
          if (next === '"') { buf += next; i += 2; continue; }
          quote = null;
          verbatim = false;
        }
      } else if (c === '\\' && next !== undefined) {
        // Escaped character: copy it without interpreting it.
        buf += next;
        i += 2;
        continue;
      } else if (c === quote) {
        quote = null;
      }
      i++;
      continue;
    }

    if (c === '/' && next === '/') {
      // Line comment: copy up to (not including) the newline.
      const nl = body.indexOf('\n', i);
      const end = nl === -1 ? body.length : nl;
      buf += body.slice(i, end);
      i = end;
      continue;
    }
    if (c === '/' && next === '*') {
      // Block comment: copy through the closing `*/` (or to the end if unterminated).
      const close = body.indexOf('*/', i + 2);
      const end = close === -1 ? body.length : close + 2;
      buf += body.slice(i, end);
      i = end;
      continue;
    }

    if (c === '"' || c === "'") {
      quote = c;
      verbatim = c === '"' &&
        (body[i - 1] === '@' || (body[i - 1] === '$' && body[i - 2] === '@'));
      buf += c;
      i++;
      continue;
    }

    if (c === '{' || c === '(' || c === '[') depth++;
    if (c === '}' || c === ')' || c === ']') depth--;
    if (c === ';' && depth === 0) {
      out.push(buf.trim());
      buf = '';
      i++;
      continue;
    }
    buf += c;
    i++;
  }
  if (buf.trim()) out.push(buf.trim());
  return out;
}
64
+
65
// Lowers a C# expression (as text) to an IR expression node.
//
// Shapes produced:
//   { kind: 'ident', name }                    bare identifier
//   { kind: 'member', object, prop }           dotted access / trailing indexer
//   { kind: 'call', callee, args }             callee is the leading dotted name
//   { kind: 'tpl', parts }                     top-level `+` chain containing a quote
//   { kind: 'literal', value }                 text starting with `"`, `@"` or a digit
//   { kind: 'unknown' }                        anything else, including empty input
//
// @param {string} text  Expression source.
// @returns {object}     IR expression node.
function _lowerExpr(text) {
  const s = String(text || '').trim();
  if (!s) return { kind: 'unknown' };

  // Member access ending in one indexer: a.b.c["foo"], e.g. Request.Form["name"].
  if (/^[A-Za-z_][\w.]*\[[^\]]*\]$/.test(s)) {
    const lb = s.indexOf('[');
    return _buildMemberChain(s.slice(0, lb).split('.'), /*indexer*/ s.slice(lb));
  }

  // Plain dotted ident: Request.Form / Request.QueryString
  if (/^[A-Za-z_][\w.]*$/.test(s)) {
    const parts = s.split('.');
    if (parts.length === 1) return { kind: 'ident', name: parts[0] };
    return _buildMemberChain(parts);
  }

  // Call: foo.bar(args) or Bar(args). The match is greedy: the callee is the
  // leading dotted name and everything up to the final ')' is argument text,
  // so a chained call such as a.F(x).G(y) is reported as a call to a.F.
  const callMatch = s.match(/^([\w.]+)\s*\((.*)\)\s*$/s);
  if (callMatch) {
    const callee = callMatch[1];
    const args = _splitTopLevelCommas(callMatch[2]).map(_lowerExpr);
    return { kind: 'call', callee, args };
  }

  // String concat / interpolation — heuristic.
  if (s.includes('+') && /["']/.test(s)) {
    const pieces = _splitTopLevelPlus(s);
    // Recurse only when the split actually broke the text up. When every `+`
    // is nested (e.g. `new Cmd("a" + b)` or `("a" + b)`) the single piece is
    // the input itself, and lowering it again would never terminate.
    if (pieces.length !== 1 || pieces[0] !== s) {
      return { kind: 'tpl', parts: pieces.map(_lowerExpr) };
    }
  }

  if (/^"|^@"/.test(s)) return { kind: 'literal', value: s };
  if (/^\d/.test(s)) return { kind: 'literal', value: s };
  return { kind: 'unknown' };
}

// Folds [a, b, c] into member(member(ident a, b), c). When `indexer` is given
// (the raw `[...]` text) it is attached as one more member on the outside.
function _buildMemberChain(parts, indexer) {
  let cur = { kind: 'ident', name: parts[0] };
  for (let i = 1; i < parts.length; i++) cur = { kind: 'member', object: cur, prop: parts[i] };
  if (indexer) cur = { kind: 'member', object: cur, prop: indexer };
  return cur;
}

// Shared scanner behind the comma and plus splitters. Cuts `text` at every
// `separator` that is outside string/char literals and outside (), [] and {}.
// With `anglesNest`, `<...>` is additionally treated as a nesting pair so the
// comma inside `Dictionary<string, int>` does not split; `=>`, `>=`, `<=` and
// a `>` with no open `<` are operators and leave the angle depth untouched.
//
// Regular literals honour backslash escapes; verbatim literals (@"...") treat
// `\` as a plain character and `""` as an embedded quote.
//
// @returns {string[]} Trimmed pieces; a blank trailing piece is dropped.
function _splitTopLevel(text, separator, anglesNest) {
  const out = [];
  let buf = '';
  let depth = 0;
  let angleDepth = 0;
  let quote = null;
  let verbatim = false;
  let i = 0;
  while (i < text.length) {
    const c = text[i];
    const next = text[i + 1];

    if (quote) {
      buf += c;
      if (verbatim) {
        if (c === '"') {
          if (next === '"') { buf += next; i += 2; continue; }
          quote = null;
          verbatim = false;
        }
      } else if (c === '\\' && next !== undefined) {
        // Copy the escaped character blindly so `\\` followed by the closing
        // quote is not mistaken for an escaped quote.
        buf += next;
        i += 2;
        continue;
      } else if (c === quote) {
        quote = null;
      }
      i++;
      continue;
    }

    if (c === '"' || c === "'") {
      quote = c;
      verbatim = c === '"' &&
        (text[i - 1] === '@' || (text[i - 1] === '$' && text[i - 2] === '@'));
      buf += c;
      i++;
      continue;
    }

    if (c === '(' || c === '{' || c === '[') {
      depth++;
    } else if (c === ')' || c === '}' || c === ']') {
      depth--;
    } else if (anglesNest && c === '<' && next !== '=') {
      angleDepth++;
    } else if (anglesNest && c === '>' && angleDepth > 0 &&
               text[i - 1] !== '=' && next !== '=') {
      angleDepth--;
    }

    if (c === separator && depth === 0 && angleDepth === 0) {
      out.push(buf.trim());
      buf = '';
      i++;
      continue;
    }
    buf += c;
    i++;
  }
  if (buf.trim()) out.push(buf.trim());
  return out;
}

// Splits an argument list on top-level commas (generic angle brackets nest).
function _splitTopLevelCommas(s) {
  return _splitTopLevel(s, ',', true);
}

// Splits a concatenation on top-level `+` operators.
function _splitTopLevelPlus(s) {
  return _splitTopLevel(s, '+', false);
}
150
+
151
// Lowers one C# statement to an IR node.
//
// Leading `// ...` and `/* ... */` comments are skipped so that the code that
// follows them in the same statement text is still lowered; the reported line
// is moved down by the number of line breaks skipped.
//
// @param {string} stmt  Statement text without the terminating `;`.
// @param {number} line  Absolute file line on which `stmt` starts.
// @returns {object|null} One of
//   { kind: 'return', line, value }   value is null for a bare `return`
//   { kind: 'throw', line, value }
//   { kind: 'assign', line, target, source }
//   { kind: 'call', line, callee, args }
//   { kind: 'unknown', line, text }
//   or null when the statement is blank or consists only of comments.
function _lowerStmt(stmt, line) {
  let s = stmt.trim();
  let at = line;

  // Peel off any comments that precede the actual code.
  while (s.startsWith('//') || s.startsWith('/*')) {
    const isLineComment = s.startsWith('//');
    const end = isLineComment ? s.indexOf('\n') : s.indexOf('*/', 2);
    if (end === -1) return null; // nothing but an (unterminated) comment
    const rest = s.slice(end + (isLineComment ? 1 : 2)).trimStart();
    const skipped = s.slice(0, s.length - rest.length);
    const breaks = (skipped.match(/\n/g) || []).length;
    if (breaks) at += breaks;
    s = rest;
  }
  if (!s) return null;

  // return — dotAll so an expression spanning several lines is captured whole
  if (/^return\b/.test(s)) {
    const m = s.match(/^return\s*(.*?)\s*$/s);
    const expr = m && m[1] ? _lowerExpr(m[1]) : null;
    return { kind: 'return', line: at, value: expr };
  }

  // throw
  if (/^throw\b/.test(s)) {
    return { kind: 'throw', line: at, value: _lowerExpr(s.replace(/^throw\s*/, '')) };
  }

  // assign: `var x = …` `Type x = …` `x = …` `x.y = …`
  const m = s.match(/^(?:(?:var|[A-Za-z_][\w<>?,\s.]*)\s+)?([A-Za-z_][\w.]*?)\s*=\s*(.+)$/s);
  if (m) {
    return { kind: 'assign', line: at, target: m[1], source: _lowerExpr(m[2]) };
  }

  // statement-form call
  const cm = s.match(/^([A-Za-z_][\w.]*)\s*\((.*)\)\s*$/s);
  if (cm) {
    return { kind: 'call', line: at, callee: cm[1], args: _splitTopLevelCommas(cm[2]).map(_lowerExpr) };
  }

  return { kind: 'unknown', line: at, text: s };
}
177
+
178
// Finds the text of a brace-delimited body by matching `{`/`}` pairs.
//
// Braces inside string literals, char literals and comments are ignored:
//   - regular literals honour backslash escapes for both `"` and `'`
//   - verbatim literals (@"..." / $@"..." / @$"...") treat `\` as a plain
//     character and `""` as an embedded quote
//   - `// ...` runs to end of line, `/* ... */` to its terminator
//
// @param {string} src        Full source text.
// @param {number} openBrace  Index of the `{` that starts the body.
// @returns {{body: string, end: number}|null} `body` is the text strictly
//   between the braces and `end` the index of the matching `}`; null when the
//   body is never closed.
function _extractBody(src, openBrace) {
  let depth = 1;
  let quote = null;      // delimiter of the literal being scanned, or null
  let verbatim = false;  // true while inside a verbatim ("@") string
  let i = openBrace + 1;
  while (i < src.length) {
    const c = src[i];
    const next = src[i + 1];

    if (quote) {
      if (verbatim) {
        if (c === '"') {
          if (next === '"') { i += 2; continue; } // embedded quote
          quote = null;
          verbatim = false;
        }
      } else if (c === '\\') {
        i += 2; // skip the escaped character, whatever it is
        continue;
      } else if (c === quote) {
        quote = null;
      }
      i++;
      continue;
    }

    if (c === '/' && next === '/') {
      const nl = src.indexOf('\n', i);
      if (nl === -1) return null; // comment runs to EOF: body never closes
      i = nl + 1;
      continue;
    }
    if (c === '/' && next === '*') {
      const close = src.indexOf('*/', i + 2);
      if (close === -1) return null;
      i = close + 2;
      continue;
    }

    if (c === '"' || c === "'") {
      quote = c;
      verbatim = c === '"' &&
        (src[i - 1] === '@' || (src[i - 1] === '$' && src[i - 2] === '@'));
      i++;
      continue;
    }

    if (c === '{') {
      depth++;
    } else if (c === '}') {
      depth--;
      if (depth === 0) return { body: src.slice(openBrace + 1, i), end: i };
    }
    i++;
  }
  return null;
}
200
+
201
// Returns the 1-based line number of offset `idx` in `src`, i.e. one plus the
// number of '\n' characters found strictly before `idx`. Offsets past the end
// of `src` report the last line; offsets at or below zero report line 1.
function _lineAt(src, idx) {
  const limit = Math.min(idx, src.length);
  let lineNo = 1;
  let pos = src.indexOf('\n');
  while (pos !== -1 && pos < limit) {
    lineNo += 1;
    pos = src.indexOf('\n', pos + 1);
  }
  return lineNo;
}
206
+
207
// Builds the qualified id of a method: `<file>::<name>@<line>#<hash>`, where
// <hash> is the first 8 hex characters of the SHA-256 digest of `body`.
function _qid(file, name, line, body) {
  const hasher = crypto.createHash('sha256');
  hasher.update(body);
  const fingerprint = hasher.digest('hex').substring(0, 8);
  return `${file}::${name}@${line}#${fingerprint}`;
}
211
+
212
// Counts '\n' characters in text[from, to).
function _countNewlines(text, from, to) {
  let count = 0;
  for (let pos = text.indexOf('\n', from); pos !== -1 && pos < to; pos = text.indexOf('\n', pos + 1)) {
    count++;
  }
  return count;
}

// Parses one C# source file into the IR shape shared by the other frontends.
//
// Every METHOD_RE match whose body can be brace-matched becomes one function
// with a linear CFG: entry → n<i> → ... → exit, one node per lowered statement
// (`i` is the statement's index in the split body, so ids may have gaps where
// a statement produced no node).
//
// Statement lines are derived from each statement's real offset inside the
// body, so Allman-style braces, blank lines and comments between statements
// do not shift the reported line.
//
// @param {string} file  Path used to label the IR and build qids.
// @param {string} code  C# source text.
// @returns {{file: string, functions: object[], topLevel: null}|null}
//   null when `file` is falsy or `code` is not a string.
export function parseCSharpFile(file, code) {
  if (!file || typeof code !== 'string') return null;
  const functions = [];
  METHOD_RE.lastIndex = 0; // shared /g regex: always restart from the top
  let m;
  while ((m = METHOD_RE.exec(code)) !== null) {
    const name = m[2];
    const paramsText = m[3] || '';
    const params = paramsText.split(',').map(p => {
      const t = p.trim();
      if (!t) return null;
      // "Type name" → name. "Type<T> name" → name. "Type[] name = default" → name.
      const last = t.replace(/=.*$/, '').trim().split(/\s+/).pop();
      return last && /^[A-Za-z_][\w]*$/.test(last) ? last : null;
    }).filter(Boolean);

    // METHOD_RE ends on the opening brace, so this lands on that brace.
    const braceIdx = code.indexOf('{', m.index + m[0].length - 1);
    if (braceIdx < 0) continue;
    const extracted = _extractBody(code, braceIdx);
    if (!extracted) continue;

    const startLine = _lineAt(code, m.index);
    const body = extracted.body;
    const stmts = _splitStatements(body);

    // Build a linear CFG: entry → s1 → s2 → ... → exit.
    const nodes = {};
    nodes.entry = { kind: 'entry', line: startLine, succ: [], pred: [] };
    nodes.exit = { kind: 'exit', line: startLine, succ: [], pred: [] };
    let prev = 'entry';

    // `cursor` is the body offset already accounted for and `cursorLine` the
    // file line of that offset. Each statement is a trimmed slice of the body,
    // so searching forward from the cursor finds where it really starts.
    let cursor = 0;
    let cursorLine = _lineAt(code, braceIdx + 1);
    for (let idx = 0; idx < stmts.length; idx++) {
      const text = stmts[idx];
      const at = text ? body.indexOf(text, cursor) : -1;
      if (at !== -1) {
        cursorLine += _countNewlines(body, cursor, at);
        cursor = at;
      }
      const node = _lowerStmt(text, cursorLine);
      if (at !== -1) {
        // Step over the statement itself, even when it produced no node, so
        // later statements keep the right line.
        cursorLine += _countNewlines(body, at, at + text.length);
        cursor = at + text.length;
      }
      if (!node) continue;
      const id = `n${idx}`;
      nodes[id] = { ...node, succ: [], pred: [prev] };
      nodes[prev].succ.push(id);
      prev = id;
    }
    nodes[prev].succ.push('exit');
    nodes.exit.pred.push(prev);

    functions.push({
      qid: _qid(file, name, startLine, body),
      name, line: startLine, params, file,
      cfg: { entry: 'entry', exit: 'exit', nodes },
    });
    // Resume scanning after this method's closing brace.
    METHOD_RE.lastIndex = extracted.end + 1;
  }
  return { file, functions, topLevel: null };
}