@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,136 @@
1
+ // AST-backed Python parser — drop-in replacement for parser-py.js.
2
+ //
3
+ // Shells to `scanner/src/ir/parser-py.helper.py` which uses Python's stdlib
4
+ // `ast` module (zero external dependencies, ships with Python 3.8+) to
5
+ // produce the same IR shape parser-py.js emits, but computed from a real
6
+ // parser rather than a regex-balanced indentation walker.
7
+ //
8
+ // What this fixes (gaps in the regex parser, by its own admission):
9
+ // - Comprehensions, decorators, match statements, async/await, lambda
10
+ // bodies — all dropped by the regex parser; the AST parser preserves
11
+ // the function records even when the body has constructs we don't
12
+ // fully lower yet.
13
+ // - `def f(x=Foo(1, 2))` and `db.execute(sanitize(x))` — nested parens
14
+ // that the regex parser's call regex rejected.
15
+ // - Walrus `:=`, type hints (`def f(x: List[int]) -> Dict`), PEP-695
16
+ // generics — recognized cleanly by the real parser.
17
+ //
18
+ // Cost / fallback:
19
+ // - One python3 subprocess per `runScan` (batched: ALL .py files sent in
20
+ // one stdin payload). Not one process per file.
21
+ // - When python3 isn't on PATH, or is too old (< 3.8), or the helper
22
+ // fails — caller falls back to the regex parser (parser-py.js).
23
+ // - Capability probe is cached for the process; we don't re-spawn
24
+ // python3 every scan.
25
+ //
26
+ // Toggle:
27
+ // AGENTIC_SECURITY_PY_PARSER=cst → force this path (error if unavailable)
28
+ // AGENTIC_SECURITY_PY_PARSER=regex → force the legacy regex parser
29
+ // AGENTIC_SECURITY_PY_PARSER=auto → try CST, fall back silently (default)
30
+
31
+ import * as cp from 'node:child_process';
32
+ import * as path from 'node:path';
33
+ import * as fs from 'node:fs';
34
+ import { fileURLToPath } from 'node:url';
35
+
36
// Directory containing this module, and the Python helper script that is
// expected to sit right beside it.
const HERE = path.dirname(fileURLToPath(import.meta.url));
const HELPER_PATH = path.join(HERE, 'parser-py.helper.py');

// Memoized probe result; stays null until probePythonAvailable() has stored
// an outcome. Both success and failure outcomes are cached.
let _capability = null;

/**
 * Find a usable Python interpreter (3.8 or newer) on PATH.
 *
 * Tries `python3` first and then `python`, running `<bin> --version` and
 * parsing the "Python X.Y.Z" banner from stdout (or stderr when stdout is
 * empty). The first candidate that reports a new-enough version wins.
 *
 * The outcome is cached for the lifetime of the process, so repeated calls
 * do not spawn again.
 *
 * @returns {{ok: true, python: string, version: string} | {ok: false, reason: string}}
 *   On success `python` is the executable name that was probed (e.g.
 *   'python3', not an absolute path) and `version` is "X.Y.Z".
 */
export function probePythonAvailable() {
  if (_capability) return _capability;

  const versionPattern = /Python\s+(\d+)\.(\d+)\.(\d+)/;
  const candidates = ['python3', 'python'];

  for (const candidate of candidates) {
    let result;
    try {
      result = cp.spawnSync(candidate, ['--version'], { encoding: 'utf8', timeout: 1500 });
    } catch {
      continue;
    }
    // A missing binary or a timeout leaves status non-zero / null.
    if (result.status !== 0) continue;

    const banner = result.stdout || result.stderr || '';
    const match = versionPattern.exec(banner);
    if (!match) continue;

    const [, majorText, minorText, patchText] = match;
    const major = parseInt(majorText, 10);
    const minor = parseInt(minorText, 10);
    const recentEnough = major > 3 || (major === 3 && minor >= 8);
    if (!recentEnough) continue;

    _capability = { ok: true, python: candidate, version: `${majorText}.${minorText}.${patchText}` };
    return _capability;
  }

  _capability = { ok: false, reason: 'no-python3-on-path' };
  return _capability;
}
66
+
67
/**
 * Single-file convenience wrapper with the same signature as the regex
 * parser's parsePythonFile.
 *
 * It delegates to parsePythonFilesBatch() with a one-element batch, so each
 * call costs one subprocess spawn. Callers with many files should use the
 * batch entry point directly; this form exists for the test harness and for
 * callers that only ever have one file in hand.
 *
 * @param {string} file  Path of the file; must end in `.py` (case-insensitive).
 * @param {string} raw   File contents; must be a non-empty string of at most
 *                       1,000,000 characters.
 * @returns {object|null} The IR record for the file, or null when the input
 *   is rejected, Python is unavailable, or the batch produced nothing.
 */
export function parsePythonFile(file, raw) {
  const acceptable =
    Boolean(file) &&
    typeof raw === 'string' &&
    raw.length > 0 &&
    /\.py$/i.test(file) &&
    raw.length <= 1_000_000;
  if (!acceptable) return null;

  if (!probePythonAvailable().ok) return null;

  const results = parsePythonFilesBatch([{ file, content: raw }]);
  return results && results.length ? results[0] : null;
}
83
+
84
/**
 * Batch entry point: parse many Python files with a single helper process.
 *
 * @param {Array<{file: string, content: string}>} entries
 *   Files to parse. Entries that are not `.py` files, whose content is not a
 *   string, or whose content exceeds 1,000,000 characters are dropped.
 * @returns {Array<object>|null}
 *   - `[]` when `entries` is not a non-empty array, or nothing survives the
 *     filter.
 *   - The helper's JSON array (one record per file) on success.
 *   - `null` on any capability / subprocess / output failure, so the caller
 *     can fall back to the regex parser.
 *
 * Set AGENTIC_SECURITY_PY_PARSER_DEBUG=1 to get failure diagnostics on stderr.
 */
export function parsePythonFilesBatch(entries) {
  if (!Array.isArray(entries) || entries.length === 0) return [];

  const debugEnabled = process.env.AGENTIC_SECURITY_PY_PARSER_DEBUG === '1';
  const debug = (message) => {
    if (debugEnabled) process.stderr.write(`parser-py-cst: ${message}\n`);
  };

  const cap = probePythonAvailable();
  if (!cap.ok) return null;
  if (!fs.existsSync(HELPER_PATH)) return null;

  const filtered = entries.filter(e =>
    e && typeof e.file === 'string' && /\.py$/i.test(e.file) &&
    typeof e.content === 'string' && e.content.length <= 1_000_000
  );
  if (filtered.length === 0) return [];

  let payload;
  try {
    payload = JSON.stringify(filtered);
  } catch {
    return null;
  }

  let r;
  try {
    r = cp.spawnSync(cap.python, [HELPER_PATH], {
      input: payload,
      encoding: 'utf8',
      // One 10 s budget for the whole batch; on timeout the caller's regex
      // fallback takes over.
      timeout: 10_000,
      maxBuffer: 64 * 1024 * 1024,
    });
  } catch (e) {
    debug(`spawn failed — ${e.message}`);
    return null;
  }

  if (r.status !== 0 || !r.stdout) {
    // spawnSync reports timeouts, missing binaries and buffer overflows via
    // `error` / `signal` with a null status, so surface those too — a bare
    // "exit=null" is not diagnosable.
    const signal = r.signal ? ` signal=${r.signal}` : '';
    const error = r.error ? ` error=${r.error.message}` : '';
    debug(`helper exit=${r.status}${signal}${error} stderr=${r.stderr || ''}`);
    return null;
  }

  let out;
  try {
    out = JSON.parse(r.stdout);
  } catch (e) {
    debug(`helper output not JSON — ${e.message}`);
    return null;
  }

  // The contract is "array of records or null". Valid JSON of any other
  // shape (object, string, number) is treated as a helper failure rather
  // than handed to callers that will index into it.
  if (!Array.isArray(out)) {
    debug('helper output is not a JSON array');
    return null;
  }
  return out;
}
134
+
135
/**
 * Test-only hook: forget the cached interpreter probe so the next
 * probePythonAvailable() call probes again.
 */
export function _resetCapabilityCacheForTests() {
  _capability = null;
}
@@ -0,0 +1,501 @@
1
+ #!/usr/bin/env python3
2
+ # Python IR helper for the agentic-security scanner.
3
+ #
4
+ # Reads a JSON list `[{"file": "...", "content": "..."}, ...]` from stdin.
5
+ # For each file, walks the Python AST (stdlib `ast`, no external deps) and
6
+ # emits the same IR shape the regex-based `parser-py.js` produces, but
7
+ # computed from a real parser. Writes a JSON array of `{file, functions[],
8
+ # topLevel}` blobs to stdout.
9
+ #
10
+ # IR shape (must mirror parser-py.js):
11
+ #
12
+ # { file, functions: [
13
+ # { qid, name, line, params, file,
14
+ # cfg: { entry: nodeId, exit: nodeId, nodes: { id: node } } }
15
+ # ], topLevel: null }
16
+ #
17
+ # node = {
18
+ # kind: 'entry' | 'exit' | 'noop' | 'loop-header' | 'assign' | 'call'
19
+ # | 'if' | 'return' | 'throw' | 'unknown',
20
+ # line, succ: [nodeId, ...], pred: [nodeId, ...],
21
+ # ...kind-specific fields
22
+ # }
23
+ #
24
+ # For assign: { target: str|None, source: expr }
25
+ # For call: { callee: str, args: [expr] }
26
+ # For if: { cond: expr }
27
+ # For return: { value: expr|None }
28
+ #
29
+ # expr = { kind: 'literal'|'ident'|'member'|'binary'|'logical'|'tpl'
30
+ # |'call'|'array'|'object'|'union'|'unknown',
31
+ # ...kind-specific fields }
32
+ #
33
+ # Constructs deliberately NOT yet lowered (emit `kind: 'unknown'`):
34
+ # - match statements (we tag the function as having one, but don't
35
+ # control-flow into it; future work).
36
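+ # - walrus assignment := (the binding is not modeled; as an expression it lowers to its right-hand side rather than 'unknown')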
37
+ # - nested function defs inside comprehensions
38
+ # - decorators (function records keep the @-decorator names as metadata
39
+ # but the decorator expressions don't get full CFG nodes).
40
+ #
41
+ # Exit codes:
42
+ # 0 success — stdout is JSON
43
+ # 2 bad input (stdin not parseable)
44
+ # 3 no Python files in input
45
+ #
46
+ # This script is invoked by `scanner/src/ir/parser-py-cst.js`; never run
47
+ # directly by the scanner user.
48
+
49
from __future__ import annotations

import ast
import hashlib
import json
import sys
from typing import Any, Optional
54
+
55
+
56
+ # ─── ID generation ───────────────────────────────────────────────────────────
57
+
58
# Process-wide counter behind _next_id(); incremented before each id is issued.
_node_id = 0


def _next_id() -> str:
    """Return a fresh CFG node id of the form ``pyn<N>``.

    Ids come from a single module-level counter, so they are unique across
    every function and file lowered by this process.
    """
    global _node_id
    issued = _node_id + 1
    _node_id = issued
    return "pyn" + str(issued)
65
+
66
+
67
def _qid(file: str, name: str, line: int) -> str:
    """Build the qualified id for a function record.

    The result is ``<file>::<name>@<line>#<digest>``, where ``<digest>`` is
    the first 8 hex characters of the SHA-1 of ``"<file>:<name>:<line>"``.
    """
    fingerprint = f"{file}:{name}:{line}".encode("utf-8")
    digest = hashlib.sha1(fingerprint).hexdigest()
    short = digest[:8]
    return f"{file}::{name}@{line}#{short}"
70
+
71
+
72
+ # ─── Expression lowering ─────────────────────────────────────────────────────
73
+
74
+
75
def _lower_expr(node: ast.AST) -> dict[str, Any]:
    """Lower a Python AST expression into an IR expression dict.

    Every result has a ``kind`` key; the remaining keys depend on the kind:

    - ``literal``: ``value``
    - ``ident``:   ``name``
    - ``member``:  ``object``, ``prop`` (``"[]"`` for subscripts)
    - ``tpl``:     ``parts`` (interpolated f-string expressions only)
    - ``binary`` / ``logical``: ``op``, ``left``, ``right``
    - ``call``:    ``callee``, ``args`` (keyword values appended after
      positional arguments)
    - ``array``:   ``elements``
    - ``object``:  ``props`` (values only; keys are not lowered)
    - ``union``:   ``branches`` (both arms of a ternary)
    - ``unknown``: anything not handled, including ``None`` input

    The returned structure must survive ``json.dumps`` on this side and a
    strict JSON parser on the consuming side.
    """
    if node is None:
        return {"kind": "unknown"}

    if isinstance(node, ast.Constant):
        v = node.value
        if isinstance(v, float) and (v != v or abs(v) == float("inf")):
            # Literals such as `1e999` parse to inf. json.dumps would write
            # them as the bare tokens Infinity / NaN, which strict JSON
            # parsers (JavaScript's JSON.parse) reject — a single such
            # constant would invalidate the whole batch. Emit the textual
            # form instead, as is already done for other non-JSON constants.
            return {"kind": "literal", "value": repr(v)}
        if v is None:
            return {"kind": "literal", "value": "None"}
        if isinstance(v, (bool, int, float)):
            return {"kind": "literal", "value": v}
        # str, bytes, complex, Ellipsis: keep the Python repr.
        return {"kind": "literal", "value": repr(v)}

    if isinstance(node, ast.Name):
        return {"kind": "ident", "name": node.id}

    if isinstance(node, ast.Attribute):
        return {"kind": "member", "object": _lower_expr(node.value), "prop": node.attr}

    if isinstance(node, ast.Subscript):
        # Surfaced as member access with a fixed "[]" prop; the index/slice
        # expression itself is not lowered.
        return {"kind": "member", "object": _lower_expr(node.value), "prop": "[]"}

    if isinstance(node, ast.JoinedStr):
        # f"...{expr}..." — only the interpolated expressions are kept; the
        # constant string segments are dropped.
        parts = [
            _lower_expr(piece.value)
            for piece in node.values
            if isinstance(piece, ast.FormattedValue)
        ]
        return {"kind": "tpl", "parts": parts}

    if isinstance(node, ast.BinOp):
        return {
            "kind": "binary",
            "op": type(node.op).__name__,
            "left": _lower_expr(node.left),
            "right": _lower_expr(node.right),
        }

    if isinstance(node, ast.BoolOp):
        # `a or b or c` is folded left-to-right into nested logical nodes.
        op = "and" if isinstance(node.op, ast.And) else "or"
        operands = node.values or []
        if not operands:
            return {"kind": "unknown"}
        folded = _lower_expr(operands[0])
        for operand in operands[1:]:
            folded = {
                "kind": "logical",
                "op": op,
                "left": folded,
                "right": _lower_expr(operand),
            }
        return folded

    if isinstance(node, ast.Compare):
        # Only the first operator/comparator pair of a chained comparison
        # is represented.
        right = _lower_expr(node.comparators[0]) if node.comparators else {"kind": "unknown"}
        op = type(node.ops[0]).__name__ if node.ops else "Eq"
        return {"kind": "binary", "op": op, "left": _lower_expr(node.left), "right": right}

    if isinstance(node, ast.Call):
        callee = _flatten_callee(node.func)
        args = [_lower_expr(a) for a in (node.args or [])]
        # Keyword argument values follow the positional ones; names are dropped.
        args.extend(_lower_expr(kw.value) for kw in (node.keywords or []))
        return {"kind": "call", "callee": callee, "args": args}

    if isinstance(node, (ast.List, ast.Tuple, ast.Set)):
        return {"kind": "array", "elements": [_lower_expr(e) for e in (node.elts or [])]}

    if isinstance(node, ast.Dict):
        return {
            "kind": "object",
            "props": [{"value": _lower_expr(v)} for v in (node.values or [])],
        }

    if isinstance(node, ast.IfExp):
        # Ternary `a if cond else b` — both arms, condition not lowered.
        return {
            "kind": "union",
            "branches": [_lower_expr(node.body), _lower_expr(node.orelse)],
        }

    if isinstance(node, (ast.ListComp, ast.SetComp, ast.GeneratorExp)):
        # Represented as a one-element array holding the lowered element
        # expression. The `for ... in ...` iterables and `if` filters are
        # not lowered here.
        return {"kind": "array", "elements": [_lower_expr(node.elt)]}

    if isinstance(node, ast.DictComp):
        # Value expression only; the key expression and the generators are
        # not lowered.
        return {"kind": "object", "props": [{"value": _lower_expr(node.value)}]}

    if isinstance(node, ast.Lambda):
        # A lambda is represented by its body expression.
        return _lower_expr(node.body)

    if isinstance(node, (ast.Starred, ast.NamedExpr, ast.Await, ast.Yield, ast.YieldFrom)):
        # Transparent wrappers: `*x`, `(n := x)`, `await x`, `yield x`,
        # `yield from x` all lower to x. A bare `yield` has value None and
        # therefore lowers to unknown.
        return _lower_expr(node.value)

    if isinstance(node, ast.UnaryOp):
        return _lower_expr(node.operand)

    return {"kind": "unknown"}
180
+
181
+
182
def _flatten_callee(node: ast.AST) -> Any:
    """Return a name for the callee expression of a call, or None.

    - A plain name yields that name (``foo``).
    - An attribute chain rooted at a plain name yields the dotted path
      (``os.path.join``).
    - An attribute chain rooted at anything else (``func()[0].attr``)
      yields only the outermost attribute name (``attr``).
    - Call and subscript wrappers are looked through, so ``f()()`` and
      ``handlers[k]()`` resolve to ``f`` and ``handlers``.
    - Every other shape yields None.
    """
    # Peel off call / subscript layers until a name-like node is reached.
    while isinstance(node, (ast.Call, ast.Subscript)):
        node = node.func if isinstance(node, ast.Call) else node.value

    if isinstance(node, ast.Name):
        return node.id
    if not isinstance(node, ast.Attribute):
        return None

    # Collect attribute names outermost-first, then flip for the dotted path.
    attrs = []
    base = node
    while isinstance(base, ast.Attribute):
        attrs.append(base.attr)
        base = base.value

    if isinstance(base, ast.Name):
        attrs.append(base.id)
        return ".".join(reversed(attrs))

    # Mixed shape: report just the outermost attribute.
    return attrs[0]
206
+
207
+
208
def _assign_target(node: ast.AST) -> Optional[str]:
    """Return the name an assignment writes to, or None if it is not modeled.

    - A plain name yields that name (``x``).
    - An attribute chain rooted at a plain name yields the full dotted path
      (``self.a.b``).
    - Everything else yields None: tuple/list/starred destructuring,
      subscript targets, and attribute chains whose root is not a plain
      name (``items[0].name``, ``make().name``).

    The last case matters: returning the trailing attribute names alone
    would make ``items[0].name = v`` look like an assignment to a variable
    called ``name``.
    """
    if isinstance(node, ast.Name):
        return node.id

    if isinstance(node, ast.Attribute):
        # Collect attribute names outermost-first while walking to the root.
        attrs = []
        base = node
        while isinstance(base, ast.Attribute):
            attrs.append(base.attr)
            base = base.value
        if not isinstance(base, ast.Name):
            # Root is a call, subscript, literal, ... — no stable path exists.
            return None
        attrs.append(base.id)
        return ".".join(reversed(attrs))

    # ast.Tuple, ast.List, ast.Starred, ast.Subscript — not modeled.
    return None
224
+
225
+
226
+ # ─── CFG construction ────────────────────────────────────────────────────────
227
+
228
+
229
class CfgBuilder:
    """Walk one function body and emit a CFG matching the regex parser's shape.

    Every node is a plain dict stored in ``self.nodes`` under an id obtained
    from ``_next_id()``. Nodes carry ``kind`` and ``line`` plus ``succ`` /
    ``pred`` lists of neighbouring node ids. ``self.entry`` and ``self.exit``
    hold the ids of the two synthetic boundary nodes.
    """

    # ``ast.TryStar`` (``try`` / ``except*``, Python 3.11+) and ``ast.Match``
    # (Python 3.10+) do not exist on older interpreters. Resolve them once via
    # hasattr so the isinstance checks in _lower_stmt never raise
    # AttributeError there; an empty tuple simply never matches.
    _TRY_TYPES = (ast.Try,) + ((ast.TryStar,) if hasattr(ast, "TryStar") else ())
    _MATCH_TYPES = (ast.Match,) if hasattr(ast, "Match") else ()

    def __init__(self, fn_name: str) -> None:
        self.fn_name = fn_name
        self.nodes: dict[str, dict[str, Any]] = {}
        self.entry = self._add({"kind": "entry", "line": 0})
        self.exit = self._add({"kind": "exit", "line": 0})

    # ── graph primitives ────────────────────────────────────────────────

    def _add(self, node: dict[str, Any]) -> str:
        """Register ``node`` under a fresh id (adding empty edge lists)."""
        nid = _next_id()
        node.setdefault("succ", [])
        node.setdefault("pred", [])
        self.nodes[nid] = node
        return nid

    def _link(self, src_id: str, dst_id: str) -> None:
        """Add the edge ``src_id -> dst_id``; duplicate edges are ignored."""
        sn = self.nodes[src_id]
        dn = self.nodes[dst_id]
        if dst_id not in sn["succ"]:
            sn["succ"].append(dst_id)
        if src_id not in dn["pred"]:
            dn["pred"].append(src_id)

    def _emit(self, prev: str, node: dict[str, Any]) -> str:
        """Register ``node``, link it after ``prev`` and return its id."""
        nid = self._add(node)
        self._link(prev, nid)
        return nid

    # ── lowering ────────────────────────────────────────────────────────

    def lower(self, body: list[ast.stmt]) -> None:
        """Lower ``body`` between the entry and exit nodes."""
        tail = self._lower_block(body, self.entry)
        self._link(tail, self.exit)

    def _lower_block(self, body: list[ast.stmt], prev: str) -> str:
        """Lower a sequential list of statements; return the tail node id."""
        for stmt in body:
            prev = self._lower_stmt(stmt, prev)
        return prev

    def _lower_stmt(self, stmt: ast.stmt, prev: str) -> str:
        """Lower one statement after node ``prev``; return the new tail id."""
        line = getattr(stmt, "lineno", 0) or 0
        if isinstance(stmt, ast.Expr):
            # Bare expression — useful when it's a call (decorator pattern,
            # dispatch shape). For everything else, noop.
            if isinstance(stmt.value, ast.Call):
                call = stmt.value
                return self._emit(prev, {
                    "kind": "call",
                    "callee": _flatten_callee(call.func),
                    "args": [_lower_expr(a) for a in (call.args or [])]
                            + [_lower_expr(kw.value) for kw in (call.keywords or [])],
                    "line": line,
                })
            return self._emit(prev, {"kind": "noop", "line": line})
        if isinstance(stmt, (ast.Assign, ast.AugAssign, ast.AnnAssign)):
            return self._lower_assign(stmt, prev, line)
        if isinstance(stmt, ast.If):
            return self._lower_if(stmt, prev, line)
        if isinstance(stmt, (ast.For, ast.AsyncFor, ast.While)):
            return self._lower_loop(stmt, prev, line)
        if isinstance(stmt, ast.Return):
            # Return implicitly flows to exit. We don't link here; `lower`
            # links the final tail to exit, and the engine treats return as
            # terminal.
            return self._emit(prev, {
                "kind": "return",
                "value": _lower_expr(stmt.value) if stmt.value else None,
                "line": line,
            })
        if isinstance(stmt, ast.Raise):
            return self._emit(prev, {"kind": "throw", "line": line})
        if isinstance(stmt, self._TRY_TYPES):
            return self._lower_try(stmt, prev, line)
        if isinstance(stmt, (ast.With, ast.AsyncWith)):
            return self._lower_with(stmt, prev, line)
        if isinstance(stmt, self._MATCH_TYPES):
            # Match statement — emit a noop for now. Future work: lower each
            # case as an alternate branch with its pattern guard.
            return self._emit(prev, {"kind": "noop", "line": line, "_unmodeled": "match"})
        # Nested def / class (the outer extractor visits nested functions via
        # ast.walk()), plus ast.Pass, ast.Break, ast.Continue, ast.Import,
        # ast.ImportFrom, ast.Global, ast.Nonlocal, ast.Delete — all noops
        # for taint.
        return self._emit(prev, {"kind": "noop", "line": line})

    def _lower_assign(self, stmt: Any, prev: str, line: int) -> str:
        """Lower Assign / AugAssign / AnnAssign into ``assign`` node(s)."""
        if isinstance(stmt, ast.AugAssign):
            # x += y  →  assign x = x <op> y
            tgt = _assign_target(stmt.target)
            targets = [tgt]
            src = {
                "kind": "binary",
                "op": type(stmt.op).__name__,
                "left": {"kind": "ident", "name": tgt or "?"},
                "right": _lower_expr(stmt.value),
            }
        elif isinstance(stmt, ast.AnnAssign):
            # x: int = y  →  assign x = y; a bare annotation is a noop.
            if stmt.value is None:
                return self._emit(prev, {"kind": "noop", "line": line})
            targets = [_assign_target(stmt.target)]
            src = _lower_expr(stmt.value)
        else:
            # Chained assignment (a = b = c) writes every target, so emit one
            # assign per target; each receives the same lowered source.
            targets = [_assign_target(t) for t in stmt.targets] or [None]
            src = _lower_expr(stmt.value)
        for tgt in targets:
            prev = self._emit(prev, {
                "kind": "assign", "target": tgt, "source": src, "line": line,
            })
        return prev

    def _lower_if(self, stmt: ast.If, prev: str, line: int) -> str:
        """Lower if / else into a condition node, two branches and a join."""
        if_node = self._emit(prev, {
            "kind": "if",
            "cond": _lower_expr(stmt.test),
            "line": line,
        })
        t_tail = self._lower_block(stmt.body, if_node)
        join = self._emit(t_tail, {"kind": "noop", "line": line})
        if stmt.orelse:
            self._link(self._lower_block(stmt.orelse, if_node), join)
        else:
            # No else branch: the false edge goes straight to the join.
            self._link(if_node, join)
        return join

    def _lower_loop(self, stmt: Any, prev: str, line: int) -> str:
        """Lower for / async for / while, including an optional else block."""
        lh = self._emit(prev, {"kind": "loop-header", "line": line})
        body_prev = lh
        if isinstance(stmt, (ast.For, ast.AsyncFor)):
            # Synthesize an assign for the loop variable so taint from the
            # iterable propagates to it. Skipped when _assign_target cannot
            # name the target (e.g. tuple unpacking).
            tgt = _assign_target(stmt.target)
            if tgt is not None:
                body_prev = self._emit(lh, {
                    "kind": "assign", "target": tgt,
                    "source": _lower_expr(stmt.iter), "line": line,
                })
        body_tail = self._lower_block(stmt.body, body_prev)
        self._link(body_tail, lh)  # back edge
        # Loop exit edge goes to a join.
        join = self._emit(lh, {"kind": "noop", "line": line})
        if stmt.orelse:
            # The else block runs when the loop finishes without `break`.
            # Lower it as an extra path header → else → join so statements in
            # it are not lost; the direct header → join edge stays as the
            # path that skips it.
            self._link(self._lower_block(stmt.orelse, lh), join)
        return join

    def _lower_try(self, stmt: Any, prev: str, line: int) -> str:
        """Lower try / except / else / finally (also ``except*``).

        The try body is a plain sequential block; each handler is an
        alternate branch from the try head; ``else`` continues from the end
        of the body; ``finally`` runs after the union of all of them.
        """
        try_head = self._emit(prev, {"kind": "noop", "line": line})
        body_tail = self._lower_block(stmt.body, try_head)
        join = self._emit(body_tail, {"kind": "noop", "line": line})
        for handler in stmt.handlers:
            self._link(self._lower_block(handler.body, try_head), join)
        if stmt.orelse:
            self._link(self._lower_block(stmt.orelse, body_tail), join)
        if stmt.finalbody:
            return self._lower_block(stmt.finalbody, join)
        return join

    def _lower_with(self, stmt: Any, prev: str, line: int) -> str:
        """Treat ``with X() as v: body`` as a ``v = X()`` assign, then body."""
        tail = prev
        for item in stmt.items:
            if item.optional_vars is None:
                continue
            tgt = _assign_target(item.optional_vars)
            if tgt is not None:
                tail = self._emit(tail, {
                    "kind": "assign", "target": tgt,
                    "source": _lower_expr(item.context_expr), "line": line,
                })
        return self._lower_block(stmt.body, tail)
422
+
423
+
424
+ # ─── Function extraction ─────────────────────────────────────────────────────
425
+
426
+
427
+ def _extract_functions(tree: ast.Module, file: str) -> list[dict[str, Any]]:
428
+ """Walk the module, capturing every function (top-level or nested) into
429
+ a flat list. Each function's body is lowered into a CFG."""
430
+ fns: list[dict[str, Any]] = []
431
+ for node in ast.walk(tree):
432
+ if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
433
+ continue
434
+ params = [a.arg for a in node.args.args]
435
+ if node.args.vararg:
436
+ params.append(node.args.vararg.arg)
437
+ if node.args.kwarg:
438
+ params.append(node.args.kwarg.arg)
439
+ for a in node.args.kwonlyargs:
440
+ params.append(a.arg)
441
+ line = node.lineno or 0
442
+ builder = CfgBuilder(node.name)
443
+ builder.lower(node.body)
444
+ fns.append({
445
+ "qid": _qid(file, node.name, line),
446
+ "name": node.name,
447
+ "line": line,
448
+ "params": params,
449
+ "file": file,
450
+ "cfg": {
451
+ "entry": builder.entry,
452
+ "exit": builder.exit,
453
+ "nodes": builder.nodes,
454
+ },
455
+ })
456
+ return fns
457
+
458
+
459
+ # ─── Driver ──────────────────────────────────────────────────────────────────
460
+
461
+
462
+ def _process_one(file: str, content: str) -> dict[str, Any]:
463
+ if not isinstance(content, str):
464
+ return {"file": file, "functions": [], "topLevel": None, "_error": "content-not-string"}
465
+ if len(content) > 1_000_000:
466
+ return {"file": file, "functions": [], "topLevel": None, "_error": "file-too-large"}
467
+ try:
468
+ tree = ast.parse(content, filename=file)
469
+ except SyntaxError as e:
470
+ return {"file": file, "functions": [], "topLevel": None, "_error": f"syntax-error: {e.msg} (line {e.lineno})"}
471
+ fns = _extract_functions(tree, file)
472
+ return {"file": file, "functions": fns, "topLevel": None}
473
+
474
+
475
def main() -> int:
    """Read a JSON batch from stdin, write the parsed results to stdout.

    stdin must hold a JSON array of objects with ``file`` and ``content``
    keys. Entries that are not objects, or whose ``file`` is not a string
    ending in ``.py``, are skipped. One result record per remaining entry is
    written to stdout as a JSON array.

    Returns the process exit status: 0 on success, 2 when stdin is not valid
    JSON or not an array, 3 when the batch contains no ``.py`` entries.
    """
    try:
        payload = json.load(sys.stdin)
    except Exception as e:
        sys.stderr.write(f"parser-py.helper: bad stdin JSON: {e}\n")
        return 2
    if not isinstance(payload, list):
        sys.stderr.write("parser-py.helper: stdin must be a JSON array\n")
        return 2
    out: list[dict[str, Any]] = []
    for entry in payload:
        if not isinstance(entry, dict):
            continue
        file = entry.get("file")
        # A missing or non-string "file" (number, list, ...) is skipped like
        # any other non-.py entry rather than crashing on .endswith().
        if not isinstance(file, str) or not file.endswith(".py"):
            continue
        content = entry.get("content") or ""
        out.append(_process_one(file, content))
    if not out:
        sys.stderr.write("parser-py.helper: no .py files in input\n")
        return 3
    json.dump(out, sys.stdout)
    return 0
498
+
499
+
500
if __name__ == "__main__":
    # Run as a script: hand main()'s return value to the interpreter as the
    # process exit status.
    raise SystemExit(main())