@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,195 @@
1
+ // Python sink-side SAST (FR-PY-SAST — Phase-2 G3 blocker).
2
+ //
3
+ // The polyglot benchmark in v0.50.0 showed Python detector coverage is the
4
+ // single largest blocker behind the polyglot F1 gap (target 0.85, today 0.727).
5
+ // This module fills the most-common Python sink shapes:
6
+ //
7
+ // - SQLAlchemy text() with f-string concat → SQL injection
8
+ // - os.system / subprocess with shell=True or string concat → command injection
9
+ // - pickle.loads / yaml.load on request data → insecure deserialization
10
+ // - eval / exec on request data → code injection
11
+ // - flask.send_file / send_from_directory with user-controlled path → path traversal
12
+ // - requests with verify=False → insecure HTTPS
13
+ //
14
+ // Limits:
15
+ // - Regex-based, no Python AST today (tree-sitter integration is Phase 5).
16
+ // - "User-controlled" is shape-matched, not flow-traced (any `request.`
17
+ // reference in the same call site qualifies). This is conservative —
18
+ // we'll miss flows that route through helpers, and we'll false-positive
19
+ // when `request.` is unrelated to the sink. Calibration is the answer
20
+ // (FR-LEARN-5), not pre-filtering.
21
+
22
+ import { blankComments } from './_comment-strip.js';
23
+
24
+ const PY_EXT_RE = /\.py$/i;
25
+
26
/**
 * Translate a character offset into a 1-based line number.
 *
 * @param {string} raw - Full source text.
 * @param {number} idx - Offset into `raw`; only the text before it is inspected.
 * @returns {number} One plus the number of newline characters before `idx`.
 */
function lineOf(raw, idx) {
  // substring() clamps out-of-range / NaN offsets, so those fall back to line 1
  // or the last line rather than throwing.
  const head = raw.substring(0, idx);
  let lineNo = 1;
  for (let at = head.indexOf('\n'); at !== -1; at = head.indexOf('\n', at + 1)) {
    lineNo += 1;
  }
  return lineNo;
}
27
+
28
+ // ─── SQLAlchemy text() with f-string ──────────────────────────────────────
29
+ //
30
+ // `engine.execute(text(f"SELECT ... {var}"))`
31
+ // `connection.execute(text(f"..."))`
32
+ // The dangerous shape is text() wrapping an f-string. Parameterized queries
33
+ // use `text("... :name").bindparams(name=...)` — the f-string variant
34
+ // indicates concat.
35
+
36
// An f-string body routinely contains the *other* quote kind — the classic
// injectable shape is a quoted interpolation, `f"... WHERE name = '{name}'"`.
// A single `[^'"]` class stops at that inner quote and never reaches the
// `{...}`, so every pattern here is written once per delimiter instead.
//
// text(f"...{x}...") / text(f'...{x}...')
const SQLA_TEXT_FSTRING_RE = /\btext\s*\(\s*(?:f"[^"]*\{[^}]+\}|f'[^']*\{[^}]+\})/g;
// <cursor|conn|connection|session>.execute( followed by either an f-string
// with an interpolation, or a complete string literal that is immediately
// combined with `+` / `%`. Requiring the literal to be *closed* before the
// operator keeps `execute("... = '%s'", (params,))` from being reported just
// because a `%` follows an inner quote.
const SQLA_RAW_EXEC_CONCAT_RE = /\b(?:cursor|conn|connection|session)\s*\.\s*execute\s*\(\s*(?:f"[^"]*\{|f'[^']*\{|"[^"]*"\s*[+%]|'[^']*'\s*[+%])/g;
// A common shape: build the SQL in a previous line as an f-string, then pass
// the variable to text() / execute(). We detect the f-string with SQL keywords
// AND a `{...}` interpolation directly, again with one branch per delimiter.
// Case-insensitive so lower-case `select ...` is covered too.
const SQLA_FSTRING_SQL_ASSIGN_RE = /(?:f"[^"]*(?:SELECT|INSERT|UPDATE|DELETE)[^"]*\{[^}]*\}|f'[^']*(?:SELECT|INSERT|UPDATE|DELETE)[^']*\{[^}]*\})/gi;
45
+
46
+ // ─── Command injection ────────────────────────────────────────────────────
47
+ //
48
+ // `os.system(...)` with anything other than a literal
49
+ // `subprocess.run(..., shell=True)`
50
+ // `subprocess.Popen(..., shell=True)`
51
+ // `subprocess.call(..., shell=True)`
52
+
53
// os.system is dangerous when the argument is anything but a pure quoted
// literal, so a negative lookahead exempts exactly that shape:
// `os.system("literal text")` is safe; everything else gets flagged.
// The literal is matched per delimiter (honouring backslash escapes) so a
// constant command that contains the other quote kind — `os.system("echo 'hi'")`
// — is still recognised as a literal instead of being reported.
const PY_OS_SYSTEM_RE = /\bos\s*\.\s*system\s*\((?!\s*(?:"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')\s*\))/g;
// Any subprocess entry point whose argument text (up to the first `)`)
// contains `shell=True`.
const PY_SUBPROCESS_SHELL_TRUE_RE = /\bsubprocess\s*\.\s*(?:run|Popen|call|check_call|check_output)\s*\([^)]*shell\s*=\s*True/g;
// os.popen plus the whole os.exec* family: execl, execle, execlp, execlpe,
// execv, execve, execvp, execvpe.
const PY_SHELL_EXEC_CONCAT_RE = /\bos\s*\.\s*(?:popen|exec[lv]p?e?)\s*\(/g;
59
+
60
+ // ─── Insecure deserialization ─────────────────────────────────────────────
61
+
62
// pickle.load( / pickle.loads( — reported regardless of the argument.
const PY_PICKLE_LOADS_RE = /\bpickle\s*\.\s*loads?\s*\(/g;
// yaml.load( / yaml.unsafe_load( unless the argument text (up to the first
// `)`) names a safe loader. The loader may be passed by keyword or
// positionally, with or without the `yaml.` prefix, and may be the C-backed
// `CSafeLoader`; `yaml.safe_load` never matches because `load` must follow
// `yaml.` directly.
const PY_YAML_UNSAFE_LOAD_RE = /\byaml\s*\.\s*(?:unsafe_load|load)\s*\((?![^)]*\bC?SafeLoader\b)/g;
// marshal.load( / marshal.loads( — reported regardless of the argument.
const PY_MARSHAL_LOADS_RE = /\bmarshal\s*\.\s*loads?\s*\(/g;
65
+
66
+ // ─── Code injection ───────────────────────────────────────────────────────
67
+ // eval()/exec()/compile() are reported only when the argument text, up to the first `)`, mentions a user-input source: `request.`, `input(` or `sys.argv` (eval/exec additionally accept `flask.request` and `os.environ`).
68
+ const PY_EVAL_USER_RE = /\b(?:eval|exec)\s*\(\s*[^)]*(?:request\.|flask\.request|input\s*\(|sys\.argv|os\.environ)/g;
69
+ const PY_COMPILE_USER_RE = /\bcompile\s*\([^)]*(?:request\.|input\s*\(|sys\.argv)/g;
70
+
71
+ // ─── Path traversal ───────────────────────────────────────────────────────
72
+ //
73
+ // `flask.send_file(user_path)` — known sink when path comes from request.
74
+ // `flask.send_from_directory(dir, user_filename)` — same.
75
+ // `open(user_path)` — generic file read with user input.
76
+
77
// send_file with anything other than a pure literal path is dangerous.
// The optional whitespace lives *inside* the lookahead (as in the os.system
// pattern) — outside it, backtracking lets `send_file( "x.png")` slip past the
// exemption. The literal is matched per delimiter so a constant path that
// contains the other quote kind is still treated as a literal.
const PY_SEND_FILE_RE = /\b(?:flask\.)?send_file\s*\((?!\s*(?:"(?:[^"\\]|\\.)+"|'(?:[^'\\]|\\.)+')\s*\))/gi;
// send_from_directory whose later argument is either rooted at `request.` or
// is a bare identifier. Only the identifier branch needs the trailing `,`/`)`
// — demanding it after `request.` made that branch unmatchable
// (`request.args...` is never followed directly by `,` or `)`).
const PY_SEND_FROM_DIR_RE = /\b(?:flask\.)?send_from_directory\s*\([^)]*,\s*(?:request\.|[a-zA-Z_]\w*\s*[,)])/g;
// open() whose first argument starts with `request.` or is an f-string with
// an interpolation.
const PY_OPEN_USER_RE = /\bopen\s*\(\s*(?:request\.|f['"][^'"]*\{[^}]+\})/g;
81
+
82
+ // ─── Insecure transport ───────────────────────────────────────────────────
83
+ // TLS verification switched off: a `requests.<verb>(` call whose argument text (up to the first `)`) contains `verify=False`, or any call to `ssl._create_unverified_context(`.
84
+ const PY_REQUESTS_VERIFY_FALSE_RE = /\brequests\s*\.\s*(?:get|post|put|delete|patch|head|request)\s*\([^)]*verify\s*=\s*False/g;
85
+ const PY_URLLIB_NOCHECK_RE = /\bssl\s*\.\s*_create_unverified_context\s*\(/g;
86
+
87
+ // ─── SSRF ─────────────────────────────────────────────────────────────────
88
+ // The URL counts as user-controlled only when the first argument starts with `request.` or is an f-string containing a `{...}` interpolation; URLs passed through a plain variable are not matched here.
89
+ const PY_REQUESTS_USER_URL_RE = /\brequests\s*\.\s*(?:get|post|put|delete|patch|head|request)\s*\(\s*(?:request\.|f['"][^'"]*\{[^}]+\})/g;
90
+ const PY_URLLIB_USER_URL_RE = /\b(?:urllib\.request\.urlopen|urlopen)\s*\(\s*(?:request\.|f['"][^'"]*\{[^}]+\})/g;
91
+
92
+ // ─── XXE ──────────────────────────────────────────────────────────────────
93
+ // lxml.etree.parse/fromstring calls are reported unless `resolve_entities=False` appears in the text after the call's closing `)` with no `.` in between; xml.etree.ElementTree.parse/fromstring is reported when its first argument starts with `request.` or an f-string with a `{`. NOTE(review): a parser configured on an earlier line is not visible to the lxml lookahead — confirm that is intended.
94
+ const PY_XML_INSECURE_RE = /\blxml\.etree\.(?:parse|fromstring)\s*\([^)]*\)(?!\s*[^.]*\bresolve_entities\s*=\s*False)/g;
95
+ const PY_XML_ETREE_USER_RE = /\bxml\.etree\.ElementTree\.(?:parse|fromstring)\s*\(\s*(?:request\.|f['"][^'"]*\{)/g;
96
+
97
+ // ─── Detector ─────────────────────────────────────────────────────────────
98
+
99
+ const RULES = [
100
+ // Each rule: { re, vuln, severity, cwe, family }. There is no per-rule parser field; scanPythonSinks tags every finding with parser 'PY-SAST'.
101
+ { re: SQLA_TEXT_FSTRING_RE, vuln: 'SQL Injection (SQLAlchemy text() with f-string)', severity: 'critical', cwe: 'CWE-89', family: 'sql-injection' },
102
+ { re: SQLA_RAW_EXEC_CONCAT_RE, vuln: 'SQL Injection (cursor.execute with concat)', severity: 'critical', cwe: 'CWE-89', family: 'sql-injection' },
103
+ { re: SQLA_FSTRING_SQL_ASSIGN_RE, vuln: 'SQL Injection (f-string SQL assigned to variable)', severity: 'high', cwe: 'CWE-89', family: 'sql-injection' },
104
+ { re: PY_OS_SYSTEM_RE, vuln: 'Command Injection (os.system with variable arg)', severity: 'critical', cwe: 'CWE-78', family: 'command-injection' },
105
+ { re: PY_SUBPROCESS_SHELL_TRUE_RE, vuln: 'Command Injection (subprocess shell=True)', severity: 'critical', cwe: 'CWE-78', family: 'command-injection' },
106
+ { re: PY_SHELL_EXEC_CONCAT_RE, vuln: 'Command Injection (os.popen / os.execlp)', severity: 'high', cwe: 'CWE-78', family: 'command-injection' },
107
+ { re: PY_PICKLE_LOADS_RE, vuln: 'Insecure Deserialization (pickle.loads on untrusted)', severity: 'critical', cwe: 'CWE-502', family: 'insecure-deserialization' },
108
+ { re: PY_YAML_UNSAFE_LOAD_RE, vuln: 'Insecure Deserialization (yaml.load without SafeLoader)', severity: 'critical', cwe: 'CWE-502', family: 'insecure-deserialization' },
109
+ { re: PY_MARSHAL_LOADS_RE, vuln: 'Insecure Deserialization (marshal.loads)', severity: 'high', cwe: 'CWE-502', family: 'insecure-deserialization' },
110
+ { re: PY_EVAL_USER_RE, vuln: 'Code Injection (eval/exec on request data)', severity: 'critical', cwe: 'CWE-94', family: 'code-injection' },
111
+ { re: PY_COMPILE_USER_RE, vuln: 'Code Injection (compile() on user input)', severity: 'high', cwe: 'CWE-94', family: 'code-injection' },
112
+ { re: PY_SEND_FILE_RE, vuln: 'Path Traversal (flask.send_file with user-controlled path)', severity: 'high', cwe: 'CWE-22', family: 'path-traversal' },
113
+ { re: PY_SEND_FROM_DIR_RE, vuln: 'Path Traversal (flask.send_from_directory)', severity: 'high', cwe: 'CWE-22', family: 'path-traversal' },
114
+ { re: PY_OPEN_USER_RE, vuln: 'Path Traversal (open with user-controlled path)', severity: 'high', cwe: 'CWE-22', family: 'path-traversal' },
115
+ { re: PY_REQUESTS_VERIFY_FALSE_RE, vuln: 'Insecure HTTPS (requests verify=False)', severity: 'medium', cwe: 'CWE-295', family: 'insecure-http' },
116
+ { re: PY_URLLIB_NOCHECK_RE, vuln: 'Insecure HTTPS (ssl._create_unverified_context)', severity: 'medium', cwe: 'CWE-295', family: 'insecure-http' },
117
+ { re: PY_REQUESTS_USER_URL_RE, vuln: 'SSRF (requests with user-controlled URL)', severity: 'high', cwe: 'CWE-918', family: 'ssrf' },
118
+ { re: PY_URLLIB_USER_URL_RE, vuln: 'SSRF (urlopen with user-controlled URL)', severity: 'high', cwe: 'CWE-918', family: 'ssrf' },
119
+ { re: PY_XML_INSECURE_RE, vuln: 'XXE (lxml without resolve_entities=False)', severity: 'high', cwe: 'CWE-611', family: 'xxe' },
120
+ { re: PY_XML_ETREE_USER_RE, vuln: 'XXE (xml.etree.ElementTree on user input)', severity: 'high', cwe: 'CWE-611', family: 'xxe' },
121
+ ];
122
+
123
/**
 * Scan one Python source file for the sink shapes listed in RULES.
 *
 * @param {string} fp  - Path of the file; only `.py` files are scanned.
 * @param {string} raw - Full text of the file.
 * @returns {Array<object>} One finding per (rule family, line, CWE); empty
 *   when the file is not Python, is empty, is larger than 500 000 characters,
 *   or is a test file outside a fixtures path.
 */
export function scanPythonSinks(fp, raw) {
  if (!PY_EXT_RE.test(fp)) return [];
  // Non-string input (e.g. a Buffer) is treated like an empty file.
  if (typeof raw !== 'string' || raw.length === 0 || raw.length > 500_000) return [];
  // Skip test files — Python projects use `test_*.py` / `*_test.py` / `tests/`
  // — but keep fixtures, which are deliberately vulnerable samples.
  if (_isPythonTestPath(fp) && !/fixtures?/i.test(fp)) return [];

  // NOTE(review): match offsets come from `code` but are resolved against
  // `raw`, which presumes blankComments() keeps every character offset (or at
  // least every newline) in place — confirm against _comment-strip.js.
  const code = blankComments(raw, 'py');
  // Split once; the snippet lookup below would otherwise re-split per finding.
  const rawLines = raw.split('\n');
  const findings = [];
  const seen = new Set();

  for (const rule of RULES) {
    // matchAll() works on an internal clone of the regex, so the shared
    // module-level /g patterns never carry lastIndex between files.
    for (const m of code.matchAll(rule.re)) {
      const line = lineOf(raw, m.index);
      // One finding per family + line + CWE, even if several rules of the
      // same family hit the same line.
      const id = `${rule.family}:${fp}:${line}:${rule.cwe}`;
      if (seen.has(id)) continue;
      seen.add(id);
      findings.push({
        id,
        file: fp,
        line,
        vuln: rule.vuln,
        severity: rule.severity,
        cwe: rule.cwe,
        family: rule.family,
        stride: _strideForFamily(rule.family),
        snippet: (rawLines[line - 1] || '').trim().slice(0, 200),
        parser: 'PY-SAST',
        confidence: 0.7,
        remediation: _remediationFor(rule.family, rule.vuln),
      });
    }
  }
  return findings;
}

// True for a path under a `test/` or `tests/` directory, or whose file name is
// `test_*.py` or `*_test.py`. Accepts both `/` and `\` separators.
function _isPythonTestPath(fp) {
  return /(?:^|[\\/])(?:tests?[\\/]|test_[^\\/]*\.py$|[^\\/]*_test\.py$)/i.test(fp);
}
157
+
158
// STRIDE category per rule family. A Map (rather than an object literal) so
// that family names colliding with Object.prototype members — `constructor`,
// `toString`, … — cannot resolve to an inherited function, and so the table is
// built once instead of on every call.
const _STRIDE_BY_FAMILY = new Map([
  ['sql-injection', 'Tampering'],
  ['command-injection', 'Elevation of Privilege'],
  ['insecure-deserialization', 'Elevation of Privilege'],
  ['code-injection', 'Elevation of Privilege'],
  ['path-traversal', 'Information Disclosure'],
  ['insecure-http', 'Information Disclosure'],
  ['ssrf', 'Spoofing'],
  ['xxe', 'Information Disclosure'],
]);

/**
 * Map a rule family to its STRIDE threat category.
 *
 * @param {string} fam - Rule family, e.g. 'sql-injection'.
 * @returns {string} The STRIDE category; 'Tampering' for any unknown family.
 */
function _strideForFamily(fam) {
  return _STRIDE_BY_FAMILY.get(fam) ?? 'Tampering';
}
170
+
171
// Fixed remediation text per rule family.
const _REMEDIATION_TEXT = new Map([
  [
    'sql-injection',
    'Use parameterized queries: `connection.execute(text("SELECT ... WHERE id = :id"), {"id": id})` instead of f-string concat. For raw `cursor.execute`, pass the value as the second positional argument; never concatenate.',
  ],
  [
    'command-injection',
    'Avoid `os.system` and `shell=True`. Use `subprocess.run([\'binary\', arg1, arg2], check=True)` with arguments as a list — the shell never sees the values, so shell metacharacters cannot be injected.',
  ],
  [
    'insecure-deserialization',
    'Never `pickle.loads` untrusted bytes. Use `json.loads` for structured data. For YAML, use `yaml.safe_load`. For `marshal`, switch to JSON or a schema-validated alternative.',
  ],
  [
    'code-injection',
    'Replace `eval` / `exec` with a safe parser appropriate to the input class — `ast.literal_eval` for Python literals, `json.loads` for JSON, a domain-specific parser for everything else.',
  ],
  [
    'path-traversal',
    'Validate the user path is inside the intended directory: `os.path.realpath(os.path.join(base, user_path)).startswith(os.path.realpath(base))`. For `flask.send_from_directory`, ensure the filename is a known allowlisted value.',
  ],
  [
    'insecure-http',
    'Remove `verify=False`. If you genuinely need to disable TLS verification for a known internal endpoint, scope it to that endpoint and document why; never broadly across `requests` calls.',
  ],
  [
    'ssrf',
    'Validate the URL against an allowlist before fetching. Block private IP ranges (127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 169.254.169.254 / metadata IPs).',
  ],
  [
    'xxe',
    'Configure the XML parser to disable external entities: lxml `etree.XMLParser(resolve_entities=False, no_network=True)`; defusedxml is the safest drop-in.',
  ],
]);

/**
 * Remediation guidance for a finding.
 *
 * @param {string} fam  - Rule family of the finding.
 * @param {string} vuln - Finding title; only used in the generic fallback.
 * @returns {string} Family-specific advice, or a generic sentence naming
 *   `vuln` when the family has no dedicated text.
 */
function _remediationFor(fam, vuln) {
  const advice = _REMEDIATION_TEXT.get(fam);
  if (advice !== undefined) {
    return advice;
  }
  return `Address the ${vuln} finding above.`;
}
193
+
194
+ // For tests + the no-dead-modules check.
195
+ export const _ruleCount = RULES.length;
@@ -0,0 +1,102 @@
1
+ // Quarkus framework hardening.
2
+ //
3
+ // Coverage:
4
+ // 1. Resource methods exposed without @Authenticated / @RolesAllowed
5
+ // 2. application.properties: quarkus.oidc.credentials.secret literal
6
+ // 3. quarkus.security.users.embedded.users.* set to a literal (non-${...}) password
7
+ // 4. quarkus.http.cors.origins set to the * wildcard
8
+ // 5. @PermitAll on /api/admin or similar sensitive paths (planned; not implemented in this module yet)
9
+ // 6. mp.jwt.verify.publickey.location missing when @Authenticated present (planned; not implemented in this module yet)
10
+
11
// Matches paths ending in `.java` (case-insensitive).
const _JAVA_RE = /\.java$/i;
// Matches `application.properties` / `application.yml` / `application.yaml`,
// optionally with a `-suffix` or `.suffix` before the extension
// (e.g. `application-prod.properties`), at the start of the path or right
// after a `/` or `\` separator.
const _PROPS_RE = /(?:^|[\\/])application(?:[-.][\w-]+)?\.(?:properties|ya?ml)$/i;
13
+
14
/**
 * Convert a character offset into a 1-based line number.
 *
 * @param {string} raw - Text being scanned.
 * @param {number} idx - Character offset into `raw`.
 * @returns {number} Number of '\n'-separated segments in `raw` up to `idx`.
 */
function _line(raw, idx) {
  const prefix = raw.slice(0, idx);
  const segments = prefix.split('\n');
  return segments.length;
}
17
+
18
/**
 * Report whether the source text references `io.quarkus`, `jakarta.ws.rs`
 * or `org.eclipse.microprofile` anywhere (imports, fully-qualified names, ...).
 *
 * @param {string} raw - Java source text.
 * @returns {boolean} true when at least one of the package names occurs.
 */
function _isQuarkusJava(raw) {
  const packageMarker = /\bio\.quarkus\b|\bjakarta\.ws\.rs\b|\borg\.eclipse\.microprofile\b/;
  return packageMarker.test(raw);
}
21
+
22
/**
 * Scan a single file for Quarkus hardening problems.
 *
 * Exactly one of two paths runs, chosen by the file name:
 *   - `application*.properties|yml|yaml` files that mention `quarkus.`:
 *     flags a literal `quarkus.oidc.credentials.secret`, an embedded user
 *     with a literal password, and a `*` CORS origin. Only `key = value`
 *     syntax on a single line is recognised.
 *   - `.java` files that reference Quarkus / JAX-RS / MicroProfile packages:
 *     flags POST/PUT/PATCH/DELETE methods with no security annotation nearby.
 *
 * @param {string} file - Path of the scanned file (used for matching and ids).
 * @param {string} raw - Full file contents.
 * @returns {Array<object>} Findings; empty when the input is missing,
 *   not a string, longer than 200,000 characters, or of an unsupported type.
 */
export function scanQuarkusHardening(file, raw) {
  if (!file || !raw || typeof raw !== 'string') return [];
  if (raw.length > 200_000) return [];
  const findings = [];

  // Properties file checks.
  // The patterns use `[ \t]*` rather than `\s*`: with the `m` flag `\s*` can
  // swallow newlines, which (a) anchors the match on an earlier blank line and
  // so reports the wrong line, and (b) lets an empty value capture the next
  // property as if it were the secret.
  if (_PROPS_RE.test(file) && /\bquarkus\./.test(raw)) {
    // OIDC client secret literal
    for (const m of raw.matchAll(/^[ \t]*quarkus\.oidc\.credentials\.secret[ \t]*=[ \t]*(\S+)/gmi)) {
      const val = m[1];
      // `${...}` is a config-expression reference; empty quotes carry no secret.
      if (val.startsWith('${') || val === '""' || val === "''") continue;
      const line = _line(raw, m.index);
      findings.push({
        id: `quarkus:oidc-secret-literal:${file}:${line}`,
        file, line,
        vuln: 'Quarkus OIDC client secret in plaintext config',
        severity: 'critical',
        family: 'quarkus-hardcoded-credential',
        cwe: 'CWE-798',
        confidence: 0.95,
        description: 'quarkus.oidc.credentials.secret in source-controlled config lets anyone with repo read impersonate the application against the IdP.',
        remediation: 'Replace with ${OIDC_CLIENT_SECRET} env-var reference and rotate the leaked secret.',
      });
    }
    // Embedded user with literal password
    for (const m of raw.matchAll(/^[ \t]*quarkus\.security\.users\.embedded\.users\.\w+[ \t]*=[ \t]*(\S+)/gmi)) {
      if (m[1].startsWith('${')) continue;
      const line = _line(raw, m.index);
      findings.push({
        id: `quarkus:embedded-user-password:${file}:${line}`,
        file, line,
        vuln: 'Quarkus embedded user with literal password (likely dev convenience leaked to prod)',
        severity: 'critical',
        family: 'quarkus-hardcoded-credential',
        cwe: 'CWE-798',
        confidence: 0.9,
        description: 'Quarkus embedded identity is convenient for dev / smoke tests; pushing it to a non-dev profile creates a backdoor.',
        remediation: 'Move the user/password to a real IdentityProvider (Keycloak, LDAP, DB). Quarkus dev-mode users should never ship to production.',
      });
    }
    // Wildcard CORS origin
    for (const m of raw.matchAll(/^[ \t]*quarkus\.http\.cors\.origins[ \t]*=[ \t]*['"]?\*['"]?/gm)) {
      const line = _line(raw, m.index);
      findings.push({
        id: `quarkus:cors-wildcard:${file}:${line}`,
        file, line,
        vuln: 'Quarkus CORS origins = * (wildcard)',
        severity: 'high',
        family: 'quarkus-cors-wildcard',
        cwe: 'CWE-942',
        confidence: 0.9,
        description: 'Wildcard CORS combined with credentialed requests allows any origin to read authenticated responses.',
        remediation: 'Set quarkus.http.cors.origins=https://app.example.com (explicit list).',
      });
    }
    return findings;
  }

  if (!_JAVA_RE.test(file)) return findings;
  if (!_isQuarkusJava(raw)) return findings;

  // Mutating JAX-RS endpoint without @Authenticated / @RolesAllowed / @PermitAll
  const authzRe = /@(?:Authenticated|RolesAllowed|PermitAll|DenyAll)\b/;
  const verbRe = /@(?:POST|PUT|PATCH|DELETE)\b[\s\S]{0,300}?public\s+\w[\w<>,\s\[\]?]*\s+(\w+)\s*\(/g;
  for (const mm of raw.matchAll(verbRe)) {
    // Heuristic: a security annotation in the 400 characters before the verb
    // annotation (class-level or stacked above it) counts as protection...
    const above = raw.slice(Math.max(0, mm.index - 400), mm.index);
    // ...and so does one stacked between the verb annotation and the method
    // signature (e.g. `@POST` followed by `@RolesAllowed("admin")`), which
    // lies inside the matched span rather than above it.
    if (authzRe.test(above) || authzRe.test(mm[0])) continue;
    const line = _line(raw, mm.index);
    findings.push({
      id: `quarkus:no-authz:${file}:${line}:${mm[1]}`,
      file, line,
      vuln: `Quarkus mutating endpoint ${mm[1]}() has no @Authenticated / @RolesAllowed annotation`,
      severity: 'high',
      family: 'quarkus-missing-authz',
      cwe: 'CWE-862',
      confidence: 0.7,
      description: 'A POST/PUT/PATCH/DELETE handler is exposed without authentication / authorization. Unless the path is gated by a wider mechanism, this endpoint is callable by anyone reaching it.',
      remediation: 'Add @Authenticated on the resource class (default to require auth) and @RolesAllowed("admin") on privileged methods. Use @PermitAll explicitly when a method is truly public.',
    });
  }

  return findings;
}
@@ -0,0 +1,118 @@
1
+ // RAG Context-Poisoning Path (OWASP LLM Top 10 (2023) LLM03 — Training-Data Poisoning,
2
+ // applied at retrieval-time rather than fine-tune time).
3
+ //
4
+ // Pattern: untrusted text from a user (req.body, file upload, web scrape,
5
+ // external API) is written into a vector store / retrieval index without
6
+ // (a) source attribution, (b) trust-level tagging, or (c) downstream
7
+ // retrieval-side filtering. At LLM query time, the poisoned chunk is
8
+ // retrieved with no signal that it shouldn't be trusted, and its
9
+ // embedded instructions ride along into the model's context.
10
+ //
11
+ // We catch the WRITE side, not the READ — the retrieval side is too
12
+ // generic to flag without taint context. The write-side signature is
13
+ // strong: `<vector_store>.add(text=<user_input>)` with no metadata
14
+ // indicating provenance / trust level.
15
+ //
16
+ // Vector-store libraries covered (v1):
17
+ // - chromadb (Python): collection.add(documents=[...])
18
+ // - pinecone (Python/JS): index.upsert(vectors=[{values, metadata}])
19
+ // - weaviate: client.collections.<n>.data.insert(...)
20
+ // - qdrant: client.upsert(collection_name, points=[...])
21
+ // - langchain: vectorstore.add_documents(...)
22
+ // - pgvector: INSERT INTO embeddings (vec, content) VALUES (...)
23
+ //
24
+ // Suppress when:
25
+ // - the call includes `metadata: { source, trust_level, … }` and the
26
+ // trust_level is a non-trivial argument (not just '"trusted"')
27
+ // - a known sanitizer or denylist filter is referenced in the preceding
28
+ // 30 lines
29
+
30
+ import { blankComments } from './_comment-strip.js';
31
+
32
// Substrings that suggest an expression carries request / externally fetched
// data (HTTP request accessors across several frameworks, PHP superglobals,
// servlet getters, webhook / scrape / fetch calls). Applied to the captured
// argument fragment of a vector-store write.
const TAINT_HINT_RE =
  /\b(?:req\.|request\.|params\.|query\.|body\.|ctx\.query|ctx\.request|reply\.query|c\.Query|r\.URL\.Query|_GET|_POST|_REQUEST|getParameter|getHeader|webhook|scrape|fetch\s*\()/;

// Write-side signatures, one entry per [language, regex, label].
// `language` ('py' | 'js') restricts the pattern to files of that language;
// capture group 1 is the leading argument fragment that is tested for taint;
// `label` names the vector store in the finding.
const PATTERNS = [
  // chromadb collection.add
  ['py', /\b(?:collection|chroma_collection)\s*\.\s*add\s*\(\s*documents\s*=\s*([^)]+?)\s*[,)]/g, 'ChromaDB'],
  // langchain add_documents / add_texts
  ['py', /\bvectorstore\s*\.\s*add_(?:documents|texts)\s*\(\s*([^)]+?)\s*[,)]/g, 'LangChain'],
  ['js', /\bvectorStore\s*\.\s*add(?:Documents|Texts)\s*\(\s*([^)]+?)\s*[,)]/g, 'LangChain.js'],
  // pinecone upsert
  ['py', /\bindex\s*\.\s*upsert\s*\(\s*vectors\s*=\s*([^)]+?)\s*[,)]/g, 'Pinecone'],
  ['js', /\bindex\s*\.\s*upsert\s*\(\s*([^)]+?)\s*\)/g, 'Pinecone'],
  // weaviate insert
  ['py', /\.\s*data\s*\.\s*insert\s*\(\s*([^)]+?)\s*[,)]/g, 'Weaviate'],
  // qdrant upsert
  ['py', /\bclient\s*\.\s*upsert\s*\(\s*collection_name[^,]+,\s*points\s*=\s*([^)]+?)\s*[,)]/g, 'Qdrant'],
  // pgvector via raw INSERT (only applied to Python files, per its 'py' tag)
  ['py', /\bINSERT\s+INTO\s+\w*embedding[^;]*VALUES\s*\(\s*([^)]+?)\)/gi, 'pgvector raw INSERT'],
];

// A `metadata` / `metadatas` argument whose object literal mentions a
// provenance-style key; its presence near the call suppresses the finding.
const PROVENANCE_HINT_RE =
  /\bmetadatas?\s*[=:]\s*\[?\s*\{[^}]*(?:source|trust_level|provenance|tenant_id|user_id|origin)/i;

// Names of sanitizer / denylist helpers; a reference shortly before the call
// suppresses the finding.
const SANITIZER_HINT_RE =
  /\b(?:bleach\s*\.\s*clean|DOMPurify\.sanitize|stripUntrustedInstructions|detect_prompt_injection|denylist[A-Za-z0-9_]*|trustLevelOf)\b/;
57
+
58
/** 1-based line number of character offset `idx` within `raw`. */
function _lineOf(raw, idx) {
  const head = raw.substring(0, idx);
  return head.split('\n').length;
}
59
/**
 * Map a file path to the language tag used by PATTERNS.
 *
 * @param {string} fp - File path.
 * @returns {'js'|'py'|null} 'js' for JS/TS extensions, 'py' for `.py`,
 *   null for anything else.
 */
function _lang(fp) {
  const isScript = /\.(?:js|jsx|ts|tsx|mjs|cjs)$/i.test(fp);
  if (isScript) {
    return 'js';
  }
  return /\.py$/i.test(fp) ? 'py' : null;
}
64
/**
 * Check whether a known sanitizer / denylist helper is referenced in the
 * window of up to 30 lines ending at (and including) the given line.
 *
 * @param {string} raw - Full file contents.
 * @param {number} line - 1-based line number of the flagged call.
 * @returns {boolean} true when SANITIZER_HINT_RE matches inside the window.
 */
function _hasSanitizerAbove(raw, line) {
  const windowStart = Math.max(0, line - 30);
  const windowText = raw.split('\n').slice(windowStart, line).join('\n');
  return SANITIZER_HINT_RE.test(windowText);
}
69
+
70
/**
 * Flag vector-store writes whose leading argument looks user-controlled and
 * that carry no provenance metadata and no nearby sanitizer reference.
 *
 * For every PATTERNS entry matching the file's language, each match is
 * reported unless (a) the captured argument fragment shows no taint hint,
 * (b) provenance metadata appears on the call line or the five lines after
 * it, or (c) a sanitizer helper is referenced shortly above the call.
 *
 * @param {string} fp - Path of the scanned file; decides the language.
 * @param {string} raw - Full file contents.
 * @returns {Array<object>} Findings; empty for non-string, empty or
 *   >500,000-character input, unsupported extensions, or files that never
 *   mention a vector-store keyword.
 */
export function scanRAGPoisoning(fp, raw) {
  // Non-string input (e.g. a Buffer) would otherwise throw in `_lineOf`.
  if (typeof raw !== 'string' || raw.length === 0 || raw.length > 500_000) return [];
  const lang = _lang(fp);
  if (!lang) return [];
  const code = blankComments(raw, lang === 'py' ? 'py' : undefined);
  if (!/\b(?:chromadb|chroma|pinecone|weaviate|qdrant|pgvector|langchain|vectorstore|vectorStore|embedding)\b/i.test(code)) return [];
  // Split once up front; the line array is the same for every match.
  const lines = raw.split('\n');
  const findings = [];
  const seen = new Set();
  for (const [plang, pat, label] of PATTERNS) {
    if (plang !== lang) continue;
    // Fresh instance so the shared /g pattern's lastIndex is never touched.
    const re = new RegExp(pat.source, pat.flags);
    let m;
    while ((m = re.exec(code))) {
      const callArgs = (m[1] || '');
      if (!TAINT_HINT_RE.test(callArgs)) continue;
      // The full call may extend beyond the captured fragment; look at the
      // rest of the line block to check for provenance metadata.
      // NOTE(review): m.index is an offset into `code` but is resolved
      // against `raw`; this presumes blankComments keeps offsets aligned —
      // TODO confirm.
      const lineNo = _lineOf(raw, m.index);
      const blockEnd = Math.min(lines.length, lineNo + 5);
      const block = lines.slice(lineNo - 1, blockEnd).join('\n');
      if (PROVENANCE_HINT_RE.test(block)) continue;
      if (_hasSanitizerAbove(raw, lineNo)) continue;
      const id = `rag-poisoning:${fp}:${lineNo}:${label}`;
      if (seen.has(id)) continue;
      seen.add(id);
      findings.push({
        id,
        file: fp, line: lineNo,
        vuln: `RAG Context-Poisoning Path (${label})`,
        severity: 'high',
        cwe: 'CWE-1336',
        family: 'rag-poisoning',
        stride: 'Tampering',
        snippet: (lines[lineNo - 1] || '').trim().slice(0, 200),
        remediation:
          'Untrusted user content is being written to a retrieval index without a provenance/trust-level tag. At retrieval time the chunk will appear in LLM context with no signal it shouldn\'t be trusted, and any embedded instructions ride along. ' +
          'Mitigations: ' +
          '(1) tag every write with `metadata: { source, trust_level, tenant_id }` and FILTER on `trust_level` at retrieval time; ' +
          '(2) keep user-generated content in a separate index from curated/admin content and never mix them in the same retrieval; ' +
          '(3) at retrieval time, wrap user-tier chunks in rare-token delimiters and instruct the model to treat them as data, not instructions; ' +
          '(4) reject content that contains known prompt-injection sentinels ("ignore previous instructions", role-frame strings, etc.) before insertion.',
        parser: 'RAG-POISONING',
        confidence: 0.75,
      });
    }
  }
  return findings;
}
@@ -0,0 +1,128 @@
1
+ // Rate limiting & abuse prevention advisor.
2
+ //
3
+ // Vibecoders forget rate limiting on auth, AI, payment, and form endpoints.
4
+ // The consequence is account brute-force, $10k+ AI API bills from a single
5
+ // attacker, and credential-stuffing. This module detects handler files that
6
+ // define sensitive-category routes without a recognisable rate-limit guard.
7
+ //
8
+ // Findings:
9
+ // RATE_LIMIT_AUTH — auth endpoint (login/register/forgot) without rate limiting
10
+ // RATE_LIMIT_AI — AI generation endpoint without rate limiting
11
+ // RATE_LIMIT_PAYMENT — payment / checkout endpoint without rate limiting
12
+ // RATE_LIMIT_CONTACT — contact / submit form endpoint without rate limiting
13
+ // RATE_LIMIT_MISSING — generic API endpoint without rate limiting when no RL lib imported (reserved; not emitted by the current implementation)
14
+
15
// File extensions this module scans (JavaScript / TypeScript variants).
const _SCAN_EXT_RE = /\.(?:js|jsx|ts|tsx|mjs|cjs)$/i;
// Paths under test / spec / fixture / example / node_modules directories.
// Only `/` is recognised as the path separator.
const _NONPROD_RE = /(?:^|\/)(?:tests?|__tests__|spec|fixtures?|examples?|node_modules)\//i;

// Rate-limit library imports / usage signals
// An import / require of a known rate-limiting package.
const RL_IMPORT_RE = /(?:from|require)\s*\(?\s*['"`](?:express-rate-limit|rate-limiter-flexible|@upstash\/ratelimit|hono-rate-limiter|next-rate-limit|bottleneck|p-throttle|@nestjs\/throttler|fastify-rate-limit|koa-ratelimit|slowDown|express-slow-down)['"`]/i;
// A call to a function whose name suggests a limiter / throttle.
const RL_USAGE_RE = /\b(?:rateLimit|rateLimiter|limiter|throttle|throttler|createRateLimiter|upstashRatelimit|slidingWindow|fixedWindow|tokenBucket)\s*\(/;
// An incr / expire / setex call whose arguments mention a counter-like key.
const REDIS_RL_RE = /\b(?:incr|expire|setex)\s*\([^)]*(?:rate|limit|attempt|count)/i;

// Route definition patterns
// `<app|router|server|Route>.<verb>('<path>'`; group 1 is the route path.
// Global flag: callers must reset `lastIndex` before reuse.
const ROUTE_DEF_RE = /(?:app|router|server|Route)\s*\.\s*(?:get|post|put|patch|delete|all)\s*\(\s*['"`]([^'"`]+)['"`]/g;
// An exported function named after an HTTP verb, or `handler`.
const NEXT_HANDLER_RE = /export\s+(?:default\s+)?(?:async\s+)?function\s+(?:POST|GET|PUT|PATCH|DELETE|handler)\b/;
// Directory segments after `app/` or `pages/` (optional `api/`); group 1 is
// the route directory path without the trailing file name.
const NEXT_ROUTE_FILE_RE = /(?:^|\/)(?:app|pages)\/(?:api\/)?([^/]+(?:\/[^/]+)*)\//;

// Sensitive endpoint categories by URL segment
const AUTH_PATH_RE = /\/(?:auth|login|logout|signin|signout|signup|register|forgot|reset|password|verify|otp|mfa|2fa|token|refresh)\b/i;
const AI_PATH_RE = /\/(?:ai|chat|generate|complete|completion|embed|embedding|gpt|claude|llm|openai|anthropic|inference|predict)\b/i;
const PAYMENT_PATH_RE = /\/(?:pay(?:ment)?|checkout|stripe|order|subscribe|billing|invoice|charge|purchase)\b/i;
const CONTACT_PATH_RE = /\/(?:contact|submit|feedback|form|newsletter|subscribe|waitlist|signup|onboard)\b/i;
33
+
34
/**
 * Report whether the file content shows any rate-limiting signal: a known
 * library import, a limiter-style call, or a Redis counter call.
 *
 * @param {string} content - File contents.
 * @returns {boolean} true when at least one signal pattern matches.
 */
function _hasRateLimit(content) {
  const signals = [RL_IMPORT_RE, RL_USAGE_RE, REDIS_RL_RE];
  return signals.some((signal) => signal.test(content));
}
37
+
38
/**
 * Classify a route path into a sensitive-endpoint category.
 * Categories are tried in a fixed order (auth, ai, payment, contact) and the
 * first match wins.
 *
 * @param {string} path - Route path or file path.
 * @returns {'auth'|'ai'|'payment'|'contact'|null} Category, or null if none match.
 */
function _categorise(path) {
  const ordered = [
    ['auth', AUTH_PATH_RE],
    ['ai', AI_PATH_RE],
    ['payment', PAYMENT_PATH_RE],
    ['contact', CONTACT_PATH_RE],
  ];
  for (const [category, pattern] of ordered) {
    if (pattern.test(path)) {
      return category;
    }
  }
  return null;
}
45
+
46
// Per-category finding template, keyed by the category names `_categorise`
// returns. Each entry supplies the severity, title, description, remediation
// and CWE id copied into a finding for that category.
const CATEGORY_META = {
  auth: {
    severity: 'high',
    title: 'Auth endpoint missing rate limiting',
    description: 'Authentication endpoints without rate limiting are trivially brute-forced. An attacker can try thousands of passwords per second at zero cost.',
    remediation: 'Add a rate limiter: max 5 attempts per IP per 15 minutes on login/register. Use express-rate-limit, @upstash/ratelimit, or your platform\'s edge middleware.',
    cwe: 'CWE-307',
  },
  ai: {
    severity: 'high',
    title: 'AI generation endpoint missing rate limiting',
    description: 'AI API call endpoints without rate limiting let a single attacker exhaust your entire monthly OpenAI/Anthropic budget in minutes. This is a direct financial attack vector.',
    remediation: 'Add per-user and per-IP rate limits on AI endpoints. Use @upstash/ratelimit for serverless or express-rate-limit for Node servers. Consider per-request cost caps as well.',
    cwe: 'CWE-400',
  },
  payment: {
    severity: 'high',
    title: 'Payment endpoint missing rate limiting',
    description: 'Payment and checkout endpoints without rate limiting enable card-testing attacks where attackers enumerate stolen card numbers at high speed.',
    remediation: 'Add strict rate limiting (max 3 attempts per IP per hour) on payment endpoints. Stripe also recommends enabling Radar rules in the dashboard.',
    cwe: 'CWE-307',
  },
  contact: {
    severity: 'medium',
    title: 'Contact / form endpoint missing rate limiting',
    description: 'Unprotected form submission endpoints are used for spam campaigns, email flooding, and enumeration of valid email addresses.',
    remediation: 'Add rate limiting (max 3 submissions per IP per hour) and consider adding a honeypot field or CAPTCHA for public-facing forms.',
    cwe: 'CWE-400',
  },
};
76
+
77
/**
 * Report sensitive-category routes in a JS/TS file that shows no rate-limit
 * signal anywhere in its content.
 *
 * Express-style route definitions are checked first; only when none of them
 * produce a finding is the file treated as a Next.js-style route handler
 * whose category is inferred from the file path.
 *
 * @param {string} file - Path of the scanned file.
 * @param {string} content - Full file contents.
 * @returns {Array<object>} Findings; empty for non-string content,
 *   unsupported extensions, non-production paths, or files that already
 *   contain a rate-limit signal.
 */
function scanRateLimit(file, content) {
  // Non-string content would otherwise throw further down.
  if (typeof content !== 'string') return [];
  if (!_SCAN_EXT_RE.test(file)) return [];
  if (_NONPROD_RE.test(file)) return [];
  if (_hasRateLimit(content)) return [];
  const findings = [];

  // Builds one finding from the category template and a 1-based line number.
  const report = (cat, line) => {
    const meta = CATEGORY_META[cat];
    findings.push({
      id: `rate-limit:RATE_LIMIT_${cat.toUpperCase()}:${file}:${line}`,
      title: meta.title,
      severity: meta.severity,
      file, line,
      description: meta.description,
      remediation: meta.remediation,
      cwe: meta.cwe,
    });
  };

  // Check named route definitions
  let m;
  // ROUTE_DEF_RE is a shared /g regex; reset its cursor before scanning.
  ROUTE_DEF_RE.lastIndex = 0;
  while ((m = ROUTE_DEF_RE.exec(content)) !== null) {
    const cat = _categorise(m[1]);
    if (cat) {
      report(cat, content.slice(0, m.index).split('\n').length);
    }
  }

  // Next.js route handler: infer category from file path
  const handlerMatch = findings.length === 0 ? NEXT_HANDLER_RE.exec(content) : null;
  if (handlerMatch) {
    const filePathMatch = NEXT_ROUTE_FILE_RE.exec(file);
    const routeSegment = filePathMatch ? '/' + filePathMatch[1] : file;
    const cat = _categorise(routeSegment) || _categorise(file);
    if (cat) {
      // Derive the line from the match offset: the declaration may span
      // several lines (`export default` / `async function handler`), in
      // which case no single line matches the pattern on its own.
      report(cat, content.slice(0, handlerMatch.index).split('\n').length);
    }
  }

  return findings;
}
127
+
128
+ export { scanRateLimit };