@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,171 @@
1
+ // FR-LEARN-10 — AI-generated-code fingerprinting.
2
+ //
3
+ // Heuristic detection of code regions likely written by an AI assistant
4
+ // (Claude, GPT, Copilot). The goal is NOT forensics — it is RISK ROUTING.
5
+ // AI-generated code disproportionately reproduces training-set patterns,
6
+ // which include known-vulnerable Stack Overflow idioms. Tagging regions
7
+ // `ai-likely` lets downstream rule packs raise scrutiny for those regions
8
+ // without ham-fisting the whole codebase.
9
+ //
10
+ // Signal sources (each contributes a weighted score; the composite is a
11
+ // 0..1 score with three tier bands: `ai-likely` >= 0.55, `mixed` >= 0.30,
12
+ // `human-likely` otherwise):
13
+ //
14
+ // 1. Comment-to-code ratio above ~0.3 (AI over-comments boilerplate).
15
+ // 2. Exhaustive null-checking on every parameter ("if (!x) return;").
16
+ // 3. Variable names like `result`, `data`, `temp`, `helper`, `utility`.
17
+ // 4. Try/catch wrapping every operation that could possibly fail.
18
+ // 5. JSDoc-like `@param` blocks on every function in a JS file (no TS).
19
+ // 6. Identical structural patterns across nearby functions (boilerplate).
20
+ // 7. Presence of an `as const` assertion (TS) or `typing.Final` (Py) on
21
+ // every constant — AI loves redundant type narrowing.
22
+ // 8. Comments in the imperative "We do X then Y" register. (Signals 5-7 are listed for completeness but are not scored by the checks in this file.)
23
+ //
24
+ // Hallucinated imports are scored separately — this file only surfaces candidate package names for that check (see extractImportedPackageNames).
25
+
26
// Generic, low-information identifiers. scoreVariableNames counts how many
// of a file's const/let/var declarations use one of these names.
const AI_VARIABLE_NAMES = new Set([
  'result',
  'data',
  'temp',
  'helper',
  'utility',
  'value',
  'item',
  'output',
  'response',
  'processedData',
  'finalResult',
  'resultData',
]);
30
+
31
// Line-comment phrasings counted by scoreAiComments. Each pattern is tested
// against one whole source line, is case-insensitive, and only recognises
// `//` comments (leading whitespace allowed). None carries the `g` flag, so
// repeated `.test()` calls are stateless.
const AI_COMMENT_PATTERNS = [
  // "// We do …", "// We then …", "// We will …"
  /^\s*\/\/\s*We (?:do|then|finally|next|now|here|will)\b/i,

  // "// This function will …", "// This helper is responsible …"
  /^\s*\/\/\s*This (?:function|method|helper|utility) (?:will|does|is responsible)/i,

  // "// Note: …", "// Note that …"
  /^\s*\/\/\s*Note(?:\s+that)?:?\s/i,

  // "// Important: …"
  /^\s*\/\/\s*Important:?\s/i,

  // "// Step 1 …", "// Stage 2 …", "// Phase 3 …"
  /^\s*\/\/\s*(?:Step|Stage|Phase)\s+\d/i,
];
38
+
39
// Score the ratio of comment lines to code lines in `text`.
//
// Blank lines are ignored. A line counts as a comment when, after trimming,
// it starts with `//`, `#` or `*`, or when it opens a block comment (`/*`)
// that either stays open or closes at the very end of the same line. The
// block-comment case matters because JSDoc openers (`/**`) and one-line
// `/* … */` comments would otherwise be tallied as code and drag the ratio
// down. A line such as `/* note */ doSomething();` is still counted as code.
//
// Returns 0.30, 0.18 or 0.08 for ratios above 0.5, 0.30 and 0.20
// respectively, and 0 otherwise (including when there are no code lines).
function scoreCommentRatio(text) {
  let code = 0;
  let comment = 0;
  for (const ln of text.split(/\n/)) {
    const t = ln.trim();
    if (!t) continue;

    let isComment = t.startsWith('//') || t.startsWith('#') || t.startsWith('*');
    if (!isComment && t.startsWith('/*')) {
      // Search from index 2 so the opener's own `*` cannot pair with a
      // following `/` (as in `/*/`).
      const close = t.indexOf('*/', 2);
      isComment = close === -1 || close === t.length - 2;
    }

    if (isComment) comment++;
    else code++;
  }
  if (code === 0) return 0;
  const ratio = comment / code;
  if (ratio > 0.5) return 0.30;
  if (ratio > 0.30) return 0.18;
  if (ratio > 0.20) return 0.08;
  return 0;
}
55
+
56
// Score how many of the file's `const` / `let` / `var` declarations use a
// name listed in AI_VARIABLE_NAMES.
//
// Only simple `keyword identifier` declarations are seen by the regex;
// destructuring patterns are not. Files with fewer than three such
// declarations score 0. Otherwise the share of listed names maps to
// 0.20 (> 0.4), 0.10 (> 0.25), 0.04 (> 0.10) or 0.
function scoreVariableNames(text) {
  const declared = Array.from(
    text.matchAll(/\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)/g),
    (m) => m[1],
  );
  if (declared.length < 3) return 0;

  const generic = declared.filter((name) => AI_VARIABLE_NAMES.has(name)).length;
  const share = generic / declared.length;

  if (share > 0.4) return 0.20;
  if (share > 0.25) return 0.10;
  if (share > 0.10) return 0.04;
  return 0;
}
70
+
71
// Score the density of `if (!x) return|throw` guards relative to the number
// of functions in `text`.
//
// Functions counted: named `function` declarations with a non-empty
// parameter list, plus arrow functions with a block body. Returns 0.18 when
// there are more than 0.8 guards per function, 0.10 above 0.5, else 0
// (also 0 when no functions are found).
function scoreNullChecks(text) {
  const countOf = (re) => (text.match(re) || []).length;

  const fnCount = countOf(/function\s+\w+\s*\([^)]+\)\s*\{|=>\s*\{/g);
  if (fnCount === 0) return 0;

  const guardCount = countOf(/if\s*\(\s*!\s*[A-Za-z_$][\w$]*\s*\)\s*(?:return|throw)/g);
  const perFunction = guardCount / fnCount;

  if (perFunction > 0.8) return 0.18;
  if (perFunction > 0.5) return 0.10;
  return 0;
}
80
+
81
// Score the number of lines in `text` that match at least one entry of
// AI_COMMENT_PATTERNS. A line is counted once however many patterns it
// matches. Returns 0.22 for five or more hits, 0.12 for two or more,
// 0.04 for exactly one, and 0 for none.
function scoreAiComments(text) {
  const matched = text
    .split(/\n/)
    .filter((line) => AI_COMMENT_PATTERNS.some((re) => re.test(line)))
    .length;

  if (matched >= 5) return 0.22;
  if (matched >= 2) return 0.12;
  if (matched >= 1) return 0.04;
  return 0;
}
92
+
93
// Score how many `try {` blocks appear per function in `text`.
//
// Functions are approximated as named `function` declarations plus every
// `=>` token. Returns 0.10 above 0.6 tries per function, 0.04 above 0.3,
// else 0 (also 0 when no functions are found).
function scoreTryCatch(text) {
  const occurrences = (re) => {
    const found = text.match(re);
    return found ? found.length : 0;
  };

  const fnCount = occurrences(/function\s+\w+|=>/g);
  const tryCount = occurrences(/\btry\s*\{/g);
  if (fnCount === 0) return 0;

  const perFunction = tryCount / fnCount;
  if (perFunction > 0.6) return 0.10;
  return perFunction > 0.3 ? 0.04 : 0;
}
102
+
103
// Compute the AI-provenance fingerprint for one file's source text.
//
// Returns `{ score, provenance, signals }`:
//   - score      — sum of all scorer contributions, capped at 1 and rounded
//                  to two decimals.
//   - provenance — 'ai-likely' (score >= 0.55), 'mixed' (>= 0.30) or
//                  'human-likely'; tiering uses the unrounded capped score.
//   - signals    — one `{ name, score }` entry per scorer that contributed
//                  more than 0, in scorer order.
// Non-string input and text shorter than 100 characters is not scored and
// yields `{ score: 0, provenance: 'unknown', signals: [] }`.
export function fingerprintFile(text) {
  const scorable = typeof text === 'string' && text.length >= 100;
  if (!scorable) {
    return { score: 0, provenance: 'unknown', signals: [] };
  }

  const scorers = [
    { name: 'comment-ratio', run: scoreCommentRatio },
    { name: 'boilerplate-var-names', run: scoreVariableNames },
    { name: 'exhaustive-null-checks', run: scoreNullChecks },
    { name: 'ai-style-comments', run: scoreAiComments },
    { name: 'try-catch-everywhere', run: scoreTryCatch },
  ];

  const signals = [];
  let sum = 0;
  scorers.forEach(({ name, run }) => {
    const contribution = run(text);
    if (contribution > 0) {
      signals.push({ name, score: Number(contribution.toFixed(2)) });
      sum += contribution;
    }
  });

  const capped = Math.min(1, sum);
  let provenance;
  if (capped >= 0.55) {
    provenance = 'ai-likely';
  } else if (capped >= 0.30) {
    provenance = 'mixed';
  } else {
    provenance = 'human-likely';
  }

  return { score: Number(capped.toFixed(2)), provenance, signals };
}
126
+
127
+ // Annotate every finding with the host-file provenance tag. Allows downstream
128
+ // scoring to raise/lower confidence based on origin.
129
// Tag each finding with the provenance fingerprint of the file it points at.
//
// `findings` is mutated in place and also returned. `fileContents` is
// indexed by each finding's `file` property. Findings that are not objects,
// have no `file`, or whose file has no (truthy) entry in `fileContents` are
// left untouched. Each file is fingerprinted at most once per call; every
// finding from that file shares the same `signals` array instance.
// When `findings` is not an array or `fileContents` is falsy, `findings` is
// returned unchanged.
export function annotateAiProvenance(findings, fileContents) {
  if (!Array.isArray(findings) || !fileContents) return findings;

  const fingerprintByFile = new Map();
  for (const finding of findings) {
    if (!finding || typeof finding !== 'object') continue;

    const path = finding.file;
    const source = path ? fileContents[path] : undefined;
    if (!source) continue;

    if (!fingerprintByFile.has(path)) {
      fingerprintByFile.set(path, fingerprintFile(source));
    }
    const { provenance, score, signals } = fingerprintByFile.get(path);

    finding.provenance = provenance;
    finding.provenanceScore = score;
    finding.provenanceSignals = signals;
  }
  return findings;
}
144
+
145
+ // Hallucinated-import detection — packages an AI invented that aren't on the
146
+ // public registry. This is a coordinator hook: the SCA pipeline already has
147
+ // the dep list and a registry probe; we surface the candidate names from
148
+ // import statements for downstream verification.
149
// Collect the distinct package names referenced by static `import`
// statements and `require()` calls in `text`.
//
// Returns an array of unique names in first-seen order (all `import`
// matches first, then all `require` matches). Non-string or empty input
// yields []. Specifiers are reduced to their package root: `@scope/pkg/sub`
// becomes `@scope/pkg`, and `pkg/sub/path` becomes `pkg`.
//
// Skipped specifiers: relative (`./`, `../`), absolute (`/…`), and runtime
// built-ins addressed with a `node:` or `bun:` prefix. Both statement forms
// share one normaliser so the skip rules cannot drift apart (previously
// `require('bun:…')` was reported as a package while the equivalent
// `import` was not).
export function extractImportedPackageNames(text) {
  if (!text || typeof text !== 'string') return [];

  const RUNTIME_PREFIXES = ['node:', 'bun:'];

  // Map a raw module specifier to its package root, or null when the
  // specifier does not name a registry package.
  const toPackageName = (spec) => {
    if (spec.startsWith('.') || spec.startsWith('/')) return null;
    if (RUNTIME_PREFIXES.some((prefix) => spec.startsWith(prefix))) return null;
    const segments = spec.split('/');
    return spec.startsWith('@') ? segments.slice(0, 2).join('/') : segments[0];
  };

  // Capture group 1 of each pattern is the quoted module specifier.
  const specifierPatterns = [
    /(?:^|\n)\s*import\s+(?:[\w*${},\s]+\s+from\s+)?["']([^"']+)["']/g,
    /require\(\s*["']([^"']+)["']\s*\)/g,
  ];

  const names = new Set();
  for (const pattern of specifierPatterns) {
    for (const m of text.matchAll(pattern)) {
      const pkg = toPackageName(m[1]);
      if (pkg) names.add(pkg);
    }
  }
  return [...names];
}
@@ -0,0 +1,284 @@
1
+ // AI-BOM — AI / ML Bill of Materials.
2
+ //
3
+ // OWASP LLMSecOps explicitly names AI/ML Bill of Materials. This is the AI
4
+ // counterpart to SBOM (CycloneDX 1.6) and PBOM. We emit a JSON structure
5
+ // modeled on CycloneDX 1.7's ML-BOM extension where applicable, plus a
6
+ // human-readable Markdown table.
7
+ //
8
+ // Components captured (extracted from already-scanned source):
9
+ // - Hugging Face models loaded via from_pretrained / hf_hub_download
10
+ // - OpenAI / Anthropic / Google / Mistral / Cohere / Groq / Together / Bedrock
11
+ // / Vertex / Replicate / OpenRouter API endpoints (called via SDK)
12
+ // - Prompt template files (.prompt / .j2 / .jinja / .tmpl / .mustache /
13
+ // prompts/ directory)
14
+ // - Inference framework versions from manifests (transformers, torch,
15
+ // openai, anthropic, vercel-ai, langchain, llama-index, ollama, etc.)
16
+ // - Vector store configurations (pinecone, weaviate, chroma, qdrant,
17
+ // pgvector, milvus, faiss)
18
+ //
19
+ // No outbound calls; pure transform from already-collected fileContents and
20
+ // scan.components. Extraction precision is verified by a smoke test against
21
+ // a labelled fixture set.
22
+
23
+ import * as crypto from 'node:crypto';
24
+
25
+ // SDK / API endpoint detection — same family list as scanner/src/sast/llm.js
26
// Hugging Face model loads of the form `<Class>.from_pretrained("<id>", …)`.
// Accepted classes: the Auto{Model,Tokenizer,Config,Processor,FeatureExtractor}
// family, or any capitalised identifier ending in `Model` / `Tokenizer`.
//   group 1 — model id (word characters, `.`, `/`, `-`)
//   group 2 — optional `revision="…"` value found before the first `)`;
//             only word characters are accepted.
// The `g` flag makes these stateful under `.exec()` / `.test()`.
const HF_FROM_PRETRAINED_RE =
  /(?:Auto(?:Model|Tokenizer|Config|Processor|FeatureExtractor)|[A-Z][A-Za-z]*Model|[A-Z][A-Za-z]*Tokenizer)\.from_pretrained\s*\(\s*['"]([\w./-]+)['"](?:[^)]*?revision\s*=\s*['"]([\w]+)['"])?/g;

// `hf_hub_download(repo_id="<id>", …)` calls. Same capture groups as above:
// group 1 is the repo id, group 2 the optional revision.
const HF_HUB_DOWNLOAD_RE =
  /hf_hub_download\s*\(\s*repo_id\s*=\s*['"]([\w./-]+)['"](?:[^)]*?revision\s*=\s*['"]([\w]+)['"])?/g;
28
+
29
+ // API providers and their SDK call patterns (capture the model name string when present)
30
// Hosted-model SDK call shapes. Each entry is `{ provider, re }`; capture
// group 1 of `re` is the quoted model identifier. All patterns are global,
// so callers must use a fresh copy or reset `lastIndex` between inputs.
// The generic receiver name `client` is accepted by both the OpenAI and the
// Anthropic patterns, so `client.completions.create(...)` matches both.
const PROVIDER_PATTERNS = [
  // OpenAI: client.chat.completions.create({ model: "gpt-4o-mini", ... })
  ['openai', /(?:openai|client|oai)\.(?:chat\.)?completions\.create\s*\(\s*[{(]\s*[^{}]*?model\s*[:=]\s*['"]([^'"]+)['"]/g],
  ['openai', /(?:openai|client|oai)\.responses\.create\s*\(\s*[{(]\s*[^{}]*?model\s*[:=]\s*['"]([^'"]+)['"]/g],

  // Anthropic: anthropic.messages.create({ model: "claude-sonnet-4-6", ... })
  ['anthropic', /(?:anthropic|client|claude)\.(?:messages|completions)\.create\s*\(\s*[{(]\s*[^{}]*?model\s*[:=]\s*['"]([^'"]+)['"]/g],

  // Vercel AI SDK: generateText({ model: openai("gpt-4o"), ... }) — the model
  // name is taken from the inner provider call.
  ['openai (via vercel-ai)', /(?:generateText|streamText|generateObject)\s*\(\s*\{[^{}]*?model\s*:\s*openai\s*\(\s*['"]([^'"]+)['"]/g],
  ['anthropic (via vercel-ai)', /(?:generateText|streamText|generateObject)\s*\(\s*\{[^{}]*?model\s*:\s*anthropic\s*\(\s*['"]([^'"]+)['"]/g],

  // Google Generative AI
  ['google', /(?:genAI|GoogleGenerativeAI)\s*\([^)]*?\)\.getGenerativeModel\s*\(\s*\{[^{}]*?model\s*:\s*['"]([^'"]+)['"]/g],

  // Mistral / Cohere / Groq
  ['mistral', /\bmistral\.chat\.complete\s*\(\s*\{[^{}]*?model\s*:\s*['"]([^'"]+)['"]/g],
  ['cohere', /\b(?:cohere|co)\.(?:chat|generate)\s*\(\s*\{[^{}]*?model\s*:\s*['"]([^'"]+)['"]/g],
  ['groq', /\bgroq\.chat\.completions\.create\s*\(\s*\{[^{}]*?model\s*:\s*['"]([^'"]+)['"]/g],

  // Bedrock (AWS)
  ['bedrock', /InvokeModelCommand\s*\(\s*\{[^{}]*?modelId\s*:\s*['"]([^'"]+)['"]/g],

  // Replicate: "owner/model" with an optional ":version" suffix
  ['replicate', /replicate\.(?:run|predictions\.create)\s*\(\s*['"]([\w.-]+\/[\w.-]+(?::\w+)?)['"]/g],
].map(([provider, re]) => ({ provider, re }));
50
+
51
+ // Inference frameworks worth listing in AI-BOM
52
// Dependency names reported as inference frameworks in the AI-BOM.
// A few names ('openai', 'langchain') are listed under both ecosystems; the
// Set collapses the repeats.
const FRAMEWORK_PACKAGES = new Set([
  // --- Python: model runtimes and training stacks ---
  'transformers',
  'torch',
  'tensorflow',
  'tensorflow-cpu',
  'tf-keras',
  'jax',
  'jaxlib',
  'sentence-transformers',
  'diffusers',
  'accelerate',
  'bitsandbytes',
  'peft',
  'trl',
  // --- Python: hosted-provider SDKs ---
  'openai',
  'anthropic',
  'google-generativeai',
  'cohere',
  'mistralai',
  'groq',
  // --- Python: orchestration libraries ---
  'langchain',
  'llama-index',
  'haystack-ai',
  'guidance',
  'instructor',
  'litellm',
  // --- Python: serving and hub tooling ---
  'ollama',
  'vllm',
  'tgi',
  'huggingface_hub',
  'datasets',

  // --- Node: provider SDKs ---
  '@anthropic-ai/sdk',
  '@anthropic-ai/anthropic',
  'openai',
  'cohere-ai',
  '@mistralai/mistralai',
  'groq-sdk',
  'together-ai',
  'replicate',
  '@google/generative-ai',
  // --- Node: Vercel AI SDK ---
  'ai',
  '@ai-sdk/openai',
  '@ai-sdk/anthropic',
  '@ai-sdk/google',
  '@ai-sdk/mistral',
  // --- Node: orchestration libraries ---
  'langchain',
  '@langchain/core',
  '@langchain/openai',
  '@langchain/anthropic',
  'llamaindex',
]);
67
+
68
+ // Vector stores
69
// Dependency names reported as vector stores in the AI-BOM, grouped by
// product. Both hyphenated and underscored spellings are listed where the
// original list carried them.
const VECTOR_STORE_PACKAGES = new Set([
  // Pinecone
  '@pinecone-database/pinecone',
  'pinecone-client',
  'pinecone',
  // Weaviate
  'weaviate-ts-client',
  'weaviate-client',
  // Chroma
  'chromadb',
  '@chroma-core/chromadb',
  // Qdrant
  '@qdrant/js-client-rest',
  'qdrant-client',
  'qdrant_client',
  // pgvector
  'pgvector',
  // Milvus
  'pymilvus',
  '@zilliz/milvus2-sdk-node',
  // FAISS
  'faiss-cpu',
  'faiss-gpu',
  // Redis
  'redis-om',
]);
79
+
80
+ // Embedding model providers
81
// Dependency names treated as embedding-model providers.
const EMBEDDING_PACKAGES = new Set(['sentence-transformers', '@anthropic-ai/sdk', 'openai']);
86
+
87
// Prompt-template paths: either a file sitting directly inside a `prompt/`,
// `prompts/`, `template(s)/prompt(s)/` directory, or any file whose extension
// is .prompt, .j2, .jinja, .jinja2, .tmpl, .mustache or .hbs. The directory
// alternative only matches `/` before the file name, so it is meant to be
// used on paths already normalised to forward slashes.
const PROMPT_FILE_RE =
  /(?:^|[\\/])(?:prompts?|templates?\/prompts?)\/[^/]+$|\.(?:prompt|j2|jinja2?|tmpl|mustache|hbs)$/i;

// Paths with a test / fixture / example / docs / stories / codefixes /
// node_modules directory segment; files under these are excluded from the BOM.
const _NONPROD_PATH_RE =
  /(?:^|[\\/])(?:tests?|__tests__|spec|fixtures?|examples?|docs?|stories|codefixes|node_modules)[\\/]/i;

// Source-file extensions searched for model references (Python and JS/TS).
const _SCANNABLE_EXT_RE = /\.(?:py|js|jsx|ts|tsx|mjs|cjs)$/i;
90
+
91
// Short content fingerprint: the first 16 hex characters of the SHA-256
// digest of `s`. Falsy input is hashed as the empty string.
function _hash(s) {
  const hasher = crypto.createHash('sha256');
  hasher.update(s || '');
  const hex = hasher.digest('hex');
  return hex.slice(0, 16);
}
94
+
95
// Find model references in one source file.
//
// Returns an array of records shaped
//   { type: 'model', provider, modelId, revision, pinned, file, line }
// in this order: Hugging Face `from_pretrained` hits, then `hf_hub_download`
// hits, then each PROVIDER_PATTERNS entry in list order. `line` is the
// 1-based line on which the match starts.
//
// Returns [] when `content` is empty, when the path matches _NONPROD_PATH_RE,
// or when the extension is not in _SCANNABLE_EXT_RE.
function _extractModelsFromFile(fp, content) {
  const found = [];
  if (!content) return found;
  if (_NONPROD_PATH_RE.test(fp.replace(/\\/g, '/'))) return found;
  if (!_SCANNABLE_EXT_RE.test(fp)) return found;

  // Run one pattern over the whole file. A fresh RegExp is built from the
  // shared pattern's source so the module-level global regexes never carry
  // `lastIndex` state between files.
  const collect = (pattern, describe) => {
    const re = new RegExp(pattern.source, 'g');
    let hit;
    while ((hit = re.exec(content))) {
      found.push({
        type: 'model',
        ...describe(hit),
        file: fp,
        line: content.substring(0, hit.index).split('\n').length,
      });
    }
  };

  // Hugging Face loads count as pinned only when a revision was captured.
  const describeHuggingFace = (hit) => ({
    provider: 'huggingface',
    modelId: hit[1],
    revision: hit[2] || null,
    pinned: !!hit[2],
  });
  collect(HF_FROM_PRETRAINED_RE, describeHuggingFace);
  collect(HF_HUB_DOWNLOAD_RE, describeHuggingFace);

  // Hosted API models are referenced by name only, so they are never pinned.
  for (const { provider, re } of PROVIDER_PATTERNS) {
    collect(re, (hit) => ({
      provider,
      modelId: hit[1],
      revision: null,
      pinned: false,
    }));
  }

  return found;
}
144
+
145
// Describe a prompt-template file for the AI-BOM.
//
// Returns null when the (slash-normalised) path matches _NONPROD_PATH_RE,
// does not match PROMPT_FILE_RE, or when `content` is empty. Otherwise
// returns
//   { type: 'prompt-template', file, bytes, sha256_16, lines }
// where `file` is the path as given, `sha256_16` comes from _hash(content),
// and `lines` is the number of '\n'-separated segments.
//
// `bytes` is the UTF-8 encoded size of the content. It was previously
// `content.length`, which counts UTF-16 code units and under-reports any
// template containing non-ASCII text (em-dashes, accented or CJK characters).
// NOTE(review): assumes `content` is a string decoded from a UTF-8 file —
// confirm against the caller that populates fileContents.
function _extractPromptFile(fp, content) {
  const norm = fp.replace(/\\/g, '/');
  if (_NONPROD_PATH_RE.test(norm)) return null;
  if (!PROMPT_FILE_RE.test(norm)) return null;
  if (!content) return null;
  return {
    type: 'prompt-template',
    file: fp,
    bytes: Buffer.byteLength(content, 'utf8'),
    sha256_16: _hash(content),
    lines: content.split('\n').length,
  };
}
158
+
159
// Classify a dependency component by name.
//
// Looks the name up both lower-cased and as given, and returns the first
// category that matches, checked in this order: 'inference-framework',
// 'vector-store', 'embedding-provider'. Returns null when nothing matches.
// A missing name is treated as the empty string.
//
// Because the framework set is consulted first and every name currently in
// EMBEDDING_PACKAGES is also in FRAMEWORK_PACKAGES, 'embedding-provider' is
// not returned with the sets as defined in this file.
function _classifyFramework(c) {
  const given = c.name;
  const lowered = (given || '').toLowerCase();
  const listedIn = (packages) => packages.has(lowered) || packages.has(given);

  if (listedIn(FRAMEWORK_PACKAGES)) return 'inference-framework';
  if (listedIn(VECTOR_STORE_PACKAGES)) return 'vector-store';
  if (listedIn(EMBEDDING_PACKAGES)) return 'embedding-provider';
  return null;
}
166
+
167
// Public: build the AI-BOM from already-scanned data.
//   scan         - { components }: manifest components; a missing scan or
//                  missing `components` is treated as an empty list
//   fileContents - { [filePath]: fileText }, passed separately from `scan`
//   meta         - { startedAt }: used as `generatedAt` when set, otherwise
//                  the current time is used (no other meta field is read here)
// Returns the AI-BOM object: models, promptTemplates, frameworks,
// vectorStores, embeddings and a `summary` of counts.
export function buildAIBOM(scan, fileContents = {}, meta = {}) {
  const files = Object.entries(fileContents || {});
  const components = (scan && scan.components) || [];
  const startedAt = meta ? meta.startedAt : undefined;

  // 1. Models from source, 2. prompt templates.
  // A model is listed once per provider:modelId pair; the first occurrence
  // (in file iteration order) is the one that is kept.
  const models = [];
  const seenModelKey = new Set();
  const promptTemplates = [];
  for (const [fp, content] of files) {
    for (const m of _extractModelsFromFile(fp, content)) {
      const k = `${m.provider}:${m.modelId}`;
      if (seenModelKey.has(k)) continue;
      seenModelKey.add(k);
      models.push(m);
    }
    const pt = _extractPromptFile(fp, content);
    if (pt) promptTemplates.push(pt);
  }

  // 3. Frameworks / vector stores / embeddings from manifests
  const frameworks = [];
  const vectorStores = [];
  const embeddings = [];
  for (const c of components) {
    const cls = _classifyFramework(c);
    if (cls === 'inference-framework') {
      frameworks.push({ ecosystem: c.ecosystem, name: c.name, version: c.version, license: c.license || null });
    } else if (cls === 'vector-store') {
      vectorStores.push({ ecosystem: c.ecosystem, name: c.name, version: c.version });
    } else if (cls === 'embedding-provider') {
      embeddings.push({ ecosystem: c.ecosystem, name: c.name, version: c.version });
    }
  }

  return {
    aibomFormat: 'agentic-security AI-BOM',
    version: '1',
    cyclonedxCompatible: '1.7-ml-bom',
    generatedAt: startedAt || new Date().toISOString(),
    models,
    promptTemplates,
    frameworks,
    vectorStores,
    embeddings,
    summary: {
      totalModels: models.length,
      totalProviders: new Set(models.map(m => m.provider)).size,
      pinnedModels: models.filter(m => m.pinned).length,
      unpinnedModels: models.filter(m => !m.pinned).length,
      promptTemplates: promptTemplates.length,
      frameworks: frameworks.length,
      vectorStores: vectorStores.length,
      // Embeddings are collected above, so count them like the other categories.
      embeddings: embeddings.length,
    },
  };
}
218
+
219
// Markdown rendering of an AI-BOM object.
// Emits a summary table followed by one table per non-empty category
// (models, prompt templates, inference frameworks, vector stores, embedding
// providers). Every section ends with a blank line. Returns the Markdown text.
export function aibomToMarkdown(aibom) {
  // The embeddings list is part of the BOM but may be absent on objects built
  // elsewhere; treat a missing list as empty rather than throwing.
  const embeddings = aibom.embeddings || [];
  const out = [];

  // Append a titled table, skipped entirely when there are no rows.
  const section = (title, columns, rows) => {
    if (!rows.length) return;
    out.push(`## ${title}`);
    out.push('');
    out.push(`| ${columns.join(' | ')} |`);
    out.push(`|${columns.map(() => '---').join('|')}|`);
    for (const cells of rows) out.push(`| ${cells.join(' | ')} |`);
    out.push('');
  };

  out.push('# AI-BOM');
  out.push('');
  out.push(`Generated: ${aibom.generatedAt}`);
  out.push('');
  out.push('## Summary');
  out.push('');
  out.push('| Category | Count |');
  out.push('|---|---|');
  out.push(`| Models referenced | ${aibom.summary.totalModels} |`);
  out.push(`| Distinct providers | ${aibom.summary.totalProviders} |`);
  out.push(`| Pinned (revision/SHA) | ${aibom.summary.pinnedModels} |`);
  out.push(`| Unpinned | ${aibom.summary.unpinnedModels} |`);
  out.push(`| Prompt templates | ${aibom.summary.promptTemplates} |`);
  out.push(`| Inference frameworks | ${aibom.summary.frameworks} |`);
  out.push(`| Vector stores | ${aibom.summary.vectorStores} |`);
  // Counted from the list itself so this works even when summary has no embeddings field.
  out.push(`| Embedding providers | ${embeddings.length} |`);
  out.push('');

  section(
    'Models',
    ['Provider', 'Model', 'Pinned', 'File:Line'],
    aibom.models.map((m) => [
      m.provider,
      m.modelId,
      // Pinned models show the first 12 characters of the revision.
      m.pinned ? '✅ ' + (m.revision || '').slice(0, 12) : '❌',
      `${m.file}:${m.line}`,
    ]),
  );
  section(
    'Prompt templates',
    ['File', 'Lines', 'SHA-256 (16ch)'],
    aibom.promptTemplates.map((p) => [p.file, p.lines, p.sha256_16]),
  );
  section(
    'Inference frameworks',
    ['Ecosystem', 'Name', 'Version', 'License'],
    aibom.frameworks.map((f) => [f.ecosystem, f.name, f.version, f.license || '—']),
  );
  section(
    'Vector stores',
    ['Ecosystem', 'Name', 'Version'],
    aibom.vectorStores.map((v) => [v.ecosystem, v.name, v.version]),
  );
  section(
    'Embedding providers',
    ['Ecosystem', 'Name', 'Version'],
    embeddings.map((e) => [e.ecosystem, e.name, e.version]),
  );

  return out.join('\n');
}
@@ -0,0 +1,96 @@
1
+ // 0.7.0 Feat-8: API inventory export — Markdown / JSON / OpenAPI 3.1.
2
+ //
3
+ // Reuses scan.routes (output of scanRoutes in engine.js). Produces a structured
4
+ // API surface map with auth status and data classifications per endpoint.
5
+
6
// Aggregate counts over a list of routes.
// Returns { total, authenticated, unauthenticated, dataClasses }, where
// dataClasses maps each data-class label to the number of routes carrying it.
// A route is counted as unauthenticated when `hasAuth` is falsy.
function _summarize(routes) {
  const total = routes.length;
  let unauth = 0;
  const dataClasses = {};
  for (const r of routes) {
    if (!r.hasAuth) unauth += 1;
    // Scanner routes expose labels as `classifications`; the inventory shape
    // built by toAPIInventoryJSON renames that field to `dataClasses`. Reading
    // only the former left the tally empty for inventory routes.
    for (const c of r.classifications || r.dataClasses || []) {
      dataClasses[c] = (dataClasses[c] || 0) + 1;
    }
  }
  return { total, authenticated: total - unauth, unauthenticated: unauth, dataClasses };
}
13
+
14
// Build the JSON form of the API inventory from `scan.routes`.
// Each route is copied into a fixed shape with defaults for missing fields
// (`params` becomes `parameters`, `classifications` becomes `dataClasses`).
// Returns { summary, routes }, where `summary` is _summarize() of the
// reshaped routes. A scan without `routes` yields an empty inventory.
export function toAPIInventoryJSON(scan) {
  // Reshape one scanner route into its inventory entry.
  const toEntry = (route) => {
    const entry = {
      method: route.method,
      path: route.path,
      file: route.file,
      line: route.line,
      framework: route.framework || null,
      hasAuth: Boolean(route.hasAuth),
      hasFileUpload: Boolean(route.hasFileUpload),
      parameters: route.params || [],
      dataClasses: route.classifications || [],
      classifiedFields: route.classifiedFields || {},
    };
    return entry;
  };

  const source = scan.routes || [];
  const routes = source.map((route) => toEntry(route));
  const summary = _summarize(routes);
  return { summary, routes };
}
29
+
30
// Render the API inventory as Markdown: a heading, a totals line, an optional
// "Data classes touched" line, then one table row per endpoint.
// Rows are ordered by concern: unauthenticated routes outrank authenticated
// ones, and within each group routes touching classified data come first.
export function toAPIInventoryMarkdown(scan) {
  const { summary, routes } = toAPIInventoryJSON(scan);

  // Higher score = listed earlier. Missing auth weighs more than data classes.
  const concern = (route) => {
    const authPenalty = route.hasAuth ? 0 : 10;
    const dataPenalty = route.dataClasses.length ? 5 : 0;
    return authPenalty + dataPenalty;
  };

  const md = [
    `# API inventory`,
    '',
    `**Total endpoints:** ${summary.total} **Authenticated:** ${summary.authenticated} **Unauthenticated:** ${summary.unauthenticated}`,
    '',
  ];

  const classCounts = Object.entries(summary.dataClasses);
  if (classCounts.length) {
    const listing = classCounts.map(([k, v]) => `${k} (${v})`).join(', ');
    md.push(`**Data classes touched:** ${listing}`, '');
  }

  md.push('| Method | Path | Auth | Data classes | File:Line |');
  md.push('|---|---|---|---|---|');

  // Sort a copy so the inventory's own route order is left untouched.
  const ordered = [...routes].sort((left, right) => concern(right) - concern(left));
  ordered.forEach((route) => {
    let auth = '⚠️ none';
    if (route.hasAuth) auth = '🔒';
    const dc = route.dataClasses.join(', ') || '—';
    md.push(`| \`${route.method}\` | \`${route.path}\` | ${auth} | ${dc} | \`${route.file}:${route.line}\` |`);
  });

  return md.join('\n');
}
56
+
57
// OpenAPI 3.1 stub. We don't infer request/response schemas (would require
// runtime), but we DO emit the path inventory with security and x-data-classes
// extensions. Useful as a starting point for `swagger-codegen` or as a
// compliance artefact for security questionnaires.
//   scan - scan result; routes are taken from toAPIInventoryJSON(scan)
//   meta - { startedAt }: used for `x-generated-at`, else the current time
// Returns the OpenAPI document object. When two routes share a path and
// method, the later one replaces the earlier one.
// NOTE(review): paths are emitted exactly as the scanner reported them; if it
// reports framework-style parameters (e.g. `:id`) they are not rewritten to
// OpenAPI `{id}` templating — confirm whether that conversion is wanted.
export function toOpenAPI(scan, meta = {}) {
  const inv = toAPIInventoryJSON(scan);
  const startedAt = meta ? meta.startedAt : undefined;
  const paths = {};
  for (const r of inv.routes) {
    // Routes without a path/method fall back to `GET /`. The same fallback
    // values feed the summary so it never reads "undefined undefined".
    const p = r.path || '/';
    const verb = r.method || 'GET';
    const method = verb.toLowerCase();
    paths[p] = paths[p] || {};
    paths[p][method] = {
      // The source line is appended to keep ids distinct across routes.
      operationId: `${method}_${p.replace(/[^A-Za-z0-9]+/g, '_')}_${r.line}`,
      summary: `${verb} ${p}`,
      tags: [r.framework || 'unknown'],
      ...(r.hasAuth ? { security: [{ bearerAuth: [] }] } : {}),
      ...(r.dataClasses.length ? { 'x-agentic-security-data-classes': r.dataClasses } : {}),
      ...(r.hasFileUpload ? { 'x-agentic-security-file-upload': true } : {}),
      'x-source-location': `${r.file}:${r.line}`,
      'x-parameters-detected': r.parameters,
      responses: { '200': { description: 'OK' } },
    };
  }
  return {
    openapi: '3.1.0',
    info: {
      title: 'API surface inventory (agentic-security)',
      version: '1.0.0',
      description: `Auto-generated API inventory. ${inv.summary.total} endpoints, ${inv.summary.unauthenticated} unauthenticated.`,
      'x-generated-at': startedAt || new Date().toISOString(),
      'x-generator': 'agentic-security/0.7.0',
    },
    components: {
      securitySchemes: {
        bearerAuth: { type: 'http', scheme: 'bearer' },
      },
    },
    paths,
  };
}