rag-lite-ts 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309)
  1. package/dist/{core → cjs/core}/model-validator.js +1 -1
  2. package/dist/{core → cjs/core}/vector-index.js +4 -2
  3. package/dist/esm/api-errors.d.ts +90 -0
  4. package/dist/esm/api-errors.js +320 -0
  5. package/dist/esm/cli/indexer.d.ts +11 -0
  6. package/dist/esm/cli/indexer.js +471 -0
  7. package/dist/esm/cli/search.d.ts +7 -0
  8. package/dist/esm/cli/search.js +332 -0
  9. package/dist/esm/cli.d.ts +3 -0
  10. package/dist/esm/cli.js +529 -0
  11. package/dist/esm/config.d.ts +51 -0
  12. package/dist/esm/config.js +79 -0
  13. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  14. package/dist/esm/core/abstract-embedder.js +264 -0
  15. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  16. package/dist/esm/core/actionable-error-messages.js +397 -0
  17. package/dist/esm/core/adapters.d.ts +93 -0
  18. package/dist/esm/core/adapters.js +139 -0
  19. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  20. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  21. package/dist/esm/core/binary-index-format.d.ts +78 -0
  22. package/dist/esm/core/binary-index-format.js +291 -0
  23. package/dist/esm/core/chunker.d.ts +119 -0
  24. package/dist/esm/core/chunker.js +73 -0
  25. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  26. package/dist/esm/core/cli-database-utils.js +239 -0
  27. package/dist/esm/core/config.d.ts +102 -0
  28. package/dist/esm/core/config.js +247 -0
  29. package/dist/esm/core/content-errors.d.ts +111 -0
  30. package/dist/esm/core/content-errors.js +362 -0
  31. package/dist/esm/core/content-manager.d.ts +335 -0
  32. package/dist/esm/core/content-manager.js +1476 -0
  33. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  34. package/dist/esm/core/content-performance-optimizer.js +516 -0
  35. package/dist/esm/core/content-resolver.d.ts +104 -0
  36. package/dist/esm/core/content-resolver.js +285 -0
  37. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  38. package/dist/esm/core/cross-modal-search.js +342 -0
  39. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  40. package/dist/esm/core/database-connection-manager.js +310 -0
  41. package/dist/esm/core/db.d.ts +213 -0
  42. package/dist/esm/core/db.js +895 -0
  43. package/dist/esm/core/embedder-factory.d.ts +154 -0
  44. package/dist/esm/core/embedder-factory.js +311 -0
  45. package/dist/esm/core/error-handler.d.ts +112 -0
  46. package/dist/esm/core/error-handler.js +239 -0
  47. package/dist/esm/core/index.d.ts +59 -0
  48. package/dist/esm/core/index.js +69 -0
  49. package/dist/esm/core/ingestion.d.ts +202 -0
  50. package/dist/esm/core/ingestion.js +901 -0
  51. package/dist/esm/core/interfaces.d.ts +408 -0
  52. package/dist/esm/core/interfaces.js +106 -0
  53. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  54. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  55. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  56. package/dist/esm/core/mode-detection-service.js +565 -0
  57. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  58. package/dist/esm/core/mode-model-validator.js +203 -0
  59. package/dist/esm/core/model-registry.d.ts +116 -0
  60. package/dist/esm/core/model-registry.js +411 -0
  61. package/dist/esm/core/model-validator.d.ts +217 -0
  62. package/dist/esm/core/model-validator.js +782 -0
  63. package/dist/esm/core/path-manager.d.ts +47 -0
  64. package/dist/esm/core/path-manager.js +71 -0
  65. package/dist/esm/core/raglite-paths.d.ts +121 -0
  66. package/dist/esm/core/raglite-paths.js +145 -0
  67. package/dist/esm/core/reranking-config.d.ts +42 -0
  68. package/dist/esm/core/reranking-config.js +147 -0
  69. package/dist/esm/core/reranking-factory.d.ts +92 -0
  70. package/dist/esm/core/reranking-factory.js +410 -0
  71. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  72. package/dist/esm/core/reranking-strategies.js +650 -0
  73. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  74. package/dist/esm/core/resource-cleanup.js +371 -0
  75. package/dist/esm/core/resource-manager.d.ts +212 -0
  76. package/dist/esm/core/resource-manager.js +564 -0
  77. package/dist/esm/core/search-pipeline.d.ts +111 -0
  78. package/dist/esm/core/search-pipeline.js +287 -0
  79. package/dist/esm/core/search.d.ts +141 -0
  80. package/dist/esm/core/search.js +320 -0
  81. package/dist/esm/core/streaming-operations.d.ts +145 -0
  82. package/dist/esm/core/streaming-operations.js +409 -0
  83. package/dist/esm/core/types.d.ts +66 -0
  84. package/dist/esm/core/types.js +6 -0
  85. package/dist/esm/core/universal-embedder.d.ts +177 -0
  86. package/dist/esm/core/universal-embedder.js +139 -0
  87. package/dist/esm/core/validation-messages.d.ts +99 -0
  88. package/dist/esm/core/validation-messages.js +334 -0
  89. package/dist/esm/core/vector-index.d.ts +72 -0
  90. package/dist/esm/core/vector-index.js +333 -0
  91. package/dist/esm/dom-polyfills.d.ts +6 -0
  92. package/dist/esm/dom-polyfills.js +37 -0
  93. package/dist/esm/factories/index.d.ts +27 -0
  94. package/dist/esm/factories/index.js +29 -0
  95. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  96. package/dist/esm/factories/ingestion-factory.js +477 -0
  97. package/dist/esm/factories/search-factory.d.ts +154 -0
  98. package/dist/esm/factories/search-factory.js +344 -0
  99. package/dist/esm/file-processor.d.ts +147 -0
  100. package/dist/esm/file-processor.js +963 -0
  101. package/dist/esm/index-manager.d.ts +116 -0
  102. package/dist/esm/index-manager.js +598 -0
  103. package/dist/esm/index.d.ts +75 -0
  104. package/dist/esm/index.js +110 -0
  105. package/dist/esm/indexer.d.ts +7 -0
  106. package/dist/esm/indexer.js +54 -0
  107. package/dist/esm/ingestion.d.ts +63 -0
  108. package/dist/esm/ingestion.js +124 -0
  109. package/dist/esm/mcp-server.d.ts +46 -0
  110. package/dist/esm/mcp-server.js +1820 -0
  111. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  112. package/dist/esm/multimodal/clip-embedder.js +996 -0
  113. package/dist/esm/multimodal/index.d.ts +6 -0
  114. package/dist/esm/multimodal/index.js +6 -0
  115. package/dist/esm/preprocess.d.ts +19 -0
  116. package/dist/esm/preprocess.js +203 -0
  117. package/dist/esm/preprocessors/index.d.ts +17 -0
  118. package/dist/esm/preprocessors/index.js +38 -0
  119. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  120. package/dist/esm/preprocessors/mdx.js +101 -0
  121. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  122. package/dist/esm/preprocessors/mermaid.js +329 -0
  123. package/dist/esm/preprocessors/registry.d.ts +56 -0
  124. package/dist/esm/preprocessors/registry.js +179 -0
  125. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  126. package/dist/esm/run-error-recovery-tests.js +101 -0
  127. package/dist/esm/search-standalone.d.ts +7 -0
  128. package/dist/esm/search-standalone.js +117 -0
  129. package/dist/esm/search.d.ts +99 -0
  130. package/dist/esm/search.js +177 -0
  131. package/dist/esm/test-utils.d.ts +18 -0
  132. package/dist/esm/test-utils.js +27 -0
  133. package/dist/esm/text/chunker.d.ts +33 -0
  134. package/dist/esm/text/chunker.js +279 -0
  135. package/dist/esm/text/embedder.d.ts +111 -0
  136. package/dist/esm/text/embedder.js +386 -0
  137. package/dist/esm/text/index.d.ts +8 -0
  138. package/dist/esm/text/index.js +9 -0
  139. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  140. package/dist/esm/text/preprocessors/index.js +38 -0
  141. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  142. package/dist/esm/text/preprocessors/mdx.js +101 -0
  143. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  144. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  145. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  146. package/dist/esm/text/preprocessors/registry.js +180 -0
  147. package/dist/esm/text/reranker.d.ts +49 -0
  148. package/dist/esm/text/reranker.js +274 -0
  149. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  150. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  151. package/dist/esm/text/tokenizer.d.ts +22 -0
  152. package/dist/esm/text/tokenizer.js +64 -0
  153. package/dist/esm/types.d.ts +83 -0
  154. package/dist/esm/types.js +3 -0
  155. package/dist/esm/utils/vector-math.d.ts +31 -0
  156. package/dist/esm/utils/vector-math.js +70 -0
  157. package/package.json +30 -12
  158. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  159. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  160. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  161. /package/dist/{cli → cjs/cli}/indexer.js +0 -0
  162. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  163. /package/dist/{cli → cjs/cli}/search.js +0 -0
  164. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  165. /package/dist/{cli.js → cjs/cli.js} +0 -0
  166. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  167. /package/dist/{config.js → cjs/config.js} +0 -0
  168. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  169. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  170. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  171. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  172. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  173. /package/dist/{core → cjs/core}/adapters.js +0 -0
  174. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  175. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  176. /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
  177. /package/dist/{core → cjs/core}/binary-index-format.js +0 -0
  178. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  179. /package/dist/{core → cjs/core}/chunker.js +0 -0
  180. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  181. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  182. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/config.js +0 -0
  184. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  186. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  188. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  192. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  194. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  196. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/db.js +0 -0
  198. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  200. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  202. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/index.js +0 -0
  204. /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/ingestion.js +0 -0
  206. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  208. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  210. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  212. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  214. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  216. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  218. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  219. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  220. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  221. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  222. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  223. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  224. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  225. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  226. /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
  227. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  229. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  231. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  233. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/search.js +0 -0
  235. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  237. /package/dist/{core → cjs/core}/types.d.ts +0 -0
  238. /package/dist/{core → cjs/core}/types.js +0 -0
  239. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  240. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  241. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  242. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  243. /package/dist/{core → cjs/core}/vector-index.d.ts +0 -0
  244. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  245. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  246. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  247. /package/dist/{factories → cjs/factories}/index.js +0 -0
  248. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  249. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  250. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  251. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  252. /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
  253. /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
  254. /package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +0 -0
  255. /package/dist/{index-manager.js → cjs/index-manager.js} +0 -0
  256. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  257. /package/dist/{index.js → cjs/index.js} +0 -0
  258. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  259. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  260. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  261. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  262. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  263. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  264. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  265. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  268. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  269. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  270. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  278. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  279. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  280. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  281. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  282. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  283. /package/dist/{search.js → cjs/search.js} +0 -0
  284. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  285. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  286. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  287. /package/dist/{text → cjs/text}/chunker.js +0 -0
  288. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  289. /package/dist/{text → cjs/text}/embedder.js +0 -0
  290. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  291. /package/dist/{text → cjs/text}/index.js +0 -0
  292. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  300. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  301. /package/dist/{text → cjs/text}/reranker.js +0 -0
  302. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  304. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  306. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  307. /package/dist/{types.js → cjs/types.js} +0 -0
  308. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,327 @@
1
+ /**
2
+ * MULTIMODAL IMPLEMENTATION — CLIP Embedder Implementation
3
+ *
4
+ * Implements UniversalEmbedder interface for CLIP models with full multimodal support.
5
+ * Provides reliable text and image embedding using CLIPTextModelWithProjection and
6
+ * CLIPVisionModelWithProjection for true cross-modal search capabilities.
7
+ *
8
+ * Features:
9
+ * - Text embedding using CLIP text encoder (512-dimensional vectors)
10
+ * - Image embedding using CLIP vision encoder (512-dimensional vectors)
11
+ * - Unified embedding space enabling cross-modal similarity search
12
+ * - Text queries can find semantically similar images
13
+ * - Image queries can find semantically similar text
14
+ * - Batch processing optimization for both text and images
15
+ *
16
+ * Supported Models:
17
+ * - Xenova/clip-vit-base-patch32 (recommended, faster)
18
+ * - Xenova/clip-vit-base-patch16 (higher accuracy, slower)
19
+ */
20
+ import { BaseUniversalEmbedder, type EmbedderOptions } from '../core/abstract-embedder.js';
21
+ import type { EmbeddingResult } from '../types.js';
22
+ /**
23
+ * CLIP embedder implementation for multimodal content
24
+ *
25
+ * Provides reliable text and image embedding using separate CLIP model components:
26
+ * - CLIPTextModelWithProjection for text-only embedding (no pixel_values errors)
27
+ * - CLIPVisionModelWithProjection for image embedding
28
+ * - AutoTokenizer for proper text tokenization with CLIP's 77 token limit
29
+ *
30
+ * All embeddings are 512-dimensional vectors in a unified embedding space,
31
+ * enabling true cross-modal search where text queries can find images and
32
+ * image queries can find text based on semantic similarity.
33
+ *
34
+ * Example Usage:
35
+ * ```typescript
36
+ * const embedder = await createEmbedder('Xenova/clip-vit-base-patch32');
37
+ *
38
+ * // Embed text
39
+ * const textResult = await embedder.embedText('a red sports car');
40
+ *
41
+ * // Embed image
42
+ * const imageResult = await embedder.embedImage('./car.jpg');
43
+ *
44
+ * // Calculate cross-modal similarity
45
+ * const similarity = cosineSimilarity(textResult.vector, imageResult.vector);
46
+ * ```
47
+ */
48
+ export declare class CLIPEmbedder extends BaseUniversalEmbedder {
49
+ private tokenizer;
50
+ private textModel;
51
+ private imageModel;
52
+ private resourceManager;
53
+ private embedderResourceId?;
54
+ private tokenizerResourceId?;
55
+ private textModelResourceId?;
56
+ private imageModelResourceId?;
57
+ constructor(modelName: string, options?: EmbedderOptions);
58
+ /**
59
+ * Load the CLIP model components
60
+ *
61
+ * Loads three separate components for reliable multimodal embedding:
62
+ * 1. AutoTokenizer - Handles text tokenization with CLIP's 77 token limit
63
+ * 2. CLIPTextModelWithProjection - Generates text embeddings without pixel_values errors
64
+ * 3. CLIPVisionModelWithProjection - Generates image embeddings
65
+ *
66
+ * All components are registered with the resource manager for proper cleanup.
67
+ * Models are cached locally after first download for faster subsequent loads.
68
+ *
69
+ * @throws {Error} If model loading fails or components are not available
70
+ */
71
+ loadModel(): Promise<void>;
72
+ /**
73
+ * Clean up model resources with comprehensive disposal
74
+ *
75
+ * Properly disposes of all CLIP model components:
76
+ * - Tokenizer resources
77
+ * - Text model resources
78
+ * - Vision model resources
79
+ *
80
+ * Uses the resource manager for coordinated cleanup and forces garbage
81
+ * collection to free memory from CLIP models which can be memory intensive.
82
+ *
83
+ * This method is safe to call multiple times and will not throw errors
84
+ * during cleanup; errors are logged but do not prevent cleanup from completing.
85
+ */
86
+ cleanup(): Promise<void>;
87
+ /**
88
+ * Apply L2-normalization to an embedding vector
89
+ *
90
+ * L2-normalization ensures that all embeddings have unit length (magnitude = 1),
91
+ * which is essential for CLIP models as they were trained with normalized embeddings.
92
+ * This normalization makes cosine similarity calculations more reliable and ensures
93
+ * that vector magnitudes don't affect similarity scores.
94
+ *
95
+ * @param embedding - The embedding vector to normalize (modified in-place)
96
+ * @returns The normalized embedding vector (same reference as input)
97
+ * @private
98
+ */
99
+ private normalizeEmbedding;
100
+ /**
101
+ * Embed text using CLIP text encoder
102
+ *
103
+ * Uses CLIPTextModelWithProjection for reliable text-only embedding without
104
+ * pixel_values errors. Text is tokenized with CLIP's 77 token limit and
105
+ * automatically truncated if necessary.
106
+ *
107
+ * Returns a 512-dimensional L2-normalized embedding vector in the unified CLIP
108
+ * embedding space, which is directly comparable to image embeddings for cross-modal search.
109
+ *
110
+ * @param text - The text to embed (will be trimmed and validated)
111
+ * @returns EmbeddingResult with 512-dimensional normalized vector and metadata
112
+ * @throws {Error} If text is empty, model not loaded, or embedding fails
113
+ *
114
+ * @example
115
+ * ```typescript
116
+ * const result = await embedder.embedText('a red sports car');
117
+ * console.log(result.vector.length); // 512
118
+ * console.log(result.contentType); // 'text'
119
+ * ```
120
+ */
121
+ embedText(text: string): Promise<EmbeddingResult>;
122
+ /**
123
+ * Embed image using CLIP vision encoder
124
+ *
125
+ * Uses CLIPVisionModelWithProjection to generate image embeddings in the same
126
+ * unified embedding space as text embeddings, enabling true cross-modal search.
127
+ *
128
+ * Supports both local file paths and URLs. Images are automatically preprocessed:
129
+ * - Resized to 224x224 pixels (CLIP's expected input size)
130
+ * - Converted to proper pixel_values format using AutoProcessor
131
+ * - Normalized for the CLIP vision model
132
+ *
133
+ * Returns a 512-dimensional L2-normalized embedding vector directly comparable to text embeddings.
134
+ *
135
+ * @param imagePath - Local file path or URL to the image
136
+ * @returns EmbeddingResult with 512-dimensional normalized vector and metadata
137
+ * @throws {Error} If image not found, unsupported format, or embedding fails
138
+ *
139
+ * @example
140
+ * ```typescript
141
+ * // Local file
142
+ * const result = await embedder.embedImage('./car.jpg');
143
+ *
144
+ * // URL
145
+ * const result = await embedder.embedImage('https://example.com/car.jpg');
146
+ *
147
+ * console.log(result.vector.length); // 512
148
+ * console.log(result.contentType); // 'image'
149
+ * ```
150
+ *
151
+ * Supported formats: PNG, JPEG, GIF, BMP, WebP
152
+ */
153
+ embedImage(imagePath: string): Promise<EmbeddingResult>;
154
+ /**
155
+ * Load and preprocess image for CLIP vision model
156
+ *
157
+ * Handles image loading from both local files and URLs with automatic format
158
+ * detection and preprocessing. Uses the Sharp library when available for better
159
+ * Node.js support, and falls back to RawImage for browser compatibility.
160
+ *
161
+ * Preprocessing steps:
162
+ * 1. Load image from path or URL
163
+ * 2. Resize to 224x224 pixels (CLIP's expected input size)
164
+ * 3. Convert to RGB format if needed
165
+ * 4. Return RawImage object for AutoProcessor
166
+ *
167
+ * @param imagePath - Local file path or URL to the image
168
+ * @returns RawImage object ready for AutoProcessor
169
+ * @throws {Error} If image loading or preprocessing fails
170
+ * @private
171
+ */
172
+ private loadAndPreprocessImage;
173
+ /**
174
+ * Optimized batch processing for CLIP models
175
+ *
176
+ * Processes mixed batches of text and image content efficiently using the
177
+ * BatchProcessingOptimizer for memory management and progress tracking.
178
+ *
179
+ * Features:
180
+ * - Automatic separation of text and image items
181
+ * - Memory-efficient processing for large batches
182
+ * - Progress reporting for batches > 20 items
183
+ * - Garbage collection between batches
184
+ * - Detailed statistics logging
185
+ *
186
+ * @param batch - Array of items with content, contentType, and optional metadata
187
+ * @returns Array of EmbeddingResult objects in the same order as input
188
+ * @throws {Error} If batch processing fails
189
+ * @protected
190
+ */
191
+ protected processBatch(batch: Array<{
192
+ content: string;
193
+ contentType: string;
194
+ metadata?: Record<string, any>;
195
+ }>): Promise<EmbeddingResult[]>;
196
+ /**
197
+ * Process batch of text items using CLIPTextModelWithProjection
198
+ *
199
+ * Efficiently processes multiple text items by tokenizing all texts first,
200
+ * then generating embeddings sequentially. This approach balances memory
201
+ * usage with processing speed.
202
+ *
203
+ * @param textItems - Array of text items to process
204
+ * @returns Array of EmbeddingResult objects
205
+ * @throws {Error} If batch processing fails or dimension mismatch occurs
206
+ * @private
207
+ */
208
+ private processBatchText;
209
+ /**
210
+ * Get comprehensive model information including CLIP-specific capabilities
211
+ *
212
+ * Extends base model info with CLIP-specific capabilities including multimodal
213
+ * support, zero-shot classification, and cross-modal retrieval features.
214
+ *
215
+ * @returns Object with model information and capabilities
216
+ */
217
+ getModelInfo(): {
218
+ capabilities: {
219
+ supportsMultimodal: boolean;
220
+ supportsZeroShotClassification: boolean;
221
+ supportsImageTextSimilarity: boolean;
222
+ supportsTextImageRetrieval: boolean;
223
+ recommendedUseCase: string;
224
+ imageEmbeddingStatus: string;
225
+ supportsText: boolean;
226
+ supportsImages: boolean;
227
+ supportsBatchProcessing: boolean;
228
+ supportsMetadata: boolean;
229
+ maxBatchSize?: number;
230
+ maxTextLength?: number;
231
+ supportedImageFormats?: readonly string[];
232
+ supportsCrossModalSearch?: boolean;
233
+ unifiedEmbeddingSpace?: boolean;
234
+ reliableImplementation?: boolean;
235
+ };
236
+ name: string;
237
+ type: import("../core/universal-embedder.js").ModelType;
238
+ dimensions: number;
239
+ version: string;
240
+ supportedContentTypes: readonly string[];
241
+ requirements: import("../types.js").ModelRequirements;
242
+ };
243
+ /**
244
+ * Check if the model is suitable for a specific task
245
+ *
246
+ * CLIP models excel at similarity, classification, retrieval, and multimodal
247
+ * tasks due to their unified embedding space and zero-shot capabilities.
248
+ *
249
+ * @param task - The task type to check
250
+ * @returns true if CLIP is suitable for the task, false otherwise
251
+ */
252
+ isSuitableForTask(task: 'similarity' | 'classification' | 'clustering' | 'retrieval' | 'multimodal'): boolean;
253
+ /**
254
+ * Get information about multimodal capabilities
255
+ *
256
+ * Returns detailed information about what content types are supported and
257
+ * what features are planned for future implementation.
258
+ *
259
+ * @returns Object describing multimodal support status
260
+ */
261
+ getMultimodalCapabilities(): {
262
+ textSupport: boolean;
263
+ imageSupport: boolean;
264
+ videoSupport: boolean;
265
+ audioSupport: boolean;
266
+ plannedFeatures: string[];
267
+ };
268
+ /**
269
+ * Get CLIP model variant information
270
+ *
271
+ * Extracts architecture details from the model name to provide variant-specific
272
+ * configuration parameters like patch size, image size, and text length limits.
273
+ *
274
+ * @returns Object with architecture details
275
+ */
276
+ getModelVariant(): {
277
+ architecture: string;
278
+ patchSize: number;
279
+ imageSize: number;
280
+ textMaxLength: number;
281
+ };
282
+ /**
283
+ * Check if text length is within CLIP's token limit
284
+ *
285
+ * Estimates token count based on character length (rough approximation of
286
+ * ~4 characters per token for English text). CLIP has a hard limit of 77 tokens.
287
+ *
288
+ * @param text - Text to validate
289
+ * @returns true if the estimated token count is within the limit, false otherwise
290
+ */
291
+ isTextLengthValid(text: string): boolean;
292
+ /**
293
+ * Get performance characteristics for this CLIP variant
294
+ *
295
+ * Provides guidance on speed, accuracy, memory usage, and recommended batch
296
+ * sizes based on the CLIP model variant (patch32 vs patch16).
297
+ *
298
+ * @returns Object with performance characteristics
299
+ */
300
+ getPerformanceInfo(): {
301
+ speed: 'fast' | 'medium' | 'slow';
302
+ accuracy: 'good' | 'better' | 'best';
303
+ memoryUsage: 'low' | 'medium' | 'high';
304
+ recommendedBatchSize: number;
305
+ };
306
+ /**
307
+ * Check if all CLIP model components are loaded
308
+ *
309
+ * Verifies that tokenizer, text model, and vision model are all loaded and
310
+ * ready for use. All three components must be available for the embedder
311
+ * to be considered fully loaded.
312
+ *
313
+ * @returns true if all components are loaded, false otherwise
314
+ */
315
+ isLoaded(): boolean;
316
+ /**
317
+ * Validate that this is a supported CLIP model
318
+ *
319
+ * Checks the model name against the list of supported CLIP models. Currently
320
+ * supports Xenova/clip-vit-base-patch32 and Xenova/clip-vit-base-patch16.
321
+ *
322
+ * @throws {Error} If model is not in the supported list
323
+ * @private
324
+ */
325
+ private validateCLIPModel;
326
+ }
327
+ //# sourceMappingURL=clip-embedder.d.ts.map