rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310)
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,125 @@
1
+ /**
2
+ * CORE MODULE — Abstract Base Embedder
3
+ *
4
+ * Provides model-agnostic base functionality for all embedder implementations.
5
+ * This is an abstract base class, not a concrete implementation.
6
+ *
7
+ * ARCHITECTURAL NOTE:
8
+ * While this contains implementation logic, it remains in the core layer because:
9
+ * 1. It's model-agnostic (no knowledge of specific models or transformers.js)
10
+ * 2. It's shared by multiple implementation layers (text, multimodal)
11
+ * 3. It provides common infrastructure (lifecycle, validation, batch processing)
12
+ * 4. Moving it would create awkward cross-layer dependencies
13
+ *
14
+ * This follows the "shared base class" pattern common in framework design,
15
+ * similar to React.Component, Django Model, or other framework base classes.
16
+ *
17
+ * RESPONSIBILITIES:
18
+ * - Model lifecycle management (loading, cleanup, disposal)
19
+ * - Batch processing coordination
20
+ * - Input validation and text truncation
21
+ * - Error handling with helpful messages
22
+ * - Embedding ID generation
23
+ * - Common utility methods
24
+ *
25
+ * IMPLEMENTATION LAYERS:
26
+ * - Text: SentenceTransformerEmbedder extends this class
27
+ * - Multimodal: CLIPEmbedder extends this class
28
+ */
29
+ import type { UniversalEmbedder, ModelInfo, ModelType, EmbeddingBatchItem } from './universal-embedder.js';
30
+ import type { EmbeddingResult } from '../types.js';
/**
 * Shared, model-agnostic base for every embedder implementation.
 *
 * Concrete embedders supply model loading, text embedding and cleanup;
 * this class contributes lifecycle bookkeeping, batching, input checks,
 * embedding-ID generation and loading-error decoration.
 */
export declare abstract class BaseUniversalEmbedder implements UniversalEmbedder {
    /** Name of the model this embedder was created for. */
    readonly modelName: string;
    /** Options handed to the constructor. */
    protected readonly options: EmbedderOptions;
    /** Backing flag for {@link isLoaded}. */
    protected _isLoaded: boolean;
    /** Model description that backs the public getters. */
    protected _modelInfo: ModelInfo;
    /**
     * @param modelName - Model to look up; construction fails with an Error
     *   when the model is not supported.
     * @param options - Optional embedder configuration.
     */
    constructor(modelName: string, options?: EmbedderOptions);
    /** Model type taken from the model description. */
    get modelType(): ModelType;
    /** Embedding vector size taken from the model description. */
    get dimensions(): number;
    /** Content types the model accepts. */
    get supportedContentTypes(): readonly string[];
    /** Reports whether the model is currently marked as loaded. */
    isLoaded(): boolean;
    /** Returns a shallow copy of the model description. */
    getModelInfo(): ModelInfo;
    /**
     * Loads the underlying model. Subclasses must implement this.
     */
    abstract loadModel(): Promise<void>;
    /**
     * Produces an embedding for a piece of text. Subclasses must implement this.
     */
    abstract embedText(text: string): Promise<EmbeddingResult>;
    /**
     * Releases model resources. Subclasses must implement this.
     */
    abstract cleanup(): Promise<void>;
    /**
     * Runs {@link cleanup}, marks the embedder as unloaded and requests a
     * garbage-collection pass when the runtime offers one. Failures are
     * reported as warnings rather than thrown. Call this once the embedder
     * is no longer needed.
     */
    dispose(): Promise<void>;
    /**
     * Produces an embedding for an image. Optional: only multimodal
     * embedders provide it.
     */
    embedImage?(imagePath: string): Promise<EmbeddingResult>;
    /**
     * Embeds many items, loading the model first if necessary and working
     * through the input in chunks. Subclasses may override this with a more
     * efficient strategy.
     */
    embedBatch(items: EmbeddingBatchItem[]): Promise<EmbeddingResult[]>;
    /**
     * Embeds one chunk of items. An item that fails is reported as a warning
     * and represented by a zero-vector placeholder so the remaining items
     * still get processed. Subclasses may override this.
     */
    protected processBatch(batch: EmbeddingBatchItem[]): Promise<EmbeddingResult[]>;
    /**
     * Throws when the model has not been loaded yet.
     */
    protected ensureLoaded(): void;
    /**
     * Builds an embedding ID from the content type (default `'text'`), a
     * content hash, the current timestamp and a random suffix.
     */
    protected generateEmbeddingId(content: string, contentType?: string): string;
    /**
     * Lightweight string hash used for content identification.
     */
    private simpleHash;
    /**
     * Warns when the text is longer than the model's maximum text length.
     * It does not modify the text and does not throw.
     */
    protected validateTextLength(text: string): void;
    /**
     * Shortens the text to the model's maximum text length when it is longer.
     */
    protected truncateText(text: string): string;
    /**
     * Writes a progress message prefixed with the model name.
     */
    protected logModelLoading(stage: string, details?: string): void;
    /**
     * Converts a loading failure into an Error carrying troubleshooting
     * guidance. The new Error is returned, not thrown.
     */
    protected handleLoadingError(error: Error): Error;
}
/**
 * Configuration accepted by embedder instances. Every field is optional.
 */
export interface EmbedderOptions {
    /** Cache location. NOTE(review): presumably where model files are cached; confirm against the concrete embedders. */
    cachePath?: string;
    /** Upper bound on items embedded per batch; validation accepts 1 to 128. */
    maxBatchSize?: number;
    /** Timeout in milliseconds; validation requires at least 1000. */
    timeout?: number;
    /** Whether GPU execution is requested. */
    enableGPU?: boolean;
    /** Free-form extra settings. NOTE(review): consumers are not visible here. */
    customConfig?: Record<string, any>;
    /** Logging verbosity. */
    logLevel?: 'debug' | 'info' | 'warn' | 'error' | 'silent';
}
/**
 * Builds an options object by layering the caller's values over the
 * built-in defaults.
 *
 * @param options - Values that should replace the defaults.
 * @returns The merged options.
 */
export declare function createEmbedderOptions(
    options?: Partial<EmbedderOptions>
): EmbedderOptions;
/**
 * Checks embedder options and throws an Error describing the first
 * setting that is out of range.
 *
 * @param options - Options to check.
 */
export declare function validateEmbedderOptions(
    options: EmbedderOptions
): void;
125
+ //# sourceMappingURL=abstract-embedder.d.ts.map
@@ -0,0 +1,264 @@
1
+ /**
2
+ * CORE MODULE — Abstract Base Embedder
3
+ *
4
+ * Provides model-agnostic base functionality for all embedder implementations.
5
+ * This is an abstract base class, not a concrete implementation.
6
+ *
7
+ * ARCHITECTURAL NOTE:
8
+ * While this contains implementation logic, it remains in the core layer because:
9
+ * 1. It's model-agnostic (no knowledge of specific models or transformers.js)
10
+ * 2. It's shared by multiple implementation layers (text, multimodal)
11
+ * 3. It provides common infrastructure (lifecycle, validation, batch processing)
12
+ * 4. Moving it would create awkward cross-layer dependencies
13
+ *
14
+ * This follows the "shared base class" pattern common in framework design,
15
+ * similar to React.Component, Django Model, or other framework base classes.
16
+ *
17
+ * RESPONSIBILITIES:
18
+ * - Model lifecycle management (loading, cleanup, disposal)
19
+ * - Batch processing coordination
20
+ * - Input validation and text truncation
21
+ * - Error handling with helpful messages
22
+ * - Embedding ID generation
23
+ * - Common utility methods
24
+ *
25
+ * IMPLEMENTATION LAYERS:
26
+ * - Text: SentenceTransformerEmbedder extends this class
27
+ * - Multimodal: CLIPEmbedder extends this class
28
+ */
29
+ import { ModelRegistry } from './model-registry.js';
30
+ import { validateContentType, createEnhancedEmbeddingResult } from './universal-embedder.js';
31
+ // =============================================================================
32
+ // BASE EMBEDDER ABSTRACT CLASS
33
+ // =============================================================================
34
+ /**
35
+ * Abstract base class for universal embedders
36
+ * Provides common functionality and lifecycle management
37
+ */
38
+ export class BaseUniversalEmbedder {
39
+ modelName;
40
+ options;
41
+ _isLoaded = false;
42
+ _modelInfo;
43
+ constructor(modelName, options = {}) {
44
+ this.modelName = modelName;
45
+ this.options = options;
46
+ const modelInfo = ModelRegistry.getModelInfo(modelName);
47
+ if (!modelInfo) {
48
+ throw new Error(`Model '${modelName}' is not supported. ` +
49
+ `Supported models: ${ModelRegistry.getSupportedModels().join(', ')}`);
50
+ }
51
+ this._modelInfo = modelInfo;
52
+ }
53
+ // =============================================================================
54
+ // PUBLIC INTERFACE IMPLEMENTATION
55
+ // =============================================================================
56
+ get modelType() {
57
+ return this._modelInfo.type;
58
+ }
59
+ get dimensions() {
60
+ return this._modelInfo.dimensions;
61
+ }
62
+ get supportedContentTypes() {
63
+ return this._modelInfo.supportedContentTypes;
64
+ }
65
+ isLoaded() {
66
+ return this._isLoaded;
67
+ }
68
+ getModelInfo() {
69
+ return { ...this._modelInfo }; // Return a copy to prevent mutation
70
+ }
71
+ /**
72
+ * Dispose of all resources and prepare for garbage collection
73
+ * This method should be called when the embedder is no longer needed
74
+ */
75
+ async dispose() {
76
+ try {
77
+ // Call the specific cleanup implementation
78
+ await this.cleanup();
79
+ // Clear internal state
80
+ this._isLoaded = false;
81
+ // Force garbage collection if available
82
+ if (global.gc) {
83
+ global.gc();
84
+ }
85
+ this.logModelLoading('Resources disposed and garbage collection triggered');
86
+ }
87
+ catch (error) {
88
+ console.warn(`Error during resource disposal: ${error instanceof Error ? error.message : 'Unknown error'}`);
89
+ }
90
+ }
91
+ // =============================================================================
92
+ // DEFAULT IMPLEMENTATIONS
93
+ // =============================================================================
94
+ /**
95
+ * Batch embedding with default implementation
96
+ * Subclasses can override for more efficient batch processing
97
+ */
98
+ async embedBatch(items) {
99
+ if (!this._isLoaded) {
100
+ await this.loadModel();
101
+ }
102
+ const results = [];
103
+ const batchSize = this.options.maxBatchSize || this._modelInfo.capabilities.maxBatchSize || 8;
104
+ // Process in batches to avoid memory issues
105
+ for (let i = 0; i < items.length; i += batchSize) {
106
+ const batch = items.slice(i, i + batchSize);
107
+ const batchResults = await this.processBatch(batch);
108
+ results.push(...batchResults);
109
+ }
110
+ return results;
111
+ }
112
+ // =============================================================================
113
+ // PROTECTED HELPER METHODS
114
+ // =============================================================================
115
+ /**
116
+ * Process a single batch of items
117
+ * Can be overridden by subclasses for more efficient batch processing
118
+ */
119
+ async processBatch(batch) {
120
+ const results = [];
121
+ for (const item of batch) {
122
+ try {
123
+ validateContentType(item.contentType, this.supportedContentTypes);
124
+ let result;
125
+ if (item.contentType === 'text') {
126
+ result = await this.embedText(item.content);
127
+ }
128
+ else if (item.contentType === 'image' && this.embedImage) {
129
+ result = await this.embedImage(item.content);
130
+ }
131
+ else {
132
+ throw new Error(`Content type '${item.contentType}' not supported by model '${this.modelName}'`);
133
+ }
134
+ // Enhance the result with content type and metadata
135
+ const enhancedResult = createEnhancedEmbeddingResult(result.embedding_id, result.vector, item.contentType, item.metadata);
136
+ results.push(enhancedResult);
137
+ }
138
+ catch (error) {
139
+ // Log error but continue processing other items
140
+ console.warn(`Failed to embed item: ${error instanceof Error ? error.message : 'Unknown error'}`);
141
+ // Create a placeholder result with zero vector for failed items
142
+ const zeroVector = new Float32Array(this.dimensions).fill(0);
143
+ const failedResult = createEnhancedEmbeddingResult(`failed_${Date.now()}_${Math.random()}`, zeroVector, item.contentType, { ...item.metadata, error: error instanceof Error ? error.message : 'Unknown error' });
144
+ results.push(failedResult);
145
+ }
146
+ }
147
+ return results;
148
+ }
149
+ /**
150
+ * Validate that the model is loaded before operations
151
+ */
152
+ ensureLoaded() {
153
+ if (!this._isLoaded) {
154
+ throw new Error(`Model '${this.modelName}' is not loaded. Call loadModel() first.`);
155
+ }
156
+ }
157
+ /**
158
+ * Generate a unique embedding ID
159
+ */
160
+ generateEmbeddingId(content, contentType = 'text') {
161
+ const timestamp = Date.now();
162
+ const random = Math.random().toString(36).substring(2);
163
+ const contentHash = this.simpleHash(content);
164
+ return `${contentType}_${contentHash}_${timestamp}_${random}`;
165
+ }
166
+ /**
167
+ * Simple hash function for content identification
168
+ */
169
+ simpleHash(str) {
170
+ let hash = 0;
171
+ for (let i = 0; i < str.length; i++) {
172
+ const char = str.charCodeAt(i);
173
+ hash = ((hash << 5) - hash) + char;
174
+ hash = hash & hash; // Convert to 32-bit integer
175
+ }
176
+ return Math.abs(hash).toString(36);
177
+ }
178
+ /**
179
+ * Validate text length against model constraints
180
+ */
181
+ validateTextLength(text) {
182
+ const maxLength = this._modelInfo.capabilities.maxTextLength;
183
+ if (maxLength && text.length > maxLength) {
184
+ console.warn(`Text length (${text.length}) exceeds model maximum (${maxLength}). ` +
185
+ `Text will be truncated.`);
186
+ }
187
+ }
188
+ /**
189
+ * Truncate text to model's maximum length
190
+ */
191
+ truncateText(text) {
192
+ const maxLength = this._modelInfo.capabilities.maxTextLength;
193
+ if (maxLength && text.length > maxLength) {
194
+ return text.substring(0, maxLength);
195
+ }
196
+ return text;
197
+ }
198
+ /**
199
+ * Log model loading progress
200
+ */
201
+ logModelLoading(stage, details) {
202
+ const message = `[${this.modelName}] ${stage}`;
203
+ if (details) {
204
+ console.log(`${message}: ${details}`);
205
+ }
206
+ else {
207
+ console.log(message);
208
+ }
209
+ }
210
+ /**
211
+ * Handle model loading errors with helpful messages
212
+ */
213
+ handleLoadingError(error) {
214
+ const baseMessage = `Failed to load model '${this.modelName}': ${error.message}`;
215
+ // Provide specific guidance based on error type
216
+ if (error.message.includes('network') || error.message.includes('fetch')) {
217
+ return new Error(`${baseMessage}\n` +
218
+ `This appears to be a network error. Please check your internet connection ` +
219
+ `and ensure the model repository is accessible.`);
220
+ }
221
+ if (error.message.includes('memory') || error.message.includes('OOM')) {
222
+ return new Error(`${baseMessage}\n` +
223
+ `This appears to be a memory error. Try using a smaller model or ` +
224
+ `increase available memory. Required: ${this._modelInfo.requirements.minimumMemory}MB`);
225
+ }
226
+ if (error.message.includes('unsupported') || error.message.includes('not found')) {
227
+ const suggestions = ModelRegistry.getSupportedModels(this.modelType);
228
+ return new Error(`${baseMessage}\n` +
229
+ `Model may not be available. Supported ${this.modelType} models: ${suggestions.join(', ')}`);
230
+ }
231
+ return new Error(baseMessage);
232
+ }
233
+ }
234
+ // =============================================================================
235
+ // UTILITY FUNCTIONS
236
+ // =============================================================================
/**
 * Create embedder options with defaults.
 *
 * Defaults: `maxBatchSize` 8, `timeout` 30000 ms, `enableGPU` false,
 * `logLevel` 'info'. Caller-supplied values win over the defaults, except
 * that properties explicitly set to `undefined` are ignored so they cannot
 * erase a default.
 *
 * @param options - Values that should replace the defaults.
 * @returns A new options object; the input is not modified.
 */
export function createEmbedderOptions(options = {}) {
    const defaults = {
        maxBatchSize: 8,
        timeout: 30000, // 30 seconds
        enableGPU: false,
        logLevel: 'info'
    };
    // Drop explicit `undefined` entries: spreading them would overwrite defaults
    const overrides = Object.fromEntries(Object.entries(options ?? {}).filter(([, value]) => value !== undefined));
    return { ...defaults, ...overrides };
}
/**
 * Validate embedder options.
 *
 * A setting is checked whenever it is present (not `null`/`undefined`), so
 * falsy values such as `0`, `NaN` or `''` are rejected instead of slipping
 * past a truthiness guard.
 *
 * @param options - Options to check.
 * @throws Error when `maxBatchSize` is outside 1 to 128, when `timeout` is
 *   below 1000 ms, or when `logLevel` is not a known level.
 */
export function validateEmbedderOptions(options) {
    const { maxBatchSize, timeout, logLevel } = options;
    // Written as negated range checks so NaN fails as well
    if (maxBatchSize != null && !(maxBatchSize >= 1 && maxBatchSize <= 128)) {
        throw new Error('maxBatchSize must be between 1 and 128');
    }
    if (timeout != null && !(timeout >= 1000)) {
        throw new Error('timeout must be at least 1000ms');
    }
    const validLogLevels = ['debug', 'info', 'warn', 'error', 'silent'];
    if (logLevel != null && !validLogLevels.includes(logLevel)) {
        throw new Error(`logLevel must be one of: ${validLogLevels.join(', ')}`);
    }
}
264
+ //# sourceMappingURL=abstract-embedder.js.map
@@ -0,0 +1,60 @@
1
+ /**
2
+ * CORE MODULE — Actionable Error Messages
3
+ * Provides user-friendly, actionable error messages with specific guidance
4
+ * Replaces technical error messages with helpful troubleshooting steps
5
+ */
6
+ /**
7
+ * Configuration for actionable error messages
+ *
+ * Accepted as the optional trailing `config` argument by every error factory
+ * declared in this file. All fields are optional; the defaults used when a
+ * field is omitted are chosen by the implementation and are not visible in
+ * this declaration file.
8
+ */
9
+ export interface ActionableErrorConfig {
10
+ /** Include examples in error messages */
11
+ includeExamples?: boolean;
12
+ /** Include troubleshooting steps */
13
+ includeTroubleshooting?: boolean;
14
+ /** Context about the current operation (free-form text) */
15
+ operationContext?: string;
16
+ }
17
+ /**
18
+ * Create actionable error message for missing files
+ *
+ * @param filePath - Path of the file the error is about.
+ * @param fileType - Kind of file: `'index'`, `'database'`, `'config'` or `'content'`.
+ * @param config - Optional message configuration.
+ * @returns An `Error` object; per this signature it is returned to the caller
+ * (NOTE(review): presumably for the caller to throw; confirm in the implementation).
19
+ */
20
+ export declare function createMissingFileError(filePath: string, fileType: 'index' | 'database' | 'config' | 'content', config?: ActionableErrorConfig): Error;
21
+ /**
22
+ * Create actionable error message for invalid paths
+ *
+ * @param paths - List of named path entries; each has a `name` and a `value`
+ * that may be `undefined`.
+ * (NOTE(review): `undefined` presumably marks a path that was never supplied; confirm.)
+ * @param config - Optional message configuration.
+ * @returns An `Error` object describing the problem.
23
+ */
24
+ export declare function createInvalidPathError(paths: {
25
+ name: string;
26
+ value: string | undefined;
27
+ }[], config?: ActionableErrorConfig): Error;
28
+ /**
29
+ * Create actionable error message for model loading failures
+ *
+ * @param modelName - Name of the model that failed to load.
+ * @param originalError - The underlying failure, passed as a string
+ * (not an `Error` object).
+ * @param config - Optional message configuration.
+ * @returns An `Error` object describing the failure.
30
+ */
31
+ export declare function createModelLoadingError(modelName: string, originalError: string, config?: ActionableErrorConfig): Error;
32
+ /**
33
+ * Create actionable error message for dimension mismatches
+ *
+ * @param expected - The dimension count that was expected.
+ * @param actual - The dimension count that was actually encountered.
+ * @param context - Text describing where the mismatch occurred
+ * (NOTE(review): exact expected wording is not visible here; confirm in the implementation).
+ * @param config - Optional message configuration.
+ * @returns An `Error` object describing the mismatch.
34
+ */
35
+ export declare function createDimensionMismatchError(expected: number, actual: number, context: string, config?: ActionableErrorConfig): Error;
36
+ /**
37
+ * Create actionable error message for mode mismatches
+ *
+ * @param expectedMode - The mode that was expected, as a plain string.
+ * @param actualMode - The mode that was actually found, as a plain string.
+ * (NOTE(review): the set of valid mode names is not constrained by this
+ * signature; confirm against the implementation.)
+ * @param config - Optional message configuration.
+ * @returns An `Error` object describing the mismatch.
38
+ */
39
+ export declare function createModeMismatchError(expectedMode: string, actualMode: string, config?: ActionableErrorConfig): Error;
40
+ /**
41
+ * Create actionable error message for empty or invalid content
+ *
+ * @param contentType - Free-form label for the kind of content involved.
+ * @param issue - What is wrong with the content: `'empty'`, `'invalid_format'`,
+ * `'too_large'` or `'unsupported'`.
+ * @param config - Optional message configuration.
+ * @returns An `Error` object describing the problem.
42
+ */
43
+ export declare function createInvalidContentError(contentType: string, issue: 'empty' | 'invalid_format' | 'too_large' | 'unsupported', config?: ActionableErrorConfig): Error;
44
+ /**
45
+ * Create actionable error message for missing dependencies
+ *
+ * @param dependencyName - Name of the dependency that is missing.
+ * @param dependencyType - Category of the dependency: `'function'`, `'object'`
+ * or `'service'`.
+ * @param config - Optional message configuration.
+ * @returns An `Error` object describing the missing dependency.
46
+ */
47
+ export declare function createMissingDependencyError(dependencyName: string, dependencyType: 'function' | 'object' | 'service', config?: ActionableErrorConfig): Error;
48
+ /**
49
+ * Create actionable error message for factory creation failures
+ *
+ * @param factoryName - Name of the factory whose creation step failed.
+ * @param originalError - The underlying failure, passed as a string
+ * (not an `Error` object).
+ * @param config - Optional message configuration.
+ * @returns An `Error` object describing the failure.
50
+ */
51
+ export declare function createFactoryCreationError(factoryName: string, originalError: string, config?: ActionableErrorConfig): Error;
52
+ /**
53
+ * Enhance an existing error with actionable information
+ *
+ * @param originalError - The error to enhance.
+ * @param context - Text describing the operation in which the error occurred.
+ * @param suggestions - Optional list of suggestion strings.
+ * @param config - Optional message configuration.
+ * @returns An `Error` object.
+ * (NOTE(review): this declaration does not show whether the result is a new
+ * object or `originalError` itself, modified; confirm in the implementation.)
54
+ */
55
+ export declare function enhanceError(originalError: Error, context: string, suggestions?: string[], config?: ActionableErrorConfig): Error;
56
+ /**
57
+ * Create a user-friendly error message with context
+ *
+ * @param message - The main error text.
+ * @param context - Text describing the operation the error relates to.
+ * @param suggestions - Optional list of suggestion strings.
+ * @param examples - Optional list of example strings.
+ * (NOTE(review): presumably only rendered when `config.includeExamples`
+ * allows it; confirm in the implementation.)
+ * @param config - Optional message configuration.
+ * @returns An `Error` object built from the supplied pieces.
58
+ */
59
+ export declare function createContextualError(message: string, context: string, suggestions?: string[], examples?: string[], config?: ActionableErrorConfig): Error;
60
+ //# sourceMappingURL=actionable-error-messages.d.ts.map