rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310)
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -188,6 +188,15 @@ async function discoverFilesRecursive(dirPath, options) {
188
188
  // Check file size based on content type
189
189
  const stats = await fs.stat(fullPath);
190
190
  const contentType = getContentType(fullPath);
191
+ // Filter by mode: skip incompatible content types
192
+ const mode = options.mode || 'text';
193
+ if (mode === 'text' && contentType === 'image') {
194
+ result.skipped.push({
195
+ path: fullPath,
196
+ reason: `Image files not supported in text mode. Use --mode multimodal for image processing.`
197
+ });
198
+ continue;
199
+ }
191
200
  // Different size limits for different content types
192
201
  const maxSize = contentType === 'image'
193
202
  ? 50 * 1024 * 1024 // 50MB for images
@@ -250,6 +259,17 @@ export async function discoverFiles(path, options = DEFAULT_FILE_PROCESSOR_OPTIO
250
259
  };
251
260
  }
252
261
  const contentType = getContentType(resolvedPath);
262
+ // Filter by mode: skip incompatible content types
263
+ const mode = options.mode || 'text';
264
+ if (mode === 'text' && contentType === 'image') {
265
+ return {
266
+ files: [],
267
+ skipped: [{
268
+ path: resolvedPath,
269
+ reason: `Image files not supported in text mode. Use --mode multimodal for image processing.`
270
+ }]
271
+ };
272
+ }
253
273
  // Check file size based on content type
254
274
  const maxSize = contentType === 'image'
255
275
  ? 50 * 1024 * 1024 // 50MB for images
@@ -7,12 +7,16 @@ export interface IndexStats {
7
7
  export declare class IndexManager {
8
8
  private modelName?;
9
9
  private vectorIndex;
10
+ private textIndex?;
11
+ private imageIndex?;
10
12
  private db;
11
13
  private indexPath;
12
14
  private dbPath;
13
15
  private isInitialized;
14
16
  private hashToEmbeddingId;
15
17
  private embeddingIdToHash;
18
+ private groupedEmbeddings?;
19
+ private vectorIndexOptions;
16
20
  constructor(indexPath: string, dbPath: string, dimensions: number, modelName?: string | undefined);
17
21
  /**
18
22
  * Initialize the index manager and load existing index if available
@@ -30,6 +34,10 @@ export declare class IndexManager {
30
34
  * Requirements: 5.3 - When new documents are added THEN system SHALL append new chunks and vectors without rebuilding existing index
31
35
  */
32
36
  addVectors(embeddings: EmbeddingResult[]): Promise<void>;
37
+ /**
38
+ * Add grouped embeddings by content type (for new grouped format)
39
+ */
40
+ addGroupedEmbeddings(textEmbeddings: EmbeddingResult[], imageEmbeddings: EmbeddingResult[]): Promise<void>;
33
41
  /**
34
42
  * Rebuild the entire index from scratch
35
43
  * Requirements: 5.2, 5.4 - Create full index rebuild functionality for model changes or document deletions
@@ -68,10 +76,18 @@ export declare class IndexManager {
68
76
  * Save the vector index to disk
69
77
  */
70
78
  saveIndex(): Promise<void>;
79
+ /**
80
+ * Create specialized indexes for text and image content when grouped data is available
81
+ */
82
+ private createSpecializedIndexes;
83
+ /**
84
+ * Save index with content type grouping (for new grouped format)
85
+ */
86
+ saveGroupedIndex(textEmbeddings: EmbeddingResult[], imageEmbeddings: EmbeddingResult[]): Promise<void>;
71
87
  /**
72
88
  * Search for similar vectors
73
89
  */
74
- search(queryVector: Float32Array, k?: number): {
90
+ search(queryVector: Float32Array, k?: number, contentType?: 'text' | 'image' | 'combined'): {
75
91
  embeddingIds: string[];
76
92
  distances: number[];
77
93
  };
@@ -1,26 +1,33 @@
1
1
  import { VectorIndex } from './core/vector-index.js';
2
+ import { BinaryIndexFormat } from './core/binary-index-format.js';
2
3
  import { openDatabase, getSystemInfo, setSystemInfo } from './core/db.js';
3
4
  import { config, getModelDefaults } from './core/config.js';
4
5
  export class IndexManager {
5
6
  modelName;
6
7
  vectorIndex;
8
+ textIndex;
9
+ imageIndex;
7
10
  db = null;
8
11
  indexPath;
9
12
  dbPath;
10
13
  isInitialized = false;
11
14
  hashToEmbeddingId = new Map();
12
15
  embeddingIdToHash = new Map();
16
+ groupedEmbeddings;
17
+ vectorIndexOptions;
13
18
  constructor(indexPath, dbPath, dimensions, modelName) {
14
19
  this.modelName = modelName;
15
20
  this.indexPath = indexPath;
16
21
  this.dbPath = dbPath;
17
- // Initialize with provided dimensions from config
18
- this.vectorIndex = new VectorIndex(indexPath, {
22
+ // Store options for creating specialized indexes
23
+ this.vectorIndexOptions = {
19
24
  dimensions: dimensions,
20
25
  maxElements: 100000, // Start with 100k capacity
21
26
  efConstruction: 200,
22
27
  M: 16
23
- });
28
+ };
29
+ // Initialize with provided dimensions from config
30
+ this.vectorIndex = new VectorIndex(indexPath, this.vectorIndexOptions);
24
31
  }
25
32
  /**
26
33
  * Initialize the index manager and load existing index if available
@@ -47,6 +54,8 @@ export class IndexManager {
47
54
  // Only try to load existing index if not forcing recreation
48
55
  console.log('Loading existing vector index...');
49
56
  await this.vectorIndex.loadIndex();
57
+ // Check if the loaded index has grouped data and create specialized indexes
58
+ await this.createSpecializedIndexes();
50
59
  }
51
60
  // Always populate the embedding ID mapping from existing database entries
52
61
  // This is needed both for new and existing indexes
@@ -55,7 +64,8 @@ export class IndexManager {
55
64
  this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
56
65
  }
57
66
  this.isInitialized = true;
58
- console.log(`Index manager initialized with ${this.vectorIndex.getCurrentCount()} vectors`);
67
+ const vectorCount = this.vectorIndex.getCurrentCount();
68
+ console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
59
69
  }
60
70
  catch (error) {
61
71
  throw new Error(`Failed to initialize index manager: ${error}`);
@@ -153,6 +163,31 @@ export class IndexManager {
153
163
  throw new Error(`Failed to add vectors to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
154
164
  }
155
165
  }
166
+ /**
167
+ * Add grouped embeddings by content type (for new grouped format)
168
+ */
169
+ async addGroupedEmbeddings(textEmbeddings, imageEmbeddings) {
170
+ if (!this.isInitialized) {
171
+ throw new Error('Index manager not initialized');
172
+ }
173
+ console.log(`addGroupedEmbeddings: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
174
+ const allEmbeddings = [...textEmbeddings, ...imageEmbeddings];
175
+ if (allEmbeddings.length === 0) {
176
+ return;
177
+ }
178
+ try {
179
+ // Store grouped information for later saving
180
+ this.groupedEmbeddings = { text: textEmbeddings, image: imageEmbeddings };
181
+ console.log('addGroupedEmbeddings: stored grouped embeddings');
182
+ // Add all embeddings to the index (maintains current behavior)
183
+ await this.addVectors(allEmbeddings);
184
+ console.log('addGroupedEmbeddings: addVectors completed');
185
+ // The saveIndex method will now use grouped format if groupedEmbeddings exists
186
+ }
187
+ catch (error) {
188
+ throw new Error(`Failed to add grouped embeddings to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
189
+ }
190
+ }
156
191
  /**
157
192
  * Rebuild the entire index from scratch
158
193
  * Requirements: 5.2, 5.4 - Create full index rebuild functionality for model changes or document deletions
@@ -349,16 +384,122 @@ export class IndexManager {
349
384
  if (!this.isInitialized) {
350
385
  throw new Error('Index manager not initialized');
351
386
  }
352
- await this.vectorIndex.saveIndex();
387
+ // If we have grouped embeddings, save in grouped format
388
+ if (this.groupedEmbeddings) {
389
+ console.log('IndexManager: Saving in grouped format');
390
+ await this.saveGroupedIndex(this.groupedEmbeddings.text, this.groupedEmbeddings.image);
391
+ // Clear grouped data after saving
392
+ this.groupedEmbeddings = undefined;
393
+ }
394
+ else {
395
+ console.log('IndexManager: Saving in standard format');
396
+ await this.vectorIndex.saveIndex();
397
+ }
398
+ }
399
+ /**
400
+ * Create specialized indexes for text and image content when grouped data is available
401
+ */
402
+ async createSpecializedIndexes() {
403
+ try {
404
+ // Load the index data to check if it has grouped information
405
+ const indexData = await BinaryIndexFormat.load(this.indexPath);
406
+ if (indexData.hasContentTypeGroups && indexData.textVectors && indexData.imageVectors) {
407
+ // Only create specialized indexes if we have both text and image vectors
408
+ // In text-only mode, textVectors would be populated but imageVectors empty
409
+ // In multimodal mode, both would be populated
410
+ const hasTextVectors = indexData.textVectors.length > 0;
411
+ const hasImageVectors = indexData.imageVectors.length > 0;
412
+ if (hasTextVectors && hasImageVectors) {
413
+ console.log('Creating specialized indexes for content type filtering...');
414
+ // Create text-only index
415
+ this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
416
+ await this.textIndex.initialize();
417
+ this.textIndex.addVectors(indexData.textVectors);
418
+ console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
419
+ // Create image-only index
420
+ this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
421
+ await this.imageIndex.initialize();
422
+ this.imageIndex.addVectors(indexData.imageVectors);
423
+ console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
424
+ console.log('✓ Specialized indexes ready for content type filtering');
425
+ }
426
+ else if (hasTextVectors) {
427
+ console.log('Text-only index detected - using combined index for all searches');
428
+ // In text-only mode, we don't need specialized indexes
429
+ // The combined index (vectorIndex) already contains all text vectors
430
+ }
431
+ }
432
+ }
433
+ catch (error) {
434
+ console.warn('Failed to create specialized indexes, falling back to combined index:', error);
435
+ // Continue without specialized indexes - search will still work with combined index
436
+ }
437
+ }
438
+ /**
439
+ * Save index with content type grouping (for new grouped format)
440
+ */
441
+ async saveGroupedIndex(textEmbeddings, imageEmbeddings) {
442
+ if (!this.isInitialized) {
443
+ throw new Error('Index manager not initialized');
444
+ }
445
+ console.log(`saveGroupedIndex: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
446
+ // Group vectors by content type
447
+ const textVectors = textEmbeddings.map((embedding) => ({
448
+ id: this.hashEmbeddingId(embedding.embedding_id),
449
+ vector: embedding.vector
450
+ }));
451
+ const imageVectors = imageEmbeddings.map((embedding) => ({
452
+ id: this.hashEmbeddingId(embedding.embedding_id),
453
+ vector: embedding.vector
454
+ }));
455
+ // Get index parameters
456
+ const options = this.vectorIndex.getOptions();
457
+ const allVectors = [...textVectors, ...imageVectors];
458
+ console.log(`saveGroupedIndex: dimensions=${options.dimensions}, totalVectors=${allVectors.length}`);
459
+ const indexData = {
460
+ dimensions: options.dimensions,
461
+ maxElements: options.maxElements,
462
+ M: options.M || 16,
463
+ efConstruction: options.efConstruction || 200,
464
+ seed: options.seed || 100,
465
+ currentSize: textVectors.length + imageVectors.length,
466
+ vectors: allVectors, // Required for backward compatibility
467
+ hasContentTypeGroups: true,
468
+ textVectors,
469
+ imageVectors
470
+ };
471
+ console.log('saveGroupedIndex: Calling BinaryIndexFormat.saveGrouped');
472
+ // Save using grouped format
473
+ await BinaryIndexFormat.saveGrouped(this.indexPath, indexData);
474
+ console.log(`✓ Saved grouped index with ${textVectors.length} text and ${imageVectors.length} image vectors`);
353
475
  }
354
476
  /**
355
477
  * Search for similar vectors
356
478
  */
357
- search(queryVector, k = 5) {
479
+ search(queryVector, k = 5, contentType) {
358
480
  if (!this.isInitialized) {
359
481
  throw new Error('Index manager not initialized');
360
482
  }
361
- const results = this.vectorIndex.search(queryVector, k);
483
+ // Select the appropriate index based on content type
484
+ let targetIndex;
485
+ // If we have specialized indexes (multimodal mode), use them for filtering
486
+ if (this.textIndex && this.imageIndex) {
487
+ if (contentType === 'text') {
488
+ targetIndex = this.textIndex;
489
+ }
490
+ else if (contentType === 'image') {
491
+ targetIndex = this.imageIndex;
492
+ }
493
+ else {
494
+ // 'combined' or undefined
495
+ targetIndex = this.vectorIndex;
496
+ }
497
+ }
498
+ else {
499
+ // No specialized indexes (text-only mode) - ignore contentType and use combined index
500
+ targetIndex = this.vectorIndex;
501
+ }
502
+ const results = targetIndex.search(queryVector, k);
362
503
  // Convert numeric IDs back to embedding IDs
363
504
  const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
364
505
  return {
@@ -338,73 +338,78 @@ export class CLIPEmbedder extends BaseUniversalEmbedder {
338
338
  if (!this.textModel || !this.tokenizer) {
339
339
  throw new Error('CLIP text model or tokenizer not initialized');
340
340
  }
341
- // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
342
- // Tokenize text with CLIP's requirements
343
- // The tokenizer handles truncation at 77 TOKENS (not characters)
344
- const tokens = await this.tokenizer(processedText, {
345
- padding: true,
346
- truncation: true,
347
- max_length: 77, // CLIP's text sequence length limit (77 tokens)
348
- return_tensors: 'pt'
349
- });
350
- // Log token information for debugging (only in development)
351
- if (process.env.NODE_ENV === 'development') {
352
- const tokenIds = tokens.input_ids?.data || [];
353
- const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
354
- if (actualTokenCount >= 77) {
355
- console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
356
- }
357
- }
358
- // Generate text embedding using CLIPTextModelWithProjection
359
- const output = await this.textModel(tokens);
360
- // Extract embedding from text_embeds (no pixel_values dependency)
361
- const embedding = new Float32Array(output.text_embeds.data);
362
- // Validate embedding dimensions and values
363
- if (embedding.length !== this.dimensions) {
364
- throw new Error(`CLIP embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`);
365
- }
366
- // Validate that all values are finite numbers
367
- const invalidValues = Array.from(embedding).filter(val => !isFinite(val) || isNaN(val));
368
- if (invalidValues.length > 0) {
369
- throw new Error(`CLIP embedding contains ${invalidValues.length} invalid values`);
370
- }
371
- // Validate embedding quality - should not be all zeros
372
- const nonZeroValues = Array.from(embedding).filter(val => Math.abs(val) > 1e-8);
373
- if (nonZeroValues.length === 0) {
374
- throw new Error('CLIP embedding is all zeros');
375
- }
376
- // Calculate embedding magnitude before normalization for quality assessment
377
- const magnitudeBeforeNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
378
- if (magnitudeBeforeNorm < 1e-6) {
379
- throw new Error(`CLIP embedding has critically low magnitude: ${magnitudeBeforeNorm.toExponential(3)}`);
380
- }
381
- // Apply L2-normalization (CLIP models are trained with normalized embeddings)
382
- this.normalizeEmbedding(embedding);
383
- // Verify normalization was successful
384
- const magnitudeAfterNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
385
- if (Math.abs(magnitudeAfterNorm - 1.0) > 0.01) {
386
- console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
387
- }
388
- // Log text embedding generation
389
- console.log(`[CLIP] Generated text embedding for: "${processedText.substring(0, 30)}${processedText.length > 30 ? '...' : ''}"`);
390
- // Generate unique embedding ID
391
- const embeddingId = this.generateEmbeddingId(processedText, 'text');
392
- return {
393
- embedding_id: embeddingId,
394
- vector: embedding,
395
- contentType: 'text',
396
- metadata: {
397
- originalText: text,
398
- processedText: processedText,
399
- textLength: processedText.length,
400
- embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
401
- embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
402
- normalized: true,
403
- modelName: this.modelName,
404
- modelType: this.modelType,
405
- dimensions: this.dimensions
341
+ try {
342
+ // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
343
+ // Tokenize text with CLIP's requirements
344
+ // The tokenizer handles truncation at 77 TOKENS (not characters)
345
+ const tokens = await this.tokenizer(processedText, {
346
+ padding: true,
347
+ truncation: true,
348
+ max_length: 77, // CLIP's text sequence length limit (77 tokens)
349
+ return_tensors: 'pt'
350
+ });
351
+ // Log token information for debugging (only in development)
352
+ if (process.env.NODE_ENV === 'development') {
353
+ const tokenIds = tokens.input_ids?.data || [];
354
+ const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
355
+ if (actualTokenCount >= 77) {
356
+ console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
357
+ }
406
358
  }
407
- };
359
+ // Generate text embedding using CLIPTextModelWithProjection
360
+ const output = await this.textModel(tokens);
361
+ // Extract embedding from text_embeds (no pixel_values dependency)
362
+ const embedding = new Float32Array(output.text_embeds.data);
363
+ // Validate embedding dimensions and values
364
+ if (embedding.length !== this.dimensions) {
365
+ throw new Error(`CLIP embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`);
366
+ }
367
+ // Validate that all values are finite numbers
368
+ const invalidValues = Array.from(embedding).filter(val => !isFinite(val) || isNaN(val));
369
+ if (invalidValues.length > 0) {
370
+ throw new Error(`CLIP embedding contains ${invalidValues.length} invalid values`);
371
+ }
372
+ // Validate embedding quality - should not be all zeros
373
+ const nonZeroValues = Array.from(embedding).filter(val => Math.abs(val) > 1e-8);
374
+ if (nonZeroValues.length === 0) {
375
+ throw new Error('CLIP embedding is all zeros');
376
+ }
377
+ // Calculate embedding magnitude before normalization for quality assessment
378
+ const magnitudeBeforeNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
379
+ if (magnitudeBeforeNorm < 1e-6) {
380
+ throw new Error(`CLIP embedding has critically low magnitude: ${magnitudeBeforeNorm.toExponential(3)}`);
381
+ }
382
+ // Apply L2-normalization (CLIP models are trained with normalized embeddings)
383
+ this.normalizeEmbedding(embedding);
384
+ // Verify normalization was successful
385
+ const magnitudeAfterNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
386
+ if (Math.abs(magnitudeAfterNorm - 1.0) > 0.01) {
387
+ console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
388
+ }
389
+ // Log text embedding generation
390
+ console.log(`[CLIP] Generated text embedding for: "${processedText.substring(0, 30)}${processedText.length > 30 ? '...' : ''}"`);
391
+ // Generate unique embedding ID
392
+ const embeddingId = this.generateEmbeddingId(processedText, 'text');
393
+ return {
394
+ embedding_id: embeddingId,
395
+ vector: embedding,
396
+ contentType: 'text',
397
+ metadata: {
398
+ originalText: text,
399
+ processedText: processedText,
400
+ textLength: processedText.length,
401
+ embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
402
+ embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
403
+ normalized: true,
404
+ modelName: this.modelName,
405
+ modelType: this.modelType,
406
+ dimensions: this.dimensions
407
+ }
408
+ };
409
+ }
410
+ catch (error) {
411
+ throw error;
412
+ }
408
413
  }
409
414
  // =============================================================================
410
415
  // IMAGE EMBEDDING METHODS
@@ -0,0 +1,90 @@
1
+ /**
2
+ * User-friendly error classes with actionable suggestions
3
+ * Requirements: 5.3 - Create user-friendly error classes with actionable suggestions
4
+ */
5
+ /**
6
+ * Base class for API errors with actionable suggestions
7
+ */
8
+ export declare abstract class APIError extends Error {
9
+ code: string;
10
+ suggestions: string[];
11
+ context?: string | undefined;
12
+ constructor(message: string, code: string, suggestions: string[], context?: string | undefined);
13
+ /**
14
+ * Get formatted error message with suggestions
15
+ */
16
+ getFormattedMessage(): string;
17
+ /**
18
+ * Log the error with proper formatting
19
+ */
20
+ logError(): void;
21
+ }
22
+ /**
23
+ * Ingestion-related errors
24
+ */
25
+ export declare class IngestionError extends APIError {
26
+ constructor(message: string, code: string, suggestions: string[], context?: string);
27
+ }
28
+ /**
29
+ * Search-related errors
30
+ */
31
+ export declare class SearchError extends APIError {
32
+ constructor(message: string, code: string, suggestions: string[], context?: string);
33
+ }
34
+ /**
35
+ * Resource management errors
36
+ */
37
+ export declare class ResourceError extends APIError {
38
+ constructor(message: string, code: string, suggestions: string[], context?: string);
39
+ }
40
+ /**
41
+ * Model compatibility errors
42
+ */
43
+ export declare class ModelCompatibilityError extends APIError {
44
+ constructor(message: string, code: string, suggestions: string[], context?: string);
45
+ }
46
+ /**
47
+ * Error factory for creating user-friendly errors from internal errors
48
+ * Requirements: 5.3 - Map internal errors to clear guidance
49
+ */
50
+ export declare class ErrorFactory {
51
+ /**
52
+ * Create user-friendly ingestion error from internal error
53
+ */
54
+ static createIngestionError(error: unknown, context: string): IngestionError;
55
+ /**
56
+ * Create user-friendly search error from internal error
57
+ */
58
+ static createSearchError(error: unknown, context: string): SearchError;
59
+ /**
60
+ * Create user-friendly resource error from internal error
61
+ */
62
+ static createResourceError(error: unknown, context: string): ResourceError;
63
+ }
64
+ /**
65
+ * Common error scenarios with predefined messages and suggestions
66
+ * Requirements: 5.3 - Add specific error handling for common scenarios
67
+ */
68
+ export declare const CommonErrors: {
69
+ /**
70
+ * Error when trying to search without running ingestion first
71
+ */
72
+ NO_DOCUMENTS_INGESTED: SearchError;
73
+ /**
74
+ * Error when model versions don't match
75
+ */
76
+ MODEL_VERSION_MISMATCH: ModelCompatibilityError;
77
+ /**
78
+ * Error when required files are missing
79
+ */
80
+ MISSING_REQUIRED_FILES: SearchError;
81
+ /**
82
+ * Error when initialization fails
83
+ */
84
+ INITIALIZATION_FAILED: ResourceError;
85
+ };
86
+ /**
87
+ * Utility function to handle and log errors appropriately
88
+ */
89
+ export declare function handleAPIError(error: unknown, context: string, operation: 'ingestion' | 'search' | 'resource'): never;
90
+ //# sourceMappingURL=api-errors.d.ts.map