rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,598 @@
1
+ import { VectorIndex } from './core/vector-index.js';
2
+ import { BinaryIndexFormat } from './core/binary-index-format.js';
3
+ import { openDatabase, getSystemInfo, setSystemInfo } from './core/db.js';
4
+ import { config, getModelDefaults } from './core/config.js';
5
+ export class IndexManager {
6
+ modelName;
7
+ vectorIndex;
8
+ textIndex;
9
+ imageIndex;
10
+ db = null;
11
+ indexPath;
12
+ dbPath;
13
+ isInitialized = false;
14
+ hashToEmbeddingId = new Map();
15
+ embeddingIdToHash = new Map();
16
+ groupedEmbeddings;
17
+ vectorIndexOptions;
18
+ constructor(indexPath, dbPath, dimensions, modelName) {
19
+ this.modelName = modelName;
20
+ this.indexPath = indexPath;
21
+ this.dbPath = dbPath;
22
+ // Store options for creating specialized indexes
23
+ this.vectorIndexOptions = {
24
+ dimensions: dimensions,
25
+ maxElements: 100000, // Start with 100k capacity
26
+ efConstruction: 200,
27
+ M: 16
28
+ };
29
+ // Initialize with provided dimensions from config
30
+ this.vectorIndex = new VectorIndex(indexPath, this.vectorIndexOptions);
31
+ }
32
+ /**
33
+ * Initialize the index manager and load existing index if available
34
+ * @param skipModelCheck - Skip model compatibility check (used for rebuilds)
35
+ * @param forceRecreate - Force recreation of index (used for model changes)
36
+ */
37
+ async initialize(skipModelCheck = false, forceRecreate = false) {
38
+ if (this.isInitialized) {
39
+ return;
40
+ }
41
+ try {
42
+ // Open database connection
43
+ this.db = await openDatabase(this.dbPath);
44
+ // Check model compatibility BEFORE trying to load the vector index
45
+ // This prevents WebAssembly exceptions when dimensions don't match
46
+ if (!skipModelCheck && !forceRecreate) {
47
+ await this.checkModelCompatibility();
48
+ }
49
+ if (forceRecreate || !this.vectorIndex.indexExists()) {
50
+ console.log('Creating new vector index...');
51
+ await this.vectorIndex.initialize();
52
+ }
53
+ else {
54
+ // Only try to load existing index if not forcing recreation
55
+ console.log('Loading existing vector index...');
56
+ await this.vectorIndex.loadIndex();
57
+ // Check if the loaded index has grouped data and create specialized indexes
58
+ await this.createSpecializedIndexes();
59
+ }
60
+ // Always populate the embedding ID mapping from existing database entries
61
+ // This is needed both for new and existing indexes
62
+ const existingChunks = await this.db.all('SELECT embedding_id FROM chunks ORDER BY id');
63
+ for (const chunk of existingChunks) {
64
+ this.hashEmbeddingId(chunk.embedding_id); // This will populate the mapping
65
+ }
66
+ this.isInitialized = true;
67
+ const vectorCount = this.vectorIndex.getCurrentCount();
68
+ console.log(`Index manager initialized with ${vectorCount} vectors${this.textIndex && this.imageIndex ? ' (multi-graph mode)' : ''}`);
69
+ }
70
+ catch (error) {
71
+ throw new Error(`Failed to initialize index manager: ${error}`);
72
+ }
73
+ }
74
+ /**
75
+ * Check model compatibility between stored and current configuration
76
+ * Requirements: 2.1, 2.2, 2.4, 5.1, 5.2, 5.3, 5.4
77
+ */
78
+ async checkModelCompatibility() {
79
+ if (!this.db) {
80
+ throw new Error('Database not initialized');
81
+ }
82
+ try {
83
+ // Get stored model information
84
+ const systemInfo = await getSystemInfo(this.db);
85
+ const currentModel = this.modelName || config.embedding_model;
86
+ const currentDefaults = getModelDefaults(currentModel);
87
+ if (systemInfo && systemInfo.modelName && systemInfo.modelDimensions) {
88
+ // Check if models match
89
+ if (systemInfo.modelName !== currentModel) {
90
+ throw new Error(`Model mismatch detected!\n` +
91
+ `Current model: ${currentModel} (${currentDefaults.dimensions} dimensions)\n` +
92
+ `Index model: ${systemInfo.modelName} (${systemInfo.modelDimensions} dimensions)\n` +
93
+ `\n` +
94
+ `The embedding model has changed since the index was created.\n` +
95
+ `This requires a full index rebuild to maintain consistency.\n` +
96
+ `\n` +
97
+ `To fix this issue:\n` +
98
+ `1. Run: npm run rebuild\n` +
99
+ `2. Or run: node dist/cli.js rebuild\n` +
100
+ `\n` +
101
+ `This will regenerate all embeddings with the new model.`);
102
+ }
103
+ // Check if dimensions match (additional safety check)
104
+ if (systemInfo.modelDimensions !== currentDefaults.dimensions) {
105
+ throw new Error(`Model dimension mismatch detected!\n` +
106
+ `Current model dimensions: ${currentDefaults.dimensions}\n` +
107
+ `Index model dimensions: ${systemInfo.modelDimensions}\n` +
108
+ `\n` +
109
+ `This indicates a configuration inconsistency.\n` +
110
+ `Please run: npm run rebuild`);
111
+ }
112
+ console.log(`Model compatibility verified: ${currentModel} (${currentDefaults.dimensions} dimensions)`);
113
+ }
114
+ else {
115
+ // First run - store the model info
116
+ console.log(`No model info stored yet - storing current model info: ${currentModel}`);
117
+ await setSystemInfo(this.db, {
118
+ modelName: currentModel,
119
+ modelDimensions: currentDefaults.dimensions
120
+ });
121
+ }
122
+ }
123
+ catch (error) {
124
+ if (error instanceof Error) {
125
+ throw error; // Re-throw our formatted errors
126
+ }
127
+ throw new Error(`Failed to check model compatibility: ${error}`);
128
+ }
129
+ }
130
+ /**
131
+ * Add vectors to the index with corresponding metadata (incremental addition)
132
+ * Requirements: 5.3 - When new documents are added THEN system SHALL append new chunks and vectors without rebuilding existing index
133
+ */
134
+ async addVectors(embeddings) {
135
+ if (!this.isInitialized) {
136
+ throw new Error('Index manager not initialized');
137
+ }
138
+ if (embeddings.length === 0) {
139
+ return;
140
+ }
141
+ try {
142
+ // Convert embedding IDs to numeric IDs for hnswlib
143
+ const vectors = embeddings.map((embedding) => ({
144
+ id: this.hashEmbeddingId(embedding.embedding_id),
145
+ vector: embedding.vector
146
+ }));
147
+ // Check if we need to resize the index before adding
148
+ const currentCount = this.vectorIndex.getCurrentCount();
149
+ const newCount = currentCount + vectors.length;
150
+ const currentCapacity = 100000; // This should match the initial capacity
151
+ if (newCount > currentCapacity * 0.9) {
152
+ const newCapacity = Math.ceil(newCount * 1.5);
153
+ console.log(`Resizing index from ${currentCapacity} to ${newCapacity} to accommodate new vectors`);
154
+ this.vectorIndex.resizeIndex(newCapacity);
155
+ }
156
+ // Add vectors incrementally (this is the key requirement - no rebuild needed)
157
+ this.vectorIndex.addVectors(vectors);
158
+ console.log(`Incrementally added ${embeddings.length} vectors to index (total: ${this.vectorIndex.getCurrentCount()})`);
159
+ // Save the updated index
160
+ await this.saveIndex();
161
+ }
162
+ catch (error) {
163
+ throw new Error(`Failed to add vectors to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
164
+ }
165
+ }
166
+ /**
167
+ * Add grouped embeddings by content type (for new grouped format)
168
+ */
169
+ async addGroupedEmbeddings(textEmbeddings, imageEmbeddings) {
170
+ if (!this.isInitialized) {
171
+ throw new Error('Index manager not initialized');
172
+ }
173
+ console.log(`addGroupedEmbeddings: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
174
+ const allEmbeddings = [...textEmbeddings, ...imageEmbeddings];
175
+ if (allEmbeddings.length === 0) {
176
+ return;
177
+ }
178
+ try {
179
+ // Store grouped information for later saving
180
+ this.groupedEmbeddings = { text: textEmbeddings, image: imageEmbeddings };
181
+ console.log('addGroupedEmbeddings: stored grouped embeddings');
182
+ // Add all embeddings to the index (maintains current behavior)
183
+ await this.addVectors(allEmbeddings);
184
+ console.log('addGroupedEmbeddings: addVectors completed');
185
+ // The saveIndex method will now use grouped format if groupedEmbeddings exists
186
+ }
187
+ catch (error) {
188
+ throw new Error(`Failed to add grouped embeddings to index: ${error instanceof Error ? error.message : 'Unknown error'}`);
189
+ }
190
+ }
191
+ /**
192
+ * Rebuild the entire index from scratch
193
+ * Requirements: 5.2, 5.4 - Create full index rebuild functionality for model changes or document deletions
194
+ */
195
+ async rebuildIndex(newModelVersion) {
196
+ if (!this.db) {
197
+ throw new Error('Database not initialized');
198
+ }
199
+ console.log('Starting full index rebuild...');
200
+ try {
201
+ // Initialize new empty index (this will overwrite existing index)
202
+ await this.vectorIndex.initialize();
203
+ // Get all chunk embedding IDs from database (we'll need to regenerate embeddings)
204
+ const chunkData = await this.getAllChunksFromDB();
205
+ if (chunkData.length === 0) {
206
+ console.log('No chunks found in database - index rebuild complete with 0 vectors');
207
+ // Update model version if provided
208
+ if (newModelVersion) {
209
+ await this.updateModelVersion(newModelVersion);
210
+ }
211
+ await this.saveIndex();
212
+ return;
213
+ }
214
+ console.log(`Found ${chunkData.length} chunks in database that need re-embedding`);
215
+ // Note: In a complete implementation, we would need to:
216
+ // 1. Re-generate embeddings for all chunks using the new model
217
+ // 2. Add the new vectors to the index
218
+ // For now, we'll create a placeholder implementation that shows the structure
219
+ console.warn('WARNING: Full rebuild requires re-generating embeddings for all chunks.');
220
+ console.warn('This implementation requires integration with the EmbeddingEngine.');
221
+ console.warn('The index has been reset but vectors need to be regenerated.');
222
+ // Check if we need to resize index based on chunk count
223
+ const currentCapacity = 100000; // Default capacity
224
+ if (chunkData.length > currentCapacity * 0.8) {
225
+ const newCapacity = Math.ceil(chunkData.length * 1.5);
226
+ this.vectorIndex.resizeIndex(newCapacity);
227
+ console.log(`Resized index capacity to ${newCapacity} for ${chunkData.length} chunks`);
228
+ }
229
+ // Update model version if provided
230
+ if (newModelVersion) {
231
+ await this.updateModelVersion(newModelVersion);
232
+ }
233
+ // Save the (empty) rebuilt index structure
234
+ await this.saveIndex();
235
+ console.log(`Index rebuild structure complete. ${chunkData.length} chunks need re-embedding.`);
236
+ }
237
+ catch (error) {
238
+ throw new Error(`Failed to rebuild index: ${error instanceof Error ? error.message : 'Unknown error'}`);
239
+ }
240
+ }
241
+ /**
242
+ * Trigger a full rebuild when documents are modified or deleted
243
+ * Requirements: 5.4 - When documents are modified or deleted THEN system SHALL trigger full index rebuild
244
+ */
245
+ async triggerRebuildForDocumentChanges(reason) {
246
+ console.log(`Triggering index rebuild due to: ${reason}`);
247
+ await this.rebuildIndex();
248
+ }
249
+ /**
250
+ * Complete rebuild workflow with embedding regeneration
251
+ * This method should be called by higher-level components that have access to the EmbeddingEngine
252
+ * Requirements: 5.2, 5.4 - Full index rebuild functionality
253
+ */
254
+ async rebuildWithEmbeddings(embeddingEngine) {
255
+ if (!this.db) {
256
+ throw new Error('Database not initialized');
257
+ }
258
+ console.log('Starting complete index rebuild with embedding regeneration...');
259
+ try {
260
+ // Get all chunks that need re-embedding
261
+ const chunkData = await this.getAllChunksFromDB();
262
+ if (chunkData.length === 0) {
263
+ console.log('No chunks found - initializing empty index');
264
+ await this.vectorIndex.initialize();
265
+ await this.updateModelVersion(embeddingEngine.getModelVersion());
266
+ // Store model info for the new model
267
+ const currentModel = this.modelName || config.embedding_model;
268
+ const currentDefaults = getModelDefaults(currentModel);
269
+ await setSystemInfo(this.db, {
270
+ modelName: currentModel,
271
+ modelDimensions: currentDefaults.dimensions
272
+ });
273
+ await this.saveIndex();
274
+ return;
275
+ }
276
+ // Initialize new empty index
277
+ await this.vectorIndex.initialize();
278
+ // Check if we need to resize index
279
+ const currentCapacity = 100000;
280
+ if (chunkData.length > currentCapacity * 0.8) {
281
+ const newCapacity = Math.ceil(chunkData.length * 1.5);
282
+ this.vectorIndex.resizeIndex(newCapacity);
283
+ console.log(`Resized index capacity to ${newCapacity}`);
284
+ }
285
+ // Re-generate embeddings for all chunks
286
+ console.log(`Re-generating embeddings for ${chunkData.length} chunks...`);
287
+ const texts = chunkData.map(chunk => chunk.text);
288
+ const newEmbeddings = await embeddingEngine.embedDocumentBatch(texts);
289
+ if (newEmbeddings.length === 0) {
290
+ throw new Error('Failed to generate any embeddings during rebuild');
291
+ }
292
+ // Add all vectors to the new index
293
+ const vectors = newEmbeddings.map((embedding) => ({
294
+ id: this.hashEmbeddingId(embedding.embedding_id),
295
+ vector: embedding.vector
296
+ }));
297
+ this.vectorIndex.addVectors(vectors);
298
+ console.log(`Added ${vectors.length} vectors to rebuilt index`);
299
+ // Update model version
300
+ await this.updateModelVersion(embeddingEngine.getModelVersion());
301
+ // Store model info for the new model
302
+ const currentModel = this.modelName || config.embedding_model;
303
+ const currentDefaults = getModelDefaults(currentModel);
304
+ await setSystemInfo(this.db, {
305
+ modelName: currentModel,
306
+ modelDimensions: currentDefaults.dimensions
307
+ });
308
+ // Save the rebuilt index
309
+ await this.saveIndex();
310
+ console.log(`Index rebuild complete: ${vectors.length} vectors with new model version`);
311
+ }
312
+ catch (error) {
313
+ throw new Error(`Failed to rebuild index with embeddings: ${error instanceof Error ? error.message : 'Unknown error'}`);
314
+ }
315
+ }
316
+ /**
317
+ * Check if the current model version matches stored version
318
+ * Requirements: 5.1, 5.2 - Compare current embedding model version with stored version
319
+ */
320
+ async checkModelVersion(currentVersion) {
321
+ if (!this.db) {
322
+ throw new Error('Database not initialized');
323
+ }
324
+ try {
325
+ const systemInfo = await getSystemInfo(this.db);
326
+ const storedVersion = systemInfo?.modelVersion;
327
+ if (!storedVersion || storedVersion === "") {
328
+ // No version stored yet, this is first run - store current version
329
+ await setSystemInfo(this.db, { modelVersion: currentVersion });
330
+ console.log(`Stored initial model version: ${currentVersion}`);
331
+ return true;
332
+ }
333
+ const matches = storedVersion === currentVersion;
334
+ if (!matches) {
335
+ console.error(`Model version mismatch detected!`);
336
+ console.error(`Stored version: ${storedVersion}`);
337
+ console.error(`Current version: ${currentVersion}`);
338
+ console.error(`A full index rebuild is required before the system can continue.`);
339
+ }
340
+ return matches;
341
+ }
342
+ catch (error) {
343
+ throw new Error(`Failed to check model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
344
+ }
345
+ }
346
+ /**
347
+ * Update the stored model version after successful rebuild
348
+ * Requirements: 5.5 - Save model name and hash in SQLite for version tracking
349
+ */
350
+ async updateModelVersion(version) {
351
+ if (!this.db) {
352
+ throw new Error('Database not initialized');
353
+ }
354
+ try {
355
+ await setSystemInfo(this.db, { modelVersion: version });
356
+ console.log(`Updated model version to: ${version}`);
357
+ }
358
+ catch (error) {
359
+ throw new Error(`Failed to update model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
360
+ }
361
+ }
362
+ /**
363
+ * Validate model version and exit if mismatch detected
364
+ * Requirements: 5.2 - System SHALL exit with error message until full index rebuild is completed
365
+ */
366
+ async validateModelVersionOrExit(currentVersion) {
367
+ const isValid = await this.checkModelVersion(currentVersion);
368
+ if (!isValid) {
369
+ console.error('\n=== MODEL VERSION MISMATCH ===');
370
+ console.error('The embedding model version has changed since the last index build.');
371
+ console.error('This requires a full index rebuild to maintain consistency.');
372
+ console.error('\nTo rebuild the index, run:');
373
+ console.error(' npm run rebuild-index');
374
+ console.error(' # or');
375
+ console.error(' node dist/cli.js rebuild');
376
+ console.error('\nThe system will now exit.');
377
+ process.exit(1);
378
+ }
379
+ }
380
+ /**
381
+ * Save the vector index to disk
382
+ */
383
+ async saveIndex() {
384
+ if (!this.isInitialized) {
385
+ throw new Error('Index manager not initialized');
386
+ }
387
+ // If we have grouped embeddings, save in grouped format
388
+ if (this.groupedEmbeddings) {
389
+ console.log('IndexManager: Saving in grouped format');
390
+ await this.saveGroupedIndex(this.groupedEmbeddings.text, this.groupedEmbeddings.image);
391
+ // Clear grouped data after saving
392
+ this.groupedEmbeddings = undefined;
393
+ }
394
+ else {
395
+ console.log('IndexManager: Saving in standard format');
396
+ await this.vectorIndex.saveIndex();
397
+ }
398
+ }
399
+ /**
400
+ * Create specialized indexes for text and image content when grouped data is available
401
+ */
402
+ async createSpecializedIndexes() {
403
+ try {
404
+ // Load the index data to check if it has grouped information
405
+ const indexData = await BinaryIndexFormat.load(this.indexPath);
406
+ if (indexData.hasContentTypeGroups && indexData.textVectors && indexData.imageVectors) {
407
+ // Only create specialized indexes if we have both text and image vectors
408
+ // In text-only mode, textVectors would be populated but imageVectors empty
409
+ // In multimodal mode, both would be populated
410
+ const hasTextVectors = indexData.textVectors.length > 0;
411
+ const hasImageVectors = indexData.imageVectors.length > 0;
412
+ if (hasTextVectors && hasImageVectors) {
413
+ console.log('Creating specialized indexes for content type filtering...');
414
+ // Create text-only index
415
+ this.textIndex = new VectorIndex(`${this.indexPath}.text`, this.vectorIndexOptions);
416
+ await this.textIndex.initialize();
417
+ this.textIndex.addVectors(indexData.textVectors);
418
+ console.log(`✓ Text index created with ${indexData.textVectors.length} vectors`);
419
+ // Create image-only index
420
+ this.imageIndex = new VectorIndex(`${this.indexPath}.image`, this.vectorIndexOptions);
421
+ await this.imageIndex.initialize();
422
+ this.imageIndex.addVectors(indexData.imageVectors);
423
+ console.log(`✓ Image index created with ${indexData.imageVectors.length} vectors`);
424
+ console.log('✓ Specialized indexes ready for content type filtering');
425
+ }
426
+ else if (hasTextVectors) {
427
+ console.log('Text-only index detected - using combined index for all searches');
428
+ // In text-only mode, we don't need specialized indexes
429
+ // The combined index (vectorIndex) already contains all text vectors
430
+ }
431
+ }
432
+ }
433
+ catch (error) {
434
+ console.warn('Failed to create specialized indexes, falling back to combined index:', error);
435
+ // Continue without specialized indexes - search will still work with combined index
436
+ }
437
+ }
438
+ /**
439
+ * Save index with content type grouping (for new grouped format)
440
+ */
441
+ async saveGroupedIndex(textEmbeddings, imageEmbeddings) {
442
+ if (!this.isInitialized) {
443
+ throw new Error('Index manager not initialized');
444
+ }
445
+ console.log(`saveGroupedIndex: text=${textEmbeddings.length}, image=${imageEmbeddings.length}`);
446
+ // Group vectors by content type
447
+ const textVectors = textEmbeddings.map((embedding) => ({
448
+ id: this.hashEmbeddingId(embedding.embedding_id),
449
+ vector: embedding.vector
450
+ }));
451
+ const imageVectors = imageEmbeddings.map((embedding) => ({
452
+ id: this.hashEmbeddingId(embedding.embedding_id),
453
+ vector: embedding.vector
454
+ }));
455
+ // Get index parameters
456
+ const options = this.vectorIndex.getOptions();
457
+ const allVectors = [...textVectors, ...imageVectors];
458
+ console.log(`saveGroupedIndex: dimensions=${options.dimensions}, totalVectors=${allVectors.length}`);
459
+ const indexData = {
460
+ dimensions: options.dimensions,
461
+ maxElements: options.maxElements,
462
+ M: options.M || 16,
463
+ efConstruction: options.efConstruction || 200,
464
+ seed: options.seed || 100,
465
+ currentSize: textVectors.length + imageVectors.length,
466
+ vectors: allVectors, // Required for backward compatibility
467
+ hasContentTypeGroups: true,
468
+ textVectors,
469
+ imageVectors
470
+ };
471
+ console.log('saveGroupedIndex: Calling BinaryIndexFormat.saveGrouped');
472
+ // Save using grouped format
473
+ await BinaryIndexFormat.saveGrouped(this.indexPath, indexData);
474
+ console.log(`✓ Saved grouped index with ${textVectors.length} text and ${imageVectors.length} image vectors`);
475
+ }
476
+ /**
477
+ * Search for similar vectors
478
+ */
479
+ search(queryVector, k = 5, contentType) {
480
+ if (!this.isInitialized) {
481
+ throw new Error('Index manager not initialized');
482
+ }
483
+ // Select the appropriate index based on content type
484
+ let targetIndex;
485
+ // If we have specialized indexes (multimodal mode), use them for filtering
486
+ if (this.textIndex && this.imageIndex) {
487
+ if (contentType === 'text') {
488
+ targetIndex = this.textIndex;
489
+ }
490
+ else if (contentType === 'image') {
491
+ targetIndex = this.imageIndex;
492
+ }
493
+ else {
494
+ // 'combined' or undefined
495
+ targetIndex = this.vectorIndex;
496
+ }
497
+ }
498
+ else {
499
+ // No specialized indexes (text-only mode) - ignore contentType and use combined index
500
+ targetIndex = this.vectorIndex;
501
+ }
502
+ const results = targetIndex.search(queryVector, k);
503
+ // Convert numeric IDs back to embedding IDs
504
+ const embeddingIds = results.neighbors.map(id => this.unhashEmbeddingId(id));
505
+ return {
506
+ embeddingIds,
507
+ distances: results.distances
508
+ };
509
+ }
510
+ /**
511
+ * Get index statistics
512
+ */
513
+ async getStats() {
514
+ if (!this.db) {
515
+ throw new Error('Database not initialized');
516
+ }
517
+ const totalVectors = this.vectorIndex.getCurrentCount();
518
+ try {
519
+ const systemInfo = await getSystemInfo(this.db);
520
+ const modelVersion = systemInfo?.modelVersion || null;
521
+ return {
522
+ totalVectors,
523
+ modelVersion: modelVersion || 'unknown',
524
+ lastUpdated: new Date() // Could be enhanced to track actual last update time
525
+ };
526
+ }
527
+ catch (error) {
528
+ throw new Error(`Failed to get stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
529
+ }
530
+ }
531
+ /**
532
+ * Get all chunks from database for rebuild (returns chunk data, not embeddings)
533
+ * Note: Embeddings need to be regenerated during rebuild since we don't store vectors in DB
534
+ */
535
+ async getAllChunksFromDB() {
536
+ if (!this.db) {
537
+ throw new Error('Database not initialized');
538
+ }
539
+ try {
540
+ const rows = await this.db.all('SELECT embedding_id, content as text, document_id FROM chunks ORDER BY id');
541
+ return rows.map(row => ({
542
+ embedding_id: row.embedding_id,
543
+ text: row.text,
544
+ document_id: row.document_id
545
+ }));
546
+ }
547
+ catch (error) {
548
+ throw new Error(`Failed to get chunks from DB: ${error instanceof Error ? error.message : 'Unknown error'}`);
549
+ }
550
+ }
551
+ /**
552
+ * Convert embedding ID string to numeric ID for hnswlib with collision handling
553
+ */
554
+ hashEmbeddingId(embeddingId) {
555
+ // Check if we already have a mapping for this embedding ID
556
+ if (this.embeddingIdToHash.has(embeddingId)) {
557
+ return this.embeddingIdToHash.get(embeddingId);
558
+ }
559
+ let hash = 0;
560
+ for (let i = 0; i < embeddingId.length; i++) {
561
+ const char = embeddingId.charCodeAt(i);
562
+ hash = ((hash << 5) - hash) + char;
563
+ hash = hash & hash; // Convert to 32-bit integer
564
+ }
565
+ hash = Math.abs(hash);
566
+ // Handle hash collisions by incrementing until we find an unused hash
567
+ let finalHash = hash;
568
+ while (this.hashToEmbeddingId.has(finalHash) && this.hashToEmbeddingId.get(finalHash) !== embeddingId) {
569
+ finalHash = (finalHash + 1) & 0x7FFFFFFF; // Keep it positive
570
+ }
571
+ // Store the bidirectional mapping
572
+ this.embeddingIdToHash.set(embeddingId, finalHash);
573
+ this.hashToEmbeddingId.set(finalHash, embeddingId);
574
+ return finalHash;
575
+ }
576
+ /**
577
+ * Convert numeric ID back to embedding ID using the maintained mapping
578
+ */
579
+ unhashEmbeddingId(numericId) {
580
+ const embeddingId = this.hashToEmbeddingId.get(numericId);
581
+ if (!embeddingId) {
582
+ console.warn(`Warning: No embedding ID found for hash ${numericId}. This may indicate index/database synchronization issues.`);
583
+ console.warn('Consider running "raglite rebuild" to fix synchronization problems.');
584
+ throw new Error(`No embedding ID found for hash ${numericId}`);
585
+ }
586
+ return embeddingId;
587
+ }
588
+ /**
589
+ * Close database connection
590
+ */
591
+ async close() {
592
+ if (this.db) {
593
+ await this.db.close();
594
+ this.db = null;
595
+ }
596
+ }
597
+ }
598
+ //# sourceMappingURL=index-manager.js.map
@@ -0,0 +1,75 @@
1
+ /**
2
+ * rag-lite-ts - Clean Architecture with Factory Pattern
3
+ *
4
+ * Quick Start (Recommended):
5
+ * ```typescript
6
+ * import { SearchFactory, IngestionFactory } from 'rag-lite-ts';
7
+ *
8
+ * // Simple search - just works!
9
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite');
10
+ * const results = await search.search('your query');
11
+ *
12
+ * // Simple ingestion - just works!
13
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
14
+ * await ingestion.ingestDirectory('./documents');
15
+ * ```
16
+ *
17
+ * With Configuration:
18
+ * ```typescript
19
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite', {
20
+ * embeddingModel: 'Xenova/all-mpnet-base-v2',
21
+ * enableReranking: true
22
+ * });
23
+ * ```
24
+ *
25
+ * Complete RAG System:
26
+ * ```typescript
27
+ * import { RAGFactory } from 'rag-lite-ts';
28
+ *
29
+ * const { searchEngine, ingestionPipeline } = await RAGFactory.createBoth(
30
+ * './index.bin',
31
+ * './db.sqlite'
32
+ * );
33
+ * ```
34
+ *
35
+ * Advanced Usage (Direct Dependency Injection):
36
+ * ```typescript
37
+ * import { CoreSearchEngine, createTextEmbedFunction } from 'rag-lite-ts';
38
+ *
39
+ * const embedFn = await createTextEmbedFunction();
40
+ * const search = new CoreSearchEngine(embedFn, indexManager, db);
41
+ * ```
42
+ */
43
+ export { IngestionFactory, SearchFactory } from './factories/index.js';
44
+ export type { IngestionFactoryOptions } from './factories/index.js';
45
+ export { SearchEngine as CoreSearchEngine } from './core/search.js';
46
+ export { IngestionPipeline as CoreIngestionPipeline } from './core/ingestion.js';
47
+ export { SearchEngine } from './search.js';
48
+ export { IngestionPipeline } from './ingestion.js';
49
+ export { LazyEmbedderLoader, LazyRerankerLoader, LazyMultimodalLoader, LazyDependencyManager } from './core/lazy-dependency-loader.js';
50
+ export type { EmbedFunction, RerankFunction, EmbeddingQueryInterface, RerankingInterface, SearchEngineConfig, ContentTypeStrategy, ModelAgnosticInterface, ExtendedEmbeddingInterface, ExtendedRerankingInterface, SearchPipelineInterface, SearchDependencyFactory } from './core/interfaces.js';
51
+ export { InterfaceValidator } from './core/interfaces.js';
52
+ export { validateModeModelCompatibility, validateModeModelCompatibilityOrThrow, getRecommendedModelsForMode, isModeModelCompatible, getCompatibleModelsForMode, type ModeModelValidationResult } from './core/mode-model-validator.js';
53
+ export { createMissingFileError, createInvalidPathError, createModelLoadingError, createDimensionMismatchError, createModeMismatchError, createInvalidContentError, createMissingDependencyError, createFactoryCreationError, enhanceError, createContextualError, type ActionableErrorConfig } from './core/actionable-error-messages.js';
54
+ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction } from './text/embedder.js';
55
+ export type { UniversalEmbedder } from './core/universal-embedder.js';
56
+ export { CLIPEmbedder } from './multimodal/clip-embedder.js';
57
+ export { createEmbedder } from './core/embedder-factory.js';
58
+ export { CrossEncoderReranker, createTextRerankFunction } from './text/reranker.js';
59
+ export { countTokens } from './text/tokenizer.js';
60
+ export type { RerankingStrategyType, RerankingConfig } from './core/reranking-config.js';
61
+ export { validateRerankingStrategy, validateRerankingConfig, getDefaultRerankingConfig, isStrategySupported, getSupportedStrategies, RerankingConfigBuilder, DEFAULT_TEXT_RERANKING_CONFIG, DEFAULT_MULTIMODAL_RERANKING_CONFIG } from './core/reranking-config.js';
62
+ export { openDatabase, initializeSchema, insertDocument, insertChunk, upsertDocument, getChunksByEmbeddingIds, type DatabaseConnection } from './core/db.js';
63
+ export { IndexManager } from './index-manager.js';
64
+ export { VectorIndex } from './core/vector-index.js';
65
+ export { config, getModelDefaults, type CoreConfig, type ExtensibleConfig, type ModelDefaults, EXIT_CODES, ConfigurationError, getDefaultModelCachePath, handleUnrecoverableError, logError } from './core/config.js';
66
+ export { discoverFiles, processFiles, discoverAndProcessFiles, DEFAULT_FILE_PROCESSOR_OPTIONS, type FileProcessorOptions, type FileDiscoveryResult, type DocumentProcessingResult } from './file-processor.js';
67
+ export { chunkDocument, type ChunkConfig } from './core/chunker.js';
68
+ export { DocumentPathManager } from './core/path-manager.js';
69
+ export { resolveRagLitePaths, ensureRagLiteStructure, migrateToRagLiteStructure, getStandardRagLitePaths, type RagLiteConfig, type RagLitePaths } from './core/raglite-paths.js';
70
+ export type { SearchResult, SearchOptions, Document, EmbeddingResult, ContentDocument, ContentChunk } from './core/types.js';
71
+ export type { Chunk, Preprocessor, PreprocessorOptions, PreprocessingConfig } from './types.js';
72
+ export type { IngestionOptions, IngestionResult } from './core/ingestion.js';
73
+ export { handleError, safeExecute, ErrorCategory, ErrorSeverity, createError, type ErrorContext } from './core/error-handler.js';
74
+ export { APIError, IngestionError, SearchError, ResourceError, ModelCompatibilityError, ErrorFactory, CommonErrors, handleAPIError } from './api-errors.js';
75
+ //# sourceMappingURL=index.d.ts.map