rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310)
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,471 @@
1
+ import { existsSync, statSync } from 'fs';
2
+ import { resolve } from 'path';
3
+ import { IngestionFactory } from '../factories/ingestion-factory.js';
4
+ import { withCLIDatabaseAccess, setupCLICleanup, isDatabaseBusy } from '../core/cli-database-utils.js';
5
+ import { EXIT_CODES, ConfigurationError } from '../core/config.js';
6
+ /**
7
+ * Validate mode-specific model and strategy combinations
8
+ * Ensures that the selected model is compatible with the chosen mode
9
+ * and that reranking strategies are valid for the mode
10
+ */
11
+ async function validateModeConfiguration(options) {
12
+ const mode = options.mode || 'text';
13
+ const model = options.embeddingModel;
14
+ // Define supported models for each mode
15
+ const textModels = [
16
+ 'sentence-transformers/all-MiniLM-L6-v2',
17
+ 'Xenova/all-mpnet-base-v2'
18
+ ];
19
+ const multimodalModels = [
20
+ 'Xenova/clip-vit-base-patch32',
21
+ 'Xenova/clip-vit-base-patch16'
22
+ ];
23
+ // Validate model compatibility with mode
24
+ if (model) {
25
+ if (mode === 'text' && !textModels.includes(model)) {
26
+ if (multimodalModels.includes(model)) {
27
+ throw new ConfigurationError(`Model '${model}' is a multimodal model but text mode was selected.\n` +
28
+ `\n` +
29
+ `To use this model, specify multimodal mode:\n` +
30
+ ` raglite ingest <path> --mode multimodal --model ${model}\n` +
31
+ `\n` +
32
+ `Or choose a text model for text mode:\n` +
33
+ ` ${textModels.map(m => `raglite ingest <path> --model ${m}`).join('\n ')}\n`, EXIT_CODES.INVALID_ARGUMENTS);
34
+ }
35
+ else {
36
+ throw new ConfigurationError(`Model '${model}' is not supported for text mode.\n` +
37
+ `\n` +
38
+ `Supported models for text mode:\n` +
39
+ ` ${textModels.join('\n ')}\n` +
40
+ `\n` +
41
+ `Examples:\n` +
42
+ ` raglite ingest <path> --model sentence-transformers/all-MiniLM-L6-v2\n` +
43
+ ` raglite ingest <path> --model Xenova/all-mpnet-base-v2\n`, EXIT_CODES.INVALID_ARGUMENTS);
44
+ }
45
+ }
46
+ if (mode === 'multimodal' && !multimodalModels.includes(model)) {
47
+ if (textModels.includes(model)) {
48
+ throw new ConfigurationError(`Model '${model}' is a text-only model but multimodal mode was selected.\n` +
49
+ `\n` +
50
+ `To use this model, specify text mode:\n` +
51
+ ` raglite ingest <path> --mode text --model ${model}\n` +
52
+ `\n` +
53
+ `Or choose a multimodal model for multimodal mode:\n` +
54
+ ` ${multimodalModels.map(m => `raglite ingest <path> --mode multimodal --model ${m}`).join('\n ')}\n`, EXIT_CODES.INVALID_ARGUMENTS);
55
+ }
56
+ else {
57
+ throw new ConfigurationError(`Model '${model}' is not supported for multimodal mode.\n` +
58
+ `\n` +
59
+ `Supported models for multimodal mode:\n` +
60
+ ` ${multimodalModels.join('\n ')}\n` +
61
+ `\n` +
62
+ `Example:\n` +
63
+ ` raglite ingest <path> --mode multimodal --model Xenova/clip-vit-base-patch32\n`, EXIT_CODES.INVALID_ARGUMENTS);
64
+ }
65
+ }
66
+ }
67
+ // Log the final configuration
68
+ console.log('✅ Mode configuration validated successfully');
69
+ if (mode !== 'text') {
70
+ console.log(` Mode: ${mode}`);
71
+ }
72
+ if (model) {
73
+ console.log(` Model: ${model}`);
74
+ }
75
+ }
/**
 * Run document ingestion from CLI.
 *
 * Steps, in order: check that the path exists and can be stat'ed; for a single
 * file, check its extension against the formats allowed in the selected mode;
 * validate the mode/model combination; optionally remove the old vector index;
 * create a pipeline through IngestionFactory and ingest the path; print a summary.
 * Each failure path prints guidance to stderr and terminates the process with an
 * EXIT_CODES value.
 *
 * @param path - File or directory path to ingest
 * @param options - CLI options; `model`, `mode` and `rebuildIfNeeded` are read
 * @returns Promise that settles after the summary is printed; the process is then
 *          terminated through process.exit
 */
export async function runIngest(path, options = {}) {
    try {
        // Validate path exists
        const resolvedPath = resolve(path);
        if (!existsSync(resolvedPath)) {
            console.error(`Error: Path does not exist: ${path}`);
            console.error('');
            console.error('Please check:');
            console.error('- The path is spelled correctly');
            console.error('- You have read permissions for the path');
            console.error('- The file or directory exists');
            console.error('');
            console.error('Examples:');
            console.error(' raglite ingest ./docs/ # Ingest directory');
            console.error(' raglite ingest ./readme.md # Ingest single file');
            process.exit(EXIT_CODES.FILE_NOT_FOUND);
        }
        // Check if it's a file or directory
        let stats;
        try {
            stats = statSync(resolvedPath);
        }
        catch (error) {
            console.error(`Error: Cannot access path: ${path}`);
            console.error('');
            if (error instanceof Error && error.message.includes('EACCES')) {
                console.error('Permission denied. Please check that you have read permissions for this path.');
                process.exit(EXIT_CODES.PERMISSION_ERROR);
            }
            else {
                console.error('The path exists but cannot be accessed. Please check permissions.');
                process.exit(EXIT_CODES.FILE_NOT_FOUND);
            }
        }
        const pathType = stats.isDirectory() ? 'directory' : 'file';
        // Validate file type for single files
        if (stats.isFile()) {
            const mode = options.mode || 'text';
            // Only formats with actual processing implementations
            const textExtensions = ['.md', '.txt', '.mdx', '.pdf', '.docx'];
            const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
            const validExtensions = mode === 'multimodal'
                ? [...textExtensions, ...imageExtensions]
                : textExtensions;
            const hasValidExtension = validExtensions.some(ext => path.toLowerCase().endsWith(ext));
            if (!hasValidExtension) {
                console.error(`Error: Unsupported file type: ${path}`);
                console.error('');
                if (mode === 'multimodal') {
                    console.error('Supported file types in multimodal mode:');
                    console.error(' Text: .md, .txt, .mdx');
                    console.error(' Documents: .pdf, .docx');
                    console.error(' Images: .jpg, .jpeg, .png, .gif, .webp, .bmp');
                }
                else {
                    console.error('Supported file types in text mode:');
                    console.error(' Text: .md, .txt, .mdx');
                    console.error(' Documents: .pdf, .docx');
                    console.error('');
                    console.error('For image files, use --mode multimodal:');
                    console.error(' raglite ingest <path> --mode multimodal');
                }
                console.error('');
                console.error('If you want to ingest multiple files, provide a directory path instead.');
                process.exit(EXIT_CODES.INVALID_ARGUMENTS);
            }
        }
        console.log(`Starting ingestion of ${pathType}: ${resolvedPath}`);
        console.log('This may take a while for large document collections...');
        console.log('');
        // Prepare factory options
        const factoryOptions = {};
        if (options.model) {
            factoryOptions.embeddingModel = options.model;
            console.log(`Using embedding model: ${options.model}`);
        }
        if (options.mode) {
            factoryOptions.mode = options.mode;
            console.log(`Using processing mode: ${options.mode}`);
        }
        if (options.rebuildIfNeeded) {
            factoryOptions.forceRebuild = true;
            console.log('Force rebuild enabled due to rebuildIfNeeded option');
        }
        // Validate the mode/model combination BEFORE touching anything on disk, so a
        // rejected configuration cannot leave the user without their existing index.
        await validateModeConfiguration(factoryOptions);
        const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
        const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
        // With rebuildIfNeeded, delete the old index file before the pipeline is
        // created to prevent dimension mismatch errors. Removal is best-effort.
        if (options.rebuildIfNeeded && existsSync(indexPath)) {
            const { unlinkSync } = await import('fs');
            try {
                unlinkSync(indexPath);
                console.log('🗑️ Removed old index file to prevent dimension mismatch');
            }
            catch (error) {
                console.warn(`⚠️ Could not remove old index file: ${error}`);
            }
        }
        // Setup graceful cleanup
        setupCLICleanup(dbPath);
        // Check if database is busy before starting (informational only; we proceed either way)
        const busyStatus = await isDatabaseBusy(dbPath);
        if (busyStatus.isBusy) {
            console.log('⚠️ Database appears to be in use by another process');
            console.log(` Reason: ${busyStatus.reason}`);
            console.log(' Attempting to proceed anyway...');
            console.log('');
        }
        // Create ingestion pipeline using factory
        let pipeline;
        try {
            // Create ingestion pipeline using IngestionFactory with database protection
            pipeline = await withCLIDatabaseAccess(dbPath, () => IngestionFactory.create(dbPath, indexPath, factoryOptions), {
                commandName: 'Ingestion command',
                showProgress: true,
                maxWaitMs: 15000 // Longer timeout for ingestion
            });
            const result = await pipeline.ingestPath(resolvedPath, { mode: factoryOptions.mode });
            // Display final results
            console.log('\n' + '='.repeat(50));
            console.log('INGESTION SUMMARY');
            console.log('='.repeat(50));
            console.log(`Documents processed: ${result.documentsProcessed}`);
            console.log(`Chunks created: ${result.chunksCreated}`);
            console.log(`Embeddings generated: ${result.embeddingsGenerated}`);
            if (result.documentErrors > 0) {
                console.log(`Document errors: ${result.documentErrors}`);
            }
            if (result.embeddingErrors > 0) {
                console.log(`Embedding errors: ${result.embeddingErrors}`);
            }
            console.log(`Total processing time: ${(result.processingTimeMs / 1000).toFixed(2)} seconds`);
            if (result.chunksCreated > 0) {
                const chunksPerSecond = (result.chunksCreated / (result.processingTimeMs / 1000)).toFixed(1);
                console.log(`Processing rate: ${chunksPerSecond} chunks/second`);
            }
            console.log('\nIngestion completed successfully!');
            // Display mode-specific information
            const mode = options.mode || 'text';
            if (mode === 'multimodal') {
                console.log('✨ Multimodal mode enabled - you can now search across text and image content');
            }
            console.log('You can now search your documents using: raglite search "your query"');
            console.log('');
            console.log('💡 The search command will automatically detect and use the ingestion mode.');
        }
        finally {
            if (pipeline) {
                await pipeline.cleanup();
            }
            // Ensure clean exit for CLI commands
            const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
            await DatabaseConnectionManager.closeAllConnections();
            // Force exit for CLI commands to prevent hanging. On failure the outer
            // catch calls process.exit synchronously before this timer can fire.
            setTimeout(() => {
                process.exit(0);
            }, 100);
        }
    }
    catch (error) {
        console.error('\n' + '='.repeat(50));
        console.error('INGESTION FAILED');
        console.error('='.repeat(50));
        if (error instanceof ConfigurationError) {
            console.error('Configuration Error:');
            console.error(error.message);
            process.exit(error.exitCode);
        }
        else if (error instanceof Error) {
            console.error('Error:', error.message);
            console.error('');
            // Provide specific help for common error types. Classification is by
            // substring of the message, so the order of these checks matters.
            if (error.message.includes('ENOENT')) {
                console.error('File/Directory Not Found:');
                console.error('- Verify the path exists and is accessible');
                console.error('- Check file and directory permissions');
                console.error('- Ensure you have read access to the specified location');
                process.exit(EXIT_CODES.FILE_NOT_FOUND);
            }
            else if (error.message.includes('EACCES') || error.message.includes('permission')) {
                console.error('Permission Denied:');
                console.error('- Check that you have read permissions for the files/directories');
                console.error('- Ensure the database file is not locked by another process');
                console.error('- Try running with appropriate permissions');
                process.exit(EXIT_CODES.PERMISSION_ERROR);
            }
            else if (error.message.includes('SQLITE') || error.message.includes('database')) {
                console.error('Database Error:');
                console.error('- The database file may be corrupted or locked');
                console.error('- Try running: raglite rebuild');
                console.error('- Ensure no other RAG-lite processes are running');
                console.error('- Check available disk space');
                process.exit(EXIT_CODES.DATABASE_ERROR);
            }
            else if (error.message.includes('ONNX') || error.message.includes('model')) {
                console.error('Model Loading Error:');
                console.error('- The embedding model failed to load');
                console.error('- This may happen on first run while downloading the model');
                console.error('- Ensure you have internet connection for initial model download');
                console.error('- Check available disk space in the models directory');
                console.error('- Try running the command again');
                process.exit(EXIT_CODES.MODEL_ERROR);
            }
            else if (error.message.includes('index') || error.message.includes('vector')) {
                console.error('Vector Index Error:');
                console.error('- The vector index may be corrupted');
                console.error('- Try running: raglite rebuild');
                console.error('- Check available disk space');
                process.exit(EXIT_CODES.INDEX_ERROR);
            }
            else {
                console.error('General Error:');
                console.error('- An unexpected error occurred during ingestion');
                console.error('- Check the error message above for more details');
                console.error('- Try running the command again');
                console.error('- If the problem persists, try: raglite rebuild');
                process.exit(EXIT_CODES.GENERAL_ERROR);
            }
        }
        else {
            console.error('Unknown error:', String(error));
            process.exit(EXIT_CODES.GENERAL_ERROR);
        }
    }
}
309
/**
 * Run index rebuild from CLI.
 *
 * Reads the database and index locations from the RAG_DB_FILE and
 * RAG_INDEX_FILE environment variables (falling back to './db.sqlite' and
 * './vector-index.bin'), creates an ingestion pipeline with `forceRebuild`
 * enabled, and re-ingests every distinct `source` recorded in the
 * `documents` table that still exists on disk. Sources that are no longer
 * present are skipped and counted as document errors.
 *
 * When the database file already exists, the previously stored mode,
 * embedding model and reranking strategy are detected and reused; if that
 * detection fails a warning is printed and the factory defaults are used.
 *
 * This function terminates the process: exit code 0 is scheduled shortly
 * after resources are released, and any failure is reported on stderr and
 * mapped to one of the EXIT_CODES values (or the ConfigurationError's own
 * exit code).
 *
 * @returns {Promise<void>}
 */
export async function runRebuild() {
    try {
        console.log('Starting index rebuild...');
        console.log('This will regenerate all embeddings and rebuild the vector index.');
        console.log('This may take a while depending on your document collection size.');
        console.log('');
        console.log('Progress will be shown below...');
        console.log('');
        // Detect mode from existing database for rebuild
        const dbPath = process.env.RAG_DB_FILE || './db.sqlite';
        const indexPath = process.env.RAG_INDEX_FILE || './vector-index.bin';
        let rebuildOptions = { forceRebuild: true };
        if (existsSync(dbPath)) {
            try {
                // Import mode detection service
                const { ModeDetectionService } = await import('../core/mode-detection-service.js');
                const modeService = new ModeDetectionService(dbPath);
                const systemInfo = await modeService.detectMode();
                console.log(`🎯 Detected existing configuration:`);
                console.log(` Mode: ${systemInfo.mode}`);
                console.log(` Model: ${systemInfo.modelName}`);
                console.log(` Reranking: ${systemInfo.rerankingStrategy}`);
                console.log('');
                // Use the detected configuration for rebuild
                rebuildOptions.mode = systemInfo.mode;
                rebuildOptions.embeddingModel = systemInfo.modelName;
                rebuildOptions.rerankingStrategy = systemInfo.rerankingStrategy;
            }
            catch (error) {
                // Best effort only: a failed detection must not abort the rebuild.
                console.warn('⚠️ Could not detect existing mode configuration, using defaults');
                console.warn(` Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            }
        }
        // Create ingestion pipeline with force rebuild using factory
        const pipeline = await IngestionFactory.create(dbPath, indexPath, rebuildOptions);
        try {
            // Get all documents from database and re-ingest them
            const { openDatabase } = await import('../core/db.js');
            const db = await openDatabase(dbPath);
            try {
                const documents = await db.all('SELECT DISTINCT source FROM documents ORDER BY source');
                if (documents.length === 0) {
                    throw new Error('No documents found in database. Nothing to rebuild.');
                }
                console.log(`Found ${documents.length} document${documents.length === 1 ? '' : 's'} to rebuild`);
                let totalResult = {
                    documentsProcessed: 0,
                    chunksCreated: 0,
                    embeddingsGenerated: 0,
                    documentErrors: 0,
                    embeddingErrors: 0,
                    processingTimeMs: 0
                };
                // Re-ingest each document
                for (const doc of documents) {
                    if (existsSync(doc.source)) {
                        console.log(`Rebuilding: ${doc.source}`);
                        const result = await pipeline.ingestFile(doc.source);
                        totalResult.documentsProcessed += result.documentsProcessed;
                        totalResult.chunksCreated += result.chunksCreated;
                        totalResult.embeddingsGenerated += result.embeddingsGenerated;
                        totalResult.documentErrors += result.documentErrors;
                        totalResult.embeddingErrors += result.embeddingErrors;
                        totalResult.processingTimeMs += result.processingTimeMs;
                    }
                    else {
                        // The recorded source no longer exists on disk; count it as an error and move on.
                        console.warn(`⚠️ Document not found, skipping: ${doc.source}`);
                        totalResult.documentErrors++;
                    }
                }
                console.log('\n' + '='.repeat(50));
                console.log('REBUILD COMPLETE');
                console.log('='.repeat(50));
                console.log(`Documents processed: ${totalResult.documentsProcessed}`);
                console.log(`Chunks created: ${totalResult.chunksCreated}`);
                console.log(`Embeddings generated: ${totalResult.embeddingsGenerated}`);
                if (totalResult.documentErrors > 0) {
                    console.log(`Document errors: ${totalResult.documentErrors}`);
                }
                if (totalResult.embeddingErrors > 0) {
                    console.log(`Embedding errors: ${totalResult.embeddingErrors}`);
                }
                console.log(`Total processing time: ${(totalResult.processingTimeMs / 1000).toFixed(2)} seconds`);
                console.log('');
                console.log('The vector index has been successfully rebuilt.');
                console.log('All embeddings have been regenerated with the current model.');
                console.log('');
                console.log('You can now search your documents using: raglite search "your query"');
            }
            finally {
                await db.close();
            }
        }
        finally {
            try {
                await pipeline.cleanup();
            }
            finally {
                // Ensure clean exit for CLI commands.
                // Runs even when pipeline.cleanup() rejects, so open database
                // connections are still closed before the process exits.
                const { DatabaseConnectionManager } = await import('../core/database-connection-manager.js');
                await DatabaseConnectionManager.closeAllConnections();
            }
            // Force exit for CLI commands to prevent hanging.
            // On the failure path the catch block below exits synchronously
            // with an error code before this timer can fire.
            setTimeout(() => {
                process.exit(0);
            }, 100);
        }
    }
    catch (error) {
        console.error('\n' + '='.repeat(50));
        console.error('REBUILD FAILED');
        console.error('='.repeat(50));
        if (error instanceof ConfigurationError) {
            console.error('Configuration Error:');
            console.error(error.message);
            process.exit(error.exitCode);
        }
        else if (error instanceof Error) {
            console.error('Error:', error.message);
            console.error('');
            // The category is chosen by substring match on the message; the
            // order matters because the first matching branch wins.
            if (error.message.includes('No documents found') || error.message.includes('empty')) {
                console.error('No Documents Found:');
                console.error('- Make sure you have ingested documents first');
                console.error('- Run: raglite ingest <path>');
                console.error('- Check that your documents are in supported formats (.md, .txt)');
                process.exit(EXIT_CODES.FILE_NOT_FOUND);
            }
            else if (error.message.includes('SQLITE') || error.message.includes('database')) {
                console.error('Database Error:');
                console.error('- The database file may be missing or corrupted');
                console.error('- Try deleting db.sqlite and re-ingesting your documents');
                console.error('- Ensure you have write permissions in the current directory');
                process.exit(EXIT_CODES.DATABASE_ERROR);
            }
            else if (error.message.includes('model') || error.message.includes('ONNX')) {
                console.error('Model Error:');
                console.error('- The embedding model failed to load');
                console.error('- Ensure you have internet connection for model download');
                console.error('- Check available disk space');
                console.error('- Try deleting the models directory and running again');
                process.exit(EXIT_CODES.MODEL_ERROR);
            }
            else if (error.message.includes('index') || error.message.includes('vector')) {
                console.error('Index Error:');
                console.error('- Failed to rebuild the vector index');
                console.error('- Try deleting vector-index.bin and running again');
                console.error('- Check available disk space');
                process.exit(EXIT_CODES.INDEX_ERROR);
            }
            else {
                console.error('General Error:');
                console.error('- An unexpected error occurred during rebuild');
                console.error('- Try deleting db.sqlite and vector-index.bin, then re-ingest');
                console.error('- Check available disk space and permissions');
                process.exit(EXIT_CODES.GENERAL_ERROR);
            }
        }
        else {
            console.error('Unknown error:', String(error));
            process.exit(EXIT_CODES.GENERAL_ERROR);
        }
    }
}
471
+ //# sourceMappingURL=indexer.js.map
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Run search from CLI; the returned promise resolves with no value
3
+ * @param query - Search query string or image file path
4
+ * @param options - Optional CLI options including top-k and rerank settings
5
+ */
6
+ export declare function runSearch(query: string, options?: Record<string, any>): Promise<void>;
7
+ //# sourceMappingURL=search.d.ts.map