rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,287 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ /**
6
+ * Core search pipeline coordinator
7
+ * Orchestrates the search pipeline: query processing → vector search → metadata retrieval → optional reranking
8
+ * Remains completely independent of specific embedding models or transformer libraries
9
+ */
10
+ export class SearchPipelineCoordinator {
11
+ embedQueryFn = null;
12
+ rerankResultsFn = null;
13
+ indexManager = null;
14
+ dbConnection = null;
15
+ defaultContentType = 'text';
16
+ /**
17
+ * Set the embedding function for query processing
18
+ */
19
+ setEmbedFunction(embedFn) {
20
+ this.embedQueryFn = embedFn;
21
+ }
22
+ /**
23
+ * Set the reranking function for result reranking
24
+ */
25
+ setRerankFunction(rerankFn) {
26
+ this.rerankResultsFn = rerankFn;
27
+ }
28
+ /**
29
+ * Set the index manager for vector search
30
+ */
31
+ setIndexManager(indexManager) {
32
+ this.indexManager = indexManager;
33
+ }
34
+ /**
35
+ * Set the database connection for metadata retrieval
36
+ */
37
+ setDatabaseConnection(dbConnection) {
38
+ this.dbConnection = dbConnection;
39
+ }
40
+ /**
41
+ * Set the default content type
42
+ */
43
+ setDefaultContentType(contentType) {
44
+ this.defaultContentType = contentType;
45
+ }
46
+ /**
47
+ * Execute the complete search pipeline
48
+ * Coordinates all steps without knowledge of specific embedding models
49
+ */
50
+ async executeSearchPipeline(query, options = {}) {
51
+ if (!query || query.trim().length === 0) {
52
+ return [];
53
+ }
54
+ const startTime = performance.now();
55
+ const topK = options.top_k || 10;
56
+ const shouldRerank = options.rerank !== undefined ? options.rerank : (this.rerankResultsFn !== null);
57
+ const contentType = options.contentType || this.defaultContentType;
58
+ // Validate dependencies
59
+ this.validateDependencies();
60
+ try {
61
+ // Step 1: Query processing and embedding
62
+ const embeddingStartTime = performance.now();
63
+ const queryEmbedding = await this.embedQuery(query, contentType);
64
+ const embeddingTime = performance.now() - embeddingStartTime;
65
+ // Step 2: Vector search
66
+ const searchStartTime = performance.now();
67
+ const searchResult = await this.vectorSearch(queryEmbedding.vector, topK);
68
+ const vectorSearchTime = performance.now() - searchStartTime;
69
+ if (searchResult.embeddingIds.length === 0) {
70
+ const totalTime = performance.now() - startTime;
71
+ console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
72
+ return [];
73
+ }
74
+ // Step 3: Metadata retrieval
75
+ const retrievalStartTime = performance.now();
76
+ const chunks = await this.retrieveMetadata(searchResult.embeddingIds);
77
+ const retrievalTime = performance.now() - retrievalStartTime;
78
+ // Step 4: Format initial results
79
+ let results = this.formatResults(chunks, searchResult.distances, searchResult.embeddingIds);
80
+ // Step 5: Optional reranking
81
+ let rerankTime = 0;
82
+ if (shouldRerank && this.rerankResultsFn && results.length > 1) {
83
+ try {
84
+ const rerankStartTime = performance.now();
85
+ results = await this.rerankResults(query, results, contentType);
86
+ rerankTime = performance.now() - rerankStartTime;
87
+ }
88
+ catch (error) {
89
+ console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
90
+ }
91
+ }
92
+ const totalTime = performance.now() - startTime;
93
+ console.log(`Search pipeline completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
94
+ `(embed: ${embeddingTime.toFixed(2)}ms, vector: ${vectorSearchTime.toFixed(2)}ms, ` +
95
+ `retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
96
+ return results;
97
+ }
98
+ catch (error) {
99
+ throw new Error(`Search pipeline failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
100
+ }
101
+ }
102
+ /**
103
+ * Step 1: Process and embed the query
104
+ * Uses injected embedding function without knowledge of specific models
105
+ */
106
+ async embedQuery(query, contentType) {
107
+ if (!this.embedQueryFn) {
108
+ throw new Error('No embedding function provided. Set embedding function before executing pipeline.');
109
+ }
110
+ try {
111
+ return await this.embedQueryFn(query, contentType);
112
+ }
113
+ catch (error) {
114
+ throw new Error(`Query embedding failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
115
+ }
116
+ }
117
+ /**
118
+ * Step 2: Perform vector search
119
+ * Uses index manager without knowledge of specific embedding models
120
+ */
121
+ async vectorSearch(queryVector, topK) {
122
+ if (!this.indexManager) {
123
+ throw new Error('Index manager not set. Set index manager before executing pipeline.');
124
+ }
125
+ try {
126
+ return this.indexManager.search(queryVector, topK);
127
+ }
128
+ catch (error) {
129
+ if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
130
+ console.warn(`Hash mapping issue detected: ${error.message}`);
131
+ console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
132
+ return { embeddingIds: [], distances: [] };
133
+ }
134
+ throw new Error(`Vector search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
135
+ }
136
+ }
137
+ /**
138
+ * Step 3: Retrieve metadata from database
139
+ * Uses database connection without knowledge of specific data formats
140
+ */
141
+ async retrieveMetadata(embeddingIds) {
142
+ if (!this.dbConnection) {
143
+ throw new Error('Database connection not set. Set database connection before executing pipeline.');
144
+ }
145
+ try {
146
+ const { getChunksByEmbeddingIds } = await import('./db.js');
147
+ return await getChunksByEmbeddingIds(this.dbConnection, embeddingIds);
148
+ }
149
+ catch (error) {
150
+ throw new Error(`Metadata retrieval failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
151
+ }
152
+ }
153
+ /**
154
+ * Step 4: Format initial results
155
+ * Formats results in core format without knowledge of specific content types
156
+ */
157
+ formatResults(chunks, distances, embeddingIds) {
158
+ const results = [];
159
+ // Create a map for quick chunk lookup by embedding_id
160
+ const chunkMap = new Map();
161
+ chunks.forEach(chunk => {
162
+ chunkMap.set(chunk.embedding_id, chunk);
163
+ });
164
+ // Build results in the order of search results
165
+ for (let i = 0; i < embeddingIds.length; i++) {
166
+ const embeddingId = embeddingIds[i];
167
+ const chunk = chunkMap.get(embeddingId);
168
+ if (chunk) {
169
+ // Convert cosine distance to similarity score (1 - distance)
170
+ const score = Math.max(0, 1 - distances[i]);
171
+ results.push({
172
+ content: chunk.text,
173
+ score: score,
174
+ contentType: chunk.content_type || this.defaultContentType,
175
+ document: {
176
+ id: chunk.document_id,
177
+ source: chunk.document_source,
178
+ title: chunk.document_title,
179
+ contentType: chunk.document_content_type || this.defaultContentType
180
+ },
181
+ metadata: chunk.metadata ? this.parseMetadata(chunk.metadata) : undefined
182
+ });
183
+ }
184
+ }
185
+ return results;
186
+ }
187
+ /**
188
+ * Step 5: Optional reranking
189
+ * Uses injected reranking function without knowledge of specific models
190
+ */
191
+ async rerankResults(query, results, contentType) {
192
+ if (!this.rerankResultsFn) {
193
+ return results; // No reranking function available
194
+ }
195
+ try {
196
+ return await this.rerankResultsFn(query, results, contentType);
197
+ }
198
+ catch (error) {
199
+ console.warn(`Reranking failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
200
+ return results; // Return original results on reranking failure
201
+ }
202
+ }
203
+ /**
204
+ * Validate that all required dependencies are set
205
+ */
206
+ validateDependencies() {
207
+ const missing = [];
208
+ if (!this.embedQueryFn) {
209
+ missing.push('embedding function');
210
+ }
211
+ if (!this.indexManager) {
212
+ missing.push('index manager');
213
+ }
214
+ if (!this.dbConnection) {
215
+ missing.push('database connection');
216
+ }
217
+ if (missing.length > 0) {
218
+ throw new Error(`Missing required dependencies: ${missing.join(', ')}. Set all dependencies before executing search pipeline.`);
219
+ }
220
+ }
221
+ /**
222
+ * Parse metadata safely
223
+ */
224
+ parseMetadata(metadata) {
225
+ try {
226
+ return JSON.parse(metadata);
227
+ }
228
+ catch (error) {
229
+ console.warn(`Failed to parse metadata: ${metadata}`);
230
+ return undefined;
231
+ }
232
+ }
233
+ /**
234
+ * Check if the pipeline is ready to execute
235
+ */
236
+ isReady() {
237
+ return !!(this.embedQueryFn && this.indexManager && this.dbConnection);
238
+ }
239
+ /**
240
+ * Get pipeline status information
241
+ */
242
+ getStatus() {
243
+ return {
244
+ hasEmbedFunction: this.embedQueryFn !== null,
245
+ hasRerankFunction: this.rerankResultsFn !== null,
246
+ hasIndexManager: this.indexManager !== null,
247
+ hasDatabaseConnection: this.dbConnection !== null,
248
+ isReady: this.isReady()
249
+ };
250
+ }
251
+ /**
252
+ * Reset all dependencies (useful for testing or reconfiguration)
253
+ */
254
+ reset() {
255
+ this.embedQueryFn = null;
256
+ this.rerankResultsFn = null;
257
+ this.indexManager = null;
258
+ this.dbConnection = null;
259
+ this.defaultContentType = 'text';
260
+ }
261
+ }
262
+ /**
263
+ * Factory for creating search pipeline coordinators
264
+ */
265
+ export class SearchPipelineFactory {
266
+ /**
267
+ * Create a search pipeline coordinator with all dependencies
268
+ */
269
+ static create(embedFn, indexManager, dbConnection, rerankFn, defaultContentType = 'text') {
270
+ const coordinator = new SearchPipelineCoordinator();
271
+ coordinator.setEmbedFunction(embedFn);
272
+ coordinator.setIndexManager(indexManager);
273
+ coordinator.setDatabaseConnection(dbConnection);
274
+ coordinator.setDefaultContentType(defaultContentType);
275
+ if (rerankFn) {
276
+ coordinator.setRerankFunction(rerankFn);
277
+ }
278
+ return coordinator;
279
+ }
280
+ /**
281
+ * Create an empty coordinator for manual configuration
282
+ */
283
+ static createEmpty() {
284
+ return new SearchPipelineCoordinator();
285
+ }
286
+ }
287
+ //# sourceMappingURL=search-pipeline.js.map
@@ -0,0 +1,141 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import { IndexManager } from '../index-manager.js';
6
+ import { DatabaseConnection } from './db.js';
7
+ import type { SearchResult, SearchOptions } from './types.js';
8
+ import type { EmbedFunction, RerankFunction } from './interfaces.js';
9
+ /**
10
+ * Search engine that provides semantic search capabilities
11
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
12
+ * Uses explicit dependency injection for clean architecture
13
+ */
14
+ export declare class SearchEngine {
15
+ private embedFn;
16
+ private indexManager;
17
+ private db;
18
+ private rerankFn?;
19
+ private contentResolver?;
20
+ /**
21
+ * Creates a new SearchEngine with explicit dependency injection
22
+ *
23
+ * DEPENDENCY INJECTION PATTERN:
24
+ * This constructor requires all dependencies to be explicitly provided, enabling:
25
+ * - Clean separation between core logic and implementation-specific components
26
+ * - Support for different embedding models (text-only, multimodal, custom)
27
+ * - Testability through mock injection
28
+ * - Future extensibility without core changes
29
+ *
30
+ * @param embedFn - Function to embed queries into vectors
31
+ * - Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
32
+ * - Examples:
33
+ * - Text: const embedFn = (query) => textEmbedder.embedSingle(query)
34
+ * - Multimodal: const embedFn = (query, type) => type === 'image' ? clipEmbedder.embedImage(query) : clipEmbedder.embedText(query)
35
+ * - Custom: const embedFn = (query) => customModel.embed(query)
36
+ *
37
+ * @param indexManager - Vector index manager for similarity search
38
+ * - Handles vector storage and retrieval operations
39
+ * - Works with any embedding dimensions (384, 512, 768, etc.)
40
+ * - Example: new IndexManager('./index.bin')
41
+ *
42
+ * @param db - Database connection for metadata retrieval
43
+ * - Provides access to document and chunk metadata
44
+ * - Supports different content types through metadata fields
45
+ * - Example: await openDatabase('./db.sqlite')
46
+ *
47
+ * @param rerankFn - Optional function to rerank search results
48
+ * - Signature: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
49
+ * - Examples:
50
+ * - Text: const rerankFn = (query, results) => textReranker.rerank(query, results)
51
+ * - Custom: const rerankFn = (query, results) => customReranker.rerank(query, results)
52
+ * - Disabled: undefined (no reranking)
53
+ * @param contentResolver - Optional ContentResolver instance (NOTE(review): its role is not visible in this declaration file; presumably it backs the getContent* methods - confirm against search.js)
54
+ * USAGE EXAMPLES:
55
+ * ```typescript
56
+ * // Text-only search engine
57
+ * const textEmbedFn = createTextEmbedFunction();
58
+ * const textRerankFn = createTextRerankFunction();
59
+ * const indexManager = new IndexManager('./index.bin');
60
+ * const db = await openDatabase('./db.sqlite');
61
+ * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
62
+ *
63
+ * // Search engine without reranking
64
+ * const searchNoRerank = new SearchEngine(textEmbedFn, indexManager, db);
65
+ *
66
+ * // Custom embedding implementation
67
+ * const customEmbedFn = async (query) => ({
68
+ * embedding_id: generateId(),
69
+ * vector: await myCustomModel.embed(query)
70
+ * });
71
+ * const customSearch = new SearchEngine(customEmbedFn, indexManager, db);
72
+ * ```
73
+ */
74
+ constructor(embedFn: EmbedFunction, indexManager: IndexManager, db: DatabaseConnection, rerankFn?: RerankFunction | undefined, contentResolver?: import('./content-resolver.js').ContentResolver);
75
+ /**
76
+ * Perform semantic search on the indexed documents
77
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
78
+ * @param query - Search query string
79
+ * @param options - Search options including top_k and rerank settings
80
+ * @returns Promise resolving to array of search results
81
+ */
82
+ search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
83
+ /**
84
+ * Perform semantic search using a pre-computed embedding vector
85
+ * Useful for image-based search or when embedding is computed externally
86
+ * @param queryVector - Pre-computed query embedding vector
87
+ * @param options - Search options including top_k and rerank settings
88
+ * @param originalQuery - Optional original query for reranking (text or image path)
89
+ * @param embeddingTime - Optional embedding time for logging
90
+ * @returns Promise resolving to array of search results
91
+ */
92
+ searchWithVector(queryVector: Float32Array, options?: SearchOptions, originalQuery?: string, embeddingTime?: number): Promise<SearchResult[]>;
93
+ /**
94
+ * Format search results with proper structure
95
+ * @param chunks - Database chunks with metadata
96
+ * @param distances - Similarity distances from vector search
97
+ * @param embeddingIds - Embedding IDs in search result order
98
+ * @returns Formatted search results
99
+ */
100
+ private formatSearchResults;
101
+ /**
102
+ * Get search engine statistics
103
+ * @returns Promise resolving to an object with totalChunks, indexSize and rerankingEnabled
104
+ */
105
+ getStats(): Promise<{
106
+ totalChunks: number;
107
+ indexSize: number;
108
+ rerankingEnabled: boolean;
109
+ }>;
110
+ /**
111
+ * Retrieve content by ID in the specified format
112
+ * @param contentId - Content ID to retrieve
113
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
114
+ * @returns Promise that resolves to content in requested format
115
+ */
116
+ getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
117
+ /**
118
+ * Retrieve multiple content items efficiently in batch
119
+ * @param contentIds - Array of content IDs to retrieve
120
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
121
+ * @returns Promise that resolves to array of content in requested format
122
+ */
123
+ getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
124
+ /**
125
+ * Retrieve content metadata for result enhancement
126
+ * @param contentId - Content ID to get metadata for
127
+ * @returns Promise that resolves to content metadata
128
+ */
129
+ getContentMetadata(contentId: string): Promise<import('./content-resolver.js').ContentMetadata>;
130
+ /**
131
+ * Verify that content exists and is accessible
132
+ * @param contentId - Content ID to verify
133
+ * @returns Promise that resolves to true if content exists, false otherwise
134
+ */
135
+ verifyContentExists(contentId: string): Promise<boolean>;
136
+ /**
137
+ * Clean up resources - explicit cleanup method
138
+ */
139
+ cleanup(): Promise<void>;
140
+ }
141
+ //# sourceMappingURL=search.d.ts.map