rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,320 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import { getChunksByEmbeddingIds } from './db.js';
6
+ import { config } from './config.js';
7
+ import { createMissingDependencyError } from './actionable-error-messages.js';
8
+ /**
9
+ * Search engine that provides semantic search capabilities
10
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
11
+ * Uses explicit dependency injection for clean architecture
12
+ */
13
+ export class SearchEngine {
14
+ embedFn;
15
+ indexManager;
16
+ db;
17
+ rerankFn;
18
+ contentResolver;
19
+ /**
20
+ * Creates a new SearchEngine with explicit dependency injection
21
+ *
22
+ * DEPENDENCY INJECTION PATTERN:
23
+ * This constructor requires all dependencies to be explicitly provided, enabling:
24
+ * - Clean separation between core logic and implementation-specific components
25
+ * - Support for different embedding models (text-only, multimodal, custom)
26
+ * - Testability through mock injection
27
+ * - Future extensibility without core changes
28
+ *
29
+ * @param embedFn - Function to embed queries into vectors
30
+ * - Signature: (query: string, contentType?: string) => Promise<EmbeddingResult>
31
+ * - Examples:
32
+ * - Text: const embedFn = (query) => textEmbedder.embedSingle(query)
33
+ * - Multimodal: const embedFn = (query, type) => type === 'image' ? clipEmbedder.embedImage(query) : clipEmbedder.embedText(query)
34
+ * - Custom: const embedFn = (query) => customModel.embed(query)
35
+ *
36
+ * @param indexManager - Vector index manager for similarity search
37
+ * - Handles vector storage and retrieval operations
38
+ * - Works with any embedding dimensions (384, 512, 768, etc.)
39
+ * - Example: new IndexManager('./index.bin')
40
+ *
41
+ * @param db - Database connection for metadata retrieval
42
+ * - Provides access to document and chunk metadata
43
+ * - Supports different content types through metadata fields
44
+ * - Example: await openDatabase('./db.sqlite')
45
+ *
46
+ * @param rerankFn - Optional function to rerank search results
47
+ * - Signature: (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>
48
+ * - Examples:
49
+ * - Text: const rerankFn = (query, results) => textReranker.rerank(query, results)
50
+ * - Custom: const rerankFn = (query, results) => customReranker.rerank(query, results)
51
+ * - Disabled: undefined (no reranking)
52
+ *
53
+ * USAGE EXAMPLES:
54
+ * ```typescript
55
+ * // Text-only search engine
56
+ * const textEmbedFn = createTextEmbedFunction();
57
+ * const textRerankFn = createTextRerankFunction();
58
+ * const indexManager = new IndexManager('./index.bin');
59
+ * const db = await openDatabase('./db.sqlite');
60
+ * const search = new SearchEngine(textEmbedFn, indexManager, db, textRerankFn);
61
+ *
62
+ * // Search engine without reranking
63
+ * const search = new SearchEngine(textEmbedFn, indexManager, db);
64
+ *
65
+ * // Custom embedding implementation
66
+ * const customEmbedFn = async (query) => ({
67
+ * embedding_id: generateId(),
68
+ * vector: await myCustomModel.embed(query)
69
+ * });
70
+ * const search = new SearchEngine(customEmbedFn, indexManager, db);
71
+ * ```
72
+ */
73
+ constructor(embedFn, indexManager, db, rerankFn, contentResolver) {
74
+ this.embedFn = embedFn;
75
+ this.indexManager = indexManager;
76
+ this.db = db;
77
+ this.rerankFn = rerankFn;
78
+ // Validate required dependencies
79
+ if (!embedFn || typeof embedFn !== 'function') {
80
+ throw createMissingDependencyError('embedFn', 'function', {
81
+ operationContext: 'SearchEngine constructor'
82
+ });
83
+ }
84
+ if (!indexManager) {
85
+ throw createMissingDependencyError('indexManager', 'object', {
86
+ operationContext: 'SearchEngine constructor'
87
+ });
88
+ }
89
+ if (!db) {
90
+ throw createMissingDependencyError('db', 'object', {
91
+ operationContext: 'SearchEngine constructor'
92
+ });
93
+ }
94
+ // Initialize ContentResolver if provided, or create lazily when needed
95
+ this.contentResolver = contentResolver;
96
+ }
97
+ /**
98
+ * Perform semantic search on the indexed documents
99
+ * Implements the core search pipeline: query embedding → vector search → metadata retrieval → optional reranking
100
+ * @param query - Search query string
101
+ * @param options - Search options including top_k and rerank settings
102
+ * @returns Promise resolving to array of search results
103
+ */
104
+ async search(query, options = {}) {
105
+ if (!query || query.trim().length === 0) {
106
+ return [];
107
+ }
108
+ const startTime = performance.now();
109
+ try {
110
+ // Step 1: Build query embedding using injected embed function
111
+ const embeddingStartTime = performance.now();
112
+ const queryEmbedding = await this.embedFn(query);
113
+ const embeddingTime = performance.now() - embeddingStartTime;
114
+ // Step 2: Search with the vector
115
+ const results = await this.searchWithVector(queryEmbedding.vector, options, query, embeddingTime);
116
+ return results;
117
+ }
118
+ catch (error) {
119
+ throw new Error(`Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
120
+ }
121
+ }
122
+ /**
123
+ * Perform semantic search using a pre-computed embedding vector
124
+ * Useful for image-based search or when embedding is computed externally
125
+ * @param queryVector - Pre-computed query embedding vector
126
+ * @param options - Search options including top_k and rerank settings
127
+ * @param originalQuery - Optional original query for reranking (text or image path)
128
+ * @param embeddingTime - Optional embedding time for logging
129
+ * @returns Promise resolving to array of search results
130
+ */
131
+ async searchWithVector(queryVector, options = {}, originalQuery, embeddingTime) {
132
+ const startTime = performance.now();
133
+ const topK = options.top_k || config.top_k || 10;
134
+ // Phase 1: Disable reranking by default for better performance
135
+ // Users must explicitly opt-in with --rerank flag
136
+ const shouldRerank = options.rerank === true;
137
+ try {
138
+ // Step 1: Search using IndexManager (which handles hash mapping properly)
139
+ const searchStartTime = performance.now();
140
+ let searchResult;
141
+ try {
142
+ const contentType = options.contentType;
143
+ searchResult = this.indexManager.search(queryVector, topK, contentType);
144
+ }
145
+ catch (error) {
146
+ if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
147
+ console.warn(`Hash mapping issue detected: ${error.message}`);
148
+ console.warn('This may indicate index/database synchronization issues. Consider running: raglite rebuild');
149
+ return [];
150
+ }
151
+ throw error;
152
+ }
153
+ const vectorSearchTime = performance.now() - searchStartTime;
154
+ if (searchResult.embeddingIds.length === 0) {
155
+ const totalTime = performance.now() - startTime;
156
+ console.log(`No similar documents found (${totalTime.toFixed(2)}ms total)`);
157
+ return [];
158
+ }
159
+ // Step 2: Retrieve chunks from database using embedding IDs
160
+ const retrievalStartTime = performance.now();
161
+ const chunks = await getChunksByEmbeddingIds(this.db, searchResult.embeddingIds);
162
+ const retrievalTime = performance.now() - retrievalStartTime;
163
+ // Step 3: Format results as JSON with text, score, and document metadata
164
+ let results = this.formatSearchResults(chunks, searchResult.distances, searchResult.embeddingIds);
165
+ // Step 4: Optional reranking with injected rerank function
166
+ let rerankTime = 0;
167
+ if (shouldRerank && this.rerankFn && results.length > 1 && originalQuery) {
168
+ try {
169
+ const rerankStartTime = performance.now();
170
+ results = await this.rerankFn(originalQuery, results);
171
+ rerankTime = performance.now() - rerankStartTime;
172
+ }
173
+ catch (error) {
174
+ // Fallback to vector search results and log the error
175
+ console.warn(`Reranking failed, using vector search results: ${error instanceof Error ? error.message : 'Unknown error'}`);
176
+ }
177
+ }
178
+ const totalTime = performance.now() - startTime;
179
+ // Measure latency without premature optimization - just log for monitoring
180
+ const embedTimeStr = embeddingTime !== undefined ? `embed: ${embeddingTime.toFixed(2)}ms, ` : '';
181
+ console.log(`Search completed: ${results.length} results in ${totalTime.toFixed(2)}ms ` +
182
+ `(${embedTimeStr}vector: ${vectorSearchTime.toFixed(2)}ms, ` +
183
+ `retrieval: ${retrievalTime.toFixed(2)}ms${rerankTime > 0 ? `, rerank: ${rerankTime.toFixed(2)}ms` : ''})`);
184
+ return results;
185
+ }
186
+ catch (error) {
187
+ throw new Error(`Vector search failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
188
+ }
189
+ }
190
+ /**
191
+ * Format search results with proper structure
192
+ * @param chunks - Database chunks with metadata
193
+ * @param distances - Similarity distances from vector search
194
+ * @param embeddingIds - Embedding IDs in search result order
195
+ * @returns Formatted search results
196
+ */
197
+ formatSearchResults(chunks, distances, embeddingIds) {
198
+ const results = [];
199
+ // Create a map for quick chunk lookup by embedding_id
200
+ const chunkMap = new Map();
201
+ chunks.forEach(chunk => {
202
+ chunkMap.set(chunk.embedding_id, chunk);
203
+ });
204
+ // Build results in the order of search results
205
+ for (let i = 0; i < embeddingIds.length; i++) {
206
+ const embeddingId = embeddingIds[i];
207
+ const chunk = chunkMap.get(embeddingId);
208
+ if (chunk) {
209
+ // Convert cosine distance to similarity score (1 - distance)
210
+ // hnswlib-wasm returns cosine distance, we want similarity
211
+ const score = Math.max(0, 1 - distances[i]);
212
+ results.push({
213
+ content: chunk.content,
214
+ score: score,
215
+ contentType: chunk.content_type || 'text',
216
+ document: {
217
+ id: chunk.document_id,
218
+ source: chunk.document_source,
219
+ title: chunk.document_title,
220
+ contentType: chunk.document_content_type || 'text',
221
+ contentId: chunk.document_content_id || undefined
222
+ }
223
+ });
224
+ }
225
+ }
226
+ return results;
227
+ }
228
+ /**
229
+ * Get search engine statistics
230
+ * @returns Object with current search engine stats
231
+ */
232
+ async getStats() {
233
+ const indexStats = await this.indexManager.getStats();
234
+ return {
235
+ totalChunks: indexStats.totalVectors,
236
+ indexSize: indexStats.totalVectors,
237
+ rerankingEnabled: this.rerankFn !== undefined
238
+ };
239
+ }
240
+ /**
241
+ * Retrieve content by ID in the specified format
242
+ * @param contentId - Content ID to retrieve
243
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
244
+ * @returns Promise that resolves to content in requested format
245
+ */
246
+ async getContent(contentId, format = 'file') {
247
+ // Lazy initialization of ContentResolver
248
+ if (!this.contentResolver) {
249
+ const { ContentResolver } = await import('./content-resolver.js');
250
+ this.contentResolver = new ContentResolver(this.db);
251
+ }
252
+ return this.contentResolver.getContent(contentId, format);
253
+ }
254
+ /**
255
+ * Retrieve multiple content items efficiently in batch
256
+ * @param contentIds - Array of content IDs to retrieve
257
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
258
+ * @returns Promise that resolves to array of content in requested format
259
+ */
260
+ async getContentBatch(contentIds, format = 'file') {
261
+ // Lazy initialization of ContentResolver
262
+ if (!this.contentResolver) {
263
+ const { ContentResolver } = await import('./content-resolver.js');
264
+ this.contentResolver = new ContentResolver(this.db);
265
+ }
266
+ // Convert contentIds array to ContentRequest array
267
+ const requests = contentIds.map(contentId => ({ contentId, format }));
268
+ const results = await this.contentResolver.getContentBatch(requests);
269
+ // Extract content from results, maintaining order and handling errors
270
+ return results.map(result => {
271
+ if (!result.success) {
272
+ throw new Error(`Failed to retrieve content ${result.contentId}: ${result.error}`);
273
+ }
274
+ return result.content;
275
+ });
276
+ }
277
+ /**
278
+ * Retrieve content metadata for result enhancement
279
+ * @param contentId - Content ID to get metadata for
280
+ * @returns Promise that resolves to content metadata
281
+ */
282
+ async getContentMetadata(contentId) {
283
+ // Lazy initialization of ContentResolver
284
+ if (!this.contentResolver) {
285
+ const { ContentResolver } = await import('./content-resolver.js');
286
+ this.contentResolver = new ContentResolver(this.db);
287
+ }
288
+ return this.contentResolver.getContentMetadata(contentId);
289
+ }
290
+ /**
291
+ * Verify that content exists and is accessible
292
+ * @param contentId - Content ID to verify
293
+ * @returns Promise that resolves to true if content exists, false otherwise
294
+ */
295
+ async verifyContentExists(contentId) {
296
+ // Lazy initialization of ContentResolver
297
+ if (!this.contentResolver) {
298
+ const { ContentResolver } = await import('./content-resolver.js');
299
+ this.contentResolver = new ContentResolver(this.db);
300
+ }
301
+ return this.contentResolver.verifyContentExists(contentId);
302
+ }
303
+ /**
304
+ * Clean up resources - explicit cleanup method
305
+ */
306
+ async cleanup() {
307
+ try {
308
+ // Clean up ContentResolver to prevent resource leaks
309
+ if (this.contentResolver && typeof this.contentResolver.cleanup === 'function') {
310
+ this.contentResolver.cleanup();
311
+ }
312
+ await this.db.close();
313
+ await this.indexManager.close();
314
+ }
315
+ catch (error) {
316
+ console.error('Error during SearchEngine cleanup:', error instanceof Error ? error.message : String(error));
317
+ }
318
+ }
319
+ }
320
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Streaming Operations for Large Content - Task 9.1 Implementation
3
+ * Provides memory-efficient streaming operations for content ingestion and retrieval
4
+ * Minimizes memory usage for large files through streaming algorithms
5
+ */
6
+ /**
7
+ * Progress callback for long-running operations
8
+ */
9
+ export interface ProgressCallback {
10
+ (bytesProcessed: number, totalBytes?: number): void; // totalBytes is optional and may be omitted (presumably when the total size is unknown - TODO confirm)
11
+ }
12
+ /**
13
+ * Streaming hash calculation result
14
+ */
15
+ export interface StreamingHashResult {
16
+ hash: string; // presumably the SHA-256 digest from the hashing methods; string encoding is not shown here - TODO confirm
17
+ bytesProcessed: number;
18
+ processingTimeMs: number; // presumably elapsed milliseconds, per the "Ms" suffix - TODO confirm
19
+ }
20
+ /**
21
+ * Streaming file copy result
22
+ */
23
+ export interface StreamingCopyResult {
24
+ bytesWritten: number;
25
+ processingTimeMs: number; // presumably elapsed milliseconds, per the "Ms" suffix - TODO confirm
26
+ hash?: string; // optional; presumably present only when hashing is enabled in the config - TODO confirm
27
+ }
28
+ /**
29
+ * Configuration for streaming operations
30
+ */
31
+ export interface StreamingConfig {
32
+ chunkSize: number; // unit is not stated in this declaration (presumably bytes) - TODO confirm
33
+ enableProgress: boolean;
34
+ enableHashing: boolean;
35
+ timeout: number; // unit is not stated in this declaration (presumably milliseconds) - TODO confirm
36
+ }
37
+ /**
38
+ * StreamingOperations class provides memory-efficient operations for large content
39
+ */
40
+ export declare class StreamingOperations {
41
+ private config; // no type is given for this private member in the declaration file
42
+ constructor(config?: Partial<StreamingConfig>); // config may be omitted, and Partial<> makes every field optional
43
+ /**
44
+ * Calculates SHA-256 hash of a file using streaming to minimize memory usage
45
+ * @param filePath - Path to the file to hash
46
+ * @param progressCallback - Optional callback for progress reporting
47
+ * @returns Promise that resolves to hash result
48
+ */
49
+ calculateFileHashStreaming(filePath: string, progressCallback?: ProgressCallback): Promise<StreamingHashResult>;
50
+ /**
51
+ * Calculates SHA-256 hash of a buffer using streaming to minimize memory usage
52
+ * @param content - Buffer to hash
53
+ * @param progressCallback - Optional callback for progress reporting
54
+ * @returns Promise that resolves to hash result
55
+ */
56
+ calculateBufferHashStreaming(content: Buffer, progressCallback?: ProgressCallback): Promise<StreamingHashResult>;
57
+ /**
58
+ * Copies a file using streaming operations with optional hashing
59
+ * @param sourcePath - Source file path
60
+ * @param destinationPath - Destination file path
61
+ * @param progressCallback - Optional callback for progress reporting
62
+ * @returns Promise that resolves to copy result
63
+ */
64
+ copyFileStreaming(sourcePath: string, destinationPath: string, progressCallback?: ProgressCallback): Promise<StreamingCopyResult>;
65
+ /**
66
+ * Writes buffer content to file using streaming operations
67
+ * @param content - Buffer to write
68
+ * @param destinationPath - Destination file path
69
+ * @param progressCallback - Optional callback for progress reporting
70
+ * @returns Promise that resolves to write result
71
+ */
72
+ writeBufferStreaming(content: Buffer, destinationPath: string, progressCallback?: ProgressCallback): Promise<StreamingCopyResult>;
73
+ /**
74
+ * Reads file content and converts to base64 using streaming to minimize memory usage
75
+ * @param filePath - Path to the file to read
76
+ * @param progressCallback - Optional callback for progress reporting
77
+ * @returns Promise that resolves to base64 string
78
+ */
79
+ readFileAsBase64Streaming(filePath: string, progressCallback?: ProgressCallback): Promise<string>;
80
+ /**
81
+ * Validates file integrity by comparing streaming hash with expected hash
82
+ * @param filePath - Path to the file to validate
83
+ * @param expectedHash - Expected SHA-256 hash
84
+ * @param progressCallback - Optional callback for progress reporting
85
+ * @returns Promise that resolves to an object with isValid, actualHash and bytesProcessed
86
+ */
87
+ validateFileIntegrityStreaming(filePath: string, expectedHash: string, progressCallback?: ProgressCallback): Promise<{
88
+ isValid: boolean;
89
+ actualHash: string;
90
+ bytesProcessed: number;
91
+ }>;
92
+ /**
93
+ * Gets file information without loading content into memory
94
+ * @param filePath - Path to the file
95
+ * @returns Promise that resolves to size, type flags, last-modified date and read/write access flags
96
+ */
97
+ getFileInfo(filePath: string): Promise<{
98
+ size: number;
99
+ isFile: boolean;
100
+ isDirectory: boolean;
101
+ lastModified: Date;
102
+ canRead: boolean;
103
+ canWrite: boolean;
104
+ }>;
105
+ /**
106
+ * Converts buffer to chunks for streaming
107
+ * @param buffer - Buffer to chunk
108
+ * @returns Generator that yields buffer chunks
109
+ */
110
+ private bufferToChunks; // signature is not included in this declaration (private member)
111
+ /**
112
+ * Wraps a promise with timeout functionality
113
+ * @param promise - Promise to wrap
114
+ * @param timeoutMs - Timeout in milliseconds
115
+ * @param errorMessage - Error message for timeout
116
+ * @returns Promise that rejects if timeout is reached
117
+ */
118
+ private withTimeout; // signature is not included in this declaration (private member)
119
+ }
120
+ /**
121
+ * Creates a StreamingOperations instance with default configuration
122
+ * @param config - Optional configuration overrides
123
+ * @returns StreamingOperations instance
124
+ */
125
+ export declare function createStreamingOperations(config?: Partial<StreamingConfig>): StreamingOperations; // config may be omitted; Partial<> makes every field optional
126
+ /**
127
+ * Utility function to format bytes for progress reporting
128
+ * @param bytes - Number of bytes
129
+ * @returns Formatted string (e.g., "1.5 MB")
130
+ */
131
+ export declare function formatBytes(bytes: number): string; // NOTE(review): unit base (1000 vs 1024) and rounding are not visible in this declaration - confirm in the implementation
132
+ /**
133
+ * Utility function to format processing time
134
+ * @param milliseconds - Processing time in milliseconds
135
+ * @returns Formatted string (e.g., "1.5s" or "150ms")
136
+ */
137
+ export declare function formatProcessingTime(milliseconds: number): string; // NOTE(review): the ms-to-seconds switch-over threshold is not visible in this declaration - confirm in the implementation
138
+ /**
139
+ * Utility function to calculate processing speed
140
+ * @param bytes - Number of bytes processed
141
+ * @param milliseconds - Processing time in milliseconds
142
+ * @returns Speed in MB/s
143
+ */
144
+ export declare function calculateProcessingSpeed(bytes: number, milliseconds: number): number; // NOTE(review): behaviour for milliseconds = 0 is not visible in this declaration - confirm in the implementation
145
+ //# sourceMappingURL=streaming-operations.d.ts.map