rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Binary Index Format Module
3
+ *
4
+ * Provides efficient binary serialization for HNSW vector indices.
5
+ *
6
+ * Format Specification:
7
+ * - Header: 24 bytes (6 × uint32); the grouped format uses a 44-byte header (11 × uint32)
8
+ * - Vectors: N × (4 + D × 4) bytes
9
+ * - Little-endian encoding for cross-platform compatibility
10
+ * - 4-byte alignment for Float32Array zero-copy views
11
+ *
12
+ * Performance:
13
+ * - 3.66x smaller than JSON format
14
+ * - 3.5x faster loading
15
+ * - Zero-copy Float32Array views
16
+ */
17
/**
 * In-memory representation of a serialized vector index.
 *
 * The first six numeric fields are the values persisted in the file header
 * as little-endian uint32. The remaining fields carry the vector records.
 */
export interface BinaryIndexData {
    /** Number of float32 components in every vector record. */
    dimensions: number;
    /** Index capacity; stored verbatim in the header (presumably the HNSW max element count). */
    maxElements: number;
    /** Stored verbatim in the header (presumably the HNSW `M` connectivity parameter). */
    M: number;
    /** Stored verbatim in the header (presumably the HNSW construction-time `ef` value). */
    efConstruction: number;
    /** Stored verbatim in the header (presumably the RNG seed used to build the index). */
    seed: number;
    /** Number of vector records the ungrouped file layout holds. */
    currentSize: number;
    /** Every vector in the index, each paired with its numeric id. */
    vectors: {
        id: number;
        vector: Float32Array;
    }[];
    /** True when the vectors are additionally split into text and image groups. */
    hasContentTypeGroups?: boolean;
    /** Vectors belonging to the text group (grouped layout only). */
    textVectors?: {
        id: number;
        vector: Float32Array;
    }[];
    /** Vectors belonging to the image group (grouped layout only). */
    imageVectors?: {
        id: number;
        vector: Float32Array;
    }[];
}
38
/**
 * Reads and writes vector indices in a compact little-endian binary layout.
 *
 * All methods are static; the class is never instantiated.
 */
export declare class BinaryIndexFormat {
    /**
     * Save index data in the original (ungrouped) binary format.
     *
     * File structure:
     * - Header (24 bytes, 6 × uint32): dimensions, maxElements, M,
     *   efConstruction, seed, currentSize
     * - Vectors: for each vector, id (4 bytes) + vector data (dimensions × 4 bytes)
     *
     * @param indexPath Path to save the binary index file
     * @param data Index data to serialize
     * @returns A promise that resolves once the file has been written
     */
    static save(indexPath: string, data: BinaryIndexData): Promise<void>;
    /**
     * Save index data in the grouped binary format.
     *
     * Falls back to {@link BinaryIndexFormat.save} when `data` does not carry
     * both `textVectors` and `imageVectors` with `hasContentTypeGroups` set.
     *
     * File structure:
     * - Extended header (44 bytes, 11 × uint32):
     *   - Original 6 fields (24 bytes)
     *   - hasGroups flag (4 bytes)
     *   - textOffset (4 bytes)
     *   - textCount (4 bytes)
     *   - imageOffset (4 bytes)
     *   - imageCount (4 bytes)
     * - Data section: [text vectors...][image vectors...]
     *
     * @param indexPath Path to save the binary index file
     * @param data Index data to serialize
     * @returns A promise that resolves once the file has been written
     */
    static saveGrouped(indexPath: string, data: BinaryIndexData): Promise<void>;
    /**
     * Load index data from a binary file written in either the original or
     * the grouped format.
     *
     * Every returned vector is an independent Float32Array copy, so the
     * result does not keep the file buffer alive. For grouped files,
     * `vectors` contains the text vectors followed by the image vectors.
     *
     * @param indexPath Path to the binary index file
     * @returns Deserialized index data
     */
    static load(indexPath: string): Promise<BinaryIndexData>;
}
78
+ //# sourceMappingURL=binary-index-format.d.ts.map
@@ -0,0 +1,291 @@
1
+ /**
2
+ * Binary Index Format Module
3
+ *
4
+ * Provides efficient binary serialization for HNSW vector indices.
5
+ *
6
+ * Format Specification:
7
+ * - Header: 24 bytes (6 × uint32); the grouped format uses a 44-byte header (11 × uint32)
8
+ * - Vectors: N × (4 + D × 4) bytes
9
+ * - Little-endian encoding for cross-platform compatibility
10
+ * - 4-byte alignment for Float32Array zero-copy views
11
+ *
12
+ * Performance:
13
+ * - 3.66x smaller than JSON format
14
+ * - 3.5x faster loading
15
+ * - Zero-copy Float32Array views
16
+ */
17
+ import { readFileSync, writeFileSync } from 'fs';
18
+ export class BinaryIndexFormat {
19
+ /**
20
+ * Save index data to binary format (original format for backward compatibility)
21
+ *
22
+ * File structure:
23
+ * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
24
+ * - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
25
+ *
26
+ * @param indexPath Path to save the binary index file
27
+ * @param data Index data to serialize
28
+ */
29
+ static async save(indexPath, data) {
30
+ // Calculate total size
31
+ const headerSize = 24; // 6 uint32 fields
32
+ const vectorSize = 4 + (data.dimensions * 4); // id + vector
33
+ const totalSize = headerSize + (data.currentSize * vectorSize);
34
+ const buffer = new ArrayBuffer(totalSize);
35
+ const view = new DataView(buffer);
36
+ let offset = 0;
37
+ // Write header (24 bytes, all little-endian)
38
+ view.setUint32(offset, data.dimensions, true);
39
+ offset += 4;
40
+ view.setUint32(offset, data.maxElements, true);
41
+ offset += 4;
42
+ view.setUint32(offset, data.M, true);
43
+ offset += 4;
44
+ view.setUint32(offset, data.efConstruction, true);
45
+ offset += 4;
46
+ view.setUint32(offset, data.seed, true);
47
+ offset += 4;
48
+ view.setUint32(offset, data.currentSize, true);
49
+ offset += 4;
50
+ // Write vectors
51
+ for (const item of data.vectors) {
52
+ // Ensure 4-byte alignment (should always be true with our format)
53
+ if (offset % 4 !== 0) {
54
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
55
+ }
56
+ // Write vector ID
57
+ view.setUint32(offset, item.id, true);
58
+ offset += 4;
59
+ // Write vector data
60
+ for (let i = 0; i < item.vector.length; i++) {
61
+ view.setFloat32(offset, item.vector[i], true);
62
+ offset += 4;
63
+ }
64
+ }
65
+ // Write to file
66
+ writeFileSync(indexPath, Buffer.from(buffer));
67
+ }
68
+ /**
69
+ * Save index data to grouped binary format
70
+ *
71
+ * File structure:
72
+ * - Extended Header (40 bytes):
73
+ * - Original 6 fields (24 bytes)
74
+ * - hasGroups flag (4 bytes)
75
+ * - textOffset (4 bytes)
76
+ * - textCount (4 bytes)
77
+ * - imageOffset (4 bytes)
78
+ * - imageCount (4 bytes)
79
+ * - Data section: [text vectors...][image vectors...]
80
+ *
81
+ * @param indexPath Path to save the binary index file
82
+ * @param data Index data to serialize
83
+ */
84
+ static async saveGrouped(indexPath, data) {
85
+ if (!data.hasContentTypeGroups || !data.textVectors || !data.imageVectors) {
86
+ // Fallback to original format
87
+ return this.save(indexPath, data);
88
+ }
89
+ const headerSize = 44; // Extended header: 24 + 20 bytes (hasGroups + textOffset + textCount + imageOffset + imageCount)
90
+ const vectorSize = 4 + (data.dimensions * 4); // id + vector
91
+ // Calculate offsets and total size
92
+ const textOffset = headerSize;
93
+ const imageOffset = textOffset + (data.textVectors.length * vectorSize);
94
+ const totalSize = imageOffset + (data.imageVectors.length * vectorSize);
95
+ const buffer = new ArrayBuffer(totalSize);
96
+ const view = new DataView(buffer);
97
+ let offset = 0;
98
+ // Write extended header (40 bytes, all little-endian)
99
+ if (offset + 40 > buffer.byteLength) {
100
+ throw new Error(`Header write would exceed buffer bounds: offset=${offset}, headerSize=40, bufferSize=${buffer.byteLength}`);
101
+ }
102
+ view.setUint32(offset, data.dimensions, true);
103
+ offset += 4;
104
+ view.setUint32(offset, data.maxElements, true);
105
+ offset += 4;
106
+ view.setUint32(offset, data.M, true);
107
+ offset += 4;
108
+ view.setUint32(offset, data.efConstruction, true);
109
+ offset += 4;
110
+ view.setUint32(offset, data.seed, true);
111
+ offset += 4;
112
+ view.setUint32(offset, data.currentSize, true);
113
+ offset += 4;
114
+ // Extended fields
115
+ view.setUint32(offset, 1, true);
116
+ offset += 4; // hasGroups = 1
117
+ view.setUint32(offset, textOffset, true);
118
+ offset += 4;
119
+ view.setUint32(offset, data.textVectors.length, true);
120
+ offset += 4;
121
+ view.setUint32(offset, imageOffset, true);
122
+ offset += 4;
123
+ view.setUint32(offset, data.imageVectors.length, true);
124
+ offset += 4;
125
+ // Write text vectors
126
+ for (const item of data.textVectors) {
127
+ // Ensure 4-byte alignment
128
+ if (offset % 4 !== 0) {
129
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
130
+ }
131
+ // Check bounds before writing
132
+ if (offset + 4 > buffer.byteLength) {
133
+ throw new Error(`ID write would exceed buffer bounds: offset=${offset}, bufferSize=${buffer.byteLength}`);
134
+ }
135
+ // Write vector ID
136
+ view.setUint32(offset, item.id, true);
137
+ offset += 4;
138
+ // Check bounds for vector data
139
+ const vectorDataSize = item.vector.length * 4;
140
+ if (offset + vectorDataSize > buffer.byteLength) {
141
+ throw new Error(`Vector data write would exceed buffer bounds: offset=${offset}, dataSize=${vectorDataSize}, bufferSize=${buffer.byteLength}`);
142
+ }
143
+ // Write vector data
144
+ for (let i = 0; i < item.vector.length; i++) {
145
+ view.setFloat32(offset, item.vector[i], true);
146
+ offset += 4;
147
+ }
148
+ }
149
+ // Write image vectors
150
+ for (const item of data.imageVectors) {
151
+ // Ensure 4-byte alignment
152
+ if (offset % 4 !== 0) {
153
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
154
+ }
155
+ // Check bounds before writing
156
+ if (offset + 4 > buffer.byteLength) {
157
+ throw new Error(`ID write would exceed buffer bounds: offset=${offset}, bufferSize=${buffer.byteLength}`);
158
+ }
159
+ // Write vector ID
160
+ view.setUint32(offset, item.id, true);
161
+ offset += 4;
162
+ // Check bounds for vector data
163
+ const vectorDataSize = item.vector.length * 4;
164
+ if (offset + vectorDataSize > buffer.byteLength) {
165
+ throw new Error(`Vector data write would exceed buffer bounds: offset=${offset}, dataSize=${vectorDataSize}, bufferSize=${buffer.byteLength}`);
166
+ }
167
+ // Write vector data
168
+ for (let i = 0; i < item.vector.length; i++) {
169
+ view.setFloat32(offset, item.vector[i], true);
170
+ offset += 4;
171
+ }
172
+ }
173
+ // Write to file
174
+ writeFileSync(indexPath, Buffer.from(buffer));
175
+ }
176
+ /**
177
+ * Load index data from binary format (supports both original and grouped formats)
178
+ *
179
+ * Uses zero-copy Float32Array views for efficient loading.
180
+ * Copies the views to ensure data persistence after buffer lifecycle.
181
+ *
182
+ * @param indexPath Path to the binary index file
183
+ * @returns Deserialized index data
184
+ */
185
+ static async load(indexPath) {
186
+ const buffer = readFileSync(indexPath);
187
+ const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
188
+ let offset = 0;
189
+ // Read basic header (24 bytes, all little-endian)
190
+ const dimensions = view.getUint32(offset, true);
191
+ offset += 4;
192
+ const maxElements = view.getUint32(offset, true);
193
+ offset += 4;
194
+ const M = view.getUint32(offset, true);
195
+ offset += 4;
196
+ const efConstruction = view.getUint32(offset, true);
197
+ offset += 4;
198
+ const seed = view.getUint32(offset, true);
199
+ offset += 4;
200
+ const currentSize = view.getUint32(offset, true);
201
+ offset += 4;
202
+ // Check if this is the extended grouped format (40+ bytes header)
203
+ const hasGroups = buffer.byteLength >= 40 ? view.getUint32(offset, true) : 0;
204
+ if (hasGroups === 1 && buffer.byteLength >= 40) {
205
+ // Load grouped format
206
+ const textOffset = view.getUint32(offset + 4, true);
207
+ const textCount = view.getUint32(offset + 8, true);
208
+ const imageOffset = view.getUint32(offset + 12, true);
209
+ const imageCount = view.getUint32(offset + 16, true);
210
+ // Load text vectors
211
+ const textVectors = [];
212
+ offset = textOffset;
213
+ for (let i = 0; i < textCount; i++) {
214
+ // Ensure 4-byte alignment
215
+ if (offset % 4 !== 0) {
216
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
217
+ }
218
+ // Read vector ID
219
+ const id = view.getUint32(offset, true);
220
+ offset += 4;
221
+ // Zero-copy Float32Array view
222
+ const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
223
+ // Copy to avoid buffer lifecycle issues
224
+ const vector = new Float32Array(vectorView);
225
+ offset += dimensions * 4;
226
+ textVectors.push({ id, vector });
227
+ }
228
+ // Load image vectors
229
+ const imageVectors = [];
230
+ offset = imageOffset;
231
+ for (let i = 0; i < imageCount; i++) {
232
+ // Ensure 4-byte alignment
233
+ if (offset % 4 !== 0) {
234
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
235
+ }
236
+ // Read vector ID
237
+ const id = view.getUint32(offset, true);
238
+ offset += 4;
239
+ // Zero-copy Float32Array view
240
+ const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
241
+ // Copy to avoid buffer lifecycle issues
242
+ const vector = new Float32Array(vectorView);
243
+ offset += dimensions * 4;
244
+ imageVectors.push({ id, vector });
245
+ }
246
+ // Combine all vectors for backward compatibility
247
+ const allVectors = [...textVectors, ...imageVectors];
248
+ return {
249
+ dimensions,
250
+ maxElements,
251
+ M,
252
+ efConstruction,
253
+ seed,
254
+ currentSize,
255
+ vectors: allVectors,
256
+ hasContentTypeGroups: true,
257
+ textVectors,
258
+ imageVectors
259
+ };
260
+ }
261
+ else {
262
+ // Load original format
263
+ const vectors = [];
264
+ for (let i = 0; i < currentSize; i++) {
265
+ // Ensure 4-byte alignment (should always be true with our format)
266
+ if (offset % 4 !== 0) {
267
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
268
+ }
269
+ // Read vector ID
270
+ const id = view.getUint32(offset, true);
271
+ offset += 4;
272
+ // Zero-copy Float32Array view (fast!)
273
+ const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
274
+ // Copy to avoid buffer lifecycle issues
275
+ const vector = new Float32Array(vectorView);
276
+ offset += dimensions * 4;
277
+ vectors.push({ id, vector });
278
+ }
279
+ return {
280
+ dimensions,
281
+ maxElements,
282
+ M,
283
+ efConstruction,
284
+ seed,
285
+ currentSize,
286
+ vectors
287
+ };
288
+ }
289
+ }
290
+ }
291
+ //# sourceMappingURL=binary-index-format.js.map
@@ -0,0 +1,119 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ /**
6
+ * Configuration for chunking behavior (DEFAULT_CHUNK_CONFIG supplies the values used when no config is passed)
7
+ */
8
+ export interface ChunkConfig {
9
+ /** Target chunk size in tokens (200-300 recommended; the default config uses 250) */
10
+ chunkSize: number;
11
+ /** Overlap between chunks in tokens (50 recommended, which is also the default config value) */
12
+ chunkOverlap: number;
13
+ }
14
+ /**
15
+ * Generic document interface that can represent different content types; this is the input accepted by chunkGenericDocument
16
+ */
17
+ export interface GenericDocument {
18
+ /** Source path or identifier */
19
+ source: string;
20
+ /** Document title */
21
+ title: string;
22
+ /** Content (text, image path, etc.) */
23
+ content: string;
24
+ /** Content type identifier (text, image, etc.); chunkGenericDocument uses it to look up the chunking strategy */
25
+ contentType: string;
26
+ /** Optional metadata for the document */
27
+ metadata?: Record<string, any>;
28
+ }
29
+ /**
30
+ * Generic chunk interface that can represent different content types; this is the element type produced by ChunkingStrategy.chunk
31
+ */
32
+ export interface GenericChunk {
33
+ /** The content of the chunk (text, image path, etc.) */
34
+ content: string;
35
+ /** Content type identifier (text, image, etc.) */
36
+ contentType: string;
37
+ /** Index of this chunk within the document (NOTE(review): presumably zero-based - confirm against the strategy implementations) */
38
+ chunkIndex: number;
39
+ /** Optional metadata for the chunk */
40
+ metadata?: Record<string, any>;
41
+ }
42
+ /**
43
+ * Strategy interface for chunking different content types; implementations are registered with a ChunkingStrategyRegistry
44
+ */
45
+ export interface ChunkingStrategy {
46
+ /**
47
+ * Check if this strategy applies to the given content type (the registry selects the first registered strategy for which this returns true)
48
+ */
49
+ appliesTo(contentType: string): boolean;
50
+ /**
51
+ * Chunk a document using this strategy, resolving to the chunks it produced
52
+ */
53
+ chunk(document: GenericDocument, config: ChunkConfig): Promise<GenericChunk[]>;
54
+ }
55
+ /**
56
+ * Registry for chunking strategies; strategies are kept in registration order
57
+ */
58
+ export declare class ChunkingStrategyRegistry {
59
+ private strategies;
60
+ /**
61
+ * Register a chunking strategy (appended after any strategies registered earlier)
62
+ */
63
+ register(strategy: ChunkingStrategy): void;
64
+ /**
65
+ * Find the first registered strategy whose appliesTo accepts the content type; undefined when none does
66
+ */
67
+ findStrategy(contentType: string): ChunkingStrategy | undefined;
68
+ /**
69
+ * Get all registered strategies as a new array (changing the returned array does not change the registry)
70
+ */
71
+ getStrategies(): ChunkingStrategy[];
72
+ }
73
+ /**
74
+ * Default chunking configuration (chunkSize 250, chunkOverlap 50)
75
+ */
76
+ export declare const DEFAULT_CHUNK_CONFIG: ChunkConfig;
77
+ /**
78
+ * Global chunking strategy registry consulted by chunkGenericDocument
79
+ */
80
+ export declare const chunkingRegistry: ChunkingStrategyRegistry;
81
+ /**
82
+ * Generic chunking function that uses registered strategies; rejects with an Error when no strategy matches document.contentType, and uses DEFAULT_CHUNK_CONFIG when config is omitted
83
+ */
84
+ export declare function chunkGenericDocument(document: GenericDocument, config?: ChunkConfig): Promise<GenericChunk[]>;
85
+ /**
86
+ * Document interface for text chunking; this is the input accepted by chunkDocument
87
+ */
88
+ export interface Document {
89
+ /** Source path or identifier */
90
+ source: string;
91
+ /** Document title */
92
+ title: string;
93
+ /** Full text content of the document */
94
+ content: string;
95
+ /** Optional metadata */
96
+ metadata?: Record<string, any>;
97
+ }
98
+ /**
99
+ * Chunk interface for text chunking results; this is the element type chunkDocument resolves to
100
+ */
101
+ export interface Chunk {
102
+ /** The text content of the chunk */
103
+ text: string;
104
+ /** Index of this chunk within the document (NOTE(review): presumably zero-based - confirm in the text chunker) */
105
+ chunkIndex: number;
106
+ /** Number of tokens in this chunk */
107
+ tokenCount: number;
108
+ }
109
+ /**
110
+ * Text document chunking function; config falls back to DEFAULT_CHUNK_CONFIG when omitted
111
+ * Delegates to the chunkDocument export of the text implementation layer (../text/chunker.js), which is imported on demand
112
+ */
113
+ export declare function chunkDocument(document: Document, config?: ChunkConfig): Promise<Chunk[]>;
114
+ /**
115
+ * Register the text chunking strategy with the global registry
116
+ * This should be called during application initialization; the module also calls it automatically when it is loaded
117
+ */
118
+ export declare function registerTextChunkingStrategy(): Promise<void>;
119
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1,73 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ /**
6
+ * Registry for chunking strategies
7
+ */
8
+ export class ChunkingStrategyRegistry {
9
+ strategies = [];
10
+ /**
11
+ * Register a chunking strategy
12
+ */
13
+ register(strategy) {
14
+ this.strategies.push(strategy);
15
+ }
16
+ /**
17
+ * Find the appropriate strategy for a content type
18
+ */
19
+ findStrategy(contentType) {
20
+ return this.strategies.find(strategy => strategy.appliesTo(contentType));
21
+ }
22
+ /**
23
+ * Get all registered strategies
24
+ */
25
+ getStrategies() {
26
+ return [...this.strategies];
27
+ }
28
+ }
29
+ /**
30
+ * Default chunking configuration, used by chunkGenericDocument and chunkDocument when no config argument is supplied
31
+ */
32
+ export const DEFAULT_CHUNK_CONFIG = {
33
+ chunkSize: 250, // Target 200-300 tokens
34
+ chunkOverlap: 50 // Tokens of overlap between consecutive chunks
35
+ };
36
+ /**
37
+ * Global chunking strategy registry: read by chunkGenericDocument and populated by registerTextChunkingStrategy
38
+ */
39
+ export const chunkingRegistry = new ChunkingStrategyRegistry();
40
/**
 * Chunk a document with whichever registered strategy accepts its content type.
 *
 * @param document Document to chunk; its `contentType` selects the strategy.
 * @param config Chunking configuration; defaults to `DEFAULT_CHUNK_CONFIG`.
 * @returns The chunks produced by the selected strategy.
 * @throws Error when the global registry has no strategy for the content type.
 */
export async function chunkGenericDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    const matched = chunkingRegistry.findStrategy(document.contentType);
    if (matched) {
        return matched.chunk(document, config);
    }
    throw new Error(`No chunking strategy found for content type: ${document.contentType}`);
}
50
/**
 * Chunk a text document.
 * Forwards to the `chunkDocument` export of the text implementation layer.
 *
 * @param document Text document to chunk.
 * @param config Chunking configuration; defaults to `DEFAULT_CHUNK_CONFIG`.
 * @returns Whatever the text-layer chunker resolves to.
 */
export async function chunkDocument(document, config = DEFAULT_CHUNK_CONFIG) {
    // The text chunker is loaded lazily so this module and the text layer
    // do not import each other at load time (avoids a circular dependency).
    const textLayer = await import('../text/chunker.js');
    const textChunkDocument = textLayer.chunkDocument;
    return textChunkDocument(document, config);
}
59
/**
 * Register the text chunking strategy with the global registry.
 *
 * Safe to call more than once: this module invokes it on load and
 * applications may call it again during initialization, so a repeated call
 * must not add a second `TextChunkingStrategy` to the registry.
 *
 * @returns A promise that resolves once the strategy is registered (or was
 *          already present). Rejects if the text layer cannot be imported.
 */
export async function registerTextChunkingStrategy() {
    // Loaded lazily to avoid a circular dependency with the text layer.
    const { TextChunkingStrategy } = await import('../text/chunker.js');
    // The check and the registration run synchronously after the await, so
    // concurrent callers cannot both pass the check.
    const alreadyRegistered = chunkingRegistry
        .getStrategies()
        .some(existing => existing != null && existing.constructor === TextChunkingStrategy);
    if (alreadyRegistered) {
        return;
    }
    chunkingRegistry.register(new TextChunkingStrategy());
}
68
+ // Auto-register the text strategy when this module is loaded (fire-and-forget: registration completes asynchronously, after the dynamic import inside registerTextChunkingStrategy resolves)
69
+ // This ensures text chunking works out of the box; a registration failure is only logged as a warning and is not rethrown
70
+ registerTextChunkingStrategy().catch(error => {
71
+ console.warn('Failed to register text chunking strategy:', error);
72
+ });
73
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1,53 @@
1
+ /**
2
+ * CLI Database Utilities - Database access helpers for CLI commands
3
+ * Provides database locking detection and retry mechanisms for CLI operations
4
+ * Prevents conflicts between CLI commands and long-running processes like MCP server
5
+ */
6
+ /**
7
+ * CLI-specific database access options; every field is optional (default values are chosen by the implementation and are not visible in this declaration)
8
+ */
9
+ export interface CLIDatabaseOptions {
10
+ /** Maximum time to wait for database access, in milliseconds */
11
+ maxWaitMs?: number;
12
+ /** Interval between retries, in milliseconds */
13
+ retryIntervalMs?: number;
14
+ /** Whether to show progress messages to the user */
15
+ showProgress?: boolean;
16
+ /** Command name, used to produce better error messages */
17
+ commandName?: string;
18
+ }
19
+ /**
20
+ * Wait for the database at dbPath to become available for CLI operations
21
+ * Provides user-friendly progress messages and error handling; behavior is tuned through the optional CLIDatabaseOptions
22
+ */
23
+ export declare function waitForCLIDatabaseAccess(dbPath: string, options?: CLIDatabaseOptions): Promise<void>;
24
+ /**
25
+ * Execute a CLI operation with database access protection; the returned promise has the same result type as the operation
26
+ * Automatically handles database locking and provides user feedback
27
+ */
28
+ export declare function withCLIDatabaseAccess<T>(dbPath: string, operation: () => Promise<T>, options?: CLIDatabaseOptions): Promise<T>;
29
+ /**
30
+ * Check if database is currently busy (non-blocking); resolves to a status object with isBusy plus an optional reason and suggestions
31
+ * Useful for showing warnings or status information
32
+ */
33
+ export declare function isDatabaseBusy(dbPath: string): Promise<{
34
+ isBusy: boolean;
35
+ reason?: string;
36
+ suggestions?: string[];
37
+ }>;
38
+ /**
39
+ * Show status information for the database at dbPath, for debugging
40
+ * Useful for troubleshooting CLI issues
41
+ */
42
+ export declare function showDatabaseStatus(dbPath: string): Promise<void>;
43
+ /**
44
+ * Force cleanup of connections to the database at dbPath (emergency use only)
45
+ * Use with caution - only for recovery from stuck states
46
+ */
47
+ export declare function forceCleanupDatabase(dbPath: string): Promise<void>;
48
+ /**
49
+ * Graceful shutdown helper for CLI commands; dbPath is optional
50
+ * Ensures proper cleanup when CLI commands are interrupted
51
+ */
52
+ export declare function setupCLICleanup(dbPath?: string): void;
53
+ //# sourceMappingURL=cli-database-utils.d.ts.map