rag-lite-ts 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. package/README.md +88 -5
  2. package/dist/{cli → cjs/cli}/indexer.js +73 -15
  3. package/dist/cjs/cli/ui-server.d.ts +5 -0
  4. package/dist/cjs/cli/ui-server.js +152 -0
  5. package/dist/{cli.js → cjs/cli.js} +25 -6
  6. package/dist/{core → cjs/core}/binary-index-format.js +6 -3
  7. package/dist/{core → cjs/core}/db.d.ts +56 -0
  8. package/dist/{core → cjs/core}/db.js +105 -0
  9. package/dist/{core → cjs/core}/ingestion.js +3 -0
  10. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  11. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  12. package/dist/{core → cjs/core}/model-validator.js +1 -1
  13. package/dist/{core → cjs/core}/search-pipeline.js +1 -1
  14. package/dist/{core → cjs/core}/search.js +1 -1
  15. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  16. package/dist/cjs/core/vector-index-messages.js +5 -0
  17. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  18. package/dist/cjs/core/vector-index-worker.js +304 -0
  19. package/dist/cjs/core/vector-index.d.ts +107 -0
  20. package/dist/cjs/core/vector-index.js +344 -0
  21. package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
  22. package/dist/{factories → cjs/factories}/search-factory.js +11 -0
  23. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
  24. package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
  25. package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
  26. package/dist/{index.js → cjs/index.js} +3 -1
  27. package/dist/esm/api-errors.d.ts +90 -0
  28. package/dist/esm/api-errors.js +320 -0
  29. package/dist/esm/cli/indexer.d.ts +11 -0
  30. package/dist/esm/cli/indexer.js +529 -0
  31. package/dist/esm/cli/search.d.ts +7 -0
  32. package/dist/esm/cli/search.js +332 -0
  33. package/dist/esm/cli/ui-server.d.ts +5 -0
  34. package/dist/esm/cli/ui-server.js +152 -0
  35. package/dist/esm/cli.d.ts +3 -0
  36. package/dist/esm/cli.js +548 -0
  37. package/dist/esm/config.d.ts +51 -0
  38. package/dist/esm/config.js +79 -0
  39. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  40. package/dist/esm/core/abstract-embedder.js +264 -0
  41. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  42. package/dist/esm/core/actionable-error-messages.js +397 -0
  43. package/dist/esm/core/adapters.d.ts +93 -0
  44. package/dist/esm/core/adapters.js +139 -0
  45. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  46. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  47. package/dist/esm/core/binary-index-format.d.ts +78 -0
  48. package/dist/esm/core/binary-index-format.js +294 -0
  49. package/dist/esm/core/chunker.d.ts +119 -0
  50. package/dist/esm/core/chunker.js +73 -0
  51. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  52. package/dist/esm/core/cli-database-utils.js +239 -0
  53. package/dist/esm/core/config.d.ts +102 -0
  54. package/dist/esm/core/config.js +247 -0
  55. package/dist/esm/core/content-errors.d.ts +111 -0
  56. package/dist/esm/core/content-errors.js +362 -0
  57. package/dist/esm/core/content-manager.d.ts +335 -0
  58. package/dist/esm/core/content-manager.js +1476 -0
  59. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  60. package/dist/esm/core/content-performance-optimizer.js +516 -0
  61. package/dist/esm/core/content-resolver.d.ts +104 -0
  62. package/dist/esm/core/content-resolver.js +285 -0
  63. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  64. package/dist/esm/core/cross-modal-search.js +342 -0
  65. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  66. package/dist/esm/core/database-connection-manager.js +310 -0
  67. package/dist/esm/core/db.d.ts +269 -0
  68. package/dist/esm/core/db.js +1000 -0
  69. package/dist/esm/core/embedder-factory.d.ts +154 -0
  70. package/dist/esm/core/embedder-factory.js +311 -0
  71. package/dist/esm/core/error-handler.d.ts +112 -0
  72. package/dist/esm/core/error-handler.js +239 -0
  73. package/dist/esm/core/index.d.ts +59 -0
  74. package/dist/esm/core/index.js +69 -0
  75. package/dist/esm/core/ingestion.d.ts +202 -0
  76. package/dist/esm/core/ingestion.js +904 -0
  77. package/dist/esm/core/interfaces.d.ts +408 -0
  78. package/dist/esm/core/interfaces.js +106 -0
  79. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  80. package/dist/esm/core/knowledge-base-manager.js +256 -0
  81. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  82. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  83. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  84. package/dist/esm/core/mode-detection-service.js +565 -0
  85. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  86. package/dist/esm/core/mode-model-validator.js +203 -0
  87. package/dist/esm/core/model-registry.d.ts +116 -0
  88. package/dist/esm/core/model-registry.js +411 -0
  89. package/dist/esm/core/model-validator.d.ts +217 -0
  90. package/dist/esm/core/model-validator.js +782 -0
  91. package/dist/esm/core/path-manager.d.ts +47 -0
  92. package/dist/esm/core/path-manager.js +71 -0
  93. package/dist/esm/core/raglite-paths.d.ts +121 -0
  94. package/dist/esm/core/raglite-paths.js +145 -0
  95. package/dist/esm/core/reranking-config.d.ts +42 -0
  96. package/dist/esm/core/reranking-config.js +147 -0
  97. package/dist/esm/core/reranking-factory.d.ts +92 -0
  98. package/dist/esm/core/reranking-factory.js +410 -0
  99. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  100. package/dist/esm/core/reranking-strategies.js +650 -0
  101. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  102. package/dist/esm/core/resource-cleanup.js +371 -0
  103. package/dist/esm/core/resource-manager.d.ts +212 -0
  104. package/dist/esm/core/resource-manager.js +564 -0
  105. package/dist/esm/core/search-pipeline.d.ts +111 -0
  106. package/dist/esm/core/search-pipeline.js +287 -0
  107. package/dist/esm/core/search.d.ts +141 -0
  108. package/dist/esm/core/search.js +320 -0
  109. package/dist/esm/core/streaming-operations.d.ts +145 -0
  110. package/dist/esm/core/streaming-operations.js +409 -0
  111. package/dist/esm/core/types.d.ts +66 -0
  112. package/dist/esm/core/types.js +6 -0
  113. package/dist/esm/core/universal-embedder.d.ts +177 -0
  114. package/dist/esm/core/universal-embedder.js +139 -0
  115. package/dist/esm/core/validation-messages.d.ts +99 -0
  116. package/dist/esm/core/validation-messages.js +334 -0
  117. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  118. package/dist/esm/core/vector-index-messages.js +5 -0
  119. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  120. package/dist/esm/core/vector-index-worker.js +304 -0
  121. package/dist/esm/core/vector-index.d.ts +107 -0
  122. package/dist/esm/core/vector-index.js +344 -0
  123. package/dist/esm/dom-polyfills.d.ts +6 -0
  124. package/dist/esm/dom-polyfills.js +37 -0
  125. package/dist/esm/factories/index.d.ts +27 -0
  126. package/dist/esm/factories/index.js +29 -0
  127. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  128. package/dist/esm/factories/ingestion-factory.js +473 -0
  129. package/dist/esm/factories/search-factory.d.ts +154 -0
  130. package/dist/esm/factories/search-factory.js +355 -0
  131. package/dist/esm/file-processor.d.ts +147 -0
  132. package/dist/esm/file-processor.js +963 -0
  133. package/dist/esm/index-manager.d.ts +136 -0
  134. package/dist/esm/index-manager.js +667 -0
  135. package/dist/esm/index.d.ts +76 -0
  136. package/dist/esm/index.js +112 -0
  137. package/dist/esm/indexer.d.ts +7 -0
  138. package/dist/esm/indexer.js +54 -0
  139. package/dist/esm/ingestion.d.ts +63 -0
  140. package/dist/esm/ingestion.js +124 -0
  141. package/dist/esm/mcp-server.d.ts +46 -0
  142. package/dist/esm/mcp-server.js +1820 -0
  143. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  144. package/dist/esm/multimodal/clip-embedder.js +996 -0
  145. package/dist/esm/multimodal/index.d.ts +6 -0
  146. package/dist/esm/multimodal/index.js +6 -0
  147. package/dist/esm/preprocess.d.ts +19 -0
  148. package/dist/esm/preprocess.js +203 -0
  149. package/dist/esm/preprocessors/index.d.ts +17 -0
  150. package/dist/esm/preprocessors/index.js +38 -0
  151. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  152. package/dist/esm/preprocessors/mdx.js +101 -0
  153. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  154. package/dist/esm/preprocessors/mermaid.js +329 -0
  155. package/dist/esm/preprocessors/registry.d.ts +56 -0
  156. package/dist/esm/preprocessors/registry.js +179 -0
  157. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  158. package/dist/esm/run-error-recovery-tests.js +101 -0
  159. package/dist/esm/search-standalone.d.ts +7 -0
  160. package/dist/esm/search-standalone.js +117 -0
  161. package/dist/esm/search.d.ts +99 -0
  162. package/dist/esm/search.js +177 -0
  163. package/dist/esm/test-utils.d.ts +18 -0
  164. package/dist/esm/test-utils.js +27 -0
  165. package/dist/esm/text/chunker.d.ts +33 -0
  166. package/dist/esm/text/chunker.js +279 -0
  167. package/dist/esm/text/embedder.d.ts +111 -0
  168. package/dist/esm/text/embedder.js +386 -0
  169. package/dist/esm/text/index.d.ts +8 -0
  170. package/dist/esm/text/index.js +9 -0
  171. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  172. package/dist/esm/text/preprocessors/index.js +38 -0
  173. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  174. package/dist/esm/text/preprocessors/mdx.js +101 -0
  175. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  176. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  177. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  178. package/dist/esm/text/preprocessors/registry.js +180 -0
  179. package/dist/esm/text/reranker.d.ts +49 -0
  180. package/dist/esm/text/reranker.js +274 -0
  181. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  182. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  183. package/dist/esm/text/tokenizer.d.ts +22 -0
  184. package/dist/esm/text/tokenizer.js +64 -0
  185. package/dist/esm/types.d.ts +83 -0
  186. package/dist/esm/types.js +3 -0
  187. package/dist/esm/utils/vector-math.d.ts +31 -0
  188. package/dist/esm/utils/vector-math.js +70 -0
  189. package/package.json +39 -14
  190. package/dist/core/vector-index.d.ts +0 -72
  191. package/dist/core/vector-index.js +0 -331
  192. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  193. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  194. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  195. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  196. /package/dist/{cli → cjs/cli}/search.js +0 -0
  197. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  198. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  199. /package/dist/{config.js → cjs/config.js} +0 -0
  200. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  202. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  204. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/adapters.js +0 -0
  206. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  208. /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  210. /package/dist/{core → cjs/core}/chunker.js +0 -0
  211. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  212. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  213. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  214. /package/dist/{core → cjs/core}/config.js +0 -0
  215. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  216. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  217. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  218. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  219. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  220. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  221. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  222. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  223. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  224. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  225. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  226. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  227. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  229. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  231. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/index.js +0 -0
  233. /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  235. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  236. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  238. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  240. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  242. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  244. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  245. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  246. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  247. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  248. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  249. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  250. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  251. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  252. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  253. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  254. /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
  255. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  256. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  257. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  258. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  259. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  260. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  261. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  262. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  263. /package/dist/{core → cjs/core}/types.d.ts +0 -0
  264. /package/dist/{core → cjs/core}/types.js +0 -0
  265. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  266. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  267. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  268. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  269. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  270. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  271. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  272. /package/dist/{factories → cjs/factories}/index.js +0 -0
  273. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  274. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  275. /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
  276. /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
  277. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  278. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  279. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  280. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  281. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  282. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  283. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  284. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
  285. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  286. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  287. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  288. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  289. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  290. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  291. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  292. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  293. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  294. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  295. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  296. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  297. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  298. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  299. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  300. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  301. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  302. /package/dist/{search.js → cjs/search.js} +0 -0
  303. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  304. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  305. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/chunker.js +0 -0
  307. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  308. /package/dist/{text → cjs/text}/embedder.js +0 -0
  309. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  310. /package/dist/{text → cjs/text}/index.js +0 -0
  311. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  312. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  313. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  314. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  315. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  316. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  317. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  318. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  319. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  320. /package/dist/{text → cjs/text}/reranker.js +0 -0
  321. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  322. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  323. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  324. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  325. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  326. /package/dist/{types.js → cjs/types.js} +0 -0
  327. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  328. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,304 @@
1
+ /**
2
+ * Worker thread for VectorIndex operations
3
+ * Isolates hnswlib-wasm WebAssembly memory to prevent accumulation
4
+ */
5
+ import { parentPort } from 'worker_threads';
6
+ import { existsSync } from 'fs';
7
+ import { BinaryIndexFormat } from './binary-index-format.js';
// hnswlib-wasm is given a browser-like environment: when no `window` global
// exists, one is supplied from a jsdom document.
if (typeof window === 'undefined') {
    const jsdomModule = await import('jsdom');
    const jsdomOptions = {
        url: 'http://localhost',
        pretendToBeVisual: true,
        resources: 'usable'
    };
    const dom = new jsdomModule.JSDOM('<!DOCTYPE html><html><body></body></html>', jsdomOptions);
    const browserWindow = dom.window;
    // Expose the pieces of the jsdom window as Node globals.
    global.window = browserWindow;
    global.document = browserWindow.document;
    global.XMLHttpRequest = browserWindow.XMLHttpRequest;
    // indexedDB is forced to `undefined` both globally and on the jsdom window.
    // NOTE(review): presumably so the wasm runtime skips its IndexedDB-backed
    // filesystem (see the IDBFS messages filtered during loading) — confirm.
    global.indexedDB = undefined;
    Object.defineProperty(browserWindow, 'indexedDB', {
        value: undefined,
        writable: false,
        configurable: true
    });
}
// Worker state (module-level, shared by every message handler in this file)
// Loaded hnswlib-wasm module; null until loadHnswlibModule() has succeeded.
let hnswlib = null;
// Active HierarchicalNSW instance; null until init or loadIndex has run.
let index = null;
// Copy of every stored vector keyed by id; this is what saveIndex serializes.
const vectorStorage = new Map();
// Count of stored vectors reported back to the main thread.
let currentSize = 0;
// Index parameters: dimensions, maxElements, M, efConstruction, seed.
let options = null;
// Set during init or loadIndex
let indexPath = null;
// Helper: Load hnswlib module (only once per worker)
/**
 * Loads the hnswlib-wasm module and caches it in the module-level `hnswlib`
 * variable; later calls return the cached module.
 *
 * While the module loads, `process.stderr.write` and `console.error` are
 * replaced with filters that drop any message containing one of the known
 * loader-noise fragments. Both originals are restored in `finally`, whether
 * loading succeeds or fails.
 *
 * @returns the loaded hnswlib module
 */
async function loadHnswlibModule() {
    if (hnswlib) {
        return hnswlib;
    }
    // Fragments that mark a message as loader noise to be suppressed.
    const suppressedFragments = [
        'IDBFS',
        'indexedDB not supported',
        'EmscriptenFileSystemManager',
        'Aborted',
        'jsFS Error',
        'syncing FS',
        'RuntimeError: unreachable',
        '___trap',
        'abort',
        'assert',
        'hnswlib-wasm/dist/hnswlib'
    ];
    const isLoaderNoise = (message) => suppressedFragments.some((fragment) => message.includes(fragment));
    // Suppress stderr during loading (same as current implementation)
    const originalStderrWrite = process.stderr.write;
    const originalConsoleError = console.error;
    process.stderr.write = function (chunk, encoding, callback) {
        if (isLoaderNoise(chunk.toString())) {
            // write(chunk, callback) is a valid call shape: the completion
            // callback then arrives in the `encoding` slot and must still be
            // invoked even though the write itself is swallowed.
            const done = typeof encoding === 'function' ? encoding : callback;
            if (done)
                done();
            return true;
        }
        return originalStderrWrite.call(this, chunk, encoding, callback);
    };
    console.error = (...args) => {
        if (isLoaderNoise(args.join(' '))) {
            return;
        }
        originalConsoleError.apply(console, args);
    };
    try {
        const hnswlibModule = await import('hnswlib-wasm/dist/hnswlib.js');
        const { loadHnswlib } = hnswlibModule;
        hnswlib = await loadHnswlib();
        return hnswlib;
    }
    finally {
        // Always undo the monkey-patches, even when loading throws.
        process.stderr.write = originalStderrWrite;
        console.error = originalConsoleError;
    }
}
// Helper: Convert ArrayBuffer to Float32Array
/**
 * Creates a Float32Array view over the start of `buffer`.
 * The view shares memory with `buffer`; nothing is copied.
 *
 * @param buffer     ArrayBuffer holding the float data
 * @param dimensions number of float32 elements the view exposes
 * @returns a view of `dimensions` elements beginning at byte offset 0
 */
function bufferToFloat32Array(buffer, dimensions) {
    const byteOffset = 0;
    const view = new Float32Array(buffer, byteOffset, dimensions);
    return view;
}
// Message handlers
/**
 * Handles the 'init' request: loads hnswlib if needed, records the index path
 * and parameters, and creates an empty cosine-distance HNSW index.
 *
 * @param payload carries `indexPath`, `dimensions`, `maxElements` and the
 *                optional `M`, `efConstruction` and `seed`
 */
async function handleInit(payload) {
    await loadHnswlibModule();
    // Store indexPath for saveIndex operations
    indexPath = payload.indexPath;
    options = {
        dimensions: payload.dimensions,
        maxElements: payload.maxElements,
        M: payload.M || 16,
        efConstruction: payload.efConstruction || 200,
        // `??` rather than `||`: 0 is a legitimate seed and must not be
        // silently replaced by the default. (M and efConstruction keep `||`,
        // so a falsy value there still falls back to the default.)
        seed: payload.seed ?? 100
    };
    index = new hnswlib.HierarchicalNSW('cosine', options.dimensions, '');
    index.initIndex(options.maxElements, options.M, options.efConstruction, options.seed);
    // A fresh index holds no vectors.
    currentSize = 0;
    vectorStorage.clear();
}
/**
 * Handles the 'loadIndex' request: reads a saved index through
 * BinaryIndexFormat and rebuilds the in-memory HNSW index and vector storage
 * from it.
 *
 * When no options are set yet they are taken from the file; otherwise the
 * file's dimensions must match the existing options.
 *
 * @param payload carries `indexPath`, the file to load
 * @throws Error when the file is missing, the dimensions do not match, or
 *         adding a vector fails (memory failures get an explanatory message)
 */
async function handleLoadIndex(payload) {
    const requestedPath = payload.indexPath;
    if (!existsSync(requestedPath)) {
        throw new Error(`Index file not found: ${requestedPath}`);
    }
    await loadHnswlibModule();
    const data = await BinaryIndexFormat.load(requestedPath);
    // Validate dimensions before touching worker state, so a rejected file
    // never becomes the target of a later saveIndex.
    if (options && data.dimensions !== options.dimensions) {
        throw new Error(`Dimension mismatch: stored ${data.dimensions}, expected ${options.dimensions}`);
    }
    indexPath = requestedPath;
    if (!options) {
        options = {
            dimensions: data.dimensions,
            maxElements: data.maxElements,
            M: data.M || 16,
            efConstruction: data.efConstruction || 200,
            // `??` keeps a stored seed of 0 instead of replacing it with 100.
            seed: data.seed ?? 100
        };
    }
    // Create index
    index = new hnswlib.HierarchicalNSW('cosine', options.dimensions, '');
    index.initIndex(options.maxElements, options.M, options.efConstruction, options.seed);
    // Clear and repopulate
    vectorStorage.clear();
    currentSize = 0;
    // Load vectors in batches
    const batchSize = 1000;
    const totalVectors = data.vectors.length;
    for (let i = 0; i < totalVectors; i += batchSize) {
        const batch = data.vectors.slice(i, i + batchSize);
        for (const item of batch) {
            try {
                index.addPoint(item.vector, item.id, false);
                vectorStorage.set(item.id, item.vector);
                // Derive the count from storage so it cannot drift from what
                // saveIndex would write.
                currentSize = vectorStorage.size;
            }
            catch (error) {
                // Any message mentioning "memory" is treated as a wasm memory
                // failure; this single check covers the more specific
                // "Cannot enlarge memory" and WebAssembly.Exception cases.
                if (error?.message?.includes('memory')) {
                    throw new Error(`WebAssembly memory limit exceeded while loading vector index. ` +
                        `Index contains ${totalVectors} vectors which requires more than 2GB of memory. ` +
                        `Consider: 1) Rebuilding the index with fewer vectors, 2) Using a smaller embedding model, ` +
                        `3) Splitting your data into multiple smaller indexes, or 4) Increasing Node.js memory with --max-old-space-size=4096`);
                }
                throw error;
            }
        }
        // Progress output for large indexes, once per 10000 vectors.
        if (totalVectors > 10000 && (i + batchSize) % 10000 === 0) {
            console.log(` Loaded ${Math.min(i + batchSize, totalVectors)}/${totalVectors} vectors...`);
        }
    }
}
/**
 * Handles the 'saveIndex' request: writes the index parameters and every
 * stored vector to `indexPath` through BinaryIndexFormat.
 *
 * @throws Error when no index exists or no index path has been recorded
 */
async function handleSaveIndex() {
    const ready = index && indexPath;
    if (!ready) {
        throw new Error('Index not initialized or indexPath not set');
    }
    // Flatten the id -> vector map into the array form that is saved.
    const vectors = [];
    for (const [id, vector] of vectorStorage.entries()) {
        vectors.push({ id, vector });
    }
    const snapshot = {
        dimensions: options.dimensions,
        maxElements: options.maxElements,
        M: options.M,
        efConstruction: options.efConstruction,
        seed: options.seed,
        currentSize: vectors.length,
        vectors
    };
    await BinaryIndexFormat.save(indexPath, snapshot);
}
/**
 * Handles the 'addVector' request: adds one vector to the HNSW index and
 * keeps a private copy of it in `vectorStorage`.
 *
 * @param payload carries `id`, `vector` (an ArrayBuffer of float32 data) and
 *                `dimensions` (number of floats to read from the buffer)
 * @throws Error when the index is not initialized or the vector length
 *         differs from the index dimensions
 */
function handleAddVector(payload) {
    if (!index || !options) {
        throw new Error('Index not initialized');
    }
    const vector = bufferToFloat32Array(payload.vector, payload.dimensions);
    if (vector.length !== options.dimensions) {
        throw new Error(`Vector dimension mismatch: ${vector.length} vs ${options.dimensions}`);
    }
    index.addPoint(vector, payload.id, false);
    // Store a copy so the saved data does not alias the incoming buffer.
    vectorStorage.set(payload.id, new Float32Array(vector));
    // Re-adding an existing id overwrites its Map entry rather than adding
    // one, so a plain increment would over-count. Deriving the count from the
    // Map keeps it equal to what saveIndex writes and what search clamps k to.
    currentSize = vectorStorage.size;
}
/**
 * Handles the 'addVectors' request by adding each entry in order through
 * handleAddVector. An error from any entry stops the loop; entries added
 * before it stay in the index.
 *
 * @param payload carries `vectors`, a list of `{ id, vector, dimensions }`
 */
function handleAddVectors(payload) {
    for (const entry of payload.vectors) {
        const single = {
            id: entry.id,
            vector: entry.vector,
            dimensions: entry.dimensions
        };
        handleAddVector(single);
    }
}
/**
 * Handles the 'search' request: runs a k-nearest-neighbour query.
 *
 * @param payload carries `queryVector` (ArrayBuffer of float32 data),
 *                `dimensions` and `k`
 * @returns `{ neighbors, distances }`; both lists are empty when the index
 *          holds no vectors
 * @throws Error when the index is not initialized or the query length differs
 *         from the index dimensions
 */
function handleSearch(payload) {
    if (!index || !options) {
        throw new Error('Index not initialized');
    }
    const queryVector = bufferToFloat32Array(payload.queryVector, payload.dimensions);
    if (queryVector.length !== options.dimensions) {
        throw new Error(`Query vector dimension mismatch: ${queryVector.length} vs ${options.dimensions}`);
    }
    // An empty index is answered directly, without calling into hnswlib.
    if (currentSize === 0) {
        return { neighbors: [], distances: [] };
    }
    // Never request more neighbours than there are stored vectors.
    const neighbourCount = Math.min(payload.k, currentSize);
    const found = index.searchKnn(queryVector, neighbourCount, undefined);
    return {
        neighbors: found.neighbors,
        distances: found.distances
    };
}
/**
 * Reports how many vectors the worker currently tracks.
 *
 * @returns the module-level `currentSize` counter
 */
function handleGetCurrentCount() {
    const count = currentSize;
    return count;
}
/**
 * Handles the 'resizeIndex' request: grows the index capacity and records the
 * new limit in `options.maxElements`.
 *
 * @param payload carries `newMaxElements`, which must exceed the current limit
 * @throws Error when the index is not initialized or the new limit is not
 *         larger than the current one
 */
function handleResizeIndex(payload) {
    if (!index || !options) {
        throw new Error('Index not initialized');
    }
    const requestedCapacity = payload.newMaxElements;
    if (requestedCapacity <= options.maxElements) {
        throw new Error(`New max elements must be greater than current`);
    }
    index.resizeIndex(requestedCapacity);
    options.maxElements = requestedCapacity;
}
/**
 * Handles the 'reset' request: drops every stored vector and, when an index
 * already exists, replaces it with an empty one built from the same options.
 */
async function handleReset() {
    vectorStorage.clear();
    currentSize = 0;
    // With no existing index (or no options / module) there is nothing to rebuild.
    if (!(index && options && hnswlib)) {
        return;
    }
    index = new hnswlib.HierarchicalNSW('cosine', options.dimensions, '');
    index.initIndex(options.maxElements, options.M, options.efConstruction, options.seed);
    // Set efSearch for query time
    const canSetEfSearch = typeof index.setEfSearch === 'function';
    if (canSetEfSearch) {
        index.setEfSearch(50);
    }
}
/**
 * Handles the 'setEf' request: forwards `payload.ef` to the index's
 * `setEfSearch`. If the index object has no such method the request is a
 * silent no-op.
 *
 * @param payload carries `ef`, the value handed to `setEfSearch`
 * @throws Error when the index is not initialized
 */
function handleSetEf(payload) {
    if (!index) {
        throw new Error('Index not initialized');
    }
    if (typeof index.setEfSearch !== 'function') {
        return;
    }
    index.setEfSearch(payload.ef);
}
/**
 * Handles the 'indexExists' request.
 *
 * @param payload carries `indexPath`, the path to probe
 * @returns true when something exists at that path, per `existsSync`
 */
function handleIndexExists(payload) {
    const candidatePath = payload.indexPath;
    return existsSync(candidatePath);
}
// Main message handler
// Dispatches each request on `request.type` and posts back one reply that
// carries the same `id`. The reply is `type: 'success'` (with a payload for
// the operations that produce one) or `type: 'error'` with the error message.
parentPort.on('message', async (request) => {
    // Payload used by every operation that reports the stored-vector count.
    // Evaluated after the operation has run, so it reflects the new count.
    const countPayload = () => ({ count: currentSize });
    try {
        const response = { id: request.id, type: 'success' };
        switch (request.type) {
            // Operations that reply without a payload.
            case 'init':
                await handleInit(request.payload);
                break;
            case 'resizeIndex':
                handleResizeIndex(request.payload);
                break;
            case 'setEf':
                handleSetEf(request.payload);
                break;
            case 'cleanup':
                // Worker will be terminated by main thread, just acknowledge
                break;
            // Operations that reply with the current vector count.
            case 'loadIndex':
                await handleLoadIndex(request.payload);
                response.payload = countPayload();
                break;
            case 'saveIndex':
                await handleSaveIndex();
                response.payload = countPayload();
                break;
            case 'addVector':
                handleAddVector(request.payload);
                response.payload = countPayload();
                break;
            case 'addVectors':
                handleAddVectors(request.payload);
                response.payload = countPayload();
                break;
            case 'getCurrentCount':
                response.payload = countPayload();
                break;
            case 'reset':
                await handleReset();
                response.payload = { count: 0 };
                break;
            // Operations with their own payload shape.
            case 'search':
                response.payload = handleSearch(request.payload);
                break;
            case 'indexExists':
                response.payload = { exists: handleIndexExists(request.payload) };
                break;
            default:
                throw new Error(`Unknown request type: ${request.type}`);
        }
        parentPort.postMessage(response);
    }
    catch (error) {
        // Only the message text is sent back to the main thread.
        const message = error instanceof Error ? error.message : String(error);
        parentPort.postMessage({
            id: request.id,
            type: 'error',
            error: message
        });
    }
});
304
+ //# sourceMappingURL=vector-index-worker.js.map
@@ -0,0 +1,107 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ *
5
+ * Worker-based implementation to prevent WebAssembly memory accumulation.
6
+ */
/**
 * Construction parameters for a {@link VectorIndex}.
 *
 * Only `dimensions` and `maxElements` are required. The worker's init handler
 * (vector-index-worker.js) falls back to M = 16, efConstruction = 200 and
 * seed = 100 when the optional fields are not supplied.
 */
export interface VectorIndexOptions {
    /** Length every stored vector and query vector must have. */
    dimensions: number;
    /** Capacity the index is initialised with. */
    maxElements: number;
    /** HNSW `efConstruction` parameter passed to `initIndex`. */
    efConstruction?: number;
    /** HNSW `M` parameter passed to `initIndex`. */
    M?: number;
    /** Seed passed to `initIndex`. */
    seed?: number;
}
/**
 * Result of a nearest-neighbour query.
 *
 * NOTE(review): the two arrays are presumably parallel (distances[i] belongs
 * to neighbors[i]) — confirm against the hnswlib `searchKnn` contract.
 */
export interface SearchResult {
    /** Ids of the matching vectors. */
    neighbors: number[];
    /** Distances reported for the matches. */
    distances: number[];
}
/**
 * HNSW vector index whose operations run in a worker thread, so that
 * WebAssembly memory does not accumulate in the calling thread.
 *
 * Apart from `indexExists()` and `getOptions()`, every public method returns
 * a Promise.
 */
export declare class VectorIndex {
    private worker;
    private indexPath;
    private options;
    private messageQueue;
    private messageId;
    private isInitialized;
    constructor(indexPath: string, options: VectorIndexOptions);
    /**
     * Resolves the path of the worker script. Always a compiled .js file,
     * because workers cannot execute TypeScript directly.
     */
    private getWorkerPath;
    /** Makes sure the worker has been created and is ready. */
    private ensureWorker;
    /** Sends one message to the worker and waits for its response. */
    private sendMessage;
    /** Converts a Float32Array to an ArrayBuffer for transfer. */
    private float32ArrayToBuffer;
    /** Initializes the HNSW index (cosine similarity, hnswlib-wasm). */
    initialize(): Promise<void>;
    /** Loads an existing index from file. */
    loadIndex(): Promise<void>;
    /** Saves the index in the binary format. */
    saveIndex(): Promise<void>;
    /**
     * Adds a single vector to the HNSW index.
     * Asynchronous because the work happens in the worker.
     */
    addVector(embeddingId: number, vector: Float32Array): Promise<void>;
    /**
     * Adds several vectors to the index as one batch.
     * Asynchronous because the work happens in the worker.
     */
    addVectors(vectors: {
        id: number;
        vector: Float32Array;
    }[]): Promise<void>;
    /**
     * Searches for the k nearest neighbours of `queryVector`.
     * Asynchronous because the work happens in the worker.
     */
    search(queryVector: Float32Array, k?: number): Promise<SearchResult>;
    /**
     * Resolves to the current number of vectors in the index.
     * Asynchronous because the work happens in the worker.
     */
    getCurrentCount(): Promise<number>;
    /** Reports whether the index exists on disk. */
    indexExists(): boolean;
    /**
     * Sets the search parameter used at query time.
     * Asynchronous because the work happens in the worker.
     */
    setEf(ef: number): Promise<void>;
    /**
     * Resizes the index to accommodate more vectors.
     * Asynchronous because the work happens in the worker.
     */
    resizeIndex(newMaxElements: number): Promise<void>;
    /**
     * Resets the index to an empty state: all vectors are cleared from the
     * HNSW graph and from vectorStorage, while the index parameters
     * (dimensions, M, efConstruction) are preserved.
     */
    reset(): Promise<void>;
    /** Returns the index options, for external access to the configuration. */
    getOptions(): VectorIndexOptions;
    /** Cleanup: terminates the worker and frees all WebAssembly memory. */
    cleanup(): Promise<void>;
}
107
+ //# sourceMappingURL=vector-index.d.ts.map