rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,333 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import { existsSync } from 'fs';
6
+ import { JSDOM } from 'jsdom';
7
+ import { ErrorCategory, ErrorSeverity, safeExecute } from './error-handler.js';
8
+ import { createMissingFileError, createDimensionMismatchError } from './actionable-error-messages.js';
9
+ import { BinaryIndexFormat } from './binary-index-format.js';
10
+ // Set up browser-like environment for hnswlib-wasm
11
// Outside a browser there is no `window`; build a JSDOM-backed one so that
// hnswlib-wasm finds the browser-like globals it looks for.
if (typeof window === 'undefined') {
    const jsdomOptions = {
        url: 'http://localhost',
        pretendToBeVisual: true,
        resources: 'usable'
    };
    const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', jsdomOptions);
    const browserWindow = dom.window;
    // Expose the browser globals on Node's global object.
    global.window = browserWindow;
    global.document = browserWindow.document;
    global.XMLHttpRequest = browserWindow.XMLHttpRequest;
    // Keep IndexedDB unavailable so hnswlib-wasm does not try to use it:
    // once on the Node global ...
    global.indexedDB = undefined;
    // ... and once on the JSDOM window itself, as a read-only `undefined`.
    Object.defineProperty(browserWindow, 'indexedDB', {
        configurable: true,
        writable: false,
        value: undefined
    });
}
30
/**
 * Substrings that identify the IndexedDB/IDBFS and WebAssembly messages
 * emitted while hnswlib-wasm loads; messages containing any of them are
 * suppressed during loading.
 */
const HNSWLIB_LOADER_NOISE_MARKERS = [
    'IDBFS',
    'indexedDB not supported',
    'EmscriptenFileSystemManager',
    'Aborted',
    'jsFS Error',
    'syncing FS',
    'RuntimeError: unreachable',
    '___trap',
    'abort',
    'assert',
    'hnswlib-wasm/dist/hnswlib'
];
/**
 * Tell whether a stderr/console message is known hnswlib-wasm loader noise.
 * @param message Text about to be written.
 * @returns true when the message contains one of the suppressed markers.
 */
function isHnswlibLoaderNoise(message) {
    return HNSWLIB_LOADER_NOISE_MARKERS.some((marker) => message.includes(marker));
}
/**
 * Import and instantiate hnswlib-wasm while temporarily filtering loader
 * noise out of `process.stderr.write` and `console.error`.
 * The original stream functions are always restored, even if loading fails.
 * @returns The loaded hnswlib module instance.
 */
async function loadHnswlibQuietly() {
    const originalStderrWrite = process.stderr.write;
    const originalConsoleError = console.error;
    process.stderr.write = function (chunk, encoding, callback) {
        if (isHnswlibLoaderNoise(chunk.toString())) {
            // write() may be called as write(chunk, callback); honour the
            // completion callback in either argument position.
            const done = typeof encoding === 'function' ? encoding : callback;
            if (done)
                done();
            return true;
        }
        return originalStderrWrite.call(this, chunk, encoding, callback);
    };
    console.error = (...args) => {
        if (isHnswlibLoaderNoise(args.join(' '))) {
            return;
        }
        originalConsoleError.apply(console, args);
    };
    try {
        const { loadHnswlib } = await import('hnswlib-wasm/dist/hnswlib.js');
        return await loadHnswlib();
    }
    finally {
        // Restore original output streams
        process.stderr.write = originalStderrWrite;
        console.error = originalConsoleError;
    }
}
/**
 * HNSW vector index (cosine space) backed by hnswlib-wasm.
 *
 * Every added vector is also kept in an in-memory map so the index can be
 * written to, and rebuilt from, the binary index format on disk.
 */
export class VectorIndex {
    index = null;
    hnswlib = null;
    indexPath;
    options;
    currentSize = 0;
    vectorStorage = new Map(); // id -> Float32Array, kept for persistence
    /**
     * @param indexPath Path of the binary index file used by load/save.
     * @param options Index options; `efConstruction`, `M` and `seed` default
     *                to 200, 16 and 100 unless overridden.
     */
    constructor(indexPath, options) {
        this.indexPath = indexPath;
        this.options = {
            efConstruction: 200,
            M: 16,
            seed: 100,
            ...options
        };
    }
    /**
     * Initialize a fresh, empty HNSW index with cosine similarity.
     * Any vectors tracked from a previous index are discarded so that a
     * later save cannot persist entries that are not in the new index.
     */
    async initialize() {
        await safeExecute(async () => {
            if (!this.hnswlib) {
                this.hnswlib = await loadHnswlibQuietly();
            }
            // Third parameter is autoSaveFilename; persistence is handled manually.
            this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
            this.index.initIndex(this.options.maxElements, this.options.M || 16, this.options.efConstruction || 200, this.options.seed || 100);
            this.vectorStorage.clear();
            this.currentSize = 0;
            console.log(`Initialized HNSW index with ${this.options.dimensions} dimensions using hnswlib-wasm`);
        }, 'Vector Index Initialization', {
            category: ErrorCategory.INDEX,
            severity: ErrorSeverity.FATAL
        });
    }
    /**
     * Load an existing index from `indexPath` and rebuild the HNSW graph
     * from the stored vectors.
     * @throws The missing-file error when `indexPath` does not exist.
     * @throws Error wrapping any failure while loading (including a
     *         dimension mismatch between the file and the options).
     */
    async loadIndex() {
        if (!existsSync(this.indexPath)) {
            throw createMissingFileError(this.indexPath, 'index', {
                operationContext: 'VectorIndex.loadIndex'
            });
        }
        try {
            if (!this.hnswlib) {
                this.hnswlib = await loadHnswlibQuietly();
            }
            // Third parameter is autoSaveFilename; persistence is handled manually.
            this.index = new this.hnswlib.HierarchicalNSW('cosine', this.options.dimensions, '');
            // Load from binary format
            const data = await BinaryIndexFormat.load(this.indexPath);
            // Validate dimensions
            if (data.dimensions !== this.options.dimensions) {
                console.log(`⚠️ Dimension mismatch detected:`);
                console.log(` Stored dimensions: ${data.dimensions}`);
                console.log(` Expected dimensions: ${this.options.dimensions}`);
                console.log(` Number of vectors: ${data.vectors.length}`);
                if (data.vectors.length > 0) {
                    console.log(` Actual vector length: ${data.vectors[0].vector.length}`);
                }
                throw createDimensionMismatchError(this.options.dimensions, data.dimensions, 'vector index loading', { operationContext: 'VectorIndex.loadIndex' });
            }
            // The stored build parameters take precedence over the constructor options.
            this.options.maxElements = data.maxElements;
            this.options.M = data.M;
            this.options.efConstruction = data.efConstruction;
            this.options.seed = data.seed;
            this.index.initIndex(this.options.maxElements, this.options.M, this.options.efConstruction, this.options.seed);
            // Clear and repopulate vector storage alongside the HNSW graph.
            this.vectorStorage.clear();
            for (const item of data.vectors) {
                this.index.addPoint(item.vector, item.id, false);
                this.vectorStorage.set(item.id, item.vector);
            }
            this.currentSize = data.currentSize;
            console.log(`✓ Loaded HNSW index with ${this.currentSize} vectors from ${this.indexPath}`);
        }
        catch (error) {
            throw new Error(`Failed to load index from ${this.indexPath}: ${error}`);
        }
    }
    /**
     * Save the index parameters and all tracked vectors in binary format.
     * @throws Error when the index is not initialized or saving fails.
     */
    async saveIndex() {
        if (!this.index) {
            throw new Error('Index not initialized');
        }
        try {
            // Collect all vectors from storage
            const vectors = Array.from(this.vectorStorage.entries()).map(([id, vector]) => ({
                id,
                vector
            }));
            await BinaryIndexFormat.save(this.indexPath, {
                dimensions: this.options.dimensions,
                maxElements: this.options.maxElements,
                M: this.options.M || 16,
                efConstruction: this.options.efConstruction || 200,
                seed: this.options.seed || 100,
                currentSize: this.currentSize,
                vectors
            });
            console.log(`✓ Saved HNSW index with ${this.currentSize} vectors to ${this.indexPath}`);
        }
        catch (error) {
            throw new Error(`Failed to save index to ${this.indexPath}: ${error}`);
        }
    }
    /**
     * Add a single vector to the HNSW index.
     * Re-adding an id that is already tracked replaces its stored vector
     * without counting it a second time.
     * @param embeddingId Numeric label of the vector.
     * @param vector Vector whose length must equal `options.dimensions`.
     * @throws Error when the index is not initialized or the add fails;
     *         the dimension-mismatch error when the length is wrong.
     */
    addVector(embeddingId, vector) {
        if (!this.index) {
            throw new Error('Index not initialized');
        }
        if (vector.length !== this.options.dimensions) {
            throw createDimensionMismatchError(this.options.dimensions, vector.length, 'vector addition', { operationContext: 'VectorIndex.addVector' });
        }
        try {
            const isNewId = !this.vectorStorage.has(embeddingId);
            this.index.addPoint(vector, embeddingId, false);
            // Store a private copy of the vector for persistence
            this.vectorStorage.set(embeddingId, new Float32Array(vector));
            if (isNewId) {
                this.currentSize++;
            }
        }
        catch (error) {
            throw new Error(`Failed to add vector ${embeddingId}: ${error}`);
        }
    }
    /**
     * Add multiple vectors to the index, one after another.
     * @param vectors Iterable of `{ id, vector }` entries.
     */
    addVectors(vectors) {
        for (const { id, vector } of vectors) {
            this.addVector(id, vector);
        }
    }
    /**
     * Search for the k nearest neighbors of a query vector.
     * @param queryVector Vector whose length must equal `options.dimensions`.
     * @param k Number of neighbors requested (capped at the current count).
     * @returns `{ neighbors, distances }`; both empty when the index is empty.
     * @throws Error when the index is not initialized or the search fails;
     *         the dimension-mismatch error when the length is wrong.
     */
    search(queryVector, k = 5) {
        if (!this.index) {
            throw new Error('Index not initialized');
        }
        if (queryVector.length !== this.options.dimensions) {
            throw createDimensionMismatchError(this.options.dimensions, queryVector.length, 'vector search', { operationContext: 'VectorIndex.search' });
        }
        if (this.currentSize === 0) {
            return { neighbors: [], distances: [] };
        }
        try {
            const result = this.index.searchKnn(queryVector, Math.min(k, this.currentSize), undefined);
            return {
                neighbors: result.neighbors,
                distances: result.distances
            };
        }
        catch (error) {
            throw new Error(`Search failed: ${error}`);
        }
    }
    /**
     * Get current number of vectors in the index.
     */
    getCurrentCount() {
        return this.currentSize;
    }
    /**
     * Check if the index file exists on disk.
     */
    indexExists() {
        return existsSync(this.indexPath);
    }
    /**
     * Set the query-time search parameter. Best effort: if the underlying
     * index has no `setEfSearch`, or the call fails, this only logs.
     * @param ef Value passed to `setEfSearch`.
     * @throws Error when the index is not initialized.
     */
    setEf(ef) {
        if (!this.index) {
            throw new Error('Index not initialized');
        }
        try {
            // hnswlib-wasm might not have the method, so check before calling
            if (typeof this.index.setEfSearch === 'function') {
                this.index.setEfSearch(ef);
                console.log(`Set efSearch to ${ef}`);
            }
            else {
                console.log(`setEfSearch not available in hnswlib-wasm`);
            }
        }
        catch (error) {
            console.log(`Failed to set ef: ${error}`);
        }
    }
    /**
     * Resize the index to accommodate more vectors.
     * @param newMaxElements New capacity; must exceed the current one.
     * @throws Error when the index is not initialized, the new capacity is
     *         not larger than the current one, or the resize fails.
     */
    resizeIndex(newMaxElements) {
        if (!this.index) {
            throw new Error('Index not initialized');
        }
        if (newMaxElements <= this.options.maxElements) {
            throw new Error(`New max elements (${newMaxElements}) must be greater than current (${this.options.maxElements})`);
        }
        try {
            this.index.resizeIndex(newMaxElements);
            this.options.maxElements = newMaxElements;
            console.log(`Resized index to accommodate ${newMaxElements} vectors`);
        }
        catch (error) {
            throw new Error(`Failed to resize index: ${error}`);
        }
    }
    /**
     * Get a shallow copy of the index options (for external access to configuration).
     */
    getOptions() {
        return { ...this.options };
    }
}
333
+ //# sourceMappingURL=vector-index.js.map
@@ -0,0 +1,6 @@
1
+ /**
2
+ * DOM polyfills for Node.js environment
3
+ * Required for transformers.js and other browser-dependent libraries
4
+ */
5
+ export {};
6
+ //# sourceMappingURL=dom-polyfills.d.ts.map
@@ -0,0 +1,37 @@
1
+ /**
2
+ * DOM polyfills for Node.js environment
3
+ * Required for transformers.js and other browser-dependent libraries
4
+ */
5
+ import { JSDOM } from 'jsdom';
6
+ // Only set up polyfills if we're in Node.js (not browser)
7
+ if (typeof window === 'undefined') {
8
+ console.log('Setting up DOM polyfills for Node.js environment...');
9
+ // Create a minimal DOM environment
10
+ const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
11
+ pretendToBeVisual: true,
12
+ resources: 'usable'
13
+ });
14
+ // Set up global objects that transformers.js expects
15
+ if (typeof globalThis.self === 'undefined') {
16
+ globalThis.self = globalThis;
17
+ }
18
+ // Also set on global for older Node.js versions
19
+ if (typeof global.self === 'undefined') {
20
+ global.self = global;
21
+ }
22
+ console.log('DOM polyfills set up successfully. self is now:', typeof self !== 'undefined' ? 'defined' : 'undefined');
23
+ if (typeof globalThis.window === 'undefined') {
24
+ globalThis.window = dom.window;
25
+ }
26
+ if (typeof globalThis.document === 'undefined') {
27
+ globalThis.document = dom.window.document;
28
+ }
29
+ // Additional polyfills that might be needed
30
+ if (typeof globalThis.navigator === 'undefined') {
31
+ globalThis.navigator = dom.window.navigator;
32
+ }
33
+ // Note: Do NOT polyfill createImageBitmap with a fake implementation
34
+ // RawImage.fromURL() will handle image loading correctly without it
35
+ // Setting a fake createImageBitmap that throws errors breaks image loading
36
+ }
37
+ //# sourceMappingURL=dom-polyfills.js.map
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Factory exports for creating RAG instances
3
+ * Provides convenient factory functions for common use cases
4
+ *
5
+ * This module serves as the main entry point for factory functions that
6
+ * simplify the creation of search and ingestion systems.
7
+ * The factories handle complex initialization while providing clean APIs.
8
+ *
9
+ * MAIN FACTORY CLASSES:
10
+ * - IngestionFactory: Creates IngestionPipeline instances for document ingestion
11
+ * - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * import { IngestionFactory, SearchFactory } from './factories';
16
+ *
17
+ * // Create ingestion pipeline
18
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
19
+ *
20
+ * // Create search engine with automatic mode detection
21
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite');
22
+ * ```
23
+ */
24
+ export { IngestionFactory } from './ingestion-factory.js';
25
+ export { SearchFactory } from './search-factory.js';
26
+ export type { IngestionFactoryOptions, ContentSystemConfig } from './ingestion-factory.js';
27
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Factory exports for creating RAG instances
3
+ * Provides convenient factory functions for common use cases
4
+ *
5
+ * This module serves as the main entry point for factory functions that
6
+ * simplify the creation of search and ingestion systems.
7
+ * The factories handle complex initialization while providing clean APIs.
8
+ *
9
+ * MAIN FACTORY CLASSES:
10
+ * - IngestionFactory: Creates IngestionPipeline instances for document ingestion
11
+ * - SearchFactory: Creates SearchEngine with automatic mode detection (recommended)
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * import { IngestionFactory, SearchFactory } from './factories';
16
+ *
17
+ * // Create ingestion pipeline
18
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
19
+ *
20
+ * // Create search engine with automatic mode detection
21
+ * const search = await SearchFactory.create('./index.bin', './db.sqlite');
22
+ * ```
23
+ */
24
+ // Main factory classes
25
+ export { IngestionFactory } from './ingestion-factory.js';
26
+ // Polymorphic search factory (recommended for automatic mode detection)
27
+ // Re-exported from core for convenience
28
+ export { SearchFactory } from './search-factory.js';
29
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Factory for creating IngestionPipeline instances in text or multimodal mode
3
+ * Handles complex initialization logic while providing clean API for common use cases
4
+ *
5
+ * FACTORY PATTERN BENEFITS:
6
+ * - Abstracts complex initialization (model loading, database setup, index initialization)
7
+ * - Provides simple API for common use cases while preserving access to dependency injection
8
+ * - Clear validation and error handling without fallback mechanisms
9
+ * - Supports different embedding models and configurations
10
+ * - Enables clean separation between simple usage and advanced customization
11
+ *
12
+ * MODE SELECTION GUIDE:
13
+ * - Text Mode (default): Optimized for text-only content
14
+ * - Uses sentence-transformer models (fast, accurate for text)
15
+ * - Images converted to text descriptions
16
+ * - Best for: document search, text clustering, semantic similarity
17
+ *
18
+ * - Multimodal Mode: Optimized for mixed text/image content
19
+ * - Uses CLIP models (unified embedding space)
20
+ * - True cross-modal search (text finds images, images find text)
21
+ * - Best for: image search, visual QA, multimodal retrieval
22
+ *
23
+ * USAGE PATTERNS:
24
+ *
25
+ * 1. Mode Selection:
26
+ * ```typescript
27
+ * // Text mode (default) - optimized for text-only content
28
+ * const textIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
29
+ * mode: 'text',
30
+ * embeddingModel: 'sentence-transformers/all-MiniLM-L6-v2'
31
+ * });
32
+ *
33
+ * // Multimodal mode - enables cross-modal search
34
+ * const multimodalIngestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
35
+ * mode: 'multimodal',
36
+ * embeddingModel: 'Xenova/clip-vit-base-patch32',
37
+ * rerankingStrategy: 'text-derived'
38
+ * });
39
+ * ```
40
+ */
41
+ import { IngestionPipeline } from '../core/ingestion.js';
42
+ /**
43
+ * Content system configuration options
44
+ */
45
+ export interface ContentSystemConfig {
46
+ /** Content directory path (default: '.raglite/content') */
47
+ contentDir?: string;
48
+ /** Maximum file size in bytes (default: 50MB) */
49
+ maxFileSize?: number;
50
+ /** Maximum content directory size in bytes (default: 2GB) */
51
+ maxContentDirSize?: number;
52
+ /** Enable content deduplication (default: true) */
53
+ enableDeduplication?: boolean;
54
+ /** Enable storage tracking (default: true) */
55
+ enableStorageTracking?: boolean;
56
+ }
57
+ /**
58
+ * Options for the ingestion factory (text or multimodal mode)
59
+ */
60
+ export interface IngestionFactoryOptions {
61
+ /** Embedding model name override */
62
+ embeddingModel?: string;
63
+ /** Embedding batch size override */
64
+ batchSize?: number;
65
+ /** Chunk size override */
66
+ chunkSize?: number;
67
+ /** Chunk overlap override */
68
+ chunkOverlap?: number;
69
+ /** Whether to force rebuild the index */
70
+ forceRebuild?: boolean;
71
+ /** Mode for the ingestion pipeline (text or multimodal) */
72
+ mode?: 'text' | 'multimodal';
73
+ /** Reranking strategy for multimodal mode */
74
+ rerankingStrategy?: 'cross-encoder' | 'text-derived' | 'metadata' | 'hybrid' | 'disabled';
75
+ /** Content system configuration */
76
+ contentSystemConfig?: ContentSystemConfig;
77
+ }
78
+ /**
79
+ * Factory for creating IngestionPipeline instances (text mode by default, multimodal on request)
80
+ * Handles model loading, database initialization, and index setup
81
+ *
82
+ * This factory abstracts the complex initialization process required for ingestion:
83
+ * 1. Creates necessary directories if they don't exist
84
+ * 2. Validates mode-model compatibility (no fallback mechanisms)
85
+ * 3. Loads and validates embedding models with clear error reporting
86
+ * 4. Establishes database connections and initializes schema
87
+ * 5. Stores mode configuration in database for automatic detection
88
+ * 6. Creates or loads vector indexes with proper configuration
89
+ * 7. Creates IngestionPipeline with proper dependency injection
90
+ *
91
+ * Mode Configuration:
92
+ * - Text Mode (default): Uses sentence-transformer models for text-only content
93
+ * - Multimodal Mode: Uses CLIP models for mixed text/image content
94
+ * - Mode is stored in database and auto-detected during search
95
+ * - Clear validation prevents mode-model mismatches
96
+ *
97
+ * @example
98
+ * ```typescript
99
+ * // Basic usage
100
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin');
101
+ * await ingestion.ingestDirectory('./documents');
102
+ *
103
+ * // With custom configuration
104
+ * const ingestion = await IngestionFactory.create('./db.sqlite', './index.bin', {
105
+ * embeddingModel: 'all-MiniLM-L6-v2',
106
+ * chunkSize: 512,
107
+ * chunkOverlap: 50,
108
+ * forceRebuild: true
109
+ * });
110
+ *
111
+ * // With defaults
112
+ * const ingestion = await IngestionFactory.createWithDefaults({
113
+ * batchSize: 32 // Faster processing
114
+ * });
115
+ * ```
116
+ */
117
+ export declare class IngestionFactory {
118
+ /**
119
+ * Create an IngestionPipeline configured for the requested mode (text by default)
120
+ *
121
+ * This method handles the complete initialization process:
122
+ * - Creates necessary directories if they don't exist
123
+ * - Loads the embedding model for the selected mode (with lazy initialization)
124
+ * - Opens database connection and initializes schema
125
+ * - Creates or loads vector index (with force rebuild option)
126
+ * - Creates IngestionPipeline with dependency injection
127
+ * - Validates the complete setup
128
+ *
129
+ * @param dbPath - Path to the SQLite database file (will be created if doesn't exist)
130
+ * @param indexPath - Path to the vector index file (will be created if doesn't exist)
131
+ * @param options - Optional configuration overrides
132
+ * @param options.embeddingModel - Override embedding model (default: from config)
133
+ * @param options.batchSize - Override embedding batch size (default: from config)
134
+ * @param options.chunkSize - Override chunk size (default: from config)
135
+ * @param options.chunkOverlap - Override chunk overlap (default: from config)
136
+ * @param options.forceRebuild - Force rebuild of existing index (default: false)
137
+ * @param options.contentSystemConfig - Content system configuration options
138
+ * @param options.contentSystemConfig.contentDir - Content directory path (default: '.raglite/content')
139
+ * @param options.contentSystemConfig.maxFileSize - Maximum file size in bytes (default: 50MB)
140
+ * @param options.contentSystemConfig.maxContentDirSize - Maximum content directory size in bytes (default: 2GB)
141
+ * @param options.contentSystemConfig.enableDeduplication - Enable content deduplication (default: true)
142
+ * @param options.contentSystemConfig.enableStorageTracking - Enable storage tracking (default: true)
143
+ * @returns Promise resolving to configured IngestionPipeline
144
+ * @throws {Error} If initialization fails
145
+ *
146
+ * @example
147
+ * ```typescript
148
+ * // Create ingestion pipeline with default content system
149
+ * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin');
150
+ *
151
+ * // Create with custom content system configuration
152
+ * const ingestion = await IngestionFactory.create('./my-db.sqlite', './my-index.bin', {
153
+ * contentSystemConfig: {
154
+ * contentDir: './custom-content',
155
+ * maxFileSize: 100 * 1024 * 1024, // 100MB
156
+ * maxContentDirSize: 5 * 1024 * 1024 * 1024, // 5GB
157
+ * enableDeduplication: true
158
+ * }
159
+ * });
160
+ *
161
+ * // Ingest documents from directory
162
+ * const result = await ingestion.ingestDirectory('./documents');
163
+ * console.log(`Processed ${result.documentsProcessed} documents`);
164
+ *
165
+ * // Ingest content from memory (MCP integration)
166
+ * const contentId = await ingestion.ingestFromMemory(buffer, {
167
+ * displayName: 'uploaded-file.pdf',
168
+ * contentType: 'application/pdf'
169
+ * });
170
+ *
171
+ * // Clean up when done
172
+ * await ingestion.cleanup();
173
+ * ```
174
+ */
175
+ static create(dbPath: string, indexPath: string, options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
176
+ /**
177
+ * Create an IngestionPipeline with automatic path resolution
178
+ * Uses default paths based on current working directory
179
+ * @param options - Optional configuration overrides
180
+ * @returns Promise resolving to configured IngestionPipeline
181
+ */
182
+ static createWithDefaults(options?: IngestionFactoryOptions): Promise<IngestionPipeline>;
183
+ /**
184
+ * Handles mode storage during ingestion
185
+ * Creates or validates system info based on the provided mode and options
186
+ * @private
187
+ */
188
+ private static handleModeStorage;
189
+ /**
190
+ * Updates system info in the database
191
+ * @private
192
+ */
193
+ private static updateSystemInfo;
194
+ /**
195
+ * Validates and prepares content system configuration
196
+ * @private
197
+ */
198
+ private static validateAndPrepareContentSystemConfig;
199
+ }
200
+ //# sourceMappingURL=ingestion-factory.d.ts.map