rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310)
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -198,7 +198,7 @@ export async function runIngest(path, options = {}) {
198
198
  showProgress: true,
199
199
  maxWaitMs: 15000 // Longer timeout for ingestion
200
200
  });
201
- const result = await pipeline.ingestPath(resolvedPath);
201
+ const result = await pipeline.ingestPath(resolvedPath, { mode: factoryOptions.mode });
202
202
  // Display final results
203
203
  console.log('\n' + '='.repeat(50));
204
204
  console.log('INGESTION SUMMARY');
@@ -137,6 +137,11 @@ export async function runSearch(query, options = {}) {
137
137
  if (options['top-k'] !== undefined) {
138
138
  searchOptions.top_k = options['top-k'];
139
139
  }
140
+ // Set content type filter for search-level filtering
141
+ const contentTypeFilter = options['content-type'];
142
+ if (contentTypeFilter && contentTypeFilter !== 'all') {
143
+ searchOptions.contentType = contentTypeFilter;
144
+ }
140
145
  // Phase 2: Disable reranking for image-to-image searches to preserve visual similarity
141
146
  let rerankingForciblyDisabled = false;
142
147
  if (isImage && embedder) {
@@ -174,16 +179,6 @@ export async function runSearch(query, options = {}) {
174
179
  results = await searchEngine.search(query, searchOptions);
175
180
  }
176
181
  const searchTime = Date.now() - startTime;
177
- // Apply content type filter if specified
178
- const contentTypeFilter = options['content-type'];
179
- if (contentTypeFilter && contentTypeFilter !== 'all') {
180
- const originalCount = results.length;
181
- results = results.filter(r => r.contentType === contentTypeFilter);
182
- if (results.length < originalCount) {
183
- console.log(`Filtered to ${results.length} ${contentTypeFilter} result${results.length === 1 ? '' : 's'} (from ${originalCount} total)`);
184
- console.log('');
185
- }
186
- }
187
182
  // Display results
188
183
  if (results.length === 0) {
189
184
  console.log('No results found.');
@@ -25,10 +25,19 @@ export interface BinaryIndexData {
25
25
  id: number;
26
26
  vector: Float32Array;
27
27
  }>;
28
+ hasContentTypeGroups?: boolean;
29
+ textVectors?: Array<{
30
+ id: number;
31
+ vector: Float32Array;
32
+ }>;
33
+ imageVectors?: Array<{
34
+ id: number;
35
+ vector: Float32Array;
36
+ }>;
28
37
  }
29
38
  export declare class BinaryIndexFormat {
30
39
  /**
31
- * Save index data to binary format
40
+ * Save index data to binary format (original format for backward compatibility)
32
41
  *
33
42
  * File structure:
34
43
  * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
@@ -39,7 +48,24 @@ export declare class BinaryIndexFormat {
39
48
  */
40
49
  static save(indexPath: string, data: BinaryIndexData): Promise<void>;
41
50
  /**
42
- * Load index data from binary format
51
+ * Save index data to grouped binary format
52
+ *
53
+ * File structure:
54
+ * - Extended Header (44 bytes):
55
+ * - Original 6 fields (24 bytes)
56
+ * - hasGroups flag (4 bytes)
57
+ * - textOffset (4 bytes)
58
+ * - textCount (4 bytes)
59
+ * - imageOffset (4 bytes)
60
+ * - imageCount (4 bytes)
61
+ * - Data section: [text vectors...][image vectors...]
62
+ *
63
+ * @param indexPath Path to save the binary index file
64
+ * @param data Index data to serialize
65
+ */
66
+ static saveGrouped(indexPath: string, data: BinaryIndexData): Promise<void>;
67
+ /**
68
+ * Load index data from binary format (supports both original and grouped formats)
43
69
  *
44
70
  * Uses zero-copy Float32Array views for efficient loading.
45
71
  * Copies the views to ensure data persistence after buffer lifecycle.
@@ -0,0 +1,291 @@
1
+ /**
2
+ * Binary Index Format Module
3
+ *
4
+ * Provides efficient binary serialization for HNSW vector indices.
5
+ *
6
+ * Format Specification:
7
+ * - Header: 24 bytes (6 × uint32)
8
+ * - Vectors: N × (4 + D × 4) bytes
9
+ * - Little-endian encoding for cross-platform compatibility
10
+ * - 4-byte alignment for Float32Array zero-copy views
11
+ *
12
+ * Performance:
13
+ * - 3.66x smaller than JSON format
14
+ * - 3.5x faster loading
15
+ * - Zero-copy Float32Array views
16
+ */
17
+ import { readFileSync, writeFileSync } from 'fs';
18
+ export class BinaryIndexFormat {
19
+ /**
20
+ * Save index data to binary format (original format for backward compatibility)
21
+ *
22
+ * File structure:
23
+ * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
24
+ * - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
25
+ *
26
+ * @param indexPath Path to save the binary index file
27
+ * @param data Index data to serialize
28
+ */
29
+ static async save(indexPath, data) {
30
+ // Calculate total size
31
+ const headerSize = 24; // 6 uint32 fields
32
+ const vectorSize = 4 + (data.dimensions * 4); // id + vector
33
+ const totalSize = headerSize + (data.currentSize * vectorSize);
34
+ const buffer = new ArrayBuffer(totalSize);
35
+ const view = new DataView(buffer);
36
+ let offset = 0;
37
+ // Write header (24 bytes, all little-endian)
38
+ view.setUint32(offset, data.dimensions, true);
39
+ offset += 4;
40
+ view.setUint32(offset, data.maxElements, true);
41
+ offset += 4;
42
+ view.setUint32(offset, data.M, true);
43
+ offset += 4;
44
+ view.setUint32(offset, data.efConstruction, true);
45
+ offset += 4;
46
+ view.setUint32(offset, data.seed, true);
47
+ offset += 4;
48
+ view.setUint32(offset, data.currentSize, true);
49
+ offset += 4;
50
+ // Write vectors
51
+ for (const item of data.vectors) {
52
+ // Ensure 4-byte alignment (should always be true with our format)
53
+ if (offset % 4 !== 0) {
54
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
55
+ }
56
+ // Write vector ID
57
+ view.setUint32(offset, item.id, true);
58
+ offset += 4;
59
+ // Write vector data
60
+ for (let i = 0; i < item.vector.length; i++) {
61
+ view.setFloat32(offset, item.vector[i], true);
62
+ offset += 4;
63
+ }
64
+ }
65
+ // Write to file
66
+ writeFileSync(indexPath, Buffer.from(buffer));
67
+ }
68
+ /**
69
+ * Save index data to grouped binary format
70
+ *
71
+ * File structure:
72
+ * - Extended Header (44 bytes):
73
+ * - Original 6 fields (24 bytes)
74
+ * - hasGroups flag (4 bytes)
75
+ * - textOffset (4 bytes)
76
+ * - textCount (4 bytes)
77
+ * - imageOffset (4 bytes)
78
+ * - imageCount (4 bytes)
79
+ * - Data section: [text vectors...][image vectors...]
80
+ *
81
+ * @param indexPath Path to save the binary index file
82
+ * @param data Index data to serialize
83
+ */
84
+ static async saveGrouped(indexPath, data) {
85
+ if (!data.hasContentTypeGroups || !data.textVectors || !data.imageVectors) {
86
+ // Fallback to original format
87
+ return this.save(indexPath, data);
88
+ }
89
+ const headerSize = 44; // Extended header: 24 + 20 bytes (hasGroups + textOffset + textCount + imageOffset + imageCount)
90
+ const vectorSize = 4 + (data.dimensions * 4); // id + vector
91
+ // Calculate offsets and total size
92
+ const textOffset = headerSize;
93
+ const imageOffset = textOffset + (data.textVectors.length * vectorSize);
94
+ const totalSize = imageOffset + (data.imageVectors.length * vectorSize);
95
+ const buffer = new ArrayBuffer(totalSize);
96
+ const view = new DataView(buffer);
97
+ let offset = 0;
98
+ // Write extended header (44 bytes, all little-endian)
99
+ if (offset + 40 > buffer.byteLength) {
100
+ throw new Error(`Header write would exceed buffer bounds: offset=${offset}, headerSize=40, bufferSize=${buffer.byteLength}`);
101
+ }
102
+ view.setUint32(offset, data.dimensions, true);
103
+ offset += 4;
104
+ view.setUint32(offset, data.maxElements, true);
105
+ offset += 4;
106
+ view.setUint32(offset, data.M, true);
107
+ offset += 4;
108
+ view.setUint32(offset, data.efConstruction, true);
109
+ offset += 4;
110
+ view.setUint32(offset, data.seed, true);
111
+ offset += 4;
112
+ view.setUint32(offset, data.currentSize, true);
113
+ offset += 4;
114
+ // Extended fields
115
+ view.setUint32(offset, 1, true);
116
+ offset += 4; // hasGroups = 1
117
+ view.setUint32(offset, textOffset, true);
118
+ offset += 4;
119
+ view.setUint32(offset, data.textVectors.length, true);
120
+ offset += 4;
121
+ view.setUint32(offset, imageOffset, true);
122
+ offset += 4;
123
+ view.setUint32(offset, data.imageVectors.length, true);
124
+ offset += 4;
125
+ // Write text vectors
126
+ for (const item of data.textVectors) {
127
+ // Ensure 4-byte alignment
128
+ if (offset % 4 !== 0) {
129
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
130
+ }
131
+ // Check bounds before writing
132
+ if (offset + 4 > buffer.byteLength) {
133
+ throw new Error(`ID write would exceed buffer bounds: offset=${offset}, bufferSize=${buffer.byteLength}`);
134
+ }
135
+ // Write vector ID
136
+ view.setUint32(offset, item.id, true);
137
+ offset += 4;
138
+ // Check bounds for vector data
139
+ const vectorDataSize = item.vector.length * 4;
140
+ if (offset + vectorDataSize > buffer.byteLength) {
141
+ throw new Error(`Vector data write would exceed buffer bounds: offset=${offset}, dataSize=${vectorDataSize}, bufferSize=${buffer.byteLength}`);
142
+ }
143
+ // Write vector data
144
+ for (let i = 0; i < item.vector.length; i++) {
145
+ view.setFloat32(offset, item.vector[i], true);
146
+ offset += 4;
147
+ }
148
+ }
149
+ // Write image vectors
150
+ for (const item of data.imageVectors) {
151
+ // Ensure 4-byte alignment
152
+ if (offset % 4 !== 0) {
153
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
154
+ }
155
+ // Check bounds before writing
156
+ if (offset + 4 > buffer.byteLength) {
157
+ throw new Error(`ID write would exceed buffer bounds: offset=${offset}, bufferSize=${buffer.byteLength}`);
158
+ }
159
+ // Write vector ID
160
+ view.setUint32(offset, item.id, true);
161
+ offset += 4;
162
+ // Check bounds for vector data
163
+ const vectorDataSize = item.vector.length * 4;
164
+ if (offset + vectorDataSize > buffer.byteLength) {
165
+ throw new Error(`Vector data write would exceed buffer bounds: offset=${offset}, dataSize=${vectorDataSize}, bufferSize=${buffer.byteLength}`);
166
+ }
167
+ // Write vector data
168
+ for (let i = 0; i < item.vector.length; i++) {
169
+ view.setFloat32(offset, item.vector[i], true);
170
+ offset += 4;
171
+ }
172
+ }
173
+ // Write to file
174
+ writeFileSync(indexPath, Buffer.from(buffer));
175
+ }
176
+ /**
177
+ * Load index data from binary format (supports both original and grouped formats)
178
+ *
179
+ * Uses zero-copy Float32Array views for efficient loading.
180
+ * Copies the views to ensure data persistence after buffer lifecycle.
181
+ *
182
+ * @param indexPath Path to the binary index file
183
+ * @returns Deserialized index data
184
+ */
185
+ static async load(indexPath) {
186
+ const buffer = readFileSync(indexPath);
187
+ const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
188
+ let offset = 0;
189
+ // Read basic header (24 bytes, all little-endian)
190
+ const dimensions = view.getUint32(offset, true);
191
+ offset += 4;
192
+ const maxElements = view.getUint32(offset, true);
193
+ offset += 4;
194
+ const M = view.getUint32(offset, true);
195
+ offset += 4;
196
+ const efConstruction = view.getUint32(offset, true);
197
+ offset += 4;
198
+ const seed = view.getUint32(offset, true);
199
+ offset += 4;
200
+ const currentSize = view.getUint32(offset, true);
201
+ offset += 4;
202
+ // Check if this is the extended grouped format (40+ bytes header)
203
+ const hasGroups = buffer.byteLength >= 40 ? view.getUint32(offset, true) : 0;
204
+ if (hasGroups === 1 && buffer.byteLength >= 40) {
205
+ // Load grouped format
206
+ const textOffset = view.getUint32(offset + 4, true);
207
+ const textCount = view.getUint32(offset + 8, true);
208
+ const imageOffset = view.getUint32(offset + 12, true);
209
+ const imageCount = view.getUint32(offset + 16, true);
210
+ // Load text vectors
211
+ const textVectors = [];
212
+ offset = textOffset;
213
+ for (let i = 0; i < textCount; i++) {
214
+ // Ensure 4-byte alignment
215
+ if (offset % 4 !== 0) {
216
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
217
+ }
218
+ // Read vector ID
219
+ const id = view.getUint32(offset, true);
220
+ offset += 4;
221
+ // Zero-copy Float32Array view
222
+ const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
223
+ // Copy to avoid buffer lifecycle issues
224
+ const vector = new Float32Array(vectorView);
225
+ offset += dimensions * 4;
226
+ textVectors.push({ id, vector });
227
+ }
228
+ // Load image vectors
229
+ const imageVectors = [];
230
+ offset = imageOffset;
231
+ for (let i = 0; i < imageCount; i++) {
232
+ // Ensure 4-byte alignment
233
+ if (offset % 4 !== 0) {
234
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
235
+ }
236
+ // Read vector ID
237
+ const id = view.getUint32(offset, true);
238
+ offset += 4;
239
+ // Zero-copy Float32Array view
240
+ const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
241
+ // Copy to avoid buffer lifecycle issues
242
+ const vector = new Float32Array(vectorView);
243
+ offset += dimensions * 4;
244
+ imageVectors.push({ id, vector });
245
+ }
246
+ // Combine all vectors for backward compatibility
247
+ const allVectors = [...textVectors, ...imageVectors];
248
+ return {
249
+ dimensions,
250
+ maxElements,
251
+ M,
252
+ efConstruction,
253
+ seed,
254
+ currentSize,
255
+ vectors: allVectors,
256
+ hasContentTypeGroups: true,
257
+ textVectors,
258
+ imageVectors
259
+ };
260
+ }
261
+ else {
262
+ // Load original format
263
+ const vectors = [];
264
+ for (let i = 0; i < currentSize; i++) {
265
+ // Ensure 4-byte alignment (should always be true with our format)
266
+ if (offset % 4 !== 0) {
267
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
268
+ }
269
+ // Read vector ID
270
+ const id = view.getUint32(offset, true);
271
+ offset += 4;
272
+ // Zero-copy Float32Array view (fast!)
273
+ const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
274
+ // Copy to avoid buffer lifecycle issues
275
+ const vector = new Float32Array(vectorView);
276
+ offset += dimensions * 4;
277
+ vectors.push({ id, vector });
278
+ }
279
+ return {
280
+ dimensions,
281
+ maxElements,
282
+ M,
283
+ efConstruction,
284
+ seed,
285
+ currentSize,
286
+ vectors
287
+ };
288
+ }
289
+ }
290
+ }
291
+ //# sourceMappingURL=binary-index-format.js.map
@@ -162,9 +162,13 @@ export declare class IngestionPipeline {
162
162
  */
163
163
  private storeDocumentsAndChunksWithContentTypes;
164
164
  /**
165
- * Update vector index with new embeddings
165
+ * Update vector index with new embeddings (supports grouped content type storage)
166
166
  */
167
167
  private updateVectorIndex;
168
+ /**
169
+ * Filter documents based on ingestion mode to avoid processing incompatible content types
170
+ */
171
+ private filterDocumentsByMode;
168
172
  /**
169
173
  * Converts MIME type to simple content type for embedding function
170
174
  * @param mimeType - MIME type string (e.g., 'text/plain', 'image/jpeg')
@@ -287,21 +287,30 @@ export class IngestionPipeline {
287
287
  try {
288
288
  // Phase 1: File Discovery and Processing with Content-Type Detection
289
289
  console.log('\n--- Phase 1: File Discovery and Processing ---');
290
- const fileResult = await discoverAndProcessFiles(path, options.fileOptions, this.pathManager);
291
- if (fileResult.documents.length === 0) {
290
+ const mode = options.mode || 'text';
291
+ const fileOptions = {
292
+ recursive: true,
293
+ maxFileSize: 10 * 1024 * 1024, // 10MB
294
+ ...options.fileOptions,
295
+ mode
296
+ };
297
+ const fileResult = await discoverAndProcessFiles(path, fileOptions, this.pathManager);
298
+ // Additional filtering as fallback (should be minimal with mode-aware discovery)
299
+ const filteredResult = this.filterDocumentsByMode(fileResult, mode);
300
+ if (filteredResult.documents.length === 0) {
292
301
  console.log('No documents found to process');
293
302
  return {
294
303
  documentsProcessed: 0,
295
304
  chunksCreated: 0,
296
305
  embeddingsGenerated: 0,
297
- documentErrors: fileResult.processingResult.errors.length,
306
+ documentErrors: filteredResult.processingResult.errors.length,
298
307
  embeddingErrors: 0,
299
308
  processingTimeMs: Date.now() - startTime,
300
309
  contentIds: []
301
310
  };
302
311
  }
303
312
  // Content-type detection and routing
304
- const contentTypeStats = this.analyzeContentTypes(fileResult.documents);
313
+ const contentTypeStats = this.analyzeContentTypes(filteredResult.documents);
305
314
  console.log(`📊 Content analysis: ${contentTypeStats.text} text, ${contentTypeStats.image} image, ${contentTypeStats.other} other files`);
306
315
  // Phase 2: Document Chunking with Content-Type Awareness
307
316
  console.log('\n--- Phase 2: Document Chunking ---');
@@ -309,7 +318,7 @@ export class IngestionPipeline {
309
318
  chunkSize: config.chunk_size,
310
319
  chunkOverlap: config.chunk_overlap
311
320
  };
312
- const chunkingResult = await this.chunkDocumentsWithContentTypes(fileResult.documents, effectiveChunkConfig, options.mode);
321
+ const chunkingResult = await this.chunkDocumentsWithContentTypes(filteredResult.documents, effectiveChunkConfig, options.mode);
313
322
  if (chunkingResult.totalChunks === 0) {
314
323
  console.log('No chunks created from documents');
315
324
  return {
@@ -334,10 +343,10 @@ export class IngestionPipeline {
334
343
  const endTime = Date.now();
335
344
  const processingTimeMs = endTime - startTime;
336
345
  const result = {
337
- documentsProcessed: fileResult.documents.length,
346
+ documentsProcessed: filteredResult.documents.length,
338
347
  chunksCreated: chunkingResult.totalChunks,
339
348
  embeddingsGenerated: embeddingResult.embeddings.length,
340
- documentErrors: fileResult.processingResult.errors.length,
349
+ documentErrors: filteredResult.processingResult.errors.length,
341
350
  embeddingErrors: embeddingResult.errors,
342
351
  processingTimeMs,
343
352
  contentIds
@@ -595,16 +604,35 @@ export class IngestionPipeline {
595
604
  return contentIds;
596
605
  }
597
606
  /**
598
- * Update vector index with new embeddings
607
+ * Update vector index with new embeddings (supports grouped content type storage)
599
608
  */
600
609
  async updateVectorIndex(embeddings) {
610
+ console.log('updateVectorIndex called with', embeddings.length, 'embeddings');
601
611
  if (embeddings.length === 0) {
602
612
  console.log('No embeddings to add to vector index');
603
613
  return;
604
614
  }
605
615
  console.log(`Adding ${embeddings.length} vector${embeddings.length === 1 ? '' : 's'} to search index...`);
606
616
  try {
607
- await this.indexManager.addVectors(embeddings);
617
+ // Group embeddings by content type for optimized storage
618
+ const groupedEmbeddings = embeddings.reduce((groups, embedding) => {
619
+ const contentType = embedding.contentType || 'text';
620
+ if (!groups[contentType]) {
621
+ groups[contentType] = [];
622
+ }
623
+ groups[contentType].push(embedding);
624
+ return groups;
625
+ }, {});
626
+ const textEmbeddings = groupedEmbeddings.text || [];
627
+ const imageEmbeddings = groupedEmbeddings.image || [];
628
+ console.log(`Grouped: ${textEmbeddings.length} text, ${imageEmbeddings.length} image vectors`);
629
+ // Use grouped storage method if available, fallback to regular method
630
+ if (this.indexManager.addGroupedEmbeddings) {
631
+ await this.indexManager.addGroupedEmbeddings(textEmbeddings, imageEmbeddings);
632
+ }
633
+ else {
634
+ await this.indexManager.addVectors(embeddings);
635
+ }
608
636
  console.log(`✓ Vector index updated successfully with ${embeddings.length} new vectors`);
609
637
  }
610
638
  catch (error) {
@@ -612,6 +640,45 @@ export class IngestionPipeline {
612
640
  throw error;
613
641
  }
614
642
  }
643
+ /**
644
+ * Filter documents based on ingestion mode to avoid processing incompatible content types
645
+ */
646
+ filterDocumentsByMode(fileResult, mode) {
647
+ if (mode === 'multimodal') {
648
+ // In multimodal mode, keep all documents
649
+ return fileResult;
650
+ }
651
+ // In text mode, filter out image documents
652
+ const filteredDocuments = fileResult.documents.filter(doc => {
653
+ const contentType = doc.metadata?.contentType || 'text';
654
+ const isCompatible = contentType === 'text' ||
655
+ contentType.startsWith('text/') ||
656
+ contentType === 'application/pdf' ||
657
+ contentType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
658
+ if (!isCompatible) {
659
+ console.log(`⚠️ Skipping ${doc.source} (${contentType}) - not compatible with text mode`);
660
+ }
661
+ return isCompatible;
662
+ });
663
+ // Update processing result to reflect filtering
664
+ const filteredProcessingResult = {
665
+ ...fileResult.processingResult,
666
+ skippedFiles: [
667
+ ...(fileResult.processingResult.skippedFiles || []),
668
+ ...fileResult.documents
669
+ .filter(doc => !filteredDocuments.includes(doc))
670
+ .map(doc => ({
671
+ path: doc.source,
672
+ reason: `Content type not compatible with ${mode} mode`
673
+ }))
674
+ ]
675
+ };
676
+ return {
677
+ documents: filteredDocuments,
678
+ discoveryResult: fileResult.discoveryResult,
679
+ processingResult: filteredProcessingResult
680
+ };
681
+ }
615
682
  /**
616
683
  * Converts MIME type to simple content type for embedding function
617
684
  * @param mimeType - MIME type string (e.g., 'text/plain', 'image/jpeg')
@@ -105,7 +105,7 @@ export class ModelValidator {
105
105
  }
106
106
  // Fallback: try to detect from package.json import
107
107
  try {
108
- const packageInfo = await import('@huggingface/transformers/package.json');
108
+ const packageInfo = await import('@huggingface/transformers/package.json' + '');
109
109
  if (packageInfo.version) {
110
110
  this.currentTransformersVersion = packageInfo.version;
111
111
  return packageInfo.version;
@@ -194,7 +194,7 @@ export class TextDerivedRerankingStrategy {
194
194
  catch (error) {
195
195
  console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
196
196
  // Fallback to filename-based description
197
- const filename = imagePath.split('/').pop() || imagePath.split('\\').pop() || imagePath;
197
+ const filename = imagePath.split('/').pop() || imagePath;
198
198
  return `Image file: ${filename}`;
199
199
  }
200
200
  }
@@ -211,17 +211,16 @@ export class TextDerivedRerankingStrategy {
211
211
  // Step 1: Convert images to text descriptions
212
212
  const processedResults = await Promise.all(results.map(async (result) => {
213
213
  if (result.contentType === 'image') {
214
- // Generate text description for image using the file path from document.source
215
- const description = await this.generateImageDescription(result.document.source);
214
+ // Generate text description for image
215
+ const description = await this.generateImageDescription(result.content);
216
216
  return {
217
217
  ...result,
218
218
  content: description,
219
- contentType: 'text', // Change to 'text' so cross-encoder will process it
220
219
  originalContent: result.content,
221
220
  originalContentType: result.contentType,
222
221
  metadata: {
223
222
  ...result.metadata,
224
- originalImagePath: result.document.source,
223
+ originalImagePath: result.content,
225
224
  generatedDescription: description
226
225
  }
227
226
  };
@@ -139,7 +139,8 @@ export class SearchEngine {
139
139
  const searchStartTime = performance.now();
140
140
  let searchResult;
141
141
  try {
142
- searchResult = this.indexManager.search(queryVector, topK);
142
+ const contentType = options.contentType;
143
+ searchResult = this.indexManager.search(queryVector, topK, contentType);
143
144
  }
144
145
  catch (error) {
145
146
  if (error instanceof Error && error.message.includes('No embedding ID found for hash')) {
@@ -49,7 +49,7 @@ export interface RerankingInterface {
49
49
  export interface SearchOptions {
50
50
  top_k?: number;
51
51
  rerank?: boolean;
52
- contentType?: string;
52
+ contentType?: 'text' | 'image' | 'combined';
53
53
  }
54
54
  export interface Chunk {
55
55
  text: string;
@@ -64,5 +64,9 @@ export declare class VectorIndex {
64
64
  * Resize index to accommodate more vectors
65
65
  */
66
66
  resizeIndex(newMaxElements: number): void;
67
+ /**
68
+ * Get index options (for external access to configuration)
69
+ */
70
+ getOptions(): VectorIndexOptions;
67
71
  }
68
72
  //# sourceMappingURL=vector-index.d.ts.map
@@ -81,7 +81,8 @@ export class VectorIndex {
81
81
  originalConsoleError.apply(console, args);
82
82
  };
83
83
  try {
84
- const { loadHnswlib } = await import('hnswlib-wasm/dist/hnswlib.js');
84
+ const hnswlibModule = await import('hnswlib-wasm/dist/hnswlib.js');
85
+ const { loadHnswlib } = hnswlibModule;
85
86
  this.hnswlib = await loadHnswlib();
86
87
  }
87
88
  finally {
@@ -143,7 +144,8 @@ export class VectorIndex {
143
144
  originalConsoleError.apply(console, args);
144
145
  };
145
146
  try {
146
- const { loadHnswlib } = await import('hnswlib-wasm/dist/hnswlib.js');
147
+ const hnswlibModule = await import('hnswlib-wasm/dist/hnswlib.js');
148
+ const { loadHnswlib } = hnswlibModule;
147
149
  this.hnswlib = await loadHnswlib();
148
150
  }
149
151
  finally {
@@ -321,5 +323,11 @@ export class VectorIndex {
321
323
  throw new Error(`Failed to resize index: ${error}`);
322
324
  }
323
325
  }
326
+ /**
327
+ * Get index options (for external access to configuration)
328
+ */
329
+ getOptions() {
330
+ return { ...this.options };
331
+ }
324
332
  }
325
333
  //# sourceMappingURL=vector-index.js.map
@@ -8,6 +8,8 @@ export interface FileProcessorOptions {
8
8
  recursive?: boolean;
9
9
  /** Maximum file size in bytes (default: 10MB) */
10
10
  maxFileSize?: number;
11
+ /** Processing mode to filter compatible files */
12
+ mode?: 'text' | 'multimodal';
11
13
  }
12
14
  /**
13
15
  * Default options for file processing