rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310)
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Standalone search script for direct node execution
4
+ * Usage: node search.js <query> [--top-k <number>] [--rerank|--no-rerank]
5
+ */
6
+ import { runSearch } from './cli/search.js';
7
+ import { EXIT_CODES, ConfigurationError } from './core/config.js';
8
+ function parseArgs() {
9
+ const args = process.argv.slice(2);
10
+ if (args.length === 0) {
11
+ console.error('RAG-lite TS Document Search');
12
+ console.error('');
13
+ console.error('Usage: node search.js <query> [options]');
14
+ console.error('');
15
+ console.error('Arguments:');
16
+ console.error(' <query> Search query (wrap in quotes if it contains spaces)');
17
+ console.error('');
18
+ console.error('Options:');
19
+ console.error(' --top-k <number> Number of results to return (default: 10, max: 100)');
20
+ console.error(' --rerank Enable reranking for better results');
21
+ console.error(' --no-rerank Disable reranking');
22
+ console.error('');
23
+ console.error('Examples:');
24
+ console.error(' node search.js "machine learning"');
25
+ console.error(' node search.js "API documentation" --top-k 10');
26
+ console.error(' node search.js "tutorial" --rerank');
27
+ console.error(' node search.js "how to install" --top-k 20 --rerank');
28
+ console.error('');
29
+ console.error('Note: Make sure you have ingested documents first using:');
30
+ console.error(' node indexer.js <path>');
31
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
32
+ }
33
+ const queryParts = [];
34
+ const options = {};
35
+ for (let i = 0; i < args.length; i++) {
36
+ const arg = args[i];
37
+ if (arg === '--top-k') {
38
+ const nextArg = args[i + 1];
39
+ if (!nextArg) {
40
+ console.error('Error: --top-k requires a numeric value');
41
+ console.error('');
42
+ console.error('Examples:');
43
+ console.error(' --top-k 5 # Return 5 results');
44
+ console.error(' --top-k 20 # Return 20 results');
45
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
46
+ }
47
+ const topK = parseInt(nextArg, 10);
48
+ if (isNaN(topK) || topK <= 0) {
49
+ console.error('Error: --top-k must be a positive number');
50
+ console.error('');
51
+ console.error('Examples:');
52
+ console.error(' --top-k 5 # Return 5 results');
53
+ console.error(' --top-k 20 # Return 20 results');
54
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
55
+ }
56
+ if (topK > 100) {
57
+ console.warn(`Warning: Large --top-k value (${topK}) may impact performance. Consider using a smaller value.`);
58
+ }
59
+ options['top-k'] = topK;
60
+ i++; // Skip next argument
61
+ }
62
+ else if (arg === '--rerank') {
63
+ options.rerank = true;
64
+ }
65
+ else if (arg === '--no-rerank') {
66
+ options.rerank = false;
67
+ }
68
+ else if (arg === '--help' || arg === '-h') {
69
+ // Re-show help and exit
70
+ parseArgs();
71
+ }
72
+ else if (arg.startsWith('--')) {
73
+ console.error(`Error: Unknown option '${arg}'`);
74
+ console.error('');
75
+ console.error('Available options:');
76
+ console.error(' --top-k <number> Number of results to return');
77
+ console.error(' --rerank Enable reranking');
78
+ console.error(' --no-rerank Disable reranking');
79
+ console.error(' --help, -h Show this help');
80
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
81
+ }
82
+ else {
83
+ queryParts.push(arg);
84
+ }
85
+ }
86
+ const query = queryParts.join(' ');
87
+ if (!query.trim()) {
88
+ console.error('Error: Query cannot be empty');
89
+ console.error('');
90
+ console.error('Please provide a search query:');
91
+ console.error(' node search.js "your search terms"');
92
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
93
+ }
94
+ if (query.trim().length > 500) {
95
+ console.error('Error: Query is too long (maximum 500 characters)');
96
+ console.error('');
97
+ console.error('Please use a shorter, more specific query.');
98
+ process.exit(EXIT_CODES.INVALID_ARGUMENTS);
99
+ }
100
+ return { query, options };
101
+ }
102
+ async function main() {
103
+ const { query, options } = parseArgs();
104
+ await runSearch(query, options);
105
+ }
106
+ main().catch((error) => {
107
+ if (error instanceof ConfigurationError) {
108
+ console.error('Configuration Error:');
109
+ console.error(error.message);
110
+ process.exit(error.exitCode);
111
+ }
112
+ else {
113
+ console.error('Fatal Error:', error instanceof Error ? error.message : String(error));
114
+ process.exit(EXIT_CODES.GENERAL_ERROR);
115
+ }
116
+ });
117
+ //# sourceMappingURL=search-standalone.js.map
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Public API SearchEngine - Simple constructor with Chameleon Architecture
3
+ *
4
+ * This class provides a clean, simple API that automatically adapts to the mode
5
+ * (text or multimodal) stored in the database during ingestion. The system detects
6
+ * the mode and creates the appropriate embedder and reranker without user intervention.
7
+ *
8
+ * Chameleon Architecture Features:
9
+ * - Automatic mode detection from database configuration
10
+ * - Seamless switching between text and multimodal modes
11
+ * - Appropriate embedder selection (sentence-transformer or CLIP)
12
+ * - Mode-specific reranking strategies
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * // Simple usage - mode automatically detected from database
17
+ * const search = new SearchEngine('./index.bin', './db.sqlite');
18
+ * const results = await search.search('query');
19
+ *
20
+ * // Works for both text and multimodal databases
21
+ * // Text mode: uses sentence-transformer embeddings
22
+ * // Multimodal mode: uses CLIP embeddings for cross-modal search
23
+ *
24
+ * // With options (advanced)
25
+ * const search = new SearchEngine('./index.bin', './db.sqlite', {
26
+ * enableReranking: true
27
+ * });
28
+ * ```
29
+ */
30
+ import type { SearchResult, SearchOptions, EmbedFunction, RerankFunction } from './core/types.js';
31
+ export interface SearchEngineOptions {
32
+ /** Embedding model name override */
33
+ embeddingModel?: string;
34
+ /** Embedding batch size override */
35
+ batchSize?: number;
36
+ /** Reranking model name override */
37
+ rerankingModel?: string;
38
+ /** Whether to enable reranking (default: true) */
39
+ enableReranking?: boolean;
40
+ /** Top-k results to return (default: from config) */
41
+ topK?: number;
42
+ /** Custom embedding function (advanced usage) */
43
+ embedFn?: EmbedFunction;
44
+ /** Custom reranking function (advanced usage) */
45
+ rerankFn?: RerankFunction;
46
+ }
47
+ export declare class SearchEngine {
48
+ private indexPath;
49
+ private dbPath;
50
+ private options;
51
+ private coreEngine;
52
+ private initPromise;
53
+ constructor(indexPath: string, dbPath: string, options?: SearchEngineOptions);
54
+ /**
55
+ * Initialize the search engine using polymorphic factory or direct injection
56
+ *
57
+ * Chameleon Architecture Implementation:
58
+ * - Automatically detects mode from database (text or multimodal)
59
+ * - Creates appropriate embedder based on detected mode
60
+ * - Applies mode-specific reranking strategies
61
+ * - Provides seamless polymorphic behavior
62
+ */
63
+ private initialize;
64
+ /**
65
+ * Perform semantic search
66
+ */
67
+ search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
68
+ /**
69
+ * Retrieve content by ID in the specified format
70
+ * @param contentId - Content ID to retrieve
71
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
72
+ * @returns Promise that resolves to content in requested format
73
+ */
74
+ getContent(contentId: string, format?: 'file' | 'base64'): Promise<string>;
75
+ /**
76
+ * Retrieve multiple content items efficiently in batch
77
+ * @param contentIds - Array of content IDs to retrieve
78
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
79
+ * @returns Promise that resolves to array of content in requested format
80
+ */
81
+ getContentBatch(contentIds: string[], format?: 'file' | 'base64'): Promise<string[]>;
82
+ /**
83
+ * Retrieve content metadata for result enhancement
84
+ * @param contentId - Content ID to get metadata for
85
+ * @returns Promise that resolves to content metadata
86
+ */
87
+ getContentMetadata(contentId: string): Promise<import('./core/content-resolver.js').ContentMetadata>;
88
+ /**
89
+ * Verify that content exists and is accessible
90
+ * @param contentId - Content ID to verify
91
+ * @returns Promise that resolves to true if content exists, false otherwise
92
+ */
93
+ verifyContentExists(contentId: string): Promise<boolean>;
94
+ /**
95
+ * Clean up resources
96
+ */
97
+ cleanup(): Promise<void>;
98
+ }
99
+ //# sourceMappingURL=search.d.ts.map
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Public API SearchEngine - Simple constructor with Chameleon Architecture
3
+ *
4
+ * This class provides a clean, simple API that automatically adapts to the mode
5
+ * (text or multimodal) stored in the database during ingestion. The system detects
6
+ * the mode and creates the appropriate embedder and reranker without user intervention.
7
+ *
8
+ * Chameleon Architecture Features:
9
+ * - Automatic mode detection from database configuration
10
+ * - Seamless switching between text and multimodal modes
11
+ * - Appropriate embedder selection (sentence-transformer or CLIP)
12
+ * - Mode-specific reranking strategies
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * // Simple usage - mode automatically detected from database
17
+ * const search = new SearchEngine('./index.bin', './db.sqlite');
18
+ * const results = await search.search('query');
19
+ *
20
+ * // Works for both text and multimodal databases
21
+ * // Text mode: uses sentence-transformer embeddings
22
+ * // Multimodal mode: uses CLIP embeddings for cross-modal search
23
+ *
24
+ * // With options (advanced)
25
+ * const search = new SearchEngine('./index.bin', './db.sqlite', {
26
+ * enableReranking: true
27
+ * });
28
+ * ```
29
+ */
30
+ import { SearchEngine as CoreSearchEngine } from './core/search.js';
31
+ export class SearchEngine {
32
+ indexPath;
33
+ dbPath;
34
+ options;
35
+ coreEngine = null;
36
+ initPromise = null;
37
+ constructor(indexPath, dbPath, options = {}) {
38
+ this.indexPath = indexPath;
39
+ this.dbPath = dbPath;
40
+ this.options = options;
41
+ // Validate required parameters
42
+ if (!indexPath || typeof indexPath !== 'string' || indexPath.trim() === '') {
43
+ throw new Error('Both indexPath and dbPath are required.\n' +
44
+ 'Example: const search = new SearchEngine("./index.bin", "./db.sqlite");\n' +
45
+ 'Or use: const search = await SearchFactory.create("./index.bin", "./db.sqlite");');
46
+ }
47
+ if (!dbPath || typeof dbPath !== 'string' || dbPath.trim() === '') {
48
+ throw new Error('Both indexPath and dbPath are required.\n' +
49
+ 'Example: const search = new SearchEngine("./index.bin", "./db.sqlite");\n' +
50
+ 'Or use: const search = await SearchFactory.create("./index.bin", "./db.sqlite");');
51
+ }
52
+ }
53
+ /**
54
+ * Initialize the search engine using polymorphic factory or direct injection
55
+ *
56
+ * Chameleon Architecture Implementation:
57
+ * - Automatically detects mode from database (text or multimodal)
58
+ * - Creates appropriate embedder based on detected mode
59
+ * - Applies mode-specific reranking strategies
60
+ * - Provides seamless polymorphic behavior
61
+ */
62
+ async initialize() {
63
+ if (this.coreEngine) {
64
+ return; // Already initialized
65
+ }
66
+ if (this.initPromise) {
67
+ return this.initPromise; // Initialization in progress
68
+ }
69
+ this.initPromise = (async () => {
70
+ // If custom functions are provided, use direct dependency injection
71
+ if (this.options.embedFn || this.options.rerankFn) {
72
+ const { IndexManager } = await import('./index-manager.js');
73
+ const { openDatabase } = await import('./core/db.js');
74
+ const { createTextEmbedFunction } = await import('./text/embedder.js');
75
+ const { existsSync } = await import('fs');
76
+ // Validate files exist
77
+ if (!existsSync(this.indexPath)) {
78
+ throw new Error(`Vector index not found at: ${this.indexPath}`);
79
+ }
80
+ if (!existsSync(this.dbPath)) {
81
+ throw new Error(`Database not found at: ${this.dbPath}`);
82
+ }
83
+ // Use custom embedFn or create default
84
+ const embedFn = this.options.embedFn || createTextEmbedFunction(this.options.embeddingModel);
85
+ // Get model defaults for dimensions
86
+ const { getModelDefaults, config } = await import('./core/config.js');
87
+ const modelDefaults = getModelDefaults(this.options.embeddingModel || config.embedding_model);
88
+ // Initialize dependencies
89
+ const db = await openDatabase(this.dbPath);
90
+ const indexManager = new IndexManager(this.indexPath, this.dbPath, modelDefaults.dimensions, this.options.embeddingModel);
91
+ await indexManager.initialize();
92
+ // Create ContentResolver for unified content system
93
+ const { ContentResolver } = await import('./core/content-resolver.js');
94
+ const contentResolver = new ContentResolver(db);
95
+ // Create core engine with dependency injection
96
+ this.coreEngine = new CoreSearchEngine(embedFn, indexManager, db, this.options.rerankFn, contentResolver);
97
+ }
98
+ else {
99
+ // Use core polymorphic factory for automatic mode detection (Chameleon Architecture)
100
+ // This enables SearchEngine to automatically adapt to text or multimodal mode
101
+ // based on the configuration stored in the database during ingestion
102
+ const { SearchFactory } = await import('./factories/search-factory.js');
103
+ this.coreEngine = await SearchFactory.create(this.indexPath, this.dbPath);
104
+ }
105
+ })();
106
+ return this.initPromise;
107
+ }
108
+ /**
109
+ * Perform semantic search
110
+ */
111
+ async search(query, options) {
112
+ await this.initialize();
113
+ if (!this.coreEngine) {
114
+ throw new Error('SearchEngine failed to initialize');
115
+ }
116
+ return this.coreEngine.search(query, options);
117
+ }
118
+ /**
119
+ * Retrieve content by ID in the specified format
120
+ * @param contentId - Content ID to retrieve
121
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
122
+ * @returns Promise that resolves to content in requested format
123
+ */
124
+ async getContent(contentId, format = 'file') {
125
+ await this.initialize();
126
+ if (!this.coreEngine) {
127
+ throw new Error('SearchEngine failed to initialize');
128
+ }
129
+ return this.coreEngine.getContent(contentId, format);
130
+ }
131
+ /**
132
+ * Retrieve multiple content items efficiently in batch
133
+ * @param contentIds - Array of content IDs to retrieve
134
+ * @param format - Format to return ('file' for CLI clients, 'base64' for MCP clients)
135
+ * @returns Promise that resolves to array of content in requested format
136
+ */
137
+ async getContentBatch(contentIds, format = 'file') {
138
+ await this.initialize();
139
+ if (!this.coreEngine) {
140
+ throw new Error('SearchEngine failed to initialize');
141
+ }
142
+ return this.coreEngine.getContentBatch(contentIds, format);
143
+ }
144
+ /**
145
+ * Retrieve content metadata for result enhancement
146
+ * @param contentId - Content ID to get metadata for
147
+ * @returns Promise that resolves to content metadata
148
+ */
149
+ async getContentMetadata(contentId) {
150
+ await this.initialize();
151
+ if (!this.coreEngine) {
152
+ throw new Error('SearchEngine failed to initialize');
153
+ }
154
+ return this.coreEngine.getContentMetadata(contentId);
155
+ }
156
+ /**
157
+ * Verify that content exists and is accessible
158
+ * @param contentId - Content ID to verify
159
+ * @returns Promise that resolves to true if content exists, false otherwise
160
+ */
161
+ async verifyContentExists(contentId) {
162
+ await this.initialize();
163
+ if (!this.coreEngine) {
164
+ throw new Error('SearchEngine failed to initialize');
165
+ }
166
+ return this.coreEngine.verifyContentExists(contentId);
167
+ }
168
+ /**
169
+ * Clean up resources
170
+ */
171
+ async cleanup() {
172
+ if (this.coreEngine) {
173
+ await this.coreEngine.cleanup();
174
+ }
175
+ }
176
+ }
177
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Test utilities for multi-model support
3
+ * Provides common configurations and helpers for testing with different embedding models
4
+ */
5
+ export interface TestModel {
6
+ name: string;
7
+ dimensions: number;
8
+ chunkSize: number;
9
+ batchSize: number;
10
+ }
11
+ export declare const TEST_MODELS: TestModel[];
12
+ /**
13
+ * Retrieve model configuration by name
14
+ * @param modelName - The name of the model to retrieve
15
+ * @returns Model configuration object or undefined if not found
16
+ */
17
+ export declare function getTestModel(modelName: string): TestModel | undefined;
18
+ //# sourceMappingURL=test-utils.d.ts.map
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Test utilities for multi-model support
3
+ * Provides common configurations and helpers for testing with different embedding models
4
+ */
5
+ export const TEST_MODELS = [
6
+ {
7
+ name: 'sentence-transformers/all-MiniLM-L6-v2',
8
+ dimensions: 384,
9
+ chunkSize: 250,
10
+ batchSize: 16
11
+ },
12
+ {
13
+ name: 'Xenova/all-mpnet-base-v2',
14
+ dimensions: 768,
15
+ chunkSize: 400,
16
+ batchSize: 8
17
+ }
18
+ ];
19
+ /**
20
+ * Retrieve model configuration by name
21
+ * @param modelName - The name of the model to retrieve
22
+ * @returns Model configuration object or undefined if not found
23
+ */
24
+ export function getTestModel(modelName) {
25
+ return TEST_MODELS.find(m => m.name === modelName);
26
+ }
27
+ //# sourceMappingURL=test-utils.js.map
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Text-specific chunking implementation
3
+ * Implements the ChunkingStrategy interface for text content
4
+ */
5
+ import '../dom-polyfills.js';
6
+ import { ChunkingStrategy, GenericDocument, GenericChunk, ChunkConfig } from '../core/chunker.js';
7
+ /**
8
+ * Document interface for text chunking
9
+ */
10
+ export interface Document {
11
+ source: string;
12
+ title: string;
13
+ content: string;
14
+ metadata?: Record<string, any>;
15
+ }
16
+ export interface Chunk {
17
+ text: string;
18
+ chunkIndex: number;
19
+ tokenCount: number;
20
+ }
21
+ /**
22
+ * Text chunking strategy implementation
23
+ */
24
+ export declare class TextChunkingStrategy implements ChunkingStrategy {
25
+ appliesTo(contentType: string): boolean;
26
+ chunk(document: GenericDocument, config: ChunkConfig): Promise<GenericChunk[]>;
27
+ }
28
+ /**
29
+ * Text document chunking function
30
+ * Converts between text-specific and generic interfaces
31
+ */
32
+ export declare function chunkDocument(document: Document, config?: ChunkConfig): Promise<Chunk[]>;
33
+ //# sourceMappingURL=chunker.d.ts.map