rag-lite-ts 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309) hide show
  1. package/dist/{core → cjs/core}/model-validator.js +1 -1
  2. package/dist/{core → cjs/core}/vector-index.js +4 -2
  3. package/dist/esm/api-errors.d.ts +90 -0
  4. package/dist/esm/api-errors.js +320 -0
  5. package/dist/esm/cli/indexer.d.ts +11 -0
  6. package/dist/esm/cli/indexer.js +471 -0
  7. package/dist/esm/cli/search.d.ts +7 -0
  8. package/dist/esm/cli/search.js +332 -0
  9. package/dist/esm/cli.d.ts +3 -0
  10. package/dist/esm/cli.js +529 -0
  11. package/dist/esm/config.d.ts +51 -0
  12. package/dist/esm/config.js +79 -0
  13. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  14. package/dist/esm/core/abstract-embedder.js +264 -0
  15. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  16. package/dist/esm/core/actionable-error-messages.js +397 -0
  17. package/dist/esm/core/adapters.d.ts +93 -0
  18. package/dist/esm/core/adapters.js +139 -0
  19. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  20. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  21. package/dist/esm/core/binary-index-format.d.ts +78 -0
  22. package/dist/esm/core/binary-index-format.js +291 -0
  23. package/dist/esm/core/chunker.d.ts +119 -0
  24. package/dist/esm/core/chunker.js +73 -0
  25. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  26. package/dist/esm/core/cli-database-utils.js +239 -0
  27. package/dist/esm/core/config.d.ts +102 -0
  28. package/dist/esm/core/config.js +247 -0
  29. package/dist/esm/core/content-errors.d.ts +111 -0
  30. package/dist/esm/core/content-errors.js +362 -0
  31. package/dist/esm/core/content-manager.d.ts +335 -0
  32. package/dist/esm/core/content-manager.js +1476 -0
  33. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  34. package/dist/esm/core/content-performance-optimizer.js +516 -0
  35. package/dist/esm/core/content-resolver.d.ts +104 -0
  36. package/dist/esm/core/content-resolver.js +285 -0
  37. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  38. package/dist/esm/core/cross-modal-search.js +342 -0
  39. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  40. package/dist/esm/core/database-connection-manager.js +310 -0
  41. package/dist/esm/core/db.d.ts +213 -0
  42. package/dist/esm/core/db.js +895 -0
  43. package/dist/esm/core/embedder-factory.d.ts +154 -0
  44. package/dist/esm/core/embedder-factory.js +311 -0
  45. package/dist/esm/core/error-handler.d.ts +112 -0
  46. package/dist/esm/core/error-handler.js +239 -0
  47. package/dist/esm/core/index.d.ts +59 -0
  48. package/dist/esm/core/index.js +69 -0
  49. package/dist/esm/core/ingestion.d.ts +202 -0
  50. package/dist/esm/core/ingestion.js +901 -0
  51. package/dist/esm/core/interfaces.d.ts +408 -0
  52. package/dist/esm/core/interfaces.js +106 -0
  53. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  54. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  55. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  56. package/dist/esm/core/mode-detection-service.js +565 -0
  57. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  58. package/dist/esm/core/mode-model-validator.js +203 -0
  59. package/dist/esm/core/model-registry.d.ts +116 -0
  60. package/dist/esm/core/model-registry.js +411 -0
  61. package/dist/esm/core/model-validator.d.ts +217 -0
  62. package/dist/esm/core/model-validator.js +782 -0
  63. package/dist/esm/core/path-manager.d.ts +47 -0
  64. package/dist/esm/core/path-manager.js +71 -0
  65. package/dist/esm/core/raglite-paths.d.ts +121 -0
  66. package/dist/esm/core/raglite-paths.js +145 -0
  67. package/dist/esm/core/reranking-config.d.ts +42 -0
  68. package/dist/esm/core/reranking-config.js +147 -0
  69. package/dist/esm/core/reranking-factory.d.ts +92 -0
  70. package/dist/esm/core/reranking-factory.js +410 -0
  71. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  72. package/dist/esm/core/reranking-strategies.js +650 -0
  73. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  74. package/dist/esm/core/resource-cleanup.js +371 -0
  75. package/dist/esm/core/resource-manager.d.ts +212 -0
  76. package/dist/esm/core/resource-manager.js +564 -0
  77. package/dist/esm/core/search-pipeline.d.ts +111 -0
  78. package/dist/esm/core/search-pipeline.js +287 -0
  79. package/dist/esm/core/search.d.ts +141 -0
  80. package/dist/esm/core/search.js +320 -0
  81. package/dist/esm/core/streaming-operations.d.ts +145 -0
  82. package/dist/esm/core/streaming-operations.js +409 -0
  83. package/dist/esm/core/types.d.ts +66 -0
  84. package/dist/esm/core/types.js +6 -0
  85. package/dist/esm/core/universal-embedder.d.ts +177 -0
  86. package/dist/esm/core/universal-embedder.js +139 -0
  87. package/dist/esm/core/validation-messages.d.ts +99 -0
  88. package/dist/esm/core/validation-messages.js +334 -0
  89. package/dist/esm/core/vector-index.d.ts +72 -0
  90. package/dist/esm/core/vector-index.js +333 -0
  91. package/dist/esm/dom-polyfills.d.ts +6 -0
  92. package/dist/esm/dom-polyfills.js +37 -0
  93. package/dist/esm/factories/index.d.ts +27 -0
  94. package/dist/esm/factories/index.js +29 -0
  95. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  96. package/dist/esm/factories/ingestion-factory.js +477 -0
  97. package/dist/esm/factories/search-factory.d.ts +154 -0
  98. package/dist/esm/factories/search-factory.js +344 -0
  99. package/dist/esm/file-processor.d.ts +147 -0
  100. package/dist/esm/file-processor.js +963 -0
  101. package/dist/esm/index-manager.d.ts +116 -0
  102. package/dist/esm/index-manager.js +598 -0
  103. package/dist/esm/index.d.ts +75 -0
  104. package/dist/esm/index.js +110 -0
  105. package/dist/esm/indexer.d.ts +7 -0
  106. package/dist/esm/indexer.js +54 -0
  107. package/dist/esm/ingestion.d.ts +63 -0
  108. package/dist/esm/ingestion.js +124 -0
  109. package/dist/esm/mcp-server.d.ts +46 -0
  110. package/dist/esm/mcp-server.js +1820 -0
  111. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  112. package/dist/esm/multimodal/clip-embedder.js +996 -0
  113. package/dist/esm/multimodal/index.d.ts +6 -0
  114. package/dist/esm/multimodal/index.js +6 -0
  115. package/dist/esm/preprocess.d.ts +19 -0
  116. package/dist/esm/preprocess.js +203 -0
  117. package/dist/esm/preprocessors/index.d.ts +17 -0
  118. package/dist/esm/preprocessors/index.js +38 -0
  119. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  120. package/dist/esm/preprocessors/mdx.js +101 -0
  121. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  122. package/dist/esm/preprocessors/mermaid.js +329 -0
  123. package/dist/esm/preprocessors/registry.d.ts +56 -0
  124. package/dist/esm/preprocessors/registry.js +179 -0
  125. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  126. package/dist/esm/run-error-recovery-tests.js +101 -0
  127. package/dist/esm/search-standalone.d.ts +7 -0
  128. package/dist/esm/search-standalone.js +117 -0
  129. package/dist/esm/search.d.ts +99 -0
  130. package/dist/esm/search.js +177 -0
  131. package/dist/esm/test-utils.d.ts +18 -0
  132. package/dist/esm/test-utils.js +27 -0
  133. package/dist/esm/text/chunker.d.ts +33 -0
  134. package/dist/esm/text/chunker.js +279 -0
  135. package/dist/esm/text/embedder.d.ts +111 -0
  136. package/dist/esm/text/embedder.js +386 -0
  137. package/dist/esm/text/index.d.ts +8 -0
  138. package/dist/esm/text/index.js +9 -0
  139. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  140. package/dist/esm/text/preprocessors/index.js +38 -0
  141. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  142. package/dist/esm/text/preprocessors/mdx.js +101 -0
  143. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  144. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  145. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  146. package/dist/esm/text/preprocessors/registry.js +180 -0
  147. package/dist/esm/text/reranker.d.ts +49 -0
  148. package/dist/esm/text/reranker.js +274 -0
  149. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  150. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  151. package/dist/esm/text/tokenizer.d.ts +22 -0
  152. package/dist/esm/text/tokenizer.js +64 -0
  153. package/dist/esm/types.d.ts +83 -0
  154. package/dist/esm/types.js +3 -0
  155. package/dist/esm/utils/vector-math.d.ts +31 -0
  156. package/dist/esm/utils/vector-math.js +70 -0
  157. package/package.json +30 -12
  158. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  159. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  160. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  161. /package/dist/{cli → cjs/cli}/indexer.js +0 -0
  162. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  163. /package/dist/{cli → cjs/cli}/search.js +0 -0
  164. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  165. /package/dist/{cli.js → cjs/cli.js} +0 -0
  166. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  167. /package/dist/{config.js → cjs/config.js} +0 -0
  168. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  169. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  170. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  171. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  172. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  173. /package/dist/{core → cjs/core}/adapters.js +0 -0
  174. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  175. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  176. /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
  177. /package/dist/{core → cjs/core}/binary-index-format.js +0 -0
  178. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  179. /package/dist/{core → cjs/core}/chunker.js +0 -0
  180. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  181. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  182. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/config.js +0 -0
  184. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  186. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  188. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  192. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  194. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  196. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/db.js +0 -0
  198. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  200. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  202. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/index.js +0 -0
  204. /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/ingestion.js +0 -0
  206. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  208. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  210. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  212. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  214. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  216. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  218. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  219. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  220. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  221. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  222. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  223. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  224. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  225. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  226. /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
  227. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  229. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  231. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  233. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/search.js +0 -0
  235. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  237. /package/dist/{core → cjs/core}/types.d.ts +0 -0
  238. /package/dist/{core → cjs/core}/types.js +0 -0
  239. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  240. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  241. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  242. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  243. /package/dist/{core → cjs/core}/vector-index.d.ts +0 -0
  244. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  245. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  246. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  247. /package/dist/{factories → cjs/factories}/index.js +0 -0
  248. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  249. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  250. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  251. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  252. /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
  253. /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
  254. /package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +0 -0
  255. /package/dist/{index-manager.js → cjs/index-manager.js} +0 -0
  256. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  257. /package/dist/{index.js → cjs/index.js} +0 -0
  258. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  259. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  260. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  261. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  262. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  263. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  264. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  265. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  268. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  269. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  270. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  278. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  279. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  280. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  281. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  282. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  283. /package/dist/{search.js → cjs/search.js} +0 -0
  284. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  285. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  286. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  287. /package/dist/{text → cjs/text}/chunker.js +0 -0
  288. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  289. /package/dist/{text → cjs/text}/embedder.js +0 -0
  290. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  291. /package/dist/{text → cjs/text}/index.js +0 -0
  292. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  300. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  301. /package/dist/{text → cjs/text}/reranker.js +0 -0
  302. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  304. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  306. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  307. /package/dist/{types.js → cjs/types.js} +0 -0
  308. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,650 @@
1
+ /**
2
+ * Cross-Encoder Reranking Strategy for Text Mode
3
+ *
4
+ * Adapts the existing CrossEncoderReranker to work with the new RerankingStrategy
5
+ * interface defined in the Chameleon Multimodal Architecture.
6
+ */
7
+ import { CrossEncoderReranker } from '../text/reranker.js';
8
/**
 * Cross-Encoder Reranking Strategy Implementation
 *
 * Wraps a CrossEncoderReranker (from the text module) behind the strategy
 * surface used in this file: `rerank`, `configure`, `getMetadata`, `isReady`,
 * `getModelName` and `cleanup`. Only text results are scored; results of any
 * other content type are passed through and appended after the reranked text.
 */
export class CrossEncoderRerankingStrategy {
    name = 'cross-encoder';
    supportedContentTypes = ['text'];
    isEnabled = true;
    reranker;
    modelName;
    initialized = false;
    /**
     * True while the caller has explicitly switched the strategy off through
     * `configure({ enabled: false })`. Kept separate from `isEnabled` so that a
     * later model load cannot silently switch the strategy back on.
     */
    disabledByConfig = false;
    /**
     * @param {string} [modelName] Optional cross-encoder model name; when omitted
     *   the wrapped reranker keeps its own default.
     */
    constructor(modelName) {
        this.modelName = modelName;
        this.reranker = this.createReranker(modelName);
    }
    /**
     * Build a fresh wrapped reranker, applying a custom model name if one is given.
     *
     * @param {string} [modelName]
     * @returns {CrossEncoderReranker}
     */
    createReranker(modelName) {
        const reranker = new CrossEncoderReranker();
        if (modelName) {
            reranker.modelName = modelName;
        }
        return reranker;
    }
    /**
     * Load the wrapped reranker's model if that has not been done yet.
     *
     * Never throws: a failed load is logged and leaves the strategy disabled.
     * An explicit `enabled: false` from `configure` always wins over a
     * successful load.
     */
    async ensureInitialized() {
        if (this.initialized) {
            return;
        }
        try {
            await this.reranker.loadModel();
            this.initialized = true;
            const loaded = this.reranker.isLoaded();
            if (!loaded) {
                console.warn('Cross-encoder reranker failed to load, strategy disabled');
            }
            this.isEnabled = loaded && !this.disabledByConfig;
        }
        catch (error) {
            console.warn(`Cross-encoder reranker initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
            this.isEnabled = false;
        }
    }
    /**
     * Rerank search results using the cross-encoder model.
     *
     * Defined as an arrow-function field so it can be handed around as a
     * standalone function without losing `this`.
     *
     * @param {string} query Search query.
     * @param {Array<object>} results Search results; entries without a
     *   `contentType`, or with `contentType === 'text'`, are treated as text.
     * @param {string} [contentType] Optional content type of the request.
     * @returns {Promise<Array<object>>} Reranked text results followed by any
     *   non-text results, or the original `results` array when the strategy is
     *   disabled, has nothing to rerank, or reranking fails.
     * @throws {Error} When the strategy is enabled and `contentType` is given
     *   but not supported.
     */
    rerank = async (query, results, contentType) => {
        // A disabled strategy is a no-op.
        if (!this.isEnabled) {
            return results;
        }
        if (contentType && !this.supportedContentTypes.includes(contentType)) {
            throw new Error(`Cross-encoder strategy does not support content type '${contentType}'. ` +
                `Supported types: ${this.supportedContentTypes.join(', ')}`);
        }
        await this.ensureInitialized();
        // Initialization may have just disabled the strategy.
        if (!this.isEnabled) {
            console.warn('Cross-encoder reranker not enabled, returning results unchanged');
            return results;
        }
        // Split once into the results the model can score and those it cannot.
        const textResults = [];
        const nonTextResults = [];
        for (const result of results) {
            const isText = !result.contentType || result.contentType === 'text';
            (isText ? textResults : nonTextResults).push(result);
        }
        if (textResults.length === 0) {
            return results; // No text results to rerank
        }
        if (nonTextResults.length > 0) {
            console.warn(`Cross-encoder reranker filtering ${nonTextResults.length} ` +
                `non-text results from reranking`);
        }
        try {
            const rerankedTextResults = await this.reranker.rerank(query, textResults);
            // Non-text results keep their original scores and go after the reranked text.
            return nonTextResults.length > 0
                ? [...rerankedTextResults, ...nonTextResults]
                : rerankedTextResults;
        }
        catch (error) {
            console.warn(`Cross-encoder reranking failed: ${error instanceof Error ? error.message : 'Unknown error'}. ` +
                `Returning original results.`);
            return results;
        }
    };
    /**
     * Configure the reranking strategy.
     *
     * @param {{ modelName?: string, enabled?: boolean }} [config]
     *   `modelName` swaps in a fresh reranker for that model (loaded lazily on
     *   next use); `enabled` switches the strategy on or off.
     */
    configure(config) {
        const { modelName, enabled } = config ?? {};
        if (modelName && typeof modelName === 'string') {
            this.modelName = modelName;
            this.reranker = this.createReranker(modelName);
            this.initialized = false;
            // A load failure of the previous model must not stick to the new one;
            // only an explicit `enabled: false` keeps the strategy off.
            this.isEnabled = !this.disabledByConfig;
        }
        if (enabled !== undefined) {
            this.isEnabled = Boolean(enabled);
            this.disabledByConfig = !this.isEnabled;
        }
    }
    /**
     * Get metadata about this reranking strategy.
     *
     * @returns {{ description: string, requiredModels: string[], configOptions: object }}
     */
    getMetadata() {
        return {
            description: 'Cross-encoder reranking using transformer models for improved text relevance scoring',
            requiredModels: [
                'Xenova/ms-marco-MiniLM-L-6-v2',
                'cross-encoder/ms-marco-MiniLM-L-6-v2',
                'cross-encoder/ms-marco-MiniLM-L-2-v2'
            ],
            configOptions: {
                modelName: {
                    type: 'string',
                    description: 'Cross-encoder model name to use for reranking',
                    default: 'Xenova/ms-marco-MiniLM-L-6-v2'
                },
                enabled: {
                    type: 'boolean',
                    description: 'Enable or disable cross-encoder reranking',
                    default: true
                }
            }
        };
    }
    /**
     * Check if the strategy is ready to use, loading the model if necessary.
     *
     * @returns {Promise<boolean>} `false` without loading anything when the
     *   strategy was explicitly disabled through `configure`.
     */
    async isReady() {
        if (this.disabledByConfig) {
            return false;
        }
        await this.ensureInitialized();
        return this.isEnabled && this.reranker.isLoaded();
    }
    /**
     * Get the current model name as reported by the wrapped reranker.
     *
     * @returns {string}
     */
    getModelName() {
        return this.reranker.getModelName();
    }
    /**
     * Clean up resources.
     *
     * Only resets the initialization flag; no cleanup method is called on the
     * wrapped reranker.
     */
    async cleanup() {
        this.initialized = false;
    }
}
156
/**
 * Build a cross-encoder reranking strategy.
 *
 * A deliberately plain function rather than a factory class: it forwards
 * `modelName` to the CrossEncoderRerankingStrategy constructor and hands back
 * the new instance.
 *
 * @param {string} [modelName] Optional cross-encoder model name.
 * @returns {CrossEncoderRerankingStrategy} The newly created strategy.
 */
export function createCrossEncoderStrategy(modelName) {
    const strategy = new CrossEncoderRerankingStrategy(modelName);
    return strategy;
}
165
/**
 * Text-Derived Reranking Strategy Implementation
 *
 * Replaces each image result's content with a generated text description,
 * reranks everything with the cross-encoder strategy, then restores the
 * original image content and content type on the way out. This lets image
 * results be ordered by a text-only reranking model.
 */
export class TextDerivedRerankingStrategy {
    name = 'text-derived';
    supportedContentTypes = ['text', 'image'];
    isEnabled = true;
    crossEncoderReranker;
    /**
     * @param {string} [imageToTextModelName] Accepted for backward compatibility
     *   only; it is not used because captioning is delegated to file-processor.
     * @param {string} [crossEncoderModelName] Model name forwarded to the
     *   underlying cross-encoder strategy.
     */
    constructor(imageToTextModelName, crossEncoderModelName) {
        this.crossEncoderReranker = new CrossEncoderRerankingStrategy(crossEncoderModelName);
    }
    /**
     * Generate a text description for an image.
     *
     * Delegates to `generateImageDescriptionForFile` from file-processor, which
     * is imported lazily on first use. Never throws for a failed import or
     * captioning call: it logs a warning and falls back to a description built
     * from the file name.
     *
     * @param {string} imagePath Path of the image file.
     * @returns {Promise<string>} The generated description or the fallback text.
     */
    async generateImageDescription(imagePath) {
        try {
            const { generateImageDescriptionForFile } = await import('../file-processor.js');
            const result = await generateImageDescriptionForFile(imagePath);
            return result.description;
        }
        catch (error) {
            console.warn(`Failed to generate description for image ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
            // Split on both separators so Windows paths also reduce to the file name.
            const filename = String(imagePath).split(/[\\/]/).pop() || imagePath;
            return `Image file: ${filename}`;
        }
    }
    /**
     * Rerank search results using the text-derived approach.
     *
     * Defined as an arrow-function field so it can be handed around as a
     * standalone function without losing `this`.
     *
     * @param {string} query Search query.
     * @param {Array<object>} results Search results; entries with
     *   `contentType === 'image'` are expected to carry the image path in `content`.
     * @param {string} [contentType] Optional content type of the request.
     * @returns {Promise<Array<object>>} Reranked results with image content and
     *   content type restored, or the original `results` array when the strategy
     *   is disabled or reranking fails.
     * @throws {Error} When the strategy is enabled and `contentType` is given
     *   but not supported.
     */
    rerank = async (query, results, contentType) => {
        // Honor configure({ enabled: false }), matching the cross-encoder strategy.
        if (!this.isEnabled) {
            return results;
        }
        if (contentType && !this.supportedContentTypes.includes(contentType)) {
            throw new Error(`Text-derived strategy does not support content type '${contentType}'. ` +
                `Supported types: ${this.supportedContentTypes.join(', ')}`);
        }
        try {
            // Step 1: swap image content for a text description.
            const processedResults = await Promise.all(results.map(async (result) => {
                if (result.contentType !== 'image') {
                    return result;
                }
                const description = await this.generateImageDescription(result.content);
                return {
                    ...result,
                    content: description,
                    // Present the description as text: the cross-encoder strategy
                    // skips anything whose contentType is not 'text', so leaving
                    // 'image' here would mean the description is never scored.
                    contentType: 'text',
                    originalContent: result.content,
                    originalContentType: result.contentType,
                    metadata: {
                        ...result.metadata,
                        originalImagePath: result.content,
                        generatedDescription: description
                    }
                };
            }));
            // Step 2: rerank the now all-text results.
            const rerankedResults = await this.crossEncoderReranker.rerank(query, processedResults);
            // Step 3: put the original image content and type back.
            // NOTE(review): relies on the reranker carrying the extra
            // `originalContent` / `originalContentType` fields through — confirm
            // against CrossEncoderReranker.rerank.
            return rerankedResults.map(result => {
                if (result.originalContent && result.originalContentType) {
                    return {
                        ...result,
                        content: result.originalContent,
                        contentType: result.originalContentType,
                        // Keep the generated description in metadata for reference
                        metadata: {
                            ...result.metadata,
                            generatedDescription: result.content
                        }
                    };
                }
                return result;
            });
        }
        catch (error) {
            console.warn(`Text-derived reranking failed: ${error instanceof Error ? error.message : 'Unknown error'}. ` +
                `Returning original results.`);
            return results;
        }
    };
    /**
     * Configure the reranking strategy.
     *
     * @param {{ crossEncoderModel?: string, enabled?: boolean }} [config]
     *   `crossEncoderModel` is forwarded to the underlying cross-encoder strategy
     *   as its `modelName`; `enabled` switches this strategy on or off. An
     *   image-to-text model setting is not read here.
     */
    configure(config) {
        const { crossEncoderModel, enabled } = config ?? {};
        if (crossEncoderModel && typeof crossEncoderModel === 'string') {
            this.crossEncoderReranker.configure({ modelName: crossEncoderModel });
        }
        if (enabled !== undefined) {
            this.isEnabled = Boolean(enabled);
        }
    }
    /**
     * Get metadata about this reranking strategy.
     *
     * @returns {{ description: string, requiredModels: string[], configOptions: object }}
     */
    getMetadata() {
        return {
            description: 'Text-derived reranking that converts images to text descriptions then applies cross-encoder reranking',
            requiredModels: [
                'Xenova/vit-gpt2-image-captioning', // Image-to-text model (via file-processor)
                'Xenova/ms-marco-MiniLM-L-6-v2' // Cross-encoder model
            ],
            configOptions: {
                crossEncoderModel: {
                    type: 'string',
                    description: 'Cross-encoder model name for text reranking',
                    default: 'Xenova/ms-marco-MiniLM-L-6-v2'
                },
                enabled: {
                    type: 'boolean',
                    description: 'Enable or disable text-derived reranking',
                    default: true
                }
            }
        };
    }
    /**
     * Check if the strategy is ready to use.
     *
     * @returns {Promise<boolean>} `false` immediately when this strategy is
     *   disabled (the underlying strategy is not asked, so nothing is loaded);
     *   otherwise the readiness of the underlying cross-encoder strategy.
     */
    async isReady() {
        if (!this.isEnabled) {
            return false;
        }
        return await this.crossEncoderReranker.isReady();
    }
    /**
     * Get the current model names being used.
     *
     * @returns {{ imageToText: string, crossEncoder: string }}
     */
    getModelNames() {
        return {
            imageToText: 'Xenova/vit-gpt2-image-captioning', // Fixed model via file-processor
            crossEncoder: this.crossEncoderReranker.getModelName()
        };
    }
    /**
     * Clean up resources by delegating to the underlying cross-encoder strategy.
     */
    async cleanup() {
        await this.crossEncoderReranker.cleanup();
    }
}
315
/**
 * Build a text-derived reranking strategy.
 *
 * Both arguments are forwarded unchanged to the TextDerivedRerankingStrategy
 * constructor.
 *
 * @param {string} [imageToTextModelName] Image-to-text model name.
 * @param {string} [crossEncoderModelName] Cross-encoder model name.
 * @returns {TextDerivedRerankingStrategy} The newly created strategy.
 */
export function createTextDerivedStrategy(imageToTextModelName, crossEncoderModelName) {
    const strategy = new TextDerivedRerankingStrategy(imageToTextModelName, crossEncoderModelName);
    return strategy;
}
321
/**
 * Build a standalone rerank function backed by the text-derived strategy.
 *
 * Creates a strategy through createTextDerivedStrategy and returns its
 * `rerank` member on its own.
 *
 * @param {string} [imageToTextModelName] Image-to-text model name.
 * @param {string} [crossEncoderModelName] Cross-encoder model name.
 * @returns {Function} The strategy's `rerank` function.
 */
export function createTextDerivedRerankFunction(imageToTextModelName, crossEncoderModelName) {
    const { rerank } = createTextDerivedStrategy(imageToTextModelName, crossEncoderModelName);
    return rerank;
}
328
/**
 * Metadata-Based Reranking Strategy Implementation
 *
 * Reranks search results using signals that need no model: filename matches,
 * content type, and descriptive metadata fields. Each signal yields its own
 * score; the three are combined with configurable weights and the result is
 * blended with the original score (40% original, 60% metadata-derived).
 */
export class MetadataRerankingStrategy {
    name = 'metadata';
    supportedContentTypes = ['text', 'image', 'pdf', 'docx'];
    isEnabled = true;
    config;
    /**
     * @param {object} [config] - Optional overrides. Any of `weights`,
     *   `boostFactors` and `keywordBoosts` is merged key-by-key over the
     *   built-in defaults.
     */
    constructor(config) {
        // Default configuration with reasonable weights and boost factors
        const defaultConfig = {
            weights: {
                filename: 0.4,
                contentType: 0.3,
                metadata: 0.3
            },
            boostFactors: {
                diagram: 1.5,
                chart: 1.4,
                graph: 1.4,
                image: 1.2,
                screenshot: 1.3,
                figure: 1.3
            },
            keywordBoosts: {
                // Technical terms
                'api': 1.2,
                'architecture': 1.3,
                'design': 1.2,
                'implementation': 1.2,
                'configuration': 1.2,
                'setup': 1.2,
                'guide': 1.2,
                'tutorial': 1.2,
                'example': 1.1,
                'demo': 1.1,
                // Visual content indicators
                'visual': 1.3,
                'overview': 1.2,
                'flow': 1.3,
                'process': 1.2,
                'workflow': 1.3
            }
        };
        this.config = {
            weights: { ...defaultConfig.weights, ...config?.weights },
            boostFactors: { ...defaultConfig.boostFactors, ...config?.boostFactors },
            keywordBoosts: { ...defaultConfig.keywordBoosts, ...config?.keywordBoosts }
        };
    }
    /**
     * Calculate filename-based score.
     *
     * @param {string} query - Search query.
     * @param {string} filename - File name to compare against the query.
     * @returns {number} Score in the range [0, 2].
     */
    calculateFilenameScore(query, filename) {
        const queryLower = query.toLowerCase();
        const filenameLower = filename.toLowerCase();
        let score = 0;
        // Whole-query substring match gets the highest score. A blank query is
        // skipped explicitly: '' is a substring of every string and would
        // otherwise award this bonus to every file.
        if (queryLower.trim().length > 0 && filenameLower.includes(queryLower)) {
            score += 1.0;
        }
        // Word-level matching (words of 1-2 characters are ignored as noise)
        const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
        const filenameWords = filenameLower.split(/[_\-\s\.]+/).filter(word => word.length > 2);
        for (const queryWord of queryWords) {
            for (const filenameWord of filenameWords) {
                if (filenameWord.includes(queryWord) || queryWord.includes(filenameWord)) {
                    score += 0.3;
                }
            }
        }
        // Boosts are multiplicative, so they only amplify an existing match;
        // a filename with no query overlap stays at 0.
        for (const [keyword, boost] of Object.entries(this.config.keywordBoosts)) {
            if (filenameLower.includes(keyword)) {
                score *= boost;
            }
        }
        for (const [pattern, boost] of Object.entries(this.config.boostFactors)) {
            if (filenameLower.includes(pattern)) {
                score *= boost;
            }
        }
        return Math.min(score, 2.0); // Cap at 2.0 to prevent extreme scores
    }
    /**
     * Calculate content type-based score.
     *
     * @param {string} query - Search query.
     * @param {string} contentType - Content type of the result.
     * @returns {number} Base score for the type (0.5 when unknown), multiplied
     *   when the query contains a keyword associated with that kind of content.
     */
    calculateContentTypeScore(query, contentType) {
        const queryLower = query.toLowerCase();
        // Base scores for different content types
        const contentTypeScores = {
            'image': 0.8,
            'text': 1.0,
            'pdf': 0.9,
            'docx': 0.9
        };
        let score = contentTypeScores[contentType] || 0.5;
        // Boost image content for visual-related queries
        if (contentType === 'image') {
            const visualKeywords = ['diagram', 'chart', 'graph', 'image', 'visual', 'screenshot', 'figure', 'illustration', 'visualization'];
            if (visualKeywords.some(keyword => queryLower.includes(keyword))) {
                score *= 2.0;
            }
        }
        // Boost document content for text-heavy queries
        if (contentType === 'text' || contentType === 'pdf' || contentType === 'docx') {
            const textKeywords = ['documentation', 'guide', 'tutorial', 'explanation', 'description', 'details'];
            if (textKeywords.some(keyword => queryLower.includes(keyword))) {
                score *= 1.8;
            }
        }
        return score;
    }
    /**
     * Calculate metadata-based score.
     *
     * @param {string} query - Search query.
     * @param {object|null} [metadata] - Result metadata; `null` and
     *   `undefined` are both treated as "no metadata".
     * @returns {number} Non-negative score.
     */
    calculateMetadataScore(query, metadata = {}) {
        // The default parameter only covers `undefined`. A `null` metadata
        // value would otherwise throw below and abort reranking of the batch.
        const fields = metadata ?? {};
        let score = 0;
        const queryLower = query.toLowerCase();
        // Blank queries must not count as a phrase match (see filename score).
        const hasQuery = queryLower.trim().length > 0;
        // Loop-invariant: split the query once rather than once per field.
        const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
        // Check various metadata fields
        const metadataFields = ['title', 'description', 'tags', 'category', 'type'];
        for (const field of metadataFields) {
            const value = fields[field];
            if (typeof value === 'string') {
                const valueLower = value.toLowerCase();
                if (hasQuery && valueLower.includes(queryLower)) {
                    score += 0.5;
                }
                // Word-level matching
                for (const word of queryWords) {
                    if (valueLower.includes(word)) {
                        score += 0.2;
                    }
                }
            }
            else if (Array.isArray(value)) {
                // Handle tag arrays
                for (const item of value) {
                    if (hasQuery && typeof item === 'string' && item.toLowerCase().includes(queryLower)) {
                        score += 0.3;
                    }
                }
            }
        }
        // Special handling for image metadata
        if (fields.dimensions) {
            // Larger images might be more important
            const { width, height } = fields.dimensions;
            if (width && height && width * height > 500000) { // > 500k pixels
                score += 0.5;
            }
        }
        // File size considerations
        if (fields.fileSize) {
            // Very small files might be less important
            if (fields.fileSize < 1000) {
                score -= 0.1;
            }
        }
        return Math.max(score, 0); // Ensure non-negative
    }
    /**
     * Rerank search results using metadata-based scoring.
     *
     * Defined as an arrow-function field so it stays bound to this instance
     * when passed around as a standalone RerankFunction.
     *
     * @param {string} query - Search query.
     * @param {Array<object>} results - Results; each is read for
     *   `document.source`, `contentType`, `metadata` and `score`.
     * @param {string} [contentType] - Optional content type; an unsupported
     *   value only produces a warning.
     * @returns {Promise<Array<object>>} New array sorted by combined score
     *   (descending). The input array is returned untouched when the strategy
     *   is disabled or scoring throws.
     */
    rerank = async (query, results, contentType) => {
        // If strategy is disabled, return results unchanged
        if (!this.isEnabled) {
            return results;
        }
        // Validate content type if specified
        if (contentType && !this.supportedContentTypes.includes(contentType)) {
            console.warn(`Metadata strategy does not support content type '${contentType}'. ` +
                `Supported types: ${this.supportedContentTypes.join(', ')}. Proceeding anyway.`);
        }
        try {
            const { weights } = this.config;
            const scoredResults = results.map(result => {
                const source = result.document.source;
                // Accept both '/' and '\' separators so directory names of
                // Windows-style paths do not leak into filename scoring.
                const filename = source.split(/[\\/]/).pop() || source;
                // Calculate individual scores
                const filenameScore = this.calculateFilenameScore(query, filename);
                const contentTypeScore = this.calculateContentTypeScore(query, result.contentType);
                const metadataScore = this.calculateMetadataScore(query, result.metadata);
                // Combine scores using configured weights
                const metadataBoost = (filenameScore * weights.filename +
                    contentTypeScore * weights.contentType +
                    metadataScore * weights.metadata);
                // Blend with the original similarity score; the metadata side
                // is deliberately weighted higher (0.6 vs 0.4).
                const combinedScore = result.score * 0.4 + metadataBoost * 0.6;
                return {
                    ...result,
                    score: combinedScore,
                    metadata: {
                        ...result.metadata,
                        rerankingScores: {
                            original: result.score,
                            filename: filenameScore,
                            contentType: contentTypeScore,
                            metadata: metadataScore,
                            combined: combinedScore
                        }
                    }
                };
            });
            // Sort by combined score (descending)
            scoredResults.sort((a, b) => b.score - a.score);
            return scoredResults;
        }
        catch (error) {
            // Best effort: a scoring failure must not break the search itself.
            console.warn(`Metadata reranking failed: ${error instanceof Error ? error.message : 'Unknown error'}. ` +
                `Returning original results.`);
            return results;
        }
    };
    /**
     * Configure the reranking strategy.
     *
     * @param {object} [config] - Partial configuration. `weights`,
     *   `boostFactors` and `keywordBoosts` are merged key-by-key into the
     *   current values; `enabled` toggles the strategy. A missing config is a
     *   no-op, mirroring the constructor's tolerance of an absent config.
     */
    configure(config) {
        if (config == null) {
            return;
        }
        if (config.weights && typeof config.weights === 'object') {
            this.config.weights = { ...this.config.weights, ...config.weights };
        }
        if (config.boostFactors && typeof config.boostFactors === 'object') {
            this.config.boostFactors = { ...this.config.boostFactors, ...config.boostFactors };
        }
        if (config.keywordBoosts && typeof config.keywordBoosts === 'object') {
            this.config.keywordBoosts = { ...this.config.keywordBoosts, ...config.keywordBoosts };
        }
        if (config.enabled !== undefined) {
            this.isEnabled = Boolean(config.enabled);
        }
    }
    /**
     * Get metadata about this reranking strategy.
     *
     * @returns {object} Description, required models (none) and a description
     *   of the config options. The `default` entries are copies of the
     *   currently active values, so editing them does not alter the strategy.
     */
    getMetadata() {
        return {
            description: 'Metadata-based reranking using filename patterns, content types, and file metadata',
            requiredModels: [], // No models required
            configOptions: {
                weights: {
                    type: 'object',
                    description: 'Weights for different scoring components',
                    default: { ...this.config.weights },
                    properties: {
                        filename: { type: 'number', min: 0, max: 1 },
                        contentType: { type: 'number', min: 0, max: 1 },
                        metadata: { type: 'number', min: 0, max: 1 }
                    }
                },
                boostFactors: {
                    type: 'object',
                    description: 'Boost factors for specific file patterns',
                    default: { ...this.config.boostFactors }
                },
                keywordBoosts: {
                    type: 'object',
                    description: 'Boost factors for specific keywords in filenames',
                    default: { ...this.config.keywordBoosts }
                },
                enabled: {
                    type: 'boolean',
                    description: 'Enable or disable metadata-based reranking',
                    default: true
                }
            }
        };
    }
    /**
     * Check if the strategy is ready to use.
     *
     * @returns {Promise<boolean>} The enabled flag; no model has to be loaded.
     */
    async isReady() {
        return this.isEnabled;
    }
    /**
     * Get current configuration.
     *
     * @returns {object} A copy of the configuration. The nested objects are
     *   copied too, so callers cannot mutate the strategy's internal state.
     */
    getConfig() {
        return {
            weights: { ...this.config.weights },
            boostFactors: { ...this.config.boostFactors },
            keywordBoosts: { ...this.config.keywordBoosts }
        };
    }
    /**
     * Clean up resources (no-op for metadata strategy)
     */
    async cleanup() {
        // No resources to clean up for metadata-based reranking
    }
}
627
/**
 * Build a metadata reranking strategy instance.
 *
 * @param config - passed unchanged to the MetadataRerankingStrategy constructor
 * @returns a new MetadataRerankingStrategy
 */
export function createMetadataStrategy(config) {
    const metadataStrategy = new MetadataRerankingStrategy(config);
    return metadataStrategy;
}
633
/**
 * Produce a RerankFunction backed by the metadata strategy.
 *
 * A fresh strategy is created through createMetadataStrategy and its `rerank`
 * member is handed back to the caller.
 *
 * @param config - forwarded to createMetadataStrategy
 * @returns the `rerank` member of the newly created strategy
 */
export function createMetadataRerankFunction(config) {
    const { rerank } = createMetadataStrategy(config);
    return rerank;
}
640
/**
 * Produce a RerankFunction backed by the cross-encoder strategy.
 *
 * Keeps the existing RerankFunction interface available for callers while the
 * work itself is done by a strategy object obtained from
 * createCrossEncoderStrategy.
 *
 * @param modelName - forwarded to createCrossEncoderStrategy
 * @returns the `rerank` member of the newly created strategy
 */
export function createCrossEncoderRerankFunction(modelName) {
    const { rerank } = createCrossEncoderStrategy(modelName);
    return rerank;
}
650
+ //# sourceMappingURL=reranking-strategies.js.map