rag-lite-ts 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309)
  1. package/dist/{core → cjs/core}/model-validator.js +1 -1
  2. package/dist/{core → cjs/core}/vector-index.js +4 -2
  3. package/dist/esm/api-errors.d.ts +90 -0
  4. package/dist/esm/api-errors.js +320 -0
  5. package/dist/esm/cli/indexer.d.ts +11 -0
  6. package/dist/esm/cli/indexer.js +471 -0
  7. package/dist/esm/cli/search.d.ts +7 -0
  8. package/dist/esm/cli/search.js +332 -0
  9. package/dist/esm/cli.d.ts +3 -0
  10. package/dist/esm/cli.js +529 -0
  11. package/dist/esm/config.d.ts +51 -0
  12. package/dist/esm/config.js +79 -0
  13. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  14. package/dist/esm/core/abstract-embedder.js +264 -0
  15. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  16. package/dist/esm/core/actionable-error-messages.js +397 -0
  17. package/dist/esm/core/adapters.d.ts +93 -0
  18. package/dist/esm/core/adapters.js +139 -0
  19. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  20. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  21. package/dist/esm/core/binary-index-format.d.ts +78 -0
  22. package/dist/esm/core/binary-index-format.js +291 -0
  23. package/dist/esm/core/chunker.d.ts +119 -0
  24. package/dist/esm/core/chunker.js +73 -0
  25. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  26. package/dist/esm/core/cli-database-utils.js +239 -0
  27. package/dist/esm/core/config.d.ts +102 -0
  28. package/dist/esm/core/config.js +247 -0
  29. package/dist/esm/core/content-errors.d.ts +111 -0
  30. package/dist/esm/core/content-errors.js +362 -0
  31. package/dist/esm/core/content-manager.d.ts +335 -0
  32. package/dist/esm/core/content-manager.js +1476 -0
  33. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  34. package/dist/esm/core/content-performance-optimizer.js +516 -0
  35. package/dist/esm/core/content-resolver.d.ts +104 -0
  36. package/dist/esm/core/content-resolver.js +285 -0
  37. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  38. package/dist/esm/core/cross-modal-search.js +342 -0
  39. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  40. package/dist/esm/core/database-connection-manager.js +310 -0
  41. package/dist/esm/core/db.d.ts +213 -0
  42. package/dist/esm/core/db.js +895 -0
  43. package/dist/esm/core/embedder-factory.d.ts +154 -0
  44. package/dist/esm/core/embedder-factory.js +311 -0
  45. package/dist/esm/core/error-handler.d.ts +112 -0
  46. package/dist/esm/core/error-handler.js +239 -0
  47. package/dist/esm/core/index.d.ts +59 -0
  48. package/dist/esm/core/index.js +69 -0
  49. package/dist/esm/core/ingestion.d.ts +202 -0
  50. package/dist/esm/core/ingestion.js +901 -0
  51. package/dist/esm/core/interfaces.d.ts +408 -0
  52. package/dist/esm/core/interfaces.js +106 -0
  53. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  54. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  55. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  56. package/dist/esm/core/mode-detection-service.js +565 -0
  57. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  58. package/dist/esm/core/mode-model-validator.js +203 -0
  59. package/dist/esm/core/model-registry.d.ts +116 -0
  60. package/dist/esm/core/model-registry.js +411 -0
  61. package/dist/esm/core/model-validator.d.ts +217 -0
  62. package/dist/esm/core/model-validator.js +782 -0
  63. package/dist/esm/core/path-manager.d.ts +47 -0
  64. package/dist/esm/core/path-manager.js +71 -0
  65. package/dist/esm/core/raglite-paths.d.ts +121 -0
  66. package/dist/esm/core/raglite-paths.js +145 -0
  67. package/dist/esm/core/reranking-config.d.ts +42 -0
  68. package/dist/esm/core/reranking-config.js +147 -0
  69. package/dist/esm/core/reranking-factory.d.ts +92 -0
  70. package/dist/esm/core/reranking-factory.js +410 -0
  71. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  72. package/dist/esm/core/reranking-strategies.js +650 -0
  73. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  74. package/dist/esm/core/resource-cleanup.js +371 -0
  75. package/dist/esm/core/resource-manager.d.ts +212 -0
  76. package/dist/esm/core/resource-manager.js +564 -0
  77. package/dist/esm/core/search-pipeline.d.ts +111 -0
  78. package/dist/esm/core/search-pipeline.js +287 -0
  79. package/dist/esm/core/search.d.ts +141 -0
  80. package/dist/esm/core/search.js +320 -0
  81. package/dist/esm/core/streaming-operations.d.ts +145 -0
  82. package/dist/esm/core/streaming-operations.js +409 -0
  83. package/dist/esm/core/types.d.ts +66 -0
  84. package/dist/esm/core/types.js +6 -0
  85. package/dist/esm/core/universal-embedder.d.ts +177 -0
  86. package/dist/esm/core/universal-embedder.js +139 -0
  87. package/dist/esm/core/validation-messages.d.ts +99 -0
  88. package/dist/esm/core/validation-messages.js +334 -0
  89. package/dist/esm/core/vector-index.d.ts +72 -0
  90. package/dist/esm/core/vector-index.js +333 -0
  91. package/dist/esm/dom-polyfills.d.ts +6 -0
  92. package/dist/esm/dom-polyfills.js +37 -0
  93. package/dist/esm/factories/index.d.ts +27 -0
  94. package/dist/esm/factories/index.js +29 -0
  95. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  96. package/dist/esm/factories/ingestion-factory.js +477 -0
  97. package/dist/esm/factories/search-factory.d.ts +154 -0
  98. package/dist/esm/factories/search-factory.js +344 -0
  99. package/dist/esm/file-processor.d.ts +147 -0
  100. package/dist/esm/file-processor.js +963 -0
  101. package/dist/esm/index-manager.d.ts +116 -0
  102. package/dist/esm/index-manager.js +598 -0
  103. package/dist/esm/index.d.ts +75 -0
  104. package/dist/esm/index.js +110 -0
  105. package/dist/esm/indexer.d.ts +7 -0
  106. package/dist/esm/indexer.js +54 -0
  107. package/dist/esm/ingestion.d.ts +63 -0
  108. package/dist/esm/ingestion.js +124 -0
  109. package/dist/esm/mcp-server.d.ts +46 -0
  110. package/dist/esm/mcp-server.js +1820 -0
  111. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  112. package/dist/esm/multimodal/clip-embedder.js +996 -0
  113. package/dist/esm/multimodal/index.d.ts +6 -0
  114. package/dist/esm/multimodal/index.js +6 -0
  115. package/dist/esm/preprocess.d.ts +19 -0
  116. package/dist/esm/preprocess.js +203 -0
  117. package/dist/esm/preprocessors/index.d.ts +17 -0
  118. package/dist/esm/preprocessors/index.js +38 -0
  119. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  120. package/dist/esm/preprocessors/mdx.js +101 -0
  121. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  122. package/dist/esm/preprocessors/mermaid.js +329 -0
  123. package/dist/esm/preprocessors/registry.d.ts +56 -0
  124. package/dist/esm/preprocessors/registry.js +179 -0
  125. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  126. package/dist/esm/run-error-recovery-tests.js +101 -0
  127. package/dist/esm/search-standalone.d.ts +7 -0
  128. package/dist/esm/search-standalone.js +117 -0
  129. package/dist/esm/search.d.ts +99 -0
  130. package/dist/esm/search.js +177 -0
  131. package/dist/esm/test-utils.d.ts +18 -0
  132. package/dist/esm/test-utils.js +27 -0
  133. package/dist/esm/text/chunker.d.ts +33 -0
  134. package/dist/esm/text/chunker.js +279 -0
  135. package/dist/esm/text/embedder.d.ts +111 -0
  136. package/dist/esm/text/embedder.js +386 -0
  137. package/dist/esm/text/index.d.ts +8 -0
  138. package/dist/esm/text/index.js +9 -0
  139. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  140. package/dist/esm/text/preprocessors/index.js +38 -0
  141. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  142. package/dist/esm/text/preprocessors/mdx.js +101 -0
  143. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  144. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  145. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  146. package/dist/esm/text/preprocessors/registry.js +180 -0
  147. package/dist/esm/text/reranker.d.ts +49 -0
  148. package/dist/esm/text/reranker.js +274 -0
  149. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  150. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  151. package/dist/esm/text/tokenizer.d.ts +22 -0
  152. package/dist/esm/text/tokenizer.js +64 -0
  153. package/dist/esm/types.d.ts +83 -0
  154. package/dist/esm/types.js +3 -0
  155. package/dist/esm/utils/vector-math.d.ts +31 -0
  156. package/dist/esm/utils/vector-math.js +70 -0
  157. package/package.json +30 -12
  158. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  159. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  160. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  161. /package/dist/{cli → cjs/cli}/indexer.js +0 -0
  162. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  163. /package/dist/{cli → cjs/cli}/search.js +0 -0
  164. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  165. /package/dist/{cli.js → cjs/cli.js} +0 -0
  166. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  167. /package/dist/{config.js → cjs/config.js} +0 -0
  168. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  169. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  170. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  171. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  172. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  173. /package/dist/{core → cjs/core}/adapters.js +0 -0
  174. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  175. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  176. /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
  177. /package/dist/{core → cjs/core}/binary-index-format.js +0 -0
  178. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  179. /package/dist/{core → cjs/core}/chunker.js +0 -0
  180. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  181. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  182. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/config.js +0 -0
  184. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  186. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  188. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  192. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  194. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  196. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/db.js +0 -0
  198. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  200. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  202. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/index.js +0 -0
  204. /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/ingestion.js +0 -0
  206. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  208. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  210. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  212. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  214. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  216. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  218. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  219. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  220. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  221. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  222. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  223. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  224. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  225. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  226. /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
  227. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  229. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  231. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  233. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/search.js +0 -0
  235. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  237. /package/dist/{core → cjs/core}/types.d.ts +0 -0
  238. /package/dist/{core → cjs/core}/types.js +0 -0
  239. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  240. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  241. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  242. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  243. /package/dist/{core → cjs/core}/vector-index.d.ts +0 -0
  244. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  245. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  246. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  247. /package/dist/{factories → cjs/factories}/index.js +0 -0
  248. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  249. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  250. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  251. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  252. /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
  253. /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
  254. /package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +0 -0
  255. /package/dist/{index-manager.js → cjs/index-manager.js} +0 -0
  256. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  257. /package/dist/{index.js → cjs/index.js} +0 -0
  258. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  259. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  260. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  261. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  262. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  263. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  264. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  265. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  268. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  269. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  270. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  278. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  279. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  280. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  281. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  282. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  283. /package/dist/{search.js → cjs/search.js} +0 -0
  284. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  285. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  286. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  287. /package/dist/{text → cjs/text}/chunker.js +0 -0
  288. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  289. /package/dist/{text → cjs/text}/embedder.js +0 -0
  290. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  291. /package/dist/{text → cjs/text}/index.js +0 -0
  292. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  300. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  301. /package/dist/{text → cjs/text}/reranker.js +0 -0
  302. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  304. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  306. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  307. /package/dist/{types.js → cjs/types.js} +0 -0
  308. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,274 @@
1
+ // Set up `self` polyfills at module load, ahead of the dynamic transformers.js import in tryLoadModel()
2
+ // Force polyfills in Node.js environment regardless of window state (NOTE: static ESM imports below are hoisted and still evaluate first)
3
+ if (typeof globalThis !== 'undefined') {
4
+ if (typeof globalThis.self === 'undefined') {
5
+ globalThis.self = globalThis;
6
+ }
7
+ if (typeof global.self === 'undefined') {
8
+ global.self = global;
9
+ }
10
+ }
11
+ import '../dom-polyfills.js';
12
+ import { config } from '../core/config.js';
13
+ /**
14
+ * Cross-encoder reranker for improving search result quality
15
+ * Scores query-document pairs with a sequence-classification model to rerank initial vector search results
16
+ */
17
+ export class CrossEncoderReranker {
18
+ model = null; // Set by tryLoadModel(); typed as any in source to avoid complex transformers.js typing issues
19
+ tokenizer = null;
20
+ modelName = 'Xenova/ms-marco-MiniLM-L-6-v2'; // Use working cross-encoder model
21
+ /**
22
+ * Ensure `self` is defined on globalThis/global when no `window` exists (for transformers.js)
23
+ */
24
+ ensurePolyfills() {
25
+ // Use the exact same approach as the working standalone version
26
+ if (typeof window === 'undefined' && typeof globalThis !== 'undefined') {
27
+ if (typeof globalThis.self === 'undefined') {
28
+ globalThis.self = globalThis;
29
+ }
30
+ if (typeof global.self === 'undefined') {
31
+ global.self = global;
32
+ }
33
+ // Polyfills already set up at module level
34
+ }
35
+ }
36
+ /**
37
+ * Load the cross-encoder model and tokenizer named by this.modelName
38
+ */
39
+ async loadModel() {
40
+ await this.tryLoadModel(this.modelName);
41
+ }
42
+ /**
43
+ * Load a specific model and its tokenizer; loader errors propagate to the caller
44
+ */
45
+ async tryLoadModel(modelName) {
46
+ console.log(`Loading cross-encoder model: ${modelName}`);
47
+ // Ensure polyfills are set up exactly like the working standalone version
48
+ this.ensurePolyfills();
49
+ // Import transformers.js dynamically so the polyfills above are in place first
50
+ const { AutoTokenizer, AutoModelForSequenceClassification } = await import('@huggingface/transformers');
51
+ console.log('Loading model...');
52
+ this.model = await AutoModelForSequenceClassification.from_pretrained(modelName, {
53
+ cache_dir: config.model_cache_path,
54
+ dtype: 'fp32'
55
+ });
56
+ console.log('Loading tokenizer...');
57
+ this.tokenizer = await AutoTokenizer.from_pretrained(modelName, {
58
+ cache_dir: config.model_cache_path
59
+ });
60
+ console.log(`Cross-encoder model loaded successfully: ${modelName}`);
61
+ }
62
+ /**
63
+ * Rerank search results using cross-encoder relevance scoring; falls back to simpleTextReranking on error or low score spread
64
+ * @param query - Original search query
65
+ * @param results - Initial search results from vector search
66
+ * @returns Promise resolving to reranked results (throws if the model has not been loaded)
67
+ */
68
+ async rerank(query, results) {
69
+ if (!this.model) {
70
+ throw new Error('Cross-encoder model not loaded. Call loadModel() first.');
71
+ }
72
+ if (results.length === 0) {
73
+ return results;
74
+ }
75
+ try {
76
+ // Use cross-encoder approach - prepare queries and documents for proper cross-encoder format
77
+ const queries = results.map(() => query);
78
+ const documents = results.map(result => result.content);
79
+ console.log('Tokenizing query-document pairs...');
80
+ // Tokenize using the proper cross-encoder format with text_pair
81
+ const features = this.tokenizer(queries, {
82
+ text_pair: documents,
83
+ padding: true,
84
+ truncation: true,
85
+ return_tensors: 'pt'
86
+ });
87
+ console.log('Running cross-encoder inference...');
88
+ // Get model predictions
89
+ const output = await this.model(features);
90
+ // Extract logits - these are the raw relevance scores
91
+ const logits = output.logits;
92
+ // Convert logits to scores and pair with results
93
+ const scores = results.map((result, i) => {
94
+ // For cross-encoders, the raw logit is the relevance score (indexing by i assumes one logit per pair - TODO confirm)
95
+ // Higher logits = more relevant
96
+ const rawScore = Array.isArray(logits) ? logits[i] : logits.data[i];
97
+ // Apply sigmoid to convert logit to probability-like score
98
+ const score = 1 / (1 + Math.exp(-rawScore));
99
+ return {
100
+ score: score,
101
+ result: result
102
+ };
103
+ });
104
+ // Check if scores look reasonable
105
+ const maxScore = Math.max(...scores.map(s => s.score));
106
+ const minScore = Math.min(...scores.map(s => s.score));
107
+ const scoreRange = maxScore - minScore;
108
+ // If there's very little variation in scores, fall back to simple reranking (this also covers a single result, whose range is 0)
109
+ if (scoreRange < 0.1) {
110
+ console.log(`Cross-encoder scores have low variation (range: ${scoreRange.toFixed(3)}), using simple text reranking`);
111
+ return this.simpleTextReranking(query, results);
112
+ }
113
+ // Sort by cross-encoder scores (descending)
114
+ scores.sort((a, b) => b.score - a.score);
115
+ // Update results with new scores and return reranked results
116
+ return scores.map(item => ({
117
+ ...item.result,
118
+ score: item.score
119
+ }));
120
+ }
121
+ catch (error) {
122
+ console.warn(`Reranking failed, falling back to simple text reranking: ${error instanceof Error ? error.message : 'Unknown error'}`);
123
+ return this.simpleTextReranking(query, results);
124
+ }
125
+ }
126
+ /**
127
+ * Check if both the model and the tokenizer are loaded
128
+ */
129
+ isLoaded() {
130
+ return this.model !== null && this.tokenizer !== null;
131
+ }
132
+ /**
133
+ * Get the model name being used
134
+ */
135
+ getModelName() {
136
+ return this.modelName;
137
+ }
138
+ /**
139
+ * Compute cosine similarity between two equal-length numeric vectors; returns 0 on length mismatch or zero magnitude
140
+ */
141
+ cosineSimilarity(a, b) {
142
+ if (a.length !== b.length) {
143
+ return 0;
144
+ }
145
+ let dotProduct = 0;
146
+ let normA = 0;
147
+ let normB = 0;
148
+ for (let i = 0; i < a.length; i++) {
149
+ dotProduct += a[i] * b[i];
150
+ normA += a[i] * a[i];
151
+ normB += b[i] * b[i];
152
+ }
153
+ const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
154
+ return magnitude === 0 ? 0 : dotProduct / magnitude;
155
+ }
156
+ /**
157
+ * Simple text-based reranking that adds keyword and phrase matching bonuses to each result's existing score
158
+ * This is a fallback when cross-encoder scoring fails or doesn't provide good discrimination
159
+ */
160
+ simpleTextReranking(query, results) {
161
+ const queryLower = query.toLowerCase();
162
+ const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
163
+ const rerankedResults = results.map(result => {
164
+ const textLower = result.content.toLowerCase();
165
+ const titleLower = result.document.title?.toLowerCase() || '';
166
+ let score = result.score; // Start with vector search score
167
+ let bonus = 0;
168
+ // Bonus for exact query matches
169
+ if (textLower.includes(queryLower)) {
170
+ bonus += 0.15;
171
+ }
172
+ // Bonus for title matches
173
+ if (titleLower.includes(queryLower)) {
174
+ bonus += 0.1;
175
+ }
176
+ // Special handling for "What is X?" queries
177
+ if (queryLower.startsWith('what is')) {
178
+ const subject = queryLower.replace('what is', '').trim().replace(/\?$/, '');
179
+ // Major bonus for definitional patterns like "X is a/an..."
180
+ const definitionPatterns = [
181
+ `${subject} is a`,
182
+ `${subject} is an`,
183
+ `${subject} is the`,
184
+ `${subject} provides`,
185
+ `${subject} helps`
186
+ ];
187
+ for (const pattern of definitionPatterns) {
188
+ if (textLower.includes(pattern)) {
189
+ bonus += 0.3; // Large bonus for definitional content
190
+ break;
191
+ }
192
+ }
193
+ }
194
+ // Bonus for individual word matches
195
+ let wordMatches = 0;
196
+ for (const word of queryWords) {
197
+ if (textLower.includes(word)) {
198
+ wordMatches++;
199
+ }
200
+ if (titleLower.includes(word)) {
201
+ wordMatches += 0.5; // Title matches add an extra half point
202
+ }
203
+ }
204
+ // Normalize word match bonus
205
+ if (queryWords.length > 0) {
206
+ bonus += (wordMatches / queryWords.length) * 0.1;
207
+ }
208
+ // Bonus for content that appears to be introductory/definitional (applied once per matching keyword)
209
+ const introKeywords = ['introduction', 'what is', 'overview', 'about', 'definition', '# introduction'];
210
+ for (const keyword of introKeywords) {
211
+ if (textLower.includes(keyword) || titleLower.includes(keyword)) {
212
+ bonus += 0.08;
213
+ }
214
+ }
215
+ // Extra bonus for files with "intro" in the path
216
+ if (result.document.source?.toLowerCase().includes('intro')) {
217
+ bonus += 0.1;
218
+ }
219
+ // Apply bonus but cap the total score at 1.0
220
+ const newScore = Math.min(1.0, score + bonus);
221
+ return {
222
+ ...result,
223
+ score: newScore
224
+ };
225
+ });
226
+ // Sort by new scores (descending)
227
+ rerankedResults.sort((a, b) => b.score - a.score);
228
+ return rerankedResults;
229
+ }
230
+ }
231
+ /**
232
+ *
233
+ * Create a RerankFunction implementation using the cross-encoder reranker
234
+ * This function implements the core RerankFunction interface for dependency injection
235
+ * @param modelName - Optional model name override
236
+ * @returns RerankFunction that can be injected into core components
237
+ */
238
+ export function createTextRerankFunction(modelName) {
239
+ let reranker = null;
240
+ const rerankFunction = async (query, results, contentType) => {
241
+ // Only support text content type (a missing contentType is accepted)
242
+ if (contentType && contentType !== 'text') {
243
+ throw new Error(`Text reranker only supports 'text' content type, got: ${contentType}`);
244
+ }
245
+ // Lazily create the reranker and load its model on first use
246
+ if (!reranker) {
247
+ reranker = new CrossEncoderReranker();
248
+ if (modelName) {
249
+ // Set custom model name if provided
250
+ reranker.modelName = modelName;
251
+ }
252
+ await reranker.loadModel();
253
+ }
254
+ // If the reranker exists but is not loaded (e.g. an earlier loadModel() call threw), return results unchanged
255
+ if (!reranker.isLoaded()) {
256
+ console.warn('Text reranker not loaded, returning results unchanged');
257
+ return results;
258
+ }
259
+ // Use the existing rerank method
260
+ return await reranker.rerank(query, results);
261
+ };
262
+ return rerankFunction;
263
+ }
264
+ // =============================================================================
265
+ // REMOVED IN v3.0.0: createTextReranker() factory object
266
+ // =============================================================================
267
+ // The createTextReranker() function has been removed as it was redundant.
268
+ // It was just a wrapper that created new CrossEncoderReranker instances.
269
+ //
270
+ // Migration guide:
271
+ // - For public API: Use createReranker() from core/reranking-factory.ts
272
+ // - For dependency injection: Use createTextRerankFunction()
273
+ // - For direct access: Use new CrossEncoderReranker() directly
274
+ //# sourceMappingURL=reranker.js.map
@@ -0,0 +1,96 @@
1
+ /**
2
+ * TEXT IMPLEMENTATION — Sentence Transformer Embedder Implementation
3
+ * Implements UniversalEmbedder interface for sentence-transformer models
4
+ * Adapts existing text embedding logic to the universal interface
5
+ */
6
+ import '../dom-polyfills.js';
7
+ import { BaseUniversalEmbedder, type EmbedderOptions } from '../core/abstract-embedder.js';
8
+ import type { EmbeddingResult } from '../types.js';
9
+ /**
10
+ * Sentence transformer embedder implementation
11
+ * Supports sentence-transformers/all-MiniLM-L6-v2 and Xenova/all-mpnet-base-v2
12
+ * Ensures consistent EmbeddingResult format with contentType='text'
13
+ * Adapts existing EmbeddingEngine to the UniversalEmbedder interface by extending BaseUniversalEmbedder
14
+ */
15
+ export declare class SentenceTransformerEmbedder extends BaseUniversalEmbedder {
16
+ private embeddingEngine;
17
+ private resourceManager;
18
+ private embedderResourceId?;
19
+ private engineResourceId?;
20
+ constructor(modelName: string, options?: EmbedderOptions);
21
+ /**
22
+ * Load the sentence transformer model using existing EmbeddingEngine
23
+ */
24
+ loadModel(): Promise<void>;
25
+ /**
26
+ * Clean up model resources with comprehensive disposal
27
+ */
28
+ cleanup(): Promise<void>;
29
+ /**
30
+ * Embed a single text string using the existing EmbeddingEngine
31
+ */
32
+ embedText(text: string): Promise<EmbeddingResult>;
33
+ /**
34
+ * Optimized batch processing using existing EmbeddingEngine and BatchProcessingOptimizer
35
+ * Overrides the base implementation for better performance with progress reporting
36
+ */
37
+ protected processBatch(batch: Array<{
38
+ content: string;
39
+ contentType: string;
40
+ metadata?: Record<string, any>;
41
+ }>): Promise<EmbeddingResult[]>;
42
+ /**
43
+ * Get model-specific information (name, type, dimensions, version, supported content types, requirements, and capability flags)
44
+ */
45
+ getModelInfo(): {
46
+ capabilities: {
47
+ supportsSemanticSimilarity: boolean;
48
+ supportsTextClassification: boolean;
49
+ supportsTextClustering: boolean;
50
+ recommendedUseCase: string;
51
+ supportsText: boolean;
52
+ supportsImages: boolean;
53
+ supportsBatchProcessing: boolean;
54
+ supportsMetadata: boolean;
55
+ maxBatchSize?: number;
56
+ maxTextLength?: number;
57
+ supportedImageFormats?: readonly string[];
58
+ supportsMultimodal?: boolean;
59
+ supportsCrossModalSearch?: boolean;
60
+ unifiedEmbeddingSpace?: boolean;
61
+ reliableImplementation?: boolean;
62
+ };
63
+ name: string;
64
+ type: import("../core/universal-embedder.js").ModelType;
65
+ dimensions: number;
66
+ version: string;
67
+ supportedContentTypes: readonly string[];
68
+ requirements: import("../types.js").ModelRequirements;
69
+ };
70
+ /**
71
+ * Check if the model is suitable for a specific task ('similarity', 'classification', 'clustering' or 'retrieval')
72
+ */
73
+ isSuitableForTask(task: 'similarity' | 'classification' | 'clustering' | 'retrieval'): boolean;
74
+ /**
75
+ * Embed a batch of chunk strings using existing EmbeddingEngine's optimized method
76
+ * This method provides compatibility with the existing document ingestion pipeline
77
+ */
78
+ embedDocumentBatch(chunks: string[]): Promise<EmbeddingResult[]>;
79
+ /**
80
+ * Get the model version from the underlying EmbeddingEngine
81
+ */
82
+ getModelVersion(): string;
83
+ /**
84
+ * Get the batch size from the underlying EmbeddingEngine
85
+ */
86
+ getBatchSize(): number;
87
+ /**
88
+ * Check if the underlying EmbeddingEngine is loaded
89
+ */
90
+ isEngineLoaded(): boolean;
91
+ /**
92
+ * Override isLoaded to check both internal state and engine state
93
+ */
94
+ isLoaded(): boolean;
95
+ }
96
+ //# sourceMappingURL=sentence-transformer-embedder.d.ts.map