rag-lite-ts 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/dist/{cli → cjs/cli}/indexer.js +1 -1
  2. package/dist/{cli → cjs/cli}/search.js +5 -10
  3. package/dist/{core → cjs/core}/binary-index-format.d.ts +28 -2
  4. package/dist/cjs/core/binary-index-format.js +291 -0
  5. package/dist/{core → cjs/core}/ingestion.d.ts +5 -1
  6. package/dist/{core → cjs/core}/ingestion.js +76 -9
  7. package/dist/{core → cjs/core}/model-validator.js +1 -1
  8. package/dist/{core → cjs/core}/reranking-strategies.js +4 -5
  9. package/dist/{core → cjs/core}/search.js +2 -1
  10. package/dist/{core → cjs/core}/types.d.ts +1 -1
  11. package/dist/{core → cjs/core}/vector-index.d.ts +4 -0
  12. package/dist/{core → cjs/core}/vector-index.js +10 -2
  13. package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +2 -0
  14. package/dist/{file-processor.js → cjs/file-processor.js} +20 -0
  15. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +17 -1
  16. package/dist/{index-manager.js → cjs/index-manager.js} +148 -7
  17. package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +71 -66
  18. package/dist/esm/api-errors.d.ts +90 -0
  19. package/dist/esm/api-errors.js +320 -0
  20. package/dist/esm/cli/indexer.d.ts +11 -0
  21. package/dist/esm/cli/indexer.js +471 -0
  22. package/dist/esm/cli/search.d.ts +7 -0
  23. package/dist/esm/cli/search.js +332 -0
  24. package/dist/esm/cli.d.ts +3 -0
  25. package/dist/esm/cli.js +529 -0
  26. package/dist/esm/config.d.ts +51 -0
  27. package/dist/esm/config.js +79 -0
  28. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  29. package/dist/esm/core/abstract-embedder.js +264 -0
  30. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  31. package/dist/esm/core/actionable-error-messages.js +397 -0
  32. package/dist/esm/core/adapters.d.ts +93 -0
  33. package/dist/esm/core/adapters.js +139 -0
  34. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  35. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  36. package/dist/esm/core/binary-index-format.d.ts +78 -0
  37. package/dist/esm/core/binary-index-format.js +291 -0
  38. package/dist/esm/core/chunker.d.ts +119 -0
  39. package/dist/esm/core/chunker.js +73 -0
  40. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  41. package/dist/esm/core/cli-database-utils.js +239 -0
  42. package/dist/esm/core/config.d.ts +102 -0
  43. package/dist/esm/core/config.js +247 -0
  44. package/dist/esm/core/content-errors.d.ts +111 -0
  45. package/dist/esm/core/content-errors.js +362 -0
  46. package/dist/esm/core/content-manager.d.ts +335 -0
  47. package/dist/esm/core/content-manager.js +1476 -0
  48. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  49. package/dist/esm/core/content-performance-optimizer.js +516 -0
  50. package/dist/esm/core/content-resolver.d.ts +104 -0
  51. package/dist/esm/core/content-resolver.js +285 -0
  52. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  53. package/dist/esm/core/cross-modal-search.js +342 -0
  54. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  55. package/dist/esm/core/database-connection-manager.js +310 -0
  56. package/dist/esm/core/db.d.ts +213 -0
  57. package/dist/esm/core/db.js +895 -0
  58. package/dist/esm/core/embedder-factory.d.ts +154 -0
  59. package/dist/esm/core/embedder-factory.js +311 -0
  60. package/dist/esm/core/error-handler.d.ts +112 -0
  61. package/dist/esm/core/error-handler.js +239 -0
  62. package/dist/esm/core/index.d.ts +59 -0
  63. package/dist/esm/core/index.js +69 -0
  64. package/dist/esm/core/ingestion.d.ts +202 -0
  65. package/dist/esm/core/ingestion.js +901 -0
  66. package/dist/esm/core/interfaces.d.ts +408 -0
  67. package/dist/esm/core/interfaces.js +106 -0
  68. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  69. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  70. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  71. package/dist/esm/core/mode-detection-service.js +565 -0
  72. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  73. package/dist/esm/core/mode-model-validator.js +203 -0
  74. package/dist/esm/core/model-registry.d.ts +116 -0
  75. package/dist/esm/core/model-registry.js +411 -0
  76. package/dist/esm/core/model-validator.d.ts +217 -0
  77. package/dist/esm/core/model-validator.js +782 -0
  78. package/dist/esm/core/path-manager.d.ts +47 -0
  79. package/dist/esm/core/path-manager.js +71 -0
  80. package/dist/esm/core/raglite-paths.d.ts +121 -0
  81. package/dist/esm/core/raglite-paths.js +145 -0
  82. package/dist/esm/core/reranking-config.d.ts +42 -0
  83. package/dist/esm/core/reranking-config.js +147 -0
  84. package/dist/esm/core/reranking-factory.d.ts +92 -0
  85. package/dist/esm/core/reranking-factory.js +410 -0
  86. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  87. package/dist/esm/core/reranking-strategies.js +650 -0
  88. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  89. package/dist/esm/core/resource-cleanup.js +371 -0
  90. package/dist/esm/core/resource-manager.d.ts +212 -0
  91. package/dist/esm/core/resource-manager.js +564 -0
  92. package/dist/esm/core/search-pipeline.d.ts +111 -0
  93. package/dist/esm/core/search-pipeline.js +287 -0
  94. package/dist/esm/core/search.d.ts +141 -0
  95. package/dist/esm/core/search.js +320 -0
  96. package/dist/esm/core/streaming-operations.d.ts +145 -0
  97. package/dist/esm/core/streaming-operations.js +409 -0
  98. package/dist/esm/core/types.d.ts +66 -0
  99. package/dist/esm/core/types.js +6 -0
  100. package/dist/esm/core/universal-embedder.d.ts +177 -0
  101. package/dist/esm/core/universal-embedder.js +139 -0
  102. package/dist/esm/core/validation-messages.d.ts +99 -0
  103. package/dist/esm/core/validation-messages.js +334 -0
  104. package/dist/esm/core/vector-index.d.ts +72 -0
  105. package/dist/esm/core/vector-index.js +333 -0
  106. package/dist/esm/dom-polyfills.d.ts +6 -0
  107. package/dist/esm/dom-polyfills.js +37 -0
  108. package/dist/esm/factories/index.d.ts +27 -0
  109. package/dist/esm/factories/index.js +29 -0
  110. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  111. package/dist/esm/factories/ingestion-factory.js +477 -0
  112. package/dist/esm/factories/search-factory.d.ts +154 -0
  113. package/dist/esm/factories/search-factory.js +344 -0
  114. package/dist/esm/file-processor.d.ts +147 -0
  115. package/dist/esm/file-processor.js +963 -0
  116. package/dist/esm/index-manager.d.ts +116 -0
  117. package/dist/esm/index-manager.js +598 -0
  118. package/dist/esm/index.d.ts +75 -0
  119. package/dist/esm/index.js +110 -0
  120. package/dist/esm/indexer.d.ts +7 -0
  121. package/dist/esm/indexer.js +54 -0
  122. package/dist/esm/ingestion.d.ts +63 -0
  123. package/dist/esm/ingestion.js +124 -0
  124. package/dist/esm/mcp-server.d.ts +46 -0
  125. package/dist/esm/mcp-server.js +1820 -0
  126. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  127. package/dist/esm/multimodal/clip-embedder.js +996 -0
  128. package/dist/esm/multimodal/index.d.ts +6 -0
  129. package/dist/esm/multimodal/index.js +6 -0
  130. package/dist/esm/preprocess.d.ts +19 -0
  131. package/dist/esm/preprocess.js +203 -0
  132. package/dist/esm/preprocessors/index.d.ts +17 -0
  133. package/dist/esm/preprocessors/index.js +38 -0
  134. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  135. package/dist/esm/preprocessors/mdx.js +101 -0
  136. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  137. package/dist/esm/preprocessors/mermaid.js +329 -0
  138. package/dist/esm/preprocessors/registry.d.ts +56 -0
  139. package/dist/esm/preprocessors/registry.js +179 -0
  140. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  141. package/dist/esm/run-error-recovery-tests.js +101 -0
  142. package/dist/esm/search-standalone.d.ts +7 -0
  143. package/dist/esm/search-standalone.js +117 -0
  144. package/dist/esm/search.d.ts +99 -0
  145. package/dist/esm/search.js +177 -0
  146. package/dist/esm/test-utils.d.ts +18 -0
  147. package/dist/esm/test-utils.js +27 -0
  148. package/dist/esm/text/chunker.d.ts +33 -0
  149. package/dist/esm/text/chunker.js +279 -0
  150. package/dist/esm/text/embedder.d.ts +111 -0
  151. package/dist/esm/text/embedder.js +386 -0
  152. package/dist/esm/text/index.d.ts +8 -0
  153. package/dist/esm/text/index.js +9 -0
  154. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  155. package/dist/esm/text/preprocessors/index.js +38 -0
  156. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  157. package/dist/esm/text/preprocessors/mdx.js +101 -0
  158. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  159. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  160. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  161. package/dist/esm/text/preprocessors/registry.js +180 -0
  162. package/dist/esm/text/reranker.d.ts +49 -0
  163. package/dist/esm/text/reranker.js +274 -0
  164. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  165. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  166. package/dist/esm/text/tokenizer.d.ts +22 -0
  167. package/dist/esm/text/tokenizer.js +64 -0
  168. package/dist/esm/types.d.ts +83 -0
  169. package/dist/esm/types.js +3 -0
  170. package/dist/esm/utils/vector-math.d.ts +31 -0
  171. package/dist/esm/utils/vector-math.js +70 -0
  172. package/package.json +30 -12
  173. package/dist/core/binary-index-format.js +0 -122
  174. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  175. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  176. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  177. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  178. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  179. /package/dist/{cli.js → cjs/cli.js} +0 -0
  180. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  181. /package/dist/{config.js → cjs/config.js} +0 -0
  182. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  184. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  186. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/adapters.js +0 -0
  188. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/chunker.js +0 -0
  192. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  194. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/config.js +0 -0
  196. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  198. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  200. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  202. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  204. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  206. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  208. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/db.js +0 -0
  210. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  212. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  214. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/index.js +0 -0
  216. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  218. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  219. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  220. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  221. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  222. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  223. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  224. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  225. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  226. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  227. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  229. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  231. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  233. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  235. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  238. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  240. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  242. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  244. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  245. /package/dist/{core → cjs/core}/types.js +0 -0
  246. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  247. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  248. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  249. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  250. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  251. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  252. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  253. /package/dist/{factories → cjs/factories}/index.js +0 -0
  254. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  255. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  256. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  257. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  258. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  259. /package/dist/{index.js → cjs/index.js} +0 -0
  260. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  261. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  262. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  263. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  264. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  265. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  268. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  269. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  270. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  278. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  279. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  280. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  281. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  282. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  283. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  284. /package/dist/{search.js → cjs/search.js} +0 -0
  285. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  286. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  287. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  288. /package/dist/{text → cjs/text}/chunker.js +0 -0
  289. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  290. /package/dist/{text → cjs/text}/embedder.js +0 -0
  291. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  292. /package/dist/{text → cjs/text}/index.js +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  300. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  301. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  302. /package/dist/{text → cjs/text}/reranker.js +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  304. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  307. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  308. /package/dist/{types.js → cjs/types.js} +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  310. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,330 @@
1
+ import { ContentTypeDetector } from './registry.js';
2
/**
 * Mermaid preprocessor for handling Mermaid diagrams in Markdown files.
 *
 * Supports three modes (selected through `options.mode` in {@link MermaidPreprocessor#process}):
 *  - `strip`       – remove every ```mermaid fenced block
 *  - `extract`     – replace each block with a plain-text description of its edges
 *  - `placeholder` – replace each block with the literal text `[diagram removed]`
 */
export class MermaidPreprocessor {
    /**
     * Check whether this preprocessor applies to the given code-fence language.
     *
     * @param language Language identifier of the content (e.g. a code-fence tag).
     * @returns `true` only for the exact string `'mermaid'`.
     */
    appliesTo(language) {
        return language === 'mermaid';
    }

    /**
     * Process Markdown content according to `options.mode`.
     *
     * Content for which `ContentTypeDetector.hasMermaidContent` reports no
     * Mermaid diagram is returned untouched. Unknown modes fall back to the
     * placeholder behaviour after logging a message.
     *
     * @param content Markdown text that may contain ```mermaid blocks.
     * @param options Object whose `mode` is `'strip'`, `'extract'` or `'placeholder'`.
     * @returns The processed text.
     */
    process(content, options) {
        // Only process if content actually contains Mermaid diagrams
        if (!ContentTypeDetector.hasMermaidContent(content)) {
            return content;
        }
        switch (options.mode) {
            case 'strip':
                return this.stripMermaid(content);
            case 'extract':
                return this.extractMermaidEdges(content);
            case 'placeholder':
                return this.replaceWithPlaceholders(content);
            default:
                // NOTE(review): this writes to stdout; if this code can run inside a
                // stdio-based server, console.warn may be the safer channel - confirm.
                console.log(`Unknown Mermaid processing mode: ${options.mode}, using placeholder`);
                return this.replaceWithPlaceholders(content);
        }
    }

    /**
     * Strip Mermaid diagrams entirely.
     *
     * @param content Markdown text.
     * @returns The text without ```mermaid blocks, with runs of blank lines
     *          collapsed and surrounding whitespace trimmed. Never returns an
     *          empty string: `'[content removed]'` is returned instead
     *          (requirement 6.4).
     */
    stripMermaid(content) {
        const cleaned = content
            // Remove mermaid code blocks: ```mermaid ... ```
            .replace(/```mermaid[\s\S]*?```/gi, '')
            // Clean up multiple consecutive newlines and trim
            .replace(/\n\s*\n\s*\n/g, '\n\n')
            .trim();
        // `cleaned` is already trimmed, so an emptiness check is sufficient.
        return cleaned ? cleaned : '[content removed]';
    }

    /**
     * Replace Mermaid diagrams with descriptive placeholders.
     *
     * @param content Markdown text.
     * @returns The text with every ```mermaid block replaced by
     *          `[diagram removed]`, blank-line runs collapsed, and trimmed.
     */
    replaceWithPlaceholders(content) {
        return content
            .replace(/```mermaid[\s\S]*?```/gi, '[diagram removed]')
            .replace(/\n\s*\n\s*\n/g, '\n\n')
            .trim();
    }

    /**
     * Extract semantic information from Mermaid diagrams.
     *
     * Each ```mermaid block is replaced by one line of plain text per
     * recognised edge; styling and layout instructions are ignored. A block
     * that yields no edges becomes `[diagram content extracted]`.
     *
     * @param content Markdown text.
     * @returns The text with diagrams replaced by their textual description.
     */
    extractMermaidEdges(content) {
        // A replacer function is used, so `$` sequences inside the extracted
        // text are inserted literally.
        return content.replace(/```mermaid([\s\S]*?)```/gi, (match, diagramContent) => {
            const edges = this.extractEdgesFromDiagram(diagramContent);
            return edges.length > 0 ? edges.join('\n') : '[diagram content extracted]';
        });
    }

    /**
     * Extract edges and relationships from the body of one Mermaid diagram.
     *
     * The diagram type is taken from the first non-empty, non-comment line and
     * defaults to `flowchart`. Comments, layout instructions and styling
     * instructions are ignored.
     *
     * @param diagramContent Text between the opening and closing fences.
     * @returns One human-readable sentence per recognised edge, in source order.
     */
    extractEdgesFromDiagram(diagramContent) {
        const edges = [];
        const lines = diagramContent.split('\n');

        // Detect diagram type from the first meaningful line.
        let diagramType = 'flowchart'; // default
        const header = lines
            .map(rawLine => rawLine.trim())
            .find(candidate => candidate && !candidate.startsWith('%'));
        if (header) {
            if (header.includes('sequenceDiagram'))
                diagramType = 'sequence';
            else if (header.includes('classDiagram'))
                diagramType = 'class';
            else if (header.includes('stateDiagram'))
                diagramType = 'state';
            else if (header.includes('erDiagram'))
                diagramType = 'er';
        }

        // For flowcharts, build a node label mapping first so that edges which
        // reference a bare node id can still be described by the node's label.
        const nodeLabelMap = new Map();
        if (diagramType === 'flowchart') {
            this.buildNodeLabelMap(lines, nodeLabelMap);
        }

        for (const line of lines) {
            // Drop the `:::className` shorthand before classifying the line.
            // isStyleInstruction() treats any line containing `:::` as styling,
            // which would otherwise discard real edges such as
            // `A[Start]:::hot --> B[End]`.
            const trimmedLine = line.trim().replace(/:::[\w-]+/g, '').trim();
            // Skip empty lines, comments, and layout/style declarations.
            if (!trimmedLine ||
                trimmedLine.startsWith('%') ||
                this.isLayoutInstruction(trimmedLine) ||
                this.isStyleInstruction(trimmedLine)) {
                continue;
            }
            const edge = this.extractEdgeFromLine(trimmedLine, diagramType, nodeLabelMap);
            if (edge) {
                edges.push(edge);
            }
        }
        return edges;
    }

    /**
     * Build a mapping of node ids to their labels by scanning all lines.
     *
     * Recognises `id[label]`, `id{label}`, `id((label))` and `id(label)`
     * anywhere in a line (source and target side). A later definition of the
     * same id overwrites an earlier one.
     *
     * @param lines Raw diagram lines.
     * @param nodeLabelMap Map that is filled in place (id -> trimmed label).
     */
    buildNodeLabelMap(lines, nodeLabelMap) {
        // Created once; matchAll() works on a private copy, so no lastIndex
        // state leaks between lines.
        const nodeDefinition = /(\w+)(?:\[([^\]]+)\]|\{([^}]+)\}|\(\(([^)]+)\)\)|\(([^)]+)\))/g;
        for (const line of lines) {
            const trimmedLine = line.trim();
            if (!trimmedLine || trimmedLine.startsWith('%')) {
                continue;
            }
            for (const match of trimmedLine.matchAll(nodeDefinition)) {
                const [, nodeId, squareLabel, curlyLabel, doubleParenLabel, parenLabel] = match;
                const label = squareLabel || curlyLabel || doubleParenLabel || parenLabel;
                if (label) {
                    nodeLabelMap.set(nodeId, label.trim());
                }
            }
        }
    }

    /**
     * Check if a line is a layout instruction (should be ignored).
     *
     * @param line Trimmed diagram line.
     * @returns `true` for diagram headers, `direction`, `subgraph` and `end`.
     */
    isLayoutInstruction(line) {
        const layoutPatterns = [
            /^(graph|flowchart)\s+(TD|TB|BT|RL|LR)/i,
            /^(sequenceDiagram|classDiagram|stateDiagram|erDiagram|journey|gantt)/i,
            /^pie\s+title/i,
            /^direction\s+(TD|TB|BT|RL|LR)/i,
            /^subgraph/i,
            /^end$/i
        ];
        return layoutPatterns.some(pattern => pattern.test(line));
    }

    /**
     * Check if a line is a styling instruction (should be ignored).
     *
     * Note that the `:::` pattern is not anchored: any line that contains a
     * `:::` class assignment is reported as styling.
     *
     * @param line Trimmed diagram line.
     * @returns `true` when the line matches one of the styling patterns.
     */
    isStyleInstruction(line) {
        const stylePatterns = [
            /^classDef\s+/i,
            /^class\s+.*\s+\w+$/i,
            /^style\s+/i,
            /^fill:/i,
            /^stroke:/i,
            /^color:/i,
            /:::.*$/, // CSS class assignments
            /^linkStyle\s+/i,
            /^click\s+/i
        ];
        return stylePatterns.some(pattern => pattern.test(line));
    }

    /**
     * Extract the meaningful label from a node definition.
     * Examples: A[Start] -> "Start", B{Decision} -> "Decision", C((End)) -> "End", D -> "D"
     *
     * @param nodeText Node text as it appears on an edge line.
     * @param nodeLabelMap Optional id -> label map used when `nodeText` has no
     *                     inline label.
     * @returns The inline label, else the mapped label, else the node id.
     */
    extractNodeLabel(nodeText, nodeLabelMap) {
        // Order matters: `((x))` has to be tried before `(x)`.
        const inlineLabelPatterns = [
            /\w+\[([^\]]+)\]/, // A[Start]
            /\w+\{([^}]+)\}/, // B{Decision}
            /\w+\(\(([^)]+)\)\)/, // C((End))
            /\w+\(([^)]+)\)/ // D(Text)
        ];
        for (const pattern of inlineLabelPatterns) {
            const inlineMatch = nodeText.match(pattern);
            if (inlineMatch) {
                return inlineMatch[1].trim();
            }
        }
        // No inline label: fall back to the label map, then to the bare id.
        const idMatch = nodeText.match(/(\w+)/);
        const nodeId = idMatch ? idMatch[1] : nodeText;
        if (nodeLabelMap && nodeLabelMap.has(nodeId)) {
            return nodeLabelMap.get(nodeId);
        }
        return nodeId;
    }

    /**
     * Extract edge information from a single diagram line.
     *
     * @param line Trimmed diagram line.
     * @param diagramType One of `'sequence'`, `'class'`, `'er'`, `'state'`,
     *                    `'flowchart'` (default). Other values yield `null`.
     * @param nodeLabelMap Optional id -> label map (used for flowcharts).
     * @returns A sentence describing the edge, or `null` when the line is not
     *          a recognised edge.
     */
    extractEdgeFromLine(line, diagramType = 'flowchart', nodeLabelMap) {
        switch (diagramType) {
            case 'sequence': {
                // Sequence diagram: A->>B: message or A-->>B: message
                const sequenceMatch = line.match(/(\w+)\s*--?>>?\+?\s*(\w+)\s*:\s*(.+)/);
                if (sequenceMatch) {
                    const [, from, to, message] = sequenceMatch;
                    return `${from} sends to ${to}: ${message.trim()}`;
                }
                break;
            }
            case 'class': {
                // Class diagram inheritance: Animal --|> Dog
                const inheritanceMatch = line.match(/(\w+)\s*--\|>\s*(\w+)/);
                if (inheritanceMatch) {
                    const [, from, to] = inheritanceMatch;
                    return `${from} inherits from ${to}`;
                }
                // Class diagram other relationships
                const classMatch = line.match(/(\w+)\s*(<\|--|--\||<\|--\|>|\*--|--\*|o--|--o)\s*(\w+)/);
                if (classMatch) {
                    const [, from, connector, to] = classMatch;
                    const relationship = this.interpretClassConnector(connector);
                    return `${from} ${relationship} ${to}`;
                }
                break;
            }
            case 'er': {
                // ER diagram: CUSTOMER ||--o{ ORDER
                const erMatch = line.match(/(\w+)\s*(\|\|--o\{|\}o--\|\||\|\|--\|\||o\{--\|\|)\s*(\w+)/);
                if (erMatch) {
                    const [, from, connector, to] = erMatch;
                    const relationship = this.interpretERConnector(connector);
                    return `${from} ${relationship} ${to}`;
                }
                break;
            }
            case 'state': {
                // State diagram: state1 --> state2, optionally followed by
                // ": label" (but not [*] --> state).
                const stateMatch = line.match(/^(?!\s*\[\*\])\s*(\w+)\s*-->\s*(\w+)(?:\s*:\s*(.+))?$/);
                if (stateMatch) {
                    const [, from, to, rawLabel] = stateMatch;
                    const label = rawLabel ? rawLabel.trim() : '';
                    return label
                        ? `${from} transitions to ${to} (${label})`
                        : `${from} transitions to ${to}`;
                }
                break;
            }
            case 'flowchart': {
                // NOTE(review): the `-.->` alternative in the connector groups
                // below has an unescaped `.`; left as-is to keep existing matches.

                // Flowchart/Graph edges with labels: A --> B | label | (not A -->|label| B)
                const flowchartLabelMatch = line.match(/(\w+(?:\[[^\]]+\]|\{[^}]+\}|\(\([^)]+\)\)|\([^)]+\))?)\s*(-->|---|-.->|-\.-|->|--)\s+(\w+(?:\[[^\]]+\]|\{[^}]+\}|\(\([^)]+\)\)|\([^)]+\))?)\s*\|\s*([^|]+)\s*\|/);
                if (flowchartLabelMatch) {
                    const [, fromNode, connector, toNode, edgeLabel] = flowchartLabelMatch;
                    const from = this.extractNodeLabel(fromNode, nodeLabelMap);
                    const to = this.extractNodeLabel(toNode, nodeLabelMap);
                    const relationship = this.interpretConnector(connector);
                    return `${from} ${relationship} ${to} (${edgeLabel.trim()})`;
                }
                // Alternative label format: A -->|label| B (handle nodes with spaces in labels)
                const flowchartLabelMatch2 = line.match(/(\w+(?:\[[^\]]+\]|\{[^}]+\}|\(\([^)]+\)\)|\([^)]+\))?)\s*(-->|---|-.->|-\.-|->|--)\|([^|]+)\|\s*(.+)/);
                if (flowchartLabelMatch2) {
                    const [, fromNode, connector, edgeLabel, toNode] = flowchartLabelMatch2;
                    const from = this.extractNodeLabel(fromNode, nodeLabelMap);
                    const to = this.extractNodeLabel(toNode.trim(), nodeLabelMap);
                    const relationship = this.interpretConnector(connector);
                    return `${from} ${relationship} ${to} (${edgeLabel.trim()})`;
                }
                // Inline link text: A -- label --> B or A -- label --- B.
                // Must be tried before the generic edge pattern, which would
                // otherwise read `--` as the connector and the first word of
                // the label as the target node. Anchored at the line start so
                // chained edges (A --> B -- x --> C) keep their old handling.
                const inlineLabelMatch = line.match(/^\s*(\w+(?:\[[^\]]+\]|\{[^}]+\}|\(\([^)]+\)\)|\([^)]+\))?)\s*--\s+([^\s|>-][^|>]*?)\s*(-->|---)\s*(.+)/);
                if (inlineLabelMatch) {
                    const [, fromNode, edgeLabel, connector, toNode] = inlineLabelMatch;
                    const from = this.extractNodeLabel(fromNode, nodeLabelMap);
                    const to = this.extractNodeLabel(toNode.trim(), nodeLabelMap);
                    const relationship = this.interpretConnector(connector);
                    return `${from} ${relationship} ${to} (${edgeLabel.trim()})`;
                }
                // Flowchart/Graph edges: A --> B, A --- B, A -.-> B, etc. (with optional node styling)
                const flowchartEdgeMatch = line.match(/(\w+(?:\[[^\]]+\]|\{[^}]+\}|\(\([^)]+\)\)|\([^)]+\))?)\s*(-->|---|-.->|-\.-|->|--)\s*(.+)/);
                if (flowchartEdgeMatch) {
                    const [, fromNode, connector, toNode] = flowchartEdgeMatch;
                    // Skip if this looks like a label format (contains |)
                    if (toNode.includes('|')) {
                        return null;
                    }
                    const from = this.extractNodeLabel(fromNode, nodeLabelMap);
                    const to = this.extractNodeLabel(toNode.trim(), nodeLabelMap);
                    const relationship = this.interpretConnector(connector);
                    return `${from} ${relationship} ${to}`;
                }
                break;
            }
            default:
                break;
        }
        return null;
    }

    /**
     * Interpret flowchart connector symbols.
     *
     * @param connector Connector text such as `-->`, `-.->`, `---`, `-.-`.
     * @returns A verb phrase; `'relates to'` for anything unrecognised.
     */
    interpretConnector(connector) {
        if (connector.includes('-->'))
            return 'leads to';
        if (connector.includes('-.->'))
            return 'optionally leads to';
        if (connector.includes('---'))
            return 'connects to';
        if (connector.includes('-.-'))
            return 'optionally connects to';
        return 'relates to';
    }

    /**
     * Interpret class diagram connector symbols.
     *
     * @param connector Connector text such as `--|>`, `<|--`, `*--`, `o--`.
     * @returns A verb phrase; `'relates to'` for anything unrecognised.
     */
    interpretClassConnector(connector) {
        switch (connector) {
            case '--|>': return 'inherits from';
            case '<|--': return 'is inherited by';
            case '--||': return 'implements';
            case '<|--|>': return 'has bidirectional inheritance with';
            case '*--': return 'composes';
            case '--*': return 'is composed by';
            case 'o--': return 'aggregates';
            case '--o': return 'is aggregated by';
            default: return 'relates to';
        }
    }

    /**
     * Interpret ER diagram connector symbols.
     *
     * @param connector Connector text such as `||--o{` or `}o--||`.
     * @returns A verb phrase; `'has relationship with'` for anything unrecognised.
     */
    interpretERConnector(connector) {
        switch (connector) {
            case '||--o{': return 'has one-to-many relationship with';
            case '}o--||': return 'has many-to-one relationship with';
            case '||--||': return 'has one-to-one relationship with';
            case 'o{--||': return 'has many-to-one relationship with';
            default: return 'has relationship with';
        }
    }
}
330
+ //# sourceMappingURL=mermaid.js.map
@@ -0,0 +1,56 @@
1
+ import { Preprocessor } from '../../types.js';
2
+ /**
3
+ * Registry that stores preprocessors under a name and looks them up either by that name or by the language/content type they apply to
4
+ */
5
+ export declare class PreprocessorRegistry {
6
+ private preprocessors;
7
+ /**
8
+ * Register a preprocessor under the given name; registering the same name again replaces the earlier entry
9
+ */
10
+ register(name: string, preprocessor: Preprocessor): void;
11
+ /**
12
+ * Get the preprocessor registered under the given name, or undefined when no such name is registered
13
+ */
14
+ get(name: string): Preprocessor | undefined;
15
+ /**
16
+ * Get every registered preprocessor whose appliesTo(language) check accepts the given language/content type (empty array when none do)
17
+ */
18
+ getApplicable(language: string): Preprocessor[];
19
+ /**
20
+ * Get the names of all registered preprocessors
21
+ */
22
+ getRegisteredNames(): string[];
23
+ /**
24
+ * Check that every name in requiredNames is registered; `missing` lists the names that are not, and `valid` is true only when `missing` is empty
25
+ */
26
+ validatePreprocessors(requiredNames: string[]): {
27
+ valid: boolean;
28
+ missing: string[];
29
+ };
30
+ }
31
+ /**
32
+ * Stateless, heuristic content type detection utilities (all members are static)
33
+ */
34
+ export declare class ContentTypeDetector {
35
+ /**
36
+ * Detect the content type from a file path's extension (case-insensitive); returns null when the extension is not recognized
37
+ */
38
+ static detectFromExtension(filePath: string): string | null;
39
+ /**
40
+ * Normalize a code fence language identifier: lower-cases and trims it, expands the aliases js, ts, py, sh, shell and yml, and returns any other identifier as normalized
41
+ */
42
+ static detectFromCodeFence(language: string): string;
43
+ /**
44
+ * Heuristically detect JSX/MDX-style content in text using pattern matching; a true result is a hint, not a guarantee
45
+ */
46
+ static hasJsxContent(content: string): boolean;
47
+ /**
48
+ * Heuristically detect Mermaid diagram content: a ```mermaid fence or a Mermaid diagram keyword such as sequenceDiagram or erDiagram
49
+ */
50
+ static hasMermaidContent(content: string): boolean;
51
+ /**
52
+ * Extract the normalized language of a code block that starts with a ``` fence; returns null when the block does not start with a fence followed by a language identifier
53
+ */
54
+ static extractCodeFenceLanguage(codeBlock: string): string | null;
55
+ }
56
+ //# sourceMappingURL=registry.d.ts.map
@@ -0,0 +1,180 @@
1
+ /**
2
+ * Registry for managing preprocessors and content type detection
3
+ */
4
+ export class PreprocessorRegistry {
5
+ preprocessors = new Map();
6
+ /**
7
+ * Register a preprocessor with a given name
8
+ */
9
+ register(name, preprocessor) {
10
+ this.preprocessors.set(name, preprocessor);
11
+ }
12
+ /**
13
+ * Get a specific preprocessor by name
14
+ */
15
+ get(name) {
16
+ return this.preprocessors.get(name);
17
+ }
18
+ /**
19
+ * Get all preprocessors that apply to the given language/content type
20
+ */
21
+ getApplicable(language) {
22
+ const applicable = [];
23
+ const preprocessors = Array.from(this.preprocessors.values());
24
+ for (const preprocessor of preprocessors) {
25
+ if (preprocessor.appliesTo(language)) {
26
+ applicable.push(preprocessor);
27
+ }
28
+ }
29
+ return applicable;
30
+ }
31
+ /**
32
+ * Get all registered preprocessor names
33
+ */
34
+ getRegisteredNames() {
35
+ return Array.from(this.preprocessors.keys());
36
+ }
37
+ /**
38
+ * Validate that all required preprocessors are available
39
+ */
40
+ validatePreprocessors(requiredNames) {
41
+ const missing = requiredNames.filter(name => !this.preprocessors.has(name));
42
+ return {
43
+ valid: missing.length === 0,
44
+ missing
45
+ };
46
+ }
47
+ }
48
/**
 * File extension (lower-case, without the dot) -> content type name.
 */
const EXTENSION_CONTENT_TYPES = new Map([
    ['mdx', 'mdx'],
    ['md', 'markdown'],
    ['js', 'javascript'],
    ['jsx', 'javascript'],
    ['ts', 'typescript'],
    ['tsx', 'typescript'],
    ['py', 'python'],
    ['java', 'java'],
    ['cpp', 'cpp'],
    ['cc', 'cpp'],
    ['cxx', 'cpp'],
    ['c', 'c'],
    ['cs', 'csharp'],
    ['php', 'php'],
    ['rb', 'ruby'],
    ['go', 'go'],
    ['rs', 'rust'],
    ['swift', 'swift'],
    ['kt', 'kotlin'],
    ['scala', 'scala'],
    ['sh', 'bash'],
    ['bash', 'bash'],
    ['ps1', 'powershell'],
    ['sql', 'sql'],
    ['html', 'html'],
    ['css', 'css'],
    ['json', 'json'],
    ['xml', 'xml'],
    ['yaml', 'yaml'],
    ['yml', 'yaml'],
]);
/**
 * Short code fence identifiers -> canonical language name.
 */
const CODE_FENCE_ALIASES = new Map([
    ['js', 'javascript'],
    ['ts', 'typescript'],
    ['py', 'python'],
    ['sh', 'bash'],
    ['shell', 'bash'],
    ['yml', 'yaml'],
]);
/**
 * Heuristic JSX/MDX markers. Deliberately loose: any one match counts.
 */
const JSX_PATTERNS = [
    /<[A-Z][a-zA-Z0-9]*(?:\s[^>]*)?\/?>/, // Component tags
    /<[a-z]+\s+className=/, // className attribute
    /\{[^}]*\}/, // JSX expressions
    /import\s+.*\s+from\s+['"][^'"]*['"]/, // ES6 imports
    /export\s+(default\s+)?/ // ES6 exports
];
/**
 * Mermaid markers: a ```mermaid fence or a diagram-type keyword.
 */
const MERMAID_PATTERNS = [
    /```mermaid/i,
    /graph\s+(TD|TB|BT|RL|LR)/i,
    /flowchart\s+(TD|TB|BT|RL|LR)/i,
    /sequenceDiagram/i,
    /classDiagram/i,
    /stateDiagram/i,
    /erDiagram/i,
    // "journey" and "gantt" are ordinary English words, so they only count
    // when they open a line, the way a Mermaid diagram declaration does.
    /^\s*journey\b/im,
    /^\s*gantt\b/im,
    /pie\s+title/i
];
/**
 * Content type detection utilities
 */
export class ContentTypeDetector {
    /**
     * Detect content type from file extension.
     *
     * Only the final path segment is inspected, and it must contain a dot:
     * an extensionless file such as `bin/sh` is not mistaken for a `.sh`
     * file, and dots in directory names are ignored.
     *
     * @param filePath - Path or file name; matching is case-insensitive
     * @returns The content type name, or null when the extension is missing or unknown
     */
    static detectFromExtension(filePath) {
        const baseName = filePath.split(/[\\/]/).pop() ?? '';
        const dotIndex = baseName.lastIndexOf('.');
        if (dotIndex === -1) {
            return null;
        }
        const extension = baseName.slice(dotIndex + 1).toLowerCase();
        return EXTENSION_CONTENT_TYPES.get(extension) ?? null;
    }
    /**
     * Detect content type from code fence language identifier.
     *
     * @param language - Identifier as written after the opening fence
     * @returns The canonical name for known aliases, otherwise the lower-cased, trimmed identifier
     */
    static detectFromCodeFence(language) {
        const normalized = language.toLowerCase().trim();
        return CODE_FENCE_ALIASES.get(normalized) ?? normalized;
    }
    /**
     * Detect JSX content in text.
     *
     * NOTE(review): the brace and `export` patterns are very broad and also
     * match plain prose or JSON; treat a true result as a hint only.
     *
     * @param content - Text to inspect
     * @returns true when any JSX/MDX marker pattern matches
     */
    static hasJsxContent(content) {
        return JSX_PATTERNS.some(pattern => pattern.test(content));
    }
    /**
     * Detect Mermaid diagram content.
     *
     * @param content - Text to inspect
     * @returns true when a ```mermaid fence or a diagram-type keyword is found
     */
    static hasMermaidContent(content) {
        return MERMAID_PATTERNS.some(pattern => pattern.test(content));
    }
    /**
     * Extract code fence language from a code block.
     *
     * The identifier may contain `+` and `#` so that `c++`, `c#` and `f#`
     * are not truncated to their first letter.
     *
     * @param codeBlock - Text that starts with the opening ``` fence
     * @returns The normalized language, or null when no identifier follows the fence
     */
    static extractCodeFenceLanguage(codeBlock) {
        const match = codeBlock.match(/^```([\w+#]+)/);
        return match ? ContentTypeDetector.detectFromCodeFence(match[1]) : null;
    }
}
180
+ //# sourceMappingURL=registry.js.map
@@ -0,0 +1,49 @@
1
+ import '../dom-polyfills.js';
2
+ import type { SearchResult, RerankFunction } from '../core/types.js';
3
+ /**
4
+ * Embedding-based reranker for improving search result quality
5
+ * Uses embedding similarity to rerank initial vector search results. NOTE(review): the class is named CrossEncoderReranker but is documented here as embedding-similarity based; confirm against the implementation which description is accurate
6
+ */
7
+ export declare class CrossEncoderReranker {
8
+ private model;
9
+ private tokenizer;
10
+ private modelName;
11
+ /**
12
+ * Ensure DOM polyfills are set up for transformers.js
13
+ */
14
+ private ensurePolyfills;
15
+ /**
16
+ * Load the embedding model; the returned promise resolves with no value. NOTE(review): whether it rejects or falls back when loading fails is not visible in this declaration
17
+ */
18
+ loadModel(): Promise<void>;
19
+ /**
20
+ * Try to load a specific model
21
+ */
22
+ private tryLoadModel;
23
+ /**
24
+ * Rerank search results using embedding similarity scoring
25
+ * @param query - Original search query
26
+ * @param results - Initial search results from vector search
27
+ * @returns Promise resolving to reranked results
28
+ */
29
+ rerank(query: string, results: SearchResult[]): Promise<SearchResult[]>;
30
+ /**
31
+ * Check if the model is loaded
32
+ */
33
+ isLoaded(): boolean;
34
+ /**
35
+ * Get the name of the model being used
36
+ */
37
+ getModelName(): string;
38
+ /**
39
+ * Compute cosine similarity between two Float32Array embeddings
40
+ */
41
+ private cosineSimilarity;
42
+ /**
43
+ * Simple text-based reranking using keyword matching and text similarity
44
+ * This is a fallback for when embedding similarity does not discriminate well between results
45
+ */
46
+ private simpleTextReranking;
47
+ }
48
+ /** Create a RerankFunction for text results; modelName is optional. NOTE(review): the default model and the reranker backing the returned function are not visible in this declaration; confirm in the implementation. */ export declare function createTextRerankFunction(modelName?: string): RerankFunction;
49
+ //# sourceMappingURL=reranker.d.ts.map