rag-lite-ts 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309) hide show
  1. package/dist/{core → cjs/core}/model-validator.js +1 -1
  2. package/dist/{core → cjs/core}/vector-index.js +4 -2
  3. package/dist/esm/api-errors.d.ts +90 -0
  4. package/dist/esm/api-errors.js +320 -0
  5. package/dist/esm/cli/indexer.d.ts +11 -0
  6. package/dist/esm/cli/indexer.js +471 -0
  7. package/dist/esm/cli/search.d.ts +7 -0
  8. package/dist/esm/cli/search.js +332 -0
  9. package/dist/esm/cli.d.ts +3 -0
  10. package/dist/esm/cli.js +529 -0
  11. package/dist/esm/config.d.ts +51 -0
  12. package/dist/esm/config.js +79 -0
  13. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  14. package/dist/esm/core/abstract-embedder.js +264 -0
  15. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  16. package/dist/esm/core/actionable-error-messages.js +397 -0
  17. package/dist/esm/core/adapters.d.ts +93 -0
  18. package/dist/esm/core/adapters.js +139 -0
  19. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  20. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  21. package/dist/esm/core/binary-index-format.d.ts +78 -0
  22. package/dist/esm/core/binary-index-format.js +291 -0
  23. package/dist/esm/core/chunker.d.ts +119 -0
  24. package/dist/esm/core/chunker.js +73 -0
  25. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  26. package/dist/esm/core/cli-database-utils.js +239 -0
  27. package/dist/esm/core/config.d.ts +102 -0
  28. package/dist/esm/core/config.js +247 -0
  29. package/dist/esm/core/content-errors.d.ts +111 -0
  30. package/dist/esm/core/content-errors.js +362 -0
  31. package/dist/esm/core/content-manager.d.ts +335 -0
  32. package/dist/esm/core/content-manager.js +1476 -0
  33. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  34. package/dist/esm/core/content-performance-optimizer.js +516 -0
  35. package/dist/esm/core/content-resolver.d.ts +104 -0
  36. package/dist/esm/core/content-resolver.js +285 -0
  37. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  38. package/dist/esm/core/cross-modal-search.js +342 -0
  39. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  40. package/dist/esm/core/database-connection-manager.js +310 -0
  41. package/dist/esm/core/db.d.ts +213 -0
  42. package/dist/esm/core/db.js +895 -0
  43. package/dist/esm/core/embedder-factory.d.ts +154 -0
  44. package/dist/esm/core/embedder-factory.js +311 -0
  45. package/dist/esm/core/error-handler.d.ts +112 -0
  46. package/dist/esm/core/error-handler.js +239 -0
  47. package/dist/esm/core/index.d.ts +59 -0
  48. package/dist/esm/core/index.js +69 -0
  49. package/dist/esm/core/ingestion.d.ts +202 -0
  50. package/dist/esm/core/ingestion.js +901 -0
  51. package/dist/esm/core/interfaces.d.ts +408 -0
  52. package/dist/esm/core/interfaces.js +106 -0
  53. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  54. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  55. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  56. package/dist/esm/core/mode-detection-service.js +565 -0
  57. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  58. package/dist/esm/core/mode-model-validator.js +203 -0
  59. package/dist/esm/core/model-registry.d.ts +116 -0
  60. package/dist/esm/core/model-registry.js +411 -0
  61. package/dist/esm/core/model-validator.d.ts +217 -0
  62. package/dist/esm/core/model-validator.js +782 -0
  63. package/dist/esm/core/path-manager.d.ts +47 -0
  64. package/dist/esm/core/path-manager.js +71 -0
  65. package/dist/esm/core/raglite-paths.d.ts +121 -0
  66. package/dist/esm/core/raglite-paths.js +145 -0
  67. package/dist/esm/core/reranking-config.d.ts +42 -0
  68. package/dist/esm/core/reranking-config.js +147 -0
  69. package/dist/esm/core/reranking-factory.d.ts +92 -0
  70. package/dist/esm/core/reranking-factory.js +410 -0
  71. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  72. package/dist/esm/core/reranking-strategies.js +650 -0
  73. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  74. package/dist/esm/core/resource-cleanup.js +371 -0
  75. package/dist/esm/core/resource-manager.d.ts +212 -0
  76. package/dist/esm/core/resource-manager.js +564 -0
  77. package/dist/esm/core/search-pipeline.d.ts +111 -0
  78. package/dist/esm/core/search-pipeline.js +287 -0
  79. package/dist/esm/core/search.d.ts +141 -0
  80. package/dist/esm/core/search.js +320 -0
  81. package/dist/esm/core/streaming-operations.d.ts +145 -0
  82. package/dist/esm/core/streaming-operations.js +409 -0
  83. package/dist/esm/core/types.d.ts +66 -0
  84. package/dist/esm/core/types.js +6 -0
  85. package/dist/esm/core/universal-embedder.d.ts +177 -0
  86. package/dist/esm/core/universal-embedder.js +139 -0
  87. package/dist/esm/core/validation-messages.d.ts +99 -0
  88. package/dist/esm/core/validation-messages.js +334 -0
  89. package/dist/esm/core/vector-index.d.ts +72 -0
  90. package/dist/esm/core/vector-index.js +333 -0
  91. package/dist/esm/dom-polyfills.d.ts +6 -0
  92. package/dist/esm/dom-polyfills.js +37 -0
  93. package/dist/esm/factories/index.d.ts +27 -0
  94. package/dist/esm/factories/index.js +29 -0
  95. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  96. package/dist/esm/factories/ingestion-factory.js +477 -0
  97. package/dist/esm/factories/search-factory.d.ts +154 -0
  98. package/dist/esm/factories/search-factory.js +344 -0
  99. package/dist/esm/file-processor.d.ts +147 -0
  100. package/dist/esm/file-processor.js +963 -0
  101. package/dist/esm/index-manager.d.ts +116 -0
  102. package/dist/esm/index-manager.js +598 -0
  103. package/dist/esm/index.d.ts +75 -0
  104. package/dist/esm/index.js +110 -0
  105. package/dist/esm/indexer.d.ts +7 -0
  106. package/dist/esm/indexer.js +54 -0
  107. package/dist/esm/ingestion.d.ts +63 -0
  108. package/dist/esm/ingestion.js +124 -0
  109. package/dist/esm/mcp-server.d.ts +46 -0
  110. package/dist/esm/mcp-server.js +1820 -0
  111. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  112. package/dist/esm/multimodal/clip-embedder.js +996 -0
  113. package/dist/esm/multimodal/index.d.ts +6 -0
  114. package/dist/esm/multimodal/index.js +6 -0
  115. package/dist/esm/preprocess.d.ts +19 -0
  116. package/dist/esm/preprocess.js +203 -0
  117. package/dist/esm/preprocessors/index.d.ts +17 -0
  118. package/dist/esm/preprocessors/index.js +38 -0
  119. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  120. package/dist/esm/preprocessors/mdx.js +101 -0
  121. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  122. package/dist/esm/preprocessors/mermaid.js +329 -0
  123. package/dist/esm/preprocessors/registry.d.ts +56 -0
  124. package/dist/esm/preprocessors/registry.js +179 -0
  125. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  126. package/dist/esm/run-error-recovery-tests.js +101 -0
  127. package/dist/esm/search-standalone.d.ts +7 -0
  128. package/dist/esm/search-standalone.js +117 -0
  129. package/dist/esm/search.d.ts +99 -0
  130. package/dist/esm/search.js +177 -0
  131. package/dist/esm/test-utils.d.ts +18 -0
  132. package/dist/esm/test-utils.js +27 -0
  133. package/dist/esm/text/chunker.d.ts +33 -0
  134. package/dist/esm/text/chunker.js +279 -0
  135. package/dist/esm/text/embedder.d.ts +111 -0
  136. package/dist/esm/text/embedder.js +386 -0
  137. package/dist/esm/text/index.d.ts +8 -0
  138. package/dist/esm/text/index.js +9 -0
  139. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  140. package/dist/esm/text/preprocessors/index.js +38 -0
  141. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  142. package/dist/esm/text/preprocessors/mdx.js +101 -0
  143. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  144. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  145. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  146. package/dist/esm/text/preprocessors/registry.js +180 -0
  147. package/dist/esm/text/reranker.d.ts +49 -0
  148. package/dist/esm/text/reranker.js +274 -0
  149. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  150. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  151. package/dist/esm/text/tokenizer.d.ts +22 -0
  152. package/dist/esm/text/tokenizer.js +64 -0
  153. package/dist/esm/types.d.ts +83 -0
  154. package/dist/esm/types.js +3 -0
  155. package/dist/esm/utils/vector-math.d.ts +31 -0
  156. package/dist/esm/utils/vector-math.js +70 -0
  157. package/package.json +30 -12
  158. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  159. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  160. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  161. /package/dist/{cli → cjs/cli}/indexer.js +0 -0
  162. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  163. /package/dist/{cli → cjs/cli}/search.js +0 -0
  164. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  165. /package/dist/{cli.js → cjs/cli.js} +0 -0
  166. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  167. /package/dist/{config.js → cjs/config.js} +0 -0
  168. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  169. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  170. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  171. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  172. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  173. /package/dist/{core → cjs/core}/adapters.js +0 -0
  174. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  175. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  176. /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
  177. /package/dist/{core → cjs/core}/binary-index-format.js +0 -0
  178. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  179. /package/dist/{core → cjs/core}/chunker.js +0 -0
  180. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  181. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  182. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  183. /package/dist/{core → cjs/core}/config.js +0 -0
  184. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  185. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  186. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  187. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  188. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  189. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  190. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  191. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  192. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  193. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  194. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  195. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  196. /package/dist/{core → cjs/core}/db.d.ts +0 -0
  197. /package/dist/{core → cjs/core}/db.js +0 -0
  198. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  199. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  200. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  202. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/index.js +0 -0
  204. /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/ingestion.js +0 -0
  206. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  208. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  210. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  211. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  212. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  213. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  214. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  215. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  216. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  217. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  218. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  219. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  220. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  221. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  222. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  223. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  224. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  225. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  226. /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
  227. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  229. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  231. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/search-pipeline.js +0 -0
  233. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/search.js +0 -0
  235. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  236. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  237. /package/dist/{core → cjs/core}/types.d.ts +0 -0
  238. /package/dist/{core → cjs/core}/types.js +0 -0
  239. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  240. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  241. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  242. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  243. /package/dist/{core → cjs/core}/vector-index.d.ts +0 -0
  244. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  245. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  246. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  247. /package/dist/{factories → cjs/factories}/index.js +0 -0
  248. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  249. /package/dist/{factories → cjs/factories}/ingestion-factory.js +0 -0
  250. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  251. /package/dist/{factories → cjs/factories}/search-factory.js +0 -0
  252. /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
  253. /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
  254. /package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +0 -0
  255. /package/dist/{index-manager.js → cjs/index-manager.js} +0 -0
  256. /package/dist/{index.d.ts → cjs/index.d.ts} +0 -0
  257. /package/dist/{index.js → cjs/index.js} +0 -0
  258. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  259. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  260. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  261. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  262. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  263. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  264. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  265. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
  266. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  267. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  268. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  269. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  270. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  271. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  272. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  273. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  274. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  275. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  276. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  277. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  278. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  279. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  280. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  281. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  282. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  283. /package/dist/{search.js → cjs/search.js} +0 -0
  284. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  285. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  286. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  287. /package/dist/{text → cjs/text}/chunker.js +0 -0
  288. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  289. /package/dist/{text → cjs/text}/embedder.js +0 -0
  290. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  291. /package/dist/{text → cjs/text}/index.js +0 -0
  292. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  293. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  294. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  295. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  296. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  297. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  298. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  299. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  300. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  301. /package/dist/{text → cjs/text}/reranker.js +0 -0
  302. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  303. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  304. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  305. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  306. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  307. /package/dist/{types.js → cjs/types.js} +0 -0
  308. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  309. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,386 @@
1
+ import '../dom-polyfills.js';
2
+ import { createHash } from 'crypto';
3
+ import { config } from '../core/config.js';
4
+ import { handleError, ErrorCategory, ErrorSeverity, safeExecute } from '../core/error-handler.js';
5
+ import { createModelLoadingError, createInvalidContentError, createMissingDependencyError } from '../core/actionable-error-messages.js';
6
/**
 * Embedding model identifiers accepted by the EmbeddingEngine constructor.
 * Any other model name is rejected at construction time.
 */
const SUPPORTED_MODELS = ['sentence-transformers/all-MiniLM-L6-v2', 'Xenova/all-mpnet-base-v2'];
13
/**
 * Text embedding engine backed by a transformers.js feature-extraction pipeline.
 *
 * Typical use: construct (the model name is checked against SUPPORTED_MODELS),
 * call loadModel() once, then call embedBatch / embedSingle / embedDocumentBatch.
 * Every embedding result has the shape
 * `{ embedding_id: string, vector: Float32Array, contentType: 'text' }`.
 */
export class EmbeddingEngine {
    /** Loaded pipeline function; stays null until loadModel() succeeds. */
    model = null;
    /** Value produced by generateModelVersion(); stays null until loadModel() succeeds. */
    modelVersion = null;
    /** Name of the embedding model in use. */
    modelName;
    /** Maximum number of texts handed to the model in one call. */
    batchSize;
    /**
     * @param modelName - Model to use; falls back to config.embedding_model when falsy
     * @param batchSize - Texts per model call; falls back to config.batch_size when falsy
     * @throws The error built by createModelLoadingError when the model is not in SUPPORTED_MODELS
     */
    constructor(modelName, batchSize) {
        this.modelName = modelName || config.embedding_model;
        this.batchSize = batchSize || config.batch_size;
        // Validate that the model is supported
        if (!SUPPORTED_MODELS.includes(this.modelName)) {
            throw createModelLoadingError(this.modelName, `Model not in supported list. Supported models: ${SUPPORTED_MODELS.join(', ')}`, { operationContext: 'EmbeddingEngine constructor' });
        }
        console.log(`🤖 EmbeddingEngine initialized with model: ${this.modelName}, batchSize: ${this.batchSize}`);
    }
    /**
     * Load the embedding model and compute its version identifier.
     *
     * The work runs inside safeExecute with MODEL category, FATAL severity and
     * exitCode 6, so how a failure surfaces (throw vs. process exit) is decided
     * by that helper.
     */
    async loadModel() {
        await safeExecute(async () => {
            console.log(`Loading embedding model: ${this.modelName}`);
            // Ensure browser-style globals exist before transformers.js is imported.
            // (A separate `global.self` check is unnecessary: in Node `global` is
            // the same object as `globalThis`.)
            if (typeof globalThis.self === 'undefined') {
                globalThis.self = globalThis;
            }
            // Additional polyfills that might be needed
            if (typeof globalThis.window === 'undefined') {
                globalThis.window = {};
            }
            if (typeof globalThis.document === 'undefined') {
                globalThis.document = {};
            }
            console.log('Embedder polyfills set up. self is now:', typeof self !== 'undefined' ? 'defined' : 'undefined');
            // Initialize the feature extraction pipeline using dynamic import
            // Let transformers.js handle model caching automatically
            try {
                const { pipeline } = await import('@huggingface/transformers');
                this.model = await pipeline('feature-extraction', this.modelName, {
                    cache_dir: config.model_cache_path,
                    local_files_only: false,
                    dtype: 'fp32' // Explicitly specify dtype to suppress warning
                });
            }
            catch (error) {
                // Translate failures that look network/download related into a
                // message that points the user at the offline setup instructions.
                const downloadHints = ['network', 'download', 'fetch', 'ENOTFOUND', 'ECONNREFUSED', 'timeout'];
                if (error instanceof Error && downloadHints.some((hint) => error.message.includes(hint))) {
                    throw new Error(`Failed to download model '${this.modelName}'. ` +
                        `Check your internet connection or see models/README.md for offline setup instructions.`);
                }
                throw error;
            }
            // Generate model version hash
            this.modelVersion = this.generateModelVersion();
            console.log(`Model loaded successfully. Version: ${this.modelVersion}`);
        }, 'Model Loading', {
            category: ErrorCategory.MODEL,
            severity: ErrorSeverity.FATAL,
            exitCode: 6
        });
    }
    /**
     * Generate embeddings for a list of texts, processed in slices of batchSize.
     * @param texts - Array of text strings to embed
     * @returns Promise resolving to the embedding results; chunks that could not
     *          be embedded are omitted, so the result may be shorter than `texts`
     * @throws The error built by createMissingDependencyError when the model is not loaded
     */
    async embedBatch(texts) {
        if (!this.model) {
            throw createMissingDependencyError('model', 'object', {
                operationContext: 'embedBatch',
                includeTroubleshooting: true
            });
        }
        if (texts.length === 0) {
            return [];
        }
        // Split into smaller batches based on configured batch size
        const results = [];
        for (let i = 0; i < texts.length; i += this.batchSize) {
            const batch = texts.slice(i, i + this.batchSize);
            const batchResults = await this.processBatchWithErrorHandling(batch, i);
            results.push(...batchResults);
        }
        return results;
    }
    /**
     * Embed one batch in a single model call; if that call fails, retry the
     * batch one chunk at a time so a single bad chunk does not lose the rest.
     * @param batch - Array of text strings in this batch
     * @param startIndex - Starting index for this batch in the original array
     * @returns Promise resolving to array of embedding results
     */
    async processBatchWithErrorHandling(batch, startIndex) {
        // Success is tracked with an explicit flag rather than by inspecting the
        // value safeExecute returns: the configured fallbackValue is `[]`, which
        // is truthy, so a `safeExecute(...) || fallback` expression can never
        // reach the per-chunk fallback when the fallback value is returned.
        let batchSucceeded = false;
        const batchResults = await safeExecute(async () => {
            // Try to process the entire batch first
            const embeddings = await this.model(batch, {
                pooling: 'mean',
                normalize: true
            });
            // Convert to EmbeddingResult format
            const embeddingData = embeddings.tolist();
            const results = batch.map((text, offset) => ({
                embedding_id: this.generateEmbeddingId(text, startIndex + offset),
                vector: new Float32Array(embeddingData[offset]),
                contentType: 'text'
            }));
            batchSucceeded = true;
            return results;
        }, `Batch Embedding (${batch.length} chunks)`, {
            category: ErrorCategory.EMBEDDING,
            severity: ErrorSeverity.ERROR,
            skipError: true,
            fallbackValue: []
        });
        if (batchSucceeded) {
            return batchResults;
        }
        return await this.fallbackToIndividualProcessing(batch, startIndex);
    }
    /**
     * Fallback to individual chunk processing when batch fails.
     * Chunks whose individual embedding yields no result are skipped.
     * @param batch - Array of text strings in the failed batch
     * @param startIndex - Starting index for this batch in the original array
     * @returns Promise resolving to the results of the chunks that succeeded
     */
    async fallbackToIndividualProcessing(batch, startIndex) {
        handleError(`Batch processing failed for ${batch.length} chunks, falling back to individual processing`, 'Embedding Batch Processing', {
            category: ErrorCategory.EMBEDDING,
            severity: ErrorSeverity.WARNING,
            skipError: true
        });
        const results = [];
        for (let i = 0; i < batch.length; i++) {
            const singleResult = await safeExecute(() => this.processSingleChunk(batch[i], startIndex + i), `Individual Chunk Embedding (${startIndex + i})`, {
                category: ErrorCategory.EMBEDDING,
                severity: ErrorSeverity.WARNING,
                skipError: true
            });
            if (singleResult) {
                results.push(singleResult);
            }
        }
        return results;
    }
    /**
     * Embed a single chunk. Errors are not handled here; they propagate to the
     * caller (fallbackToIndividualProcessing wraps this call in safeExecute).
     * @param text - Text to embed
     * @param index - Index of this chunk
     * @returns Promise resolving to the embedding result
     */
    async processSingleChunk(text, index) {
        const embeddings = await this.model([text], {
            pooling: 'mean',
            normalize: true
        });
        const embeddingData = embeddings.tolist();
        return {
            embedding_id: this.generateEmbeddingId(text, index),
            vector: new Float32Array(embeddingData[0]),
            contentType: 'text'
        };
    }
    /**
     * Generate embedding for a single text
     * @param text - Text string to embed
     * @returns Promise resolving to embedding result
     * @throws The error built by createInvalidContentError when no embedding was produced
     */
    async embedSingle(text) {
        const results = await this.embedBatch([text]);
        if (results.length === 0) {
            throw createInvalidContentError('text', 'empty', {
                operationContext: 'embedText'
            });
        }
        return results[0];
    }
    /**
     * Generate embeddings for document chunks with progress logging.
     * Processes the chunks in slices of batchSize and reports progress roughly
     * every 5% of batches, plus once after the final batch.
     * @param chunks - Array of text chunks from documents
     * @returns Promise resolving to array of embedding results (failed chunks are omitted)
     * @throws {Error} If the model has not been loaded
     */
    async embedDocumentBatch(chunks) {
        if (!this.model) {
            throw new Error('Model not loaded. Call loadModel() first.');
        }
        if (chunks.length === 0) {
            return [];
        }
        console.log(`Processing ${chunks.length} chunk${chunks.length === 1 ? '' : 's'} in batches of ${this.batchSize}...`);
        const results = [];
        const totalBatches = Math.ceil(chunks.length / this.batchSize);
        // Loop-invariant: number of batches between progress messages (about 5%).
        const progressInterval = Math.max(1, Math.floor(totalBatches / 20));
        let processedChunks = 0;
        let skippedChunks = 0;
        for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
            const startIdx = batchIndex * this.batchSize;
            const endIdx = Math.min(startIdx + this.batchSize, chunks.length);
            const batch = chunks.slice(startIdx, endIdx);
            try {
                const batchResults = await this.processBatchWithErrorHandling(batch, startIdx);
                results.push(...batchResults);
                processedChunks += batchResults.length;
                skippedChunks += (batch.length - batchResults.length);
                // Progress logging - more frequent updates for better user experience
                if ((batchIndex + 1) % progressInterval === 0 || batchIndex === totalBatches - 1) {
                    const percentage = Math.round(((batchIndex + 1) / totalBatches) * 100);
                    console.log(`Processed ${processedChunks} of ${chunks.length} chunks (${percentage}%)${skippedChunks > 0 ? ` - ${skippedChunks} skipped` : ''}`);
                }
            }
            catch (error) {
                // A batch that throws is counted as skipped; processing continues.
                console.error(`Failed to process batch ${batchIndex + 1}/${totalBatches}:`, error instanceof Error ? error.message : String(error));
                skippedChunks += batch.length;
            }
        }
        if (skippedChunks > 0) {
            console.log(`✓ Embedding complete: ${processedChunks} successful, ${skippedChunks} skipped due to errors`);
        }
        else {
            console.log(`✓ Embedding complete: ${processedChunks} chunks processed successfully`);
        }
        return results;
    }
    /**
     * Get the current model version identifier
     * @returns Model version string
     * @throws {Error} If the model has not been loaded
     */
    getModelVersion() {
        if (!this.modelVersion) {
            throw new Error('Model not loaded. Call loadModel() first.');
        }
        return this.modelVersion;
    }
    /**
     * Check if the model is loaded
     * @returns True if model is loaded
     */
    isLoaded() {
        return this.model !== null;
    }
    /**
     * Get the model name
     * @returns Model name string
     */
    getModelName() {
        return this.modelName;
    }
    /**
     * Get the batch size
     * @returns Batch size number
     */
    getBatchSize() {
        return this.batchSize;
    }
    /**
     * Generate a deterministic model version identifier.
     * The identifier is the model name (every '/' replaced by '_') followed by
     * the first 16 hex characters of a SHA-256 over the model configuration.
     * @returns Model version string
     */
    generateModelVersion() {
        // Create a deterministic hash based on model name and configuration
        // This ensures the same model configuration always produces the same version
        const configData = JSON.stringify({
            model: this.modelName,
            // Add other relevant config that affects embeddings
            quantized: false,
            revision: 'main'
        });
        const hash = createHash('sha256').update(configData).digest('hex').substring(0, 16);
        // Global regex so that every slash is replaced, not only the first one.
        return `${this.modelName.replace(/\//g, '_')}_${hash}`;
    }
    /**
     * Generate a deterministic embedding ID for a text chunk.
     * The ID is the first 32 hex characters of the SHA-256 of the trimmed text,
     * so texts that are equal after trimming share the same ID.
     * @param text - The text content
     * @param index - Index in the batch (not used: the ID depends on content only)
     * @returns Deterministic embedding ID
     */
    generateEmbeddingId(text, index) {
        // Create deterministic ID based on content hash only
        // This ensures the same text always gets the same ID regardless of processing order
        const contentHash = createHash('sha256').update(text.trim()).digest('hex');
        return contentHash.substring(0, 32);
    }
}
311
/**
 * Module-level holder for the shared embedding engine.
 * Stays null until getEmbeddingEngine() is first called.
 */
let embeddingEngineInstance = null;
/**
 * Return the shared embedding engine, creating it when necessary.
 *
 * Passing a truthy modelName or batchSize always replaces the shared instance
 * with a newly constructed engine, so a cached engine with a different
 * configuration is never handed back. Without overrides, the existing
 * instance is reused, or a default one is created on first use.
 * @param modelName - Optional model name override
 * @param batchSize - Optional batch size override
 * @returns EmbeddingEngine instance
 */
export function getEmbeddingEngine(modelName, batchSize) {
    const hasOverrides = Boolean(modelName || batchSize);
    if (hasOverrides) {
        embeddingEngineInstance = new EmbeddingEngine(modelName, batchSize);
        return embeddingEngineInstance;
    }
    if (!embeddingEngineInstance) {
        embeddingEngineInstance = new EmbeddingEngine();
    }
    return embeddingEngineInstance;
}
333
/**
 * Obtain an embedding engine via getEmbeddingEngine() and make sure its model
 * is loaded before handing it back.
 * @param modelName - Optional model name override
 * @param batchSize - Optional batch size override
 * @returns Promise resolving to the loaded embedding engine
 */
export async function initializeEmbeddingEngine(modelName, batchSize) {
    const engine = getEmbeddingEngine(modelName, batchSize);
    if (engine.isLoaded()) {
        return engine;
    }
    await engine.loadModel();
    return engine;
}
346
/**
 * Create an EmbedFunction implementation using the text embedding engine.
 * The returned function can be injected into core components.
 *
 * The engine is initialized lazily on the first call. The in-flight
 * initialization promise is cached, so calls that arrive while the model is
 * still loading share one initialization instead of each starting their own.
 * If initialization fails, the cache is cleared so a later call can retry.
 * @param modelName - Optional model name override
 * @param batchSize - Optional batch size override
 * @returns Async function `(query, contentType?)` resolving to an embedding
 *          result whose contentType is always set
 */
export function createTextEmbedFunction(modelName, batchSize) {
    let enginePromise = null;
    const embedFunction = async (query, contentType) => {
        // Only support text content type
        if (contentType && contentType !== 'text') {
            throw new Error(`Text embedder only supports 'text' content type, got: ${contentType}`);
        }
        // Start initialization once; concurrent callers await the same promise.
        if (!enginePromise) {
            enginePromise = initializeEmbeddingEngine(modelName, batchSize).catch((error) => {
                // Do not keep a rejected promise around: allow the next call to retry.
                enginePromise = null;
                throw error;
            });
        }
        const engine = await enginePromise;
        // Use the existing embedSingle method
        const result = await engine.embedSingle(query);
        // Ensure contentType is present even if the engine result lacks it
        return {
            ...result,
            contentType: result.contentType || 'text'
        };
    };
    return embedFunction;
}
375
+ // =============================================================================
376
+ // REMOVED: createTextEmbedder() factory object
377
+ // =============================================================================
378
+ // The createTextEmbedder() function has been removed as it was redundant.
379
+ // It was just a wrapper around initializeEmbeddingEngine() that provided no
380
+ // additional value over using the engine directly or using createEmbedder().
381
+ //
382
+ // Migration guide:
383
+ // - For public API: Use createEmbedder() from core/embedder-factory.ts
384
+ // - For dependency injection: Use createTextEmbedFunction()
385
+ // - For direct access: Use initializeEmbeddingEngine() or new EmbeddingEngine()
386
+ //# sourceMappingURL=embedder.js.map
@@ -0,0 +1,8 @@
1
+ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction } from './embedder.js';
2
+ export { CrossEncoderReranker, createTextRerankFunction } from './reranker.js';
3
+ export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
4
+ export { chunkDocument, type Chunk, type Document } from '../core/chunker.js';
5
+ export { type ChunkConfig } from '../core/chunker.js';
6
+ export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
7
+ export * from './preprocessors/index.js';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,9 @@
1
+ // Text implementation layer exports
2
+ export { EmbeddingEngine, getEmbeddingEngine, initializeEmbeddingEngine, createTextEmbedFunction } from './embedder.js';
3
+ export { CrossEncoderReranker, createTextRerankFunction } from './reranker.js';
4
+ export { countTokens, getTokenizer, resetTokenizer } from './tokenizer.js';
5
+ export { chunkDocument } from '../core/chunker.js';
6
+ export { SentenceTransformerEmbedder } from './sentence-transformer-embedder.js';
7
+ // Re-export preprocessors
8
+ export * from './preprocessors/index.js';
9
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,17 @@
1
+ import { PreprocessorRegistry } from './registry.js';
2
+ export { PreprocessorRegistry, ContentTypeDetector } from './registry.js';
3
+ export { MdxPreprocessor } from './mdx.js';
4
+ export { MermaidPreprocessor } from './mermaid.js';
5
+ /**
6
+ * Global preprocessor registry instance shared by validatePreprocessorConfiguration and getAvailablePreprocessors
7
+ */
8
+ export declare const preprocessorRegistry: PreprocessorRegistry;
9
+ /**
10
+ * Validate that all required preprocessors are available in the global registry; throws an Error naming the missing and the available preprocessors otherwise
11
+ */
12
+ export declare function validatePreprocessorConfiguration(requiredPreprocessors: string[]): void;
13
+ /**
14
+ * Get the names of all preprocessors registered in the global registry
15
+ */
16
+ export declare function getAvailablePreprocessors(): string[];
17
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,38 @@
1
+ import { PreprocessorRegistry } from './registry.js';
2
+ import { MdxPreprocessor } from './mdx.js';
3
+ import { MermaidPreprocessor } from './mermaid.js';
4
+ // Export all preprocessor classes
5
+ export { PreprocessorRegistry, ContentTypeDetector } from './registry.js';
6
+ export { MdxPreprocessor } from './mdx.js';
7
+ export { MermaidPreprocessor } from './mermaid.js';
8
+ /**
9
+ * Create and initialize the global preprocessor registry
10
+ */
11
+ function createPreprocessorRegistry() {
12
+ const registry = new PreprocessorRegistry();
13
+ // Register built-in preprocessors
14
+ registry.register('mdx', new MdxPreprocessor());
15
+ registry.register('mermaid', new MermaidPreprocessor());
16
+ return registry;
17
+ }
18
+ /**
19
+ * Global preprocessor registry instance, built once at module load by createPreprocessorRegistry() with the built-in 'mdx' and 'mermaid' preprocessors registered
20
+ */
21
+ export const preprocessorRegistry = createPreprocessorRegistry();
22
+ /**
23
+ * Validate that all required preprocessors are available in the registry
24
+ */
25
+ export function validatePreprocessorConfiguration(requiredPreprocessors) {
26
+ const validation = preprocessorRegistry.validatePreprocessors(requiredPreprocessors);
27
+ if (!validation.valid) {
28
+ const missingList = validation.missing.join(', ');
29
+ throw new Error(`Missing required preprocessors: ${missingList}. Available: ${preprocessorRegistry.getRegisteredNames().join(', ')}`);
30
+ }
31
+ }
32
+ /**
33
+ * Get all available preprocessor names
34
+ */
35
+ export function getAvailablePreprocessors() {
36
+ return preprocessorRegistry.getRegisteredNames();
37
+ }
38
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,25 @@
1
+ import { Preprocessor, PreprocessorOptions } from '../../types.js';
2
+ /**
3
+ * MDX preprocessor for handling JSX content in Markdown files
4
+ * Ports the existing cleanMdxContent logic with mode-aware behavior
5
+ */
6
+ export declare class MdxPreprocessor implements Preprocessor {
7
+ /**
8
+ * Check if this preprocessor applies to the given language/content type
9
+ * Returns true only when language is exactly 'mdx'; the content itself is not inspected here
10
+ */
11
+ appliesTo(language: string): boolean;
12
+ /**
13
+ * Process MDX content based on options.mode ('strip', 'keep' or 'placeholder'; any other value falls back to 'placeholder'). Content in which ContentTypeDetector.hasJsxContent finds no JSX is returned unchanged
14
+ */
15
+ process(content: string, options: PreprocessorOptions): string;
16
+ /**
17
+ * Strip import/export statements and capitalized JSX components entirely - ported from cleanMdxContent logic; yields '[content removed]' when nothing is left
18
+ */
19
+ private stripJsx;
20
+ /**
21
+ * Replace imports, exports and JSX components with the '[import removed]', '[export removed]' and '[component removed]' placeholders
22
+ */
23
+ private replaceWithPlaceholders;
24
+ }
25
+ //# sourceMappingURL=mdx.d.ts.map
@@ -0,0 +1,101 @@
1
+ import { ContentTypeDetector } from './registry.js';
2
+ /**
3
+ * MDX preprocessor for handling JSX content in Markdown files
4
+ * Ports the existing cleanMdxContent logic with mode-aware behavior
5
+ */
6
+ export class MdxPreprocessor {
7
+ /**
8
+ * Check if this preprocessor applies to the given language/content type
9
+ * Applies to .mdx files and content with JSX syntax
10
+ */
11
+ appliesTo(language) {
12
+ return language === 'mdx';
13
+ }
14
+ /**
15
+ * Process MDX content based on the specified mode
16
+ */
17
+ process(content, options) {
18
+ // Only process if content actually contains JSX
19
+ if (!ContentTypeDetector.hasJsxContent(content)) {
20
+ return content;
21
+ }
22
+ switch (options.mode) {
23
+ case 'strip':
24
+ return this.stripJsx(content);
25
+ case 'keep':
26
+ return content; // Keep JSX as-is
27
+ case 'placeholder':
28
+ return this.replaceWithPlaceholders(content);
29
+ default:
30
+ console.log(`Unknown MDX processing mode: ${options.mode}, using placeholder`);
31
+ return this.replaceWithPlaceholders(content);
32
+ }
33
+ }
34
+ /**
35
+ * Strip JSX content entirely - ported from cleanMdxContent logic
36
+ */
37
+ stripJsx(content) {
38
+ let cleaned = content;
39
+ // Remove JSX import statements (requirement 11.1)
40
+ // Matches: import ... from '...' or import ... from "..."
41
+ cleaned = cleaned.replace(/^import\s+.*?from\s+['"][^'"]*['"];?\s*$/gm, '');
42
+ // Remove JSX export statements (requirement 11.2)
43
+ // Handle both single-line and multi-line exports
44
+ // Multi-line function exports: export default function() { ... }
45
+ cleaned = cleaned.replace(/^export\s+default\s+function[^{]*\{[\s\S]*?\n\}\s*$/gm, '');
46
+ // Object exports: export const metadata = { ... }
47
+ cleaned = cleaned.replace(/^export\s+const\s+[^=]*=\s*\{[\s\S]*?\}\s*;?\s*$/gm, '');
48
+ // Single line exports: export const x = ...; or export default ...
49
+ cleaned = cleaned.replace(/^export\s+(?:default\s+)?(?:const|let|var|function|class)\s+[^;{]*;?\s*$/gm, '');
50
+ // Simple exports: export default Component
51
+ cleaned = cleaned.replace(/^export\s+default\s+[^;{]*;?\s*$/gm, '');
52
+ // Remove JSX components (requirements 11.3, 11.4)
53
+ // Self-closing tags: <Component />
54
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*\/>/g, '');
55
+ // Opening and closing tags with content: <Component>content</Component>
56
+ // This handles nested components by replacing the outermost ones first
57
+ let previousLength;
58
+ do {
59
+ previousLength = cleaned.length;
60
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>.*?<\/[A-Z][a-zA-Z0-9]*>/gs, '');
61
+ } while (cleaned.length !== previousLength);
62
+ // Clean up multiple consecutive newlines and trim
63
+ cleaned = cleaned.replace(/\n\s*\n\s*\n/g, '\n\n').trim();
64
+ // Ensure we never return empty content (requirement 6.4)
65
+ if (!cleaned.trim()) {
66
+ return '[content removed]';
67
+ }
68
+ return cleaned;
69
+ }
70
+ /**
71
+ * Replace JSX with descriptive placeholders
72
+ */
73
+ replaceWithPlaceholders(content) {
74
+ let cleaned = content;
75
+ // Replace JSX import statements
76
+ cleaned = cleaned.replace(/^import\s+.*?from\s+['"][^'"]*['"];?\s*$/gm, '[import removed]');
77
+ // Replace JSX export statements
78
+ // Multi-line function exports: export default function() { ... }
79
+ cleaned = cleaned.replace(/^export\s+default\s+function[^{]*\{[\s\S]*?\n\}\s*$/gm, '[export removed]');
80
+ // Object exports: export const metadata = { ... }
81
+ cleaned = cleaned.replace(/^export\s+const\s+[^=]*=\s*\{[\s\S]*?\}\s*;?\s*$/gm, '[export removed]');
82
+ // Single line exports: export const x = ...; or export default ...
83
+ cleaned = cleaned.replace(/^export\s+(?:default\s+)?(?:const|let|var|function|class)\s+[^;{]*;?\s*$/gm, '[export removed]');
84
+ // Simple exports: export default Component
85
+ cleaned = cleaned.replace(/^export\s+default\s+[^;{]*;?\s*$/gm, '[export removed]');
86
+ // Replace JSX components with placeholder
87
+ // Self-closing tags: <Component />
88
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*\/>/g, '[component removed]');
89
+ // Opening and closing tags with content: <Component>content</Component>
90
+ // This handles nested components by replacing the outermost ones first
91
+ let previousLength;
92
+ do {
93
+ previousLength = cleaned.length;
94
+ cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>.*?<\/[A-Z][a-zA-Z0-9]*>/gs, '[component removed]');
95
+ } while (cleaned.length !== previousLength);
96
+ // Clean up multiple consecutive newlines and trim
97
+ cleaned = cleaned.replace(/\n\s*\n\s*\n/g, '\n\n').trim();
98
+ return cleaned;
99
+ }
100
+ }
101
+ //# sourceMappingURL=mdx.js.map
@@ -0,0 +1,68 @@
1
+ import { Preprocessor, PreprocessorOptions } from '../../types.js';
2
+ /**
3
+ * Mermaid preprocessor for handling Mermaid diagrams in Markdown files
4
+ * Supports strip, extract, and placeholder modes (see the private stripMermaid, extractMermaidEdges and replaceWithPlaceholders members below)
5
+ */
6
+ export declare class MermaidPreprocessor implements Preprocessor {
7
+ /**
8
+ * Check if this preprocessor applies to the given language/content type
9
+ * Applies to mermaid code blocks and content with Mermaid syntax (NOTE(review): only the language string is passed in; confirm the exact matching rule against the implementation)
10
+ */
11
+ appliesTo(language: string): boolean;
12
+ /**
13
+ * Process Mermaid content based on the specified mode and return the processed text
14
+ */
15
+ process(content: string, options: PreprocessorOptions): string;
16
+ /**
17
+ * Strip Mermaid diagrams entirely
18
+ */
19
+ private stripMermaid;
20
+ /**
21
+ * Replace Mermaid diagrams with descriptive placeholders
22
+ */
23
+ private replaceWithPlaceholders;
24
+ /**
25
+ * Extract semantic information from Mermaid diagrams
26
+ * Converts diagram edges to plain text while ignoring styling and layout instructions
27
+ */
28
+ private extractMermaidEdges;
29
+ /**
30
+ * Extract edges and relationships from a Mermaid diagram
31
+ * Ignores styling, layout, and formatting instructions
32
+ */
33
+ private extractEdgesFromDiagram;
34
+ /**
35
+ * Build a mapping of node IDs to their labels by scanning all lines
36
+ */
37
+ private buildNodeLabelMap;
38
+ /**
39
+ * Check if a line is a layout instruction (should be ignored)
40
+ */
41
+ private isLayoutInstruction;
42
+ /**
43
+ * Check if a line is a styling instruction (should be ignored)
44
+ */
45
+ private isStyleInstruction;
46
+ /**
47
+ * Extract the meaningful label from a node definition
48
+ * Examples: A[Start] -> "Start", B{Decision} -> "Decision", C((End)) -> "End", D -> "D"
49
+ */
50
+ private extractNodeLabel;
51
+ /**
52
+ * Extract edge information from a single line
53
+ */
54
+ private extractEdgeFromLine;
55
+ /**
56
+ * Interpret flowchart connector symbols
57
+ */
58
+ private interpretConnector;
59
+ /**
60
+ * Interpret class diagram connector symbols
61
+ */
62
+ private interpretClassConnector;
63
+ /**
64
+ * Interpret ER diagram connector symbols
65
+ */
66
+ private interpretERConnector;
67
+ }
68
+ //# sourceMappingURL=mermaid.d.ts.map