@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,422 @@
1
+ /**
2
+ * Chunker Registry
3
+ *
4
+ * Centralized registry for all chunking strategies with metadata
5
+ * and discovery capabilities. Follows the BaseRegistry pattern.
6
+ */
7
+ import { BaseRegistry } from "../core/infrastructure/index.js";
8
+ import { logger } from "../utils/logger.js";
9
+ import { ChunkingError, RAGErrorCodes } from "./errors/RAGError.js";
10
/**
 * Built-in metadata for every default chunking strategy, keyed by strategy id.
 *
 * Each record carries, in this order: `description`, `defaultConfig`,
 * `supportedOptions`, `useCases` and `aliases`.
 */
const DEFAULT_CHUNKER_METADATA = (() => {
    // Builds one metadata record; keeps the property order identical for every strategy.
    const entry = (description, defaultConfig, supportedOptions, useCases, aliases) => ({
        description,
        defaultConfig,
        supportedOptions,
        useCases,
        aliases,
    });
    return {
        character: entry(
            "Splits text into fixed-size character chunks with optional overlap",
            { maxSize: 1000, overlap: 100 },
            ["maxSize", "overlap", "minSize"],
            ["Simple text processing", "Fixed-size chunks needed", "Language-agnostic splitting"],
            ["char", "fixed-size", "fixed"],
        ),
        recursive: entry(
            "Recursively splits text using ordered separators (paragraphs, sentences, etc.)",
            { maxSize: 1000, overlap: 100, separators: ["\n\n", "\n", ". ", " ", ""] },
            ["maxSize", "overlap", "separators", "keepSeparators", "minSize"],
            ["General text documents", "Preserving semantic boundaries", "Default choice for most use cases"],
            ["recursive-character", "langchain-default"],
        ),
        sentence: entry(
            "Splits text by sentence boundaries for semantically meaningful chunks",
            { maxSize: 1000, overlap: 1 },
            ["maxSize", "overlap", "boundaryDetection", "maxSentences"],
            ["Q&A applications", "Sentence-level analysis", "Preserving complete thoughts"],
            ["sent", "sentence-based"],
        ),
        token: entry(
            "Splits text by token count using a specific tokenizer (GPT, Claude, etc.)",
            { maxSize: 512, overlap: 50 },
            ["maxSize", "overlap", "tokenizer", "maxTokens"],
            ["Token-aware splitting", "Optimal for specific models", "Precise token budget management"],
            ["tok", "tokenized"],
        ),
        markdown: entry(
            "Splits markdown content by headers and structural elements",
            { maxSize: 1000, overlap: 0 },
            ["maxSize", "overlap", "headerLevels", "splitCodeBlocks", "preserveMetadata"],
            ["Documentation processing", "README files", "Technical documentation"],
            ["md", "markdown-header"],
        ),
        html: entry(
            "Splits HTML content by semantic tags while optionally stripping markup",
            { maxSize: 1000, overlap: 0 },
            ["maxSize", "overlap", "splitTags", "stripTags", "preserveAttributes"],
            ["Web content processing", "HTML documents", "Web scraping"],
            ["html-tag", "web"],
        ),
        json: entry(
            "Splits JSON documents by object boundaries and nested structures",
            { maxSize: 1000, overlap: 0 },
            ["maxSize", "overlap", "maxDepth", "chunkKeys"],
            ["API response processing", "Structured data", "Configuration files"],
            ["json-object", "structured"],
        ),
        latex: entry(
            "Splits LaTeX documents by sections, environments, and math blocks",
            { maxSize: 1000, overlap: 0 },
            ["maxSize", "overlap", "environments", "splitMathBlocks", "preserveMetadata"],
            ["Academic papers", "Scientific documents", "Mathematical content"],
            ["tex", "latex-section"],
        ),
        semantic: entry(
            "Uses LLM to identify semantically meaningful split points",
            { maxSize: 1000, overlap: 100 },
            ["maxSize", "overlap", "modelName", "provider", "similarityThreshold"],
            ["Advanced semantic understanding", "Context-aware splitting", "AI-enhanced chunking"],
            ["llm", "ai-semantic"],
        ),
        "semantic-markdown": entry(
            "Combines markdown splitting with semantic similarity for intelligent merging",
            { maxSize: 1000, overlap: 100 },
            ["maxSize", "overlap", "similarityThreshold", "maxMergeSize", "preserveMetadata"],
            ["Context-aware documentation", "Knowledge base creation", "Semantic search preparation"],
            ["semantic-md", "smart-markdown"],
        ),
    };
})();
193
/**
 * Chunker Registry
 *
 * Manages registration and discovery of all chunking strategies. Storage and
 * lifecycle (`register`, `get`, `list`, `clear`, `ensureInitialized`, `items`)
 * are inherited from BaseRegistry; this class adds alias handling and
 * metadata lookups on top.
 */
export class ChunkerRegistry extends BaseRegistry {
    /** Lazily created singleton; `null` until getInstance() is first called. */
    static instance = null;
    /** Lower-cased alias -> canonical strategy id. */
    aliasMap = new Map();
    constructor() {
        super();
    }
    /**
     * Get singleton instance, creating it on first use.
     */
    static getInstance() {
        if (!ChunkerRegistry.instance) {
            ChunkerRegistry.instance = new ChunkerRegistry();
        }
        return ChunkerRegistry.instance;
    }
    /**
     * Reset singleton (for testing). Clears the current instance before
     * dropping the static reference to it.
     */
    static resetInstance() {
        if (ChunkerRegistry.instance) {
            ChunkerRegistry.instance.clear();
            ChunkerRegistry.instance = null;
        }
    }
    /**
     * Register all default chunkers.
     *
     * Each factory dynamically imports its chunker module only when the
     * factory itself is invoked. Import specifiers are kept as string literals
     * so bundlers can still discover them.
     */
    async registerAll() {
        const defaultFactories = [
            ["character", async () => {
                const { CharacterChunker } = await import("./chunkers/CharacterChunker.js");
                return new CharacterChunker();
            }],
            ["recursive", async () => {
                const { RecursiveChunker } = await import("./chunkers/RecursiveChunker.js");
                return new RecursiveChunker();
            }],
            ["sentence", async () => {
                const { SentenceChunker } = await import("./chunkers/SentenceChunker.js");
                return new SentenceChunker();
            }],
            ["token", async () => {
                const { TokenChunker } = await import("./chunkers/TokenChunker.js");
                return new TokenChunker();
            }],
            ["markdown", async () => {
                const { MarkdownChunker } = await import("./chunkers/MarkdownChunker.js");
                return new MarkdownChunker();
            }],
            ["html", async () => {
                const { HTMLChunker } = await import("./chunkers/HTMLChunker.js");
                return new HTMLChunker();
            }],
            ["json", async () => {
                const { JSONChunker } = await import("./chunkers/JSONChunker.js");
                return new JSONChunker();
            }],
            ["latex", async () => {
                const { LaTeXChunker } = await import("./chunkers/LaTeXChunker.js");
                return new LaTeXChunker();
            }],
            // NOTE: the semantic chunker lives under ./chunking/, not ./chunkers/.
            ["semantic", async () => {
                const { SemanticChunker } = await import("./chunking/semanticChunker.js");
                return new SemanticChunker();
            }],
            ["semantic-markdown", async () => {
                const { SemanticMarkdownChunker } = await import("./chunkers/SemanticMarkdownChunker.js");
                return new SemanticMarkdownChunker();
            }],
        ];
        for (const [strategy, factory] of defaultFactories) {
            this.registerChunker(strategy, factory, DEFAULT_CHUNKER_METADATA[strategy]);
        }
        logger.debug(`[ChunkerRegistry] Registered ${this.items.size} chunking strategies`);
    }
    /**
     * Register a chunker together with its aliases.
     *
     * @param strategy Canonical strategy id, stored verbatim by the base registry.
     * @param factory  Async factory producing the chunker instance.
     * @param metadata Strategy metadata; `aliases` (if any) are indexed lower-cased.
     */
    registerChunker(strategy, factory, metadata) {
        this.register(strategy, factory, metadata);
        // Tolerate registrations that carry no metadata or no aliases.
        for (const alias of metadata?.aliases ?? []) {
            this.aliasMap.set(alias.toLowerCase(), strategy);
            logger.debug(`[ChunkerRegistry] Registered alias '${alias}' -> '${strategy}'`);
        }
    }
    /**
     * Resolve a strategy name or alias to the canonical strategy id.
     *
     * Lookup order: exact id, lower-cased id, lower-cased alias.
     *
     * @throws ChunkingError (CHUNKING_STRATEGY_NOT_FOUND) when nothing matches.
     */
    resolveStrategy(nameOrAlias) {
        // Ids are stored verbatim by register(), so try the exact spelling first;
        // otherwise a strategy registered with upper-case characters is unreachable.
        if (this.items.has(nameOrAlias)) {
            return nameOrAlias;
        }
        const lower = nameOrAlias.toLowerCase();
        if (this.items.has(lower)) {
            return lower;
        }
        const resolved = this.aliasMap.get(lower);
        if (resolved) {
            return resolved;
        }
        const availableStrategies = this.list().map((item) => item.id);
        throw new ChunkingError(`Unknown chunking strategy: '${nameOrAlias}'. Available strategies: ${availableStrategies.join(", ")}`, {
            code: RAGErrorCodes.CHUNKING_STRATEGY_NOT_FOUND,
            details: {
                requestedStrategy: nameOrAlias,
                availableStrategies,
            },
        });
    }
    /**
     * Get a chunker by strategy name or alias.
     *
     * Waits for registry initialization before resolving.
     *
     * @throws ChunkingError (CHUNKING_STRATEGY_NOT_FOUND) when the name is
     *         unknown or the registry yields no chunker for it.
     */
    async getChunker(strategyOrAlias) {
        await this.ensureInitialized();
        const strategy = this.resolveStrategy(strategyOrAlias);
        const chunker = await this.get(strategy);
        if (!chunker) {
            throw new ChunkingError(`Chunker not found: ${strategy}`, {
                code: RAGErrorCodes.CHUNKING_STRATEGY_NOT_FOUND,
                details: { strategy },
            });
        }
        return chunker;
    }
    /**
     * Get the ids of all registered chunker strategies (after initialization).
     */
    async getAvailableChunkers() {
        await this.ensureInitialized();
        return this.list().map((item) => item.id);
    }
    /**
     * Get metadata for a specific chunker.
     *
     * Synchronous: does not wait for initialization, so it throws for every
     * name until the strategies have been registered.
     *
     * @throws ChunkingError when the name or alias is unknown.
     */
    getChunkerMetadata(strategyOrAlias) {
        const strategy = this.resolveStrategy(strategyOrAlias);
        const entry = this.list().find((item) => item.id === strategy);
        return entry?.metadata;
    }
    /**
     * Get all aliases for a strategy.
     *
     * Combines the built-in defaults with aliases registered at runtime, so
     * custom chunkers added through registerChunker() are included. Returns a
     * fresh array; mutating it does not affect the registry.
     */
    getAliasesForStrategy(strategy) {
        const aliases = new Set(DEFAULT_CHUNKER_METADATA[strategy]?.aliases ?? []);
        for (const [alias, target] of this.aliasMap) {
            if (target === strategy) {
                aliases.add(alias);
            }
        }
        return [...aliases];
    }
    /**
     * Get a copy of all registered aliases (alias -> strategy id).
     */
    getAllAliases() {
        return new Map(this.aliasMap);
    }
    /**
     * Check if a strategy or alias exists. Never throws.
     */
    hasChunker(strategyOrAlias) {
        try {
            this.resolveStrategy(strategyOrAlias);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Get chunkers whose use cases contain the given text (case-insensitive).
     *
     * Built-in strategies are matched from the static defaults (available even
     * before registration); chunkers registered at runtime are matched from
     * their registered metadata. Each strategy id appears at most once.
     */
    getChunkersByUseCase(useCase) {
        const useCaseLower = useCase.toLowerCase();
        const hasMatchingUseCase = (metadata) => metadata?.useCases?.some((uc) => uc.toLowerCase().includes(useCaseLower)) ?? false;
        const matches = new Set();
        for (const [strategy, metadata] of Object.entries(DEFAULT_CHUNKER_METADATA)) {
            if (hasMatchingUseCase(metadata)) {
                matches.add(strategy);
            }
        }
        for (const item of this.list()) {
            if (hasMatchingUseCase(item.metadata)) {
                matches.add(item.id);
            }
        }
        return [...matches];
    }
    /**
     * Get default configuration for a chunker.
     *
     * @throws ChunkingError when the name or alias is unknown.
     */
    getDefaultConfig(strategyOrAlias) {
        const metadata = this.getChunkerMetadata(strategyOrAlias);
        return metadata?.defaultConfig;
    }
    /**
     * Clear the registry (also clears aliases).
     */
    clear() {
        super.clear();
        this.aliasMap.clear();
    }
}
400
+ /**
401
+ * Global chunker registry singleton, obtained once at module load from ChunkerRegistry.getInstance(). ChunkerRegistry.resetInstance() clears this object but does not rebind this export, so after a reset this binding and getInstance() refer to different instances.
402
+ */
403
+ export const chunkerRegistry = ChunkerRegistry.getInstance();
404
/**
 * Lists the ids of every strategy known to the global chunker registry.
 * Delegates to `chunkerRegistry.getAvailableChunkers()`, which waits for
 * registry initialization first.
 */
export async function getAvailableChunkers() {
    const strategyIds = await chunkerRegistry.getAvailableChunkers();
    return strategyIds;
}
410
/**
 * Fetches a chunker from the global registry by strategy id or alias.
 * Rejects with the registry's ChunkingError when the name cannot be resolved.
 */
export async function getChunker(strategyOrAlias) {
    const chunker = await chunkerRegistry.getChunker(strategyOrAlias);
    return chunker;
}
416
/**
 * Looks up a chunker's metadata in the global registry by strategy id or alias.
 * Synchronous: it does not wait for registry initialization, and the
 * underlying lookup throws a ChunkingError for names it cannot resolve.
 */
export function getChunkerMetadata(strategyOrAlias) {
    const metadata = chunkerRegistry.getChunkerMetadata(strategyOrAlias);
    return metadata;
}
422
+ //# sourceMappingURL=ChunkerRegistry.js.map
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Base Chunker
3
+ *
4
+ * Abstract base class for all chunker implementations.
5
+ * Provides common functionality and interface contract.
6
+ */
7
+ import type { Chunk, Chunker, ChunkerConfig, ChunkingStrategy } from "../types.js";
8
+ /**
9
+ * Default chunker configuration used by BaseChunker.getDefaultConfig(); exported so subclasses can spread it and override individual fields.
10
+ */
11
+ export declare const DEFAULT_CHUNKER_CONFIG: ChunkerConfig;
12
+ /**
13
+ * Base Chunker abstract class
14
+ *
15
+ * All chunker implementations should extend this class and supply `strategy` and `doChunk`.
16
+ */
17
+ export declare abstract class BaseChunker implements Chunker {
18
+ abstract readonly strategy: ChunkingStrategy; // strategy id, reported in chunking errors
19
+ protected config: ChunkerConfig; // defaults merged with the constructor argument
20
+ constructor(config?: ChunkerConfig); // merges config over getDefaultConfig(), then validates it
21
+ /**
22
+ * Get default configuration for this chunker (a fresh copy; subclasses may override)
23
+ */
24
+ getDefaultConfig(): ChunkerConfig;
25
+ /**
26
+ * Validate this.config: maxSize must be positive, overlap non-negative and smaller than maxSize; throws ChunkingError otherwise
27
+ */
28
+ protected validateConfig(): void;
29
+ /**
30
+ * Chunk content into smaller pieces: merges the per-call config over the instance config, rejects empty/whitespace-only content, runs doChunk and applies filterChunks; failures surface as ChunkingError
31
+ */
32
+ chunk(content: string, config?: ChunkerConfig): Promise<Chunk[]>;
33
+ /**
34
+ * Perform the actual chunking (to be implemented by subclasses); receives the merged effective config
35
+ */
36
+ protected abstract doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
37
+ /**
38
+ * Filter chunks based on minimum size: keeps chunks whose text length is at least config.minSize (0 when unset)
39
+ */
40
+ protected filterChunks(chunks: Chunk[], config: ChunkerConfig): Chunk[];
41
+ /**
42
+ * Create a chunk object with a generated id; documentId defaults to "unknown" and customMetadata is kept only when config.preserveMetadata is set
43
+ */
44
+ protected createChunk(text: string, chunkIndex: number, startPosition: number, endPosition: number, documentId?: string, customMetadata?: Record<string, unknown>): Chunk;
45
+ /**
46
+ * Split content by size with overlap; each segment reports its text plus the start (inclusive) and end (exclusive) offsets used to slice it
47
+ */
48
+ protected splitBySizeWithOverlap(content: string, maxSize: number, overlap: number): Array<{
49
+ text: string;
50
+ start: number;
51
+ end: number;
52
+ }>;
53
+ }
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Base Chunker
3
+ *
4
+ * Abstract base class for all chunker implementations.
5
+ * Provides common functionality and interface contract.
6
+ */
7
+ import { v4 as uuidv4 } from "uuid";
8
+ import { ChunkingError, RAGErrorCodes } from "../errors/RAGError.js";
9
+ /**
10
+ * Default chunker configuration; BaseChunker.getDefaultConfig() returns a shallow copy of it
11
+ */
12
+ export const DEFAULT_CHUNKER_CONFIG = {
13
+ maxSize: 1000, // validateConfig requires a positive value
14
+ overlap: 100, // validateConfig requires 0 <= overlap < maxSize
15
+ minSize: 10, // filterChunks drops chunks whose text is shorter than this
16
+ preserveMetadata: true, // createChunk keeps custom metadata only when truthy
17
+ };
18
/**
 * Base Chunker abstract class
 *
 * All chunker implementations should extend this class. Subclasses provide a
 * `strategy` id and a `doChunk` implementation; this class supplies config
 * merging and validation, error wrapping, minimum-size filtering and a
 * fixed-window splitting helper.
 */
export class BaseChunker {
    /** Effective instance configuration (defaults merged with constructor input). */
    config;
    /**
     * @param config Optional overrides merged over getDefaultConfig().
     * @throws ChunkingError (CHUNKING_INVALID_CONFIG) when the merged config is invalid.
     */
    constructor(config) {
        this.config = { ...this.getDefaultConfig(), ...config };
        this.validateConfig();
    }
    /**
     * Get default configuration for this chunker.
     * Returns a fresh copy so callers cannot mutate the shared defaults.
     */
    getDefaultConfig() {
        return { ...DEFAULT_CHUNKER_CONFIG };
    }
    /**
     * Validate the instance configuration.
     *
     * Only `this.config` is checked; per-call overrides passed to chunk() are
     * not re-validated here.
     *
     * @throws ChunkingError (CHUNKING_INVALID_CONFIG) when maxSize is not
     *         positive, overlap is negative, or overlap >= maxSize.
     */
    validateConfig() {
        if (this.config.maxSize !== undefined && this.config.maxSize <= 0) {
            throw new ChunkingError("maxSize must be positive", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { maxSize: this.config.maxSize },
            });
        }
        if (this.config.overlap !== undefined && this.config.overlap < 0) {
            throw new ChunkingError("overlap cannot be negative", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { overlap: this.config.overlap },
            });
        }
        if (this.config.maxSize !== undefined &&
            this.config.overlap !== undefined &&
            this.config.overlap >= this.config.maxSize) {
            throw new ChunkingError("overlap must be less than maxSize", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: {
                    maxSize: this.config.maxSize,
                    overlap: this.config.overlap,
                },
            });
        }
    }
    /**
     * Chunk content into smaller pieces.
     *
     * @param content Text to split; must contain non-whitespace characters.
     * @param config  Optional per-call overrides merged over the instance config.
     * @returns Chunks produced by doChunk(), minus those shorter than `minSize`.
     * @throws ChunkingError CHUNKING_EMPTY_CONTENT for empty input; any other
     *         failure inside doChunk() is wrapped as CHUNKING_ERROR (existing
     *         ChunkingErrors are rethrown unchanged).
     */
    async chunk(content, config) {
        const effectiveConfig = { ...this.config, ...config };
        if (!content || content.trim().length === 0) {
            throw new ChunkingError("Content is empty", {
                code: RAGErrorCodes.CHUNKING_EMPTY_CONTENT,
                strategy: this.strategy,
                contentLength: 0,
            });
        }
        try {
            const chunks = await this.doChunk(content, effectiveConfig);
            return this.filterChunks(chunks, effectiveConfig);
        }
        catch (error) {
            if (error instanceof ChunkingError) {
                throw error;
            }
            throw new ChunkingError(`Chunking failed: ${error instanceof Error ? error.message : String(error)}`, {
                code: RAGErrorCodes.CHUNKING_ERROR,
                cause: error instanceof Error ? error : undefined,
                strategy: this.strategy,
                contentLength: content.length,
            });
        }
    }
    /**
     * Filter chunks based on minimum size.
     * Keeps chunks whose text length is at least `config.minSize` (0 when unset).
     */
    filterChunks(chunks, config) {
        const minSize = config.minSize ?? 0;
        return chunks.filter((chunk) => chunk.text.length >= minSize);
    }
    /**
     * Create a chunk object with a freshly generated id.
     *
     * Custom metadata is attached only when the instance config has
     * `preserveMetadata` enabled (per-call overrides are not consulted here).
     */
    createChunk(text, chunkIndex, startPosition, endPosition, documentId = "unknown", customMetadata) {
        const metadata = {
            documentId,
            chunkIndex,
            startPosition,
            endPosition,
            custom: this.config.preserveMetadata ? customMetadata : undefined,
        };
        return {
            id: uuidv4(),
            text,
            metadata,
        };
    }
    /**
     * Split content into consecutive windows of at most `maxSize` characters,
     * each window starting `overlap` characters before the previous one ended.
     *
     * The previous implementation stopped whenever the next start reached the
     * previous window's end, which is exactly what happens with `overlap === 0`,
     * so everything after the first window was silently dropped. The window
     * start now simply advances by at least one character per iteration.
     *
     * @param content Text to split.
     * @param maxSize Maximum window length; expected to be positive.
     * @param overlap Characters shared between neighbouring windows; negative
     *                values are treated as 0.
     * @returns Segments with their text and [start, end) offsets into `content`.
     */
    splitBySizeWithOverlap(content, maxSize, overlap) {
        const result = [];
        // A negative overlap would skip content between windows; treat it as none.
        const sharedLength = Math.max(overlap, 0);
        let start = 0;
        while (start < content.length) {
            const end = Math.min(start + maxSize, content.length);
            result.push({
                text: content.slice(start, end),
                start,
                end,
            });
            // Stop at the end of content, or when the window is empty
            // (non-positive maxSize) and no further progress is meaningful.
            if (end >= content.length || end <= start) {
                break;
            }
            // Step back by the overlap, but always move forward by at least one
            // character so the loop terminates even when overlap >= maxSize.
            start = Math.max(end - sharedLength, start + 1);
        }
        return result;
    }
}
144
+ //# sourceMappingURL=BaseChunker.js.map
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Character Chunker
3
+ *
4
+ * Splits text into fixed-size character chunks with optional overlap.
5
+ * The simplest chunking strategy for language-agnostic processing.
6
+ */
7
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
8
+ import { BaseChunker } from "./BaseChunker.js";
9
+ /**
10
+ * Character Chunker
11
+ *
12
+ * Splits content into fixed-size character chunks using the size/overlap splitter inherited from BaseChunker.
13
+ */
14
+ export declare class CharacterChunker extends BaseChunker {
15
+ readonly strategy: ChunkingStrategy; // set to "character" by the implementation
16
+ getDefaultConfig(): ChunkerConfig; // shared defaults with maxSize 1000 and overlap 100
17
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>; // one chunk per fixed-size segment, indexed in order
18
+ }
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Character Chunker
3
+ *
4
+ * Splits text into fixed-size character chunks with optional overlap.
5
+ * The simplest chunking strategy for language-agnostic processing.
6
+ */
7
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
8
/**
 * Character Chunker
 *
 * Cuts content into fixed-size character windows via the splitter inherited
 * from BaseChunker and wraps every window in a chunk object.
 */
export class CharacterChunker extends BaseChunker {
    strategy = "character";
    /**
     * Shared chunker defaults with this strategy's window size and overlap applied.
     */
    getDefaultConfig() {
        const defaults = { ...DEFAULT_CHUNKER_CONFIG };
        defaults.maxSize = 1000;
        defaults.overlap = 100;
        return defaults;
    }
    /**
     * Produce one chunk per fixed-size window, numbered in order of appearance.
     * Falls back to a 1000-character window with 100 characters of overlap when
     * the effective config leaves either value unset.
     */
    async doChunk(content, config) {
        const windowSize = config.maxSize ?? 1000;
        const sharedChars = config.overlap ?? 100;
        const chunks = [];
        const windows = this.splitBySizeWithOverlap(content, windowSize, sharedChars);
        for (const [position, { text, start, end }] of windows.entries()) {
            chunks.push(this.createChunk(text, position, start, end));
        }
        return chunks;
    }
}
29
+ //# sourceMappingURL=CharacterChunker.js.map
@@ -0,0 +1,19 @@
1
+ /**
2
+ * HTML Chunker
3
+ *
4
+ * Splits HTML content by semantic tags.
5
+ */
6
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
7
+ import { BaseChunker } from "./BaseChunker.js";
8
+ /**
9
+ * HTML Chunker: BaseChunker subclass for the HTML strategy, which per the file header splits HTML content by semantic tags (implementation is in the compiled HTMLChunker.js, not visible from this declaration)
10
+ */
11
+ export declare class HTMLChunker extends BaseChunker {
12
+ readonly strategy: ChunkingStrategy; // strategy id of this chunker
13
+ getDefaultConfig(): ChunkerConfig; // overrides the BaseChunker defaults
14
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>; // strategy-specific splitting invoked by BaseChunker.chunk()
15
+ /**
16
+ * Strip HTML tags from content (private helper; signature is not exposed in the declaration)
17
+ */
18
+ private stripHtml;
19
+ }