@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555):
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -2,14 +2,14 @@ import { createOpenAI } from "@ai-sdk/openai";
2
2
  import { streamText } from "ai";
3
3
  import { AIProviderName } from "../constants/enums.js";
4
4
  import { BaseProvider } from "../core/baseProvider.js";
5
- import { logger } from "../utils/logger.js";
6
- import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
7
- import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
8
5
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
9
- import { validateApiKey, createOpenAIConfig, getProviderModel, } from "../utils/providerConfig.js";
10
6
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
11
7
  import { createProxyFetch } from "../proxy/proxyFetch.js";
8
+ import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
9
+ import { logger } from "../utils/logger.js";
10
+ import { createOpenAIConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
12
11
  import { isZodSchema } from "../utils/schemaConversion.js";
12
+ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
13
13
  // Configuration helpers - now using consolidated utility
14
14
  const getOpenAIApiKey = () => {
15
15
  return validateApiKey(createOpenAIConfig());
@@ -54,6 +54,13 @@ export class OpenAIProvider extends BaseProvider {
54
54
  getDefaultModel() {
55
55
  return getOpenAIModel();
56
56
  }
57
+ /**
58
+ * Get the default embedding model for OpenAI
59
+ * @returns The default OpenAI embedding model name
60
+ */
61
+ getDefaultEmbeddingModel() {
62
+ return process.env.OPENAI_EMBEDDING_MODEL || "text-embedding-3-small";
63
+ }
57
64
  /**
58
65
  * Returns the Vercel AI SDK model instance for OpenAI
59
66
  */
@@ -218,9 +225,12 @@ export class OpenAIProvider extends BaseProvider {
218
225
  const timeout = this.getTimeout(options);
219
226
  const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
220
227
  try {
221
- // Get tools consistently with generate method
228
+ // Get tools - options.tools is pre-merged by BaseProvider.stream() with
229
+ // base tools (MCP/built-in) + user-provided tools (RAG, etc.)
222
230
  const shouldUseTools = !options.disableTools && this.supportsTools();
223
- const allTools = shouldUseTools ? await this.getAllTools() : {};
231
+ const allTools = shouldUseTools
232
+ ? options.tools || (await this.getAllTools())
233
+ : {};
224
234
  // OpenAI-specific fix: Validate tools format and filter out problematic ones
225
235
  let tools = this.validateAndFilterToolsForOpenAI(allTools);
226
236
  // OpenAI max tools limit - configurable via environment variable
@@ -446,6 +456,50 @@ export class OpenAIProvider extends BaseProvider {
446
456
  throw this.handleProviderError(error);
447
457
  }
448
458
  }
459
+ /**
460
+ * Generate embeddings for text using OpenAI text-embedding models
461
+ * @param text - The text to embed
462
+ * @param modelName - The embedding model to use (default: text-embedding-3-small)
463
+ * @returns Promise resolving to the embedding vector
464
+ */
465
+ async embed(text, modelName) {
466
+ const embeddingModelName = modelName || "text-embedding-3-small";
467
+ logger.debug("Generating embedding", {
468
+ provider: this.providerName,
469
+ model: embeddingModelName,
470
+ textLength: text.length,
471
+ });
472
+ try {
473
+ // Create embedding model using the AI SDK
474
+ const { embed } = await import("ai");
475
+ // Create the OpenAI provider
476
+ const openai = createOpenAI({
477
+ apiKey: getOpenAIApiKey(),
478
+ fetch: createProxyFetch(),
479
+ });
480
+ // Get the text embedding model
481
+ const embeddingModel = openai.textEmbeddingModel(embeddingModelName);
482
+ // Generate the embedding
483
+ const result = await embed({
484
+ model: embeddingModel,
485
+ value: text,
486
+ });
487
+ logger.debug("Embedding generated successfully", {
488
+ provider: this.providerName,
489
+ model: embeddingModelName,
490
+ embeddingDimension: result.embedding.length,
491
+ });
492
+ return result.embedding;
493
+ }
494
+ catch (error) {
495
+ logger.error("Embedding generation failed", {
496
+ error: error instanceof Error ? error.message : String(error),
497
+ model: embeddingModelName,
498
+ textLength: text.length,
499
+ });
500
+ throw this.handleProviderError(error);
501
+ }
502
+ }
449
503
  }
450
504
  // Export for factory registration
451
505
  export default OpenAIProvider;
@@ -1,8 +1,8 @@
1
+ import { type LanguageModelV1, type Schema } from "ai";
1
2
  import type { ZodType, ZodTypeDef } from "zod";
2
- import { type Schema, type LanguageModelV1 } from "ai";
3
3
  import { AIProviderName } from "../constants/enums.js";
4
- import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
5
4
  import { BaseProvider } from "../core/baseProvider.js";
5
+ import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
6
6
  /**
7
7
  * OpenRouter Provider - BaseProvider Implementation
8
8
  * Provides access to 300+ models from 60+ providers via OpenRouter unified gateway
@@ -1,13 +1,13 @@
1
1
  import { createOpenRouter } from "@openrouter/ai-sdk-provider";
2
- import { streamText, Output } from "ai";
2
+ import { Output, streamText } from "ai";
3
3
  import { AIProviderName } from "../constants/enums.js";
4
4
  import { BaseProvider } from "../core/baseProvider.js";
5
- import { logger } from "../utils/logger.js";
6
- import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
7
- import { getProviderModel } from "../utils/providerConfig.js";
8
- import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
9
5
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
6
+ import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
10
7
  import { createProxyFetch } from "../proxy/proxyFetch.js";
8
+ import { logger } from "../utils/logger.js";
9
+ import { getProviderModel } from "../utils/providerConfig.js";
10
+ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
11
11
  // Constants
12
12
  const MODELS_DISCOVERY_TIMEOUT_MS = 5000; // 5 seconds for model discovery
13
13
  // Configuration helpers
@@ -203,8 +203,12 @@ export class OpenRouterProvider extends BaseProvider {
203
203
  const messages = await this.buildMessagesForStream(options);
204
204
  const model = await this.getAISDKModelWithMiddleware(options);
205
205
  // Get all available tools (direct + MCP + external) for streaming
206
+ // BaseProvider.stream() pre-merges base tools + external tools into options.tools
206
207
  const shouldUseTools = !options.disableTools && this.supportsTools();
207
- const tools = shouldUseTools ? await this.getAllTools() : {};
208
+ const tools = shouldUseTools
209
+ ? options.tools ||
210
+ (await this.getAllTools())
211
+ : {};
208
212
  logger.debug(`OpenRouter: Tools for streaming`, {
209
213
  shouldUseTools,
210
214
  toolCount: Object.keys(tools).length,
@@ -131,7 +131,7 @@ export declare class SageMakerLanguageModel implements LanguageModelV1 {
131
131
  provider: string;
132
132
  specificationVersion: string;
133
133
  endpointName: string;
134
- modelType: "huggingface" | "mistral" | "custom" | "claude" | "llama" | "jumpstart" | undefined;
134
+ modelType: "huggingface" | "mistral" | "custom" | "llama" | "claude" | "jumpstart" | undefined;
135
135
  region: string;
136
136
  };
137
137
  /**
@@ -178,7 +178,7 @@ export declare class SageMakerLanguageModel implements LanguageModelV1 {
178
178
  provider: string;
179
179
  specificationVersion: string;
180
180
  endpointName: string;
181
- modelType: "huggingface" | "mistral" | "custom" | "claude" | "llama" | "jumpstart" | undefined;
181
+ modelType: "huggingface" | "mistral" | "custom" | "llama" | "claude" | "jumpstart" | undefined;
182
182
  region: string;
183
183
  };
184
184
  }
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Chunker Factory
3
+ *
4
+ * Factory for creating chunker instances with configuration.
5
+ * Follows the BaseFactory pattern for consistent lifecycle management.
6
+ */
7
+ import { BaseFactory } from "../core/infrastructure/index.js";
8
+ import type { Chunker, ChunkerConfig, ChunkerMetadata, ChunkingStrategy } from "./types.js";
9
+ /**
10
+ * Chunker Factory
11
+ *
12
+ * Creates chunker instances based on strategy with configuration support.
13
+ * Uses lazy loading via dynamic imports to avoid circular dependencies.
14
+ */
15
+ export declare class ChunkerFactory extends BaseFactory<Chunker, ChunkerConfig> {
16
+ private static instance;
17
+ private metadataMap;
18
+ private constructor();
19
+ /**
20
+ * Get singleton instance
21
+ */
22
+ static getInstance(): ChunkerFactory;
23
+ /**
24
+ * Reset singleton (for testing)
25
+ */
26
+ static resetInstance(): void;
27
+ /**
28
+ * Register all default chunkers
29
+ */
30
+ protected registerAll(): Promise<void>;
31
+ /**
32
+ * Register a chunker with metadata and aliases
33
+ */
34
+ registerChunker(strategy: ChunkingStrategy | string, factory: (config?: ChunkerConfig) => Promise<Chunker>, metadata: ChunkerMetadata): void;
35
+ /**
36
+ * Create a chunker by strategy name or alias
37
+ */
38
+ createChunker(strategyOrAlias: string, config?: ChunkerConfig): Promise<Chunker>;
39
+ /**
40
+ * Get metadata for a chunker
41
+ */
42
+ getChunkerMetadata(strategyOrAlias: string): ChunkerMetadata | undefined;
43
+ /**
44
+ * Get default configuration for a chunker
45
+ */
46
+ getDefaultConfig(strategyOrAlias: string): ChunkerConfig | undefined;
47
+ /**
48
+ * Get available chunking strategies (not including aliases)
49
+ */
50
+ getAvailableStrategies(): Promise<ChunkingStrategy[]>;
51
+ /**
52
+ * Get all aliases mapped to their strategies
53
+ */
54
+ getStrategyAliases(): Map<string, string>;
55
+ /**
56
+ * Check if a strategy exists
57
+ */
58
+ hasStrategy(strategyOrAlias: string): boolean;
59
+ /**
60
+ * Get chunkers suitable for a use case
61
+ */
62
+ getChunkersForUseCase(useCase: string): ChunkingStrategy[];
63
+ /**
64
+ * Get all chunker metadata
65
+ */
66
+ getAllMetadata(): Map<string, ChunkerMetadata>;
67
+ /**
68
+ * Clear factory and metadata
69
+ */
70
+ clear(): void;
71
+ }
72
+ /**
73
+ * Global chunker factory singleton
74
+ */
75
+ export declare const chunkerFactory: ChunkerFactory;
76
+ /**
77
+ * Convenience function to create a chunker
78
+ */
79
+ export declare function createChunker(strategyOrAlias: string, config?: ChunkerConfig): Promise<Chunker>;
80
+ /**
81
+ * Convenience function to get available strategies
82
+ */
83
+ export declare function getAvailableStrategies(): Promise<ChunkingStrategy[]>;
84
+ /**
85
+ * Convenience function to get chunker metadata
86
+ */
87
+ export declare function getChunkerMetadata(strategyOrAlias: string): ChunkerMetadata | undefined;
88
+ /**
89
+ * Convenience function to get default config
90
+ */
91
+ export declare function getDefaultConfig(strategyOrAlias: string): ChunkerConfig | undefined;
@@ -0,0 +1,321 @@
1
+ /**
2
+ * Chunker Factory
3
+ *
4
+ * Factory for creating chunker instances with configuration.
5
+ * Follows the BaseFactory pattern for consistent lifecycle management.
6
+ */
7
+ import { BaseFactory } from "../core/infrastructure/index.js";
8
+ import { logger } from "../utils/logger.js";
9
+ import { ChunkingError, RAGErrorCodes } from "./errors/RAGError.js";
10
+ /**
11
+ * Default chunker metadata entries
12
+ */
13
+ const DEFAULT_CHUNKER_METADATA = {
14
+ character: {
15
+ description: "Splits text into fixed-size character chunks with optional overlap",
16
+ defaultConfig: { maxSize: 1000, overlap: 100 },
17
+ supportedOptions: ["maxSize", "overlap", "minSize"],
18
+ useCases: ["Simple text processing", "Fixed-size chunks needed"],
19
+ aliases: ["char", "fixed-size", "fixed"],
20
+ },
21
+ recursive: {
22
+ description: "Recursively splits text using ordered separators",
23
+ defaultConfig: {
24
+ maxSize: 1000,
25
+ overlap: 100,
26
+ separators: ["\n\n", "\n", ". ", " ", ""],
27
+ },
28
+ supportedOptions: ["maxSize", "overlap", "separators", "keepSeparators"],
29
+ useCases: ["General text documents", "Default choice"],
30
+ aliases: ["recursive-character", "langchain-default"],
31
+ },
32
+ sentence: {
33
+ description: "Splits text by sentence boundaries",
34
+ defaultConfig: { maxSize: 1000, overlap: 1 },
35
+ supportedOptions: [
36
+ "maxSize",
37
+ "overlap",
38
+ "boundaryDetection",
39
+ "maxSentences",
40
+ ],
41
+ useCases: ["Q&A applications", "Sentence-level analysis"],
42
+ aliases: ["sent", "sentence-based"],
43
+ },
44
+ token: {
45
+ description: "Splits text by token count using a specific tokenizer",
46
+ defaultConfig: { maxSize: 512, overlap: 50 },
47
+ supportedOptions: ["maxSize", "overlap", "tokenizer", "maxTokens"],
48
+ useCases: ["Token-aware splitting", "Model-specific chunks"],
49
+ aliases: ["tok", "tokenized"],
50
+ },
51
+ markdown: {
52
+ description: "Splits markdown content by headers and structural elements",
53
+ defaultConfig: { maxSize: 1000, overlap: 0 },
54
+ supportedOptions: ["maxSize", "headerLevels", "splitCodeBlocks"],
55
+ useCases: ["Documentation processing", "README files"],
56
+ aliases: ["md", "markdown-header"],
57
+ },
58
+ html: {
59
+ description: "Splits HTML content by semantic tags",
60
+ defaultConfig: { maxSize: 1000, overlap: 0 },
61
+ supportedOptions: [
62
+ "maxSize",
63
+ "splitTags",
64
+ "stripTags",
65
+ "preserveAttributes",
66
+ ],
67
+ useCases: ["Web content processing", "HTML documents"],
68
+ aliases: ["html-tag", "web"],
69
+ },
70
+ json: {
71
+ description: "Splits JSON documents by object boundaries",
72
+ defaultConfig: { maxSize: 1000, overlap: 0 },
73
+ supportedOptions: ["maxSize", "maxDepth", "chunkKeys"],
74
+ useCases: ["API response processing", "Structured data"],
75
+ aliases: ["json-object", "structured"],
76
+ },
77
+ latex: {
78
+ description: "Splits LaTeX documents by sections and environments",
79
+ defaultConfig: { maxSize: 1000, overlap: 0 },
80
+ supportedOptions: ["maxSize", "environments", "splitMathBlocks"],
81
+ useCases: ["Academic papers", "Scientific documents"],
82
+ aliases: ["tex", "latex-section"],
83
+ },
84
+ semantic: {
85
+ description: "Uses LLM to identify semantically meaningful split points",
86
+ defaultConfig: { maxSize: 1000, overlap: 100 },
87
+ supportedOptions: [
88
+ "maxSize",
89
+ "modelName",
90
+ "provider",
91
+ "similarityThreshold",
92
+ ],
93
+ useCases: ["Advanced semantic understanding", "AI-enhanced chunking"],
94
+ aliases: ["llm", "ai-semantic"],
95
+ },
96
+ "semantic-markdown": {
97
+ description: "Combines markdown splitting with semantic similarity",
98
+ defaultConfig: { maxSize: 1000, overlap: 100 },
99
+ supportedOptions: ["maxSize", "similarityThreshold", "maxMergeSize"],
100
+ useCases: ["Context-aware documentation", "Knowledge bases"],
101
+ aliases: ["semantic-md", "smart-markdown"],
102
+ },
103
+ };
104
+ /**
105
+ * Chunker Factory
106
+ *
107
+ * Creates chunker instances based on strategy with configuration support.
108
+ * Uses lazy loading via dynamic imports to avoid circular dependencies.
109
+ */
110
+ export class ChunkerFactory extends BaseFactory {
111
+ static instance = null;
112
+ metadataMap = new Map();
113
+ constructor() {
114
+ super();
115
+ }
116
+ /**
117
+ * Get singleton instance
118
+ */
119
+ static getInstance() {
120
+ if (!ChunkerFactory.instance) {
121
+ ChunkerFactory.instance = new ChunkerFactory();
122
+ }
123
+ return ChunkerFactory.instance;
124
+ }
125
+ /**
126
+ * Reset singleton (for testing)
127
+ */
128
+ static resetInstance() {
129
+ if (ChunkerFactory.instance) {
130
+ ChunkerFactory.instance.clear();
131
+ ChunkerFactory.instance = null;
132
+ }
133
+ }
134
+ /**
135
+ * Register all default chunkers
136
+ */
137
+ async registerAll() {
138
+ // Register character chunker
139
+ this.registerChunker("character", async (config) => {
140
+ const { CharacterChunker } = await import("./chunkers/CharacterChunker.js");
141
+ return new CharacterChunker(config);
142
+ }, DEFAULT_CHUNKER_METADATA.character);
143
+ // Register recursive chunker
144
+ this.registerChunker("recursive", async (config) => {
145
+ const { RecursiveChunker } = await import("./chunkers/RecursiveChunker.js");
146
+ return new RecursiveChunker(config);
147
+ }, DEFAULT_CHUNKER_METADATA.recursive);
148
+ // Register sentence chunker
149
+ this.registerChunker("sentence", async (config) => {
150
+ const { SentenceChunker } = await import("./chunkers/SentenceChunker.js");
151
+ return new SentenceChunker(config);
152
+ }, DEFAULT_CHUNKER_METADATA.sentence);
153
+ // Register token chunker
154
+ this.registerChunker("token", async (config) => {
155
+ const { TokenChunker } = await import("./chunkers/TokenChunker.js");
156
+ return new TokenChunker(config);
157
+ }, DEFAULT_CHUNKER_METADATA.token);
158
+ // Register markdown chunker
159
+ this.registerChunker("markdown", async (config) => {
160
+ const { MarkdownChunker } = await import("./chunkers/MarkdownChunker.js");
161
+ return new MarkdownChunker(config);
162
+ }, DEFAULT_CHUNKER_METADATA.markdown);
163
+ // Register HTML chunker
164
+ this.registerChunker("html", async (config) => {
165
+ const { HTMLChunker } = await import("./chunkers/HTMLChunker.js");
166
+ return new HTMLChunker(config);
167
+ }, DEFAULT_CHUNKER_METADATA.html);
168
+ // Register JSON chunker
169
+ this.registerChunker("json", async (config) => {
170
+ const { JSONChunker } = await import("./chunkers/JSONChunker.js");
171
+ return new JSONChunker(config);
172
+ }, DEFAULT_CHUNKER_METADATA.json);
173
+ // Register LaTeX chunker
174
+ this.registerChunker("latex", async (config) => {
175
+ const { LaTeXChunker } = await import("./chunkers/LaTeXChunker.js");
176
+ return new LaTeXChunker(config);
177
+ }, DEFAULT_CHUNKER_METADATA.latex);
178
+ // Register semantic chunker (placeholder - uses recursive as fallback)
179
+ this.registerChunker("semantic", async (config) => {
180
+ // TODO: Implement dedicated SemanticChunker with LLM support
181
+ // For now, fall back to RecursiveChunker with semantic defaults
182
+ const { RecursiveChunker } = await import("./chunkers/RecursiveChunker.js");
183
+ return new RecursiveChunker(config);
184
+ }, DEFAULT_CHUNKER_METADATA.semantic);
185
+ // Register semantic-markdown chunker
186
+ this.registerChunker("semantic-markdown", async (config) => {
187
+ const { SemanticMarkdownChunker } = await import("./chunkers/SemanticMarkdownChunker.js");
188
+ return new SemanticMarkdownChunker(config);
189
+ }, DEFAULT_CHUNKER_METADATA["semantic-markdown"]);
190
+ logger.debug(`[ChunkerFactory] Registered ${this.items.size} chunking strategies`);
191
+ }
192
+ /**
193
+ * Register a chunker with metadata and aliases
194
+ */
195
+ registerChunker(strategy, factory, metadata) {
196
+ // Store metadata
197
+ this.metadataMap.set(strategy, metadata);
198
+ // Register with aliases
199
+ this.register(strategy, factory, metadata.aliases, { metadata });
200
+ logger.debug(`[ChunkerFactory] Registered chunker '${strategy}' with aliases: ${metadata.aliases?.join(", ") ?? "none"}`);
201
+ }
202
+ /**
203
+ * Create a chunker by strategy name or alias
204
+ */
205
+ async createChunker(strategyOrAlias, config) {
206
+ await this.ensureInitialized();
207
+ const resolvedName = this.resolveName(strategyOrAlias);
208
+ if (!this.has(resolvedName)) {
209
+ const available = this.getAvailable();
210
+ throw new ChunkingError(`Unknown chunking strategy: '${strategyOrAlias}'. Available strategies: ${available.join(", ")}`, {
211
+ code: RAGErrorCodes.CHUNKING_STRATEGY_NOT_FOUND,
212
+ details: {
213
+ requestedStrategy: strategyOrAlias,
214
+ availableStrategies: available,
215
+ },
216
+ });
217
+ }
218
+ try {
219
+ const chunker = await this.create(resolvedName, config);
220
+ logger.debug(`[ChunkerFactory] Created chunker '${resolvedName}' with config:`, config);
221
+ return chunker;
222
+ }
223
+ catch (error) {
224
+ throw new ChunkingError(`Failed to create chunker '${resolvedName}': ${error instanceof Error ? error.message : String(error)}`, {
225
+ code: RAGErrorCodes.CHUNKING_ERROR,
226
+ cause: error instanceof Error ? error : undefined,
227
+ details: { strategy: resolvedName, config },
228
+ });
229
+ }
230
+ }
231
+ /**
232
+ * Get metadata for a chunker
233
+ */
234
+ getChunkerMetadata(strategyOrAlias) {
235
+ const resolvedName = this.resolveName(strategyOrAlias);
236
+ return this.metadataMap.get(resolvedName);
237
+ }
238
+ /**
239
+ * Get default configuration for a chunker
240
+ */
241
+ getDefaultConfig(strategyOrAlias) {
242
+ const metadata = this.getChunkerMetadata(strategyOrAlias);
243
+ return metadata?.defaultConfig;
244
+ }
245
+ /**
246
+ * Get available chunking strategies (not including aliases)
247
+ */
248
+ async getAvailableStrategies() {
249
+ await this.ensureInitialized();
250
+ return this.getAvailable();
251
+ }
252
+ /**
253
+ * Get all aliases mapped to their strategies
254
+ */
255
+ getStrategyAliases() {
256
+ return this.getAliases();
257
+ }
258
+ /**
259
+ * Check if a strategy exists
260
+ */
261
+ hasStrategy(strategyOrAlias) {
262
+ const resolved = this.resolveName(strategyOrAlias);
263
+ return this.has(resolved);
264
+ }
265
+ /**
266
+ * Get chunkers suitable for a use case
267
+ */
268
+ getChunkersForUseCase(useCase) {
269
+ const matches = [];
270
+ const useCaseLower = useCase.toLowerCase();
271
+ for (const [strategy, metadata] of this.metadataMap) {
272
+ const hasMatch = metadata.useCases?.some((uc) => uc.toLowerCase().includes(useCaseLower)) ?? false;
273
+ if (hasMatch) {
274
+ matches.push(strategy);
275
+ }
276
+ }
277
+ return matches;
278
+ }
279
+ /**
280
+ * Get all chunker metadata
281
+ */
282
+ getAllMetadata() {
283
+ return new Map(this.metadataMap);
284
+ }
285
+ /**
286
+ * Clear factory and metadata
287
+ */
288
+ clear() {
289
+ super.clear();
290
+ this.metadataMap.clear();
291
+ }
292
+ }
293
+ /**
294
+ * Global chunker factory singleton
295
+ */
296
+ export const chunkerFactory = ChunkerFactory.getInstance();
297
+ /**
298
+ * Convenience function to create a chunker
299
+ */
300
+ export async function createChunker(strategyOrAlias, config) {
301
+ return chunkerFactory.createChunker(strategyOrAlias, config);
302
+ }
303
+ /**
304
+ * Convenience function to get available strategies
305
+ */
306
+ export async function getAvailableStrategies() {
307
+ return chunkerFactory.getAvailableStrategies();
308
+ }
309
+ /**
310
+ * Convenience function to get chunker metadata
311
+ */
312
+ export function getChunkerMetadata(strategyOrAlias) {
313
+ return chunkerFactory.getChunkerMetadata(strategyOrAlias);
314
+ }
315
+ /**
316
+ * Convenience function to get default config
317
+ */
318
+ export function getDefaultConfig(strategyOrAlias) {
319
+ return chunkerFactory.getDefaultConfig(strategyOrAlias);
320
+ }
321
+ //# sourceMappingURL=ChunkerFactory.js.map
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Chunker Registry
3
+ *
4
+ * Centralized registry for all chunking strategies with metadata
5
+ * and discovery capabilities. Follows the BaseRegistry pattern.
6
+ */
7
+ import { BaseRegistry } from "../core/infrastructure/index.js";
8
+ import type { Chunker, ChunkerConfig, ChunkerMetadata, ChunkingStrategy } from "./types.js";
9
+ /**
10
+ * Chunker Registry
11
+ *
12
+ * Manages registration and discovery of all chunking strategies.
13
+ * Extends BaseRegistry for consistent lifecycle management.
14
+ */
15
+ export declare class ChunkerRegistry extends BaseRegistry<Chunker, ChunkerMetadata> {
16
+ private static instance;
17
+ private aliasMap;
18
+ private constructor();
19
+ /**
20
+ * Get singleton instance
21
+ */
22
+ static getInstance(): ChunkerRegistry;
23
+ /**
24
+ * Reset singleton (for testing)
25
+ */
26
+ static resetInstance(): void;
27
+ /**
28
+ * Register all default chunkers
29
+ */
30
+ protected registerAll(): Promise<void>;
31
+ /**
32
+ * Register a chunker with aliases
33
+ */
34
+ registerChunker(strategy: ChunkingStrategy | string, factory: () => Promise<Chunker>, metadata: ChunkerMetadata): void;
35
+ /**
36
+ * Resolve strategy name from alias
37
+ */
38
+ resolveStrategy(nameOrAlias: string): ChunkingStrategy;
39
+ /**
40
+ * Get a chunker by strategy name or alias
41
+ */
42
+ getChunker(strategyOrAlias: string): Promise<Chunker>;
43
+ /**
44
+ * Get list of available chunker strategies
45
+ */
46
+ getAvailableChunkers(): Promise<ChunkingStrategy[]>;
47
+ /**
48
+ * Get metadata for a specific chunker
49
+ */
50
+ getChunkerMetadata(strategyOrAlias: string): ChunkerMetadata | undefined;
51
+ /**
52
+ * Get all aliases for a strategy
53
+ */
54
+ getAliasesForStrategy(strategy: ChunkingStrategy): string[];
55
+ /**
56
+ * Get all registered aliases
57
+ */
58
+ getAllAliases(): Map<string, ChunkingStrategy>;
59
+ /**
60
+ * Check if a strategy or alias exists
61
+ */
62
+ hasChunker(strategyOrAlias: string): boolean;
63
+ /**
64
+ * Get chunkers by use case
65
+ */
66
+ getChunkersByUseCase(useCase: string): ChunkingStrategy[];
67
+ /**
68
+ * Get default configuration for a chunker
69
+ */
70
+ getDefaultConfig(strategyOrAlias: string): ChunkerConfig | undefined;
71
+ /**
72
+ * Clear the registry (also clears aliases)
73
+ */
74
+ clear(): void;
75
+ }
76
+ /**
77
+ * Global chunker registry singleton
78
+ */
79
+ export declare const chunkerRegistry: ChunkerRegistry;
80
+ /**
81
+ * Convenience function to get available chunkers
82
+ */
83
+ export declare function getAvailableChunkers(): Promise<ChunkingStrategy[]>;
84
+ /**
85
+ * Convenience function to get chunker by strategy
86
+ */
87
+ export declare function getChunker(strategyOrAlias: string): Promise<Chunker>;
88
+ /**
89
+ * Convenience function to get chunker metadata
90
+ */
91
+ export declare function getChunkerMetadata(strategyOrAlias: string): ChunkerMetadata | undefined;