@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,39 @@
1
+ /**
2
+ * HTML Chunker
3
+ *
4
+ * Strips markup from HTML content and splits the remaining text into size-bounded chunks.
5
+ */
6
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
/**
 * HTML Chunker
 *
 * Reduces an HTML document to plain text (see {@link HTMLChunker.stripHtml})
 * and cuts that text into pieces of at most `maxSize` characters.
 * No tag-aware ("semantic") splitting is performed.
 *
 * NOTE(review): the start/end values handed to `createChunk` come from
 * `splitBySizeWithOverlap` run on the stripped text, so they presumably index
 * into the stripped text rather than the original markup - confirm in BaseChunker.
 */
export class HTMLChunker extends BaseChunker {
    strategy = "html";
    /**
     * Base defaults with `maxSize` 1000 and `overlap` 0.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Strip markup from `content` and split the remaining text by size.
     *
     * @param content - raw HTML
     * @param config - only `maxSize` is read (defaults to 1000); overlap is fixed at 0
     * @returns one chunk per size-bounded segment of the stripped text
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const textContent = this.stripHtml(content);
        // Simple character-based splitting, no overlap.
        const segments = this.splitBySizeWithOverlap(textContent, maxSize, 0);
        return segments.map((segment, index) => this.createChunk(segment.text, index, segment.start, segment.end));
    }
    /**
     * Strip HTML tags from content.
     *
     * Removal order matters:
     *  1. comments first, so a `>` inside a comment cannot end the generic tag
     *     match early and leak the rest of the comment into the text;
     *  2. whole `<script>` / `<style>` elements including their bodies
     *     (closing tags may contain whitespace, e.g. `</script >`);
     *  3. every remaining tag, replaced by a space so adjacent words stay apart;
     *  4. whitespace runs collapsed to a single space and the result trimmed.
     *
     * Character references such as `&amp;` are left as-is.
     * This is text extraction for chunking, not a security sanitizer.
     *
     * @param html - raw HTML
     * @returns whitespace-normalised text content
     */
    stripHtml(html) {
        return html
            .replace(/<!--[\s\S]*?-->/g, "")
            .replace(/<script\b[^>]*>[\s\S]*?<\/script\b[^>]*>/gi, "")
            .replace(/<style\b[^>]*>[\s\S]*?<\/style\b[^>]*>/gi, "")
            .replace(/<[^>]+>/g, " ")
            .replace(/\s+/g, " ")
            .trim();
    }
}
39
+ //# sourceMappingURL=HTMLChunker.js.map
@@ -0,0 +1,19 @@
1
+ /**
2
+ * JSON Chunker
3
+ *
4
+ * Splits JSON documents by object boundaries.
5
+ */
6
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
7
+ import { BaseChunker } from "./BaseChunker.js";
8
+ /**
9
+ * JSON Chunker
+ *
+ * Declaration of the chunker whose implementation sets `strategy` to "json".
+ * The implementation parses the input with `JSON.parse`, serialises each
+ * flattened item with two-space indentation and emits one chunk per item,
+ * splitting by size any item whose serialised form is longer than `maxSize`.
+ * Input that is not valid JSON makes `doChunk` reject with a `ChunkingError`.
+ * Defaults supplied by `getDefaultConfig`: `maxSize` 1000, `overlap` 0.
10
+ */
11
+ export declare class JSONChunker extends BaseChunker {
12
+ readonly strategy: ChunkingStrategy;
13
+ getDefaultConfig(): ChunkerConfig;
14
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
15
+ /**
16
+ * Flatten JSON into array of objects
+ *
+ * An array is returned unchanged, a non-null object becomes a one-element
+ * array, and any other value is wrapped as `[{ value }]`.
17
+ */
18
+ private flattenJson;
19
+ }
@@ -0,0 +1,69 @@
1
+ /**
2
+ * JSON Chunker
3
+ *
4
+ * Splits JSON documents by object boundaries.
5
+ */
6
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
+ import { ChunkingError, RAGErrorCodes } from "../errors/RAGError.js";
8
/**
 * JSON Chunker
 *
 * Parses a JSON document and emits one chunk per top-level item: every
 * element of a top-level array, or the document itself when it is a single
 * object or primitive. Items are re-serialised with two-space indentation;
 * an item longer than `maxSize` is split by size.
 */
export class JSONChunker extends BaseChunker {
    strategy = "json";
    /**
     * Base defaults with `maxSize` 1000 and `overlap` 0.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Chunk a JSON document.
     *
     * @param content - JSON text
     * @param config - only `maxSize` is read (defaults to 1000)
     * @returns chunks numbered consecutively in emission order
     * @throws ChunkingError when `content` is not valid JSON
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        let parsed;
        try {
            parsed = JSON.parse(content);
        }
        catch {
            throw new ChunkingError("Invalid JSON content", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                strategy: this.strategy,
            });
        }
        const chunks = [];
        const items = this.flattenJson(parsed);
        // Position in `content` after which the next item is searched for, so
        // items that serialise alike are not all mapped onto the first match.
        let searchFrom = 0;
        for (let i = 0; i < items.length; i++) {
            const item = items[i];
            // Only skip holes and nulls; 0, false and "" are real data.
            if (item === undefined || item === null) {
                continue;
            }
            const jsonString = JSON.stringify(item, null, 2);
            if (jsonString.length > maxSize) {
                // Split large objects.
                // NOTE(review): segment.start/end presumably index into the
                // re-serialised item, not into `content` - confirm in BaseChunker.
                for (const segment of this.splitBySizeWithOverlap(jsonString, maxSize, 0)) {
                    chunks.push(this.createChunk(segment.text, chunks.length, segment.start, segment.end));
                }
                continue;
            }
            // The re-serialised text rarely matches the source byte for byte,
            // so only its first 20 characters are used as a probe; when even
            // that fails a synthetic offset derived from the item index is used.
            const located = content.indexOf(jsonString.slice(0, 20), searchFrom);
            const start = located >= 0 ? located : i * maxSize;
            const end = located >= 0 ? located + jsonString.length : (i + 1) * maxSize;
            // chunks.length (not i) keeps indices unique when an earlier item
            // was split into several chunks or skipped.
            chunks.push(this.createChunk(jsonString, chunks.length, start, end));
            if (located >= 0) {
                searchFrom = located + 1;
            }
        }
        return chunks;
    }
    /**
     * Flatten JSON into array of objects.
     *
     * @param data - any parsed JSON value
     * @returns the array itself, `[data]` for a non-null object, or
     *          `[{ value: data }]` for primitives and null
     */
    flattenJson(data) {
        if (Array.isArray(data)) {
            return data;
        }
        if (typeof data === "object" && data !== null) {
            return [data];
        }
        return [{ value: data }];
    }
}
69
+ //# sourceMappingURL=JSONChunker.js.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * LaTeX Chunker
3
+ *
4
+ * Splits LaTeX documents by sections and environments.
5
+ */
6
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
7
+ import { BaseChunker } from "./BaseChunker.js";
8
+ /**
9
+ * LaTeX Chunker
+ *
+ * Declaration of the chunker whose implementation sets `strategy` to "latex".
+ * The implementation starts a new section at each `\chapter`, `\section`,
+ * `\subsection`, `\subsubsection` or `\paragraph` command and splits by size
+ * any section longer than `maxSize`. Environments are not treated as
+ * boundaries by that implementation.
+ * Defaults supplied by `getDefaultConfig`: `maxSize` 1000, `overlap` 0.
10
+ */
11
+ export declare class LaTeXChunker extends BaseChunker {
12
+ readonly strategy: ChunkingStrategy;
13
+ getDefaultConfig(): ChunkerConfig;
14
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
15
+ }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * LaTeX Chunker
3
+ *
4
+ * Splits LaTeX documents at sectioning commands (\chapter, \section, \subsection, \subsubsection, \paragraph).
5
+ */
6
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
/**
 * LaTeX Chunker
 *
 * Cuts a LaTeX document at its sectioning commands. Each section (the
 * command plus everything up to the next command) becomes one chunk, or
 * several size-bounded chunks when it is longer than `maxSize`.
 * Text before the first sectioning command forms its own section.
 */
export class LaTeXChunker extends BaseChunker {
    strategy = "latex";
    /**
     * Base defaults with `maxSize` 1000 and `overlap` 0.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Chunk a LaTeX document by sectioning command.
     *
     * @param content - LaTeX source
     * @param config - only `maxSize` is read (defaults to 1000)
     * @returns chunks whose start/end are positions in `content`
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        // Also accepts the starred form (\section*{...}) and an optional
        // short title (\section[short]{long}).
        const sectionPattern = /\\(?:section|subsection|subsubsection|chapter|paragraph)\*?(?:\[[^\]]*\])?\{[^}]*\}/g;
        // Section start positions; position 0 covers any preamble text.
        const starts = [0];
        for (const match of content.matchAll(sectionPattern)) {
            const at = match.index ?? 0;
            if (at > 0) {
                starts.push(at);
            }
        }
        const chunks = [];
        for (let i = 0; i < starts.length; i++) {
            const from = starts[i] ?? 0;
            const raw = content.slice(from, starts[i + 1] ?? content.length);
            const trimmed = raw.trim();
            if (!trimmed) {
                continue;
            }
            // Exact position of the trimmed text: slice start plus the
            // leading whitespace that trim() removed. Tracking this directly
            // avoids indexOf landing on an earlier identical passage.
            const base = from + (raw.length - raw.trimStart().length);
            if (trimmed.length <= maxSize) {
                chunks.push(this.createChunk(trimmed, chunks.length, base, base + trimmed.length));
                continue;
            }
            // NOTE(review): assumes segment.start/end index into the string
            // passed to splitBySizeWithOverlap; they are shifted by `base` so
            // that large and small sections both report document offsets.
            for (const segment of this.splitBySizeWithOverlap(trimmed, maxSize, 0)) {
                chunks.push(this.createChunk(segment.text, chunks.length, base + segment.start, base + segment.end));
            }
        }
        return chunks;
    }
}
64
+ //# sourceMappingURL=LaTeXChunker.js.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Markdown Chunker
3
+ *
4
+ * Splits markdown content by headers and structural elements.
5
+ */
6
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
7
+ import { BaseChunker } from "./BaseChunker.js";
8
+ /**
9
+ * Markdown Chunker
+ *
+ * Declaration of the chunker whose implementation sets `strategy` to
+ * "markdown". The implementation starts a new section at every ATX header
+ * line (one to six `#` characters), attaches the following text to that
+ * header, and passes the header line to `createChunk` as `sectionContext`.
+ * Sections longer than `maxSize` are split by size.
+ * Defaults supplied by `getDefaultConfig`: `maxSize` 1000, `overlap` 0.
10
+ */
11
+ export declare class MarkdownChunker extends BaseChunker {
12
+ readonly strategy: ChunkingStrategy;
13
+ getDefaultConfig(): ChunkerConfig;
14
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
15
+ }
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Markdown Chunker
3
+ *
4
+ * Splits markdown content by headers and structural elements.
5
+ */
6
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
/**
 * Markdown Chunker
 *
 * Splits markdown at ATX headers (`#` to `######`). A section is a header
 * line plus the text up to the next header; text before the first header is
 * a header-less section. Each section becomes one chunk, or several
 * size-bounded chunks when it is longer than `maxSize`.
 */
export class MarkdownChunker extends BaseChunker {
    strategy = "markdown";
    /**
     * Base defaults with `maxSize` 1000 and `overlap` 0.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Chunk markdown by header.
     *
     * @param content - markdown source
     * @param config - only `maxSize` is read (defaults to 1000)
     * @returns chunks created with `{ sectionContext: <header line> }`
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        // [ \t]+ rather than \s+: the gap between the hashes and the title
        // must stay on the header line, otherwise a bare "#" line would
        // swallow the following paragraph as its title.
        const headerPattern = /^(#{1,6})[ \t]+(.+)$/gm;
        const sections = [];
        // Attach free text to the latest section, or open a header-less
        // section when no header has been seen yet. `rawStart` is where
        // `raw` begins in `content`.
        const absorb = (raw, rawStart) => {
            const text = raw.trim();
            if (!text) {
                return;
            }
            const latest = sections[sections.length - 1];
            if (latest) {
                latest.content += "\n\n" + text;
                return;
            }
            sections.push({
                header: "",
                content: text,
                level: 0,
                start: rawStart + (raw.length - raw.trimStart().length),
            });
        };
        let lastIndex = 0;
        for (const match of content.matchAll(headerPattern)) {
            const at = match.index ?? 0;
            if (at > lastIndex) {
                absorb(content.slice(lastIndex, at), lastIndex);
            }
            sections.push({
                header: match[0],
                content: "",
                level: match[1]?.length ?? 1,
                start: at,
            });
            lastIndex = at + match[0].length;
        }
        if (lastIndex < content.length) {
            absorb(content.slice(lastIndex), lastIndex);
        }
        // Convert sections to chunks.
        const chunks = [];
        let cursor = 0;
        // The chunk text normalises the gap after the header to "\n\n", so it
        // is not always a literal substring of the source. The search starts
        // no earlier than the section's own position, and `estimate` is used
        // when the text cannot be found.
        const emit = (text, section, estimate) => {
            const found = content.indexOf(text, Math.max(cursor, section.start));
            const start = found >= 0 ? found : Math.max(cursor, estimate);
            chunks.push(this.createChunk(text, chunks.length, start, start + text.length, "unknown", { sectionContext: section.header }));
            if (found >= 0) {
                cursor = found + 1;
            }
        };
        for (const section of sections) {
            const body = section.content.trim();
            const fullContent = section.header ? section.header + "\n\n" + body : body;
            if (!fullContent) {
                continue;
            }
            if (fullContent.length <= maxSize) {
                emit(fullContent, section, section.start);
                continue;
            }
            // Split if too large.
            // NOTE(review): sub.start is assumed to index into fullContent.
            for (const sub of this.splitBySizeWithOverlap(fullContent, maxSize, 0)) {
                emit(sub.text, section, section.start + (sub.start ?? 0));
            }
        }
        return chunks;
    }
}
103
+ //# sourceMappingURL=MarkdownChunker.js.map
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Recursive Chunker
3
+ *
4
+ * Recursively splits text using an ordered list of separators.
5
+ * Tries each separator in order until chunks are small enough.
6
+ */
7
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
8
+ import { BaseChunker } from "./BaseChunker.js";
9
+ /**
10
+ * Recursive Chunker
11
+ *
12
+ * Splits content using ordered separators, recursively breaking
13
+ * down text until chunks meet size requirements.
+ *
+ * The implementation sets `strategy` to "recursive". Its default separator
+ * order is paragraph break, newline, ". ", space, and finally the empty
+ * string, which stands for plain fixed-size slicing.
+ * Defaults supplied by `getDefaultConfig`: `maxSize` 1000, `overlap` 100.
14
+ */
15
+ export declare class RecursiveChunker extends BaseChunker {
16
+ readonly strategy: ChunkingStrategy;
17
+ getDefaultConfig(): ChunkerConfig;
18
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
19
+ /**
20
+ * Recursively split text using separators
+ *
+ * Text that already fits within the size limit is returned as a single piece.
21
+ */
22
+ private recursiveSplit;
23
+ /**
24
+ * Apply overlap between chunks
+ *
+ * Each chunk after the first is prefixed with the tail of the chunk before it.
25
+ */
26
+ private applyOverlap;
27
+ }
@@ -0,0 +1,140 @@
1
+ /**
2
+ * Recursive Chunker
3
+ *
4
+ * Recursively splits text using an ordered list of separators.
5
+ * Tries each separator in order until chunks are small enough.
6
+ */
7
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
8
/**
 * Default separators for recursive splitting, from coarsest to finest:
 * paragraph break, line break, sentence end, word gap, and finally the
 * empty string, which means "slice by size".
 */
const DEFAULT_SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
/**
 * Recursive Chunker
 *
 * Splits content using ordered separators, recursively breaking
 * down text until chunks meet size requirements.
 *
 * The text is first cut into non-overlapping pieces; overlap is then applied
 * exactly once over the flat list of pieces. Pieces are sized to
 * `maxSize - overlap` so that a chunk (piece plus overlap prefix) never
 * exceeds `maxSize`.
 */
export class RecursiveChunker extends BaseChunker {
    strategy = "recursive";
    /**
     * Base defaults with `maxSize` 1000, `overlap` 100 and a private copy of
     * the default separators (so callers cannot mutate the shared list).
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 100,
            separators: [...DEFAULT_SEPARATORS],
        };
    }
    /**
     * Chunk `content` recursively.
     *
     * @param content - text to split
     * @param config - reads `maxSize` (1000), `overlap` (100), `separators`
     *                 (DEFAULT_SEPARATORS) and `keepSeparators` (true)
     * @returns chunks with non-negative offsets into `content`
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const overlap = config.overlap ?? 100;
        const separators = config.separators ?? DEFAULT_SEPARATORS;
        const keepSeparators = config.keepSeparators ?? true;
        const plan = this.planSplit(content, separators, maxSize, overlap, keepSeparators);
        const texts = plan.overlap > 0 ? this.applyOverlap(plan.pieces, plan.overlap) : plan.pieces;
        const chunks = [];
        let pieceStart = 0; // start of the current piece; exact when separators are kept
        let cursor = 0; // search origin when separators are dropped
        for (let i = 0; i < texts.length; i++) {
            const text = texts[i] ?? "";
            const piece = plan.pieces[i] ?? "";
            let start;
            if (keepSeparators) {
                // Pieces tile `content` exactly, so the chunk begins at the
                // piece start minus the length of its overlap prefix.
                start = pieceStart - (text.length - piece.length);
                pieceStart += piece.length;
            }
            else {
                // Separators were removed, so the chunk may not be a literal
                // substring; fall back to the cursor instead of -1.
                const found = content.indexOf(text, cursor);
                start = found >= 0 ? found : cursor;
                if (found >= 0) {
                    cursor = found + 1;
                }
            }
            if (!text) {
                continue;
            }
            chunks.push(this.createChunk(text, chunks.length, start, start + text.length));
        }
        return chunks;
    }
    /**
     * Recursively split text using separators.
     *
     * @returns chunk texts, each after the first prefixed with the tail of
     *          the preceding piece when `overlap` is positive
     */
    recursiveSplit(text, separators, maxSize, overlap, keepSeparators) {
        const plan = this.planSplit(text, separators, maxSize, overlap, keepSeparators);
        return plan.overlap > 0 ? this.applyOverlap(plan.pieces, plan.overlap) : plan.pieces;
    }
    /**
     * Cut `text` into non-overlapping pieces and report the overlap to apply.
     *
     * The overlap is clamped to `maxSize - 1` so every piece keeps at least
     * one character of new text.
     */
    planSplit(text, separators, maxSize, overlap, keepSeparators) {
        if (text.length <= maxSize) {
            return { pieces: [text], overlap: 0 };
        }
        const effective = Math.max(0, Math.min(overlap, maxSize - 1));
        const budget = Math.max(1, maxSize - effective);
        return {
            pieces: this.splitPieces(text, separators, budget, keepSeparators),
            overlap: effective,
        };
    }
    /**
     * Split `text` into pieces of at most `limit` characters using the first
     * separator that occurs in it; a part that is still too long is split
     * again with the remaining, finer separators.
     */
    splitPieces(text, separators, limit, keepSeparators) {
        if (text.length <= limit) {
            return [text];
        }
        // First separator present in the text; "" always qualifies.
        const separator = separators.find((sep) => sep === "" || text.includes(sep)) ?? "";
        if (separator === "") {
            // No usable separator: plain fixed-size slices.
            const slices = [];
            for (let start = 0; start < text.length; start += limit) {
                slices.push(text.slice(start, start + limit));
            }
            return slices;
        }
        const finer = separators.slice(separators.indexOf(separator) + 1);
        const parts = text.split(separator);
        const pieces = [];
        let current = "";
        for (let i = 0; i < parts.length; i++) {
            const part = parts[i] ?? "";
            // split() removed the separators; put each one back after its
            // part, except after the last part.
            const toAdd = keepSeparators && i < parts.length - 1 ? part + separator : part;
            if (current.length + toAdd.length <= limit) {
                current += toAdd;
                continue;
            }
            // Current piece is full.
            if (current.length > 0) {
                pieces.push(current);
            }
            if (toAdd.length > limit) {
                // The part alone is too large: recurse with finer separators.
                for (const sub of this.splitPieces(toAdd, finer, limit, keepSeparators)) {
                    pieces.push(sub);
                }
                current = "";
            }
            else {
                current = toAdd;
            }
        }
        if (current.length > 0) {
            pieces.push(current);
        }
        return pieces;
    }
    /**
     * Apply overlap between chunks.
     *
     * @param chunks - non-overlapping pieces in document order
     * @param overlap - maximum number of trailing characters of the previous
     *                  piece to prepend
     * @returns a new array of the same length; the first entry is unchanged
     */
    applyOverlap(chunks, overlap) {
        // slice(-0) would return the whole string, so bail out early.
        if (chunks.length <= 1 || overlap <= 0) {
            return chunks;
        }
        return chunks.map((chunk, i) => {
            const previous = i > 0 ? chunks[i - 1] : undefined;
            if (!previous) {
                return chunk ?? "";
            }
            return previous.slice(-Math.min(overlap, previous.length)) + (chunk ?? "");
        });
    }
}
140
+ //# sourceMappingURL=RecursiveChunker.js.map
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Semantic Markdown Chunker
3
+ *
4
+ * Combines markdown splitting with semantic similarity for intelligent merging.
5
+ */
6
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
7
+ import { BaseChunker } from "./BaseChunker.js";
8
+ /**
9
+ * Semantic Markdown Chunker
10
+ *
11
+ * Extends markdown chunking with semantic awareness.
12
+ * Can be enhanced with embedding-based similarity.
+ *
+ * The implementation sets `strategy` to "semantic-markdown". It splits at
+ * ATX headers and then merges neighbouring sections by size alone; it
+ * does not compute embeddings or any similarity score.
+ * Defaults supplied by `getDefaultConfig`: `maxSize` 1000, `overlap` 100.
13
+ */
14
+ export declare class SemanticMarkdownChunker extends BaseChunker {
15
+ readonly strategy: ChunkingStrategy;
16
+ getDefaultConfig(): ChunkerConfig;
17
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
18
+ /**
19
+ * Merge small sections to optimize chunk sizes
+ *
+ * Consecutive sections are folded into one while the combined length stays
+ * within `maxSize`; the merged section keeps the first section's header.
20
+ */
21
+ private mergeSmallSections;
22
+ }
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Semantic Markdown Chunker
3
+ *
4
+ * Combines markdown header splitting with size-based merging of adjacent sections.
5
+ */
6
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
/**
 * Semantic Markdown Chunker
 *
 * Splits markdown at ATX headers, then merges neighbouring sections while
 * the merged text still fits in `maxSize`. Merging is decided by size only;
 * no embedding or similarity score is computed here.
 */
export class SemanticMarkdownChunker extends BaseChunker {
    strategy = "semantic-markdown";
    /**
     * Base defaults with `maxSize` 1000 and `overlap` 100.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 100,
        };
    }
    /**
     * Chunk markdown by header with size-based merging.
     *
     * @param content - markdown source
     * @param config - reads `maxSize` (1000) and `overlap` (100); overlap is
     *                 only used when a merged section must be split by size
     * @returns chunks created with `{ sectionContext: <first header line> }`
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const overlap = config.overlap ?? 100;
        // [ \t]+ rather than \s+: the gap between the hashes and the title
        // must stay on the header line, otherwise a bare "#" line would
        // swallow the following paragraph as its title.
        const headerPattern = /^(#{1,6})[ \t]+(.+)$/gm;
        const sections = [];
        // Attach free text to the latest section, or open a header-less
        // section when no header has been seen yet.
        const absorb = (raw) => {
            const text = raw.trim();
            if (!text) {
                return;
            }
            const latest = sections[sections.length - 1];
            if (latest) {
                latest.content += "\n\n" + text;
            }
            else {
                sections.push({ header: "", content: text });
            }
        };
        let lastIndex = 0;
        for (const match of content.matchAll(headerPattern)) {
            const at = match.index ?? 0;
            if (at > lastIndex) {
                absorb(content.slice(lastIndex, at));
            }
            sections.push({ header: match[0], content: "" });
            lastIndex = at + match[0].length;
        }
        if (lastIndex < content.length) {
            absorb(content.slice(lastIndex));
        }
        const mergedSections = this.mergeSmallSections(sections, maxSize);
        // Convert to chunks.
        const chunks = [];
        let offset = 0;
        // Merged text is re-joined with normalised blank lines, so only its
        // first 50 characters are used to locate it in the source; the
        // running offset is the fallback when even that probe is not found.
        const emit = (text, header) => {
            const located = content.indexOf(text.slice(0, 50), offset);
            const start = located >= 0 ? located : offset;
            chunks.push(this.createChunk(text, chunks.length, start, start + text.length, "unknown", { sectionContext: header }));
            if (located >= 0) {
                offset = located + 1;
            }
        };
        for (const section of mergedSections) {
            const body = section.content.trim();
            const fullContent = section.header ? section.header + "\n\n" + body : body;
            if (!fullContent) {
                continue;
            }
            if (fullContent.length <= maxSize) {
                emit(fullContent, section.header);
                continue;
            }
            for (const segment of this.splitBySizeWithOverlap(fullContent, maxSize, overlap)) {
                emit(segment.text, section.header);
            }
        }
        return chunks;
    }
    /**
     * Merge small sections to optimize chunk sizes.
     *
     * Walks the sections in order and folds each one into the current group
     * while the group's rendered text (exactly what doChunk will emit) stays
     * within `maxSize`. The size check is made on the real merged text, so
     * the separators inserted while merging are counted.
     *
     * @param sections - `{ header, content }` records in document order
     * @param maxSize - upper bound for a merged section's rendered length
     * @returns merged records; each keeps the header of its first member
     */
    mergeSmallSections(sections, maxSize) {
        const render = (entry) => {
            const body = entry.content.trim();
            return entry.header ? entry.header + "\n\n" + body : body;
        };
        const result = [];
        let current = null;
        for (const section of sections) {
            if (!current) {
                current = { ...section };
                continue;
            }
            const candidate = {
                ...current,
                content: current.content + "\n\n" + render(section),
            };
            if (render(candidate).length <= maxSize) {
                current = candidate;
            }
            else {
                result.push(current);
                current = { ...section };
            }
        }
        if (current) {
            result.push(current);
        }
        return result;
    }
}
139
+ //# sourceMappingURL=SemanticMarkdownChunker.js.map
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Sentence Chunker
3
+ *
4
+ * Splits text by sentence boundaries for semantically meaningful chunks.
5
+ */
6
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
7
+ import { BaseChunker } from "./BaseChunker.js";
8
+ /**
9
+ * Sentence Chunker
+ *
+ * Declaration only; the implementation is not shown alongside it here.
+ * Like the other chunkers it extends `BaseChunker` and exposes a `strategy`
+ * tag, a default configuration and the protected `doChunk` hook.
+ * NOTE(review): how sentence boundaries are detected cannot be told from
+ * this declaration - check SentenceChunker.js.
10
+ */
11
+ export declare class SentenceChunker extends BaseChunker {
12
+ readonly strategy: ChunkingStrategy;
13
+ getDefaultConfig(): ChunkerConfig;
14
+ protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
15
+ /**
16
+ * Split content into sentences
+ *
+ * Private helper; only its name is declared, without a signature.
17
+ */
18
+ private splitIntoSentences;
19
+ }