@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Sentence Chunker
3
+ *
4
+ * Splits text by sentence boundaries for semantically meaningful chunks.
5
+ */
6
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
/**
 * Sentence Chunker
 *
 * Packs consecutive sentences into chunks of at most `maxSize` characters.
 * A single sentence longer than `maxSize` is emitted as its own (oversized)
 * chunk rather than being cut mid-sentence.
 */
export class SentenceChunker extends BaseChunker {
    strategy = "sentence";
    /**
     * Default configuration for the sentence strategy.
     *
     * NOTE(review): `overlap` is expressed in sentences here, but `doChunk`
     * below never reads it - confirm whether the base class applies it.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 1, // Overlap in sentences
        };
    }
    /**
     * Group sentences into chunks no longer than `config.maxSize` characters.
     *
     * @param content - Text to split.
     * @param config - Chunker configuration; only `maxSize` is read here.
     * @returns Chunks built through `createChunk`, in document order.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        // Simple sentence splitting (can be enhanced with NLP)
        const sentences = this.splitIntoSentences(content);
        const chunks = [];
        let currentChunk = "";
        let chunkIndex = 0;
        // The sentences tile `content` exactly, so a running offset is always
        // the true position of the current chunk. Searching with indexOf()
        // instead can land on an earlier duplicate of the same text.
        let chunkStart = 0;
        for (const sentence of sentences) {
            if (currentChunk.length + sentence.length <= maxSize) {
                currentChunk += sentence;
                continue;
            }
            if (currentChunk.length > 0) {
                const chunkEnd = chunkStart + currentChunk.length;
                chunks.push(this.createChunk(currentChunk, chunkIndex++, chunkStart, chunkEnd));
                chunkStart = chunkEnd;
            }
            currentChunk = sentence;
        }
        // Add remaining chunk
        if (currentChunk.length > 0) {
            chunks.push(this.createChunk(currentChunk, chunkIndex, chunkStart, chunkStart + currentChunk.length));
        }
        return chunks;
    }
    /**
     * Split content into sentences.
     *
     * A sentence ends at a run of `.`, `!` or `?` that is followed by
     * whitespace or the end of the input. Abbreviations are NOT special-cased
     * ("Dr. Smith" splits after "Dr. ").
     *
     * The returned pieces are contiguous and concatenate back to `content`:
     * text the pattern has to skip (e.g. the "3." in "3.14 is pi.") is kept as
     * the start of the following sentence instead of being dropped.
     *
     * @param content - Text to split.
     * @returns Sentences in order, each including its trailing whitespace.
     */
    splitIntoSentences(content) {
        const sentencePattern = /[^.!?]*[.!?]+(?:\s|$)/g;
        const sentences = [];
        let cursor = 0;
        let match;
        while ((match = sentencePattern.exec(content)) !== null) {
            const matchEnd = match.index + match[0].length;
            // Slice from the cursor, not match.index, so skipped text is retained.
            sentences.push(content.slice(cursor, matchEnd));
            cursor = matchEnd;
        }
        // Handle remaining content without sentence ending
        if (cursor < content.length) {
            sentences.push(content.slice(cursor));
        }
        return sentences;
    }
}
67
+ //# sourceMappingURL=SentenceChunker.js.map
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Token Chunker
3
+ *
4
+ * Splits text by an estimated token count (approximated from word count).
5
+ * Useful for rough token budget management.
6
+ */
7
+ import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
8
+ import { BaseChunker } from "./BaseChunker.js";
9
/**
 * Chunker for the token strategy.
 *
 * Token-based splitting is approximated from word counts. For production,
 * integrate with a proper tokenizer (tiktoken, etc.).
 */
export declare class TokenChunker extends BaseChunker {
    /** Strategy identifier reported by this chunker. */
    readonly strategy: ChunkingStrategy;
    /** Default configuration for this strategy. */
    getDefaultConfig(): ChunkerConfig;
    /**
     * Split `content` into chunks according to `config`.
     *
     * @param content - Text to split.
     * @param config - Chunker configuration.
     * @returns The produced chunks.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Token Chunker
3
+ *
4
+ * Splits text by an estimated token count (approximated from word count).
5
+ * Useful for rough token budget management.
6
+ */
7
+ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
8
/**
 * Token Chunker
 *
 * Approximates token-based splitting using word count (about 1.3 tokens per
 * whitespace-delimited word). For production, integrate with a proper
 * tokenizer (tiktoken, etc.)
 */
export class TokenChunker extends BaseChunker {
    strategy = "token";
    /** Default configuration; `maxSize` and `overlap` are measured in tokens. */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 512, // Tokens
            overlap: 50, // Tokens
        };
    }
    /**
     * Split `content` into windows of words whose estimated token count
     * reaches `config.maxSize`, carrying roughly `config.overlap` tokens of
     * trailing words into the next window.
     *
     * Chunk text is the window's words joined by single spaces. The offsets
     * are the real positions of the first and last word in `content`, so they
     * stay correct when the source uses newlines, runs of spaces or repeated
     * words.
     *
     * @param content - Text to split.
     * @param config - Chunker configuration; `maxSize` and `overlap` are read.
     * @returns Chunks built through `createChunk`, in document order.
     */
    async doChunk(content, config) {
        const maxTokens = config.maxSize ?? 512;
        const overlapTokens = config.overlap ?? 50;
        // Estimate tokens (roughly 1.3 tokens per word on average)
        const tokensPerWord = 1.3;
        const overlapWords = Math.max(0, Math.ceil(overlapTokens / tokensPerWord));
        const chunks = [];
        let chunkIndex = 0;
        // Words of the chunk being built, each with its position in `content`.
        let pending = [];
        // Approximate tokenization using words
        // In production, use a proper tokenizer like tiktoken
        for (const match of content.matchAll(/\S+/g)) {
            const estimatedTokens = Math.ceil(pending.length * tokensPerWord);
            if (pending.length > 0 && estimatedTokens >= maxTokens) {
                chunks.push(this.createChunk(pending.map((w) => w.text).join(" "), chunkIndex++, pending[0].start, pending[pending.length - 1].end));
                // Keep overlap words. slice(-0) would keep the whole window, and
                // an overlap as large as the window would never advance, so cap
                // it at one word fewer than the window.
                const keep = Math.min(overlapWords, pending.length - 1);
                pending = keep > 0 ? pending.slice(-keep) : [];
            }
            const start = match.index ?? 0;
            pending.push({ text: match[0], start, end: start + match[0].length });
        }
        // Add remaining chunk
        if (pending.length > 0) {
            chunks.push(this.createChunk(pending.map((w) => w.text).join(" "), chunkIndex, pending[0].start, pending[pending.length - 1].end));
        }
        return chunks;
    }
}
62
+ //# sourceMappingURL=TokenChunker.js.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Chunkers Index
3
+ *
4
+ * Exports all chunker implementations.
5
+ */
6
+ export { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
+ export { CharacterChunker } from "./CharacterChunker.js";
8
+ export { RecursiveChunker } from "./RecursiveChunker.js";
9
+ export { SentenceChunker } from "./SentenceChunker.js";
10
+ export { TokenChunker } from "./TokenChunker.js";
11
+ export { MarkdownChunker } from "./MarkdownChunker.js";
12
+ export { HTMLChunker } from "./HTMLChunker.js";
13
+ export { JSONChunker } from "./JSONChunker.js";
14
+ export { LaTeXChunker } from "./LaTeXChunker.js";
15
+ export { SemanticMarkdownChunker } from "./SemanticMarkdownChunker.js";
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Chunkers Index
3
+ *
4
+ * Exports all chunker implementations.
5
+ */
6
+ export { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
7
+ export { CharacterChunker } from "./CharacterChunker.js";
8
+ export { RecursiveChunker } from "./RecursiveChunker.js";
9
+ export { SentenceChunker } from "./SentenceChunker.js";
10
+ export { TokenChunker } from "./TokenChunker.js";
11
+ export { MarkdownChunker } from "./MarkdownChunker.js";
12
+ export { HTMLChunker } from "./HTMLChunker.js";
13
+ export { JSONChunker } from "./JSONChunker.js";
14
+ export { LaTeXChunker } from "./LaTeXChunker.js";
15
+ export { SemanticMarkdownChunker } from "./SemanticMarkdownChunker.js";
16
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Character-based Chunker
3
+ *
4
+ * Simple character-based text splitting with configurable separator and overlap.
5
+ * Best for unstructured text where character count is the primary concern.
6
+ */
7
+ import type { Chunker, Chunk, ChunkerValidationResult, CharacterChunkerConfig, BaseChunkerConfig } from "../types.js";
8
/**
 * Chunker for the "character" strategy.
 *
 * Splits text by character count, optionally breaking on a separator first.
 */
export declare class CharacterChunker implements Chunker {
    /** Strategy identifier; always "character". */
    readonly strategy: "character";
    /**
     * Split `text` into chunks.
     *
     * @param text - Text to split.
     * @param config - Optional character-chunker settings.
     * @returns The produced chunks.
     */
    chunk(text: string, config?: CharacterChunkerConfig): Promise<Chunk[]>;
    /**
     * Check a configuration object for problems.
     *
     * @param config - Configuration to validate.
     * @returns The validation outcome.
     */
    validateConfig(config: BaseChunkerConfig): ChunkerValidationResult;
}
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Character-based Chunker
3
+ *
4
+ * Simple character-based text splitting with configurable separator and overlap.
5
+ * Best for unstructured text where character count is the primary concern.
6
+ */
7
+ import { randomUUID } from "crypto";
8
+ /**
9
+ * Character-based chunker implementation
10
+ * Splits text by character count with optional separator
11
+ */
12
+ export class CharacterChunker {
13
+ strategy = "character";
14
+ async chunk(text, config) {
15
+ const { maxSize = 1000, overlap = 0, separator = "", keepSeparator = false, trimWhitespace = true, metadata = {}, } = config || {};
16
+ const chunks = [];
17
+ const documentId = randomUUID();
18
+ if (!text || text.length === 0) {
19
+ return chunks;
20
+ }
21
+ // Split by separator if provided
22
+ let segments;
23
+ if (separator) {
24
+ segments = text.split(separator);
25
+ if (keepSeparator && separator) {
26
+ segments = segments.map((s, i) => i < segments.length - 1 ? s + separator : s);
27
+ }
28
+ }
29
+ else {
30
+ segments = [text];
31
+ }
32
+ let currentChunk = "";
33
+ let chunkIndex = 0;
34
+ let startPosition = 0;
35
+ for (const segment of segments) {
36
+ if (currentChunk.length + segment.length <= maxSize) {
37
+ currentChunk += segment;
38
+ }
39
+ else {
40
+ // Save current chunk if it has content
41
+ if (currentChunk.length > 0) {
42
+ const chunkText = trimWhitespace ? currentChunk.trim() : currentChunk;
43
+ if (chunkText.length > 0) {
44
+ chunks.push({
45
+ id: randomUUID(),
46
+ text: chunkText,
47
+ metadata: {
48
+ documentId,
49
+ chunkIndex,
50
+ startPosition,
51
+ endPosition: startPosition + currentChunk.length,
52
+ documentType: "text",
53
+ custom: metadata,
54
+ },
55
+ });
56
+ chunkIndex++;
57
+ }
58
+ }
59
+ // Handle overlap
60
+ if (overlap > 0 && currentChunk.length > overlap) {
61
+ currentChunk = currentChunk.slice(-overlap) + segment;
62
+ startPosition = startPosition + currentChunk.length - overlap;
63
+ }
64
+ else {
65
+ startPosition += currentChunk.length;
66
+ currentChunk = segment;
67
+ }
68
+ // If segment is larger than maxSize, split it further
69
+ while (currentChunk.length > maxSize) {
70
+ const chunkText = trimWhitespace
71
+ ? currentChunk.slice(0, maxSize).trim()
72
+ : currentChunk.slice(0, maxSize);
73
+ chunks.push({
74
+ id: randomUUID(),
75
+ text: chunkText,
76
+ metadata: {
77
+ documentId,
78
+ chunkIndex,
79
+ startPosition,
80
+ endPosition: startPosition + maxSize,
81
+ documentType: "text",
82
+ custom: metadata,
83
+ },
84
+ });
85
+ chunkIndex++;
86
+ const overlapStart = Math.max(0, maxSize - overlap);
87
+ currentChunk = currentChunk.slice(overlapStart);
88
+ startPosition += overlapStart;
89
+ }
90
+ }
91
+ }
92
+ // Don't forget the last chunk
93
+ if (currentChunk.length > 0) {
94
+ const chunkText = trimWhitespace ? currentChunk.trim() : currentChunk;
95
+ if (chunkText.length > 0) {
96
+ chunks.push({
97
+ id: randomUUID(),
98
+ text: chunkText,
99
+ metadata: {
100
+ documentId,
101
+ chunkIndex,
102
+ startPosition,
103
+ endPosition: startPosition + currentChunk.length,
104
+ documentType: "text",
105
+ custom: metadata,
106
+ },
107
+ });
108
+ }
109
+ }
110
+ // Update total chunks count
111
+ chunks.forEach((chunk) => {
112
+ chunk.metadata.totalChunks = chunks.length;
113
+ });
114
+ return chunks;
115
+ }
116
+ validateConfig(config) {
117
+ const errors = [];
118
+ const warnings = [];
119
+ const charConfig = config;
120
+ if (charConfig.maxSize !== undefined && charConfig.maxSize <= 0) {
121
+ errors.push("maxSize must be greater than 0");
122
+ }
123
+ if (charConfig.overlap !== undefined && charConfig.overlap < 0) {
124
+ errors.push("overlap must be non-negative");
125
+ }
126
+ if (charConfig.overlap !== undefined && charConfig.maxSize !== undefined) {
127
+ if (charConfig.overlap >= charConfig.maxSize) {
128
+ errors.push("overlap must be less than maxSize");
129
+ }
130
+ }
131
+ if (charConfig.minSize !== undefined && charConfig.maxSize !== undefined) {
132
+ if (charConfig.minSize > charConfig.maxSize) {
133
+ warnings.push("minSize is greater than maxSize, some chunks may be smaller than minSize");
134
+ }
135
+ }
136
+ return {
137
+ valid: errors.length === 0,
138
+ errors,
139
+ warnings,
140
+ };
141
+ }
142
+ }
143
+ //# sourceMappingURL=characterChunker.js.map
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Chunker Registry
3
+ *
4
+ * Central registry for all chunking strategies following NeuroLink's registry pattern.
5
+ * Provides factory methods for creating chunker instances.
6
+ */
7
+ import type { Chunker, ChunkingStrategy } from "../types.js";
8
/**
 * Static registry mapping chunking strategy names to chunker factories.
 * Follows NeuroLink's factory pattern with lazy initialization.
 */
export declare class ChunkerRegistry {
    private static chunkers;
    private static initialized;
    /**
     * Register all built-in chunkers.
     */
    static initialize(): void;
    /**
     * Register a custom chunker under a strategy name.
     * @param strategy - Strategy name to register under
     * @param factory - Function that creates a chunker instance on demand
     */
    static register(strategy: ChunkingStrategy, factory: () => Chunker): void;
    /**
     * Create a chunker for a strategy.
     * @param strategy - Chunking strategy name
     * @returns Chunker instance produced by the registered factory
     * @throws Error if the strategy is not registered
     */
    static get(strategy: ChunkingStrategy): Chunker;
    /**
     * List every registered strategy.
     * @returns Array of strategy names
     */
    static getAvailableStrategies(): ChunkingStrategy[];
    /**
     * Tell whether a strategy has a registered factory.
     * @param strategy - Strategy name to check
     * @returns True if the strategy is registered
     */
    static has(strategy: ChunkingStrategy): boolean;
    /**
     * Recommend a strategy for a kind of content.
     * @param contentType - Document type or MIME type
     * @returns Recommended chunking strategy
     */
    static getRecommendedStrategy(contentType: string): ChunkingStrategy;
    /**
     * Look up the default configuration of a strategy.
     * @param strategy - Chunking strategy
     * @returns Default configuration object
     */
    static getDefaultConfig(strategy: ChunkingStrategy): Record<string, unknown>;
    /**
     * Reset the registry (useful for testing).
     */
    static reset(): void;
}
60
/**
 * Chunk text with a named strategy in a single call.
 * @param text - Text to chunk
 * @param strategy - Chunking strategy (default: "recursive")
 * @param config - Strategy-specific configuration
 * @returns Promise resolving to the array of chunks
 */
export declare function chunkText(
    text: string,
    strategy?: ChunkingStrategy,
    config?: Record<string, unknown>,
): Promise<import("../types.js").Chunk[]>;
@@ -0,0 +1,195 @@
1
+ /**
2
+ * Chunker Registry
3
+ *
4
+ * Central registry for all chunking strategies following NeuroLink's registry pattern.
5
+ * Provides factory methods for creating chunker instances.
6
+ */
7
+ import { SemanticMarkdownChunker } from "../chunkers/SemanticMarkdownChunker.js";
8
+ import { CharacterChunker } from "./characterChunker.js";
9
+ import { HTMLChunker } from "./htmlChunker.js";
10
+ import { JSONChunker } from "./jsonChunker.js";
11
+ import { LaTeXChunker } from "./latexChunker.js";
12
+ import { MarkdownChunker } from "./markdownChunker.js";
13
+ import { RecursiveChunker } from "./recursiveChunker.js";
14
+ import { SemanticChunker } from "./semanticChunker.js";
15
+ import { SentenceChunker } from "./sentenceChunker.js";
16
+ import { TokenChunker } from "./tokenChunker.js";
17
+ /**
18
+ * Registry for chunking strategies
19
+ * Follows NeuroLink's factory pattern with lazy initialization
20
+ */
21
+ export class ChunkerRegistry {
22
+ static chunkers = new Map();
23
+ static initialized = false;
24
+ /**
25
+ * Initialize all built-in chunkers
26
+ */
27
+ static initialize() {
28
+ if (ChunkerRegistry.initialized) {
29
+ return;
30
+ }
31
+ ChunkerRegistry.register("character", () => new CharacterChunker());
32
+ ChunkerRegistry.register("recursive", () => new RecursiveChunker());
33
+ ChunkerRegistry.register("sentence", () => new SentenceChunker());
34
+ ChunkerRegistry.register("token", () => new TokenChunker());
35
+ ChunkerRegistry.register("markdown", () => new MarkdownChunker());
36
+ ChunkerRegistry.register("html", () => new HTMLChunker());
37
+ ChunkerRegistry.register("json", () => new JSONChunker());
38
+ ChunkerRegistry.register("latex", () => new LaTeXChunker());
39
+ ChunkerRegistry.register("semantic", () => new SemanticChunker());
40
+ ChunkerRegistry.register("semantic-markdown", () => new SemanticMarkdownChunker());
41
+ ChunkerRegistry.initialized = true;
42
+ }
43
+ /**
44
+ * Register a custom chunker
45
+ * @param strategy - Strategy name
46
+ * @param factory - Factory function that creates chunker instance
47
+ */
48
+ static register(strategy, factory) {
49
+ ChunkerRegistry.chunkers.set(strategy, factory);
50
+ }
51
+ /**
52
+ * Get a chunker by strategy name
53
+ * @param strategy - Chunking strategy name
54
+ * @returns Chunker instance
55
+ * @throws Error if strategy is not registered
56
+ */
57
+ static get(strategy) {
58
+ ChunkerRegistry.initialize();
59
+ const factory = ChunkerRegistry.chunkers.get(strategy);
60
+ if (!factory) {
61
+ throw new Error(`Unknown chunking strategy: ${strategy}. Available strategies: ${ChunkerRegistry.getAvailableStrategies().join(", ")}`);
62
+ }
63
+ return factory();
64
+ }
65
+ /**
66
+ * Get all available chunking strategies
67
+ * @returns Array of strategy names
68
+ */
69
+ static getAvailableStrategies() {
70
+ ChunkerRegistry.initialize();
71
+ return Array.from(ChunkerRegistry.chunkers.keys());
72
+ }
73
+ /**
74
+ * Check if a strategy is registered
75
+ * @param strategy - Strategy name to check
76
+ * @returns True if strategy is registered
77
+ */
78
+ static has(strategy) {
79
+ ChunkerRegistry.initialize();
80
+ return ChunkerRegistry.chunkers.has(strategy);
81
+ }
82
+ /**
83
+ * Get strategy recommendation based on content type
84
+ * @param contentType - Document type or MIME type
85
+ * @returns Recommended chunking strategy
86
+ */
87
+ static getRecommendedStrategy(contentType) {
88
+ const normalized = contentType.toLowerCase();
89
+ if (normalized.includes("markdown") || normalized === "md") {
90
+ return "markdown";
91
+ }
92
+ if (normalized.includes("html") || normalized.includes("htm")) {
93
+ return "html";
94
+ }
95
+ if (normalized.includes("json")) {
96
+ return "json";
97
+ }
98
+ // Check for latex specifically - don't match "text" which contains "tex"
99
+ if (normalized.includes("latex") ||
100
+ normalized === "tex" ||
101
+ normalized.endsWith("/tex")) {
102
+ return "latex";
103
+ }
104
+ if (normalized.includes("code") || normalized.includes("programming")) {
105
+ return "recursive";
106
+ }
107
+ if (normalized.includes("document") || normalized.includes("text")) {
108
+ return "sentence";
109
+ }
110
+ // Default to recursive for general text
111
+ return "recursive";
112
+ }
113
+ /**
114
+ * Get default configuration for a strategy
115
+ * @param strategy - Chunking strategy
116
+ * @returns Default configuration object
117
+ */
118
+ static getDefaultConfig(strategy) {
119
+ const defaults = {
120
+ character: {
121
+ maxSize: 1000,
122
+ overlap: 0,
123
+ separator: "",
124
+ keepSeparator: false,
125
+ },
126
+ recursive: {
127
+ maxSize: 1000,
128
+ overlap: 200,
129
+ separators: ["\n\n", "\n", ". ", " ", ""],
130
+ },
131
+ sentence: {
132
+ maxSize: 1000,
133
+ overlap: 0,
134
+ minSentences: 1,
135
+ sentenceEnders: [".", "!", "?"],
136
+ },
137
+ token: {
138
+ maxTokens: 512,
139
+ tokenOverlap: 50,
140
+ tokenizer: "cl100k_base",
141
+ },
142
+ markdown: {
143
+ maxSize: 1000,
144
+ headerLevels: [1, 2, 3],
145
+ preserveCodeBlocks: true,
146
+ includeHeader: true,
147
+ },
148
+ html: {
149
+ maxSize: 1000,
150
+ splitTags: ["div", "p", "section", "article"],
151
+ extractTextOnly: false,
152
+ },
153
+ json: {
154
+ maxSize: 1000,
155
+ maxDepth: 10,
156
+ includeJsonPath: true,
157
+ },
158
+ latex: {
159
+ maxSize: 1000,
160
+ splitEnvironments: ["section", "subsection", "chapter"],
161
+ preserveMath: true,
162
+ },
163
+ semantic: {
164
+ maxSize: 1000,
165
+ similarityThreshold: 0.7,
166
+ joinThreshold: 100,
167
+ },
168
+ "semantic-markdown": {
169
+ maxSize: 1000,
170
+ overlap: 100,
171
+ similarityThreshold: 0.7,
172
+ },
173
+ };
174
+ return defaults[strategy] || { maxSize: 1000 };
175
+ }
176
+ /**
177
+ * Reset the registry (useful for testing)
178
+ */
179
+ static reset() {
180
+ ChunkerRegistry.chunkers.clear();
181
+ ChunkerRegistry.initialized = false;
182
+ }
183
+ }
184
/**
 * Chunk text in one call by resolving the strategy through the registry.
 * @param text - Text to chunk
 * @param strategy - Chunking strategy (default: "recursive")
 * @param config - Strategy-specific configuration
 * @returns Array of chunks
 */
export async function chunkText(text, strategy = "recursive", config) {
    return ChunkerRegistry.get(strategy).chunk(text, config);
}
195
+ //# sourceMappingURL=chunkerRegistry.js.map
@@ -0,0 +1,34 @@
1
+ /**
2
+ * HTML-aware Chunker
3
+ *
4
+ * Splits HTML documents based on tag structure while preserving semantics.
5
+ * Best for web pages, email templates, and structured HTML content.
6
+ */
7
+ import type { BaseChunkerConfig, Chunk, Chunker, ChunkerValidationResult, HTMLChunkerConfig } from "../types.js";
8
/**
 * Chunker for the "html" strategy.
 *
 * Splits based on HTML structure (tags, elements).
 */
export declare class HTMLChunker implements Chunker {
    /** Strategy identifier; always "html". */
    readonly strategy: "html";
    private readonly defaultSplitTags;
    private readonly defaultPreserveTags;
    /**
     * Split an HTML document into chunks.
     *
     * @param text - HTML source to split.
     * @param config - Optional HTML-chunker settings.
     * @returns The produced chunks.
     */
    chunk(text: string, config?: HTMLChunkerConfig): Promise<Chunk[]>;
    /**
     * Split HTML by structural tags.
     */
    private splitByTags;
    /**
     * Parse HTML attributes from a string.
     */
    private parseAttributes;
    /**
     * Extract plain text from HTML.
     */
    private extractText;
    /**
     * Split content that exceeds the maximum size.
     */
    private splitContent;
    /**
     * Check a configuration object for problems.
     *
     * @param config - Configuration to validate.
     * @returns The validation outcome.
     */
    validateConfig(config: BaseChunkerConfig): ChunkerValidationResult;
}