@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,248 @@
1
+ /**
2
+ * HTML-aware Chunker
3
+ *
4
+ * Splits HTML documents based on tag structure while preserving semantics.
5
+ * Best for web pages, email templates, and structured HTML content.
6
+ */
7
+ import { randomUUID } from "crypto";
8
/**
 * HTML-aware chunker implementation.
 *
 * Splits a document on structural elements using regular expressions (no DOM
 * parser is involved) and then sub-splits any section longer than `maxSize`.
 *
 * Known limitation: an element is matched lazily up to the first closing tag
 * of the same name, so nested same-name elements (a `div` inside a `div`) are
 * cut at the inner closing tag.
 */
export class HTMLChunker {
    strategy = "html";
    /** Elements whose boundaries start a new section when `splitTags` is not supplied. */
    defaultSplitTags = [
        "div",
        "p",
        "section",
        "article",
        "main",
        "aside",
        "header",
        "footer",
        "nav",
        "li",
        "tr",
        "td",
        "th",
    ];
    /** Elements that keep their parent section's markup intact when found inside it. */
    defaultPreserveTags = [
        "pre",
        "code",
        "table",
        "ul",
        "ol",
        "blockquote",
    ];
    /**
     * Chunk an HTML document.
     *
     * @param text - HTML source; an empty or missing value yields an empty array.
     * @param config - Optional settings: `maxSize`, `overlap`, `splitTags`,
     *   `preserveTags`, `extractTextOnly`, `includeTagMetadata`,
     *   `trimWhitespace`, `metadata`.
     * @returns Chunks carrying `id`, `text` and `metadata` (`documentId`,
     *   `chunkIndex`, `totalChunks`, `startPosition`, `endPosition`,
     *   `documentType`, `custom`).
     */
    async chunk(text, config) {
        const { maxSize = 1000, overlap = 0, splitTags = this.defaultSplitTags, preserveTags = this.defaultPreserveTags, extractTextOnly = false, includeTagMetadata = true, trimWhitespace = true, metadata = {}, } = config || {};
        const documentId = randomUUID();
        const chunks = [];
        if (!text || text.length === 0) {
            return chunks;
        }
        const sections = this.splitByTags(text, splitTags, preserveTags);
        // NOTE: positions are running totals of the emitted piece lengths; they
        // are not offsets into the original HTML (tags and overlap are ignored).
        let currentPosition = 0;
        for (const { content, tagName, attributes } of sections) {
            const processedContent = extractTextOnly
                ? this.extractText(content)
                : content;
            for (const piece of this.splitContent(processedContent, maxSize, overlap)) {
                const finalText = trimWhitespace ? piece.trim() : piece;
                if (finalText.length > 0) {
                    const custom = { ...metadata };
                    if (includeTagMetadata && tagName) {
                        custom.tagName = tagName;
                        if (attributes && Object.keys(attributes).length > 0) {
                            custom.attributes = attributes;
                        }
                    }
                    chunks.push({
                        id: randomUUID(),
                        text: finalText,
                        metadata: {
                            documentId,
                            chunkIndex: chunks.length,
                            startPosition: currentPosition,
                            endPosition: currentPosition + piece.length,
                            documentType: "html",
                            custom,
                        },
                    });
                }
                currentPosition += piece.length;
            }
        }
        // The total is only known once every section has been processed.
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }
    /**
     * Split HTML into sections at the given structural tags.
     *
     * Text found between matched elements is returned as sections without a
     * `tagName`. An element containing one of `preserveTags` keeps its full
     * markup; otherwise only its inner content is returned.
     *
     * @param html - HTML source.
     * @param splitTags - Tag names to split on; an empty or missing list falls
     *   back to `defaultSplitTags`, matching the `validateConfig` warning.
     * @param preserveTags - Tag names whose presence keeps the outer markup.
     * @returns Sections of the form `{ content, tagName?, attributes? }`.
     */
    splitByTags(html, splitTags, preserveTags) {
        const sections = [];
        const escapeRegExp = (name) => String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const activeSplitTags = splitTags && splitTags.length > 0 ? splitTags : this.defaultSplitTags;
        // The lookahead forces the tag name to end here, so "p" cannot match
        // "<pre>" and "th" cannot match "<thead>".
        const tagPattern = new RegExp(`<(${activeSplitTags.map(escapeRegExp).join("|")})(?=[\\s/>])([^>]*)>([\\s\\S]*?)</\\1\\s*>`, "gi");
        const preservePattern = preserveTags && preserveTags.length > 0
            ? new RegExp(`<(?:${preserveTags.map(escapeRegExp).join("|")})(?=[\\s/>])`, "i")
            : null;
        let lastIndex = 0;
        let match;
        while ((match = tagPattern.exec(html)) !== null) {
            // Content between the previous element and this one.
            if (match.index > lastIndex) {
                const beforeContent = html.slice(lastIndex, match.index).trim();
                if (beforeContent.length > 0) {
                    sections.push({ content: beforeContent });
                }
            }
            const tagName = match[1].toLowerCase();
            const innerContent = match[3];
            const attributes = this.parseAttributes(match[2]);
            const shouldPreserve = preservePattern !== null && preservePattern.test(innerContent);
            sections.push({
                // Preserved sections keep the surrounding tag markup.
                content: shouldPreserve ? match[0] : innerContent,
                tagName,
                attributes,
            });
            lastIndex = match.index + match[0].length;
        }
        // Trailing content after the last matched element.
        if (lastIndex < html.length) {
            const remaining = html.slice(lastIndex).trim();
            if (remaining.length > 0) {
                sections.push({ content: remaining });
            }
        }
        // Nothing matched at all: return the whole text as one section.
        if (sections.length === 0 && html.trim()) {
            sections.push({ content: html.trim() });
        }
        return sections;
    }
    /**
     * Parse the attribute portion of an opening tag.
     *
     * Handles hyphenated names (`data-id`), double-quoted, single-quoted and
     * unquoted values, and valueless attributes (stored as `""`).
     *
     * @param attributeString - Text between the tag name and the closing `>`.
     * @returns Map of attribute name to value.
     */
    parseAttributes(attributeString) {
        const attributes = {};
        if (!attributeString) {
            return attributes;
        }
        const attrPattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
        let match;
        while ((match = attrPattern.exec(attributeString)) !== null) {
            // Exactly one of the three value groups can be set.
            attributes[match[1]] = match[2] ?? match[3] ?? match[4] ?? "";
        }
        return attributes;
    }
    /**
     * Reduce HTML to plain text.
     *
     * Removes scripts, styles and comments, turns line breaks and block closers
     * into whitespace, strips remaining tags, decodes a small set of entities
     * and collapses whitespace runs to single spaces.
     *
     * @param html - HTML fragment.
     * @returns Plain text with normalized whitespace.
     */
    extractText(html) {
        const entities = {
            nbsp: " ",
            amp: "&",
            lt: "<",
            gt: ">",
            quot: '"',
            apos: "'",
        };
        return (html
            .replace(/<script[\s\S]*?<\/script>/gi, "")
            .replace(/<style[\s\S]*?<\/style>/gi, "")
            .replace(/<!--[\s\S]*?-->/g, "")
            // <br> has no closing tag; without this, "a<br>b" would become "ab".
            .replace(/<\/?br\s*\/?>/gi, "\n")
            .replace(/<\/(p|div|h[1-6]|li|tr)>/gi, "\n")
            .replace(/<[^>]+>/g, "")
            // Single pass so "&amp;lt;" decodes to "&lt;" rather than "<".
            .replace(/&(nbsp|amp|lt|gt|quot|apos|#0*39);/gi, (whole, name) => {
            const key = name.toLowerCase();
            return key.charAt(0) === "#" ? "'" : entities[key];
        })
            .replace(/\s+/g, " ")
            .trim());
    }
    /**
     * Split content that exceeds `maxSize` into overlapping pieces.
     *
     * Each cut prefers a sentence or line break found in the last 100
     * characters of the window. The final piece ends the loop, so no redundant
     * overlap-only tail pieces are produced.
     *
     * @param content - Text to split.
     * @param maxSize - Maximum piece length (clamped to at least 1).
     * @param overlap - Characters shared between consecutive pieces (clamped to
     *   `[0, maxSize - 1]`).
     * @returns Pieces in document order.
     */
    splitContent(content, maxSize, overlap) {
        const effectiveMaxSize = Math.max(maxSize, 1);
        const effectiveOverlap = Math.min(Math.max(overlap, 0), effectiveMaxSize - 1);
        if (content.length <= effectiveMaxSize) {
            return [content];
        }
        const boundary = /[.!?\n]\s+/;
        const pieces = [];
        let start = 0;
        while (start < content.length) {
            let end = Math.min(start + effectiveMaxSize, content.length);
            if (end < content.length) {
                const searchStart = Math.max(start, end - 100);
                const found = boundary.exec(content.slice(searchStart, end));
                if (found) {
                    const candidate = searchStart + found.index + 1;
                    // Only take the break if the next window still moves forward
                    // by more than the overlap.
                    if (candidate - effectiveOverlap > start) {
                        end = candidate;
                    }
                }
            }
            pieces.push(content.slice(start, end));
            if (end >= content.length) {
                // Everything is covered; stepping back by the overlap would only
                // emit suffixes of this piece.
                break;
            }
            start = Math.max(start + 1, end - effectiveOverlap);
        }
        return pieces;
    }
    /**
     * Validate chunker configuration.
     *
     * @param config - Configuration to check (`maxSize`, `overlap`, `splitTags`).
     * @returns `{ valid, errors, warnings }`; `valid` is true when there are no errors.
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        const { maxSize, overlap, splitTags } = config;
        if (maxSize !== undefined && maxSize <= 0) {
            errors.push("maxSize must be greater than 0");
        }
        if (overlap !== undefined && overlap < 0) {
            errors.push("overlap must be non-negative");
        }
        if (overlap !== undefined && maxSize !== undefined && overlap >= maxSize) {
            errors.push("overlap must be less than maxSize");
        }
        if (splitTags !== undefined && splitTags.length === 0) {
            warnings.push("No split tags specified, using defaults");
        }
        return {
            valid: errors.length === 0,
            errors,
            warnings,
        };
    }
}
248
+ //# sourceMappingURL=htmlChunker.js.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Chunking Module Exports
3
+ *
4
+ * Provides all chunking strategies and the chunker registry.
5
+ */
6
+ export { ChunkerRegistry, chunkText } from "./chunkerRegistry.js";
7
+ export { CharacterChunker } from "./characterChunker.js";
8
+ export { RecursiveChunker } from "./recursiveChunker.js";
9
+ export { SentenceChunker } from "./sentenceChunker.js";
10
+ export { TokenChunker } from "./tokenChunker.js";
11
+ export { MarkdownChunker } from "./markdownChunker.js";
12
+ export { HTMLChunker } from "./htmlChunker.js";
13
+ export { JSONChunker } from "./jsonChunker.js";
14
+ export { LaTeXChunker } from "./latexChunker.js";
15
+ export { SemanticChunker } from "./semanticChunker.js";
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Chunking Module Exports
3
+ *
4
+ * Provides all chunking strategies and the chunker registry.
5
+ */
6
+ // Registry
7
+ export { ChunkerRegistry, chunkText } from "./chunkerRegistry.js";
8
+ // Individual chunkers
9
+ export { CharacterChunker } from "./characterChunker.js";
10
+ export { RecursiveChunker } from "./recursiveChunker.js";
11
+ export { SentenceChunker } from "./sentenceChunker.js";
12
+ export { TokenChunker } from "./tokenChunker.js";
13
+ export { MarkdownChunker } from "./markdownChunker.js";
14
+ export { HTMLChunker } from "./htmlChunker.js";
15
+ export { JSONChunker } from "./jsonChunker.js";
16
+ export { LaTeXChunker } from "./latexChunker.js";
17
+ export { SemanticChunker } from "./semanticChunker.js";
18
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,20 @@
1
+ /**
2
+ * JSON-aware Chunker
3
+ *
4
+ * Splits JSON documents based on structure (arrays, objects, keys).
5
+ * Best for API responses, configuration files, and structured data.
6
+ */
7
+ import type { Chunker, Chunk, ChunkerValidationResult, JSONChunkerConfig, BaseChunkerConfig } from "../types.js";
8
/**
 * Chunker for JSON documents.
 * Chunk boundaries follow the structure of the parsed document.
 */
export declare class JSONChunker implements Chunker {
    /** Strategy identifier of this chunker. */
    readonly strategy: "json";
    /**
     * Chunk a JSON document.
     *
     * @param text - JSON source text.
     * @param config - Optional JSON chunking settings.
     * @returns The resulting chunks.
     */
    chunk(text: string, config?: JSONChunkerConfig): Promise<Chunk[]>;
    /**
     * Walks the JSON structure recursively and collects chunk candidates.
     */
    private extractChunks;
    /**
     * Check a configuration object and report errors and warnings.
     */
    validateConfig(config: BaseChunkerConfig): ChunkerValidationResult;
}
@@ -0,0 +1,282 @@
1
+ /**
2
+ * JSON-aware Chunker
3
+ *
4
+ * Splits JSON documents based on structure (arrays, objects, keys).
5
+ * Best for API responses, configuration files, and structured data.
6
+ */
7
+ import { randomUUID } from "crypto";
8
/**
 * JSON-aware chunker implementation.
 *
 * Parses the input, walks the resulting structure and emits pieces that
 * serialize (2-space indented) to roughly `maxSize` characters or less. The
 * limit is approximate: group sizes are summed per entry and do not account for
 * the extra indentation added when entries are serialized together.
 */
export class JSONChunker {
    strategy = "json";
    /**
     * Chunk a JSON document.
     *
     * @param text - JSON source; an empty or missing value yields an empty
     *   array. Text that fails to parse is returned as a single chunk whose
     *   `custom.parseError` is `"Invalid JSON"`.
     * @param config - Optional settings: `maxSize`, `maxDepth`, `splitKeys`,
     *   `preserveKeys`, `includeJsonPath`, `trimWhitespace`, `metadata`.
     * @returns Chunks whose `text` is the pretty-printed JSON of each piece.
     */
    async chunk(text, config) {
        const { maxSize = 1000, maxDepth = 10, splitKeys = [], preserveKeys = [], includeJsonPath = true, trimWhitespace = true, metadata = {}, } = config || {};
        const documentId = randomUUID();
        const chunks = [];
        if (!text || text.length === 0) {
            return chunks;
        }
        let jsonData;
        try {
            jsonData = JSON.parse(text);
        }
        catch {
            // Not valid JSON: fall back to one plain-text chunk instead of failing.
            chunks.push({
                id: randomUUID(),
                text: trimWhitespace ? text.trim() : text,
                metadata: {
                    documentId,
                    chunkIndex: 0,
                    totalChunks: 1,
                    startPosition: 0,
                    endPosition: text.length,
                    documentType: "json",
                    custom: {
                        ...metadata,
                        parseError: "Invalid JSON",
                    },
                },
            });
            return chunks;
        }
        const extractedChunks = this.extractChunks({
            data: jsonData,
            path: "",
            depth: 0,
            maxDepth,
            maxSize,
            splitKeys,
            preserveKeys,
            includeJsonPath,
        });
        // NOTE: positions are running totals over the re-serialized pieces, not
        // offsets into the input text.
        let currentPosition = 0;
        for (const extracted of extractedChunks) {
            const serialized = JSON.stringify(extracted.value, null, 2);
            const finalText = trimWhitespace ? serialized.trim() : serialized;
            if (finalText.length === 0) {
                continue;
            }
            const custom = { ...metadata };
            if (includeJsonPath && extracted.path) {
                custom.jsonPath = extracted.path;
            }
            chunks.push({
                id: randomUUID(),
                text: finalText,
                metadata: {
                    documentId,
                    chunkIndex: chunks.length,
                    startPosition: currentPosition,
                    endPosition: currentPosition + finalText.length,
                    documentType: "json",
                    // NOTE(review): set regardless of includeJsonPath; confirm
                    // whether the flag should also control this field.
                    jsonPath: extracted.path,
                    custom,
                },
            });
            currentPosition += finalText.length;
        }
        // The total is only known once every piece has been emitted.
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }
    /**
     * Recursively extract chunk candidates from a parsed JSON value.
     *
     * A value is returned whole when the depth limit is exceeded, when the last
     * path segment is listed in `preserveKeys`, or when it already fits in
     * `maxSize`. Otherwise arrays are split into runs of elements and objects
     * into groups of keys. Object entries that are named in `splitKeys`, or
     * that alone exceed `maxSize`, are recursed into.
     *
     * @param options - `{ data, path, depth, maxDepth, maxSize, splitKeys,
     *   preserveKeys, includeJsonPath }`.
     * @returns List of `{ value, path }` pieces in document order.
     */
    extractChunks(options) {
        const { data, path, depth, maxDepth, maxSize, splitKeys, preserveKeys } = options;
        const sizeOf = (value) => JSON.stringify(value, null, 2).length;
        if (depth > maxDepth) {
            return [{ value: data, path }];
        }
        const currentKey = path.split(".").pop() || "";
        if (preserveKeys.includes(currentKey)) {
            return [{ value: data, path }];
        }
        if (sizeOf(data) <= maxSize) {
            return [{ value: data, path }];
        }
        const results = [];
        // Descend one level, appending the child's pieces to `results`.
        const recurseInto = (value, childPath) => {
            const pieces = this.extractChunks({
                ...options,
                data: value,
                path: childPath,
                depth: depth + 1,
            });
            for (const piece of pieces) {
                results.push(piece);
            }
        };
        if (Array.isArray(data)) {
            if (splitKeys.length === 0 || splitKeys.some((k) => path.endsWith(k))) {
                let group = [];
                let groupSize = 0;
                // A lone element is emitted unwrapped; the path keeps slice form.
                const flushGroup = (endIndex) => {
                    if (group.length === 0) {
                        return;
                    }
                    results.push({
                        value: group.length === 1 ? group[0] : group,
                        path: `${path}[${endIndex - group.length}:${endIndex}]`,
                    });
                    group = [];
                    groupSize = 0;
                };
                for (let i = 0; i < data.length; i++) {
                    const item = data[i];
                    const itemSize = sizeOf(item);
                    if (groupSize + itemSize > maxSize) {
                        flushGroup(i);
                    }
                    if (itemSize > maxSize) {
                        // Too large on its own: split the element itself.
                        recurseInto(item, `${path}[${i}]`);
                    }
                    else {
                        group.push(item);
                        groupSize += itemSize;
                    }
                }
                flushGroup(data.length);
            }
            else {
                // Array is not selected for splitting: keep it as one unit.
                results.push({ value: data, path });
            }
        }
        else if (data !== null && typeof data === "object") {
            const keys = Object.keys(data);
            const childPath = (key) => (path ? `${path}.${key}` : key);
            const hasSplittableKey = keys.some((k) => splitKeys.length === 0 || splitKeys.includes(k));
            if (hasSplittableKey) {
                let group = {};
                let groupSize = 0;
                const flushGroup = () => {
                    if (Object.keys(group).length === 0) {
                        return;
                    }
                    results.push({ value: group, path });
                    group = {};
                    groupSize = 0;
                };
                for (const key of keys) {
                    const value = data[key];
                    const entrySize = sizeOf({ [key]: value });
                    // Recurse into requested keys and into entries that cannot
                    // fit in any group. Without the size test a single huge
                    // value would come back as one oversized chunk.
                    if (splitKeys.includes(key) || entrySize > maxSize) {
                        flushGroup();
                        recurseInto(value, childPath(key));
                        continue;
                    }
                    if (groupSize + entrySize > maxSize) {
                        flushGroup();
                    }
                    group[key] = value;
                    groupSize += entrySize;
                }
                flushGroup();
            }
            else {
                // splitKeys is set but names none of these keys: emit each key
                // on its own, recursing into oversized values.
                for (const key of keys) {
                    const value = data[key];
                    if (sizeOf(value) > maxSize) {
                        recurseInto(value, childPath(key));
                    }
                    else {
                        results.push({ value: { [key]: value }, path: childPath(key) });
                    }
                }
            }
        }
        else {
            // Oversized primitive (e.g. a long string): cannot be split further.
            results.push({ value: data, path });
        }
        return results;
    }
    /**
     * Validate chunker configuration.
     *
     * @param config - Configuration to check (`maxSize`, `maxDepth`).
     * @returns `{ valid, errors, warnings }`; `valid` is true when there are no errors.
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        const { maxSize, maxDepth } = config;
        if (maxSize !== undefined && maxSize <= 0) {
            errors.push("maxSize must be greater than 0");
        }
        if (maxDepth !== undefined && maxDepth < 1) {
            errors.push("maxDepth must be at least 1");
        }
        if (maxDepth !== undefined && maxDepth > 100) {
            warnings.push("Very high maxDepth may cause performance issues");
        }
        return {
            valid: errors.length === 0,
            errors,
            warnings,
        };
    }
}
282
+ //# sourceMappingURL=jsonChunker.js.map
@@ -0,0 +1,26 @@
1
+ /**
2
+ * LaTeX-aware Chunker
3
+ *
4
+ * Splits LaTeX documents based on structure (sections, environments, math).
5
+ * Best for academic papers, scientific documents, and mathematical content.
6
+ */
7
+ import type { BaseChunkerConfig, Chunk, Chunker, ChunkerValidationResult, LaTeXChunkerConfig } from "../types.js";
8
/**
 * Chunker for LaTeX documents.
 * Chunk boundaries follow LaTeX structure (sections, environments).
 */
export declare class LaTeXChunker implements Chunker {
    /** Strategy identifier of this chunker. */
    readonly strategy: "latex";
    // NOTE(review): the two private fields below are declared without types and
    // the implementation is not visible here; presumably lists of environment
    // names. Confirm against latexChunker.js.
    private readonly defaultSplitEnvironments;
    private readonly mathEnvironments;
    /**
     * Chunk a LaTeX document.
     *
     * @param text - LaTeX source text.
     * @param config - Optional LaTeX chunking settings.
     * @returns The resulting chunks.
     */
    chunk(text: string, config?: LaTeXChunkerConfig): Promise<Chunk[]>;
    /**
     * Splits the LaTeX source at sectioning commands.
     */
    private splitBySections;
    /**
     * Splits content that is larger than the maximum size.
     */
    private splitContent;
    /**
     * Check a configuration object and report errors and warnings.
     */
    validateConfig(config: BaseChunkerConfig): ChunkerValidationResult;
}