@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,392 @@
1
+ /**
2
+ * MDocument - Main Document Processing Class
3
+ *
4
+ * Provides a fluent interface for document processing using the Factory + Registry pattern.
5
+ * Supports various document types, chunking strategies, and metadata extraction.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * const doc = MDocument.fromText(content);
10
+ * await doc.chunk({ // mutates doc and resolves to it; read the result via doc.getChunks()
11
+ * strategy: 'recursive',
12
+ * config: { maxSize: 1000, overlap: 200 }
13
+ * });
14
+ * await doc.extractMetadata({ // enriches the existing chunks in place
15
+ * title: true,
16
+ * summary: true,
17
+ * keywords: true
18
+ * });
19
+ * ```
20
+ */
21
+ import { randomUUID } from "crypto";
22
+ import { logger } from "../../utils/logger.js";
23
+ import { ChunkerRegistry } from "../chunking/chunkerRegistry.js";
24
+ import { LLMMetadataExtractor } from "../metadata/metadataExtractor.js";
25
+ /**
26
+ * MDocument class for comprehensive document processing
27
+ *
28
+ * Provides a chainable API for:
29
+ * - Loading documents from various sources
30
+ * - Chunking with multiple strategies
31
+ * - Metadata extraction using LLMs
32
+ * - Embedding generation
33
+ */
34
+ export class MDocument {
35
+ state;
36
+ documentId;
37
+ /**
38
+ * Create a new MDocument instance
39
+ * @param content - Document content
40
+ * @param config - Document configuration
41
+ */
42
+ constructor(content, config) {
43
+ this.documentId = randomUUID();
44
+ this.state = {
45
+ content,
46
+ type: config?.type ?? "text",
47
+ metadata: {
48
+ ...config?.metadata,
49
+ documentId: this.documentId,
50
+ createdAt: new Date().toISOString(),
51
+ },
52
+ chunks: [],
53
+ embeddings: [],
54
+ history: ["created"],
55
+ };
56
+ }
57
+ // ============================================================================
58
+ // Static Factory Methods
59
+ // ============================================================================
60
+ /**
61
+ * Create MDocument from plain text
62
+ * @param text - Plain text content
63
+ * @param metadata - Optional metadata
64
+ * @returns MDocument instance
65
+ */
66
+ static fromText(text, metadata) {
67
+ return new MDocument(text, { type: "text", metadata });
68
+ }
69
+ /**
70
+ * Create MDocument from markdown content
71
+ * @param markdown - Markdown content
72
+ * @param metadata - Optional metadata
73
+ * @returns MDocument instance
74
+ */
75
+ static fromMarkdown(markdown, metadata) {
76
+ return new MDocument(markdown, { type: "markdown", metadata });
77
+ }
78
+ /**
79
+ * Create MDocument from HTML content
80
+ * @param html - HTML content
81
+ * @param metadata - Optional metadata
82
+ * @returns MDocument instance
83
+ */
84
+ static fromHTML(html, metadata) {
85
+ return new MDocument(html, { type: "html", metadata });
86
+ }
87
+ /**
88
+ * Create MDocument from JSON content
89
+ * @param json - JSON string or object
90
+ * @param metadata - Optional metadata
91
+ * @returns MDocument instance
92
+ */
93
+ static fromJSONContent(json, metadata) {
94
+ const content = typeof json === "string" ? json : JSON.stringify(json, null, 2);
95
+ return new MDocument(content, { type: "json", metadata });
96
+ }
97
+ /**
98
+ * Create MDocument from LaTeX content
99
+ * @param latex - LaTeX content
100
+ * @param metadata - Optional metadata
101
+ * @returns MDocument instance
102
+ */
103
+ static fromLaTeX(latex, metadata) {
104
+ return new MDocument(latex, { type: "latex", metadata });
105
+ }
106
+ /**
107
+ * Create MDocument from CSV content
108
+ * @param csv - CSV content
109
+ * @param metadata - Optional metadata
110
+ * @returns MDocument instance
111
+ */
112
+ static fromCSV(csv, metadata) {
113
+ return new MDocument(csv, { type: "csv", metadata });
114
+ }
115
+ // ============================================================================
116
+ // Core Processing Methods
117
+ // ============================================================================
118
+ /**
119
+ * Chunk the document using specified strategy
120
+ * @param params - Chunking parameters
121
+ * @returns This MDocument instance (for chaining)
122
+ */
123
+ async chunk(params) {
124
+ const { strategy = this.getDefaultStrategy(), config = {} } = params || {};
125
+ logger.debug("[MDocument] Chunking document", {
126
+ documentId: this.documentId,
127
+ strategy,
128
+ contentLength: this.state.content.length,
129
+ });
130
+ const chunker = ChunkerRegistry.get(strategy);
131
+ // Merge document metadata into chunk config
132
+ const chunkConfig = {
133
+ ...config,
134
+ metadata: {
135
+ ...config.metadata,
136
+ source: this.state.metadata.source,
137
+ documentType: this.state.type,
138
+ },
139
+ };
140
+ this.state.chunks = await chunker.chunk(this.state.content, chunkConfig);
141
+ this.state.history.push(`chunked:${strategy}`);
142
+ logger.info("[MDocument] Document chunked", {
143
+ documentId: this.documentId,
144
+ strategy,
145
+ chunkCount: this.state.chunks.length,
146
+ });
147
+ return this;
148
+ }
149
+ /**
150
+ * Extract metadata from chunks using LLM
151
+ * @param params - Extraction parameters
152
+ * @param options - Extractor options
153
+ * @returns This MDocument instance (for chaining)
154
+ */
155
+ async extractMetadata(params, options) {
156
+ if (this.state.chunks.length === 0) {
157
+ logger.warn("[MDocument] No chunks to extract metadata from. Call chunk() first.");
158
+ return this;
159
+ }
160
+ logger.debug("[MDocument] Extracting metadata", {
161
+ documentId: this.documentId,
162
+ chunkCount: this.state.chunks.length,
163
+ params: Object.keys(params),
164
+ });
165
+ const extractor = new LLMMetadataExtractor(options);
166
+ const results = await extractor.extract(this.state.chunks, params);
167
+ // Merge extraction results into chunk metadata
168
+ for (let i = 0; i < this.state.chunks.length && i < results.length; i++) {
169
+ const result = results[i];
170
+ if (result.title) {
171
+ this.state.chunks[i].metadata.title = result.title;
172
+ }
173
+ if (result.summary) {
174
+ this.state.chunks[i].metadata.summary = result.summary;
175
+ }
176
+ if (result.keywords) {
177
+ this.state.chunks[i].metadata.keywords = result.keywords;
178
+ }
179
+ if (result.custom) {
180
+ this.state.chunks[i].metadata.custom = {
181
+ ...(this.state.chunks[i].metadata.custom || {}),
182
+ ...result.custom,
183
+ };
184
+ }
185
+ }
186
+ this.state.history.push(`metadata:${Object.keys(params).join(",")}`);
187
+ logger.info("[MDocument] Metadata extracted", {
188
+ documentId: this.documentId,
189
+ extractedFields: Object.keys(params),
190
+ });
191
+ return this;
192
+ }
193
+ /**
194
+ * Generate embeddings for all chunks
195
+ * @param provider - Embedding provider name
196
+ * @param modelName - Embedding model name
197
+ * @returns This MDocument instance (for chaining)
198
+ */
199
+ async embed(provider = "openai", modelName = "text-embedding-3-small") {
200
+ if (this.state.chunks.length === 0) {
201
+ logger.warn("[MDocument] No chunks to embed. Call chunk() first.");
202
+ return this;
203
+ }
204
+ // Lazy import to avoid circular dependencies
205
+ const { ProviderFactory } = await import("../../factories/providerFactory.js");
206
+ logger.debug("[MDocument] Generating embeddings", {
207
+ documentId: this.documentId,
208
+ chunkCount: this.state.chunks.length,
209
+ provider,
210
+ model: modelName,
211
+ });
212
+ const embeddingProvider = await ProviderFactory.createProvider(provider, modelName);
213
+ if (typeof embeddingProvider.embed !==
214
+ "function") {
215
+ throw new Error(`Provider ${provider} does not support embeddings`);
216
+ }
217
+ this.state.embeddings = [];
218
+ for (const chunk of this.state.chunks) {
219
+ const embedding = await embeddingProvider.embed(chunk.text);
220
+ this.state.embeddings.push(embedding);
221
+ chunk.embedding = embedding;
222
+ }
223
+ this.state.history.push(`embedded:${provider}:${modelName}`);
224
+ logger.info("[MDocument] Embeddings generated", {
225
+ documentId: this.documentId,
226
+ embeddingCount: this.state.embeddings.length,
227
+ dimension: this.state.embeddings[0]?.length,
228
+ });
229
+ return this;
230
+ }
231
+ // ============================================================================
232
+ // Accessor Methods
233
+ // ============================================================================
234
+ /**
235
+ * Get document ID
236
+ */
237
+ getId() {
238
+ return this.documentId;
239
+ }
240
+ /**
241
+ * Get raw document content
242
+ */
243
+ getContent() {
244
+ return this.state.content;
245
+ }
246
+ /**
247
+ * Get document type
248
+ */
249
+ getType() {
250
+ return this.state.type;
251
+ }
252
+ /**
253
+ * Get document metadata
254
+ */
255
+ getMetadata() {
256
+ return { ...this.state.metadata };
257
+ }
258
+ /**
259
+ * Get processed chunks
260
+ */
261
+ getChunks() {
262
+ return [...this.state.chunks];
263
+ }
264
+ /**
265
+ * Get chunk embeddings
266
+ */
267
+ getEmbeddings() {
268
+ return [...this.state.embeddings];
269
+ }
270
+ /**
271
+ * Get processing history
272
+ */
273
+ getHistory() {
274
+ return [...this.state.history];
275
+ }
276
+ /**
277
+ * Check if document has been chunked
278
+ */
279
+ isChunked() {
280
+ return this.state.chunks.length > 0;
281
+ }
282
+ /**
283
+ * Check if document has embeddings
284
+ */
285
+ hasEmbeddings() {
286
+ return this.state.embeddings.length > 0;
287
+ }
288
+ /**
289
+ * Get chunk count
290
+ */
291
+ getChunkCount() {
292
+ return this.state.chunks.length;
293
+ }
294
+ // ============================================================================
295
+ // Transformation Methods
296
+ // ============================================================================
297
+ /**
298
+ * Set document metadata
299
+ * @param key - Metadata key
300
+ * @param value - Metadata value
301
+ * @returns This MDocument instance (for chaining)
302
+ */
303
+ setMetadata(key, value) {
304
+ this.state.metadata[key] = value;
305
+ return this;
306
+ }
307
+ /**
308
+ * Merge metadata into document
309
+ * @param metadata - Metadata to merge
310
+ * @returns This MDocument instance (for chaining)
311
+ */
312
+ mergeMetadata(metadata) {
313
+ this.state.metadata = { ...this.state.metadata, ...metadata };
314
+ return this;
315
+ }
316
+ /**
317
+ * Filter chunks based on predicate
318
+ * @param predicate - Filter function
319
+ * @returns New MDocument with filtered chunks
320
+ */
321
+ filterChunks(predicate) {
322
+ const doc = new MDocument(this.state.content, {
323
+ type: this.state.type,
324
+ metadata: this.state.metadata,
325
+ });
326
+ doc.state.chunks = this.state.chunks.filter(predicate);
327
+ doc.state.embeddings = this.state.embeddings.filter((_, i) => predicate(this.state.chunks[i]));
328
+ doc.state.history = [...this.state.history, "filtered"];
329
+ return doc;
330
+ }
331
+ /**
332
+ * Map transformation over chunks
333
+ * @param transform - Transform function
334
+ * @returns New MDocument with transformed chunks
335
+ */
336
+ mapChunks(transform) {
337
+ const doc = new MDocument(this.state.content, {
338
+ type: this.state.type,
339
+ metadata: this.state.metadata,
340
+ });
341
+ doc.state.chunks = this.state.chunks.map(transform);
342
+ doc.state.embeddings = [...this.state.embeddings];
343
+ doc.state.history = [...this.state.history, "mapped"];
344
+ return doc;
345
+ }
346
+ // ============================================================================
347
+ // Serialization Methods
348
+ // ============================================================================
349
+ /**
350
+ * Convert to plain object for serialization
351
+ */
352
+ toJSON() {
353
+ return {
354
+ id: this.documentId,
355
+ content: this.state.content,
356
+ type: this.state.type,
357
+ metadata: this.state.metadata,
358
+ chunks: this.state.chunks,
359
+ history: this.state.history,
360
+ };
361
+ }
362
+ /**
363
+ * Create MDocument from serialized JSON
364
+ * @param json - Serialized document data
365
+ * @returns MDocument instance
366
+ */
367
+ static fromJSON(json) {
368
+ const doc = new MDocument(json.content, {
369
+ type: json.type,
370
+ metadata: json.metadata,
371
+ });
372
+ if (json.id) {
373
+ doc.documentId = json.id;
374
+ }
375
+ if (json.chunks) {
376
+ doc.state.chunks = json.chunks;
377
+ }
378
+ if (json.history) {
379
+ doc.state.history = json.history;
380
+ }
381
+ return doc;
382
+ }
383
+ // ============================================================================
384
+ // Private Helper Methods
385
+ // ============================================================================
386
+ /**
387
+ * Get default chunking strategy based on document type
388
+ */
389
+ getDefaultStrategy() {
390
+ return ChunkerRegistry.getRecommendedStrategy(this.state.type);
391
+ }
392
+ }
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Document Module Exports
3
+ */
4
+ export { MDocument } from "./MDocument.js";
5
+ export { type DocumentLoader, TextLoader, MarkdownLoader, HTMLLoader, JSONLoader, CSVLoader, PDFLoader, WebLoader, loadDocument, loadDocuments, type LoaderOptions, type WebLoaderOptions, type PDFLoaderOptions, type CSVLoaderOptions, } from "./loaders.js";
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Document Module Exports
3
+ */
4
+ export { MDocument } from "./MDocument.js";
5
+ export { TextLoader, MarkdownLoader, HTMLLoader, JSONLoader, CSVLoader, PDFLoader, WebLoader, loadDocument, loadDocuments, } from "./loaders.js";
@@ -0,0 +1,201 @@
1
+ /**
2
+ * Document Loaders
3
+ *
4
+ * Provides loaders for various document formats including:
5
+ * - Text files
6
+ * - Markdown files
7
+ * - HTML files and web pages
8
+ * - JSON files
9
+ * - CSV files
10
+ * - PDF files
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * import { loadDocument, WebLoader, PDFLoader } from 'neurolink/rag';
15
+ *
16
+ * // Load from file path
17
+ * const doc = await loadDocument('/path/to/document.md');
18
+ *
19
+ * // Load from URL
20
+ * const webDoc = await new WebLoader().load('https://example.com/article');
21
+ *
22
+ * // Load PDF
23
+ * const pdfDoc = await new PDFLoader().load('/path/to/document.pdf');
24
+ * ```
25
+ */
26
+ import { MDocument } from "./MDocument.js";
27
+ import type { DocumentType } from "../types.js";
28
+ /**
29
+ * Document loader options
+ *
+ * Base options type accepted by `DocumentLoader.load`; `WebLoaderOptions`,
+ * `PDFLoaderOptions` and `CSVLoaderOptions` extend it. Every field is optional.
30
+ */
31
+ export interface LoaderOptions {
32
+ /** Custom metadata to add to document */
33
+ metadata?: Record<string, unknown>;
34
+ /** Encoding for text files */
35
+ encoding?: BufferEncoding;
36
+ /** Document type override */
37
+ type?: DocumentType;
38
+ }
39
+ /**
40
+ * Web loader options
+ *
+ * Extends `LoaderOptions` with request and content-extraction settings for
+ * `WebLoader.load`. Every field is optional.
+ * NOTE(review): defaults, and the precedence between `userAgent` and a
+ * User-Agent entry inside `headers`, are not visible in this declaration —
+ * confirm against the implementation.
41
+ */
42
+ export interface WebLoaderOptions extends LoaderOptions {
43
+ /** Request timeout in milliseconds */
44
+ timeout?: number;
45
+ /** Custom headers for request */
46
+ headers?: Record<string, string>;
47
+ /** Extract only main content (remove navigation, ads, etc.) */
48
+ extractMainContent?: boolean;
49
+ /** Selector for main content (CSS selector) */
50
+ contentSelector?: string;
51
+ /** User agent string */
52
+ userAgent?: string;
53
+ }
54
+ /**
55
+ * PDF loader options
+ *
+ * Extends `LoaderOptions` with settings for `PDFLoader.load`. Every field is
+ * optional.
+ * NOTE(review): which of these flags are actually honored (especially when the
+ * loader runs without a PDF parsing library) cannot be told from this
+ * declaration — confirm against the implementation.
56
+ */
57
+ export interface PDFLoaderOptions extends LoaderOptions {
58
+ /** Page range to extract (e.g., "1-5" or "1,3,5") */
59
+ pageRange?: string;
60
+ /** Extract images as base64 */
61
+ extractImages?: boolean;
62
+ /** OCR for scanned documents */
63
+ enableOCR?: boolean;
64
+ /** Preserve layout formatting */
65
+ preserveLayout?: boolean;
66
+ }
67
+ /**
68
+ * CSV loader options
+ *
+ * Extends `LoaderOptions` with parsing and rendering settings for
+ * `CSVLoader.load`. Every field is optional; `outputFormat` is limited to
+ * "text", "json" or "markdown".
+ * NOTE(review): default delimiter, header handling and output format are not
+ * visible in this declaration — confirm against the implementation.
69
+ */
70
+ export interface CSVLoaderOptions extends LoaderOptions {
71
+ /** Delimiter character */
72
+ delimiter?: string;
73
+ /** Whether first row is header */
74
+ hasHeader?: boolean;
75
+ /** Column names (if no header) */
76
+ columns?: string[];
77
+ /** Output format */
78
+ outputFormat?: "text" | "json" | "markdown";
79
+ }
80
+ /**
81
+ * Abstract document loader interface
+ *
+ * Contract shared by all loaders declared in this file: `TextLoader` (and the
+ * classes extending it), `PDFLoader` and `WebLoader` implement it. Both members
+ * are instance methods, so a loader must be instantiated before use.
82
+ */
83
+ export interface DocumentLoader {
84
+ /**
85
+ * Load document from source
86
+ * @param source - File path, URL, or content
87
+ * @param options - Loader options
88
+ * @returns Promise resolving to MDocument
89
+ */
90
+ load(source: string, options?: LoaderOptions): Promise<MDocument>;
91
+ /**
92
+ * Check if loader can handle the source
93
+ * @param source - File path, URL, or content
94
+ * @returns True if loader can handle the source
95
+ */
96
+ canHandle(source: string): boolean;
97
+ }
98
+ /**
99
+ * Text file loader
+ *
+ * Implements `DocumentLoader` and serves as the base class of `MarkdownLoader`,
+ * `HTMLLoader`, `JSONLoader` and `CSVLoader`. `loadContent` (resolves to the
+ * source's text, with an optional encoding) and `getSourceName` are protected
+ * helpers available to those subclasses.
100
+ */
101
+ export declare class TextLoader implements DocumentLoader {
102
+ load(source: string, options?: LoaderOptions): Promise<MDocument>;
103
+ canHandle(source: string): boolean;
104
+ protected loadContent(source: string, encoding?: BufferEncoding): Promise<string>;
105
+ protected getSourceName(source: string): string;
106
+ }
107
+ /**
108
+ * Markdown file loader
+ *
+ * Extends `TextLoader`, overriding `load` and `canHandle`.
+ * NOTE(review): presumably `canHandle` matches Markdown file extensions —
+ * the matching rule is not visible in this declaration.
109
+ */
110
+ export declare class MarkdownLoader extends TextLoader {
111
+ load(source: string, options?: LoaderOptions): Promise<MDocument>;
112
+ canHandle(source: string): boolean;
113
+ }
114
+ /**
115
+ * HTML file loader
+ *
+ * Extends `TextLoader`, overriding `load` and `canHandle`. Fetching remote
+ * pages has its own declared class, `WebLoader`.
+ * NOTE(review): the matching rule used by `canHandle` is not visible in this
+ * declaration.
116
+ */
117
+ export declare class HTMLLoader extends TextLoader {
118
+ load(source: string, options?: LoaderOptions): Promise<MDocument>;
119
+ canHandle(source: string): boolean;
120
+ }
121
+ /**
122
+ * JSON file loader
+ *
+ * Extends `TextLoader`, overriding `load` and `canHandle`.
+ * NOTE(review): whether the content is validated or re-serialized while
+ * loading is not visible in this declaration.
123
+ */
124
+ export declare class JSONLoader extends TextLoader {
125
+ load(source: string, options?: LoaderOptions): Promise<MDocument>;
126
+ canHandle(source: string): boolean;
127
+ }
128
+ /**
129
+ * CSV file loader
+ *
+ * Extends `TextLoader`; `load` accepts `CSVLoaderOptions` (delimiter, header
+ * handling, column names, output format). `parseCSVLine`, `toMarkdownTable`
+ * and `toTextTable` are private helpers whose signatures are not part of the
+ * public declaration.
130
+ */
131
+ export declare class CSVLoader extends TextLoader {
132
+ load(source: string, options?: CSVLoaderOptions): Promise<MDocument>;
133
+ canHandle(source: string): boolean;
134
+ private parseCSVLine;
135
+ private toMarkdownTable;
136
+ private toTextTable;
137
+ }
138
+ /**
139
+ * PDF file loader
140
+ *
141
+ * Note: Requires external PDF processing library for full functionality.
142
+ * Falls back to placeholder implementation if pdf-parse is not available.
+ *
+ * Implements `DocumentLoader` directly (it does not extend `TextLoader`);
+ * `load` accepts `PDFLoaderOptions`. `loadPdfParser` and `parsePageRange` are
+ * private helpers. `load` is an instance method: use `new PDFLoader().load(path)`.
143
+ */
144
+ export declare class PDFLoader implements DocumentLoader {
145
+ load(source: string, options?: PDFLoaderOptions): Promise<MDocument>;
146
+ canHandle(source: string): boolean;
147
+ private loadPdfParser;
148
+ private parsePageRange;
149
+ }
150
+ /**
151
+ * Web page loader
152
+ *
153
+ * Fetches and extracts content from web pages.
154
+ * Supports basic HTML parsing without external dependencies.
+ *
+ * Implements `DocumentLoader` directly; `load` accepts `WebLoaderOptions`.
+ * `load` is an instance method: use `new WebLoader().load(url)`.
+ * `defaultUserAgent`, `extractMainContent` and `htmlToText` are private.
155
+ */
156
+ export declare class WebLoader implements DocumentLoader {
157
+ private defaultUserAgent;
158
+ load(source: string, options?: WebLoaderOptions): Promise<MDocument>;
159
+ canHandle(source: string): boolean;
160
+ /**
161
+ * Extract main content from HTML
162
+ */
163
+ private extractMainContent;
164
+ /**
165
+ * Convert HTML to plain text
166
+ */
167
+ private htmlToText;
168
+ }
169
+ /**
170
+ * Load document from file path, URL, or content
171
+ *
172
+ * Automatically detects the document type and uses the appropriate loader.
173
+ *
174
+ * @param source - File path, URL, or raw content
175
+ * @param options - Loader options
176
+ * @returns Promise resolving to MDocument
177
+ *
178
+ * @example
179
+ * ```typescript
180
+ * // Load from file
181
+ * const doc = await loadDocument('/path/to/document.md');
182
+ *
183
+ * // Load from URL
184
+ * const webDoc = await loadDocument('https://example.com/article');
185
+ *
186
+ * // Load with options
187
+ * const pdfDoc = await loadDocument('/path/to/doc.pdf', {
188
+ * pageRange: '1-5',
189
+ * metadata: { project: 'research' }
190
+ * });
191
+ * ```
+ *
+ * NOTE(review): `options` is declared as `LoaderOptions`, which has no
+ * `pageRange` field, so the inline options literal in the last example is
+ * rejected by TypeScript's excess-property check. Either the parameter type
+ * should also admit the loader-specific option types, or the example should
+ * pass a pre-typed `PDFLoaderOptions` variable — confirm the intended contract.
192
+ */
193
+ export declare function loadDocument(source: string, options?: LoaderOptions): Promise<MDocument>;
194
+ /**
195
+ * Load multiple documents
196
+ *
197
+ * @param sources - Array of file paths, URLs, or content
198
+ * @param options - Loader options (applied to all)
199
+ * @returns Promise resolving to array of MDocuments
+ *
+ * NOTE(review): whether sources are loaded concurrently, whether result order
+ * follows `sources`, and how a single failing source affects the returned
+ * promise are not visible in this declaration — confirm against the
+ * implementation.
200
+ */
201
+ export declare function loadDocuments(sources: string[], options?: LoaderOptions): Promise<MDocument[]>;