@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,353 @@
1
+ /**
2
+ * Word Document Processing Utility
3
+ *
4
+ * Handles downloading, validating, and processing Word files (.docx and .doc are accepted by type, but only ZIP-based DOCX content passes validation).
5
+ * Uses the mammoth library to extract text and HTML content from Word documents.
6
+ *
7
+ * Features:
8
+ * - DOCX format validation via ZIP/PK signature check
9
+ * - Text extraction using mammoth.extractRawText()
10
+ * - HTML conversion using mammoth.convertToHtml()
11
+ * - Warning collection from mammoth processing
12
+ * - Support for both URL downloads and direct buffer input
13
+ *
14
+ * @module processors/document/WordProcessor
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * import { wordProcessor, processWord, isWordFile } from "./WordProcessor.js";
19
+ *
20
+ * // Check if file is supported
21
+ * if (isWordFile(file.mimetype, file.name)) {
22
+ * const result = await processWord(fileInfo, {
23
+ * authHeaders: { Authorization: "Bearer token" },
24
+ * });
25
+ *
26
+ * if (result.success) {
27
+ * console.log("Text:", result.data.textContent);
28
+ * console.log("HTML:", result.data.htmlContent);
29
+ * console.log("Warnings:", result.data.warnings);
30
+ * }
31
+ * }
32
+ * ```
33
+ */
34
+ import * as mammoth from "mammoth";
35
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
36
+ import { SIZE_LIMITS } from "../config/index.js";
37
+ import { FileErrorCode } from "../errors/index.js";
38
+ // =============================================================================
39
+ // CONSTANTS
40
+ // =============================================================================
41
+ /**
42
+ * MIME types accepted for Word documents (DOCX first, then the legacy "application/msword" type)
43
+ */
44
+ const SUPPORTED_WORD_MIME_TYPES = [
45
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
46
+ "application/msword",
47
+ ];
48
+ /**
49
+ * File extensions accepted for Word documents (leading dot included)
50
+ */
51
+ const SUPPORTED_WORD_EXTENSIONS = [".docx", ".doc"];
52
+ /**
53
+ * Timeout passed to the base processor as `timeoutMs`: 60000 ms (60 seconds).
54
+ * Word documents can be larger due to embedded images and complex formatting
55
+ */
56
+ const WORD_TIMEOUT_MS = 60000;
57
+ // =============================================================================
58
+ // WORD PROCESSOR CLASS
59
+ // =============================================================================
60
+ /**
61
+ * Word Processor - handles .docx and .doc files
62
+ *
63
+ * Uses mammoth library for both text and HTML extraction. The processor
64
+ * validates DOCX files by checking for the ZIP/PK signature (since DOCX
65
+ * files are actually ZIP archives).
66
+ *
67
+ * @example
68
+ * ```typescript
69
+ * const processor = new WordProcessor();
70
+ *
71
+ * // Check if file is supported
72
+ * if (processor.isFileSupported("application/msword", "report.doc")) {
73
+ * const result = await processor.processFile(fileInfo);
74
+ * if (result.success) {
75
+ * console.log("Extracted text:", result.data.textContent);
76
+ * }
77
+ * }
78
+ * ```
79
+ */
80
+ export class WordProcessor extends BaseFileProcessor {
81
+ constructor() {
82
+ super({
83
+ maxSizeMB: SIZE_LIMITS.WORD_MAX_MB,
84
+ timeoutMs: WORD_TIMEOUT_MS,
85
+ supportedMimeTypes: SUPPORTED_WORD_MIME_TYPES,
86
+ supportedExtensions: SUPPORTED_WORD_EXTENSIONS,
87
+ fileTypeName: "Word",
88
+ defaultFilename: "document.docx",
89
+ });
90
+ }
91
+ /**
92
+ * Validate downloaded Word document has correct magic bytes.
93
+ * DOCX files are ZIP archives starting with PK signature (0x50 0x4B).
94
+ *
95
+ * @param buffer - Downloaded file content
96
+ * @param fileInfo - Original file information
97
+ * @returns null if valid, error message if invalid
98
+ */
99
+ async validateDownloadedFile(buffer, _fileInfo) {
100
+ // Minimum size check
101
+ if (buffer.length < 4) {
102
+ return "Invalid Word document - file too small";
103
+ }
104
+ // DOCX files are ZIP archives (PK signature: 0x50 0x4B)
105
+ const pkSignature = buffer.subarray(0, 2).toString("ascii");
106
+ if (pkSignature !== "PK") {
107
+ // Log what we actually received to help debug
108
+ const preview = buffer
109
+ .subarray(0, 100)
110
+ .toString("utf8")
111
+ .substring(0, 100);
112
+ const looksLikeHtml = preview.includes("<!DOCTYPE") || preview.includes("<html");
113
+ // Provide more specific error message
114
+ if (looksLikeHtml) {
115
+ return "Invalid Word document - received HTML response instead of file content (possibly an error page)";
116
+ }
117
+ return "Invalid Word document - not a valid DOCX format (expected ZIP/PK signature)";
118
+ }
119
+ return null;
120
+ }
121
+ /**
122
+ * Build processed Word result with extracted text and HTML content.
123
+ * This is a stub that returns an empty result - actual processing
124
+ * happens in the overridden processFile method since mammoth
125
+ * operations are asynchronous.
126
+ *
127
+ * @param buffer - Downloaded file content
128
+ * @param fileInfo - Original file information
129
+ * @returns Processed Word result (placeholder)
130
+ */
131
+ buildProcessedResult(buffer, fileInfo) {
132
+ // Note: This is a synchronous placeholder since buildProcessedResult is sync
133
+ // The actual mammoth extraction happens in the overridden processFile method
134
+ return {
135
+ textContent: "",
136
+ htmlContent: "",
137
+ warnings: [],
138
+ buffer,
139
+ mimetype: fileInfo.mimetype ||
140
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
141
+ size: fileInfo.size,
142
+ filename: this.getFilename(fileInfo),
143
+ };
144
+ }
145
+ /**
146
+ * Override processFile for async mammoth extraction.
147
+ *
148
+ * The mammoth library's extractRawText and convertToHtml methods are
149
+ * asynchronous, so we need to override the entire processFile method
150
+ * rather than just buildProcessedResult.
151
+ *
152
+ * Processing steps:
153
+ * 1. Validate file type and size
154
+ * 2. Get buffer (download from URL or use provided buffer)
155
+ * 3. Validate downloaded file (check PK signature)
156
+ * 4. Extract text with mammoth.extractRawText()
157
+ * 5. Convert to HTML with mammoth.convertToHtml()
158
+ * 6. Collect any warnings from mammoth
159
+ * 7. Return structured result
160
+ *
161
+ * @param fileInfo - File information with URL or buffer
162
+ * @param options - Optional processing options
163
+ * @returns Processing result with text, HTML, and warnings
164
+ */
165
+ async processFile(fileInfo, options) {
166
+ try {
167
+ // Step 1: Validate file type and size
168
+ const validationResult = this.validateFileWithResult(fileInfo);
169
+ if (!validationResult.success) {
170
+ return {
171
+ success: false,
172
+ error: validationResult.error,
173
+ };
174
+ }
175
+ // Step 2: Get file buffer (from direct buffer or download from URL)
176
+ let buffer;
177
+ if (fileInfo.buffer) {
178
+ // Direct buffer provided - skip download
179
+ buffer = fileInfo.buffer;
180
+ }
181
+ else if (fileInfo.url) {
182
+ // Download from URL
183
+ const downloadResult = await this.downloadFileWithRetry(fileInfo, options);
184
+ if (!downloadResult.success) {
185
+ return {
186
+ success: false,
187
+ error: downloadResult.error,
188
+ };
189
+ }
190
+ if (!downloadResult.data) {
191
+ return {
192
+ success: false,
193
+ error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
194
+ reason: "Download succeeded but returned no data",
195
+ }),
196
+ };
197
+ }
198
+ buffer = downloadResult.data;
199
+ }
200
+ else {
201
+ // No buffer or URL provided
202
+ return {
203
+ success: false,
204
+ error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
205
+ reason: "No buffer or URL provided for file",
206
+ }),
207
+ };
208
+ }
209
+ // Step 3: Validate downloaded file (check magic bytes)
210
+ const postValidationError = await this.validateDownloadedFile(buffer, fileInfo);
211
+ if (postValidationError) {
212
+ return {
213
+ success: false,
214
+ error: this.createError(FileErrorCode.INVALID_FORMAT, {
215
+ reason: postValidationError,
216
+ }),
217
+ };
218
+ }
219
+ // Step 4 & 5: Extract text and HTML content using mammoth
220
+ let textContent = "";
221
+ let htmlContent = "";
222
+ const warnings = [];
223
+ try {
224
+ // Extract plain text
225
+ const textResult = await mammoth.extractRawText({ buffer });
226
+ textContent = textResult.value;
227
+ // Collect warnings from text extraction
228
+ if (textResult.messages && textResult.messages.length > 0) {
229
+ warnings.push(...textResult.messages.map((m) => `[text] ${m.message}`));
230
+ }
231
+ // Convert to HTML for richer formatting
232
+ const htmlResult = await mammoth.convertToHtml({ buffer });
233
+ htmlContent = htmlResult.value;
234
+ // Collect warnings from HTML conversion
235
+ if (htmlResult.messages && htmlResult.messages.length > 0) {
236
+ warnings.push(...htmlResult.messages.map((m) => `[html] ${m.message}`));
237
+ }
238
+ }
239
+ catch (extractError) {
240
+ return {
241
+ success: false,
242
+ error: this.createError(FileErrorCode.PROCESSING_FAILED, {
243
+ reason: "Failed to extract Word document content",
244
+ fileType: "Word",
245
+ }, extractError instanceof Error ? extractError : undefined),
246
+ };
247
+ }
248
+ // Step 6: Return structured result
249
+ return {
250
+ success: true,
251
+ data: {
252
+ buffer,
253
+ mimetype: fileInfo.mimetype ||
254
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
255
+ size: fileInfo.size,
256
+ filename: this.getFilename(fileInfo),
257
+ textContent,
258
+ htmlContent,
259
+ warnings,
260
+ },
261
+ };
262
+ }
263
+ catch (error) {
264
+ // Catch any unexpected errors
265
+ return {
266
+ success: false,
267
+ error: this.createError(FileErrorCode.UNKNOWN_ERROR, {
268
+ error: error instanceof Error ? error.message : String(error),
269
+ }, error instanceof Error ? error : undefined),
270
+ };
271
+ }
272
+ }
273
+ }
274
+ // =============================================================================
275
+ // SINGLETON INSTANCE
276
+ // =============================================================================
277
+ /**
278
+ * Shared module-level WordProcessor instance.
279
+ * isWordFile() and processWord() delegate to it; reuse it rather than constructing new processors.
280
+ */
281
+ export const wordProcessor = new WordProcessor();
282
+ // =============================================================================
283
+ // HELPER FUNCTIONS
284
+ // =============================================================================
285
+ /**
286
+ * Check whether a file is treated as a Word document (.docx or .doc).
287
+ *
288
+ * @param mimetype - MIME type of the file
289
+ * @param filename - Filename (for extension-based detection)
290
+ * @returns the result of wordProcessor.isFileSupported() for this MIME type and filename (file content is not inspected)
291
+ *
292
+ * @example
293
+ * ```typescript
294
+ * if (isWordFile(file.mimetype, file.name)) {
295
+ * const result = await processWord(file);
296
+ * }
297
+ * ```
298
+ */
299
+ export function isWordFile(mimetype, filename) {
300
+ return wordProcessor.isFileSupported(mimetype, filename);
301
+ }
302
+ /**
303
+ * Validate Word document size against configured limit.
304
+ *
305
+ * @param sizeBytes - File size in bytes
306
+ * @returns true if size is within the allowed limit
307
+ *
308
+ * @example
309
+ * ```typescript
310
+ * if (!validateWordSize(file.size)) {
311
+ * throw new Error(`File exceeds ${SIZE_LIMITS.WORD_MAX_MB}MB limit`);
312
+ * }
313
+ * ```
314
+ */
315
+ export function validateWordSize(sizeBytes) {
316
+ const maxBytes = SIZE_LIMITS.WORD_MAX_MB * 1024 * 1024;
317
+ return sizeBytes <= maxBytes;
318
+ }
319
+ /**
320
+ * Process a single Word document.
321
+ *
322
+ * Convenience wrapper around wordProcessor.processFile() on the shared singleton instance.
323
+ *
324
+ * @param fileInfo - File information with URL or buffer
325
+ * @param options - Optional processing options (auth headers, timeout, retry config)
326
+ * @returns Result object: on success `data` holds text, HTML, and warnings; on failure `success` is false and `error` is set
327
+ *
328
+ * @example
329
+ * ```typescript
330
+ * const result = await processWord({
331
+ * id: "doc-123",
332
+ * name: "report.docx",
333
+ * mimetype: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
334
+ * size: 12345,
335
+ * url: "https://example.com/files/report.docx",
336
+ * }, {
337
+ * authHeaders: { Authorization: "Bearer token" },
338
+ * });
339
+ *
340
+ * if (result.success) {
341
+ * console.log("Text content:", result.data.textContent);
342
+ * console.log("HTML content:", result.data.htmlContent);
343
+ * if (result.data.warnings.length > 0) {
344
+ * console.warn("Warnings:", result.data.warnings);
345
+ * }
346
+ * } else {
347
+ * console.error("Failed:", result.error.userMessage);
348
+ * }
349
+ * ```
350
+ */
351
+ export async function processWord(fileInfo, options) {
352
+ return wordProcessor.processFile(fileInfo, options);
353
+ }
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Document Processors Module
3
+ *
4
+ * Exports document file processors for Word, Excel, and other document formats.
5
+ * Each processor handles downloading, validating, and extracting content from
6
+ * their respective file types.
7
+ *
8
+ * @module processors/document
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import {
13
+ * // Word documents
14
+ * WordProcessor,
15
+ * wordProcessor,
16
+ * isWordFile,
17
+ * processWord,
18
+ * type ProcessedWord,
19
+ *
20
+ * // Excel spreadsheets
21
+ * ExcelProcessor,
22
+ * excelProcessor,
23
+ * isExcelFile,
24
+ * processExcel,
25
+ * type ProcessedExcel,
26
+ * type ExcelWorksheet,
27
+ * } from "./document/index.js";
28
+ *
29
+ * // Process a Word document
30
+ * if (isWordFile(file.mimetype, file.name)) {
31
+ * const result = await processWord(fileInfo);
32
+ * if (result.success) {
33
+ * console.log("Text:", result.data.textContent);
34
+ * console.log("HTML:", result.data.htmlContent);
35
+ * }
36
+ * }
37
+ *
38
+ * // Process an Excel spreadsheet
39
+ * if (isExcelFile(file.mimetype, file.name)) {
40
+ * const result = await processExcel(fileInfo);
41
+ * if (result.success) {
42
+ * console.log(`Sheets: ${result.data.sheetCount}`);
43
+ * console.log(`Total rows: ${result.data.totalRows}`);
44
+ * for (const sheet of result.data.worksheets) {
45
+ * console.log(` ${sheet.name}: ${sheet.rowCount} rows`);
46
+ * }
47
+ * }
48
+ * }
49
+ * ```
50
+ */
51
+ export { isWordFile, type ProcessedWord, processWord, validateWordSize, WordProcessor, wordProcessor, } from "./WordProcessor.js";
52
+ export { ExcelProcessor, type ExcelWorksheet, excelProcessor, getExcelMaxRows, getExcelMaxSheets, getExcelMaxSizeMB, isExcelFile, type ProcessedExcel, processExcel, validateExcelSize, } from "./ExcelProcessor.js";
53
+ export { isRtfFile, type ProcessedRtf, processRtf, RtfProcessor, rtfProcessor, validateRtfSize, } from "./RtfProcessor.js";
54
+ export { getOpenDocumentMaxSizeMB, isOpenDocumentFile, OpenDocumentProcessor, openDocumentProcessor, type ProcessedOpenDocument, processOpenDocument, validateOpenDocumentSize, } from "./OpenDocumentProcessor.js";
@@ -0,0 +1,90 @@
1
+ /**
2
+ * Document Processors Module
3
+ *
4
+ * Exports document file processors for Word, Excel, and other document formats.
5
+ * Each processor handles downloading, validating, and extracting content from
6
+ * their respective file types.
7
+ *
8
+ * @module processors/document
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import {
13
+ * // Word documents
14
+ * WordProcessor,
15
+ * wordProcessor,
16
+ * isWordFile,
17
+ * processWord,
18
+ * type ProcessedWord,
19
+ *
20
+ * // Excel spreadsheets
21
+ * ExcelProcessor,
22
+ * excelProcessor,
23
+ * isExcelFile,
24
+ * processExcel,
25
+ * type ProcessedExcel,
26
+ * type ExcelWorksheet,
27
+ * } from "./document/index.js";
28
+ *
29
+ * // Process a Word document
30
+ * if (isWordFile(file.mimetype, file.name)) {
31
+ * const result = await processWord(fileInfo);
32
+ * if (result.success) {
33
+ * console.log("Text:", result.data.textContent);
34
+ * console.log("HTML:", result.data.htmlContent);
35
+ * }
36
+ * }
37
+ *
38
+ * // Process an Excel spreadsheet
39
+ * if (isExcelFile(file.mimetype, file.name)) {
40
+ * const result = await processExcel(fileInfo);
41
+ * if (result.success) {
42
+ * console.log(`Sheets: ${result.data.sheetCount}`);
43
+ * console.log(`Total rows: ${result.data.totalRows}`);
44
+ * for (const sheet of result.data.worksheets) {
45
+ * console.log(` ${sheet.name}: ${sheet.rowCount} rows`);
46
+ * }
47
+ * }
48
+ * }
49
+ * ```
50
+ */
51
+ // =============================================================================
52
+ // WORD PROCESSOR
53
+ // =============================================================================
54
+ export {
55
+ // Helper functions
56
+ isWordFile, processWord, validateWordSize,
57
+ // Class
58
+ WordProcessor,
59
+ // Singleton instance
60
+ wordProcessor, } from "./WordProcessor.js";
61
+ // =============================================================================
62
+ // EXCEL PROCESSOR
63
+ // =============================================================================
64
+ export {
65
+ // Class
66
+ ExcelProcessor,
67
+ // Singleton instance
68
+ excelProcessor,
69
+ // Helper functions
70
+ getExcelMaxRows, getExcelMaxSheets, getExcelMaxSizeMB, isExcelFile, processExcel, validateExcelSize, } from "./ExcelProcessor.js";
71
+ // =============================================================================
72
+ // RTF PROCESSOR
73
+ // =============================================================================
74
+ export {
75
+ // Helper functions
76
+ isRtfFile, processRtf,
77
+ // Class
78
+ RtfProcessor,
79
+ // Singleton instance
80
+ rtfProcessor, validateRtfSize, } from "./RtfProcessor.js";
81
+ // =============================================================================
82
+ // OPENDOCUMENT PROCESSOR
83
+ // =============================================================================
84
+ export { getOpenDocumentMaxSizeMB,
85
+ // Helper functions
86
+ isOpenDocumentFile,
87
+ // Class
88
+ OpenDocumentProcessor,
89
+ // Singleton instance
90
+ openDocumentProcessor, processOpenDocument, validateOpenDocumentSize, } from "./OpenDocumentProcessor.js";
@@ -0,0 +1,98 @@
1
+ /**
2
+ * File Processing Error Codes
3
+ *
4
+ * Comprehensive error codes for file processing operations including:
5
+ * - Download operations (timeout, auth, network)
6
+ * - File validation (size, type, format)
7
+ * - Content processing (parsing, encoding, extraction)
8
+ * - Security validation (XXE, XSS, zip bombs)
9
+ * - System errors
10
+ *
11
+ * @module processors/errors
12
+ */
13
+ /**
14
+ * Enumeration of all file processing error codes.
15
+ * Each code represents a specific failure scenario with associated messaging.
16
+ */
17
+ export declare enum FileErrorCode {
18
+ /** File download failed due to network or server error */
19
+ DOWNLOAD_FAILED = "DOWNLOAD_FAILED",
20
+ /** Download operation exceeded timeout threshold */
21
+ DOWNLOAD_TIMEOUT = "DOWNLOAD_TIMEOUT",
22
+ /** Authentication failed when accessing the file */
23
+ DOWNLOAD_AUTH_FAILED = "DOWNLOAD_AUTH_FAILED",
24
+ /** Network error during download (connection reset, DNS failure, etc.) */
25
+ NETWORK_ERROR = "NETWORK_ERROR",
26
+ /** File was not found at the specified location */
27
+ FILE_NOT_FOUND = "FILE_NOT_FOUND",
28
+ /** Request was rate limited by the server */
29
+ RATE_LIMITED = "RATE_LIMITED",
30
+ /** File exceeds maximum allowed size */
31
+ FILE_TOO_LARGE = "FILE_TOO_LARGE",
32
+ /** File type is not supported for processing */
33
+ UNSUPPORTED_TYPE = "UNSUPPORTED_TYPE",
34
+ /** File format is invalid or malformed */
35
+ INVALID_FORMAT = "INVALID_FORMAT",
36
+ /** File MIME type doesn't match expected format */
37
+ INVALID_MIME_TYPE = "INVALID_MIME_TYPE",
38
+ /** File magic bytes don't match expected file type */
39
+ INVALID_MAGIC_BYTES = "INVALID_MAGIC_BYTES",
40
+ /** File appears to be corrupted or damaged */
41
+ CORRUPTED_FILE = "CORRUPTED_FILE",
42
+ /** File internal structure is invalid */
43
+ INVALID_STRUCTURE = "INVALID_STRUCTURE",
44
+ /** Generic processing failure */
45
+ PROCESSING_FAILED = "PROCESSING_FAILED",
46
+ /** Failed to parse file content */
47
+ PARSING_FAILED = "PARSING_FAILED",
48
+ /** Text encoding error (not UTF-8, BOM issues, etc.) */
49
+ ENCODING_ERROR = "ENCODING_ERROR",
50
+ /** Failed to extract content from file */
51
+ EXTRACTION_FAILED = "EXTRACTION_FAILED",
52
+ /** Failed to decompress file content */
53
+ DECOMPRESSION_FAILED = "DECOMPRESSION_FAILED",
54
+ /** Security validation failed */
55
+ SECURITY_VALIDATION_FAILED = "SECURITY_VALIDATION_FAILED",
56
+ /** XML External Entity (XXE) attack detected */
57
+ XXE_DETECTED = "XXE_DETECTED",
58
+ /** Cross-site scripting (XSS) attack detected */
59
+ XSS_DETECTED = "XSS_DETECTED",
60
+ /** Potentially malicious code execution detected */
61
+ CODE_EXECUTION_DETECTED = "CODE_EXECUTION_DETECTED",
62
+ /** Zip bomb or decompression bomb detected */
63
+ ZIP_BOMB_DETECTED = "ZIP_BOMB_DETECTED",
64
+ /** Unknown or unexpected error */
65
+ UNKNOWN_ERROR = "UNKNOWN_ERROR"
66
+ }
67
+ /**
68
+ * Error message template with user-friendly messaging and retry information.
69
+ */
70
+ export interface ErrorMessageTemplate {
71
+ /** Technical error message */
72
+ message: string;
73
+ /** User-friendly error message */
74
+ userMessage: string;
75
+ /** Suggested action to resolve the error */
76
+ suggestedAction: string;
77
+ /** Whether this error is potentially retryable */
78
+ retryable: boolean;
79
+ }
80
+ /**
81
+ * Error messages map with technical and user-friendly messaging for each error code.
82
+ * All messages are designed to be clear, actionable, and free of technical jargon.
83
+ */
84
+ export declare const ERROR_MESSAGES: Record<FileErrorCode, ErrorMessageTemplate>;
85
+ /**
86
+ * Get the error message template for a specific error code.
87
+ *
88
+ * @param code - The FileErrorCode to get the template for
89
+ * @returns The ErrorMessageTemplate for the given code
90
+ */
91
+ export declare function getErrorTemplate(code: FileErrorCode): ErrorMessageTemplate;
92
+ /**
93
+ * Check if an error code represents a retryable error.
94
+ *
95
+ * @param code - The FileErrorCode to check
96
+ * @returns true if the error is retryable
97
+ */
98
+ export declare function isRetryableErrorCode(code: FileErrorCode): boolean;