@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Word Document Processing Utility
|
|
3
|
+
*
|
|
4
|
+
* Handles downloading, validating, and processing Word (.docx, .doc) files.
|
|
5
|
+
* Uses mammoth library to extract text and HTML content from Word documents.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - DOCX format validation via ZIP/PK signature check
|
|
9
|
+
* - Text extraction using mammoth.extractRawText()
|
|
10
|
+
* - HTML conversion using mammoth.convertToHtml()
|
|
11
|
+
* - Warning collection from mammoth processing
|
|
12
|
+
* - Support for both URL downloads and direct buffer input
|
|
13
|
+
*
|
|
14
|
+
* @module processors/document/WordProcessor
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```typescript
|
|
18
|
+
* import { wordProcessor, processWord, isWordFile } from "./WordProcessor.js";
|
|
19
|
+
*
|
|
20
|
+
* // Check if file is supported
|
|
21
|
+
* if (isWordFile(file.mimetype, file.name)) {
|
|
22
|
+
* const result = await processWord(fileInfo, {
|
|
23
|
+
* authHeaders: { Authorization: "Bearer token" },
|
|
24
|
+
* });
|
|
25
|
+
*
|
|
26
|
+
* if (result.success) {
|
|
27
|
+
* console.log("Text:", result.data.textContent);
|
|
28
|
+
* console.log("HTML:", result.data.htmlContent);
|
|
29
|
+
* console.log("Warnings:", result.data.warnings);
|
|
30
|
+
* }
|
|
31
|
+
* }
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
import * as mammoth from "mammoth";
|
|
35
|
+
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
|
|
36
|
+
import { SIZE_LIMITS } from "../config/index.js";
|
|
37
|
+
import { FileErrorCode } from "../errors/index.js";
|
|
38
|
+
// =============================================================================
|
|
39
|
+
// CONSTANTS
|
|
40
|
+
// =============================================================================
|
|
41
|
+
/**
|
|
42
|
+
* Supported MIME types for Word documents
|
|
43
|
+
*/
|
|
44
|
+
const SUPPORTED_WORD_MIME_TYPES = [
|
|
45
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
46
|
+
"application/msword",
|
|
47
|
+
];
|
|
48
|
+
/**
 * File extensions (including the leading dot) that this processor treats as
 * Word documents.
 */
const SUPPORTED_WORD_EXTENSIONS = [
    ".docx",
    ".doc",
];
|
|
52
|
+
/**
 * Default timeout for Word processing, in milliseconds (60 seconds).
 * Word documents can be larger due to embedded images and complex formatting.
 */
const WORD_TIMEOUT_MS = 60 * 1000;
|
|
57
|
+
// =============================================================================
|
|
58
|
+
// WORD PROCESSOR CLASS
|
|
59
|
+
// =============================================================================
|
|
60
|
+
/**
 * Word Processor - handles Word documents.
 *
 * Uses the mammoth library for both plain-text and HTML extraction. The
 * processor validates the file content by checking for the ZIP/PK signature
 * (DOCX files are ZIP archives).
 *
 * Note: the ".doc" extension and "application/msword" MIME type pass the
 * type check, but legacy binary .doc content does not carry the PK signature
 * and is therefore rejected during content validation with an explicit
 * "legacy .doc" message.
 *
 * @example
 * ```typescript
 * const processor = new WordProcessor();
 *
 * // Check if file is supported
 * if (processor.isFileSupported("application/msword", "report.doc")) {
 *   const result = await processor.processFile(fileInfo);
 *   if (result.success) {
 *     console.log("Extracted text:", result.data.textContent);
 *   }
 * }
 * ```
 */
export class WordProcessor extends BaseFileProcessor {
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS.WORD_MAX_MB,
            timeoutMs: WORD_TIMEOUT_MS,
            supportedMimeTypes: SUPPORTED_WORD_MIME_TYPES,
            supportedExtensions: SUPPORTED_WORD_EXTENSIONS,
            fileTypeName: "Word",
            defaultFilename: "document.docx",
        });
    }
    /**
     * Validate that the file content looks like a DOCX archive.
     *
     * DOCX files are ZIP archives and start with the PK signature
     * (0x50 0x4B). When the signature is missing, the first bytes are
     * inspected to produce a more specific message (legacy .doc content or
     * an HTML page returned instead of the file).
     *
     * @param buffer - File content (downloaded or supplied directly)
     * @param _fileInfo - Original file information (unused)
     * @returns null if valid, error message if invalid
     */
    async validateDownloadedFile(buffer, _fileInfo) {
        // Minimum size check
        if (buffer.length < 4) {
            return "Invalid Word document - file too small";
        }
        // Compare raw bytes rather than decoding with toString("ascii"):
        // ASCII decoding clears the high bit of every byte, so non-ZIP data
        // such as 0xD0 0xCB would otherwise be read as "PK" and accepted.
        const hasPkSignature = buffer[0] === 0x50 && buffer[1] === 0x4b;
        if (hasPkSignature) {
            return null;
        }
        // OLE2 compound-file signature (D0 CF 11 E0) used by legacy binary
        // .doc files. These pass the MIME/extension check but are not DOCX.
        const hasOleSignature = buffer[0] === 0xd0 &&
            buffer[1] === 0xcf &&
            buffer[2] === 0x11 &&
            buffer[3] === 0xe0;
        if (hasOleSignature) {
            return "Invalid Word document - legacy binary .doc format is not supported, please convert the file to .docx (expected ZIP/PK signature)";
        }
        // Inspect the start of the payload case-insensitively so that both
        // "<!DOCTYPE html>" and "<!doctype html>" / "<HTML>" are recognised.
        const preview = buffer.subarray(0, 100).toString("utf8").toLowerCase();
        const looksLikeHtml = preview.includes("<!doctype") || preview.includes("<html");
        if (looksLikeHtml) {
            return "Invalid Word document - received HTML response instead of file content (possibly an error page)";
        }
        return "Invalid Word document - not a valid DOCX format (expected ZIP/PK signature)";
    }
    /**
     * Build a placeholder Word result with empty text, HTML and warnings.
     *
     * This method is synchronous, while the mammoth extraction calls are
     * asynchronous; the real extraction is performed by the overridden
     * processFile method.
     *
     * @param buffer - File content
     * @param fileInfo - Original file information
     * @returns Processed Word result with empty content fields (placeholder)
     */
    buildProcessedResult(buffer, fileInfo) {
        return {
            textContent: "",
            htmlContent: "",
            warnings: [],
            buffer,
            // Fall back to the DOCX MIME type when none was supplied
            mimetype: fileInfo.mimetype ||
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            size: fileInfo.size,
            filename: this.getFilename(fileInfo),
        };
    }
    /**
     * Override processFile for async mammoth extraction.
     *
     * mammoth.extractRawText and mammoth.convertToHtml are asynchronous, so
     * the whole processFile method is overridden rather than just
     * buildProcessedResult.
     *
     * Processing steps:
     * 1. Validate file type and size
     * 2. Get buffer (use provided buffer, otherwise download from URL)
     * 3. Validate file content (check PK signature)
     * 4. Extract text with mammoth.extractRawText()
     * 5. Convert to HTML with mammoth.convertToHtml()
     * 6. Collect any warnings from mammoth
     * 7. Return structured result
     *
     * Failures are reported through the returned object
     * (`success: false` plus `error`); unexpected exceptions are caught and
     * converted to an UNKNOWN_ERROR result.
     *
     * @param fileInfo - File information with URL or buffer
     * @param options - Optional processing options
     * @returns Processing result with text, HTML, and warnings
     */
    async processFile(fileInfo, options) {
        try {
            // Step 1: Validate file type and size
            const validationResult = this.validateFileWithResult(fileInfo);
            if (!validationResult.success) {
                return {
                    success: false,
                    error: validationResult.error,
                };
            }
            // Step 2: Get file buffer (from direct buffer or download from URL)
            let buffer;
            if (fileInfo.buffer) {
                // Direct buffer provided - skip download
                buffer = fileInfo.buffer;
            }
            else if (fileInfo.url) {
                // Download from URL
                const downloadResult = await this.downloadFileWithRetry(fileInfo, options);
                if (!downloadResult.success) {
                    return {
                        success: false,
                        error: downloadResult.error,
                    };
                }
                if (!downloadResult.data) {
                    return {
                        success: false,
                        error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                            reason: "Download succeeded but returned no data",
                        }),
                    };
                }
                buffer = downloadResult.data;
            }
            else {
                // No buffer or URL provided
                return {
                    success: false,
                    error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                        reason: "No buffer or URL provided for file",
                    }),
                };
            }
            // Step 3: Validate file content (check magic bytes)
            const postValidationError = await this.validateDownloadedFile(buffer, fileInfo);
            if (postValidationError) {
                return {
                    success: false,
                    error: this.createError(FileErrorCode.INVALID_FORMAT, {
                        reason: postValidationError,
                    }),
                };
            }
            // Steps 4-6: Extract text and HTML with mammoth, collecting warnings
            let textContent = "";
            let htmlContent = "";
            const warnings = [];
            try {
                // Extract plain text
                const textResult = await mammoth.extractRawText({ buffer });
                textContent = textResult.value;
                // Collect warnings from text extraction
                if (textResult.messages && textResult.messages.length > 0) {
                    warnings.push(...textResult.messages.map((m) => `[text] ${m.message}`));
                }
                // Convert to HTML for richer formatting
                const htmlResult = await mammoth.convertToHtml({ buffer });
                htmlContent = htmlResult.value;
                // Collect warnings from HTML conversion
                if (htmlResult.messages && htmlResult.messages.length > 0) {
                    warnings.push(...htmlResult.messages.map((m) => `[html] ${m.message}`));
                }
            }
            catch (extractError) {
                return {
                    success: false,
                    error: this.createError(FileErrorCode.PROCESSING_FAILED, {
                        reason: "Failed to extract Word document content",
                        fileType: "Word",
                    }, extractError instanceof Error ? extractError : undefined),
                };
            }
            // Step 7: Return structured result
            return {
                success: true,
                data: {
                    buffer,
                    mimetype: fileInfo.mimetype ||
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    size: fileInfo.size,
                    filename: this.getFilename(fileInfo),
                    textContent,
                    htmlContent,
                    warnings,
                },
            };
        }
        catch (error) {
            // Catch any unexpected errors
            return {
                success: false,
                error: this.createError(FileErrorCode.UNKNOWN_ERROR, {
                    error: error instanceof Error ? error.message : String(error),
                }, error instanceof Error ? error : undefined),
            };
        }
    }
}
|
|
274
|
+
// =============================================================================
|
|
275
|
+
// SINGLETON INSTANCE
|
|
276
|
+
// =============================================================================
|
|
277
|
+
/**
|
|
278
|
+
* Singleton Word processor instance.
|
|
279
|
+
* Use this for most use cases to avoid creating multiple instances.
|
|
280
|
+
*/
|
|
281
|
+
// Created once when this module is evaluated; isWordFile() and processWord() in this file delegate to it.
export const wordProcessor = new WordProcessor();
|
|
282
|
+
// =============================================================================
|
|
283
|
+
// HELPER FUNCTIONS
|
|
284
|
+
// =============================================================================
|
|
285
|
+
/**
 * Tell whether the shared Word processor accepts a file, based on its MIME
 * type and filename.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns true if the file is a supported Word document
 *
 * @example
 * ```typescript
 * if (isWordFile(file.mimetype, file.name)) {
 *   const result = await processWord(file);
 * }
 * ```
 */
export function isWordFile(mimetype, filename) {
    const supported = wordProcessor.isFileSupported(mimetype, filename);
    return supported;
}
|
|
302
|
+
/**
 * Check a Word document's size against the configured limit
 * (SIZE_LIMITS.WORD_MAX_MB, converted to bytes).
 *
 * @param sizeBytes - File size in bytes
 * @returns true if the size does not exceed the limit
 *
 * @example
 * ```typescript
 * if (!validateWordSize(file.size)) {
 *   throw new Error(`File exceeds ${SIZE_LIMITS.WORD_MAX_MB}MB limit`);
 * }
 * ```
 */
export function validateWordSize(sizeBytes) {
    const bytesPerMegabyte = 1024 * 1024;
    return sizeBytes <= SIZE_LIMITS.WORD_MAX_MB * bytesPerMegabyte;
}
|
|
319
|
+
/**
 * Process one Word document through the shared wordProcessor instance.
 *
 * @param fileInfo - File information with URL or buffer
 * @param options - Optional processing options (auth headers, timeout, retry config)
 * @returns Processing result with extracted text, HTML, and warnings
 *
 * @example
 * ```typescript
 * const result = await processWord({
 *   id: "doc-123",
 *   name: "report.docx",
 *   mimetype: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
 *   size: 12345,
 *   url: "https://example.com/files/report.docx",
 * }, {
 *   authHeaders: { Authorization: "Bearer token" },
 * });
 *
 * if (result.success) {
 *   console.log("Text content:", result.data.textContent);
 *   console.log("HTML content:", result.data.htmlContent);
 *   if (result.data.warnings.length > 0) {
 *     console.warn("Warnings:", result.data.warnings);
 *   }
 * } else {
 *   console.error("Failed:", result.error.userMessage);
 * }
 * ```
 */
export async function processWord(fileInfo, options) {
    const outcome = await wordProcessor.processFile(fileInfo, options);
    return outcome;
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Processors Module
|
|
3
|
+
*
|
|
4
|
+
* Exports document file processors for Word, Excel, and other document formats.
|
|
5
|
+
* Each processor handles downloading, validating, and extracting content from
|
|
6
|
+
* their respective file types.
|
|
7
|
+
*
|
|
8
|
+
* @module processors/document
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* import {
|
|
13
|
+
* // Word documents
|
|
14
|
+
* WordProcessor,
|
|
15
|
+
* wordProcessor,
|
|
16
|
+
* isWordFile,
|
|
17
|
+
* processWord,
|
|
18
|
+
* type ProcessedWord,
|
|
19
|
+
*
|
|
20
|
+
* // Excel spreadsheets
|
|
21
|
+
* ExcelProcessor,
|
|
22
|
+
* excelProcessor,
|
|
23
|
+
* isExcelFile,
|
|
24
|
+
* processExcel,
|
|
25
|
+
* type ProcessedExcel,
|
|
26
|
+
* type ExcelWorksheet,
|
|
27
|
+
* } from "./document/index.js";
|
|
28
|
+
*
|
|
29
|
+
* // Process a Word document
|
|
30
|
+
* if (isWordFile(file.mimetype, file.name)) {
|
|
31
|
+
* const result = await processWord(fileInfo);
|
|
32
|
+
* if (result.success) {
|
|
33
|
+
* console.log("Text:", result.data.textContent);
|
|
34
|
+
* console.log("HTML:", result.data.htmlContent);
|
|
35
|
+
* }
|
|
36
|
+
* }
|
|
37
|
+
*
|
|
38
|
+
* // Process an Excel spreadsheet
|
|
39
|
+
* if (isExcelFile(file.mimetype, file.name)) {
|
|
40
|
+
* const result = await processExcel(fileInfo);
|
|
41
|
+
* if (result.success) {
|
|
42
|
+
* console.log(`Sheets: ${result.data.sheetCount}`);
|
|
43
|
+
* console.log(`Total rows: ${result.data.totalRows}`);
|
|
44
|
+
* for (const sheet of result.data.worksheets) {
|
|
45
|
+
* console.log(` ${sheet.name}: ${sheet.rowCount} rows`);
|
|
46
|
+
* }
|
|
47
|
+
* }
|
|
48
|
+
* }
|
|
49
|
+
* ```
|
|
50
|
+
*/
|
|
51
|
+
export { isWordFile, type ProcessedWord, processWord, validateWordSize, WordProcessor, wordProcessor, } from "./WordProcessor.js";
|
|
52
|
+
export { ExcelProcessor, type ExcelWorksheet, excelProcessor, getExcelMaxRows, getExcelMaxSheets, getExcelMaxSizeMB, isExcelFile, type ProcessedExcel, processExcel, validateExcelSize, } from "./ExcelProcessor.js";
|
|
53
|
+
export { isRtfFile, type ProcessedRtf, processRtf, RtfProcessor, rtfProcessor, validateRtfSize, } from "./RtfProcessor.js";
|
|
54
|
+
export { getOpenDocumentMaxSizeMB, isOpenDocumentFile, OpenDocumentProcessor, openDocumentProcessor, type ProcessedOpenDocument, processOpenDocument, validateOpenDocumentSize, } from "./OpenDocumentProcessor.js";
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Processors Module
|
|
3
|
+
*
|
|
4
|
+
* Exports document file processors for Word, Excel, and other document formats.
|
|
5
|
+
* Each processor handles downloading, validating, and extracting content from
|
|
6
|
+
* their respective file types.
|
|
7
|
+
*
|
|
8
|
+
* @module processors/document
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* import {
|
|
13
|
+
* // Word documents
|
|
14
|
+
* WordProcessor,
|
|
15
|
+
* wordProcessor,
|
|
16
|
+
* isWordFile,
|
|
17
|
+
* processWord,
|
|
18
|
+
* type ProcessedWord,
|
|
19
|
+
*
|
|
20
|
+
* // Excel spreadsheets
|
|
21
|
+
* ExcelProcessor,
|
|
22
|
+
* excelProcessor,
|
|
23
|
+
* isExcelFile,
|
|
24
|
+
* processExcel,
|
|
25
|
+
* type ProcessedExcel,
|
|
26
|
+
* type ExcelWorksheet,
|
|
27
|
+
* } from "./document/index.js";
|
|
28
|
+
*
|
|
29
|
+
* // Process a Word document
|
|
30
|
+
* if (isWordFile(file.mimetype, file.name)) {
|
|
31
|
+
* const result = await processWord(fileInfo);
|
|
32
|
+
* if (result.success) {
|
|
33
|
+
* console.log("Text:", result.data.textContent);
|
|
34
|
+
* console.log("HTML:", result.data.htmlContent);
|
|
35
|
+
* }
|
|
36
|
+
* }
|
|
37
|
+
*
|
|
38
|
+
* // Process an Excel spreadsheet
|
|
39
|
+
* if (isExcelFile(file.mimetype, file.name)) {
|
|
40
|
+
* const result = await processExcel(fileInfo);
|
|
41
|
+
* if (result.success) {
|
|
42
|
+
* console.log(`Sheets: ${result.data.sheetCount}`);
|
|
43
|
+
* console.log(`Total rows: ${result.data.totalRows}`);
|
|
44
|
+
* for (const sheet of result.data.worksheets) {
|
|
45
|
+
* console.log(` ${sheet.name}: ${sheet.rowCount} rows`);
|
|
46
|
+
* }
|
|
47
|
+
* }
|
|
48
|
+
* }
|
|
49
|
+
* ```
|
|
50
|
+
*/
|
|
51
|
+
// =============================================================================
|
|
52
|
+
// WORD PROCESSOR
|
|
53
|
+
// =============================================================================
|
|
54
|
+
export {
|
|
55
|
+
// Helper functions
|
|
56
|
+
isWordFile, processWord, validateWordSize,
|
|
57
|
+
// Class
|
|
58
|
+
WordProcessor,
|
|
59
|
+
// Singleton instance
|
|
60
|
+
wordProcessor, } from "./WordProcessor.js";
|
|
61
|
+
// =============================================================================
|
|
62
|
+
// EXCEL PROCESSOR
|
|
63
|
+
// =============================================================================
|
|
64
|
+
export {
|
|
65
|
+
// Class
|
|
66
|
+
ExcelProcessor,
|
|
67
|
+
// Singleton instance
|
|
68
|
+
excelProcessor,
|
|
69
|
+
// Helper functions
|
|
70
|
+
getExcelMaxRows, getExcelMaxSheets, getExcelMaxSizeMB, isExcelFile, processExcel, validateExcelSize, } from "./ExcelProcessor.js";
|
|
71
|
+
// =============================================================================
|
|
72
|
+
// RTF PROCESSOR
|
|
73
|
+
// =============================================================================
|
|
74
|
+
export {
|
|
75
|
+
// Helper functions
|
|
76
|
+
isRtfFile, processRtf,
|
|
77
|
+
// Class
|
|
78
|
+
RtfProcessor,
|
|
79
|
+
// Singleton instance
|
|
80
|
+
rtfProcessor, validateRtfSize, } from "./RtfProcessor.js";
|
|
81
|
+
// =============================================================================
|
|
82
|
+
// OPENDOCUMENT PROCESSOR
|
|
83
|
+
// =============================================================================
|
|
84
|
+
export { getOpenDocumentMaxSizeMB,
|
|
85
|
+
// Helper functions
|
|
86
|
+
isOpenDocumentFile,
|
|
87
|
+
// Class
|
|
88
|
+
OpenDocumentProcessor,
|
|
89
|
+
// Singleton instance
|
|
90
|
+
openDocumentProcessor, processOpenDocument, validateOpenDocumentSize, } from "./OpenDocumentProcessor.js";
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* File Processing Error Codes
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive error codes for file processing operations including:
|
|
5
|
+
* - Download operations (timeout, auth, network)
|
|
6
|
+
* - File validation (size, type, format)
|
|
7
|
+
* - Content processing (parsing, encoding, extraction)
|
|
8
|
+
* - Security validation (XXE, XSS, zip bombs)
|
|
9
|
+
* - System errors
|
|
10
|
+
*
|
|
11
|
+
* @module processors/errors
|
|
12
|
+
*/
|
|
13
|
+
/**
 * All error codes that file processing can report.
 * Every member is a string enum whose value equals its name.
 */
export declare enum FileErrorCode {
    // ---- Download ----
    /** The download failed because of a network or server error */
    DOWNLOAD_FAILED = "DOWNLOAD_FAILED",
    /** The download took longer than the timeout threshold */
    DOWNLOAD_TIMEOUT = "DOWNLOAD_TIMEOUT",
    /** Authentication was rejected while accessing the file */
    DOWNLOAD_AUTH_FAILED = "DOWNLOAD_AUTH_FAILED",
    /** Network-level failure during download (connection reset, DNS failure, etc.) */
    NETWORK_ERROR = "NETWORK_ERROR",
    /** No file exists at the specified location */
    FILE_NOT_FOUND = "FILE_NOT_FOUND",
    /** The server rate limited the request */
    RATE_LIMITED = "RATE_LIMITED",
    // ---- Validation ----
    /** The file is larger than the maximum allowed size */
    FILE_TOO_LARGE = "FILE_TOO_LARGE",
    /** The file type is not supported for processing */
    UNSUPPORTED_TYPE = "UNSUPPORTED_TYPE",
    /** The file format is invalid or malformed */
    INVALID_FORMAT = "INVALID_FORMAT",
    /** The file MIME type does not match the expected format */
    INVALID_MIME_TYPE = "INVALID_MIME_TYPE",
    /** The file magic bytes do not match the expected file type */
    INVALID_MAGIC_BYTES = "INVALID_MAGIC_BYTES",
    /** The file appears to be corrupted or damaged */
    CORRUPTED_FILE = "CORRUPTED_FILE",
    /** The internal structure of the file is invalid */
    INVALID_STRUCTURE = "INVALID_STRUCTURE",
    // ---- Content processing ----
    /** Generic processing failure */
    PROCESSING_FAILED = "PROCESSING_FAILED",
    /** The file content could not be parsed */
    PARSING_FAILED = "PARSING_FAILED",
    /** Text encoding problem (not UTF-8, BOM issues, etc.) */
    ENCODING_ERROR = "ENCODING_ERROR",
    /** Content could not be extracted from the file */
    EXTRACTION_FAILED = "EXTRACTION_FAILED",
    /** The file content could not be decompressed */
    DECOMPRESSION_FAILED = "DECOMPRESSION_FAILED",
    // ---- Security ----
    /** Security validation failed */
    SECURITY_VALIDATION_FAILED = "SECURITY_VALIDATION_FAILED",
    /** An XML External Entity (XXE) attack was detected */
    XXE_DETECTED = "XXE_DETECTED",
    /** A cross-site scripting (XSS) attack was detected */
    XSS_DETECTED = "XSS_DETECTED",
    /** Potentially malicious code execution was detected */
    CODE_EXECUTION_DETECTED = "CODE_EXECUTION_DETECTED",
    /** A zip bomb or decompression bomb was detected */
    ZIP_BOMB_DETECTED = "ZIP_BOMB_DETECTED",
    // ---- System ----
    /** Unknown or unexpected error */
    UNKNOWN_ERROR = "UNKNOWN_ERROR"
}
|
|
67
|
+
/**
 * Shape of the messaging attached to an error code: a technical message, a
 * user-facing message, a suggested remedy, and a retry hint.
 */
export interface ErrorMessageTemplate {
    /** Message aimed at developers and logs */
    message: string;
    /** Message suitable for showing to end users */
    userMessage: string;
    /** What the user can do to resolve the error */
    suggestedAction: string;
    /** True when retrying the operation may succeed */
    retryable: boolean;
}
|
|
80
|
+
/**
 * Lookup table from every FileErrorCode to its ErrorMessageTemplate
 * (technical and user-friendly messaging).
 * Messages are intended to be clear, actionable, and free of technical jargon.
 */
export declare const ERROR_MESSAGES: Record<FileErrorCode, ErrorMessageTemplate>;
|
|
85
|
+
/**
 * Look up the message template that belongs to an error code.
 *
 * @param code - The FileErrorCode to get the template for
 * @returns The ErrorMessageTemplate for the given code
 */
export declare function getErrorTemplate(code: FileErrorCode): ErrorMessageTemplate;
|
|
92
|
+
/**
 * Tell whether an error code denotes a retryable error.
 *
 * @param code - The FileErrorCode to check
 * @returns true if the error is retryable
 */
export declare function isRetryableErrorCode(code: FileErrorCode): boolean;
|