@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenDocument Processor
|
|
3
|
+
*
|
|
4
|
+
* Processes OpenDocument format files (.odt, .ods, .odp) by extracting
|
|
5
|
+
* text content from the internal XML structure.
|
|
6
|
+
*
|
|
7
|
+
* @module processors/document/OpenDocumentProcessor
|
|
8
|
+
*/
|
|
9
|
+
import { createRequire } from "node:module";
|
|
10
|
+
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
|
|
11
|
+
import { SIZE_LIMITS } from "../config/index.js";
|
|
12
|
+
const require = createRequire(import.meta.url);
|
|
13
|
+
/**
|
|
14
|
+
* OpenDocument Processor - handles .odt, .ods, .odp files
|
|
15
|
+
*
|
|
16
|
+
* OpenDocument files are ZIP archives containing XML content.
|
|
17
|
+
* The main content is in content.xml within the archive.
|
|
18
|
+
*
|
|
19
|
+
* Priority: ~105 (between Word and Text)
|
|
20
|
+
*/
|
|
21
|
+
export class OpenDocumentProcessor extends BaseFileProcessor {
|
|
22
|
+
constructor() {
|
|
23
|
+
super({
|
|
24
|
+
maxSizeMB: SIZE_LIMITS.DOCUMENT_MAX_MB,
|
|
25
|
+
timeoutMs: 60000,
|
|
26
|
+
supportedMimeTypes: [
|
|
27
|
+
"application/vnd.oasis.opendocument.text",
|
|
28
|
+
"application/vnd.oasis.opendocument.spreadsheet",
|
|
29
|
+
"application/vnd.oasis.opendocument.presentation",
|
|
30
|
+
],
|
|
31
|
+
supportedExtensions: [".odt", ".ods", ".odp"],
|
|
32
|
+
fileTypeName: "OpenDocument",
|
|
33
|
+
defaultFilename: "document.odt",
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Validate that the file is a valid ZIP archive (OpenDocument format)
|
|
38
|
+
*/
|
|
39
|
+
async validateDownloadedFile(buffer, _fileInfo) {
|
|
40
|
+
if (buffer.length < 4) {
|
|
41
|
+
return "File too small to be a valid OpenDocument file";
|
|
42
|
+
}
|
|
43
|
+
// Check for PK (ZIP) signature
|
|
44
|
+
const signature = buffer.subarray(0, 2).toString("ascii");
|
|
45
|
+
if (signature !== "PK") {
|
|
46
|
+
return "Invalid OpenDocument format - not a valid ZIP archive";
|
|
47
|
+
}
|
|
48
|
+
return null;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Build the processed result by extracting content from the OpenDocument
|
|
52
|
+
*/
|
|
53
|
+
buildProcessedResult(buffer, fileInfo) {
|
|
54
|
+
const ext = this.getExtension(fileInfo.name);
|
|
55
|
+
const format = this.detectFormat(ext);
|
|
56
|
+
let textContent = "";
|
|
57
|
+
let paragraphCount = 0;
|
|
58
|
+
let truncated = false;
|
|
59
|
+
try {
|
|
60
|
+
// Dynamically import adm-zip to avoid issues if not available
|
|
61
|
+
const AdmZip = require("adm-zip");
|
|
62
|
+
const zip = new AdmZip(buffer);
|
|
63
|
+
// Try to get content.xml
|
|
64
|
+
const contentEntry = zip.getEntry("content.xml");
|
|
65
|
+
if (contentEntry) {
|
|
66
|
+
const xmlContent = contentEntry.getData().toString("utf-8");
|
|
67
|
+
const extracted = this.extractTextFromXml(xmlContent);
|
|
68
|
+
textContent = extracted.text;
|
|
69
|
+
paragraphCount = extracted.paragraphCount;
|
|
70
|
+
// Check for truncation
|
|
71
|
+
const maxChars = 500000; // ~500KB of text
|
|
72
|
+
if (textContent.length > maxChars) {
|
|
73
|
+
textContent =
|
|
74
|
+
textContent.substring(0, maxChars) + "\n\n... [Content truncated]";
|
|
75
|
+
truncated = true;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
else {
|
|
79
|
+
throw new Error("content.xml not found in OpenDocument archive");
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
catch (error) {
|
|
83
|
+
const message = error instanceof Error ? error.message : "Unknown error";
|
|
84
|
+
throw new Error(`Failed to extract OpenDocument content: ${message}`);
|
|
85
|
+
}
|
|
86
|
+
return {
|
|
87
|
+
textContent,
|
|
88
|
+
format,
|
|
89
|
+
paragraphCount,
|
|
90
|
+
truncated,
|
|
91
|
+
buffer,
|
|
92
|
+
mimetype: fileInfo.mimetype || "application/vnd.oasis.opendocument.text",
|
|
93
|
+
size: fileInfo.size,
|
|
94
|
+
filename: this.getFilename(fileInfo),
|
|
95
|
+
};
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Decode HTML entities in a single pass to prevent double-unescaping.
|
|
99
|
+
* Sequential replacement is vulnerable: "&lt;" → "<" → "<"
|
|
100
|
+
* Single-pass avoids this by replacing each entity exactly once.
|
|
101
|
+
*/
|
|
102
|
+
decodeHtmlEntities(text) {
|
|
103
|
+
const entityMap = {
|
|
104
|
+
"&": "&",
|
|
105
|
+
"<": "<",
|
|
106
|
+
">": ">",
|
|
107
|
+
""": '"',
|
|
108
|
+
"'": "'",
|
|
109
|
+
};
|
|
110
|
+
return text.replace(/&(?:amp|lt|gt|quot|apos);/g, (match) => entityMap[match] ?? match);
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* Extract text content from OpenDocument XML
|
|
114
|
+
*/
|
|
115
|
+
extractTextFromXml(xml) {
|
|
116
|
+
const paragraphs = [];
|
|
117
|
+
// Extract text from <text:p> elements (paragraphs)
|
|
118
|
+
// Also handle <text:h> (headings) and <text:span> (spans)
|
|
119
|
+
const textPattern = /<text:(?:p|h|span)[^>]*>([\s\S]*?)<\/text:(?:p|h|span)>/gi;
|
|
120
|
+
const matches = xml.matchAll(textPattern);
|
|
121
|
+
for (const match of matches) {
|
|
122
|
+
// Remove nested tags iteratively to handle fragments like "<scr<script>ipt>"
|
|
123
|
+
let stripped = match[1];
|
|
124
|
+
let prev;
|
|
125
|
+
do {
|
|
126
|
+
prev = stripped;
|
|
127
|
+
stripped = stripped.replace(/<[^>]+>/g, "");
|
|
128
|
+
} while (stripped !== prev);
|
|
129
|
+
const text = this.decodeHtmlEntities(stripped).trim();
|
|
130
|
+
if (text) {
|
|
131
|
+
paragraphs.push(text);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
// Also try a simpler approach for spreadsheets and presentations
|
|
135
|
+
// where content might be in different structures
|
|
136
|
+
if (paragraphs.length === 0) {
|
|
137
|
+
// Try to extract any text content between tags (iterative to handle nested fragments)
|
|
138
|
+
let stripped = xml;
|
|
139
|
+
let prev;
|
|
140
|
+
do {
|
|
141
|
+
prev = stripped;
|
|
142
|
+
stripped = stripped.replace(/<[^>]+>/g, " ");
|
|
143
|
+
} while (stripped !== prev);
|
|
144
|
+
const simpleText = this.decodeHtmlEntities(stripped.replace(/\s+/g, " ")).trim();
|
|
145
|
+
if (simpleText) {
|
|
146
|
+
paragraphs.push(simpleText);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
return {
|
|
150
|
+
text: paragraphs.join("\n\n"),
|
|
151
|
+
paragraphCount: paragraphs.length,
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* Detect the OpenDocument format from file extension
|
|
156
|
+
*/
|
|
157
|
+
detectFormat(ext) {
|
|
158
|
+
switch (ext?.toLowerCase()) {
|
|
159
|
+
case ".odt":
|
|
160
|
+
return "odt";
|
|
161
|
+
case ".ods":
|
|
162
|
+
return "ods";
|
|
163
|
+
case ".odp":
|
|
164
|
+
return "odp";
|
|
165
|
+
default:
|
|
166
|
+
return "unknown";
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
/**
|
|
170
|
+
* Get file extension from filename
|
|
171
|
+
*/
|
|
172
|
+
getExtension(filename) {
|
|
173
|
+
const match = filename.toLowerCase().match(/\.[^.]+$/);
|
|
174
|
+
return match ? match[0] : null;
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
// =============================================================================
|
|
178
|
+
// SINGLETON INSTANCE
|
|
179
|
+
// =============================================================================
|
|
180
|
+
/**
|
|
181
|
+
* Singleton instance of OpenDocumentProcessor
|
|
182
|
+
*/
|
|
183
|
+
export const openDocumentProcessor = new OpenDocumentProcessor();
|
|
184
|
+
// =============================================================================
|
|
185
|
+
// HELPER FUNCTIONS
|
|
186
|
+
// =============================================================================
|
|
187
|
+
/**
|
|
188
|
+
* Check if a file is an OpenDocument file by MIME type or extension
|
|
189
|
+
*/
|
|
190
|
+
export function isOpenDocumentFile(mimetype, filename) {
|
|
191
|
+
return openDocumentProcessor.isFileSupported(mimetype, filename);
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Validate OpenDocument file size against limits
|
|
195
|
+
*/
|
|
196
|
+
export function validateOpenDocumentSize(sizeBytes) {
|
|
197
|
+
return sizeBytes <= SIZE_LIMITS.DOCUMENT_MAX_MB * 1024 * 1024;
|
|
198
|
+
}
|
|
199
|
+
/**
|
|
200
|
+
* Process an OpenDocument file
|
|
201
|
+
*/
|
|
202
|
+
export async function processOpenDocument(fileInfo, options) {
|
|
203
|
+
return openDocumentProcessor.processFile(fileInfo, options);
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Get the maximum allowed OpenDocument file size in MB
|
|
207
|
+
*/
|
|
208
|
+
export function getOpenDocumentMaxSizeMB() {
|
|
209
|
+
return SIZE_LIMITS.DOCUMENT_MAX_MB;
|
|
210
|
+
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RTF Document Processor
|
|
3
|
+
*
|
|
4
|
+
* Processes Rich Text Format (.rtf) files by extracting plain text content
|
|
5
|
+
* from RTF control codes. Uses a lightweight text extraction approach
|
|
6
|
+
* without requiring external dependencies.
|
|
7
|
+
*
|
|
8
|
+
* Key features:
|
|
9
|
+
* - RTF control code stripping
|
|
10
|
+
* - Text content extraction
|
|
11
|
+
* - Raw content preservation for debugging
|
|
12
|
+
* - No external dependencies required
|
|
13
|
+
*
|
|
14
|
+
* Priority: ~110 (document format, processed after binary formats)
|
|
15
|
+
*
|
|
16
|
+
* @module processors/document/RtfProcessor
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* ```typescript
|
|
20
|
+
* import { rtfProcessor, processRtf, isRtfFile } from "./document/index.js";
|
|
21
|
+
*
|
|
22
|
+
* // Check if a file is an RTF file
|
|
23
|
+
* if (isRtfFile("application/rtf", "document.rtf")) {
|
|
24
|
+
* const result = await processRtf({
|
|
25
|
+
* id: "file-123",
|
|
26
|
+
* name: "document.rtf",
|
|
27
|
+
* mimetype: "application/rtf",
|
|
28
|
+
* size: 10240,
|
|
29
|
+
* buffer: rtfBuffer,
|
|
30
|
+
* });
|
|
31
|
+
*
|
|
32
|
+
* if (result.success) {
|
|
33
|
+
* console.log(`Text content: ${result.data.textContent}`);
|
|
34
|
+
* }
|
|
35
|
+
* }
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
|
|
39
|
+
import type { FileInfo, FileProcessingResult, ProcessOptions } from "../base/types.js";
|
|
40
|
+
export type { ProcessedRtf } from "../base/types.js";
|
|
41
|
+
import type { ProcessedRtf } from "../base/types.js";
|
|
42
|
+
/**
|
|
43
|
+
* RTF Processor - handles Rich Text Format files.
|
|
44
|
+
*
|
|
45
|
+
* Extracts plain text from RTF documents by stripping RTF control codes.
|
|
46
|
+
* This is a lightweight implementation that doesn't require external
|
|
47
|
+
* RTF parsing libraries.
|
|
48
|
+
*
|
|
49
|
+
* Priority: ~110 (document format)
|
|
50
|
+
*
|
|
51
|
+
* @example
|
|
52
|
+
* ```typescript
|
|
53
|
+
* const processor = new RtfProcessor();
|
|
54
|
+
*
|
|
55
|
+
* const result = await processor.processFile({
|
|
56
|
+
* id: "file-123",
|
|
57
|
+
* name: "report.rtf",
|
|
58
|
+
* mimetype: "application/rtf",
|
|
59
|
+
* size: 5120,
|
|
60
|
+
* buffer: rtfBuffer,
|
|
61
|
+
* });
|
|
62
|
+
*
|
|
63
|
+
* if (result.success) {
|
|
64
|
+
* console.log("Extracted text:", result.data.textContent);
|
|
65
|
+
* }
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
68
|
+
export declare class RtfProcessor extends BaseFileProcessor<ProcessedRtf> {
|
|
69
|
+
constructor();
|
|
70
|
+
/**
|
|
71
|
+
* Validate downloaded RTF document.
|
|
72
|
+
* Checks for RTF header signature "{\\rtf".
|
|
73
|
+
*
|
|
74
|
+
* @param buffer - Downloaded file content
|
|
75
|
+
* @param fileInfo - Original file information
|
|
76
|
+
* @returns null if valid, error message if invalid
|
|
77
|
+
*/
|
|
78
|
+
protected validateDownloadedFile(buffer: Buffer, _fileInfo: FileInfo): Promise<string | null>;
|
|
79
|
+
/**
|
|
80
|
+
* Build the processed RTF result.
|
|
81
|
+
* Extracts plain text by stripping RTF control codes.
|
|
82
|
+
*
|
|
83
|
+
* @param buffer - Raw file content
|
|
84
|
+
* @param fileInfo - Original file information
|
|
85
|
+
* @returns Processed RTF with extracted text content
|
|
86
|
+
*/
|
|
87
|
+
protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedRtf;
|
|
88
|
+
/**
|
|
89
|
+
* Extract plain text from RTF content.
|
|
90
|
+
* Strips RTF control codes, groups, and formatting commands.
|
|
91
|
+
*
|
|
92
|
+
* This is a basic RTF parser that handles common RTF constructs:
|
|
93
|
+
* - Control groups like {\fonttbl...}
|
|
94
|
+
* - Control words like \par, \b, \i
|
|
95
|
+
* - Special characters like \' hex escapes
|
|
96
|
+
* - Newlines from \par and \line commands
|
|
97
|
+
*
|
|
98
|
+
* @param rtf - Raw RTF content
|
|
99
|
+
* @returns Extracted plain text
|
|
100
|
+
*/
|
|
101
|
+
private extractText;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Singleton instance of the RtfProcessor.
|
|
105
|
+
* Use this for all RTF document processing to share configuration.
|
|
106
|
+
*/
|
|
107
|
+
export declare const rtfProcessor: RtfProcessor;
|
|
108
|
+
/**
|
|
109
|
+
* Check if a file is an RTF document.
|
|
110
|
+
*
|
|
111
|
+
* @param mimetype - MIME type of the file
|
|
112
|
+
* @param filename - Filename for detection
|
|
113
|
+
* @returns true if the file is a supported RTF document
|
|
114
|
+
*
|
|
115
|
+
* @example
|
|
116
|
+
* ```typescript
|
|
117
|
+
* if (isRtfFile("application/rtf", "document.rtf")) {
|
|
118
|
+
* console.log("This is an RTF document");
|
|
119
|
+
* }
|
|
120
|
+
* ```
|
|
121
|
+
*/
|
|
122
|
+
export declare function isRtfFile(mimetype: string, filename: string): boolean;
|
|
123
|
+
/**
|
|
124
|
+
* Validate RTF document size against configured limit.
|
|
125
|
+
*
|
|
126
|
+
* @param sizeBytes - File size in bytes
|
|
127
|
+
* @returns true if size is within the allowed limit
|
|
128
|
+
*/
|
|
129
|
+
export declare function validateRtfSize(sizeBytes: number): boolean;
|
|
130
|
+
/**
|
|
131
|
+
* Process an RTF document.
|
|
132
|
+
*
|
|
133
|
+
* @param fileInfo - File information (can include URL or buffer)
|
|
134
|
+
* @param options - Optional processing options
|
|
135
|
+
* @returns Processing result with success flag and either data or error
|
|
136
|
+
*
|
|
137
|
+
* @example
|
|
138
|
+
* ```typescript
|
|
139
|
+
* const result = await processRtf({
|
|
140
|
+
* id: "file-123",
|
|
141
|
+
* name: "report.rtf",
|
|
142
|
+
* mimetype: "application/rtf",
|
|
143
|
+
* size: 10240,
|
|
144
|
+
* buffer: rtfBuffer,
|
|
145
|
+
* });
|
|
146
|
+
*
|
|
147
|
+
* if (result.success) {
|
|
148
|
+
* console.log("Extracted text:", result.data.textContent);
|
|
149
|
+
* }
|
|
150
|
+
* ```
|
|
151
|
+
*/
|
|
152
|
+
export declare function processRtf(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedRtf>>;
|