@juspay/neurolink 9.5.2 → 9.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +29 -25
- package/dist/agent/directTools.d.ts +5 -5
- package/dist/cli/commands/config.d.ts +9 -9
- package/dist/cli/commands/serve.d.ts +37 -0
- package/dist/cli/commands/serve.js +302 -229
- package/dist/cli/commands/setup-anthropic.d.ts +2 -2
- package/dist/cli/commands/setup-azure.d.ts +2 -2
- package/dist/cli/commands/setup-bedrock.d.ts +2 -2
- package/dist/cli/commands/setup-gcp.d.ts +2 -2
- package/dist/cli/commands/setup-google-ai.d.ts +2 -2
- package/dist/cli/commands/setup-huggingface.d.ts +2 -2
- package/dist/cli/commands/setup-mistral.d.ts +2 -2
- package/dist/cli/commands/setup-openai.d.ts +2 -2
- package/dist/cli/commands/setup.d.ts +2 -2
- package/dist/cli/factories/commandFactory.js +16 -2
- package/dist/cli/loop/optionsSchema.d.ts +2 -2
- package/dist/cli/loop/session.d.ts +4 -0
- package/dist/cli/loop/session.js +49 -4
- package/dist/cli/utils/interactiveSetup.d.ts +4 -4
- package/dist/config/conversationMemory.d.ts +2 -0
- package/dist/config/conversationMemory.js +5 -5
- package/dist/constants/contextWindows.d.ts +46 -0
- package/dist/constants/contextWindows.js +156 -0
- package/dist/context/budgetChecker.d.ts +18 -0
- package/dist/context/budgetChecker.js +71 -0
- package/dist/context/contextCompactor.d.ts +22 -0
- package/dist/context/contextCompactor.js +106 -0
- package/dist/context/effectiveHistory.d.ts +52 -0
- package/dist/context/effectiveHistory.js +105 -0
- package/dist/context/errorDetection.d.ts +14 -0
- package/dist/context/errorDetection.js +124 -0
- package/dist/context/fileSummarizationService.d.ts +54 -0
- package/dist/context/fileSummarizationService.js +255 -0
- package/dist/context/fileSummarizer.d.ts +56 -0
- package/dist/context/fileSummarizer.js +145 -0
- package/dist/context/fileTokenBudget.d.ts +53 -0
- package/dist/context/fileTokenBudget.js +127 -0
- package/dist/context/prompts/summarizationPrompt.d.ts +17 -0
- package/dist/context/prompts/summarizationPrompt.js +110 -0
- package/dist/context/stages/fileReadDeduplicator.d.ts +10 -0
- package/dist/context/stages/fileReadDeduplicator.js +66 -0
- package/dist/context/stages/slidingWindowTruncator.d.ts +11 -0
- package/dist/context/stages/slidingWindowTruncator.js +42 -0
- package/dist/context/stages/structuredSummarizer.d.ts +10 -0
- package/dist/context/stages/structuredSummarizer.js +49 -0
- package/dist/context/stages/toolOutputPruner.d.ts +10 -0
- package/dist/context/stages/toolOutputPruner.js +52 -0
- package/dist/context/summarizationEngine.d.ts +45 -0
- package/dist/context/summarizationEngine.js +110 -0
- package/dist/context/toolOutputLimits.d.ts +17 -0
- package/dist/context/toolOutputLimits.js +84 -0
- package/dist/context/toolPairRepair.d.ts +16 -0
- package/dist/context/toolPairRepair.js +66 -0
- package/dist/core/conversationMemoryManager.d.ts +5 -15
- package/dist/core/conversationMemoryManager.js +15 -75
- package/dist/core/modules/MessageBuilder.d.ts +1 -1
- package/dist/core/modules/MessageBuilder.js +2 -0
- package/dist/core/modules/TelemetryHandler.d.ts +2 -3
- package/dist/core/modules/TelemetryHandler.js +3 -3
- package/dist/core/modules/ToolsManager.d.ts +2 -2
- package/dist/core/redisConversationMemoryManager.d.ts +8 -14
- package/dist/core/redisConversationMemoryManager.js +69 -78
- package/dist/factories/providerFactory.d.ts +2 -2
- package/dist/files/fileReferenceRegistry.d.ts +276 -0
- package/dist/files/fileReferenceRegistry.js +1543 -0
- package/dist/files/fileTools.d.ts +423 -0
- package/dist/files/fileTools.js +449 -0
- package/dist/files/index.d.ts +14 -0
- package/dist/files/index.js +13 -0
- package/dist/files/streamingReader.d.ts +93 -0
- package/dist/files/streamingReader.js +321 -0
- package/dist/files/types.d.ts +23 -0
- package/dist/files/types.js +23 -0
- package/dist/image-gen/imageGenTools.d.ts +2 -2
- package/dist/image-gen/types.d.ts +12 -12
- package/dist/lib/agent/directTools.d.ts +7 -7
- package/dist/lib/config/conversationMemory.d.ts +2 -0
- package/dist/lib/config/conversationMemory.js +5 -5
- package/dist/lib/constants/contextWindows.d.ts +46 -0
- package/dist/lib/constants/contextWindows.js +157 -0
- package/dist/lib/context/budgetChecker.d.ts +18 -0
- package/dist/lib/context/budgetChecker.js +72 -0
- package/dist/lib/context/contextCompactor.d.ts +22 -0
- package/dist/lib/context/contextCompactor.js +107 -0
- package/dist/lib/context/effectiveHistory.d.ts +52 -0
- package/dist/lib/context/effectiveHistory.js +106 -0
- package/dist/lib/context/errorDetection.d.ts +14 -0
- package/dist/lib/context/errorDetection.js +125 -0
- package/dist/lib/context/fileSummarizationService.d.ts +54 -0
- package/dist/lib/context/fileSummarizationService.js +256 -0
- package/dist/lib/context/fileSummarizer.d.ts +56 -0
- package/dist/lib/context/fileSummarizer.js +146 -0
- package/dist/lib/context/fileTokenBudget.d.ts +53 -0
- package/dist/lib/context/fileTokenBudget.js +128 -0
- package/dist/lib/context/prompts/summarizationPrompt.d.ts +17 -0
- package/dist/lib/context/prompts/summarizationPrompt.js +111 -0
- package/dist/lib/context/stages/fileReadDeduplicator.d.ts +10 -0
- package/dist/lib/context/stages/fileReadDeduplicator.js +67 -0
- package/dist/lib/context/stages/slidingWindowTruncator.d.ts +11 -0
- package/dist/lib/context/stages/slidingWindowTruncator.js +43 -0
- package/dist/lib/context/stages/structuredSummarizer.d.ts +10 -0
- package/dist/lib/context/stages/structuredSummarizer.js +50 -0
- package/dist/lib/context/stages/toolOutputPruner.d.ts +10 -0
- package/dist/lib/context/stages/toolOutputPruner.js +53 -0
- package/dist/lib/context/summarizationEngine.d.ts +45 -0
- package/dist/lib/context/summarizationEngine.js +111 -0
- package/dist/lib/context/toolOutputLimits.d.ts +17 -0
- package/dist/lib/context/toolOutputLimits.js +85 -0
- package/dist/lib/context/toolPairRepair.d.ts +16 -0
- package/dist/lib/context/toolPairRepair.js +67 -0
- package/dist/lib/core/conversationMemoryManager.d.ts +5 -15
- package/dist/lib/core/conversationMemoryManager.js +15 -75
- package/dist/lib/core/modules/MessageBuilder.d.ts +1 -1
- package/dist/lib/core/modules/MessageBuilder.js +2 -0
- package/dist/lib/core/modules/TelemetryHandler.d.ts +2 -3
- package/dist/lib/core/modules/TelemetryHandler.js +3 -3
- package/dist/lib/core/modules/ToolsManager.d.ts +2 -2
- package/dist/lib/core/redisConversationMemoryManager.d.ts +8 -14
- package/dist/lib/core/redisConversationMemoryManager.js +69 -78
- package/dist/lib/factories/providerFactory.d.ts +2 -2
- package/dist/lib/files/fileReferenceRegistry.d.ts +276 -0
- package/dist/lib/files/fileReferenceRegistry.js +1544 -0
- package/dist/lib/files/fileTools.d.ts +423 -0
- package/dist/lib/files/fileTools.js +450 -0
- package/dist/lib/files/index.d.ts +14 -0
- package/dist/lib/files/index.js +14 -0
- package/dist/lib/files/streamingReader.d.ts +93 -0
- package/dist/lib/files/streamingReader.js +322 -0
- package/dist/lib/files/types.d.ts +23 -0
- package/dist/lib/files/types.js +24 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +2 -2
- package/dist/lib/image-gen/types.d.ts +12 -12
- package/dist/lib/memory/mem0Initializer.d.ts +2 -2
- package/dist/lib/neurolink.d.ts +61 -2
- package/dist/lib/neurolink.js +619 -307
- package/dist/lib/processors/archive/ArchiveProcessor.d.ts +327 -0
- package/dist/lib/processors/archive/ArchiveProcessor.js +1309 -0
- package/dist/lib/processors/archive/index.d.ts +33 -0
- package/dist/lib/processors/archive/index.js +43 -0
- package/dist/lib/processors/base/types.d.ts +70 -64
- package/dist/lib/processors/base/types.js +6 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +8 -8
- package/dist/lib/processors/cli/fileProcessorCli.js +5 -5
- package/dist/lib/processors/config/mimeTypes.js +25 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +52 -40
- package/dist/lib/processors/config/sizeLimits.js +56 -44
- package/dist/lib/processors/document/ExcelProcessor.d.ts +14 -0
- package/dist/lib/processors/document/ExcelProcessor.js +72 -1
- package/dist/lib/processors/document/PptxProcessor.d.ts +63 -0
- package/dist/lib/processors/document/PptxProcessor.js +158 -0
- package/dist/lib/processors/document/index.d.ts +1 -0
- package/dist/lib/processors/document/index.js +6 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +2 -2
- package/dist/lib/processors/errors/errorHelpers.d.ts +2 -2
- package/dist/lib/processors/errors/errorSerializer.d.ts +4 -4
- package/dist/lib/processors/index.d.ts +8 -2
- package/dist/lib/processors/index.js +5 -2
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +8 -8
- package/dist/lib/processors/integration/FileProcessorIntegration.js +7 -7
- package/dist/lib/processors/media/AudioProcessor.d.ts +328 -0
- package/dist/lib/processors/media/AudioProcessor.js +708 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +350 -0
- package/dist/lib/processors/media/VideoProcessor.js +992 -0
- package/dist/lib/processors/media/index.d.ts +27 -0
- package/dist/lib/processors/media/index.js +37 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +19 -5
- package/dist/lib/processors/registry/ProcessorRegistry.js +103 -8
- package/dist/lib/processors/registry/index.d.ts +1 -1
- package/dist/lib/processors/registry/index.js +1 -1
- package/dist/lib/processors/registry/types.d.ts +2 -2
- package/dist/lib/providers/googleAiStudio.d.ts +34 -0
- package/dist/lib/providers/googleAiStudio.js +267 -397
- package/dist/lib/providers/googleVertex.d.ts +55 -1
- package/dist/lib/providers/googleVertex.js +452 -719
- package/dist/lib/providers/sagemaker/detection.d.ts +6 -6
- package/dist/lib/providers/sagemaker/diagnostics.d.ts +4 -4
- package/dist/lib/providers/sagemaker/parsers.d.ts +4 -4
- package/dist/lib/rag/chunkers/RecursiveChunker.js +2 -2
- package/dist/lib/rag/document/loaders.d.ts +6 -71
- package/dist/lib/rag/document/loaders.js +5 -5
- package/dist/lib/rag/graphRag/graphRAG.js +26 -9
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
- package/dist/lib/rag/metadata/metadataExtractor.js +6 -3
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +8 -126
- package/dist/lib/rag/pipeline/RAGPipeline.js +11 -11
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +3 -42
- package/dist/lib/rag/pipeline/contextAssembly.js +6 -3
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +5 -60
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +3 -33
- package/dist/lib/rag/resilience/RetryHandler.d.ts +2 -21
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +3 -41
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +2 -13
- package/dist/lib/rag/retrieval/vectorQueryTool.js +4 -3
- package/dist/lib/rag/types.d.ts +3 -3
- package/dist/lib/sdk/toolRegistration.d.ts +2 -2
- package/dist/lib/server/middleware/cache.d.ts +2 -2
- package/dist/lib/server/middleware/rateLimit.d.ts +2 -2
- package/dist/lib/server/routes/mcpRoutes.js +277 -249
- package/dist/lib/server/routes/memoryRoutes.js +287 -281
- package/dist/lib/server/utils/validation.d.ts +10 -10
- package/dist/lib/session/globalSessionState.d.ts +2 -2
- package/dist/lib/telemetry/telemetryService.d.ts +2 -2
- package/dist/lib/types/common.d.ts +39 -0
- package/dist/lib/types/contextTypes.d.ts +255 -0
- package/dist/lib/types/contextTypes.js +0 -2
- package/dist/lib/types/conversation.d.ts +62 -0
- package/dist/lib/types/conversationMemoryInterface.d.ts +27 -0
- package/dist/lib/types/conversationMemoryInterface.js +7 -0
- package/dist/lib/types/fileReferenceTypes.d.ts +222 -0
- package/dist/lib/types/fileReferenceTypes.js +9 -0
- package/dist/lib/types/fileTypes.d.ts +26 -3
- package/dist/lib/types/generateTypes.d.ts +22 -1
- package/dist/lib/types/index.d.ts +4 -5
- package/dist/lib/types/index.js +8 -10
- package/dist/lib/types/modelTypes.d.ts +2 -2
- package/dist/lib/types/processorTypes.d.ts +597 -0
- package/dist/lib/types/processorTypes.js +91 -0
- package/dist/lib/types/ragTypes.d.ts +481 -0
- package/dist/lib/types/ragTypes.js +8 -0
- package/dist/lib/types/sdkTypes.d.ts +17 -18
- package/dist/lib/types/streamTypes.d.ts +11 -1
- package/dist/lib/utils/async/retry.d.ts +2 -2
- package/dist/lib/utils/async/withTimeout.js +3 -1
- package/dist/lib/utils/conversationMemory.d.ts +12 -6
- package/dist/lib/utils/conversationMemory.js +76 -36
- package/dist/lib/utils/fileDetector.d.ts +62 -0
- package/dist/lib/utils/fileDetector.js +1014 -14
- package/dist/lib/utils/json/safeParse.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +806 -153
- package/dist/lib/utils/modelChoices.d.ts +2 -2
- package/dist/lib/utils/multimodalOptionsBuilder.d.ts +2 -1
- package/dist/lib/utils/multimodalOptionsBuilder.js +1 -0
- package/dist/lib/utils/rateLimiter.d.ts +2 -2
- package/dist/lib/utils/sanitizers/filename.d.ts +4 -4
- package/dist/lib/utils/sanitizers/svg.d.ts +2 -2
- package/dist/lib/utils/thinkingConfig.d.ts +6 -6
- package/dist/lib/utils/tokenEstimation.d.ts +68 -0
- package/dist/lib/utils/tokenEstimation.js +113 -0
- package/dist/lib/utils/tokenUtils.d.ts +4 -4
- package/dist/lib/utils/ttsProcessor.d.ts +2 -2
- package/dist/lib/workflow/config.d.ts +150 -150
- package/dist/memory/mem0Initializer.d.ts +2 -2
- package/dist/neurolink.d.ts +61 -2
- package/dist/neurolink.js +619 -307
- package/dist/processors/archive/ArchiveProcessor.d.ts +327 -0
- package/dist/processors/archive/ArchiveProcessor.js +1308 -0
- package/dist/processors/archive/index.d.ts +33 -0
- package/dist/processors/archive/index.js +42 -0
- package/dist/processors/base/types.d.ts +70 -64
- package/dist/processors/base/types.js +6 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +8 -8
- package/dist/processors/cli/fileProcessorCli.js +5 -5
- package/dist/processors/config/mimeTypes.js +25 -0
- package/dist/processors/config/sizeLimits.d.ts +52 -40
- package/dist/processors/config/sizeLimits.js +56 -44
- package/dist/processors/document/ExcelProcessor.d.ts +14 -0
- package/dist/processors/document/ExcelProcessor.js +72 -1
- package/dist/processors/document/PptxProcessor.d.ts +63 -0
- package/dist/processors/document/PptxProcessor.js +157 -0
- package/dist/processors/document/index.d.ts +1 -0
- package/dist/processors/document/index.js +6 -0
- package/dist/processors/errors/FileErrorCode.d.ts +2 -2
- package/dist/processors/errors/errorHelpers.d.ts +2 -2
- package/dist/processors/errors/errorSerializer.d.ts +4 -4
- package/dist/processors/index.d.ts +8 -2
- package/dist/processors/index.js +5 -2
- package/dist/processors/integration/FileProcessorIntegration.d.ts +8 -8
- package/dist/processors/integration/FileProcessorIntegration.js +7 -7
- package/dist/processors/media/AudioProcessor.d.ts +328 -0
- package/dist/processors/media/AudioProcessor.js +707 -0
- package/dist/processors/media/VideoProcessor.d.ts +350 -0
- package/dist/processors/media/VideoProcessor.js +991 -0
- package/dist/processors/media/ffprobe-static.d.ts +4 -0
- package/dist/processors/media/index.d.ts +27 -0
- package/dist/processors/media/index.js +36 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +19 -5
- package/dist/processors/registry/ProcessorRegistry.js +103 -8
- package/dist/processors/registry/index.d.ts +1 -1
- package/dist/processors/registry/index.js +1 -1
- package/dist/processors/registry/types.d.ts +2 -2
- package/dist/providers/googleAiStudio.d.ts +34 -0
- package/dist/providers/googleAiStudio.js +267 -397
- package/dist/providers/googleVertex.d.ts +55 -1
- package/dist/providers/googleVertex.js +452 -719
- package/dist/providers/sagemaker/detection.d.ts +6 -6
- package/dist/providers/sagemaker/diagnostics.d.ts +4 -4
- package/dist/providers/sagemaker/parsers.d.ts +4 -4
- package/dist/rag/chunkers/RecursiveChunker.js +2 -2
- package/dist/rag/document/loaders.d.ts +6 -71
- package/dist/rag/document/loaders.js +5 -5
- package/dist/rag/graphRag/graphRAG.js +26 -9
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
- package/dist/rag/metadata/metadataExtractor.js +6 -3
- package/dist/rag/pipeline/RAGPipeline.d.ts +8 -126
- package/dist/rag/pipeline/RAGPipeline.js +11 -11
- package/dist/rag/pipeline/contextAssembly.d.ts +3 -42
- package/dist/rag/pipeline/contextAssembly.js +6 -3
- package/dist/rag/reranker/RerankerFactory.d.ts +5 -60
- package/dist/rag/resilience/CircuitBreaker.d.ts +3 -33
- package/dist/rag/resilience/RetryHandler.d.ts +2 -21
- package/dist/rag/retrieval/hybridSearch.d.ts +3 -41
- package/dist/rag/retrieval/vectorQueryTool.d.ts +2 -13
- package/dist/rag/retrieval/vectorQueryTool.js +4 -3
- package/dist/rag/types.d.ts +3 -3
- package/dist/sdk/toolRegistration.d.ts +2 -2
- package/dist/server/middleware/cache.d.ts +2 -2
- package/dist/server/middleware/rateLimit.d.ts +2 -2
- package/dist/server/routes/mcpRoutes.js +277 -249
- package/dist/server/routes/memoryRoutes.js +287 -281
- package/dist/server/utils/validation.d.ts +4 -4
- package/dist/session/globalSessionState.d.ts +2 -2
- package/dist/telemetry/telemetryService.d.ts +2 -2
- package/dist/types/common.d.ts +39 -0
- package/dist/types/contextTypes.d.ts +255 -0
- package/dist/types/contextTypes.js +0 -2
- package/dist/types/conversation.d.ts +62 -0
- package/dist/types/conversationMemoryInterface.d.ts +27 -0
- package/dist/types/conversationMemoryInterface.js +6 -0
- package/dist/types/fileReferenceTypes.d.ts +222 -0
- package/dist/types/fileReferenceTypes.js +8 -0
- package/dist/types/fileTypes.d.ts +26 -3
- package/dist/types/generateTypes.d.ts +22 -1
- package/dist/types/index.d.ts +4 -5
- package/dist/types/index.js +8 -10
- package/dist/types/processorTypes.d.ts +597 -0
- package/dist/types/processorTypes.js +90 -0
- package/dist/types/ragTypes.d.ts +481 -0
- package/dist/types/ragTypes.js +7 -0
- package/dist/types/sdkTypes.d.ts +17 -18
- package/dist/types/streamTypes.d.ts +11 -1
- package/dist/utils/async/retry.d.ts +2 -2
- package/dist/utils/async/withTimeout.js +3 -1
- package/dist/utils/conversationMemory.d.ts +12 -6
- package/dist/utils/conversationMemory.js +76 -36
- package/dist/utils/fileDetector.d.ts +62 -0
- package/dist/utils/fileDetector.js +1014 -14
- package/dist/utils/json/safeParse.d.ts +2 -2
- package/dist/utils/messageBuilder.js +806 -153
- package/dist/utils/modelChoices.d.ts +2 -2
- package/dist/utils/multimodalOptionsBuilder.d.ts +2 -1
- package/dist/utils/multimodalOptionsBuilder.js +1 -0
- package/dist/utils/rateLimiter.d.ts +2 -2
- package/dist/utils/sanitizers/filename.d.ts +4 -4
- package/dist/utils/sanitizers/svg.d.ts +2 -2
- package/dist/utils/thinkingConfig.d.ts +6 -6
- package/dist/utils/tokenEstimation.d.ts +68 -0
- package/dist/utils/tokenEstimation.js +112 -0
- package/dist/utils/tokenUtils.d.ts +4 -4
- package/dist/utils/ttsProcessor.d.ts +2 -2
- package/dist/workflow/config.d.ts +104 -104
- package/package.json +18 -6
- package/dist/lib/utils/conversationMemoryUtils.d.ts +0 -25
- package/dist/lib/utils/conversationMemoryUtils.js +0 -138
- package/dist/utils/conversationMemoryUtils.d.ts +0 -25
- package/dist/utils/conversationMemoryUtils.js +0 -137
|
@@ -5,6 +5,9 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { readFile, stat } from "fs/promises";
|
|
7
7
|
import { getGlobalDispatcher, interceptors, request } from "undici";
|
|
8
|
+
import { archiveProcessor } from "../processors/archive/ArchiveProcessor.js";
|
|
9
|
+
import { audioProcessor } from "../processors/media/AudioProcessor.js";
|
|
10
|
+
import { videoProcessor } from "../processors/media/VideoProcessor.js";
|
|
8
11
|
import { CSVProcessor } from "./csvProcessor.js";
|
|
9
12
|
import { ImageProcessor } from "./imageProcessor.js";
|
|
10
13
|
import { logger } from "./logger.js";
|
|
@@ -211,8 +214,13 @@ export class FileDetector {
|
|
|
211
214
|
logger.debug(`[FileDetector] ${allowedType} fallback failed: ${errorMsg}`);
|
|
212
215
|
}
|
|
213
216
|
}
|
|
214
|
-
// All fallbacks failed
|
|
215
|
-
|
|
217
|
+
// All fallbacks failed — fall through to processFile() which handles
|
|
218
|
+
// "unknown" types gracefully by extracting binary metadata and printable
|
|
219
|
+
// strings instead of throwing.
|
|
220
|
+
logger.warn(`[FileDetector] All fallback parsing failed for type "${detection.type}". ` +
|
|
221
|
+
`Attempted: ${options.allowedTypes.join(", ")}. Falling through to universal handler.`);
|
|
222
|
+
const csvOptions = options?.csvOptions;
|
|
223
|
+
return await FileDetector.processFile(content, detection, csvOptions, options?.provider);
|
|
216
224
|
}
|
|
217
225
|
const content = await FileDetector.loadContent(input, detection, options);
|
|
218
226
|
// Extract CSV-specific options from FileDetectorOptions
|
|
@@ -262,6 +270,40 @@ export class FileDetector {
|
|
|
262
270
|
// Audio requires magic bytes - can't fallback without detection
|
|
263
271
|
throw new Error("Audio type requires binary detection, cannot fallback parse");
|
|
264
272
|
}
|
|
273
|
+
case "video": {
|
|
274
|
+
// Video requires magic bytes - can't fallback without detection
|
|
275
|
+
throw new Error("Video type requires binary detection, cannot fallback parse");
|
|
276
|
+
}
|
|
277
|
+
case "archive": {
|
|
278
|
+
// Archive requires magic bytes - can't fallback without detection
|
|
279
|
+
throw new Error("Archive type requires binary detection, cannot fallback parse");
|
|
280
|
+
}
|
|
281
|
+
case "xlsx": {
|
|
282
|
+
// Document formats require binary detection
|
|
283
|
+
throw new Error("Excel type requires binary detection, cannot fallback parse");
|
|
284
|
+
}
|
|
285
|
+
case "docx": {
|
|
286
|
+
throw new Error("Word type requires binary detection, cannot fallback parse");
|
|
287
|
+
}
|
|
288
|
+
case "pptx": {
|
|
289
|
+
throw new Error("PowerPoint type requires binary detection, cannot fallback parse");
|
|
290
|
+
}
|
|
291
|
+
case "svg": {
|
|
292
|
+
// SVG can be detected from text content
|
|
293
|
+
const svgContent = content.toString("utf-8");
|
|
294
|
+
if (svgContent.includes("<svg") && svgContent.includes("</svg>")) {
|
|
295
|
+
return {
|
|
296
|
+
type: "svg",
|
|
297
|
+
content: svgContent,
|
|
298
|
+
mimeType: "image/svg+xml",
|
|
299
|
+
metadata: {
|
|
300
|
+
confidence: 70,
|
|
301
|
+
size: content.length,
|
|
302
|
+
},
|
|
303
|
+
};
|
|
304
|
+
}
|
|
305
|
+
throw new Error("Content does not appear to be valid SVG");
|
|
306
|
+
}
|
|
265
307
|
default:
|
|
266
308
|
return null;
|
|
267
309
|
}
|
|
@@ -438,6 +480,251 @@ export class FileDetector {
|
|
|
438
480
|
throw new Error(`Unknown source: ${source}`);
|
|
439
481
|
}
|
|
440
482
|
}
|
|
483
|
+
/**
|
|
484
|
+
* SDK-8: Format an informative placeholder when a file processor fails.
|
|
485
|
+
* Instead of bare "[Video file: name]" strings, include size, format, and
|
|
486
|
+
* the reason for failure so the LLM can acknowledge the attachment.
|
|
487
|
+
*/
|
|
488
|
+
static formatInformativePlaceholder(typeName, filename, content, detection, error) {
|
|
489
|
+
const sizeStr = content.length < 1024
|
|
490
|
+
? `${content.length} bytes`
|
|
491
|
+
: content.length < 1024 * 1024
|
|
492
|
+
? `${(content.length / 1024).toFixed(1)} KB`
|
|
493
|
+
: `${(content.length / (1024 * 1024)).toFixed(1)} MB`;
|
|
494
|
+
const errorMsg = error instanceof Error
|
|
495
|
+
? error.message
|
|
496
|
+
: error
|
|
497
|
+
? String(error)
|
|
498
|
+
: "Processing returned no usable content";
|
|
499
|
+
return (`[${typeName} File: "${filename}"]\n` +
|
|
500
|
+
`Size: ${sizeStr}\n` +
|
|
501
|
+
`Format: ${detection.mimeType || "unknown"}\n` +
|
|
502
|
+
`Error: Could not extract content (${errorMsg}).\n` +
|
|
503
|
+
`The file was attached but could not be fully analyzed.`);
|
|
504
|
+
}
|
|
505
|
+
/**
|
|
506
|
+
* Extract metadata and printable strings from an unrecognized binary file.
|
|
507
|
+
* This is the "extract what you can" path for unknown file types.
|
|
508
|
+
*
|
|
509
|
+
* Extracts:
|
|
510
|
+
* - File size (human-readable)
|
|
511
|
+
* - MIME type / detected format
|
|
512
|
+
* - First N bytes as hex dump (for identification)
|
|
513
|
+
* - Printable ASCII/UTF-8 strings found in the binary (like `strings` command)
|
|
514
|
+
* - Known file signatures that we don't have full processors for
|
|
515
|
+
*
|
|
516
|
+
* @param content Raw file buffer
|
|
517
|
+
* @param detection Detection result (may be "unknown")
|
|
518
|
+
* @param filename Original filename (if known)
|
|
519
|
+
* @returns Formatted text summary suitable for LLM consumption
|
|
520
|
+
*/
|
|
521
|
+
static extractBinaryMetadata(content, detection, filename) {
|
|
522
|
+
const parts = [];
|
|
523
|
+
// Header
|
|
524
|
+
const ext = detection.extension
|
|
525
|
+
? `.${detection.extension}`
|
|
526
|
+
: filename.includes(".")
|
|
527
|
+
? filename.slice(filename.lastIndexOf("."))
|
|
528
|
+
: "";
|
|
529
|
+
const typeLabel = ext
|
|
530
|
+
? `${ext.toUpperCase().slice(1)} file`
|
|
531
|
+
: "Binary file";
|
|
532
|
+
parts.push(`[${typeLabel}: "${filename}"]`);
|
|
533
|
+
// Basic metadata
|
|
534
|
+
const sizeStr = formatFileSize(content.length);
|
|
535
|
+
parts.push(`Size: ${sizeStr}`);
|
|
536
|
+
if (detection.mimeType &&
|
|
537
|
+
detection.mimeType !== "application/octet-stream") {
|
|
538
|
+
parts.push(`Format: ${detection.mimeType}`);
|
|
539
|
+
}
|
|
540
|
+
// Known binary signature identification (broader than our processing capabilities)
|
|
541
|
+
const sigLabel = FileDetector.identifyBinarySignature(content);
|
|
542
|
+
if (sigLabel) {
|
|
543
|
+
parts.push(`Identified as: ${sigLabel}`);
|
|
544
|
+
}
|
|
545
|
+
// Hex dump of first 32 bytes for identification
|
|
546
|
+
const hexPreview = content
|
|
547
|
+
.subarray(0, Math.min(32, content.length))
|
|
548
|
+
.toString("hex")
|
|
549
|
+
.match(/.{1,2}/g)
|
|
550
|
+
?.join(" ");
|
|
551
|
+
if (hexPreview) {
|
|
552
|
+
parts.push(`Header bytes: ${hexPreview}`);
|
|
553
|
+
}
|
|
554
|
+
// Extract printable strings (similar to Unix `strings` command)
|
|
555
|
+
const strings = FileDetector.extractPrintableStrings(content, 4, 50);
|
|
556
|
+
if (strings.length > 0) {
|
|
557
|
+
parts.push(`\nEmbedded text found (${strings.length} string${strings.length > 1 ? "s" : ""}):`);
|
|
558
|
+
for (const s of strings) {
|
|
559
|
+
parts.push(` "${s}"`);
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
parts.push(`\nThis file was attached but its format is not fully supported for content extraction.`);
|
|
563
|
+
parts.push(`The above metadata and any embedded text have been extracted for context.`);
|
|
564
|
+
return parts.join("\n");
|
|
565
|
+
}
|
|
566
|
+
/**
|
|
567
|
+
* Identify known binary file signatures beyond what we can process.
|
|
568
|
+
* Returns a human-readable description, or null if unrecognized.
|
|
569
|
+
*/
|
|
570
|
+
static identifyBinarySignature(buf) {
|
|
571
|
+
if (buf.length < 4) {
|
|
572
|
+
return null;
|
|
573
|
+
}
|
|
574
|
+
// SQLite: "SQLite format 3\0"
|
|
575
|
+
if (buf.length >= 16 &&
|
|
576
|
+
buf.subarray(0, 15).toString("ascii") === "SQLite format 3") {
|
|
577
|
+
return "SQLite database";
|
|
578
|
+
}
|
|
579
|
+
// WOFF: "wOFF"
|
|
580
|
+
if (buf[0] === 0x77 &&
|
|
581
|
+
buf[1] === 0x4f &&
|
|
582
|
+
buf[2] === 0x46 &&
|
|
583
|
+
buf[3] === 0x46) {
|
|
584
|
+
return "WOFF font";
|
|
585
|
+
}
|
|
586
|
+
// WOFF2: "wOF2"
|
|
587
|
+
if (buf[0] === 0x77 &&
|
|
588
|
+
buf[1] === 0x4f &&
|
|
589
|
+
buf[2] === 0x46 &&
|
|
590
|
+
buf[3] === 0x32) {
|
|
591
|
+
return "WOFF2 font";
|
|
592
|
+
}
|
|
593
|
+
// TrueType/OpenType: starts with 0x00010000 or "OTTO"
|
|
594
|
+
if ((buf[0] === 0x00 &&
|
|
595
|
+
buf[1] === 0x01 &&
|
|
596
|
+
buf[2] === 0x00 &&
|
|
597
|
+
buf[3] === 0x00) ||
|
|
598
|
+
(buf[0] === 0x4f && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4f)) {
|
|
599
|
+
return "TrueType/OpenType font";
|
|
600
|
+
}
|
|
601
|
+
// ELF executable: \x7fELF
|
|
602
|
+
if (buf[0] === 0x7f &&
|
|
603
|
+
buf[1] === 0x45 &&
|
|
604
|
+
buf[2] === 0x4c &&
|
|
605
|
+
buf[3] === 0x46) {
|
|
606
|
+
return "ELF executable/library";
|
|
607
|
+
}
|
|
608
|
+
// Mach-O: 0xFEEDFACE or 0xFEEDFACF (64-bit) or 0xCAFEBABE (universal)
|
|
609
|
+
if ((buf[0] === 0xfe &&
|
|
610
|
+
buf[1] === 0xed &&
|
|
611
|
+
buf[2] === 0xfa &&
|
|
612
|
+
buf[3] === 0xce) ||
|
|
613
|
+
(buf[0] === 0xfe &&
|
|
614
|
+
buf[1] === 0xed &&
|
|
615
|
+
buf[2] === 0xfa &&
|
|
616
|
+
buf[3] === 0xcf) ||
|
|
617
|
+
(buf[0] === 0xca && buf[1] === 0xfe && buf[2] === 0xba && buf[3] === 0xbe)) {
|
|
618
|
+
return "Mach-O executable/library";
|
|
619
|
+
}
|
|
620
|
+
// PE/Windows executable: "MZ"
|
|
621
|
+
if (buf[0] === 0x4d && buf[1] === 0x5a) {
|
|
622
|
+
return "Windows PE executable/DLL";
|
|
623
|
+
}
|
|
624
|
+
// WebAssembly: "\0asm"
|
|
625
|
+
if (buf[0] === 0x00 &&
|
|
626
|
+
buf[1] === 0x61 &&
|
|
627
|
+
buf[2] === 0x73 &&
|
|
628
|
+
buf[3] === 0x6d) {
|
|
629
|
+
return "WebAssembly binary";
|
|
630
|
+
}
|
|
631
|
+
// DWG (AutoCAD): starts with "AC10"
|
|
632
|
+
if (buf[0] === 0x41 &&
|
|
633
|
+
buf[1] === 0x43 &&
|
|
634
|
+
buf[2] === 0x31 &&
|
|
635
|
+
buf[3] === 0x30) {
|
|
636
|
+
return "AutoCAD DWG drawing";
|
|
637
|
+
}
|
|
638
|
+
// BZ2: "BZ" + 'h'
|
|
639
|
+
if (buf[0] === 0x42 && buf[1] === 0x5a && buf[2] === 0x68) {
|
|
640
|
+
return "BZip2 compressed archive";
|
|
641
|
+
}
|
|
642
|
+
// XZ: 0xFD + "7zXZ"
|
|
643
|
+
if (buf.length >= 6 &&
|
|
644
|
+
buf[0] === 0xfd &&
|
|
645
|
+
buf[1] === 0x37 &&
|
|
646
|
+
buf[2] === 0x7a &&
|
|
647
|
+
buf[3] === 0x58 &&
|
|
648
|
+
buf[4] === 0x5a &&
|
|
649
|
+
buf[5] === 0x00) {
|
|
650
|
+
return "XZ compressed archive";
|
|
651
|
+
}
|
|
652
|
+
// 7z: "7z" + BC AF 27 1C
|
|
653
|
+
if (buf.length >= 6 &&
|
|
654
|
+
buf[0] === 0x37 &&
|
|
655
|
+
buf[1] === 0x7a &&
|
|
656
|
+
buf[2] === 0xbc &&
|
|
657
|
+
buf[3] === 0xaf &&
|
|
658
|
+
buf[4] === 0x27 &&
|
|
659
|
+
buf[5] === 0x1c) {
|
|
660
|
+
return "7-Zip archive";
|
|
661
|
+
}
|
|
662
|
+
// ISO 9660: "CD001" at offset 32769
|
|
663
|
+
if (buf.length > 32773 &&
|
|
664
|
+
buf.subarray(32769, 32774).toString("ascii") === "CD001") {
|
|
665
|
+
return "ISO 9660 disc image";
|
|
666
|
+
}
|
|
667
|
+
// Apache Parquet: "PAR1"
|
|
668
|
+
if (buf[0] === 0x50 &&
|
|
669
|
+
buf[1] === 0x41 &&
|
|
670
|
+
buf[2] === 0x52 &&
|
|
671
|
+
buf[3] === 0x31) {
|
|
672
|
+
return "Apache Parquet data file";
|
|
673
|
+
}
|
|
674
|
+
// Protocol Buffers compiled: (no fixed magic, skip)
|
|
675
|
+
// TIFF (already handled as image, but including for completeness)
|
|
676
|
+
if ((buf[0] === 0x49 &&
|
|
677
|
+
buf[1] === 0x49 &&
|
|
678
|
+
buf[2] === 0x2a &&
|
|
679
|
+
buf[3] === 0x00) ||
|
|
680
|
+
(buf[0] === 0x4d && buf[1] === 0x4d && buf[2] === 0x00 && buf[3] === 0x2a)) {
|
|
681
|
+
return "TIFF image";
|
|
682
|
+
}
|
|
683
|
+
// ICO: 00 00 01 00
|
|
684
|
+
if (buf[0] === 0x00 &&
|
|
685
|
+
buf[1] === 0x00 &&
|
|
686
|
+
buf[2] === 0x01 &&
|
|
687
|
+
buf[3] === 0x00) {
|
|
688
|
+
return "ICO icon image";
|
|
689
|
+
}
|
|
690
|
+
return null;
|
|
691
|
+
}
|
|
692
|
+
/**
|
|
693
|
+
* Extract printable ASCII strings from a binary buffer.
|
|
694
|
+
* Similar to the Unix `strings` utility.
|
|
695
|
+
*
|
|
696
|
+
* @param buf Buffer to scan
|
|
697
|
+
* @param minLength Minimum string length to include (default 4)
|
|
698
|
+
* @param maxStrings Maximum number of strings to return (default 50)
|
|
699
|
+
* @returns Array of printable strings found in the binary
|
|
700
|
+
*/
|
|
701
|
+
static extractPrintableStrings(buf, minLength = 4, maxStrings = 50) {
|
|
702
|
+
const strings = [];
|
|
703
|
+
let current = "";
|
|
704
|
+
// Only scan first 64KB to avoid huge processing time
|
|
705
|
+
const scanLimit = Math.min(buf.length, 64 * 1024);
|
|
706
|
+
for (let i = 0; i < scanLimit; i++) {
|
|
707
|
+
const byte = buf[i];
|
|
708
|
+
// Printable ASCII range (space through tilde) plus tab
|
|
709
|
+
if ((byte >= 0x20 && byte <= 0x7e) || byte === 0x09) {
|
|
710
|
+
current += String.fromCharCode(byte);
|
|
711
|
+
}
|
|
712
|
+
else {
|
|
713
|
+
if (current.length >= minLength) {
|
|
714
|
+
strings.push(current);
|
|
715
|
+
if (strings.length >= maxStrings) {
|
|
716
|
+
break;
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
current = "";
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
// Flush last string
|
|
723
|
+
if (current.length >= minLength && strings.length < maxStrings) {
|
|
724
|
+
strings.push(current);
|
|
725
|
+
}
|
|
726
|
+
return strings;
|
|
727
|
+
}
|
|
441
728
|
/**
|
|
442
729
|
* Route to appropriate processor
|
|
443
730
|
*/
|
|
@@ -458,6 +745,18 @@ export class FileDetector {
|
|
|
458
745
|
// SVG is processed as text content (sanitized XML markup)
|
|
459
746
|
// AI providers don't support SVG as image format, so we extract text content
|
|
460
747
|
return await FileDetector.processSvgAsText(content, detection);
|
|
748
|
+
case "video":
|
|
749
|
+
return await FileDetector.processVideoFile(content, detection);
|
|
750
|
+
case "audio":
|
|
751
|
+
return await FileDetector.processAudioFile(content, detection);
|
|
752
|
+
case "archive":
|
|
753
|
+
return await FileDetector.processArchiveFile(content, detection);
|
|
754
|
+
case "xlsx":
|
|
755
|
+
return await FileDetector.processXlsxFile(content, detection);
|
|
756
|
+
case "docx":
|
|
757
|
+
return await FileDetector.processDocxFile(content, detection);
|
|
758
|
+
case "pptx":
|
|
759
|
+
return await FileDetector.processPptxFile(content, detection);
|
|
461
760
|
case "text":
|
|
462
761
|
return {
|
|
463
762
|
type: "text",
|
|
@@ -465,9 +764,365 @@ export class FileDetector {
|
|
|
465
764
|
mimeType: detection.mimeType || "text/plain",
|
|
466
765
|
metadata: detection.metadata,
|
|
467
766
|
};
|
|
468
|
-
default:
|
|
469
|
-
|
|
767
|
+
default: {
|
|
768
|
+
// Graceful degradation: try to treat unknown types as text if content is valid UTF-8
|
|
769
|
+
const unknownContent = content.toString("utf-8");
|
|
770
|
+
if (FileDetector.isValidText(unknownContent)) {
|
|
771
|
+
logger.warn(`[FileDetector] Unknown type "${detection.type}", treating as text`);
|
|
772
|
+
return {
|
|
773
|
+
type: "text",
|
|
774
|
+
content: unknownContent,
|
|
775
|
+
mimeType: detection.mimeType || "text/plain",
|
|
776
|
+
metadata: detection.metadata,
|
|
777
|
+
};
|
|
778
|
+
}
|
|
779
|
+
// Binary file that we can't fully process — extract what we can
|
|
780
|
+
// (metadata, printable strings, signature identification)
|
|
781
|
+
const filename = detection.metadata.filename || "file";
|
|
782
|
+
logger.warn(`[FileDetector] Unknown binary type "${detection.type}", extracting metadata for "${filename}"`);
|
|
783
|
+
return {
|
|
784
|
+
type: "unknown",
|
|
785
|
+
content: FileDetector.extractBinaryMetadata(content, detection, filename),
|
|
786
|
+
mimeType: detection.mimeType || "application/octet-stream",
|
|
787
|
+
metadata: detection.metadata,
|
|
788
|
+
};
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
/**
|
|
793
|
+
* Process video file: extract metadata, keyframes, and subtitles via VideoProcessor
|
|
794
|
+
*/
|
|
795
|
+
static async processVideoFile(content, detection) {
|
|
796
|
+
const videoFilename = detection.metadata.filename || "video";
|
|
797
|
+
try {
|
|
798
|
+
const videoResult = await videoProcessor.processFile({
|
|
799
|
+
id: videoFilename,
|
|
800
|
+
name: videoFilename,
|
|
801
|
+
mimetype: detection.mimeType || "video/mp4",
|
|
802
|
+
size: content.length,
|
|
803
|
+
buffer: content,
|
|
804
|
+
});
|
|
805
|
+
if (videoResult.success && videoResult.data) {
|
|
806
|
+
return {
|
|
807
|
+
type: "video",
|
|
808
|
+
content: videoResult.data.textContent ||
|
|
809
|
+
FileDetector.formatInformativePlaceholder("Video", videoFilename, content, detection),
|
|
810
|
+
mimeType: detection.mimeType,
|
|
811
|
+
images: videoResult.data.keyframes && videoResult.data.keyframes.length > 0
|
|
812
|
+
? videoResult.data.keyframes
|
|
813
|
+
: undefined,
|
|
814
|
+
metadata: {
|
|
815
|
+
...detection.metadata,
|
|
816
|
+
frameCount: videoResult.data.frameCount,
|
|
817
|
+
hasKeyframes: videoResult.data.hasKeyframes,
|
|
818
|
+
},
|
|
819
|
+
};
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
catch (videoError) {
|
|
823
|
+
logger.warn(`[FileDetector] VideoProcessor failed for ${videoFilename}, using fallback`, videoError instanceof Error ? videoError.message : String(videoError));
|
|
824
|
+
return {
|
|
825
|
+
type: "video",
|
|
826
|
+
content: FileDetector.formatInformativePlaceholder("Video", videoFilename, content, detection, videoError),
|
|
827
|
+
mimeType: detection.mimeType,
|
|
828
|
+
metadata: detection.metadata,
|
|
829
|
+
};
|
|
830
|
+
}
|
|
831
|
+
// Fallback if processor returned no data
|
|
832
|
+
return {
|
|
833
|
+
type: "video",
|
|
834
|
+
content: FileDetector.formatInformativePlaceholder("Video", videoFilename, content, detection),
|
|
835
|
+
mimeType: detection.mimeType,
|
|
836
|
+
metadata: detection.metadata,
|
|
837
|
+
};
|
|
838
|
+
}
|
|
839
|
+
/**
|
|
840
|
+
* Process audio file: extract metadata, tags, and cover art via AudioProcessor
|
|
841
|
+
*/
|
|
842
|
+
static async processAudioFile(content, detection) {
|
|
843
|
+
const audioFilename = detection.metadata.filename || "audio";
|
|
844
|
+
try {
|
|
845
|
+
const audioResult = await audioProcessor.processFile({
|
|
846
|
+
id: audioFilename,
|
|
847
|
+
name: audioFilename,
|
|
848
|
+
mimetype: detection.mimeType || "audio/mpeg",
|
|
849
|
+
size: content.length,
|
|
850
|
+
buffer: content,
|
|
851
|
+
});
|
|
852
|
+
if (audioResult.success && audioResult.data) {
|
|
853
|
+
return {
|
|
854
|
+
type: "audio",
|
|
855
|
+
content: audioResult.data.textContent ||
|
|
856
|
+
FileDetector.formatInformativePlaceholder("Audio", audioFilename, content, detection),
|
|
857
|
+
mimeType: detection.mimeType,
|
|
858
|
+
// Surface embedded cover art as an image content block
|
|
859
|
+
images: audioResult.data.coverArt
|
|
860
|
+
? [audioResult.data.coverArt]
|
|
861
|
+
: undefined,
|
|
862
|
+
metadata: detection.metadata,
|
|
863
|
+
};
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
catch (audioError) {
|
|
867
|
+
logger.warn(`[FileDetector] AudioProcessor failed for ${audioFilename}, using fallback`, audioError instanceof Error ? audioError.message : String(audioError));
|
|
868
|
+
return {
|
|
869
|
+
type: "audio",
|
|
870
|
+
content: FileDetector.formatInformativePlaceholder("Audio", audioFilename, content, detection, audioError),
|
|
871
|
+
mimeType: detection.mimeType,
|
|
872
|
+
metadata: detection.metadata,
|
|
873
|
+
};
|
|
470
874
|
}
|
|
875
|
+
// Fallback if processor returned no data
|
|
876
|
+
return {
|
|
877
|
+
type: "audio",
|
|
878
|
+
content: FileDetector.formatInformativePlaceholder("Audio", audioFilename, content, detection),
|
|
879
|
+
mimeType: detection.mimeType,
|
|
880
|
+
metadata: detection.metadata,
|
|
881
|
+
};
|
|
882
|
+
}
|
|
883
|
+
/**
|
|
884
|
+
* Process archive file: list contents and extract metadata via ArchiveProcessor
|
|
885
|
+
*/
|
|
886
|
+
static async processArchiveFile(content, detection) {
|
|
887
|
+
const archiveFilename = detection.metadata.filename || "archive";
|
|
888
|
+
try {
|
|
889
|
+
const archiveResult = await archiveProcessor.processFile({
|
|
890
|
+
id: archiveFilename,
|
|
891
|
+
name: archiveFilename,
|
|
892
|
+
mimetype: detection.mimeType || "application/zip",
|
|
893
|
+
size: content.length,
|
|
894
|
+
buffer: content,
|
|
895
|
+
});
|
|
896
|
+
if (archiveResult.success && archiveResult.data) {
|
|
897
|
+
return {
|
|
898
|
+
type: "archive",
|
|
899
|
+
content: archiveResult.data.textContent ||
|
|
900
|
+
FileDetector.formatInformativePlaceholder("Archive", archiveFilename, content, detection),
|
|
901
|
+
mimeType: detection.mimeType,
|
|
902
|
+
metadata: detection.metadata,
|
|
903
|
+
};
|
|
904
|
+
}
|
|
905
|
+
}
|
|
906
|
+
catch (archiveError) {
|
|
907
|
+
logger.warn(`[FileDetector] ArchiveProcessor failed for ${archiveFilename}, using fallback`, archiveError instanceof Error
|
|
908
|
+
? archiveError.message
|
|
909
|
+
: String(archiveError));
|
|
910
|
+
return {
|
|
911
|
+
type: "archive",
|
|
912
|
+
content: FileDetector.formatInformativePlaceholder("Archive", archiveFilename, content, detection, archiveError),
|
|
913
|
+
mimeType: detection.mimeType,
|
|
914
|
+
metadata: detection.metadata,
|
|
915
|
+
};
|
|
916
|
+
}
|
|
917
|
+
// Fallback if processor returned no data
|
|
918
|
+
return {
|
|
919
|
+
type: "archive",
|
|
920
|
+
content: FileDetector.formatInformativePlaceholder("Archive", archiveFilename, content, detection),
|
|
921
|
+
mimeType: detection.mimeType,
|
|
922
|
+
metadata: detection.metadata,
|
|
923
|
+
};
|
|
924
|
+
}
|
|
925
|
+
/**
|
|
926
|
+
* Process Excel/OpenDocument spreadsheet file via ExcelProcessor or OpenDocumentProcessor
|
|
927
|
+
*/
|
|
928
|
+
static async processXlsxFile(content, detection) {
|
|
929
|
+
const xlsxFilename = detection.metadata.filename || "spreadsheet";
|
|
930
|
+
try {
|
|
931
|
+
const ext = detection.extension?.toLowerCase();
|
|
932
|
+
if (ext === "ods") {
|
|
933
|
+
const { openDocumentProcessor } = await import("../processors/document/OpenDocumentProcessor.js");
|
|
934
|
+
const odsResult = await openDocumentProcessor.processFile({
|
|
935
|
+
id: xlsxFilename,
|
|
936
|
+
name: xlsxFilename,
|
|
937
|
+
mimetype: detection.mimeType ||
|
|
938
|
+
"application/vnd.oasis.opendocument.spreadsheet",
|
|
939
|
+
size: content.length,
|
|
940
|
+
buffer: content,
|
|
941
|
+
});
|
|
942
|
+
if (odsResult.success && odsResult.data) {
|
|
943
|
+
return {
|
|
944
|
+
type: "xlsx",
|
|
945
|
+
content: odsResult.data.textContent ||
|
|
946
|
+
FileDetector.formatInformativePlaceholder("Spreadsheet", xlsxFilename, content, detection),
|
|
947
|
+
mimeType: detection.mimeType,
|
|
948
|
+
metadata: detection.metadata,
|
|
949
|
+
};
|
|
950
|
+
}
|
|
951
|
+
}
|
|
952
|
+
else {
|
|
953
|
+
const { excelProcessor } = await import("../processors/document/ExcelProcessor.js");
|
|
954
|
+
const xlsxResult = await excelProcessor.processFile({
|
|
955
|
+
id: xlsxFilename,
|
|
956
|
+
name: xlsxFilename,
|
|
957
|
+
mimetype: detection.mimeType ||
|
|
958
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
959
|
+
size: content.length,
|
|
960
|
+
buffer: content,
|
|
961
|
+
});
|
|
962
|
+
if (xlsxResult.success && xlsxResult.data) {
|
|
963
|
+
// Build text content from worksheets
|
|
964
|
+
const sheets = xlsxResult.data.worksheets || [];
|
|
965
|
+
let textContent = `Spreadsheet: ${sheets.length} sheet(s), ${xlsxResult.data.totalRows} total rows\n`;
|
|
966
|
+
for (const sheet of sheets) {
|
|
967
|
+
textContent += `\n### Sheet: ${sheet.name}\n`;
|
|
968
|
+
textContent += `Columns (${sheet.columnCount}): ${sheet.headers.join(", ")}\n`;
|
|
969
|
+
textContent += `Rows: ${sheet.rowCount}\n`;
|
|
970
|
+
// Include first rows as sample data
|
|
971
|
+
const sampleRows = sheet.rows.slice(0, 20);
|
|
972
|
+
const rowText = sampleRows
|
|
973
|
+
.map((row) => row.map((c) => String(c ?? "")).join("\t"))
|
|
974
|
+
.join("\n");
|
|
975
|
+
if (!rowText) {
|
|
976
|
+
continue;
|
|
977
|
+
}
|
|
978
|
+
textContent += `\nData:\n${sheet.headers.join("\t")}\n${rowText}\n`;
|
|
979
|
+
const remaining = sheet.rowCount - 20;
|
|
980
|
+
if (remaining > 0) {
|
|
981
|
+
textContent += `... (${remaining} more rows)\n`;
|
|
982
|
+
}
|
|
983
|
+
}
|
|
984
|
+
return {
|
|
985
|
+
type: "xlsx",
|
|
986
|
+
content: textContent,
|
|
987
|
+
mimeType: detection.mimeType,
|
|
988
|
+
metadata: detection.metadata,
|
|
989
|
+
};
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
}
|
|
993
|
+
catch (xlsxError) {
|
|
994
|
+
logger.warn(`[FileDetector] ExcelProcessor failed for ${xlsxFilename}, using fallback`, xlsxError instanceof Error ? xlsxError.message : String(xlsxError));
|
|
995
|
+
return {
|
|
996
|
+
type: "xlsx",
|
|
997
|
+
content: FileDetector.formatInformativePlaceholder("Spreadsheet", xlsxFilename, content, detection, xlsxError),
|
|
998
|
+
mimeType: detection.mimeType,
|
|
999
|
+
metadata: detection.metadata,
|
|
1000
|
+
};
|
|
1001
|
+
}
|
|
1002
|
+
// Fallback if processor returned no data
|
|
1003
|
+
return {
|
|
1004
|
+
type: "xlsx",
|
|
1005
|
+
content: FileDetector.formatInformativePlaceholder("Spreadsheet", xlsxFilename, content, detection),
|
|
1006
|
+
mimeType: detection.mimeType,
|
|
1007
|
+
metadata: detection.metadata,
|
|
1008
|
+
};
|
|
1009
|
+
}
|
|
1010
|
+
/**
|
|
1011
|
+
* Process Word/OpenDocument/RTF document via WordProcessor, OpenDocumentProcessor, or RtfProcessor
|
|
1012
|
+
*/
|
|
1013
|
+
static async processDocxFile(content, detection) {
|
|
1014
|
+
const docxFilename = detection.metadata.filename || "document";
|
|
1015
|
+
const ext = detection.extension?.toLowerCase();
|
|
1016
|
+
try {
|
|
1017
|
+
if (ext === "odt") {
|
|
1018
|
+
const { openDocumentProcessor } = await import("../processors/document/OpenDocumentProcessor.js");
|
|
1019
|
+
const odtResult = await openDocumentProcessor.processFile({
|
|
1020
|
+
id: docxFilename,
|
|
1021
|
+
name: docxFilename,
|
|
1022
|
+
mimetype: detection.mimeType || "application/vnd.oasis.opendocument.text",
|
|
1023
|
+
size: content.length,
|
|
1024
|
+
buffer: content,
|
|
1025
|
+
});
|
|
1026
|
+
if (odtResult.success && odtResult.data) {
|
|
1027
|
+
return {
|
|
1028
|
+
type: "docx",
|
|
1029
|
+
content: odtResult.data.textContent ||
|
|
1030
|
+
FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
|
|
1031
|
+
mimeType: detection.mimeType,
|
|
1032
|
+
metadata: detection.metadata,
|
|
1033
|
+
};
|
|
1034
|
+
}
|
|
1035
|
+
}
|
|
1036
|
+
else if (ext === "rtf") {
|
|
1037
|
+
const { rtfProcessor } = await import("../processors/document/RtfProcessor.js");
|
|
1038
|
+
const rtfResult = await rtfProcessor.processFile({
|
|
1039
|
+
id: docxFilename,
|
|
1040
|
+
name: docxFilename,
|
|
1041
|
+
mimetype: detection.mimeType || "application/rtf",
|
|
1042
|
+
size: content.length,
|
|
1043
|
+
buffer: content,
|
|
1044
|
+
});
|
|
1045
|
+
if (rtfResult.success && rtfResult.data) {
|
|
1046
|
+
return {
|
|
1047
|
+
type: "docx",
|
|
1048
|
+
content: rtfResult.data.textContent ||
|
|
1049
|
+
FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
|
|
1050
|
+
mimeType: detection.mimeType,
|
|
1051
|
+
metadata: detection.metadata,
|
|
1052
|
+
};
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
else {
|
|
1056
|
+
const { wordProcessor } = await import("../processors/document/WordProcessor.js");
|
|
1057
|
+
const docxResult = await wordProcessor.processFile({
|
|
1058
|
+
id: docxFilename,
|
|
1059
|
+
name: docxFilename,
|
|
1060
|
+
mimetype: detection.mimeType ||
|
|
1061
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
1062
|
+
size: content.length,
|
|
1063
|
+
buffer: content,
|
|
1064
|
+
});
|
|
1065
|
+
if (docxResult.success && docxResult.data) {
|
|
1066
|
+
return {
|
|
1067
|
+
type: "docx",
|
|
1068
|
+
content: docxResult.data.textContent ||
|
|
1069
|
+
FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
|
|
1070
|
+
mimeType: detection.mimeType,
|
|
1071
|
+
metadata: detection.metadata,
|
|
1072
|
+
};
|
|
1073
|
+
}
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
catch (docxError) {
|
|
1077
|
+
logger.warn(`[FileDetector] Document processor failed for ${docxFilename}, using fallback`, docxError instanceof Error ? docxError.message : String(docxError));
|
|
1078
|
+
return {
|
|
1079
|
+
type: "docx",
|
|
1080
|
+
content: FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection, docxError),
|
|
1081
|
+
mimeType: detection.mimeType,
|
|
1082
|
+
metadata: detection.metadata,
|
|
1083
|
+
};
|
|
1084
|
+
}
|
|
1085
|
+
// Fallback if processor returned no data
|
|
1086
|
+
return {
|
|
1087
|
+
type: "docx",
|
|
1088
|
+
content: FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
|
|
1089
|
+
mimeType: detection.mimeType,
|
|
1090
|
+
metadata: detection.metadata,
|
|
1091
|
+
};
|
|
1092
|
+
}
|
|
1093
|
+
/**
|
|
1094
|
+
* Process PowerPoint/OpenDocument presentation via PptxProcessor
|
|
1095
|
+
*/
|
|
1096
|
+
static async processPptxFile(content, detection) {
|
|
1097
|
+
const pptxFilename = detection.metadata.filename || "presentation";
|
|
1098
|
+
try {
|
|
1099
|
+
const { PptxProcessor } = await import("../processors/document/PptxProcessor.js");
|
|
1100
|
+
const pptxResult = await PptxProcessor.extractText(content);
|
|
1101
|
+
if (pptxResult) {
|
|
1102
|
+
return {
|
|
1103
|
+
type: "pptx",
|
|
1104
|
+
content: pptxResult,
|
|
1105
|
+
mimeType: detection.mimeType,
|
|
1106
|
+
metadata: detection.metadata,
|
|
1107
|
+
};
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
catch (pptxError) {
|
|
1111
|
+
logger.warn(`[FileDetector] PptxProcessor failed for ${pptxFilename}, using fallback`, pptxError instanceof Error ? pptxError.message : String(pptxError));
|
|
1112
|
+
return {
|
|
1113
|
+
type: "pptx",
|
|
1114
|
+
content: FileDetector.formatInformativePlaceholder("Presentation", pptxFilename, content, detection, pptxError),
|
|
1115
|
+
mimeType: detection.mimeType,
|
|
1116
|
+
metadata: detection.metadata,
|
|
1117
|
+
};
|
|
1118
|
+
}
|
|
1119
|
+
// Fallback if processor returned no content
|
|
1120
|
+
return {
|
|
1121
|
+
type: "pptx",
|
|
1122
|
+
content: FileDetector.formatInformativePlaceholder("Presentation", pptxFilename, content, detection),
|
|
1123
|
+
mimeType: detection.mimeType,
|
|
1124
|
+
metadata: detection.metadata,
|
|
1125
|
+
};
|
|
471
1126
|
}
|
|
472
1127
|
/**
|
|
473
1128
|
* Process SVG file as text content
|
|
@@ -535,7 +1190,7 @@ export class FileDetector {
|
|
|
535
1190
|
* Load file from URL with automatic retry on transient network errors
|
|
536
1191
|
*/
|
|
537
1192
|
static async loadFromURL(url, options) {
|
|
538
|
-
const maxSize = options?.maxSize ||
|
|
1193
|
+
const maxSize = options?.maxSize || 200 * 1024 * 1024; // 200MB default (matches Curator memory-safety cap)
|
|
539
1194
|
const timeout = options?.timeout || FileDetector.DEFAULT_NETWORK_TIMEOUT;
|
|
540
1195
|
const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
541
1196
|
const retryDelay = options?.retryDelay ?? DEFAULT_RETRY_DELAY;
|
|
@@ -565,7 +1220,7 @@ export class FileDetector {
|
|
|
565
1220
|
* Load file from filesystem path
|
|
566
1221
|
*/
|
|
567
1222
|
static async loadFromPath(path, options) {
|
|
568
|
-
const maxSize = options?.maxSize ||
|
|
1223
|
+
const maxSize = options?.maxSize || 200 * 1024 * 1024; // 200MB default (matches Curator memory-safety cap)
|
|
569
1224
|
const statInfo = await stat(path);
|
|
570
1225
|
if (!statInfo.isFile()) {
|
|
571
1226
|
throw new Error("Not a file");
|
|
@@ -610,6 +1265,98 @@ class MagicBytesStrategy {
|
|
|
610
1265
|
if (this.isPDF(input)) {
|
|
611
1266
|
return this.result("pdf", "application/pdf", 95);
|
|
612
1267
|
}
|
|
1268
|
+
// MP4/MOV: "ftyp" at offset 4
|
|
1269
|
+
if (input.length >= 8 &&
|
|
1270
|
+
input[4] === 0x66 &&
|
|
1271
|
+
input[5] === 0x74 &&
|
|
1272
|
+
input[6] === 0x79 &&
|
|
1273
|
+
input[7] === 0x70) {
|
|
1274
|
+
return this.result("video", "video/mp4", 95);
|
|
1275
|
+
}
|
|
1276
|
+
// MKV/WebM: EBML header
|
|
1277
|
+
if (input.length >= 4 &&
|
|
1278
|
+
input[0] === 0x1a &&
|
|
1279
|
+
input[1] === 0x45 &&
|
|
1280
|
+
input[2] === 0xdf &&
|
|
1281
|
+
input[3] === 0xa3) {
|
|
1282
|
+
return this.result("video", "video/x-matroska", 90);
|
|
1283
|
+
}
|
|
1284
|
+
// AVI: "RIFF" + "AVI "
|
|
1285
|
+
if (input.length >= 12 &&
|
|
1286
|
+
input[0] === 0x52 &&
|
|
1287
|
+
input[1] === 0x49 &&
|
|
1288
|
+
input[2] === 0x46 &&
|
|
1289
|
+
input[3] === 0x46 &&
|
|
1290
|
+
input[8] === 0x41 &&
|
|
1291
|
+
input[9] === 0x56 &&
|
|
1292
|
+
input[10] === 0x49 &&
|
|
1293
|
+
input[11] === 0x20) {
|
|
1294
|
+
return this.result("video", "video/x-msvideo", 95);
|
|
1295
|
+
}
|
|
1296
|
+
// WAV: "RIFF" + "WAVE"
|
|
1297
|
+
if (input.length >= 12 &&
|
|
1298
|
+
input[0] === 0x52 &&
|
|
1299
|
+
input[1] === 0x49 &&
|
|
1300
|
+
input[2] === 0x46 &&
|
|
1301
|
+
input[3] === 0x46 &&
|
|
1302
|
+
input[8] === 0x57 &&
|
|
1303
|
+
input[9] === 0x41 &&
|
|
1304
|
+
input[10] === 0x56 &&
|
|
1305
|
+
input[11] === 0x45) {
|
|
1306
|
+
return this.result("audio", "audio/wav", 95);
|
|
1307
|
+
}
|
|
1308
|
+
// MP3: ID3 tag
|
|
1309
|
+
if (input.length >= 3 &&
|
|
1310
|
+
input[0] === 0x49 &&
|
|
1311
|
+
input[1] === 0x44 &&
|
|
1312
|
+
input[2] === 0x33) {
|
|
1313
|
+
return this.result("audio", "audio/mpeg", 95);
|
|
1314
|
+
}
|
|
1315
|
+
// MP3: sync word
|
|
1316
|
+
if (input.length >= 2 && input[0] === 0xff && (input[1] & 0xe0) === 0xe0) {
|
|
1317
|
+
return this.result("audio", "audio/mpeg", 80);
|
|
1318
|
+
}
|
|
1319
|
+
// FLAC: "fLaC"
|
|
1320
|
+
if (input.length >= 4 &&
|
|
1321
|
+
input[0] === 0x66 &&
|
|
1322
|
+
input[1] === 0x4c &&
|
|
1323
|
+
input[2] === 0x61 &&
|
|
1324
|
+
input[3] === 0x43) {
|
|
1325
|
+
return this.result("audio", "audio/flac", 95);
|
|
1326
|
+
}
|
|
1327
|
+
// OGG: "OggS"
|
|
1328
|
+
if (input.length >= 4 &&
|
|
1329
|
+
input[0] === 0x4f &&
|
|
1330
|
+
input[1] === 0x67 &&
|
|
1331
|
+
input[2] === 0x67 &&
|
|
1332
|
+
input[3] === 0x53) {
|
|
1333
|
+
return this.result("audio", "audio/ogg", 90);
|
|
1334
|
+
}
|
|
1335
|
+
// ZIP: "PK\x03\x04"
|
|
1336
|
+
// NOTE: Many document formats (OOXML: .xlsx, .docx, .pptx; ODF: .odt, .ods)
|
|
1337
|
+
// are internally ZIP archives and share these magic bytes. We return a lower
|
|
1338
|
+
// confidence (70%) so the ExtensionStrategy (85%) can override with the correct
|
|
1339
|
+
// document type when a file path with extension is available. For raw buffers
|
|
1340
|
+
// without path info, this falls through to archive as a safe default.
|
|
1341
|
+
if (input.length >= 4 &&
|
|
1342
|
+
input[0] === 0x50 &&
|
|
1343
|
+
input[1] === 0x4b &&
|
|
1344
|
+
input[2] === 0x03 &&
|
|
1345
|
+
input[3] === 0x04) {
|
|
1346
|
+
return this.result("archive", "application/zip", 70);
|
|
1347
|
+
}
|
|
1348
|
+
// GZIP: 1F 8B
|
|
1349
|
+
if (input.length >= 2 && input[0] === 0x1f && input[1] === 0x8b) {
|
|
1350
|
+
return this.result("archive", "application/gzip", 90);
|
|
1351
|
+
}
|
|
1352
|
+
// RAR: "Rar!"
|
|
1353
|
+
if (input.length >= 4 &&
|
|
1354
|
+
input[0] === 0x52 &&
|
|
1355
|
+
input[1] === 0x61 &&
|
|
1356
|
+
input[2] === 0x72 &&
|
|
1357
|
+
input[3] === 0x21) {
|
|
1358
|
+
return this.result("archive", "application/x-rar-compressed", 95);
|
|
1359
|
+
}
|
|
613
1360
|
return this.unknown();
|
|
614
1361
|
}
|
|
615
1362
|
isPNG(buf) {
|
|
@@ -687,24 +1434,95 @@ class MimeTypeStrategy {
|
|
|
687
1434
|
}
|
|
688
1435
|
}
|
|
689
1436
|
mimeToFileType(mime) {
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
if (mime.includes("text/tab-separated-values")) {
|
|
1437
|
+
const lower = mime.toLowerCase().split(";")[0].trim();
|
|
1438
|
+
// CSV
|
|
1439
|
+
if (lower === "text/csv" || lower === "text/tab-separated-values") {
|
|
694
1440
|
return "csv";
|
|
695
1441
|
}
|
|
696
1442
|
// SVG is processed as text/markup, NOT as image
|
|
697
1443
|
// Must check before generic image/ check
|
|
698
|
-
if (
|
|
1444
|
+
if (lower === "image/svg+xml") {
|
|
699
1445
|
return "svg";
|
|
700
1446
|
}
|
|
701
|
-
|
|
1447
|
+
// Images
|
|
1448
|
+
if (lower.startsWith("image/")) {
|
|
702
1449
|
return "image";
|
|
703
1450
|
}
|
|
704
|
-
|
|
1451
|
+
// PDF
|
|
1452
|
+
if (lower === "application/pdf") {
|
|
705
1453
|
return "pdf";
|
|
706
1454
|
}
|
|
707
|
-
|
|
1455
|
+
// Video
|
|
1456
|
+
if (lower.startsWith("video/")) {
|
|
1457
|
+
return "video";
|
|
1458
|
+
}
|
|
1459
|
+
// Audio
|
|
1460
|
+
if (lower.startsWith("audio/")) {
|
|
1461
|
+
return "audio";
|
|
1462
|
+
}
|
|
1463
|
+
// Office documents — OOXML
|
|
1464
|
+
if (lower ===
|
|
1465
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
|
|
1466
|
+
lower === "application/msword") {
|
|
1467
|
+
return "docx";
|
|
1468
|
+
}
|
|
1469
|
+
if (lower ===
|
|
1470
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ||
|
|
1471
|
+
lower === "application/vnd.ms-excel") {
|
|
1472
|
+
return "xlsx";
|
|
1473
|
+
}
|
|
1474
|
+
if (lower ===
|
|
1475
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation" ||
|
|
1476
|
+
lower === "application/vnd.ms-powerpoint") {
|
|
1477
|
+
return "pptx";
|
|
1478
|
+
}
|
|
1479
|
+
// OpenDocument formats
|
|
1480
|
+
if (lower === "application/vnd.oasis.opendocument.text") {
|
|
1481
|
+
return "docx";
|
|
1482
|
+
}
|
|
1483
|
+
if (lower === "application/vnd.oasis.opendocument.spreadsheet") {
|
|
1484
|
+
return "xlsx";
|
|
1485
|
+
}
|
|
1486
|
+
if (lower === "application/vnd.oasis.opendocument.presentation") {
|
|
1487
|
+
return "pptx";
|
|
1488
|
+
}
|
|
1489
|
+
// RTF
|
|
1490
|
+
if (lower === "application/rtf" || lower === "text/rtf") {
|
|
1491
|
+
return "docx";
|
|
1492
|
+
}
|
|
1493
|
+
// Archive formats
|
|
1494
|
+
if (lower === "application/zip" ||
|
|
1495
|
+
lower === "application/x-zip-compressed" ||
|
|
1496
|
+
lower === "application/gzip" ||
|
|
1497
|
+
lower === "application/x-gzip" ||
|
|
1498
|
+
lower === "application/x-tar" ||
|
|
1499
|
+
lower === "application/x-compressed-tar" ||
|
|
1500
|
+
lower === "application/java-archive" ||
|
|
1501
|
+
lower === "application/x-rar-compressed" ||
|
|
1502
|
+
lower === "application/vnd.rar" ||
|
|
1503
|
+
lower === "application/x-7z-compressed") {
|
|
1504
|
+
return "archive";
|
|
1505
|
+
}
|
|
1506
|
+
// Text/markup/source code — broad matching
|
|
1507
|
+
if (lower === "text/plain" ||
|
|
1508
|
+
lower === "text/markdown" ||
|
|
1509
|
+
lower === "text/html" ||
|
|
1510
|
+
lower === "text/css" ||
|
|
1511
|
+
lower === "text/javascript" ||
|
|
1512
|
+
lower === "text/typescript" ||
|
|
1513
|
+
lower === "application/json" ||
|
|
1514
|
+
lower === "application/xml" ||
|
|
1515
|
+
lower === "text/xml" ||
|
|
1516
|
+
lower === "application/yaml" ||
|
|
1517
|
+
lower === "application/x-yaml") {
|
|
1518
|
+
return "text";
|
|
1519
|
+
}
|
|
1520
|
+
// Source code MIME types (text/x-*)
|
|
1521
|
+
if (lower.startsWith("text/x-")) {
|
|
1522
|
+
return "text";
|
|
1523
|
+
}
|
|
1524
|
+
// Generic text types we may not have listed explicitly
|
|
1525
|
+
if (lower.startsWith("text/")) {
|
|
708
1526
|
return "text";
|
|
709
1527
|
}
|
|
710
1528
|
return "unknown";
|
|
@@ -751,18 +1569,109 @@ class ExtensionStrategy {
|
|
|
751
1569
|
svg: "svg",
|
|
752
1570
|
avif: "image",
|
|
753
1571
|
pdf: "pdf",
|
|
1572
|
+
// Video formats
|
|
1573
|
+
mp4: "video",
|
|
1574
|
+
mkv: "video",
|
|
1575
|
+
mov: "video",
|
|
1576
|
+
avi: "video",
|
|
1577
|
+
webm: "video",
|
|
1578
|
+
wmv: "video",
|
|
1579
|
+
flv: "video",
|
|
1580
|
+
// Audio formats
|
|
1581
|
+
mp3: "audio",
|
|
1582
|
+
wav: "audio",
|
|
1583
|
+
ogg: "audio",
|
|
1584
|
+
flac: "audio",
|
|
1585
|
+
m4a: "audio",
|
|
1586
|
+
aac: "audio",
|
|
1587
|
+
wma: "audio",
|
|
1588
|
+
opus: "audio",
|
|
1589
|
+
// Archive formats
|
|
1590
|
+
zip: "archive",
|
|
1591
|
+
tar: "archive",
|
|
1592
|
+
gz: "archive",
|
|
1593
|
+
tgz: "archive",
|
|
1594
|
+
rar: "archive",
|
|
1595
|
+
"7z": "archive",
|
|
1596
|
+
jar: "archive",
|
|
1597
|
+
// Document formats (ZIP-based internally)
|
|
1598
|
+
xlsx: "xlsx",
|
|
1599
|
+
xls: "xlsx",
|
|
1600
|
+
docx: "docx",
|
|
1601
|
+
doc: "docx",
|
|
1602
|
+
pptx: "pptx",
|
|
1603
|
+
ppt: "pptx",
|
|
1604
|
+
odt: "docx", // OpenDocument text → processed like docx
|
|
1605
|
+
ods: "xlsx", // OpenDocument spreadsheet → processed like xlsx
|
|
1606
|
+
odp: "pptx", // OpenDocument presentation → processed like pptx
|
|
1607
|
+
rtf: "docx", // RTF → processed like docx (text extraction)
|
|
1608
|
+
// Text/markup formats
|
|
754
1609
|
txt: "text",
|
|
755
1610
|
md: "text",
|
|
1611
|
+
markdown: "text",
|
|
756
1612
|
json: "text",
|
|
757
1613
|
xml: "text",
|
|
758
1614
|
yaml: "text",
|
|
759
1615
|
yml: "text",
|
|
760
1616
|
html: "text",
|
|
761
1617
|
htm: "text",
|
|
1618
|
+
css: "text",
|
|
762
1619
|
log: "text",
|
|
763
1620
|
conf: "text",
|
|
764
1621
|
cfg: "text",
|
|
765
1622
|
ini: "text",
|
|
1623
|
+
env: "text",
|
|
1624
|
+
toml: "text",
|
|
1625
|
+
properties: "text",
|
|
1626
|
+
gitignore: "text",
|
|
1627
|
+
dockerignore: "text",
|
|
1628
|
+
editorconfig: "text",
|
|
1629
|
+
prettierrc: "text",
|
|
1630
|
+
eslintrc: "text",
|
|
1631
|
+
babelrc: "text",
|
|
1632
|
+
// Source code formats
|
|
1633
|
+
js: "text",
|
|
1634
|
+
mjs: "text",
|
|
1635
|
+
cjs: "text",
|
|
1636
|
+
jsx: "text",
|
|
1637
|
+
ts: "text",
|
|
1638
|
+
tsx: "text",
|
|
1639
|
+
py: "text",
|
|
1640
|
+
java: "text",
|
|
1641
|
+
go: "text",
|
|
1642
|
+
rs: "text",
|
|
1643
|
+
rb: "text",
|
|
1644
|
+
php: "text",
|
|
1645
|
+
c: "text",
|
|
1646
|
+
cpp: "text",
|
|
1647
|
+
cc: "text",
|
|
1648
|
+
h: "text",
|
|
1649
|
+
hpp: "text",
|
|
1650
|
+
cs: "text",
|
|
1651
|
+
swift: "text",
|
|
1652
|
+
kt: "text",
|
|
1653
|
+
kts: "text",
|
|
1654
|
+
scala: "text",
|
|
1655
|
+
sh: "text",
|
|
1656
|
+
bash: "text",
|
|
1657
|
+
zsh: "text",
|
|
1658
|
+
ps1: "text",
|
|
1659
|
+
sql: "text",
|
|
1660
|
+
r: "text",
|
|
1661
|
+
lua: "text",
|
|
1662
|
+
pl: "text",
|
|
1663
|
+
perl: "text",
|
|
1664
|
+
dart: "text",
|
|
1665
|
+
ex: "text",
|
|
1666
|
+
exs: "text",
|
|
1667
|
+
erl: "text",
|
|
1668
|
+
hs: "text",
|
|
1669
|
+
clj: "text",
|
|
1670
|
+
lisp: "text",
|
|
1671
|
+
vim: "text",
|
|
1672
|
+
// Additional video/image
|
|
1673
|
+
m4v: "video",
|
|
1674
|
+
ico: "image",
|
|
766
1675
|
};
|
|
767
1676
|
const type = typeMap[ext.toLowerCase()];
|
|
768
1677
|
return {
|
|
@@ -809,18 +1718,109 @@ class ExtensionStrategy {
|
|
|
809
1718
|
svg: "image/svg+xml",
|
|
810
1719
|
avif: "image/avif",
|
|
811
1720
|
pdf: "application/pdf",
|
|
1721
|
+
// Video MIME types
|
|
1722
|
+
mp4: "video/mp4",
|
|
1723
|
+
mkv: "video/x-matroska",
|
|
1724
|
+
mov: "video/quicktime",
|
|
1725
|
+
avi: "video/x-msvideo",
|
|
1726
|
+
webm: "video/webm",
|
|
1727
|
+
wmv: "video/x-ms-wmv",
|
|
1728
|
+
flv: "video/x-flv",
|
|
1729
|
+
// Audio MIME types
|
|
1730
|
+
mp3: "audio/mpeg",
|
|
1731
|
+
wav: "audio/wav",
|
|
1732
|
+
ogg: "audio/ogg",
|
|
1733
|
+
flac: "audio/flac",
|
|
1734
|
+
m4a: "audio/mp4",
|
|
1735
|
+
aac: "audio/aac",
|
|
1736
|
+
wma: "audio/x-ms-wma",
|
|
1737
|
+
opus: "audio/opus",
|
|
1738
|
+
// Archive MIME types
|
|
1739
|
+
zip: "application/zip",
|
|
1740
|
+
tar: "application/x-tar",
|
|
1741
|
+
gz: "application/gzip",
|
|
1742
|
+
tgz: "application/gzip",
|
|
1743
|
+
rar: "application/x-rar-compressed",
|
|
1744
|
+
"7z": "application/x-7z-compressed",
|
|
1745
|
+
jar: "application/java-archive",
|
|
1746
|
+
// Document MIME types
|
|
1747
|
+
xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
1748
|
+
xls: "application/vnd.ms-excel",
|
|
1749
|
+
docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
1750
|
+
doc: "application/msword",
|
|
1751
|
+
pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
1752
|
+
ppt: "application/vnd.ms-powerpoint",
|
|
1753
|
+
odt: "application/vnd.oasis.opendocument.text",
|
|
1754
|
+
ods: "application/vnd.oasis.opendocument.spreadsheet",
|
|
1755
|
+
odp: "application/vnd.oasis.opendocument.presentation",
|
|
1756
|
+
rtf: "application/rtf",
|
|
1757
|
+
// Text/markup MIME types
|
|
812
1758
|
txt: "text/plain",
|
|
813
1759
|
md: "text/markdown",
|
|
1760
|
+
markdown: "text/markdown",
|
|
814
1761
|
json: "application/json",
|
|
815
1762
|
xml: "application/xml",
|
|
816
1763
|
yaml: "application/yaml",
|
|
817
1764
|
yml: "application/yaml",
|
|
818
1765
|
html: "text/html",
|
|
819
1766
|
htm: "text/html",
|
|
1767
|
+
css: "text/css",
|
|
820
1768
|
log: "text/plain",
|
|
821
1769
|
conf: "text/plain",
|
|
822
1770
|
cfg: "text/plain",
|
|
823
1771
|
ini: "text/plain",
|
|
1772
|
+
env: "text/plain",
|
|
1773
|
+
toml: "text/plain",
|
|
1774
|
+
properties: "text/plain",
|
|
1775
|
+
gitignore: "text/plain",
|
|
1776
|
+
dockerignore: "text/plain",
|
|
1777
|
+
editorconfig: "text/plain",
|
|
1778
|
+
prettierrc: "application/json",
|
|
1779
|
+
eslintrc: "application/json",
|
|
1780
|
+
babelrc: "application/json",
|
|
1781
|
+
// Source code MIME types
|
|
1782
|
+
js: "text/javascript",
|
|
1783
|
+
mjs: "text/javascript",
|
|
1784
|
+
cjs: "text/javascript",
|
|
1785
|
+
jsx: "text/javascript",
|
|
1786
|
+
ts: "text/typescript",
|
|
1787
|
+
tsx: "text/typescript",
|
|
1788
|
+
py: "text/x-python",
|
|
1789
|
+
java: "text/x-java-source",
|
|
1790
|
+
go: "text/x-go",
|
|
1791
|
+
rs: "text/x-rustsrc",
|
|
1792
|
+
rb: "text/x-ruby",
|
|
1793
|
+
php: "text/x-php",
|
|
1794
|
+
c: "text/x-c",
|
|
1795
|
+
cpp: "text/x-c++",
|
|
1796
|
+
cc: "text/x-c++",
|
|
1797
|
+
h: "text/x-c",
|
|
1798
|
+
hpp: "text/x-c++",
|
|
1799
|
+
cs: "text/x-csharp",
|
|
1800
|
+
swift: "text/x-swift",
|
|
1801
|
+
kt: "text/x-kotlin",
|
|
1802
|
+
kts: "text/x-kotlin",
|
|
1803
|
+
scala: "text/x-scala",
|
|
1804
|
+
sh: "text/x-shellscript",
|
|
1805
|
+
bash: "text/x-shellscript",
|
|
1806
|
+
zsh: "text/x-shellscript",
|
|
1807
|
+
ps1: "text/x-powershell",
|
|
1808
|
+
sql: "text/x-sql",
|
|
1809
|
+
r: "text/x-r",
|
|
1810
|
+
lua: "text/x-lua",
|
|
1811
|
+
pl: "text/x-perl",
|
|
1812
|
+
perl: "text/x-perl",
|
|
1813
|
+
dart: "text/x-dart",
|
|
1814
|
+
ex: "text/x-elixir",
|
|
1815
|
+
exs: "text/x-elixir",
|
|
1816
|
+
erl: "text/x-erlang",
|
|
1817
|
+
hs: "text/x-haskell",
|
|
1818
|
+
clj: "text/x-clojure",
|
|
1819
|
+
lisp: "text/x-lisp",
|
|
1820
|
+
vim: "text/plain",
|
|
1821
|
+
// Additional video/image
|
|
1822
|
+
m4v: "video/mp4",
|
|
1823
|
+
ico: "image/x-icon",
|
|
824
1824
|
};
|
|
825
1825
|
return mimeMap[ext.toLowerCase()] || "application/octet-stream";
|
|
826
1826
|
}
|