@juspay/neurolink 9.5.2 → 9.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +29 -25
- package/dist/agent/directTools.d.ts +5 -5
- package/dist/cli/commands/config.d.ts +9 -9
- package/dist/cli/commands/serve.d.ts +37 -0
- package/dist/cli/commands/serve.js +302 -229
- package/dist/cli/commands/setup-anthropic.d.ts +2 -2
- package/dist/cli/commands/setup-azure.d.ts +2 -2
- package/dist/cli/commands/setup-bedrock.d.ts +2 -2
- package/dist/cli/commands/setup-gcp.d.ts +2 -2
- package/dist/cli/commands/setup-google-ai.d.ts +2 -2
- package/dist/cli/commands/setup-huggingface.d.ts +2 -2
- package/dist/cli/commands/setup-mistral.d.ts +2 -2
- package/dist/cli/commands/setup-openai.d.ts +2 -2
- package/dist/cli/commands/setup.d.ts +2 -2
- package/dist/cli/factories/commandFactory.js +16 -2
- package/dist/cli/loop/optionsSchema.d.ts +2 -2
- package/dist/cli/loop/session.d.ts +4 -0
- package/dist/cli/loop/session.js +49 -4
- package/dist/cli/utils/interactiveSetup.d.ts +4 -4
- package/dist/config/conversationMemory.d.ts +2 -0
- package/dist/config/conversationMemory.js +5 -5
- package/dist/constants/contextWindows.d.ts +46 -0
- package/dist/constants/contextWindows.js +156 -0
- package/dist/context/budgetChecker.d.ts +18 -0
- package/dist/context/budgetChecker.js +71 -0
- package/dist/context/contextCompactor.d.ts +22 -0
- package/dist/context/contextCompactor.js +106 -0
- package/dist/context/effectiveHistory.d.ts +52 -0
- package/dist/context/effectiveHistory.js +105 -0
- package/dist/context/errorDetection.d.ts +14 -0
- package/dist/context/errorDetection.js +124 -0
- package/dist/context/fileSummarizationService.d.ts +54 -0
- package/dist/context/fileSummarizationService.js +255 -0
- package/dist/context/fileSummarizer.d.ts +56 -0
- package/dist/context/fileSummarizer.js +145 -0
- package/dist/context/fileTokenBudget.d.ts +53 -0
- package/dist/context/fileTokenBudget.js +127 -0
- package/dist/context/prompts/summarizationPrompt.d.ts +17 -0
- package/dist/context/prompts/summarizationPrompt.js +110 -0
- package/dist/context/stages/fileReadDeduplicator.d.ts +10 -0
- package/dist/context/stages/fileReadDeduplicator.js +66 -0
- package/dist/context/stages/slidingWindowTruncator.d.ts +11 -0
- package/dist/context/stages/slidingWindowTruncator.js +42 -0
- package/dist/context/stages/structuredSummarizer.d.ts +10 -0
- package/dist/context/stages/structuredSummarizer.js +49 -0
- package/dist/context/stages/toolOutputPruner.d.ts +10 -0
- package/dist/context/stages/toolOutputPruner.js +52 -0
- package/dist/context/summarizationEngine.d.ts +45 -0
- package/dist/context/summarizationEngine.js +110 -0
- package/dist/context/toolOutputLimits.d.ts +17 -0
- package/dist/context/toolOutputLimits.js +84 -0
- package/dist/context/toolPairRepair.d.ts +16 -0
- package/dist/context/toolPairRepair.js +66 -0
- package/dist/core/conversationMemoryManager.d.ts +5 -15
- package/dist/core/conversationMemoryManager.js +15 -75
- package/dist/core/modules/MessageBuilder.d.ts +1 -1
- package/dist/core/modules/MessageBuilder.js +2 -0
- package/dist/core/modules/TelemetryHandler.d.ts +2 -3
- package/dist/core/modules/TelemetryHandler.js +3 -3
- package/dist/core/modules/ToolsManager.d.ts +2 -2
- package/dist/core/redisConversationMemoryManager.d.ts +8 -14
- package/dist/core/redisConversationMemoryManager.js +69 -78
- package/dist/factories/providerFactory.d.ts +2 -2
- package/dist/files/fileReferenceRegistry.d.ts +276 -0
- package/dist/files/fileReferenceRegistry.js +1543 -0
- package/dist/files/fileTools.d.ts +423 -0
- package/dist/files/fileTools.js +449 -0
- package/dist/files/index.d.ts +14 -0
- package/dist/files/index.js +13 -0
- package/dist/files/streamingReader.d.ts +93 -0
- package/dist/files/streamingReader.js +321 -0
- package/dist/files/types.d.ts +23 -0
- package/dist/files/types.js +23 -0
- package/dist/image-gen/imageGenTools.d.ts +2 -2
- package/dist/image-gen/types.d.ts +12 -12
- package/dist/lib/agent/directTools.d.ts +7 -7
- package/dist/lib/config/conversationMemory.d.ts +2 -0
- package/dist/lib/config/conversationMemory.js +5 -5
- package/dist/lib/constants/contextWindows.d.ts +46 -0
- package/dist/lib/constants/contextWindows.js +157 -0
- package/dist/lib/context/budgetChecker.d.ts +18 -0
- package/dist/lib/context/budgetChecker.js +72 -0
- package/dist/lib/context/contextCompactor.d.ts +22 -0
- package/dist/lib/context/contextCompactor.js +107 -0
- package/dist/lib/context/effectiveHistory.d.ts +52 -0
- package/dist/lib/context/effectiveHistory.js +106 -0
- package/dist/lib/context/errorDetection.d.ts +14 -0
- package/dist/lib/context/errorDetection.js +125 -0
- package/dist/lib/context/fileSummarizationService.d.ts +54 -0
- package/dist/lib/context/fileSummarizationService.js +256 -0
- package/dist/lib/context/fileSummarizer.d.ts +56 -0
- package/dist/lib/context/fileSummarizer.js +146 -0
- package/dist/lib/context/fileTokenBudget.d.ts +53 -0
- package/dist/lib/context/fileTokenBudget.js +128 -0
- package/dist/lib/context/prompts/summarizationPrompt.d.ts +17 -0
- package/dist/lib/context/prompts/summarizationPrompt.js +111 -0
- package/dist/lib/context/stages/fileReadDeduplicator.d.ts +10 -0
- package/dist/lib/context/stages/fileReadDeduplicator.js +67 -0
- package/dist/lib/context/stages/slidingWindowTruncator.d.ts +11 -0
- package/dist/lib/context/stages/slidingWindowTruncator.js +43 -0
- package/dist/lib/context/stages/structuredSummarizer.d.ts +10 -0
- package/dist/lib/context/stages/structuredSummarizer.js +50 -0
- package/dist/lib/context/stages/toolOutputPruner.d.ts +10 -0
- package/dist/lib/context/stages/toolOutputPruner.js +53 -0
- package/dist/lib/context/summarizationEngine.d.ts +45 -0
- package/dist/lib/context/summarizationEngine.js +111 -0
- package/dist/lib/context/toolOutputLimits.d.ts +17 -0
- package/dist/lib/context/toolOutputLimits.js +85 -0
- package/dist/lib/context/toolPairRepair.d.ts +16 -0
- package/dist/lib/context/toolPairRepair.js +67 -0
- package/dist/lib/core/conversationMemoryManager.d.ts +5 -15
- package/dist/lib/core/conversationMemoryManager.js +15 -75
- package/dist/lib/core/modules/MessageBuilder.d.ts +1 -1
- package/dist/lib/core/modules/MessageBuilder.js +2 -0
- package/dist/lib/core/modules/TelemetryHandler.d.ts +2 -3
- package/dist/lib/core/modules/TelemetryHandler.js +3 -3
- package/dist/lib/core/modules/ToolsManager.d.ts +2 -2
- package/dist/lib/core/redisConversationMemoryManager.d.ts +8 -14
- package/dist/lib/core/redisConversationMemoryManager.js +69 -78
- package/dist/lib/factories/providerFactory.d.ts +2 -2
- package/dist/lib/files/fileReferenceRegistry.d.ts +276 -0
- package/dist/lib/files/fileReferenceRegistry.js +1544 -0
- package/dist/lib/files/fileTools.d.ts +423 -0
- package/dist/lib/files/fileTools.js +450 -0
- package/dist/lib/files/index.d.ts +14 -0
- package/dist/lib/files/index.js +14 -0
- package/dist/lib/files/streamingReader.d.ts +93 -0
- package/dist/lib/files/streamingReader.js +322 -0
- package/dist/lib/files/types.d.ts +23 -0
- package/dist/lib/files/types.js +24 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +2 -2
- package/dist/lib/image-gen/types.d.ts +12 -12
- package/dist/lib/memory/mem0Initializer.d.ts +2 -2
- package/dist/lib/neurolink.d.ts +61 -2
- package/dist/lib/neurolink.js +619 -307
- package/dist/lib/processors/archive/ArchiveProcessor.d.ts +327 -0
- package/dist/lib/processors/archive/ArchiveProcessor.js +1309 -0
- package/dist/lib/processors/archive/index.d.ts +33 -0
- package/dist/lib/processors/archive/index.js +43 -0
- package/dist/lib/processors/base/types.d.ts +70 -64
- package/dist/lib/processors/base/types.js +6 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +8 -8
- package/dist/lib/processors/cli/fileProcessorCli.js +5 -5
- package/dist/lib/processors/config/mimeTypes.js +25 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +52 -40
- package/dist/lib/processors/config/sizeLimits.js +56 -44
- package/dist/lib/processors/document/ExcelProcessor.d.ts +14 -0
- package/dist/lib/processors/document/ExcelProcessor.js +72 -1
- package/dist/lib/processors/document/PptxProcessor.d.ts +63 -0
- package/dist/lib/processors/document/PptxProcessor.js +158 -0
- package/dist/lib/processors/document/index.d.ts +1 -0
- package/dist/lib/processors/document/index.js +6 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +2 -2
- package/dist/lib/processors/errors/errorHelpers.d.ts +2 -2
- package/dist/lib/processors/errors/errorSerializer.d.ts +4 -4
- package/dist/lib/processors/index.d.ts +8 -2
- package/dist/lib/processors/index.js +5 -2
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +8 -8
- package/dist/lib/processors/integration/FileProcessorIntegration.js +7 -7
- package/dist/lib/processors/media/AudioProcessor.d.ts +328 -0
- package/dist/lib/processors/media/AudioProcessor.js +708 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +350 -0
- package/dist/lib/processors/media/VideoProcessor.js +992 -0
- package/dist/lib/processors/media/index.d.ts +27 -0
- package/dist/lib/processors/media/index.js +37 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +19 -5
- package/dist/lib/processors/registry/ProcessorRegistry.js +103 -8
- package/dist/lib/processors/registry/index.d.ts +1 -1
- package/dist/lib/processors/registry/index.js +1 -1
- package/dist/lib/processors/registry/types.d.ts +2 -2
- package/dist/lib/providers/googleAiStudio.d.ts +34 -0
- package/dist/lib/providers/googleAiStudio.js +267 -397
- package/dist/lib/providers/googleVertex.d.ts +55 -1
- package/dist/lib/providers/googleVertex.js +452 -719
- package/dist/lib/providers/sagemaker/detection.d.ts +6 -6
- package/dist/lib/providers/sagemaker/diagnostics.d.ts +4 -4
- package/dist/lib/providers/sagemaker/parsers.d.ts +4 -4
- package/dist/lib/rag/chunkers/RecursiveChunker.js +2 -2
- package/dist/lib/rag/document/loaders.d.ts +6 -71
- package/dist/lib/rag/document/loaders.js +5 -5
- package/dist/lib/rag/graphRag/graphRAG.js +26 -9
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
- package/dist/lib/rag/metadata/metadataExtractor.js +6 -3
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +8 -126
- package/dist/lib/rag/pipeline/RAGPipeline.js +11 -11
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +3 -42
- package/dist/lib/rag/pipeline/contextAssembly.js +6 -3
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +5 -60
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +3 -33
- package/dist/lib/rag/resilience/RetryHandler.d.ts +2 -21
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +3 -41
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +2 -13
- package/dist/lib/rag/retrieval/vectorQueryTool.js +4 -3
- package/dist/lib/rag/types.d.ts +3 -3
- package/dist/lib/sdk/toolRegistration.d.ts +2 -2
- package/dist/lib/server/middleware/cache.d.ts +2 -2
- package/dist/lib/server/middleware/rateLimit.d.ts +2 -2
- package/dist/lib/server/routes/mcpRoutes.js +277 -249
- package/dist/lib/server/routes/memoryRoutes.js +287 -281
- package/dist/lib/server/utils/validation.d.ts +10 -10
- package/dist/lib/session/globalSessionState.d.ts +2 -2
- package/dist/lib/telemetry/telemetryService.d.ts +2 -2
- package/dist/lib/types/common.d.ts +39 -0
- package/dist/lib/types/contextTypes.d.ts +255 -0
- package/dist/lib/types/contextTypes.js +0 -2
- package/dist/lib/types/conversation.d.ts +62 -0
- package/dist/lib/types/conversationMemoryInterface.d.ts +27 -0
- package/dist/lib/types/conversationMemoryInterface.js +7 -0
- package/dist/lib/types/fileReferenceTypes.d.ts +222 -0
- package/dist/lib/types/fileReferenceTypes.js +9 -0
- package/dist/lib/types/fileTypes.d.ts +26 -3
- package/dist/lib/types/generateTypes.d.ts +22 -1
- package/dist/lib/types/index.d.ts +4 -5
- package/dist/lib/types/index.js +8 -10
- package/dist/lib/types/modelTypes.d.ts +2 -2
- package/dist/lib/types/processorTypes.d.ts +597 -0
- package/dist/lib/types/processorTypes.js +91 -0
- package/dist/lib/types/ragTypes.d.ts +481 -0
- package/dist/lib/types/ragTypes.js +8 -0
- package/dist/lib/types/sdkTypes.d.ts +17 -18
- package/dist/lib/types/streamTypes.d.ts +11 -1
- package/dist/lib/utils/async/retry.d.ts +2 -2
- package/dist/lib/utils/async/withTimeout.js +3 -1
- package/dist/lib/utils/conversationMemory.d.ts +12 -6
- package/dist/lib/utils/conversationMemory.js +76 -36
- package/dist/lib/utils/fileDetector.d.ts +62 -0
- package/dist/lib/utils/fileDetector.js +1014 -14
- package/dist/lib/utils/json/safeParse.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +806 -153
- package/dist/lib/utils/modelChoices.d.ts +2 -2
- package/dist/lib/utils/multimodalOptionsBuilder.d.ts +2 -1
- package/dist/lib/utils/multimodalOptionsBuilder.js +1 -0
- package/dist/lib/utils/rateLimiter.d.ts +2 -2
- package/dist/lib/utils/sanitizers/filename.d.ts +4 -4
- package/dist/lib/utils/sanitizers/svg.d.ts +2 -2
- package/dist/lib/utils/thinkingConfig.d.ts +6 -6
- package/dist/lib/utils/tokenEstimation.d.ts +68 -0
- package/dist/lib/utils/tokenEstimation.js +113 -0
- package/dist/lib/utils/tokenUtils.d.ts +4 -4
- package/dist/lib/utils/ttsProcessor.d.ts +2 -2
- package/dist/lib/workflow/config.d.ts +150 -150
- package/dist/memory/mem0Initializer.d.ts +2 -2
- package/dist/neurolink.d.ts +61 -2
- package/dist/neurolink.js +619 -307
- package/dist/processors/archive/ArchiveProcessor.d.ts +327 -0
- package/dist/processors/archive/ArchiveProcessor.js +1308 -0
- package/dist/processors/archive/index.d.ts +33 -0
- package/dist/processors/archive/index.js +42 -0
- package/dist/processors/base/types.d.ts +70 -64
- package/dist/processors/base/types.js +6 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +8 -8
- package/dist/processors/cli/fileProcessorCli.js +5 -5
- package/dist/processors/config/mimeTypes.js +25 -0
- package/dist/processors/config/sizeLimits.d.ts +52 -40
- package/dist/processors/config/sizeLimits.js +56 -44
- package/dist/processors/document/ExcelProcessor.d.ts +14 -0
- package/dist/processors/document/ExcelProcessor.js +72 -1
- package/dist/processors/document/PptxProcessor.d.ts +63 -0
- package/dist/processors/document/PptxProcessor.js +157 -0
- package/dist/processors/document/index.d.ts +1 -0
- package/dist/processors/document/index.js +6 -0
- package/dist/processors/errors/FileErrorCode.d.ts +2 -2
- package/dist/processors/errors/errorHelpers.d.ts +2 -2
- package/dist/processors/errors/errorSerializer.d.ts +4 -4
- package/dist/processors/index.d.ts +8 -2
- package/dist/processors/index.js +5 -2
- package/dist/processors/integration/FileProcessorIntegration.d.ts +8 -8
- package/dist/processors/integration/FileProcessorIntegration.js +7 -7
- package/dist/processors/media/AudioProcessor.d.ts +328 -0
- package/dist/processors/media/AudioProcessor.js +707 -0
- package/dist/processors/media/VideoProcessor.d.ts +350 -0
- package/dist/processors/media/VideoProcessor.js +991 -0
- package/dist/processors/media/ffprobe-static.d.ts +4 -0
- package/dist/processors/media/index.d.ts +27 -0
- package/dist/processors/media/index.js +36 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +19 -5
- package/dist/processors/registry/ProcessorRegistry.js +103 -8
- package/dist/processors/registry/index.d.ts +1 -1
- package/dist/processors/registry/index.js +1 -1
- package/dist/processors/registry/types.d.ts +2 -2
- package/dist/providers/googleAiStudio.d.ts +34 -0
- package/dist/providers/googleAiStudio.js +267 -397
- package/dist/providers/googleVertex.d.ts +55 -1
- package/dist/providers/googleVertex.js +452 -719
- package/dist/providers/sagemaker/detection.d.ts +6 -6
- package/dist/providers/sagemaker/diagnostics.d.ts +4 -4
- package/dist/providers/sagemaker/parsers.d.ts +4 -4
- package/dist/rag/chunkers/RecursiveChunker.js +2 -2
- package/dist/rag/document/loaders.d.ts +6 -71
- package/dist/rag/document/loaders.js +5 -5
- package/dist/rag/graphRag/graphRAG.js +26 -9
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
- package/dist/rag/metadata/metadataExtractor.js +6 -3
- package/dist/rag/pipeline/RAGPipeline.d.ts +8 -126
- package/dist/rag/pipeline/RAGPipeline.js +11 -11
- package/dist/rag/pipeline/contextAssembly.d.ts +3 -42
- package/dist/rag/pipeline/contextAssembly.js +6 -3
- package/dist/rag/reranker/RerankerFactory.d.ts +5 -60
- package/dist/rag/resilience/CircuitBreaker.d.ts +3 -33
- package/dist/rag/resilience/RetryHandler.d.ts +2 -21
- package/dist/rag/retrieval/hybridSearch.d.ts +3 -41
- package/dist/rag/retrieval/vectorQueryTool.d.ts +2 -13
- package/dist/rag/retrieval/vectorQueryTool.js +4 -3
- package/dist/rag/types.d.ts +3 -3
- package/dist/sdk/toolRegistration.d.ts +2 -2
- package/dist/server/middleware/cache.d.ts +2 -2
- package/dist/server/middleware/rateLimit.d.ts +2 -2
- package/dist/server/routes/mcpRoutes.js +277 -249
- package/dist/server/routes/memoryRoutes.js +287 -281
- package/dist/server/utils/validation.d.ts +4 -4
- package/dist/session/globalSessionState.d.ts +2 -2
- package/dist/telemetry/telemetryService.d.ts +2 -2
- package/dist/types/common.d.ts +39 -0
- package/dist/types/contextTypes.d.ts +255 -0
- package/dist/types/contextTypes.js +0 -2
- package/dist/types/conversation.d.ts +62 -0
- package/dist/types/conversationMemoryInterface.d.ts +27 -0
- package/dist/types/conversationMemoryInterface.js +6 -0
- package/dist/types/fileReferenceTypes.d.ts +222 -0
- package/dist/types/fileReferenceTypes.js +8 -0
- package/dist/types/fileTypes.d.ts +26 -3
- package/dist/types/generateTypes.d.ts +22 -1
- package/dist/types/index.d.ts +4 -5
- package/dist/types/index.js +8 -10
- package/dist/types/processorTypes.d.ts +597 -0
- package/dist/types/processorTypes.js +90 -0
- package/dist/types/ragTypes.d.ts +481 -0
- package/dist/types/ragTypes.js +7 -0
- package/dist/types/sdkTypes.d.ts +17 -18
- package/dist/types/streamTypes.d.ts +11 -1
- package/dist/utils/async/retry.d.ts +2 -2
- package/dist/utils/async/withTimeout.js +3 -1
- package/dist/utils/conversationMemory.d.ts +12 -6
- package/dist/utils/conversationMemory.js +76 -36
- package/dist/utils/fileDetector.d.ts +62 -0
- package/dist/utils/fileDetector.js +1014 -14
- package/dist/utils/json/safeParse.d.ts +2 -2
- package/dist/utils/messageBuilder.js +806 -153
- package/dist/utils/modelChoices.d.ts +2 -2
- package/dist/utils/multimodalOptionsBuilder.d.ts +2 -1
- package/dist/utils/multimodalOptionsBuilder.js +1 -0
- package/dist/utils/rateLimiter.d.ts +2 -2
- package/dist/utils/sanitizers/filename.d.ts +4 -4
- package/dist/utils/sanitizers/svg.d.ts +2 -2
- package/dist/utils/thinkingConfig.d.ts +6 -6
- package/dist/utils/tokenEstimation.d.ts +68 -0
- package/dist/utils/tokenEstimation.js +112 -0
- package/dist/utils/tokenUtils.d.ts +4 -4
- package/dist/utils/ttsProcessor.d.ts +2 -2
- package/dist/workflow/config.d.ts +104 -104
- package/package.json +18 -6
- package/dist/lib/utils/conversationMemoryUtils.d.ts +0 -25
- package/dist/lib/utils/conversationMemoryUtils.js +0 -138
- package/dist/utils/conversationMemoryUtils.d.ts +0 -25
- package/dist/utils/conversationMemoryUtils.js +0 -137
|
@@ -3,15 +3,215 @@
|
|
|
3
3
|
* Centralized logic for building message arrays from TextGenerationOptions
|
|
4
4
|
* Enhanced with multimodal support for images
|
|
5
5
|
*/
|
|
6
|
-
import { existsSync, readFileSync } from "fs";
|
|
6
|
+
import { existsSync, readFileSync, statSync } from "fs";
|
|
7
7
|
import { getGlobalDispatcher, interceptors, request } from "undici";
|
|
8
8
|
import { MultimodalLogger, ProviderImageAdapter, } from "../adapters/providerImageAdapter.js";
|
|
9
9
|
import { CONVERSATION_INSTRUCTIONS, STRUCTURED_OUTPUT_INSTRUCTIONS, } from "../config/conversationMemory.js";
|
|
10
|
+
import { getAvailableInputTokens } from "../constants/contextWindows.js";
|
|
11
|
+
import { enforceAggregateFileBudget, FILE_READ_BUDGET_PERCENT, } from "../context/fileTokenBudget.js";
|
|
12
|
+
import { SIZE_TIER_THRESHOLDS } from "../files/types.js";
|
|
10
13
|
import { FileDetector } from "./fileDetector.js";
|
|
11
14
|
import { getImageCache } from "./imageCache.js";
|
|
12
15
|
import { logger } from "./logger.js";
|
|
13
16
|
import { PDFImageConverter, PDFProcessor } from "./pdfProcessor.js";
|
|
14
17
|
import { urlDownloadRateLimiter } from "./rateLimiter.js";
|
|
18
|
+
import { estimateTokens } from "./tokenEstimation.js";
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// SDK-7: Lightweight file-type inference helpers for budget estimation
|
|
21
|
+
// These avoid calling the full FileDetector pipeline — they only need to
|
|
22
|
+
// classify files into broad categories (video, audio, image, etc.) so
|
|
23
|
+
// estimatePostProcessingTokens() can use type-aware estimates.
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Extension → file type mapping for budget estimation.
 *
 * Keys are lowercase extensions without the leading dot; values are the broad
 * category names used for budget estimation.
 *
 * The map has a null prototype so that a lookup with an arbitrary,
 * caller-supplied extension (e.g. "constructor" or "__proto__") yields
 * undefined instead of an inherited Object.prototype member, and it is frozen
 * because it is a shared module-level constant.
 */
const EXTENSION_TYPE_MAP = Object.freeze({
    __proto__: null,
    // Video
    mp4: "video",
    mkv: "video",
    mov: "video",
    avi: "video",
    webm: "video",
    wmv: "video",
    flv: "video",
    m4v: "video",
    // Audio
    mp3: "audio",
    wav: "audio",
    ogg: "audio",
    flac: "audio",
    m4a: "audio",
    aac: "audio",
    wma: "audio",
    opus: "audio",
    // Image
    jpg: "image",
    jpeg: "image",
    png: "image",
    gif: "image",
    webp: "image",
    bmp: "image",
    tiff: "image",
    tif: "image",
    avif: "image",
    // Archive
    zip: "archive",
    tar: "archive",
    gz: "archive",
    tgz: "archive",
    rar: "archive",
    "7z": "archive",
    jar: "archive",
    // Documents (spreadsheet, word-processing and presentation families)
    xlsx: "xlsx",
    xls: "xlsx",
    ods: "xlsx",
    docx: "docx",
    doc: "docx",
    odt: "docx",
    rtf: "docx",
    pptx: "pptx",
    ppt: "pptx",
    odp: "pptx",
    // PDF
    pdf: "pdf",
    // SVG
    svg: "svg",
    // CSV
    csv: "csv",
    tsv: "csv",
});
|
|
82
|
+
/**
 * Infer file type from the extension of a file path or URL.
 *
 * Anything from the first "?" or "#" onwards is discarded, then the text
 * after the last "." is lowercased and looked up in EXTENSION_TYPE_MAP.
 *
 * @param {string} filePath - File path or URL to classify.
 * @returns {string|undefined} The mapped file type, or undefined when the
 *   input is not a string, contains no ".", or the extension is not a key of
 *   EXTENSION_TYPE_MAP.
 */
function inferFileTypeFromExtension(filePath) {
    if (typeof filePath !== "string") {
        return undefined;
    }
    // Strip query string / fragment for URLs
    const cleaned = filePath.split("?")[0].split("#")[0];
    const lastDot = cleaned.lastIndexOf(".");
    if (lastDot === -1) {
        return undefined;
    }
    const ext = cleaned.slice(lastDot + 1).toLowerCase();
    // Only own keys count: a bare property read would also resolve inherited
    // members such as "constructor" or "__proto__" to non-undefined values.
    return Object.prototype.hasOwnProperty.call(EXTENSION_TYPE_MAP, ext)
        ? EXTENSION_TYPE_MAP[ext]
        : undefined;
}
|
|
96
|
+
/**
 * Ordered magic-byte signatures used by inferFileTypeFromBuffer().
 * Each entry lists one or more [offset, bytes] segments that must all match.
 * The first matching entry wins, so the order below is significant.
 */
const BUDGET_MAGIC_SIGNATURES = Object.freeze([
    // PNG
    { type: "image", segments: [[0, [0x89, 0x50, 0x4e, 0x47]]] },
    // JPEG
    { type: "image", segments: [[0, [0xff, 0xd8, 0xff]]] },
    // GIF
    { type: "image", segments: [[0, [0x47, 0x49, 0x46]]] },
    // WebP (RIFF + WEBP)
    {
        type: "image",
        segments: [
            [0, [0x52, 0x49, 0x46, 0x46]],
            [8, [0x57, 0x45, 0x42, 0x50]],
        ],
    },
    // PDF
    { type: "pdf", segments: [[0, [0x25, 0x50, 0x44, 0x46]]] },
    // ISO-BMFF container (ftyp at offset 4): MP4/MOV, but also AVIF and M4A.
    // The final type is resolved from the major brand.
    { type: "isobmff", segments: [[4, [0x66, 0x74, 0x79, 0x70]]] },
    // MKV/WebM (EBML)
    { type: "video", segments: [[0, [0x1a, 0x45, 0xdf, 0xa3]]] },
    // AVI (RIFF + "AVI ")
    {
        type: "video",
        segments: [
            [0, [0x52, 0x49, 0x46, 0x46]],
            [8, [0x41, 0x56, 0x49, 0x20]],
        ],
    },
    // WAV (RIFF + WAVE)
    {
        type: "audio",
        segments: [
            [0, [0x52, 0x49, 0x46, 0x46]],
            [8, [0x57, 0x41, 0x56, 0x45]],
        ],
    },
    // MP3 (ID3 tag)
    { type: "audio", segments: [[0, [0x49, 0x44, 0x33]]] },
    // FLAC
    { type: "audio", segments: [[0, [0x66, 0x4c, 0x61, 0x43]]] },
    // OGG
    { type: "audio", segments: [[0, [0x4f, 0x67, 0x67, 0x53]]] },
    // ZIP (also .xlsx, .docx, .pptx — but without extension we default to archive)
    { type: "archive", segments: [[0, [0x50, 0x4b, 0x03, 0x04]]] },
    // GZIP
    { type: "archive", segments: [[0, [0x1f, 0x8b]]] },
    // RAR
    { type: "archive", segments: [[0, [0x52, 0x61, 0x72, 0x21]]] },
]);
/**
 * ISO-BMFF major brands (bytes 8-11) that are not video.
 * Any brand not listed here keeps the "video" classification.
 */
const BUDGET_ISO_BMFF_BRAND_TYPES = Object.freeze({
    __proto__: null,
    avif: "image",
    avis: "image",
    "M4A ": "audio",
});
/**
 * Infer file type from the first few magic bytes of a Buffer.
 * Only checks the most common binary types — text types default to undefined.
 *
 * ISO-BMFF containers ("ftyp" at offset 4) are classified by their major
 * brand: AVIF brands map to "image" and the M4A brand maps to "audio", so the
 * result agrees with the extension-based mapping for .avif and .m4a. Every
 * other brand, and any header too short to carry a brand, is reported as
 * "video".
 *
 * @param {Buffer} buf - Leading bytes (or the whole content) of the file.
 * @returns {string|undefined} One of "image", "pdf", "video", "audio" or
 *   "archive"; undefined when fewer than 4 bytes are available or no
 *   signature matches.
 */
function inferFileTypeFromBuffer(buf) {
    if (!buf || buf.length < 4) {
        return undefined;
    }
    const segmentMatches = ([offset, bytes]) => buf.length >= offset + bytes.length &&
        bytes.every((byte, i) => buf[offset + i] === byte);
    for (const { type, segments } of BUDGET_MAGIC_SIGNATURES) {
        if (!segments.every(segmentMatches)) {
            continue;
        }
        if (type !== "isobmff") {
            return type;
        }
        // The major brand occupies bytes 8-11 of the ftyp box.
        const brand = buf.length >= 12
            ? String.fromCharCode(buf[8], buf[9], buf[10], buf[11])
            : "";
        return BUDGET_ISO_BMFF_BRAND_TYPES[brand] || "video";
    }
    return undefined;
}
|
|
15
215
|
/**
|
|
16
216
|
* Type guard to check if an image input has alt text
|
|
17
217
|
*/
|
|
@@ -359,180 +559,375 @@ export async function buildMessagesArray(options) {
|
|
|
359
559
|
return messages;
|
|
360
560
|
}
|
|
361
561
|
/**
 * Build the { name, sizeBytes, fileType } descriptor that the aggregate
 * budget check consumes for a single entry of options.input.files.
 *
 * - Buffer: size is the byte length, type is sniffed from magic bytes, and
 *   the name is the synthetic `file-<index>`.
 * - string naming an existing path: size comes from statSync (0 if stat
 *   throws); type comes from the extension.
 * - any other string: the string's own length is used as the size.
 * - anything else: size 0, no type.
 */
function describeFileForBudget(file, idx) {
    if (Buffer.isBuffer(file)) {
        return {
            name: `file-${idx}`,
            sizeBytes: file.length,
            fileType: inferFileTypeFromBuffer(file),
        };
    }
    if (typeof file !== "string") {
        return { name: `file-${idx}`, sizeBytes: 0, fileType: undefined };
    }
    let sizeBytes = file.length;
    if (existsSync(file)) {
        try {
            sizeBytes = statSync(file).size;
        }
        catch {
            // The path was reported as existing but could not be stat'ed;
            // fall back to 0 rather than failing the whole request.
            sizeBytes = 0;
        }
    }
    return {
        name: file,
        sizeBytes,
        fileType: inferFileTypeFromExtension(file),
    };
}
/**
 * Enforce aggregate file budget, excluding files that would exceed the context window.
 * Mutates options.input.files and options.input.text as needed.
 *
 * When at least one file is excluded, options.input.files is narrowed to the
 * included files and the budget notices are appended to options.input.text.
 * When nothing is excluded, options is left untouched.
 *
 * @param {object} options - Generation options; reads and may rewrite
 *   options.input.files and options.input.text.
 * @param {string} provider - Provider passed to getAvailableInputTokens().
 * @param {string} model - Model passed to getAvailableInputTokens().
 * @returns {void}
 */
function enforceFileBudget(options, provider, model) {
    if (!options.input.files || options.input.files.length === 0) {
        return;
    }
    const availableTokens = getAvailableInputTokens(provider, model);
    const budgetFiles = options.input.files.map(describeFileForBudget);
    // Hand over copies so the descriptors used for filtering below cannot be
    // altered by the budget routine.
    const budgetResult = enforceAggregateFileBudget(budgetFiles.map((entry) => ({ ...entry })), availableTokens);
    if (budgetResult.excluded.length === 0) {
        return;
    }
    // Count how many times each name was admitted. A plain Set of names would
    // keep every copy of a path that was supplied more than once, even when
    // only some of those copies fit in the budget.
    const remainingByName = new Map();
    for (const { name } of budgetResult.included) {
        remainingByName.set(name, (remainingByName.get(name) || 0) + 1);
    }
    options.input.files = options.input.files.filter((_file, idx) => {
        const { name } = budgetFiles[idx];
        const remaining = remainingByName.get(name) || 0;
        if (remaining === 0) {
            return false;
        }
        remainingByName.set(name, remaining - 1);
        return true;
    });
    options.input.text = `${options.input.text || ""}\n\n${budgetResult.notices.join("\n")}`;
    logger.warn(`[FileDetector] Aggregate file budget enforcement: excluded ${budgetResult.excluded.length} file(s)`);
}
|
|
617
|
+
/**
 * Fold one detected-file result into `options.input`, dispatching on `result.type`.
 *
 * Text-like results (csv, svg, video, audio, archive, xlsx, docx, pptx, text,
 * unknown) are appended to `options.input.text` as a markdown section; images
 * and PDFs are appended to `options.input.images` / `options.input.pdfFiles`.
 * Result types not listed here are ignored.
 *
 * @param {object} result - Detection result; `type`, `content`, `mimeType`,
 *   `metadata` and `images` are read.
 * @param {*} file - Original file input, used to derive the display filename
 *   (and, for string inputs, the CSV tool file path).
 * @param {object} options - Request options whose `input` is mutated in place.
 *   `options.input.text` is appended to with `+=`, so the caller is expected
 *   to have initialised it to a string.
 */
function appendDetectedFileResult(result, file, options) {
    const filename = extractFilename(file);
    const { input } = options;
    // Appends a `## <heading>: "<filename>"` section, only when content exists.
    const appendSection = (heading) => {
        if (result.content) {
            input.text += `\n\n## ${heading}: "${filename}"\n${result.content}\n`;
        }
    };
    // Appends images after whatever images are already queued on the input.
    const appendImages = (images) => {
        input.images = [...(input.images || []), ...images];
    };
    switch (result.type) {
        case "csv": {
            // The analyzeCSV hint needs a real path when one was supplied.
            const filePath = typeof file === "string" ? file : filename;
            let csvSection = `\n\n## CSV Data from "${filename}":\n`;
            const metadataText = result.metadata
                ? formatCSVMetadata(result.metadata)
                : null;
            if (metadataText) {
                csvSection += metadataText + `\n\n`;
            }
            csvSection += buildCSVToolInstructions(filePath);
            csvSection += result.content;
            input.text += csvSection;
            logger.info(`[FileDetector] ✅ CSV: ${filename}`);
            break;
        }
        case "svg": {
            // SVG is passed to the model as fenced XML text, not as an image.
            input.text += `\n\n## SVG Content from "${filename}":\n\`\`\`xml\n${result.content}\n\`\`\`\n`;
            logger.info(`[FileDetector] ✅ SVG (as text): ${filename}`);
            break;
        }
        case "image": {
            appendImages([result.content]);
            logger.info(`[FileDetector] ✅ Image: ${result.mimeType}`);
            break;
        }
        case "pdf": {
            input.pdfFiles = [...(input.pdfFiles || []), result.content];
            logger.info(`[FileDetector] ✅ PDF: ${filename}`);
            break;
        }
        case "video": {
            appendSection("Video File");
            if (result.images && result.images.length > 0) {
                appendImages(result.images);
                logger.info(`[FileDetector] Added ${result.images.length} video keyframes as images`);
            }
            logger.info(`[FileDetector] ✅ Video: ${filename}`);
            break;
        }
        case "audio": {
            appendSection("Audio File");
            if (result.images && result.images.length > 0) {
                appendImages(result.images);
                logger.info(`[FileDetector] Added audio cover art as image`);
            }
            logger.info(`[FileDetector] ✅ Audio: ${filename}`);
            break;
        }
        case "archive": {
            appendSection("Archive File");
            logger.info(`[FileDetector] ✅ Archive: ${filename}`);
            break;
        }
        case "xlsx": {
            appendSection("Spreadsheet");
            logger.info(`[FileDetector] ✅ Spreadsheet: ${filename}`);
            break;
        }
        case "docx": {
            appendSection("Document");
            logger.info(`[FileDetector] ✅ Document: ${filename}`);
            break;
        }
        case "pptx": {
            appendSection("Presentation");
            logger.info(`[FileDetector] ✅ Presentation: ${filename}`);
            break;
        }
        case "text": {
            if (result.content) {
                const langHint = getLanguageHint(result.mimeType, filename);
                const MAX_TEXT_FILE_CHARS = 200_000;
                const fullText = result.content;
                const truncated = fullText.length > MAX_TEXT_FILE_CHARS;
                let fileContent = fullText;
                if (truncated) {
                    // Keep the first 75% and last 25% of the allowance and
                    // replace the middle with an omission notice.
                    const headChars = Math.floor(MAX_TEXT_FILE_CHARS * 0.75);
                    const tailChars = Math.floor(MAX_TEXT_FILE_CHARS * 0.25);
                    const omittedChars = fullText.length - headChars - tailChars;
                    fileContent =
                        fullText.slice(0, headChars) +
                            `\n\n... [${omittedChars.toLocaleString()} characters omitted — file truncated to fit context window] ...\n\n` +
                            fullText.slice(-tailChars);
                }
                // Fence the content only when a language hint is available.
                if (langHint) {
                    input.text += `\n\n## File: "${filename}"\n\`\`\`${langHint}\n${fileContent}\n\`\`\`\n`;
                }
                else {
                    input.text += `\n\n## File: "${filename}"\n${fileContent}\n`;
                }
                if (truncated) {
                    logger.warn(`[FileDetector] Large text file "${filename}" truncated from ${result.content.length.toLocaleString()} to ${MAX_TEXT_FILE_CHARS.toLocaleString()} chars`);
                }
            }
            logger.info(`[FileDetector] ✅ Text: ${filename}`);
            break;
        }
        case "unknown": {
            appendSection("Attached File");
            logger.info(`[FileDetector] ⚠️ Unknown format (metadata extracted): ${filename}`);
            break;
        }
        default:
            // Any other result type is intentionally left untouched.
            break;
    }
}
|
|
736
|
+
/**
 * Run auto-detection over `options.input.files` and merge the outcome into
 * `options.input`.
 *
 * For each file: when a file registry is present and the file is larger than
 * `SIZE_TIER_THRESHOLDS.TINY_MAX`, registration as a lazy reference is tried
 * first; otherwise (or when registration fails) the file is fully processed
 * and appended. A failure on one file is logged and does not stop the others.
 * Afterwards, the registry's prompt preview and any extracted images of
 * registered files are injected.
 *
 * @param {object} options - Request options; `input` is mutated in place and
 *   `fileRegistry` / `csvOptions` are read.
 * @param {number} maxSize - Size limit in bytes; the limit actually passed to
 *   detection is never lower than 100 MiB.
 * @param {string} provider - Provider identifier forwarded to detection.
 * @returns {Promise<void>}
 */
async function processUnifiedFilesArray(options, maxSize, provider) {
    const pendingFiles = options.input.files;
    if (!pendingFiles || pendingFiles.length === 0) {
        return;
    }
    logger.info(`[FileDetector] Processing ${options.input.files.length} file(s) with auto-detection`);
    options.input.text = options.input.text || "";
    const fileRegistry = options.fileRegistry;
    let fileIdx = 0;
    while (fileIdx < options.input.files.length) {
        const currentIdx = fileIdx++;
        const file = options.input.files[currentIdx];
        try {
            // ─── Lazy registration path ───────────────────────────────────
            const fileSize = fileRegistry ? getFileSize(file) : 0;
            const wantsLazyReference = fileRegistry && fileSize > SIZE_TIER_THRESHOLDS.TINY_MAX;
            if (wantsLazyReference &&
                (await tryRegisterFileReference(file, fileSize, fileRegistry, currentIdx))) {
                continue;
            }
            // ─── Full processing path ─────────────────────────────────────
            const detectionInput = isFileWithMetadata(file) ? file.buffer : file;
            const detectionOptions = {
                maxSize: Math.max(maxSize, 100 * 1024 * 1024),
                allowedTypes: [
                    "csv",
                    "image",
                    "pdf",
                    "svg",
                    "video",
                    "audio",
                    "archive",
                    "xlsx",
                    "docx",
                    "pptx",
                    "text",
                    "unknown",
                ],
                csvOptions: options.csvOptions,
                provider,
            };
            const result = await FileDetector.detectAndProcess(detectionInput, detectionOptions);
            appendDetectedFileResult(result, file, options);
        }
        catch (error) {
            // Best effort: one bad file must not abort the whole batch.
            logger.error(`[FileDetector] ❌ Failed to process file:`, error);
        }
    }
    // Nothing more to do unless at least one file sits in the registry.
    if (!fileRegistry || !(fileRegistry.size > 0)) {
        return;
    }
    const previewText = await fileRegistry.generatePromptPreview();
    if (previewText) {
        options.input.text = (options.input.text || "") + previewText;
        logger.info(`[FileDetector] Injected previews for ${fileRegistry.size} lazily-registered file(s)`);
    }
    for (const ref of fileRegistry.list()) {
        const hasExtractedImages = ref.extractedImages && ref.extractedImages.length > 0;
        if (!hasExtractedImages) {
            continue;
        }
        options.input.images = [
            ...(options.input.images || []),
            ...ref.extractedImages,
        ];
        logger.info(`[FileDetector] Injected ${ref.extractedImages.length} extracted images from "${ref.filename}"`);
    }
}
|
|
805
|
+
/**
 * Process every entry of `options.input.csvFiles` as CSV and append the
 * rendered sections to `options.input.text`.
 *
 * A file that fails to process does not abort the run: the error is logged
 * and a "CSV Data Error" section naming the file and the reason is appended
 * to the text instead.
 *
 * @param {object} options - Request options; `input.csvFiles` and
 *   `csvOptions` are read, `input.text` is mutated.
 * @returns {Promise<void>}
 */
async function processExplicitCsvFiles(options) {
    const explicitCsvFiles = options.input.csvFiles;
    if (!explicitCsvFiles || explicitCsvFiles.length === 0) {
        return;
    }
    logger.info(`[CSV] Processing ${options.input.csvFiles.length} explicit CSV file(s)`);
    options.input.text = options.input.text || "";
    let position = 0;
    while (position < options.input.csvFiles.length) {
        const i = position++;
        const csvFile = options.input.csvFiles[i];
        try {
            const result = await FileDetector.detectAndProcess(csvFile, {
                allowedTypes: ["csv"],
                csvOptions: options.csvOptions,
            });
            const filename = extractFilename(csvFile, i);
            // The analyzeCSV hint needs a real path when one was supplied.
            const filePath = typeof csvFile === "string" ? csvFile : filename;
            const heading = `\n\n## CSV Data from "${filename}":\n`;
            const metadataText = result.metadata
                ? formatCSVMetadata(result.metadata)
                : null;
            const metadataBlock = metadataText ? metadataText + `\n\n` : "";
            const toolInstructions = buildCSVToolInstructions(filePath);
            options.input.text +=
                heading + metadataBlock + toolInstructions + result.content;
            logger.info(`[CSV] ✅ Processed: ${filename}`);
        }
        catch (error) {
            logger.error(`[CSV] ❌ Failed:`, error);
            const filename = extractFilename(csvFile, i);
            const reason = error instanceof Error ? error.message : "Unknown error";
            // Surface the failure in the prompt so the model knows data is missing.
            options.input.text +=
                `\n\n## CSV Data Error: Failed to process "${filename}"` +
                    `\nReason: ${reason}`;
        }
    }
}
|
|
843
|
+
/**
 * Trim the accumulated `options.input.text` to the file-read token budget and
 * log a token usage breakdown.
 *
 * The budget is `FILE_READ_BUDGET_PERCENT` of the available input tokens.
 * When the estimated token count exceeds it, the text is cut to the first 75%
 * and last 25% of the character allowance with a truncation notice between
 * them. Does nothing when there is no text.
 *
 * @param {object} options - Request options; `input.text` may be rewritten,
 *   `input.images` and `input.content` are read for the usage log.
 * @param {string} provider - Provider identifier used for token estimation.
 * @param {string} model - Model identifier used to look up available tokens.
 * @returns {void}
 */
function enforcePostProcessingBudget(options, provider, model) {
    const { input } = options;
    if (!input.text) {
        return;
    }
    const availableTokens = getAvailableInputTokens(provider, model);
    const textTokenBudget = Math.floor(availableTokens * FILE_READ_BUDGET_PERCENT);
    const actualTextTokens = estimateTokens(input.text, provider);
    const isOverBudget = actualTextTokens > textTokenBudget && textTokenBudget > 0;
    // Character allowance is budget * 4 — presumably ~4 chars per token.
    const maxChars = textTokenBudget * 4;
    if (isOverBudget && input.text.length > maxChars) {
        const untrimmed = input.text;
        const head = untrimmed.slice(0, Math.floor(maxChars * 0.75));
        const tail = untrimmed.slice(-Math.floor(maxChars * 0.25));
        const truncatedTokens = actualTextTokens - textTokenBudget;
        input.text = `${head}\n\n[... ${truncatedTokens.toLocaleString()} tokens of file content truncated to fit context window ...]\n\n${tail}`;
        logger.warn(`[FileDetector] Post-processing budget enforcement: truncated ~${truncatedTokens.toLocaleString()} tokens of file content to fit ${textTokenBudget.toLocaleString()} token budget`);
    }
    // Token usage breakdown logging
    const textTokens = estimateTokens(input.text, provider);
    const directImages = input.images?.length ?? 0;
    const contentImages = input.content?.filter((c) => c.type === "image").length ?? 0;
    const imageCount = directImages + contentImages;
    // Each image is counted as a flat 1500 tokens for this log line.
    const imageTokens = imageCount * 1500;
    const totalContentTokens = textTokens + imageTokens;
    const contextWindow = getAvailableInputTokens(provider, model);
    const utilization = contextWindow > 0
        ? ((totalContentTokens / contextWindow) * 100).toFixed(1)
        : "N/A";
    logger.info(`[TokenUsage] Content breakdown: text=${textTokens.toLocaleString()} tokens, images=${imageCount} (~${imageTokens.toLocaleString()} tokens), total=${totalContentTokens.toLocaleString()} tokens, budget=${contextWindow.toLocaleString()} tokens, utilization=${utilization}%`);
}
|
|
881
|
+
/**
 * Process the explicit `options.input.pdfFiles` entries and collect them as
 * structured PDF entries for multimodal processing.
 *
 * Each file is run through detection restricted to the "pdf" type. Results
 * whose content is a Buffer are queued; any other result is skipped with a
 * warning so the omission is visible in the logs. Unlike CSV processing, a
 * detection failure is logged and then rethrown to the caller.
 *
 * @param {object} options - Request options; `input.pdfFiles` is read.
 * @param {number} maxSize - Maximum accepted PDF size in bytes.
 * @param {string} provider - Provider identifier forwarded to detection.
 * @returns {Promise<Array<{buffer: Buffer, filename: string, pageCount: (number|null)}>>}
 *   Queued PDF entries, empty when no explicit PDFs were supplied.
 * @throws Rethrows whatever `FileDetector.detectAndProcess` throws.
 */
async function processExplicitPdfFiles(options, maxSize, provider) {
    const pdfFiles = [];
    if (!options.input.pdfFiles || options.input.pdfFiles.length === 0) {
        return pdfFiles;
    }
    logger.info(`[PDF] Processing ${options.input.pdfFiles.length} explicit PDF file(s) for ${provider}`);
    for (let i = 0; i < options.input.pdfFiles.length; i++) {
        const pdfFile = options.input.pdfFiles[i];
        const filename = extractFilename(pdfFile, i);
        try {
            const result = await FileDetector.detectAndProcess(pdfFile, {
                maxSize,
                allowedTypes: ["pdf"],
                provider,
            });
            if (!Buffer.isBuffer(result.content)) {
                // Previously this case dropped the file without a trace.
                logger.warn(`[PDF] ⚠️ Skipped ${filename}: processed content is not a Buffer`);
                continue;
            }
            const pageCount = result.metadata?.estimatedPages ?? null;
            pdfFiles.push({
                buffer: result.content,
                filename,
                pageCount,
            });
            logger.info(`[PDF] ✅ Queued for multimodal: ${filename} (${pageCount ?? "unknown"} pages)`);
        }
        catch (error) {
            logger.error(`[PDF] ❌ Failed to process ${filename}:`, error);
            throw error;
        }
    }
    return pdfFiles;
}
|
|
915
|
+
/**
|
|
916
|
+
* Build the enhanced system prompt for multimodal messages, including
|
|
917
|
+
* conversation instructions, structured output instructions, and file handling guidance.
|
|
918
|
+
*/
|
|
919
|
+
function buildMultimodalSystemPrompt(options, hasPDFFiles) {
|
|
521
920
|
let systemPrompt = options.systemPrompt?.trim() || "";
|
|
522
|
-
// Add conversation-aware instructions when history exists
|
|
523
921
|
const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0;
|
|
524
922
|
if (hasConversationHistory) {
|
|
525
923
|
systemPrompt = `${systemPrompt.trim()}${CONVERSATION_INSTRUCTIONS}`;
|
|
526
924
|
}
|
|
527
|
-
// Add structured output instructions when schema is provided with json/structured format
|
|
528
925
|
if (shouldUseStructuredOutput(options)) {
|
|
529
926
|
systemPrompt = `${systemPrompt.trim()}${STRUCTURED_OUTPUT_INSTRUCTIONS}`;
|
|
530
927
|
}
|
|
531
|
-
// Add file handling guidance when multimodal files are present
|
|
532
928
|
const hasCSVFiles = (options.input.csvFiles && options.input.csvFiles.length > 0) ||
|
|
533
929
|
(options.input.files &&
|
|
534
930
|
options.input.files.some((f) => typeof f === "string" ? f.toLowerCase().endsWith(".csv") : false));
|
|
535
|
-
const hasPDFFiles = pdfFiles.length > 0;
|
|
536
931
|
if (hasCSVFiles || hasPDFFiles) {
|
|
537
932
|
const fileTypes = [];
|
|
538
933
|
if (hasPDFFiles) {
|
|
@@ -548,7 +943,58 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
|
|
|
548
943
|
- GitHub MCP tools are ONLY for remote repository operations, not local filesystem access
|
|
549
944
|
- Use the file content shown in this message for your analysis`;
|
|
550
945
|
}
|
|
551
|
-
|
|
946
|
+
return systemPrompt;
|
|
947
|
+
}
|
|
948
|
+
/**
|
|
949
|
+
* Build multimodal message array with image support
|
|
950
|
+
* Detects when images are present and routes through provider adapter
|
|
951
|
+
*/
|
|
952
|
+
export async function buildMultimodalMessagesArray(options, provider, model) {
|
|
953
|
+
// Compute provider-specific max PDF size once for consistent validation
|
|
954
|
+
const pdfConfig = PDFProcessor.getProviderConfig(provider);
|
|
955
|
+
const maxSize = pdfConfig
|
|
956
|
+
? pdfConfig.maxSizeMB * 1024 * 1024
|
|
957
|
+
: 10 * 1024 * 1024;
|
|
958
|
+
// Aggregate file budget enforcement
|
|
959
|
+
enforceFileBudget(options, provider, model);
|
|
960
|
+
// Process unified files array (auto-detect)
|
|
961
|
+
await processUnifiedFilesArray(options, maxSize, provider);
|
|
962
|
+
// Process explicit CSV files array
|
|
963
|
+
await processExplicitCsvFiles(options);
|
|
964
|
+
// Post-processing budget enforcement and token usage logging
|
|
965
|
+
enforcePostProcessingBudget(options, provider, model);
|
|
966
|
+
// Process explicit PDF files
|
|
967
|
+
const pdfFiles = await processExplicitPdfFiles(options, maxSize, provider);
|
|
968
|
+
// Check if this is a multimodal request
|
|
969
|
+
const hasImages = (options.input.images && options.input.images.length > 0) ||
|
|
970
|
+
(options.input.content &&
|
|
971
|
+
options.input.content.some((c) => c.type === "image"));
|
|
972
|
+
const hasPDFs = pdfFiles.length > 0;
|
|
973
|
+
// If no images or PDFs, use standard message building and convert to MultimodalChatMessage[]
|
|
974
|
+
if (!hasImages && !hasPDFs) {
|
|
975
|
+
if (options.input.csvFiles) {
|
|
976
|
+
options.input.csvFiles = [];
|
|
977
|
+
}
|
|
978
|
+
if (options.input.pdfFiles) {
|
|
979
|
+
options.input.pdfFiles = [];
|
|
980
|
+
}
|
|
981
|
+
if (options.input.files) {
|
|
982
|
+
options.input.files = [];
|
|
983
|
+
}
|
|
984
|
+
const standardMessages = await buildMessagesArray(options);
|
|
985
|
+
return standardMessages.map((msg) => ({
|
|
986
|
+
role: msg.role,
|
|
987
|
+
content: typeof msg.content === "string" ? msg.content : msg.content,
|
|
988
|
+
}));
|
|
989
|
+
}
|
|
990
|
+
// Validate provider supports vision
|
|
991
|
+
if (!ProviderImageAdapter.supportsVision(provider, model)) {
|
|
992
|
+
throw new Error(`Provider ${provider} with model ${model} does not support vision processing. ` +
|
|
993
|
+
`Supported providers: ${ProviderImageAdapter.getVisionProviders().join(", ")}`);
|
|
994
|
+
}
|
|
995
|
+
const messages = [];
|
|
996
|
+
// Build enhanced system prompt
|
|
997
|
+
const systemPrompt = buildMultimodalSystemPrompt(options, pdfFiles.length > 0);
|
|
552
998
|
if (systemPrompt.trim()) {
|
|
553
999
|
messages.push({
|
|
554
1000
|
role: "system",
|
|
@@ -556,8 +1002,8 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
|
|
|
556
1002
|
});
|
|
557
1003
|
}
|
|
558
1004
|
// Add conversation history if available
|
|
1005
|
+
const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0;
|
|
559
1006
|
if (hasConversationHistory && options.conversationHistory) {
|
|
560
|
-
// Convert conversation history to MultimodalChatMessage format
|
|
561
1007
|
options.conversationHistory.forEach((msg) => {
|
|
562
1008
|
messages.push({
|
|
563
1009
|
role: msg.role,
|
|
@@ -569,29 +1015,22 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
|
|
|
569
1015
|
try {
|
|
570
1016
|
let userContent;
|
|
571
1017
|
if (options.input.content && options.input.content.length > 0) {
|
|
572
|
-
// Advanced content format - convert to provider-specific format
|
|
573
1018
|
userContent = await convertContentToProviderFormat(options.input.content, provider, model);
|
|
574
1019
|
}
|
|
575
1020
|
else if ((options.input.images && options.input.images.length > 0) ||
|
|
576
1021
|
pdfFiles.length > 0) {
|
|
577
|
-
// Simple images/PDFs format - convert to provider-specific format
|
|
578
1022
|
userContent = await convertMultimodalToProviderFormat(options.input.text, options.input.images || [], pdfFiles, provider, model);
|
|
579
1023
|
}
|
|
580
1024
|
else {
|
|
581
|
-
// Text-only fallback
|
|
582
1025
|
userContent = options.input.text;
|
|
583
1026
|
}
|
|
584
|
-
// 🔧 CRITICAL FIX: Handle multimodal content properly for Vercel AI SDK
|
|
585
1027
|
if (typeof userContent === "string") {
|
|
586
|
-
// Simple text content - use standard MultimodalChatMessage format
|
|
587
1028
|
messages.push({
|
|
588
1029
|
role: "user",
|
|
589
1030
|
content: userContent,
|
|
590
1031
|
});
|
|
591
1032
|
}
|
|
592
1033
|
else {
|
|
593
|
-
// 🔧 MULTIMODAL CONTENT: Wrap the content array in a proper message object
|
|
594
|
-
// The Vercel AI SDK expects messages with multimodal content arrays
|
|
595
1034
|
messages.push({
|
|
596
1035
|
role: "user",
|
|
597
1036
|
content: userContent,
|
|
@@ -900,9 +1339,22 @@ async function convertMultimodalToProviderFormat(text, images, pdfFiles, provide
|
|
|
900
1339
|
return content;
|
|
901
1340
|
}
|
|
902
1341
|
/**
 * Type guard for FileWithMetadata objects.
 *
 * A value qualifies when it is a non-null, non-Buffer object that exposes
 * both a `buffer` and a `filename` property.
 *
 * @param {*} file - Candidate file input (Buffer, string, object, or nullish).
 * @returns {boolean} True when `file` has the FileWithMetadata shape.
 */
function isFileWithMetadata(file) {
    // `typeof null` is "object" and the `in` operator throws on null, so null
    // has to be rejected explicitly before the property checks.
    return (typeof file === "object" &&
        file !== null &&
        !Buffer.isBuffer(file) &&
        "buffer" in file &&
        "filename" in file);
}
|
|
1350
|
+
/**
|
|
1351
|
+
* Extract filename from file input.
|
|
1352
|
+
* Supports Buffers (generic name), strings (path/URL), and FileWithMetadata objects.
|
|
904
1353
|
*/
|
|
905
1354
|
function extractFilename(file, index = 0) {
|
|
1355
|
+
if (isFileWithMetadata(file)) {
|
|
1356
|
+
return file.filename;
|
|
1357
|
+
}
|
|
906
1358
|
if (typeof file === "string") {
|
|
907
1359
|
if (file.startsWith("http")) {
|
|
908
1360
|
try {
|
|
@@ -917,6 +1369,207 @@ function extractFilename(file, index = 0) {
|
|
|
917
1369
|
}
|
|
918
1370
|
return `file-${index + 1}`;
|
|
919
1371
|
}
|
|
1372
|
+
/**
 * Report the size of a file input.
 *
 * - FileWithMetadata: length of its `buffer`.
 * - Buffer: its length.
 * - String naming an existing path: the size reported by `statSync`
 *   (0 if the stat call fails).
 * - Any other string (URL, data URI, ...): the string length, as a rough estimate.
 * - Anything else: 0.
 *
 * @param {*} file - File input.
 * @returns {number} Size in bytes, or the estimate described above.
 */
function getFileSize(file) {
    if (isFileWithMetadata(file)) {
        const { buffer } = file;
        return buffer.length;
    }
    if (Buffer.isBuffer(file)) {
        return file.length;
    }
    if (typeof file !== "string") {
        return 0;
    }
    if (!existsSync(file)) {
        // For URLs and data URIs, use string length as rough estimate
        return file.length;
    }
    try {
        const { size } = statSync(file);
        return size;
    }
    catch {
        // The path existed a moment ago but could not be stat'ed.
        return 0;
    }
}
|
|
1397
|
+
/**
 * Resolve a file input to a Buffer.
 *
 * - FileWithMetadata: its `buffer` property.
 * - Buffer: returned unchanged.
 * - String naming an existing path: the file contents (null if reading fails).
 * - Anything else, including URLs and data URIs: null, because those cannot
 *   be lazily registered without being downloaded first.
 *
 * @param {*} file - File input.
 * @returns {Promise<Buffer|null>} The file bytes, or null when unavailable.
 */
async function getFileBuffer(file) {
    if (isFileWithMetadata(file)) {
        return file.buffer;
    }
    if (Buffer.isBuffer(file)) {
        return file;
    }
    const isExistingPath = typeof file === "string" && existsSync(file);
    if (!isExistingPath) {
        // URLs and data URIs can't be lazily registered (need download first)
        return null;
    }
    try {
        return readFileSync(file);
    }
    catch {
        return null;
    }
}
|
|
1422
|
+
/**
|
|
1423
|
+
* Determine the source type of a file input.
|
|
1424
|
+
*/
|
|
1425
|
+
function getFileSource(file) {
|
|
1426
|
+
if (isFileWithMetadata(file)) {
|
|
1427
|
+
return "buffer";
|
|
1428
|
+
}
|
|
1429
|
+
if (Buffer.isBuffer(file)) {
|
|
1430
|
+
return "buffer";
|
|
1431
|
+
}
|
|
1432
|
+
if (typeof file === "string") {
|
|
1433
|
+
if (file.startsWith("data:")) {
|
|
1434
|
+
return "datauri";
|
|
1435
|
+
}
|
|
1436
|
+
if (file.startsWith("http://") || file.startsWith("https://")) {
|
|
1437
|
+
return "url";
|
|
1438
|
+
}
|
|
1439
|
+
if (existsSync(file)) {
|
|
1440
|
+
return "path";
|
|
1441
|
+
}
|
|
1442
|
+
}
|
|
1443
|
+
return "buffer";
|
|
1444
|
+
}
|
|
1445
|
+
/**
 * Attempt to register a file with the file reference registry so it can be
 * processed lazily instead of upfront.
 *
 * @param {*} file - File input.
 * @param {number} fileSize - Size in bytes; only used for the log message.
 * @param {object} registry - Registry exposing an async `register` method.
 * @param {number} [index=0] - Position of the file, used for fallback naming.
 * @returns {Promise<boolean>} True when the file was registered; false when no
 *   buffer could be obtained or registration threw, in which case the caller
 *   should fall through to full processing.
 */
async function tryRegisterFileReference(file, fileSize, registry, index = 0) {
    try {
        const contents = await getFileBuffer(file);
        if (!contents) {
            return false;
        }
        const filename = extractFilename(file, index);
        await registry.register(contents, getFileSource(file), { filename });
        const sizeInKb = (fileSize / 1024).toFixed(0);
        logger.info(`[FileDetector] Registered "${filename}" (${sizeInKb} KB) as lazy reference — skipping upfront processing`);
        return true;
    }
    catch (regError) {
        const reason = regError instanceof Error ? regError.message : String(regError);
        logger.warn(`[FileDetector] Failed to register file as reference, falling back to full processing: ${reason}`);
        return false;
    }
}
|
|
1467
|
+
/** Markdown code-fence language keyed by lower-cased MIME type (no parameters). */
const LANGUAGE_HINT_BY_MIME_TYPE = new Map([
    ["text/javascript", "javascript"],
    ["text/typescript", "typescript"],
    ["text/x-python", "python"],
    ["text/x-java-source", "java"],
    ["text/x-go", "go"],
    ["text/x-rustsrc", "rust"],
    ["text/x-ruby", "ruby"],
    ["text/x-php", "php"],
    ["text/x-c", "c"],
    ["text/x-c++", "cpp"],
    ["text/x-csharp", "csharp"],
    ["text/x-swift", "swift"],
    ["text/x-kotlin", "kotlin"],
    ["text/x-scala", "scala"],
    ["text/x-shellscript", "bash"],
    ["text/x-powershell", "powershell"],
    ["text/x-sql", "sql"],
    ["text/x-r", "r"],
    ["text/x-lua", "lua"],
    ["text/x-perl", "perl"],
    ["text/x-dart", "dart"],
    ["text/x-elixir", "elixir"],
    ["text/x-erlang", "erlang"],
    ["text/x-haskell", "haskell"],
    ["text/x-clojure", "clojure"],
    ["text/x-lisp", "lisp"],
    ["text/html", "html"],
    ["text/css", "css"],
    ["text/markdown", "markdown"],
    ["application/json", "json"],
    ["application/xml", "xml"],
    ["text/xml", "xml"],
    ["application/yaml", "yaml"],
    ["application/x-yaml", "yaml"],
]);
/** Markdown code-fence language keyed by lower-cased filename extension. */
const LANGUAGE_HINT_BY_EXTENSION = new Map([
    ["js", "javascript"],
    ["mjs", "javascript"],
    ["cjs", "javascript"],
    ["jsx", "javascript"],
    ["ts", "typescript"],
    ["tsx", "typescript"],
    ["py", "python"],
    ["java", "java"],
    ["go", "go"],
    ["rs", "rust"],
    ["rb", "ruby"],
    ["php", "php"],
    ["c", "c"],
    ["cpp", "cpp"],
    ["cc", "cpp"],
    ["h", "c"],
    ["hpp", "cpp"],
    ["cs", "csharp"],
    ["swift", "swift"],
    ["kt", "kotlin"],
    ["kts", "kotlin"],
    ["scala", "scala"],
    ["sh", "bash"],
    ["bash", "bash"],
    ["zsh", "bash"],
    ["ps1", "powershell"],
    ["sql", "sql"],
    ["r", "r"],
    ["lua", "lua"],
    ["pl", "perl"],
    ["perl", "perl"],
    ["dart", "dart"],
    ["ex", "elixir"],
    ["exs", "elixir"],
    ["erl", "erlang"],
    ["hs", "haskell"],
    ["clj", "clojure"],
    ["lisp", "lisp"],
    ["vim", "vim"],
    ["html", "html"],
    ["htm", "html"],
    ["css", "css"],
    ["md", "markdown"],
    ["markdown", "markdown"],
    ["json", "json"],
    ["xml", "xml"],
    ["yaml", "yaml"],
    ["yml", "yaml"],
    ["toml", "toml"],
    ["ini", "ini"],
    ["cfg", "ini"],
]);
/**
 * Get a language hint for code fencing based on MIME type or filename extension.
 *
 * The MIME type is tried first (case-insensitive, parameters such as
 * `; charset=...` ignored); the filename extension is the fallback.
 *
 * @param {string|undefined|null} mimeType - Detected MIME type; a missing or
 *   non-string value simply skips the MIME lookup.
 * @param {string} filename - File name or path used for the extension fallback.
 * @returns {string|null} Language identifier for markdown code blocks, or
 *   null when neither lookup matches (generic text).
 */
function getLanguageHint(mimeType, filename) {
    // Map lookups only see own entries, so inherited names such as
    // "constructor" or "toString" can never be mistaken for a language.
    if (typeof mimeType === "string") {
        const normalizedMime = mimeType.toLowerCase().split(";")[0].trim();
        const mimeHint = LANGUAGE_HINT_BY_MIME_TYPE.get(normalizedMime);
        if (mimeHint) {
            return mimeHint;
        }
    }
    if (typeof filename !== "string") {
        return null;
    }
    // Without a dot there is no extension; a bare name like "go" or "json"
    // must not be treated as if it were one.
    const dotIndex = filename.lastIndexOf(".");
    if (dotIndex === -1) {
        return null;
    }
    const ext = filename.slice(dotIndex + 1).toLowerCase();
    return LANGUAGE_HINT_BY_EXTENSION.get(ext) ?? null;
}
|
|
920
1573
|
/**
 * Build the note appended to a CSV section telling the model it may work on
 * the inlined data directly or optionally call the analyzeCSV tool.
 *
 * @param {string} filePath - Path (or display name) interpolated into the
 *   suggested analyzeCSV calls.
 * @returns {string} Instruction text, wrapped in leading/trailing newlines.
 */
function buildCSVToolInstructions(filePath) {
    const note = `**NOTE**: You can perform calculations directly on the CSV data shown above. For advanced operations on the full file (counting by column, grouping, etc.), you may optionally use the analyzeCSV tool with filePath="${filePath}".`;
    const example = `Example: analyzeCSV(filePath="${filePath}", operation="count_by_column", column="merchant_id")`;
    return `\n${note}\n\n${example}\n\n`;
}
|