@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Source Code Processor
|
|
3
|
+
*
|
|
4
|
+
* Processes source code files for 50+ programming languages.
|
|
5
|
+
* Uses extension-based detection as primary method (more reliable than MIME types for code).
|
|
6
|
+
*
|
|
7
|
+
* Key features:
|
|
8
|
+
* - Supports 50+ programming languages via extension detection
|
|
9
|
+
* - Handles exact filename matches (Dockerfile, Makefile, etc.)
|
|
10
|
+
* - Line count truncation to prevent token overflow
|
|
11
|
+
* - Language detection for syntax highlighting metadata
|
|
12
|
+
*
|
|
13
|
+
* Priority: 120 (lower priority - text-based content, processed after binary/document formats)
|
|
14
|
+
*
|
|
15
|
+
* @module processors/code/SourceCodeProcessor
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* import { sourceCodeProcessor, processSourceCode, isSourceCodeFile } from "./code/index.js";
|
|
20
|
+
*
|
|
21
|
+
* // Check if a file is source code
|
|
22
|
+
* if (isSourceCodeFile("text/plain", "app.ts")) {
|
|
23
|
+
* const result = await processSourceCode({
|
|
24
|
+
* id: "file-123",
|
|
25
|
+
* name: "app.ts",
|
|
26
|
+
* mimetype: "text/plain",
|
|
27
|
+
* size: 1024,
|
|
28
|
+
* buffer: codeBuffer,
|
|
29
|
+
* });
|
|
30
|
+
*
|
|
31
|
+
* if (result.success) {
|
|
32
|
+
* console.log(`Language: ${result.data.language}`);
|
|
33
|
+
* console.log(`Lines: ${result.data.lineCount}`);
|
|
34
|
+
* }
|
|
35
|
+
* }
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
|
|
39
|
+
import type { FileInfo, FileProcessingResult, ProcessOptions } from "../base/types.js";
|
|
40
|
+
import { detectLanguageFromFilename } from "../config/languageMap.js";
|
|
41
|
+
export type { ProcessedSourceCode } from "../base/types.js";
|
|
42
|
+
import type { ProcessedSourceCode } from "../base/types.js";
|
|
43
|
+
/**
 * Type declaration for the source code file processor.
 *
 * The processor extends `BaseFileProcessor` and produces a
 * `ProcessedSourceCode` result. File detection is driven by the filename
 * (exact names such as Dockerfile/Makefile, then the extension) because MIME
 * types reported for source files are frequently just "text/plain".
 *
 * @example
 * ```typescript
 * const processor = new SourceCodeProcessor();
 * const result = await processor.processFile({
 *   id: "file-123",
 *   name: "main.py",
 *   mimetype: "text/plain",
 *   size: 2048,
 *   buffer: pythonCodeBuffer,
 * });
 * if (result.success) {
 *   console.log(`Language: ${result.data.language}`);
 * }
 * ```
 */
export declare class SourceCodeProcessor extends BaseFileProcessor<ProcessedSourceCode> {
    /** File extensions that are treated as source code. */
    private static readonly supportedExtensions;

    /**
     * MIME types commonly reported for source code.
     * Secondary to the filename checks, since these values are often unreliable.
     */
    private static readonly supportedMimeTypes;

    constructor();

    /**
     * Decide whether a file is supported, looking at the filename before the
     * MIME type.
     *
     * @param mimetype - MIME type reported for the file (often unreliable for code)
     * @param filename - File name, or a path ending in the file name
     * @returns true when the file is recognised as source code
     */
    isFileSupported(mimetype: string, filename: string): boolean;

    /**
     * Turn the raw bytes into a `ProcessedSourceCode` record: decode as UTF-8,
     * detect the language from the file name and truncate over-long content.
     *
     * @param buffer - Raw file content
     * @param fileInfo - Information about the file being processed
     * @returns The processed source code together with its metadata
     */
    protected buildProcessedResult(
        buffer: Buffer,
        fileInfo: FileInfo,
    ): ProcessedSourceCode;

    /**
     * Extract the extension of a filename.
     * Yields the extension with its leading dot (e.g. ".ts"), or null when
     * there is none.
     */
    private getExtension;
}
|
|
109
|
+
/**
 * Shared, module-level `SourceCodeProcessor` instance.
 * The helper functions exported from this module operate on this instance.
 */
export declare const sourceCodeProcessor: SourceCodeProcessor;
|
|
114
|
+
/**
 * Report whether a file should be handled as source code.
 *
 * @param mimetype - MIME type reported for the file
 * @param filename - File name used for the name/extension based detection
 * @returns true when the shared processor supports the file
 *
 * @example
 * ```typescript
 * if (isSourceCodeFile("text/plain", "app.ts")) {
 *   console.log("Handled by the source code processor");
 * }
 * ```
 */
export declare function isSourceCodeFile(
    mimetype: string,
    filename: string,
): boolean;
|
|
129
|
+
/**
 * Check a file size against the configured source code size limit.
 *
 * @param sizeBytes - Size of the file, in bytes
 * @returns true when the size does not exceed the limit
 */
export declare function validateSourceCodeSize(
    sizeBytes: number,
): boolean;
|
|
136
|
+
/**
 * Process a source code file with the shared processor instance.
 *
 * @param fileInfo - Description of the file to process
 * @param options - Optional processing options
 * @returns A promise for the processing result (see `FileProcessingResult`)
 *
 * @example
 * ```typescript
 * const result = await processSourceCode({
 *   id: "file-123",
 *   name: "main.py",
 *   mimetype: "text/plain",
 *   size: 2048,
 *   buffer: pythonCodeBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(`Detected language: ${result.data.language}`);
 *   console.log(`Line count: ${result.data.lineCount}`);
 *   console.log(`Truncated: ${result.data.truncated}`);
 * }
 * ```
 */
export declare function processSourceCode(
    fileInfo: FileInfo,
    options?: ProcessOptions,
): Promise<FileProcessingResult<ProcessedSourceCode>>;
|
|
161
|
+
/**
 * Backward-compatible alias of `detectLanguageFromFilename`
 * (kept for the Curator codebase, per the original note).
 *
 * @param filename - File name to inspect
 * @returns The detected language name; see `detectLanguageFromFilename`
 *          in ../config/languageMap.js for the exact contract
 *
 * @example
 * ```typescript
 * detectLanguage("app.ts");
 * detectLanguage("Dockerfile");
 * ```
 */
export declare const detectLanguage: typeof detectLanguageFromFilename;
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Source Code Processor
|
|
3
|
+
*
|
|
4
|
+
* Processes source code files for 50+ programming languages.
|
|
5
|
+
* Uses extension-based detection as primary method (more reliable than MIME types for code).
|
|
6
|
+
*
|
|
7
|
+
* Key features:
|
|
8
|
+
* - Supports 50+ programming languages via extension detection
|
|
9
|
+
* - Handles exact filename matches (Dockerfile, Makefile, etc.)
|
|
10
|
+
* - Line count truncation to prevent token overflow
|
|
11
|
+
* - Language detection for syntax highlighting metadata
|
|
12
|
+
*
|
|
13
|
+
* Priority: 120 (lower priority - text-based content, processed after binary/document formats)
|
|
14
|
+
*
|
|
15
|
+
* @module processors/code/SourceCodeProcessor
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* import { sourceCodeProcessor, processSourceCode, isSourceCodeFile } from "./code/index.js";
|
|
20
|
+
*
|
|
21
|
+
* // Check if a file is source code
|
|
22
|
+
* if (isSourceCodeFile("text/plain", "app.ts")) {
|
|
23
|
+
* const result = await processSourceCode({
|
|
24
|
+
* id: "file-123",
|
|
25
|
+
* name: "app.ts",
|
|
26
|
+
* mimetype: "text/plain",
|
|
27
|
+
* size: 1024,
|
|
28
|
+
* buffer: codeBuffer,
|
|
29
|
+
* });
|
|
30
|
+
*
|
|
31
|
+
* if (result.success) {
|
|
32
|
+
* console.log(`Language: ${result.data.language}`);
|
|
33
|
+
* console.log(`Lines: ${result.data.lineCount}`);
|
|
34
|
+
* }
|
|
35
|
+
* }
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
import { basename as pathBasename } from "node:path";
|
|
39
|
+
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
|
|
40
|
+
import { EXACT_FILENAME_MAP, SIZE_LIMITS, SOURCE_CODE_EXTENSIONS, } from "../config/index.js";
|
|
41
|
+
import { detectLanguageFromFilename } from "../config/languageMap.js";
|
|
42
|
+
// =============================================================================
|
|
43
|
+
// SOURCE CODE PROCESSOR
|
|
44
|
+
// =============================================================================
|
|
45
|
+
/**
 * Source Code Processor - recognises and decodes source code files.
 *
 * Detection order used by `isFileSupported`:
 *   1. exact filename match against EXACT_FILENAME_MAP (full string, then basename),
 *   2. file extension against SOURCE_CODE_EXTENSIONS,
 *   3. the base class check, reached only when 1 and 2 both fail.
 *
 * Filename-based detection comes first because MIME types reported for
 * source code are often unreliable (many files arrive as plain "text/plain").
 *
 * @example
 * ```typescript
 * const processor = new SourceCodeProcessor();
 *
 * const result = await processor.processFile({
 *   id: "file-123",
 *   name: "main.py",
 *   mimetype: "text/plain",
 *   size: 2048,
 *   buffer: pythonCodeBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(`Language: ${result.data.language}`);
 * }
 * ```
 */
export class SourceCodeProcessor extends BaseFileProcessor {
    /**
     * File extensions treated as source code.
     * A copy of SOURCE_CODE_EXTENSIONS taken when the class is defined.
     */
    static supportedExtensions = [
        ...SOURCE_CODE_EXTENSIONS,
    ];
    /**
     * MIME types commonly reported for source code files.
     * Passed to the base class; filename-based detection is tried first
     * because these values are often unreliable.
     */
    static supportedMimeTypes = [
        "text/plain",
        "text/x-python",
        "text/javascript",
        "text/typescript",
        "application/javascript",
        "application/typescript",
        "application/x-javascript",
        "text/x-java",
        "text/x-java-source",
        "text/x-c",
        "text/x-csrc",
        "text/x-c++",
        "text/x-c++src",
        "text/x-csharp",
        "text/x-go",
        "text/x-rust",
        "text/x-ruby",
        "text/x-php",
        "text/x-sh",
        "text/x-shellscript",
        "application/x-sh",
        "text/x-perl",
        "text/x-lua",
        "text/x-sql",
        "text/x-swift",
        "text/x-kotlin",
        "text/x-scala",
        "text/x-haskell",
        "text/x-elixir",
        "text/x-erlang",
        "text/x-clojure",
        "text/x-fsharp",
        "text/x-ocaml",
        "text/x-lisp",
        "text/x-scheme",
        "text/x-groovy",
        "text/x-powershell",
        "text/x-r",
        "text/x-julia",
        "text/x-nim",
        "text/x-zig",
        "text/x-dart",
        "text/x-crystal",
        "text/x-d",
        "text/x-asm",
        "text/x-fortran",
        "text/x-cobol",
        "text/x-pascal",
        "text/x-ada",
        "text/css",
        "text/x-scss",
        "text/x-sass",
        "text/x-less",
        "application/x-httpd-php",
    ];
    /**
     * Configure the base processor with the source code size limit, a 30 s
     * timeout, and the MIME type / extension lists declared above.
     */
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS.SOURCE_CODE_MAX_MB,
            timeoutMs: 30000,
            supportedMimeTypes: SourceCodeProcessor.supportedMimeTypes,
            supportedExtensions: SourceCodeProcessor.supportedExtensions,
            fileTypeName: "SourceCode",
            defaultFilename: "code.txt",
        });
    }
    /**
     * Decide whether a file is supported, preferring the filename over the
     * MIME type.
     *
     * @param mimetype - MIME type of the file (often unreliable for source code)
     * @param filename - File name, or a path ending in the file name
     * @returns true if the file is a supported source code file; always false
     *          when `filename` is empty or missing
     */
    isFileSupported(mimetype, filename) {
        if (!filename) {
            return false;
        }
        // Only the map's OWN entries count. A bare `EXACT_FILENAME_MAP[name]`
        // would also find inherited members such as "constructor" or
        // "toString" and wrongly report files with those names as supported.
        const isExactName = (candidate) =>
            Object.prototype.hasOwnProperty.call(EXACT_FILENAME_MAP, candidate) &&
                Boolean(EXACT_FILENAME_MAP[candidate]);
        // Exact filename matches first (Dockerfile, Makefile, etc.)
        if (isExactName(filename)) {
            return true;
        }
        // A full path may have been passed, so retry with the last segment.
        if (isExactName(pathBasename(filename))) {
            return true;
        }
        // getExtension() already returns a lower-cased extension.
        const ext = this.getExtension(filename);
        if (ext !== null &&
            SourceCodeProcessor.supportedExtensions.includes(ext)) {
            return true;
        }
        // Nothing matched by name: defer to the base class check.
        return super.isFileSupported(mimetype, filename);
    }
    /**
     * Build the processed source code result.
     * Decodes the buffer as UTF-8, detects the language from the file name and
     * truncates the content to SIZE_LIMITS.MAX_SOURCE_CODE_LINES lines.
     *
     * Lines are counted by splitting on "\n", so content that ends with a
     * newline counts one extra (empty) trailing line.
     *
     * @param buffer - Raw file content
     * @param fileInfo - Original file information
     * @returns Processed source code with metadata
     */
    buildProcessedResult(buffer, fileInfo) {
        const content = buffer.toString("utf-8");
        const lines = content.split("\n");
        const originalLineCount = lines.length;
        const language = detectLanguageFromFilename(fileInfo.name || "");
        const maxLines = SIZE_LIMITS.MAX_SOURCE_CODE_LINES;
        const truncated = originalLineCount > maxLines;
        // When truncating, keep the first maxLines lines and append a marker
        // that records both the cut-off point and the original length.
        const finalContent = truncated
            ? `${lines.slice(0, maxLines).join("\n")}\n\n// ... truncated at ${maxLines} lines, total ${originalLineCount} lines ...`
            : content;
        return {
            content: finalContent,
            language,
            // Number of lines kept in `content` (marker line not counted).
            lineCount: Math.min(originalLineCount, maxLines),
            truncated,
            encoding: "utf-8",
            buffer,
            mimetype: fileInfo.mimetype || "text/plain",
            size: fileInfo.size,
            filename: this.getFilename(fileInfo),
        };
    }
    /**
     * Extract the lower-cased file extension from a filename or path.
     *
     * Only the last path segment is inspected, so a dot inside a directory
     * name (e.g. "conf.d/Makefile") is not mistaken for an extension.
     *
     * @param filename - Filename or path to extract the extension from
     * @returns Extension with leading dot (e.g., ".ts") or null if no extension
     */
    getExtension(filename) {
        const match = pathBasename(filename).toLowerCase().match(/\.[^.]+$/);
        return match ? match[0] : null;
    }
}
|
|
226
|
+
// =============================================================================
|
|
227
|
+
// SINGLETON INSTANCE
|
|
228
|
+
// =============================================================================
|
|
229
|
+
/**
 * Shared SourceCodeProcessor instance, created once when this module loads.
 * The helper functions below all go through this instance.
 */
export const sourceCodeProcessor = new SourceCodeProcessor();
|
|
234
|
+
// =============================================================================
|
|
235
|
+
// HELPER FUNCTIONS
|
|
236
|
+
// =============================================================================
|
|
237
|
+
/**
 * Report whether a file should be handled as source code.
 * Thin wrapper around `sourceCodeProcessor.isFileSupported`.
 *
 * @param mimetype - MIME type reported for the file
 * @param filename - File name used for name/extension based detection
 * @returns true when the shared processor supports the file
 *
 * @example
 * ```typescript
 * if (isSourceCodeFile("text/plain", "app.ts")) {
 *   console.log("Handled by the source code processor");
 * }
 * ```
 */
export function isSourceCodeFile(mimetype, filename) {
    const supported = sourceCodeProcessor.isFileSupported(mimetype, filename);
    return supported;
}
|
|
254
|
+
/**
 * Check a file size against the configured source code limit.
 * The limit is SIZE_LIMITS.SOURCE_CODE_MAX_MB converted to bytes
 * (1 MB = 1024 * 1024 bytes); a size equal to the limit is accepted.
 *
 * @param sizeBytes - File size in bytes
 * @returns true when the size does not exceed the limit
 */
export function validateSourceCodeSize(sizeBytes) {
    return sizeBytes <= SIZE_LIMITS.SOURCE_CODE_MAX_MB * 1024 * 1024;
}
|
|
264
|
+
/**
 * Process a source code file with the shared processor instance.
 * Forwards both arguments unchanged to `sourceCodeProcessor.processFile`.
 *
 * @param fileInfo - Description of the file to process
 * @param options - Optional processing options
 * @returns Promise resolving to whatever `processFile` produces
 *
 * @example
 * ```typescript
 * const result = await processSourceCode({
 *   id: "file-123",
 *   name: "main.py",
 *   mimetype: "text/plain",
 *   size: 2048,
 *   buffer: pythonCodeBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(`Detected language: ${result.data.language}`);
 *   console.log(`Line count: ${result.data.lineCount}`);
 *   console.log(`Truncated: ${result.data.truncated}`);
 * }
 * ```
 */
export async function processSourceCode(fileInfo, options) {
    const pendingResult = sourceCodeProcessor.processFile(fileInfo, options);
    return pendingResult;
}
|
|
291
|
+
/**
 * Backward-compatible alias of `detectLanguageFromFilename`
 * (kept for the Curator codebase, per the original note).
 * It is the very same function object, exported under a second name.
 *
 * @param filename - File name to inspect
 * @returns The detected language name; see `detectLanguageFromFilename`
 *          in ../config/languageMap.js for the exact contract
 *
 * @example
 * ```typescript
 * detectLanguage("app.ts");
 * detectLanguage("Dockerfile");
 * ```
 */
export const detectLanguage = detectLanguageFromFilename;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Processors Module
|
|
3
|
+
*
|
|
4
|
+
* Provides file processors for source code files across 50+ programming languages.
|
|
5
|
+
* Uses extension-based detection as primary method for reliable identification.
|
|
6
|
+
*
|
|
7
|
+
* @module processors/code
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import {
|
|
12
|
+
* // Processor class and singleton
|
|
13
|
+
* SourceCodeProcessor,
|
|
14
|
+
* sourceCodeProcessor,
|
|
15
|
+
*
|
|
16
|
+
* // Helper functions
|
|
17
|
+
* isSourceCodeFile,
|
|
18
|
+
* processSourceCode,
|
|
19
|
+
* validateSourceCodeSize,
|
|
20
|
+
* detectLanguage,
|
|
21
|
+
*
|
|
22
|
+
* // Types
|
|
23
|
+
* type ProcessedSourceCode,
|
|
24
|
+
* } from "./code/index.js";
|
|
25
|
+
*
|
|
26
|
+
* // Check if a file is source code
|
|
27
|
+
* if (isSourceCodeFile("text/plain", "main.py")) {
|
|
28
|
+
* const result = await processSourceCode({
|
|
29
|
+
* id: "file-123",
|
|
30
|
+
* name: "main.py",
|
|
31
|
+
* mimetype: "text/plain",
|
|
32
|
+
* size: 1024,
|
|
33
|
+
* buffer: codeBuffer,
|
|
34
|
+
* });
|
|
35
|
+
*
|
|
36
|
+
* if (result.success) {
|
|
37
|
+
* console.log(`Language: ${result.data.language}`); // "Python"
|
|
38
|
+
* console.log(`Lines: ${result.data.lineCount}`);
|
|
39
|
+
* }
|
|
40
|
+
* }
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
// Source code processor: class, shared instance, helpers and result type.
export {
    SourceCodeProcessor,
    sourceCodeProcessor,
    detectLanguage,
    isSourceCodeFile,
    processSourceCode,
    validateSourceCodeSize,
    type ProcessedSourceCode,
} from "./SourceCodeProcessor.js";

// Config processor: class, shared instance, helpers and result type.
export {
    ConfigProcessor,
    configProcessor,
    isConfigFile,
    processConfig,
    type ProcessedConfig,
} from "./ConfigProcessor.js";
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Processors Module
|
|
3
|
+
*
|
|
4
|
+
* Provides file processors for source code files across 50+ programming languages.
|
|
5
|
+
* Uses extension-based detection as primary method for reliable identification.
|
|
6
|
+
*
|
|
7
|
+
* @module processors/code
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import {
|
|
12
|
+
* // Processor class and singleton
|
|
13
|
+
* SourceCodeProcessor,
|
|
14
|
+
* sourceCodeProcessor,
|
|
15
|
+
*
|
|
16
|
+
* // Helper functions
|
|
17
|
+
* isSourceCodeFile,
|
|
18
|
+
* processSourceCode,
|
|
19
|
+
* validateSourceCodeSize,
|
|
20
|
+
* detectLanguage,
|
|
21
|
+
*
|
|
22
|
+
* // Types
|
|
23
|
+
* type ProcessedSourceCode,
|
|
24
|
+
* } from "./code/index.js";
|
|
25
|
+
*
|
|
26
|
+
* // Check if a file is source code
|
|
27
|
+
* if (isSourceCodeFile("text/plain", "main.py")) {
|
|
28
|
+
* const result = await processSourceCode({
|
|
29
|
+
* id: "file-123",
|
|
30
|
+
* name: "main.py",
|
|
31
|
+
* mimetype: "text/plain",
|
|
32
|
+
* size: 1024,
|
|
33
|
+
* buffer: codeBuffer,
|
|
34
|
+
* });
|
|
35
|
+
*
|
|
36
|
+
* if (result.success) {
|
|
37
|
+
* console.log(`Language: ${result.data.language}`); // "Python"
|
|
38
|
+
* console.log(`Lines: ${result.data.lineCount}`);
|
|
39
|
+
* }
|
|
40
|
+
* }
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
// -----------------------------------------------------------------------------
// Source code processor: class, shared instance and helper functions
// -----------------------------------------------------------------------------
export {
    SourceCodeProcessor,
    sourceCodeProcessor,
    detectLanguage,
    isSourceCodeFile,
    processSourceCode,
    validateSourceCodeSize,
} from "./SourceCodeProcessor.js";

// -----------------------------------------------------------------------------
// Config processor: class, shared instance and helper functions
// -----------------------------------------------------------------------------
export {
    ConfigProcessor,
    configProcessor,
    isConfigFile,
    processConfig,
} from "./ConfigProcessor.js";
|