@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunker Registry
|
|
3
|
+
*
|
|
4
|
+
* Centralized registry for all chunking strategies with metadata
|
|
5
|
+
* and discovery capabilities. Follows the BaseRegistry pattern.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseRegistry } from "../core/infrastructure/index.js";
|
|
8
|
+
import { logger } from "../utils/logger.js";
|
|
9
|
+
import { ChunkingError, RAGErrorCodes } from "./errors/RAGError.js";
|
|
10
|
+
/**
 * Built-in chunker metadata, keyed by canonical strategy name.
 *
 * Every entry carries the same five fields, in this order:
 *   - description:      human-readable summary of the strategy
 *   - defaultConfig:    default option values advertised for the strategy
 *   - supportedOptions: option names the strategy is documented to accept
 *   - useCases:         free-text scenarios, searched by getChunkersByUseCase
 *   - aliases:          alternative names registered in the alias map
 */
const DEFAULT_CHUNKER_METADATA = {
  // --- size-based strategies -------------------------------------------
  character: {
    description: "Splits text into fixed-size character chunks with optional overlap",
    defaultConfig: { maxSize: 1000, overlap: 100 },
    supportedOptions: ["maxSize", "overlap", "minSize"],
    useCases: ["Simple text processing", "Fixed-size chunks needed", "Language-agnostic splitting"],
    aliases: ["char", "fixed-size", "fixed"],
  },
  recursive: {
    description: "Recursively splits text using ordered separators (paragraphs, sentences, etc.)",
    defaultConfig: { maxSize: 1000, overlap: 100, separators: ["\n\n", "\n", ". ", " ", ""] },
    supportedOptions: ["maxSize", "overlap", "separators", "keepSeparators", "minSize"],
    useCases: ["General text documents", "Preserving semantic boundaries", "Default choice for most use cases"],
    aliases: ["recursive-character", "langchain-default"],
  },
  sentence: {
    description: "Splits text by sentence boundaries for semantically meaningful chunks",
    defaultConfig: { maxSize: 1000, overlap: 1 },
    supportedOptions: ["maxSize", "overlap", "boundaryDetection", "maxSentences"],
    useCases: ["Q&A applications", "Sentence-level analysis", "Preserving complete thoughts"],
    aliases: ["sent", "sentence-based"],
  },
  token: {
    description: "Splits text by token count using a specific tokenizer (GPT, Claude, etc.)",
    defaultConfig: { maxSize: 512, overlap: 50 },
    supportedOptions: ["maxSize", "overlap", "tokenizer", "maxTokens"],
    useCases: ["Token-aware splitting", "Optimal for specific models", "Precise token budget management"],
    aliases: ["tok", "tokenized"],
  },
  // --- structure-aware strategies --------------------------------------
  markdown: {
    description: "Splits markdown content by headers and structural elements",
    defaultConfig: { maxSize: 1000, overlap: 0 },
    supportedOptions: ["maxSize", "overlap", "headerLevels", "splitCodeBlocks", "preserveMetadata"],
    useCases: ["Documentation processing", "README files", "Technical documentation"],
    aliases: ["md", "markdown-header"],
  },
  html: {
    description: "Splits HTML content by semantic tags while optionally stripping markup",
    defaultConfig: { maxSize: 1000, overlap: 0 },
    supportedOptions: ["maxSize", "overlap", "splitTags", "stripTags", "preserveAttributes"],
    useCases: ["Web content processing", "HTML documents", "Web scraping"],
    aliases: ["html-tag", "web"],
  },
  json: {
    description: "Splits JSON documents by object boundaries and nested structures",
    defaultConfig: { maxSize: 1000, overlap: 0 },
    supportedOptions: ["maxSize", "overlap", "maxDepth", "chunkKeys"],
    useCases: ["API response processing", "Structured data", "Configuration files"],
    aliases: ["json-object", "structured"],
  },
  latex: {
    description: "Splits LaTeX documents by sections, environments, and math blocks",
    defaultConfig: { maxSize: 1000, overlap: 0 },
    supportedOptions: ["maxSize", "overlap", "environments", "splitMathBlocks", "preserveMetadata"],
    useCases: ["Academic papers", "Scientific documents", "Mathematical content"],
    aliases: ["tex", "latex-section"],
  },
  // --- semantic strategies ---------------------------------------------
  semantic: {
    description: "Uses LLM to identify semantically meaningful split points",
    defaultConfig: { maxSize: 1000, overlap: 100 },
    supportedOptions: ["maxSize", "overlap", "modelName", "provider", "similarityThreshold"],
    useCases: ["Advanced semantic understanding", "Context-aware splitting", "AI-enhanced chunking"],
    aliases: ["llm", "ai-semantic"],
  },
  "semantic-markdown": {
    description: "Combines markdown splitting with semantic similarity for intelligent merging",
    defaultConfig: { maxSize: 1000, overlap: 100 },
    supportedOptions: ["maxSize", "overlap", "similarityThreshold", "maxMergeSize", "preserveMetadata"],
    useCases: ["Context-aware documentation", "Knowledge base creation", "Semantic search preparation"],
    aliases: ["semantic-md", "smart-markdown"],
  },
};
|
|
193
|
+
/**
 * Chunker Registry
 *
 * Registers every built-in chunking strategy under its canonical name and
 * keeps a lowercase alias table so a strategy can be looked up by either.
 *
 * Extends BaseRegistry. `items`, `register`, `get`, `list`, `clear` and
 * `ensureInitialized` come from the base class and are not defined here;
 * `items` is used as a Map-like collection (`has`, `size`).
 */
export class ChunkerRegistry extends BaseRegistry {
  /** Lazily created singleton; null until getInstance() is first called. */
  static instance = null;
  /** Lowercased alias -> canonical strategy name. */
  aliasMap = new Map();
  constructor() {
    super();
  }
  /**
   * Get singleton instance, creating it on first use.
   */
  static getInstance() {
    if (!ChunkerRegistry.instance) {
      ChunkerRegistry.instance = new ChunkerRegistry();
    }
    return ChunkerRegistry.instance;
  }
  /**
   * Reset singleton (for testing).
   *
   * Clears the current instance (items and aliases) and drops the static
   * reference. References captured earlier keep pointing at the old,
   * now-cleared instance.
   */
  static resetInstance() {
    if (ChunkerRegistry.instance) {
      ChunkerRegistry.instance.clear();
      ChunkerRegistry.instance = null;
    }
  }
  /**
   * Register all default chunkers.
   *
   * Each factory imports its implementation module dynamically, so a chunker
   * module is only loaded when that factory is invoked.
   */
  async registerAll() {
    // Register character chunker
    this.registerChunker("character", async () => {
      const { CharacterChunker } = await import("./chunkers/CharacterChunker.js");
      return new CharacterChunker();
    }, DEFAULT_CHUNKER_METADATA.character);
    // Register recursive chunker
    this.registerChunker("recursive", async () => {
      const { RecursiveChunker } = await import("./chunkers/RecursiveChunker.js");
      return new RecursiveChunker();
    }, DEFAULT_CHUNKER_METADATA.recursive);
    // Register sentence chunker
    this.registerChunker("sentence", async () => {
      const { SentenceChunker } = await import("./chunkers/SentenceChunker.js");
      return new SentenceChunker();
    }, DEFAULT_CHUNKER_METADATA.sentence);
    // Register token chunker
    this.registerChunker("token", async () => {
      const { TokenChunker } = await import("./chunkers/TokenChunker.js");
      return new TokenChunker();
    }, DEFAULT_CHUNKER_METADATA.token);
    // Register markdown chunker
    this.registerChunker("markdown", async () => {
      const { MarkdownChunker } = await import("./chunkers/MarkdownChunker.js");
      return new MarkdownChunker();
    }, DEFAULT_CHUNKER_METADATA.markdown);
    // Register HTML chunker
    this.registerChunker("html", async () => {
      const { HTMLChunker } = await import("./chunkers/HTMLChunker.js");
      return new HTMLChunker();
    }, DEFAULT_CHUNKER_METADATA.html);
    // Register JSON chunker
    this.registerChunker("json", async () => {
      const { JSONChunker } = await import("./chunkers/JSONChunker.js");
      return new JSONChunker();
    }, DEFAULT_CHUNKER_METADATA.json);
    // Register LaTeX chunker
    this.registerChunker("latex", async () => {
      const { LaTeXChunker } = await import("./chunkers/LaTeXChunker.js");
      return new LaTeXChunker();
    }, DEFAULT_CHUNKER_METADATA.latex);
    // Register semantic chunker
    // NOTE: this module is imported from ./chunking/, unlike the others (./chunkers/).
    this.registerChunker("semantic", async () => {
      const { SemanticChunker } = await import("./chunking/semanticChunker.js");
      return new SemanticChunker();
    }, DEFAULT_CHUNKER_METADATA.semantic);
    // Register semantic-markdown chunker
    this.registerChunker("semantic-markdown", async () => {
      const { SemanticMarkdownChunker } = await import("./chunkers/SemanticMarkdownChunker.js");
      return new SemanticMarkdownChunker();
    }, DEFAULT_CHUNKER_METADATA["semantic-markdown"]);
    logger.debug(`[ChunkerRegistry] Registered ${this.items.size} chunking strategies`);
  }
  /**
   * Register a chunker with aliases.
   *
   * @param strategy Canonical strategy name; passed to `register` unchanged.
   * @param factory  Factory passed through to `register`.
   * @param metadata Metadata passed through to `register`. Its optional
   *                 `aliases` array is added to the alias map (lowercased).
   *                 A missing metadata object is treated as "no aliases".
   */
  registerChunker(strategy, factory, metadata) {
    this.register(strategy, factory, metadata);
    // Register aliases. Optional chaining keeps a metadata-less registration
    // from throwing after the item itself has already been registered.
    const aliases = metadata?.aliases;
    if (aliases) {
      for (const alias of aliases) {
        this.aliasMap.set(alias.toLowerCase(), strategy);
        logger.debug(`[ChunkerRegistry] Registered alias '${alias}' -> '${strategy}'`);
      }
    }
  }
  /**
   * Resolve strategy name from alias.
   *
   * Matching is case-insensitive: the input is lowercased, then checked first
   * against registered item ids and then against the alias map.
   *
   * @param nameOrAlias Strategy name or alias.
   * @returns The canonical strategy name.
   * @throws ChunkingError (CHUNKING_STRATEGY_NOT_FOUND) when nothing matches,
   *         including when the input is not a string.
   */
  resolveStrategy(nameOrAlias) {
    // Non-string input can never match; fall through to the ChunkingError
    // below instead of failing with a TypeError on toLowerCase().
    if (typeof nameOrAlias === "string") {
      const lower = nameOrAlias.toLowerCase();
      // Check if it's a direct strategy name
      if (this.items.has(lower)) {
        return lower;
      }
      // Check aliases
      const resolved = this.aliasMap.get(lower);
      if (resolved) {
        return resolved;
      }
    }
    // Listed once and shared by the message and the structured details.
    const availableStrategies = this.list().map((item) => item.id);
    throw new ChunkingError(`Unknown chunking strategy: '${String(nameOrAlias)}'. Available strategies: ${availableStrategies.join(", ")}`, {
      code: RAGErrorCodes.CHUNKING_STRATEGY_NOT_FOUND,
      details: {
        requestedStrategy: nameOrAlias,
        availableStrategies,
      },
    });
  }
  /**
   * Get a chunker by strategy name or alias.
   *
   * Awaits `ensureInitialized()` before resolving.
   *
   * @throws ChunkingError (CHUNKING_STRATEGY_NOT_FOUND) when the name cannot be
   *         resolved or `get` yields a falsy value.
   */
  async getChunker(strategyOrAlias) {
    await this.ensureInitialized();
    const strategy = this.resolveStrategy(strategyOrAlias);
    const chunker = await this.get(strategy);
    if (!chunker) {
      throw new ChunkingError(`Chunker not found: ${strategy}`, {
        code: RAGErrorCodes.CHUNKING_STRATEGY_NOT_FOUND,
        details: { strategy },
      });
    }
    return chunker;
  }
  /**
   * Get list of available chunker strategies (canonical ids only, no aliases).
   */
  async getAvailableChunkers() {
    await this.ensureInitialized();
    return this.list().map((item) => item.id);
  }
  /**
   * Get metadata for a specific chunker.
   *
   * Synchronous: does not call `ensureInitialized()`, so it only sees
   * strategies that are already registered.
   *
   * @throws ChunkingError when the name or alias cannot be resolved.
   */
  getChunkerMetadata(strategyOrAlias) {
    const strategy = this.resolveStrategy(strategyOrAlias);
    const entry = this.list().find((item) => item.id === strategy);
    return entry?.metadata;
  }
  /**
   * Get all aliases for a strategy.
   *
   * Reads the built-in metadata table only (not the live alias map), so the
   * argument must be a canonical built-in name; anything else yields [].
   * A fresh array is returned so callers cannot mutate the shared defaults.
   */
  getAliasesForStrategy(strategy) {
    const metadata = DEFAULT_CHUNKER_METADATA[strategy];
    return [...(metadata?.aliases ?? [])];
  }
  /**
   * Get all registered aliases as a copy of the alias map.
   */
  getAllAliases() {
    return new Map(this.aliasMap);
  }
  /**
   * Check if a strategy or alias exists among the currently registered items.
   */
  hasChunker(strategyOrAlias) {
    try {
      this.resolveStrategy(strategyOrAlias);
      return true;
    }
    catch {
      // resolveStrategy signals "unknown" by throwing; translate to false.
      return false;
    }
  }
  /**
   * Get chunkers by use case.
   *
   * Case-insensitive substring match against the `useCases` of the built-in
   * metadata table; returns the matching canonical strategy names.
   */
  getChunkersByUseCase(useCase) {
    const matches = [];
    const useCaseLower = useCase.toLowerCase();
    for (const [strategy, metadata] of Object.entries(DEFAULT_CHUNKER_METADATA)) {
      const hasMatchingUseCase = metadata.useCases?.some((uc) => uc.toLowerCase().includes(useCaseLower)) ?? false;
      if (hasMatchingUseCase) {
        matches.push(strategy);
      }
    }
    return matches;
  }
  /**
   * Get default configuration for a chunker, taken from its registered metadata.
   *
   * @throws ChunkingError when the name or alias cannot be resolved.
   */
  getDefaultConfig(strategyOrAlias) {
    const metadata = this.getChunkerMetadata(strategyOrAlias);
    return metadata?.defaultConfig;
  }
  /**
   * Clear the registry (also clears aliases).
   */
  clear() {
    super.clear();
    this.aliasMap.clear();
  }
}
|
|
400
|
+
/**
|
|
401
|
+
* Global chunker registry singleton
|
|
402
|
+
*/
|
|
403
|
+
export const chunkerRegistry = ChunkerRegistry.getInstance();
|
|
404
|
+
/**
 * Module-level shortcut: lists the strategy ids known to the shared
 * `chunkerRegistry` singleton.
 */
export async function getAvailableChunkers() {
  const strategies = await chunkerRegistry.getAvailableChunkers();
  return strategies;
}
|
|
410
|
+
/**
 * Module-level shortcut: fetches a chunker from the shared `chunkerRegistry`
 * singleton by strategy name or alias.
 */
export async function getChunker(strategyOrAlias) {
  const chunker = await chunkerRegistry.getChunker(strategyOrAlias);
  return chunker;
}
|
|
416
|
+
/**
 * Module-level shortcut: looks up chunker metadata on the shared
 * `chunkerRegistry` singleton by strategy name or alias.
 */
export function getChunkerMetadata(strategyOrAlias) {
  const metadata = chunkerRegistry.getChunkerMetadata(strategyOrAlias);
  return metadata;
}
|
|
422
|
+
//# sourceMappingURL=ChunkerRegistry.js.map
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Chunker
|
|
3
|
+
*
|
|
4
|
+
* Abstract base class for all chunker implementations.
|
|
5
|
+
* Provides common functionality and interface contract.
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunk, Chunker, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
8
|
+
/**
|
|
9
|
+
* Default chunker configuration
|
|
10
|
+
*/
|
|
11
|
+
export declare const DEFAULT_CHUNKER_CONFIG: ChunkerConfig;
|
|
12
|
+
/**
 * Base Chunker abstract class
 *
 * Shared contract and helpers for chunker implementations; concrete
 * chunkers extend it and supply `strategy` and `doChunk`.
 */
export declare abstract class BaseChunker implements Chunker {
  /** Strategy identifier supplied by the concrete chunker. */
  abstract readonly strategy: ChunkingStrategy;
  /** Instance configuration. */
  protected config: ChunkerConfig;
  /**
   * @param config Optional configuration for this instance.
   */
  constructor(config?: ChunkerConfig);
  /**
   * Returns the default configuration for this chunker.
   */
  getDefaultConfig(): ChunkerConfig;
  /**
   * Checks the chunker configuration.
   */
  protected validateConfig(): void;
  /**
   * Splits `content` into chunks.
   *
   * @param content Text to split.
   * @param config  Optional per-call configuration.
   */
  chunk(content: string, config?: ChunkerConfig): Promise<Chunk[]>;
  /**
   * Strategy-specific splitting, implemented by each subclass.
   */
  protected abstract doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
  /**
   * Filters chunks by minimum size.
   */
  protected filterChunks(chunks: Chunk[], config: ChunkerConfig): Chunk[];
  /**
   * Builds a chunk object from its text, index and position range, with an
   * optional document id and custom metadata.
   */
  protected createChunk(
    text: string,
    chunkIndex: number,
    startPosition: number,
    endPosition: number,
    documentId?: string,
    customMetadata?: Record<string, unknown>,
  ): Chunk;
  /**
   * Splits content into size-bounded segments with overlap, reporting each
   * segment's text together with its start and end offsets.
   */
  protected splitBySizeWithOverlap(content: string, maxSize: number, overlap: number): {
    text: string;
    start: number;
    end: number;
  }[];
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Chunker
|
|
3
|
+
*
|
|
4
|
+
* Abstract base class for all chunker implementations.
|
|
5
|
+
* Provides common functionality and interface contract.
|
|
6
|
+
*/
|
|
7
|
+
import { v4 as uuidv4 } from "uuid";
|
|
8
|
+
import { ChunkingError, RAGErrorCodes } from "../errors/RAGError.js";
|
|
9
|
+
/**
 * Baseline configuration that BaseChunker merges under any config passed
 * to its constructor.
 */
export const DEFAULT_CHUNKER_CONFIG = {
  // Upper bound on chunk size; BaseChunker.validateConfig rejects values <= 0.
  maxSize: 1000,
  // Overlap between consecutive chunks; must be >= 0 and < maxSize.
  overlap: 100,
  // Chunks whose text is shorter than this are dropped by filterChunks.
  minSize: 10,
  // When true, createChunk attaches the caller's custom metadata.
  preserveMetadata: true,
};
|
|
18
|
+
/**
|
|
19
|
+
* Base Chunker abstract class
|
|
20
|
+
*
|
|
21
|
+
* All chunker implementations should extend this class.
|
|
22
|
+
*/
|
|
23
|
+
export class BaseChunker {
|
|
24
|
+
config;
|
|
25
|
+
constructor(config) {
|
|
26
|
+
this.config = { ...this.getDefaultConfig(), ...config };
|
|
27
|
+
this.validateConfig();
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Get default configuration for this chunker
|
|
31
|
+
*/
|
|
32
|
+
getDefaultConfig() {
|
|
33
|
+
return { ...DEFAULT_CHUNKER_CONFIG };
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Validate chunker configuration
|
|
37
|
+
*/
|
|
38
|
+
validateConfig() {
|
|
39
|
+
if (this.config.maxSize !== undefined && this.config.maxSize <= 0) {
|
|
40
|
+
throw new ChunkingError("maxSize must be positive", {
|
|
41
|
+
code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
|
|
42
|
+
details: { maxSize: this.config.maxSize },
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
if (this.config.overlap !== undefined && this.config.overlap < 0) {
|
|
46
|
+
throw new ChunkingError("overlap cannot be negative", {
|
|
47
|
+
code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
|
|
48
|
+
details: { overlap: this.config.overlap },
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
if (this.config.maxSize !== undefined &&
|
|
52
|
+
this.config.overlap !== undefined &&
|
|
53
|
+
this.config.overlap >= this.config.maxSize) {
|
|
54
|
+
throw new ChunkingError("overlap must be less than maxSize", {
|
|
55
|
+
code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
|
|
56
|
+
details: {
|
|
57
|
+
maxSize: this.config.maxSize,
|
|
58
|
+
overlap: this.config.overlap,
|
|
59
|
+
},
|
|
60
|
+
});
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Chunk content into smaller pieces
|
|
65
|
+
*/
|
|
66
|
+
async chunk(content, config) {
|
|
67
|
+
const effectiveConfig = { ...this.config, ...config };
|
|
68
|
+
if (!content || content.trim().length === 0) {
|
|
69
|
+
throw new ChunkingError("Content is empty", {
|
|
70
|
+
code: RAGErrorCodes.CHUNKING_EMPTY_CONTENT,
|
|
71
|
+
strategy: this.strategy,
|
|
72
|
+
contentLength: 0,
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
try {
|
|
76
|
+
const chunks = await this.doChunk(content, effectiveConfig);
|
|
77
|
+
return this.filterChunks(chunks, effectiveConfig);
|
|
78
|
+
}
|
|
79
|
+
catch (error) {
|
|
80
|
+
if (error instanceof ChunkingError) {
|
|
81
|
+
throw error;
|
|
82
|
+
}
|
|
83
|
+
throw new ChunkingError(`Chunking failed: ${error instanceof Error ? error.message : String(error)}`, {
|
|
84
|
+
code: RAGErrorCodes.CHUNKING_ERROR,
|
|
85
|
+
cause: error instanceof Error ? error : undefined,
|
|
86
|
+
strategy: this.strategy,
|
|
87
|
+
contentLength: content.length,
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Filter chunks based on minimum size
|
|
93
|
+
*/
|
|
94
|
+
filterChunks(chunks, config) {
|
|
95
|
+
const minSize = config.minSize ?? 0;
|
|
96
|
+
return chunks.filter((chunk) => chunk.text.length >= minSize);
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Create a chunk object
|
|
100
|
+
*/
|
|
101
|
+
createChunk(text, chunkIndex, startPosition, endPosition, documentId = "unknown", customMetadata) {
|
|
102
|
+
const metadata = {
|
|
103
|
+
documentId,
|
|
104
|
+
chunkIndex,
|
|
105
|
+
startPosition,
|
|
106
|
+
endPosition,
|
|
107
|
+
custom: this.config.preserveMetadata ? customMetadata : undefined,
|
|
108
|
+
};
|
|
109
|
+
return {
|
|
110
|
+
id: uuidv4(),
|
|
111
|
+
text,
|
|
112
|
+
metadata,
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Split content by size with overlap
|
|
117
|
+
*/
|
|
118
|
+
splitBySizeWithOverlap(content, maxSize, overlap) {
|
|
119
|
+
const result = [];
|
|
120
|
+
let start = 0;
|
|
121
|
+
while (start < content.length) {
|
|
122
|
+
const end = Math.min(start + maxSize, content.length);
|
|
123
|
+
result.push({
|
|
124
|
+
text: content.slice(start, end),
|
|
125
|
+
start,
|
|
126
|
+
end,
|
|
127
|
+
});
|
|
128
|
+
// If we've reached the end of content, stop
|
|
129
|
+
if (end >= content.length) {
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
// Move start position, accounting for overlap
|
|
133
|
+
// Ensure start always moves forward by at least 1 character
|
|
134
|
+
const nextStart = end - overlap;
|
|
135
|
+
start = Math.max(nextStart, start + 1);
|
|
136
|
+
// Prevent infinite loop if overlap >= chunk size
|
|
137
|
+
if (start >= end) {
|
|
138
|
+
break;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
return result;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
//# sourceMappingURL=BaseChunker.js.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits text into fixed-size character chunks with optional overlap.
|
|
5
|
+
* The simplest chunking strategy for language-agnostic processing.
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
8
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
9
|
+
/**
 * Character Chunker
 *
 * BaseChunker subclass that splits content into fixed-size character chunks.
 */
export declare class CharacterChunker extends BaseChunker {
  /** Strategy identifier for this chunker. */
  readonly strategy: ChunkingStrategy;
  /** Default configuration for character chunking. */
  getDefaultConfig(): ChunkerConfig;
  /** Performs the character-based split of `content` using `config`. */
  protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits text into fixed-size character chunks with optional overlap.
|
|
5
|
+
* The simplest chunking strategy for language-agnostic processing.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
8
|
+
/**
 * Character Chunker
 *
 * Cuts content into fixed-size character windows (with overlap) using the
 * base class's splitBySizeWithOverlap helper.
 */
export class CharacterChunker extends BaseChunker {
  strategy = "character";
  /**
   * Base defaults with maxSize pinned to 1000 and overlap to 100.
   */
  getDefaultConfig() {
    const defaults = { ...DEFAULT_CHUNKER_CONFIG };
    defaults.maxSize = 1000;
    defaults.overlap = 100;
    return defaults;
  }
  /**
   * Slices `content` into windows and wraps each one as a chunk whose index
   * is its position in the sequence.
   */
  async doChunk(content, config) {
    // Fall back to the strategy defaults when a value is null/undefined.
    const windowSize = config.maxSize ?? 1000;
    const sharedLength = config.overlap ?? 100;
    const chunks = [];
    const pieces = this.splitBySizeWithOverlap(content, windowSize, sharedLength);
    for (let position = 0; position < pieces.length; position += 1) {
      const piece = pieces[position];
      chunks.push(this.createChunk(piece.text, position, piece.start, piece.end));
    }
    return chunks;
  }
}
|
|
29
|
+
//# sourceMappingURL=CharacterChunker.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits HTML content by semantic tags.
|
|
5
|
+
*/
|
|
6
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
7
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
8
|
+
/**
 * HTML Chunker
 *
 * BaseChunker subclass for HTML content.
 */
export declare class HTMLChunker extends BaseChunker {
  /** Strategy identifier for this chunker. */
  readonly strategy: ChunkingStrategy;
  /** Default configuration for HTML chunking. */
  getDefaultConfig(): ChunkerConfig;
  /** Performs the HTML-specific split of `content` using `config`. */
  protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
  /**
   * Strips HTML tags from content.
   */
  private stripHtml;
}
|