@juspay/neurolink 9.1.1 → 9.3.0
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Pipeline Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Provides a complete end-to-end RAG pipeline that orchestrates:
|
|
5
|
+
* - Document loading and preprocessing
|
|
6
|
+
* - Chunking with configurable strategies
|
|
7
|
+
* - Embedding generation
|
|
8
|
+
* - Vector storage and retrieval
|
|
9
|
+
* - Context assembly for LLM queries
|
|
10
|
+
* - Response generation with citations
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* const pipeline = new RAGPipeline({
|
|
15
|
+
* vectorStore: myVectorStore,
|
|
16
|
+
* embeddingModel: { provider: 'openai', modelName: 'text-embedding-3-small' },
|
|
17
|
+
* generationModel: { provider: 'openai', modelName: 'gpt-4o-mini' }
|
|
18
|
+
* });
|
|
19
|
+
*
|
|
20
|
+
* // Ingest documents
|
|
21
|
+
* await pipeline.ingest(['/path/to/doc1.md', '/path/to/doc2.pdf']);
|
|
22
|
+
*
|
|
23
|
+
* // Query with RAG
|
|
24
|
+
* const response = await pipeline.query('What are the key features?');
|
|
25
|
+
* console.log(response.answer, response.sources);
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
import { randomUUID } from "crypto";
|
|
29
|
+
import { MDocument } from "../document/MDocument.js";
|
|
30
|
+
import { loadDocument } from "../document/loaders.js";
|
|
31
|
+
import { InMemoryVectorStore, } from "../retrieval/vectorQueryTool.js";
|
|
32
|
+
import { InMemoryBM25Index, createHybridSearch, } from "../retrieval/hybridSearch.js";
|
|
33
|
+
import { GraphRAG } from "../graphRag/graphRAG.js";
|
|
34
|
+
import { rerank } from "../reranker/reranker.js";
|
|
35
|
+
import { ProviderFactory } from "../../factories/providerFactory.js";
|
|
36
|
+
import { logger } from "../../utils/logger.js";
|
|
37
|
+
/**
|
|
38
|
+
* RAG Pipeline Orchestrator
|
|
39
|
+
*
|
|
40
|
+
* Complete end-to-end pipeline for Retrieval-Augmented Generation.
|
|
41
|
+
*/
|
|
42
|
+
export class RAGPipeline {
|
|
43
|
+
id;
|
|
44
|
+
config;
|
|
45
|
+
vectorStore;
|
|
46
|
+
bm25Index;
|
|
47
|
+
graphRAG;
|
|
48
|
+
embeddingProvider;
|
|
49
|
+
generationProvider;
|
|
50
|
+
hybridSearch;
|
|
51
|
+
documents = new Map();
|
|
52
|
+
allChunks = [];
|
|
53
|
+
constructor(config) {
|
|
54
|
+
this.id = config.id || `rag-pipeline-${randomUUID().slice(0, 8)}`;
|
|
55
|
+
this.config = {
|
|
56
|
+
indexName: "default",
|
|
57
|
+
defaultChunkingStrategy: "recursive",
|
|
58
|
+
defaultChunkSize: 1000,
|
|
59
|
+
defaultChunkOverlap: 200,
|
|
60
|
+
enableHybridSearch: false,
|
|
61
|
+
enableGraphRAG: false,
|
|
62
|
+
graphThreshold: 0.7,
|
|
63
|
+
defaultTopK: 5,
|
|
64
|
+
enableReranking: false,
|
|
65
|
+
...config,
|
|
66
|
+
};
|
|
67
|
+
// Initialize stores
|
|
68
|
+
this.vectorStore = config.vectorStore || new InMemoryVectorStore();
|
|
69
|
+
this.bm25Index = config.bm25Index || new InMemoryBM25Index();
|
|
70
|
+
this.graphRAG = new GraphRAG({ threshold: this.config.graphThreshold });
|
|
71
|
+
logger.info("[RAGPipeline] Pipeline initialized", {
|
|
72
|
+
id: this.id,
|
|
73
|
+
indexName: this.config.indexName,
|
|
74
|
+
embeddingModel: this.config.embeddingModel,
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Initialize the pipeline (lazy loading of providers)
|
|
79
|
+
*/
|
|
80
|
+
async initialize() {
|
|
81
|
+
// Initialize embedding provider
|
|
82
|
+
this.embeddingProvider = await ProviderFactory.createProvider(this.config.embeddingModel.provider, this.config.embeddingModel.modelName);
|
|
83
|
+
// Initialize generation provider if configured
|
|
84
|
+
if (this.config.generationModel) {
|
|
85
|
+
this.generationProvider = await ProviderFactory.createProvider(this.config.generationModel.provider, this.config.generationModel.modelName);
|
|
86
|
+
}
|
|
87
|
+
// Initialize hybrid search if enabled
|
|
88
|
+
if (this.config.enableHybridSearch) {
|
|
89
|
+
this.hybridSearch = createHybridSearch({
|
|
90
|
+
vectorStore: this.vectorStore,
|
|
91
|
+
bm25Index: this.bm25Index,
|
|
92
|
+
indexName: this.config.indexName,
|
|
93
|
+
embeddingModel: this.config.embeddingModel,
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
logger.info("[RAGPipeline] Pipeline initialized", { id: this.id });
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Ingest documents into the pipeline
|
|
100
|
+
*
|
|
101
|
+
* @param sources - Array of file paths, URLs, or MDocument instances
|
|
102
|
+
* @param options - Ingestion options
|
|
103
|
+
*/
|
|
104
|
+
async ingest(sources, options) {
|
|
105
|
+
await this.ensureInitialized();
|
|
106
|
+
const strategy = options?.strategy || this.config.defaultChunkingStrategy;
|
|
107
|
+
const chunkSize = options?.chunkSize || this.config.defaultChunkSize;
|
|
108
|
+
const chunkOverlap = options?.chunkOverlap || this.config.defaultChunkOverlap;
|
|
109
|
+
let documentsProcessed = 0;
|
|
110
|
+
let chunksCreated = 0;
|
|
111
|
+
for (const source of sources) {
|
|
112
|
+
try {
|
|
113
|
+
// Load document if string
|
|
114
|
+
const doc = source instanceof MDocument
|
|
115
|
+
? source
|
|
116
|
+
: await loadDocument(source, { metadata: options?.metadata });
|
|
117
|
+
// Chunk the document
|
|
118
|
+
await doc.chunk({
|
|
119
|
+
strategy,
|
|
120
|
+
config: {
|
|
121
|
+
maxSize: chunkSize,
|
|
122
|
+
overlap: chunkOverlap,
|
|
123
|
+
metadata: options?.metadata,
|
|
124
|
+
},
|
|
125
|
+
});
|
|
126
|
+
// Extract metadata if requested
|
|
127
|
+
if (options?.extractMetadata) {
|
|
128
|
+
await doc.extractMetadata({
|
|
129
|
+
title: true,
|
|
130
|
+
summary: true,
|
|
131
|
+
keywords: true,
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
// Generate embeddings
|
|
135
|
+
await doc.embed(this.config.embeddingModel.provider, this.config.embeddingModel.modelName);
|
|
136
|
+
const chunks = doc.getChunks();
|
|
137
|
+
const embeddings = doc.getEmbeddings();
|
|
138
|
+
// Store in vector store
|
|
139
|
+
await this.vectorStore.query({
|
|
140
|
+
indexName: this.config.indexName,
|
|
141
|
+
queryVector: embeddings[0],
|
|
142
|
+
topK: 1,
|
|
143
|
+
}); // Warm up
|
|
144
|
+
// Upsert into vector store
|
|
145
|
+
if ("upsert" in this.vectorStore) {
|
|
146
|
+
await this.vectorStore.upsert(this.config.indexName, chunks.map((chunk, i) => ({
|
|
147
|
+
id: chunk.id,
|
|
148
|
+
vector: embeddings[i],
|
|
149
|
+
metadata: { ...chunk.metadata, text: chunk.text },
|
|
150
|
+
})));
|
|
151
|
+
}
|
|
152
|
+
// Add to BM25 index
|
|
153
|
+
await this.bm25Index.addDocuments(chunks.map((chunk) => ({
|
|
154
|
+
id: chunk.id,
|
|
155
|
+
text: chunk.text,
|
|
156
|
+
metadata: chunk.metadata,
|
|
157
|
+
})));
|
|
158
|
+
// Update Graph RAG if enabled
|
|
159
|
+
if (this.config.enableGraphRAG) {
|
|
160
|
+
this.graphRAG.createGraph([...this.allChunks, ...chunks].map((c) => ({
|
|
161
|
+
text: c.text,
|
|
162
|
+
metadata: c.metadata,
|
|
163
|
+
})), [...this.allChunks, ...chunks].map((c) => ({
|
|
164
|
+
vector: c.embedding || [],
|
|
165
|
+
})));
|
|
166
|
+
}
|
|
167
|
+
// Track documents and chunks
|
|
168
|
+
this.documents.set(doc.getId(), doc);
|
|
169
|
+
this.allChunks.push(...chunks);
|
|
170
|
+
documentsProcessed++;
|
|
171
|
+
chunksCreated += chunks.length;
|
|
172
|
+
logger.debug("[RAGPipeline] Document ingested", {
|
|
173
|
+
documentId: doc.getId(),
|
|
174
|
+
chunks: chunks.length,
|
|
175
|
+
});
|
|
176
|
+
}
|
|
177
|
+
catch (error) {
|
|
178
|
+
logger.error("[RAGPipeline] Failed to ingest document", {
|
|
179
|
+
source: typeof source === "string" ? source : source.getId(),
|
|
180
|
+
error: error instanceof Error ? error.message : String(error),
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
logger.info("[RAGPipeline] Ingestion complete", {
|
|
185
|
+
documentsProcessed,
|
|
186
|
+
chunksCreated,
|
|
187
|
+
});
|
|
188
|
+
return { documentsProcessed, chunksCreated };
|
|
189
|
+
}
|
|
190
|
+
/**
|
|
191
|
+
* Query the pipeline
|
|
192
|
+
*
|
|
193
|
+
* @param query - Search query
|
|
194
|
+
* @param options - Query options
|
|
195
|
+
* @returns RAG response with retrieved context and optional generated answer
|
|
196
|
+
*/
|
|
197
|
+
async query(query, options) {
|
|
198
|
+
await this.ensureInitialized();
|
|
199
|
+
const startTime = Date.now();
|
|
200
|
+
const topK = options?.topK || this.config.defaultTopK;
|
|
201
|
+
const useHybrid = options?.hybrid ?? this.config.enableHybridSearch;
|
|
202
|
+
const useGraph = options?.graph ?? this.config.enableGraphRAG;
|
|
203
|
+
const useRerank = options?.rerank ?? this.config.enableReranking;
|
|
204
|
+
let results;
|
|
205
|
+
let retrievalMethod = "vector";
|
|
206
|
+
// Generate query embedding
|
|
207
|
+
const queryEmbedding = await this.generateEmbedding(query);
|
|
208
|
+
if (useGraph && this.config.enableGraphRAG) {
|
|
209
|
+
// Graph RAG search
|
|
210
|
+
retrievalMethod = "graph";
|
|
211
|
+
const graphResults = this.graphRAG.query({
|
|
212
|
+
query: queryEmbedding,
|
|
213
|
+
topK: topK * 2, // Get more for potential reranking
|
|
214
|
+
});
|
|
215
|
+
results = graphResults.map((r) => ({
|
|
216
|
+
id: r.id,
|
|
217
|
+
text: r.content,
|
|
218
|
+
score: r.score,
|
|
219
|
+
metadata: r.metadata,
|
|
220
|
+
}));
|
|
221
|
+
}
|
|
222
|
+
else if (useHybrid && this.hybridSearch) {
|
|
223
|
+
// Hybrid search
|
|
224
|
+
retrievalMethod = "hybrid";
|
|
225
|
+
const hybridResults = await this.hybridSearch(query, { topK: topK * 2 });
|
|
226
|
+
results = hybridResults.map((r) => ({
|
|
227
|
+
id: r.id,
|
|
228
|
+
text: r.text,
|
|
229
|
+
score: r.score,
|
|
230
|
+
metadata: r.metadata,
|
|
231
|
+
}));
|
|
232
|
+
}
|
|
233
|
+
else {
|
|
234
|
+
// Vector search
|
|
235
|
+
results = await this.vectorStore.query({
|
|
236
|
+
indexName: this.config.indexName,
|
|
237
|
+
queryVector: queryEmbedding,
|
|
238
|
+
topK: topK * 2,
|
|
239
|
+
filter: options?.filter,
|
|
240
|
+
});
|
|
241
|
+
}
|
|
242
|
+
// Apply reranking if enabled
|
|
243
|
+
let reranked = false;
|
|
244
|
+
if (useRerank && this.config.rerankingModel && results.length > 0) {
|
|
245
|
+
const rerankModel = await ProviderFactory.createProvider(this.config.rerankingModel.provider, this.config.rerankingModel.modelName);
|
|
246
|
+
const rerankedResults = await rerank(results, query, rerankModel, {
|
|
247
|
+
topK,
|
|
248
|
+
queryEmbedding,
|
|
249
|
+
});
|
|
250
|
+
results = rerankedResults.map((r) => r.result);
|
|
251
|
+
reranked = true;
|
|
252
|
+
}
|
|
253
|
+
// Take top K results
|
|
254
|
+
results = results.slice(0, topK);
|
|
255
|
+
// Assemble context
|
|
256
|
+
const context = this.assembleContext(results);
|
|
257
|
+
// Format sources
|
|
258
|
+
const sources = results.map((r) => ({
|
|
259
|
+
id: r.id,
|
|
260
|
+
text: r.text || r.metadata?.text || "",
|
|
261
|
+
score: r.score || 0,
|
|
262
|
+
metadata: r.metadata,
|
|
263
|
+
}));
|
|
264
|
+
// Generate answer if requested
|
|
265
|
+
let answer;
|
|
266
|
+
if (options?.generate !== false && this.generationProvider) {
|
|
267
|
+
answer = await this.generateAnswer(query, context, options?.systemPrompt, options?.temperature);
|
|
268
|
+
}
|
|
269
|
+
const queryTime = Date.now() - startTime;
|
|
270
|
+
logger.info("[RAGPipeline] Query completed", {
|
|
271
|
+
query: query.slice(0, 50),
|
|
272
|
+
retrievalMethod,
|
|
273
|
+
resultsCount: results.length,
|
|
274
|
+
reranked,
|
|
275
|
+
queryTime,
|
|
276
|
+
});
|
|
277
|
+
return {
|
|
278
|
+
answer,
|
|
279
|
+
context,
|
|
280
|
+
sources,
|
|
281
|
+
metadata: {
|
|
282
|
+
queryTime,
|
|
283
|
+
retrievalMethod,
|
|
284
|
+
chunksRetrieved: results.length,
|
|
285
|
+
reranked,
|
|
286
|
+
},
|
|
287
|
+
};
|
|
288
|
+
}
|
|
289
|
+
/**
|
|
290
|
+
* Get pipeline statistics
|
|
291
|
+
*/
|
|
292
|
+
getStats() {
|
|
293
|
+
return {
|
|
294
|
+
totalDocuments: this.documents.size,
|
|
295
|
+
totalChunks: this.allChunks.length,
|
|
296
|
+
indexName: this.config.indexName,
|
|
297
|
+
embeddingDimension: this.allChunks[0]?.embedding?.length,
|
|
298
|
+
hybridSearchEnabled: this.config.enableHybridSearch,
|
|
299
|
+
graphRAGEnabled: this.config.enableGraphRAG,
|
|
300
|
+
};
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* Get pipeline ID
|
|
304
|
+
*/
|
|
305
|
+
getId() {
|
|
306
|
+
return this.id;
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Clear all indexed data
|
|
310
|
+
*/
|
|
311
|
+
async clear() {
|
|
312
|
+
this.documents.clear();
|
|
313
|
+
this.allChunks = [];
|
|
314
|
+
this.graphRAG = new GraphRAG({ threshold: this.config.graphThreshold });
|
|
315
|
+
if ("delete" in this.vectorStore) {
|
|
316
|
+
// Clear vector store if supported
|
|
317
|
+
// Note: InMemoryVectorStore doesn't have a clear method
|
|
318
|
+
}
|
|
319
|
+
logger.info("[RAGPipeline] Pipeline cleared", { id: this.id });
|
|
320
|
+
}
|
|
321
|
+
// ============================================================================
|
|
322
|
+
// Private Methods
|
|
323
|
+
// ============================================================================
|
|
324
|
+
/**
|
|
325
|
+
* Ensure pipeline is initialized
|
|
326
|
+
*/
|
|
327
|
+
async ensureInitialized() {
|
|
328
|
+
if (!this.embeddingProvider) {
|
|
329
|
+
await this.initialize();
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Generate embedding for text
|
|
334
|
+
*/
|
|
335
|
+
async generateEmbedding(text) {
|
|
336
|
+
if (!this.embeddingProvider) {
|
|
337
|
+
throw new Error("Embedding provider not initialized");
|
|
338
|
+
}
|
|
339
|
+
if (typeof this.embeddingProvider
|
|
340
|
+
.embed !== "function") {
|
|
341
|
+
throw new Error(`Provider ${this.config.embeddingModel.provider} does not support embeddings`);
|
|
342
|
+
}
|
|
343
|
+
return await this.embeddingProvider.embed(text);
|
|
344
|
+
}
|
|
345
|
+
/**
|
|
346
|
+
* Assemble context from results
|
|
347
|
+
*/
|
|
348
|
+
assembleContext(results) {
|
|
349
|
+
return results
|
|
350
|
+
.map((r, i) => {
|
|
351
|
+
const text = r.text || r.metadata?.text || "";
|
|
352
|
+
const source = r.metadata?.source || `chunk-${i + 1}`;
|
|
353
|
+
return `[Source ${i + 1}: ${source}]\n${text}`;
|
|
354
|
+
})
|
|
355
|
+
.join("\n\n---\n\n");
|
|
356
|
+
}
|
|
357
|
+
/**
|
|
358
|
+
* Generate answer using LLM
|
|
359
|
+
*/
|
|
360
|
+
async generateAnswer(query, context, customSystemPrompt, temperature) {
|
|
361
|
+
if (!this.generationProvider) {
|
|
362
|
+
throw new Error("Generation provider not configured");
|
|
363
|
+
}
|
|
364
|
+
const systemPrompt = customSystemPrompt ||
|
|
365
|
+
`You are a helpful assistant that answers questions based on the provided context.
|
|
366
|
+
Use only the information from the context to answer the question.
|
|
367
|
+
If the context doesn't contain relevant information, say so.
|
|
368
|
+
Cite sources when possible using [Source N] format.`;
|
|
369
|
+
const prompt = `Context:\n${context}\n\nQuestion: ${query}\n\nAnswer:`;
|
|
370
|
+
const result = await this.generationProvider.generate({
|
|
371
|
+
prompt,
|
|
372
|
+
systemPrompt,
|
|
373
|
+
temperature: temperature ?? this.config.generationModel?.temperature ?? 0.7,
|
|
374
|
+
maxTokens: this.config.generationModel?.maxTokens ?? 1000,
|
|
375
|
+
});
|
|
376
|
+
return result?.content || "";
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
/**
|
|
380
|
+
* Create a simple RAG pipeline with sensible defaults
|
|
381
|
+
*
|
|
382
|
+
* @param options - Basic configuration options
|
|
383
|
+
* @returns Configured RAGPipeline instance
|
|
384
|
+
*/
|
|
385
|
+
export function createRAGPipeline(options) {
|
|
386
|
+
const provider = options.provider || "openai";
|
|
387
|
+
return new RAGPipeline({
|
|
388
|
+
embeddingModel: {
|
|
389
|
+
provider,
|
|
390
|
+
modelName: options.embeddingModel || "text-embedding-3-small",
|
|
391
|
+
},
|
|
392
|
+
generationModel: options.generationModel
|
|
393
|
+
? {
|
|
394
|
+
provider,
|
|
395
|
+
modelName: options.generationModel,
|
|
396
|
+
}
|
|
397
|
+
: undefined,
|
|
398
|
+
enableHybridSearch: options.enableHybrid,
|
|
399
|
+
enableGraphRAG: options.enableGraph,
|
|
400
|
+
});
|
|
401
|
+
}
|
|
402
|
+
//# sourceMappingURL=RAGPipeline.js.map
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Assembly Utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides utilities for assembling, formatting, and optimizing context
|
|
5
|
+
* from retrieved chunks for LLM consumption.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Context window management (token-aware truncation)
|
|
9
|
+
* - Citation formatting
|
|
10
|
+
* - Context deduplication
|
|
11
|
+
* - Relevance-based ordering
|
|
12
|
+
* - Context summarization
|
|
13
|
+
*/
|
|
14
|
+
import type { Chunk, VectorQueryResult } from "../types.js";
|
|
15
|
+
/**
 * Citation format options (see `ContextAssemblyOptions.citationFormat`).
 */
export type CitationFormat =
    | "inline"
    | "footnote"
    | "numbered"
    | "none";
|
|
19
|
+
/**
 * Options accepted when assembling context from retrieved chunks.
 * Every field is optional.
 */
export interface ContextAssemblyOptions {
    /** Upper bound on the assembled context, in characters. */
    maxChars?: number;
    /** Upper bound in tokens (approximate, counted at 4 chars per token). */
    maxTokens?: number;
    /** Which citation format to use. */
    citationFormat?: CitationFormat;
    /** Text placed between chunks. */
    separator?: string;
    /** Whether chunk metadata is included in the context. */
    includeMetadata?: boolean;
    /** Whether overlapping content is deduplicated. */
    deduplicate?: boolean;
    /** Similarity threshold used for deduplication (0-1). */
    dedupeThreshold?: number;
    /** Whether chunks are ordered by relevance score. */
    orderByRelevance?: boolean;
    /** Whether section headers are included. */
    includeSectionHeaders?: boolean;
    /** Header template; supports the {index}, {source} and {score} placeholders. */
    headerTemplate?: string;
}
|
|
44
|
+
/**
 * Context window representation: the assembled text together with
 * bookkeeping about what went into it.
 */
export interface ContextWindow {
    /** The assembled context text. */
    text: string;
    /** How many chunks were included. */
    chunkCount: number;
    /** Total number of characters. */
    charCount: number;
    /** Estimated number of tokens. */
    tokenCount: number;
    /** How many chunks were truncated/excluded. */
    truncatedChunks: number;
    /** Citation map (id -> citation text). */
    citations: Map<string, string>;
}
|
|
61
|
+
/**
 * Assemble context from retrieved results.
 *
 * Joins multiple chunks into one coherent context string suitable for
 * LLM consumption.
 *
 * @param results - Retrieved chunks or query results
 * @param options - Assembly options
 * @returns Assembled context string
 *
 * @example
 * ```typescript
 * const context = assembleContext(results, {
 *   maxTokens: 4000,
 *   citationFormat: 'numbered',
 *   deduplicate: true
 * });
 * ```
 */
export declare function assembleContext(
    results: (Chunk | VectorQueryResult)[],
    options?: ContextAssemblyOptions,
): string;
|
|
81
|
+
/**
 * Format context with inline citations.
 *
 * @param results - Retrieved results
 * @param options - Formatting options (assembly options plus an optional
 *   `returnCitations` flag)
 * @returns The context with citations, together with the citation list
 */
export declare function formatContextWithCitations(
    results: (Chunk | VectorQueryResult)[],
    options?: ContextAssemblyOptions & { returnCitations?: boolean },
): { context: string; citations: string[] };
|
|
94
|
+
/**
 * Create a context window with detailed tracking.
 *
 * @param results - Retrieved results
 * @param options - Assembly options
 * @returns Context window with metadata
 */
export declare function createContextWindow(
    results: (Chunk | VectorQueryResult)[],
    options?: ContextAssemblyOptions,
): ContextWindow;
|
|
102
|
+
/**
 * Summarize context using an LLM.
 *
 * @param context - Context to summarize
 * @param maxLength - Maximum summary length
 * @param provider - LLM provider instance; its `generate` receives
 *   `prompt`, `maxTokens` and `temperature` and resolves to an object with
 *   optional `content`, or to `null`
 * @returns Summarized context
 */
export declare function summarizeContext(
    context: string,
    maxLength?: number,
    provider?: {
        generate: (params: { prompt: string; maxTokens: number; temperature: number }) => Promise<{ content?: string } | null>;
    },
): Promise<string>;
|
|
119
|
+
/**
 * Order chunks by document structure (if available).
 *
 * @param chunks - Chunks to order
 * @returns The ordered chunks
 */
export declare function orderByDocumentStructure(chunks: Array<Chunk>): Array<Chunk>;
|
|
123
|
+
/**
 * Extract key sentences from chunks for summary.
 *
 * @param text - Text to extract sentences from
 * @param count - Optional sentence count
 * @returns The extracted sentences
 */
export declare function extractKeySentences(text: string, count?: number): Array<string>;
|