@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-powered Metadata Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts structured metadata from document chunks using language models.
|
|
5
|
+
* Supports title, summary, keywords, Q&A pairs, and custom schema extraction.
|
|
6
|
+
*/
|
|
7
|
+
import { ProviderFactory } from "../../factories/providerFactory.js";
|
|
8
|
+
import { logger } from "../../utils/logger.js";
|
|
9
|
+
/**
 * Default prompt templates for metadata extraction, keyed by the kind of
 * metadata requested (`title`, `summary`, `keywords`, `questions`).
 *
 * Each template is plain text containing `{name}` placeholders. The code that
 * fills the placeholders is not part of this constant; the names listed on
 * each entry below are exactly the ones that appear in the template text.
 */
const DEFAULT_PROMPTS = {
    // Placeholders: {context}
    title: `Extract a concise, descriptive title for the following content.
Return only the title, nothing else.

Content:
{context}

Title:`,
    // Placeholders: {maxWords}, {context}
    summary: `Summarize the following content in {maxWords} words or less.
Focus on the key points and main ideas.

Content:
{context}

Summary:`,
    // Placeholders: {maxKeywords}, {context}
    keywords: `Extract the {maxKeywords} most important keywords or key phrases from the following content.
Return them as a comma-separated list.

Content:
{context}

Keywords:`,
    // Placeholders: {numQuestions}, {answerInstruction}, {context}
    questions: `Generate {numQuestions} questions that can be answered using the following content.
{answerInstruction}

Content:
{context}

Questions:`,
};
|
|
42
|
+
/**
 * Fill `{name}` placeholders in a prompt template in a single pass.
 *
 * A replacer function is used on purpose: with a plain string replacement,
 * `String.prototype.replace` interprets `$$`, `$&`, `` $` `` and `$'` inside the
 * inserted text, which silently corrupts chunk content such as LaTeX (`$$`)
 * or prices. Doing all substitutions in one pass also means text that has
 * just been inserted is never re-scanned for further placeholders.
 *
 * Every occurrence of a known placeholder is replaced; placeholders that have
 * no entry in `values` are left untouched.
 *
 * @param template - Prompt template containing `{name}` markers
 * @param values - Map from placeholder name to the value to insert
 * @returns The template with all known placeholders substituted
 */
function fillPromptTemplate(template, values) {
    return template.replace(/\{(\w+)\}/g, (placeholder, key) => Object.prototype.hasOwnProperty.call(values, key)
        ? String(values[key])
        : placeholder);
}
/**
 * LLM-powered metadata extractor
 * Extracts title, summary, keywords, Q&A pairs, and custom schema data
 */
export class LLMMetadataExtractor {
    provider;
    modelName;
    /**
     * @param options - Optional `provider` and `modelName`; they fall back to
     *   "openai" and "gpt-4o-mini" when missing or empty.
     */
    constructor(options) {
        this.provider = options?.provider || "openai";
        this.modelName = options?.modelName || "gpt-4o-mini";
    }
    /**
     * Extract metadata from chunks based on configuration
     *
     * Chunks are processed one after another. A failure while processing a
     * chunk is logged and the fields gathered so far for that chunk are kept,
     * so the returned array always has one entry per input chunk.
     *
     * @param chunks - Array of chunks to extract metadata from
     * @param params - Extraction parameters; each of `title`, `summary`,
     *   `keywords` and `questions` may be `true` (use defaults) or a config
     *   object, and `custom` is a config object
     * @returns Array of extraction results, one per chunk
     */
    async extract(chunks, params) {
        const results = [];
        // Group chunks by documentId for title extraction
        const chunksByDocument = this.groupByDocument(chunks);
        // Cache titles by document to avoid re-extraction
        const titleCache = new Map();
        for (const chunk of chunks) {
            const result = {};
            try {
                // Extract title (shared across chunks with same documentId)
                if (params.title) {
                    const docId = chunk.metadata?.documentId;
                    if (!titleCache.has(docId)) {
                        const titleConfig = typeof params.title === "boolean" ? {} : params.title;
                        const title = await this.extractTitle(chunksByDocument.get(docId) || [chunk], titleConfig);
                        titleCache.set(docId, title);
                    }
                    result.title = titleCache.get(docId);
                }
                // Extract summary
                if (params.summary) {
                    const summaryConfig = typeof params.summary === "boolean" ? {} : params.summary;
                    result.summary = await this.extractSummary(chunk, summaryConfig);
                }
                // Extract keywords
                if (params.keywords) {
                    const keywordConfig = typeof params.keywords === "boolean" ? {} : params.keywords;
                    result.keywords = await this.extractKeywords(chunk, keywordConfig);
                }
                // Generate Q&A pairs
                if (params.questions) {
                    const questionConfig = typeof params.questions === "boolean" ? {} : params.questions;
                    result.questions = await this.extractQuestions(chunk, questionConfig);
                }
                // Custom schema extraction
                if (params.custom) {
                    result.custom = await this.extractCustom(chunk, params.custom);
                }
                results.push(result);
            }
            catch (error) {
                logger.error("[MetadataExtractor] Extraction failed for chunk", {
                    chunkId: chunk.id,
                    error: error instanceof Error ? error.message : String(error),
                });
                // Best effort: keep whatever was extracted before the failure.
                results.push(result);
            }
        }
        return results;
    }
    /**
     * Group chunks by document ID
     *
     * Chunks without metadata are grouped under the `undefined` key instead
     * of throwing, so one malformed chunk cannot abort the whole run.
     *
     * @param chunks - Chunks to group
     * @returns Map from `metadata.documentId` to the chunks carrying it, in input order
     */
    groupByDocument(chunks) {
        const groups = new Map();
        for (const chunk of chunks) {
            const docId = chunk.metadata?.documentId;
            const group = groups.get(docId);
            if (group) {
                group.push(chunk);
            }
            else {
                groups.set(docId, [chunk]);
            }
        }
        return groups;
    }
    /**
     * Extract title from document chunks
     *
     * @param chunks - Chunks belonging to one document
     * @param config - Optional `nodes` (how many leading chunks to use,
     *   default 3) and `promptTemplate`, plus LLM call overrides
     * @returns The trimmed model response
     */
    async extractTitle(chunks, config) {
        const { nodes = 3, promptTemplate = DEFAULT_PROMPTS.title } = config;
        // Use first N chunks for title extraction
        const context = chunks
            .slice(0, nodes)
            .map((c) => c.text)
            .join("\n\n");
        const prompt = fillPromptTemplate(promptTemplate, { context });
        const response = await this.callLLM(prompt, config);
        return response.trim();
    }
    /**
     * Extract summary from a chunk
     *
     * @param chunk - Chunk whose `text` is summarized
     * @param config - Optional `maxWords` (default 100) and `promptTemplate`,
     *   plus LLM call overrides
     * @returns The trimmed model response
     */
    async extractSummary(chunk, config) {
        const { maxWords = 100, promptTemplate = DEFAULT_PROMPTS.summary } = config;
        const prompt = fillPromptTemplate(promptTemplate, {
            context: chunk.text,
            maxWords,
        });
        const response = await this.callLLM(prompt, config);
        return response.trim();
    }
    /**
     * Extract keywords from a chunk
     *
     * @param chunk - Chunk whose `text` is analysed
     * @param config - Optional `maxKeywords` (default 10) and `promptTemplate`,
     *   plus LLM call overrides
     * @returns Up to `maxKeywords` non-empty, trimmed keywords
     */
    async extractKeywords(chunk, config) {
        const { maxKeywords = 10, promptTemplate = DEFAULT_PROMPTS.keywords } = config;
        const prompt = fillPromptTemplate(promptTemplate, {
            context: chunk.text,
            maxKeywords,
        });
        const response = await this.callLLM(prompt, config);
        // Parse comma-separated keywords
        return response
            .split(",")
            .map((k) => k.trim())
            .filter((k) => k.length > 0)
            .slice(0, maxKeywords);
    }
    /**
     * Extract Q&A pairs from a chunk
     *
     * @param chunk - Chunk whose `text` the questions are generated from
     * @param config - Optional `numQuestions` (default 3), `includeAnswers`
     *   (default true) and `promptTemplate`, plus LLM call overrides
     * @returns Parsed question objects, each with an `answer` when one was
     *   requested and found
     */
    async extractQuestions(chunk, config) {
        const { numQuestions = 3, includeAnswers = true, promptTemplate = DEFAULT_PROMPTS.questions, } = config;
        const answerInstruction = includeAnswers
            ? "For each question, also provide a brief answer based on the content."
            : "Return only the questions.";
        const prompt = fillPromptTemplate(promptTemplate, {
            context: chunk.text,
            numQuestions,
            answerInstruction,
        });
        const response = await this.callLLM(prompt, config);
        // Parse Q&A pairs from response
        return this.parseQAPairs(response, includeAnswers);
    }
    /**
     * Extract custom schema data from a chunk
     *
     * When `config.promptTemplate` is supplied, its `{context}` placeholder
     * (if present) is replaced with the chunk text so that the content
     * actually reaches the model; a template without the placeholder is sent
     * unchanged. Otherwise a default prompt is built from `config.description`.
     *
     * @param chunk - Chunk whose `text` is analysed
     * @param config - Optional `description` and `promptTemplate`, plus LLM
     *   call overrides
     * @returns The parsed JSON value, or `{ raw: response }` when the response
     *   cannot be parsed as JSON
     */
    async extractCustom(chunk, config) {
        const { description, promptTemplate } = config;
        // Build extraction prompt
        const prompt = promptTemplate
            ? fillPromptTemplate(promptTemplate, { context: chunk.text })
            : [
                "Extract the following information from the content:",
                description || "Extract structured data according to the schema.",
                "",
                "Content:",
                chunk.text,
                "",
                "Return the extracted data as JSON.",
            ].join("\n");
        const response = await this.callLLM(prompt, config);
        try {
            // Models often wrap JSON in prose or code fences, so prefer the
            // outermost {...} span and only then try the raw response.
            const jsonMatch = response.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                return JSON.parse(jsonMatch[0]);
            }
            return JSON.parse(response);
        }
        catch {
            logger.warn("[MetadataExtractor] Failed to parse custom extraction as JSON");
            return { raw: response };
        }
    }
    /**
     * Parse Q&A pairs from LLM response
     *
     * A line starts a new question when it begins with a number followed by
     * `.`, `)` or `:`, or with a `Q` / `Question` label (optionally numbered)
     * followed by `.`, `:` or `)`. A line starts an answer when it begins with
     * an `A` / `Answer` label in the same form. The label must be followed by
     * a delimiter: ordinary words that merely start with "Q" or "A"
     * ("Quality ...", "And ...") are continuation text, and "Answer:" is
     * stripped as a whole instead of losing only its first letter.
     * Any other line continues the current answer if one has started,
     * otherwise the current question.
     *
     * @param response - Raw model output
     * @param includeAnswers - Whether parsed answers are attached to the pairs
     * @returns Array of `{ question }` or `{ question, answer }` objects
     */
    parseQAPairs(response, includeAnswers) {
        const numberedPrefix = /^\d+[.):]\s*/;
        const questionLabel = /^Q(?:uestion)?\s*\d*\s*[.:)]\s*/i;
        const answerLabel = /^A(?:nswer)?\s*\d*\s*[.:)]\s*/i;
        const pairs = [];
        // Emits one pair from the collected line fragments; empty fragments
        // (e.g. a bare "A:" label line) are dropped before joining.
        const emit = (qParts, aParts) => {
            const question = qParts.filter(Boolean).join(" ");
            if (!question) {
                return;
            }
            const answer = (aParts || []).filter(Boolean).join(" ");
            pairs.push(includeAnswers && answer ? { question, answer } : { question });
        };
        let questionParts = null;
        // `null` means "no answer label seen yet"; an empty array means the
        // label was seen but its text follows on later lines.
        let answerParts = null;
        for (const rawLine of response.split("\n")) {
            const line = rawLine.trim();
            if (!line) {
                continue;
            }
            if (numberedPrefix.test(line) || questionLabel.test(line)) {
                // Save previous Q&A pair
                if (questionParts !== null) {
                    emit(questionParts, answerParts);
                }
                questionParts = [
                    line.replace(numberedPrefix, "").replace(questionLabel, ""),
                ];
                answerParts = null;
            }
            else if (questionParts !== null && answerLabel.test(line)) {
                answerParts = [line.replace(answerLabel, "")];
            }
            else if (answerParts !== null) {
                // Continuation of answer
                answerParts.push(line);
            }
            else if (questionParts !== null) {
                // Continuation of question
                questionParts.push(line);
            }
        }
        // Don't forget the last pair
        if (questionParts !== null) {
            emit(questionParts, answerParts);
        }
        return pairs;
    }
    /**
     * Call the LLM with a prompt
     *
     * Creates a provider through `ProviderFactory.createProvider` for every
     * call, using the per-call `provider` / `modelName` overrides when given
     * and the instance defaults otherwise.
     *
     * @param prompt - Fully rendered prompt text
     * @param config - Optional `provider`, `modelName`, `maxTokens` (default
     *   500) and `temperature` (default 0.3)
     * @returns The `content` of the generation result, or "" when absent
     */
    async callLLM(prompt, config) {
        const provider = await ProviderFactory.createProvider(config.provider || this.provider, config.modelName || this.modelName);
        const result = await provider.generate({
            prompt,
            maxTokens: config.maxTokens || 500,
            // `??` rather than `||`: an explicit temperature of 0 is a valid
            // request and must not be replaced by the default.
            temperature: config.temperature ?? 0.3,
        });
        return result?.content || "";
    }
}
|
|
267
|
+
/**
 * One-shot helper: builds an LLMMetadataExtractor from `options` and runs
 * its `extract` method on the given chunks.
 * @param chunks - Chunks to process
 * @param params - Extraction parameters
 * @param options - Extractor options
 * @returns Extraction results
 */
export async function extractMetadata(chunks, params, options) {
    return new LLMMetadataExtractor(options).extract(chunks, params);
}
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Pipeline Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Provides a complete end-to-end RAG pipeline that orchestrates:
|
|
5
|
+
* - Document loading and preprocessing
|
|
6
|
+
* - Chunking with configurable strategies
|
|
7
|
+
* - Embedding generation
|
|
8
|
+
* - Vector storage and retrieval
|
|
9
|
+
* - Context assembly for LLM queries
|
|
10
|
+
* - Response generation with citations
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* const pipeline = new RAGPipeline({
|
|
15
|
+
* vectorStore: myVectorStore,
|
|
16
|
+
* embeddingModel: { provider: 'openai', modelName: 'text-embedding-3-small' },
|
|
17
|
+
* generationModel: { provider: 'openai', modelName: 'gpt-4o-mini' }
|
|
18
|
+
* });
|
|
19
|
+
*
|
|
20
|
+
* // Ingest documents
|
|
21
|
+
* await pipeline.ingest(['/path/to/doc1.md', '/path/to/doc2.pdf']);
|
|
22
|
+
*
|
|
23
|
+
* // Query with RAG
|
|
24
|
+
* const response = await pipeline.query('What are the key features?');
|
|
25
|
+
* console.log(response.answer, response.sources);
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
import type { ChunkingStrategy } from "../types.js";
|
|
29
|
+
import { MDocument } from "../document/MDocument.js";
|
|
30
|
+
import { type VectorStore } from "../retrieval/vectorQueryTool.js";
|
|
31
|
+
import { type BM25Index } from "../retrieval/hybridSearch.js";
|
|
32
|
+
/**
 * Selects the model used for embeddings: a provider plus a model name.
 */
export interface EmbeddingModelConfig {
    /** Model name, e.g. 'text-embedding-3-small' */
    modelName: string;
    /** Provider identifier, e.g. 'openai' */
    provider: string;
}
|
|
39
|
+
/**
 * Selects the model used for answer generation, with optional sampling
 * settings.
 */
export interface GenerationModelConfig {
    /** Model name, e.g. 'gpt-4o-mini' */
    modelName: string;
    /** Provider identifier, e.g. 'openai' */
    provider: string;
    /** Optional cap on generated tokens */
    maxTokens?: number;
    /** Optional sampling temperature */
    temperature?: number;
}
|
|
48
|
+
/**
 * Options accepted by the RAGPipeline constructor.
 *
 * Only `embeddingModel` is required; every other field is optional.
 */
export interface RAGPipelineConfig {
    /** Pipeline identifier */
    id?: string;

    // --- storage ---
    /** Vector store instance (defaults to in-memory) */
    vectorStore?: VectorStore;
    /** BM25 index for hybrid search (defaults to in-memory) */
    bm25Index?: BM25Index;
    /** Index name for vector store */
    indexName?: string;

    // --- models ---
    /** Embedding model configuration */
    embeddingModel: EmbeddingModelConfig;
    /** Generation model configuration (for RAG responses) */
    generationModel?: GenerationModelConfig;

    // --- chunking defaults ---
    /** Default chunking strategy */
    defaultChunkingStrategy?: ChunkingStrategy;
    /** Default chunk size */
    defaultChunkSize?: number;
    /** Default chunk overlap */
    defaultChunkOverlap?: number;

    // --- retrieval ---
    /** Enable hybrid search (vector + BM25) */
    enableHybridSearch?: boolean;
    /** Enable Graph RAG */
    enableGraphRAG?: boolean;
    /** Graph RAG similarity threshold */
    graphThreshold?: number;
    /** Default number of results to retrieve */
    defaultTopK?: number;

    // --- reranking ---
    /** Enable reranking */
    enableReranking?: boolean;
    /** Reranking model configuration */
    rerankingModel?: EmbeddingModelConfig;
}
|
|
83
|
+
/**
 * Per-call options for `RAGPipeline.ingest`; all fields are optional.
 */
export interface IngestOptions {
    /** Chunking strategy override */
    strategy?: ChunkingStrategy;
    /** Chunk size override */
    chunkSize?: number;
    /** Chunk overlap override */
    chunkOverlap?: number;
    /** Extract metadata using LLM */
    extractMetadata?: boolean;
    /** Custom metadata to add */
    metadata?: Record<string, unknown>;
}
|
|
98
|
+
/**
 * Per-call options for `RAGPipeline.query`; all fields are optional.
 */
export interface QueryOptions {
    // --- retrieval ---
    /** Number of chunks to retrieve */
    topK?: number;
    /** Metadata filter */
    filter?: Record<string, unknown>;
    /** Use hybrid search */
    hybrid?: boolean;
    /** Use Graph RAG */
    graph?: boolean;
    /** Enable reranking */
    rerank?: boolean;

    // --- response ---
    /** Include sources in response */
    includeSources?: boolean;
    /** Generate response (vs just retrieve) */
    generate?: boolean;
    /** Custom system prompt for generation */
    systemPrompt?: string;
    /** Temperature for generation */
    temperature?: number;
}
|
|
121
|
+
/**
 * Result returned by `RAGPipeline.query`.
 */
export interface RAGResponse {
    /** Generated answer (if generate=true) */
    answer?: string;
    /** Retrieved context, as a single string */
    context: string;
    /** Source documents/chunks */
    sources: {
        id: string;
        text: string;
        score: number;
        metadata?: Record<string, unknown>;
    }[];
    /** Query metadata */
    metadata: {
        queryTime: number;
        retrievalMethod: string;
        chunksRetrieved: number;
        reranked: boolean;
    };
}
|
|
144
|
+
/**
 * Snapshot returned by `RAGPipeline.getStats`.
 */
export interface PipelineStats {
    /** Name of the vector-store index */
    indexName: string;
    /** Document count */
    totalDocuments: number;
    /** Chunk count */
    totalChunks: number;
    /** Embedding vector dimension, when available */
    embeddingDimension?: number;
    /** Whether hybrid search is enabled */
    hybridSearchEnabled: boolean;
    /** Whether Graph RAG is enabled */
    graphRAGEnabled: boolean;
}
|
|
155
|
+
/**
 * RAG Pipeline Orchestrator
 *
 * Type declaration for the end-to-end Retrieval-Augmented Generation
 * pipeline: construct it from a RAGPipelineConfig, feed it documents with
 * `ingest`, then call `query`.
 */
export declare class RAGPipeline {
    private id;
    private config;
    private vectorStore;
    private bm25Index;
    private graphRAG;
    private embeddingProvider?;
    private generationProvider?;
    private hybridSearch?;
    private documents;
    private allChunks;
    constructor(config: RAGPipelineConfig);
    /**
     * Initialize the pipeline (lazy loading of providers)
     */
    initialize(): Promise<void>;
    /**
     * Ingest documents into the pipeline
     *
     * @param sources - File paths, URLs, or MDocument instances
     * @param options - Ingestion options
     * @returns Counts of documents processed and chunks created
     */
    ingest(sources: (string | MDocument)[], options?: IngestOptions): Promise<{
        documentsProcessed: number;
        chunksCreated: number;
    }>;
    /**
     * Query the pipeline
     *
     * @param query - Search query
     * @param options - Query options
     * @returns RAG response with retrieved context and optional generated answer
     */
    query(query: string, options?: QueryOptions): Promise<RAGResponse>;
    /**
     * Get pipeline statistics
     */
    getStats(): PipelineStats;
    /**
     * Get pipeline ID
     */
    getId(): string;
    /**
     * Clear all indexed data
     */
    clear(): Promise<void>;
    /**
     * Ensure pipeline is initialized
     */
    private ensureInitialized;
    /**
     * Generate embedding for text
     */
    private generateEmbedding;
    /**
     * Assemble context from results
     */
    private assembleContext;
    /**
     * Generate answer using LLM
     */
    private generateAnswer;
}
|
|
223
|
+
/**
 * Factory for a RAGPipeline built from a small set of basic options.
 *
 * All option fields are optional; the defaults applied for omitted fields are
 * decided by the implementation and are not visible in this declaration.
 *
 * @param options - Basic configuration: provider, embedding and generation
 *   model names, and hybrid / graph toggles
 * @returns Configured RAGPipeline instance
 */
export declare function createRAGPipeline(options: {
    provider?: string;
    embeddingModel?: string;
    generationModel?: string;
    enableHybrid?: boolean;
    enableGraph?: boolean;
}): RAGPipeline;
|