@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Assembly Utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides utilities for assembling, formatting, and optimizing context
|
|
5
|
+
* from retrieved chunks for LLM consumption.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
 * - Context window management (truncation to a character budget; token limits are approximated as 4 characters per token)
|
|
9
|
+
* - Citation formatting
|
|
10
|
+
* - Context deduplication
|
|
11
|
+
* - Relevance-based ordering
|
|
12
|
+
* - Context summarization
|
|
13
|
+
*/
|
|
14
|
+
import { logger } from "../../utils/logger.js";
|
|
15
|
+
/**
 * Build a single context string out of retrieved results.
 *
 * Each result is normalised to `{ id, text, score, metadata, index }`
 * (text falls back to `metadata.text`), optionally sorted by descending
 * score, optionally deduplicated, and then appended chunk by chunk until
 * the character budget (`maxChars`, or `maxTokens * 4` when `maxChars` is
 * falsy) would be exceeded. The first chunk that does not fit is either
 * added in truncated form (when more than 200 characters remain after a
 * 50-character buffer) or dropped; assembly stops there in both cases.
 *
 * @param results - Retrieved chunks or query results
 * @param options - Assembly options (all optional)
 * @returns The assembled context, or "" when `results` is empty
 *
 * @example
 * ```typescript
 * const context = assembleContext(results, {
 *   maxTokens: 4000,
 *   citationFormat: 'numbered',
 *   deduplicate: true
 * });
 * ```
 */
export function assembleContext(results, options) {
    const {
        maxChars,
        maxTokens = 4000,
        citationFormat = "none",
        separator = "\n\n---\n\n",
        includeMetadata = false,
        deduplicate = false,
        dedupeThreshold = 0.8,
        orderByRelevance = true,
        includeSectionHeaders = false,
        headerTemplate = "[{index}] Source: {source}",
    } = options || {};
    if (results.length === 0) {
        return "";
    }
    // Normalise every result into one shape; text may live in metadata.
    let entries = results.map((result, index) => {
        const id = "id" in result ? result.id : `chunk-${index}`;
        const ownText = "text" in result ? result.text || "" : "";
        const score = "score" in result ? result.score || 0 : 0;
        const metadata = "metadata" in result ? result.metadata : {};
        return {
            id,
            text: ownText || metadata?.text || "",
            score,
            metadata,
            index,
        };
    });
    // Highest score first when requested.
    if (orderByRelevance) {
        entries.sort((left, right) => right.score - left.score);
    }
    // Near-duplicate removal works on entries that always carry metadata.
    if (deduplicate) {
        const withMetadata = entries.map((entry) => ({
            ...entry,
            metadata: entry.metadata || {},
        }));
        entries = deduplicateChunks(withMetadata, dedupeThreshold);
    }
    // Character budget: explicit maxChars wins, otherwise ~4 chars per token.
    const charBudget = maxChars || maxTokens * 4;
    const joinSections = (sections) => sections.filter(Boolean).join("\n");
    const parts = [];
    let usedChars = 0;
    for (const entry of entries) {
        // Numbering follows what has actually been emitted so far.
        const position = parts.length + 1;
        let header = "";
        if (includeSectionHeaders) {
            header = formatHeader(headerTemplate, {
                index: position,
                source: entry.metadata?.source || entry.id,
                score: entry.score,
            });
        }
        const metadataBlock = includeMetadata ? formatMetadata(entry.metadata) : "";
        const citation = formatCitation(citationFormat, position, entry.metadata);
        const citationLine = citation ? `${citation}\n` : "";
        const chunkText = joinSections([header, citationLine, entry.text, metadataBlock]);
        const projectedChars = usedChars + chunkText.length + separator.length;
        if (projectedChars > charBudget) {
            // Over budget: keep a truncated version only if a useful amount fits.
            const remainingChars = charBudget - usedChars - separator.length - 50; // Buffer
            if (remainingChars > 200) {
                const shortened = truncateText(entry.text, remainingChars);
                parts.push(joinSections([header, citationLine, shortened, "[truncated]"]));
            }
            break;
        }
        parts.push(chunkText);
        usedChars = projectedChars;
    }
    return parts.join(separator);
}
|
|
113
|
+
/**
 * Format context with numbered citation markers and a matching citation list.
 *
 * Ordering and deduplication are applied here, before numbering, and are
 * switched off in the delegated assembleContext() call. That keeps the
 * `[n]` markers written into the context pointing at the n-th entry of the
 * returned `citations` array. (Numbering citations in input order, as was
 * done previously, disagrees with the assembled context whenever
 * relevance ordering or deduplication changes the chunk order.)
 *
 * Chunks that do not fit into the character budget are still listed in
 * `citations`; they are always at the tail, so numbering stays aligned.
 *
 * @param results - Retrieved results
 * @param options - Formatting options (same shape as assembleContext options;
 *   citationFormat, includeSectionHeaders and headerTemplate are overridden)
 * @returns `{ context, citations }` where citations are `"[n] source"` strings
 */
export function formatContextWithCitations(results, options) {
    const { orderByRelevance = true, deduplicate = false, dedupeThreshold = 0.8, } = options || {};
    // Normalise the same way assembleContext does so ids/text/scores agree.
    let items = results.map((r, index) => {
        const metadata = ("metadata" in r ? r.metadata : undefined) || {};
        return {
            ...r,
            id: "id" in r ? r.id : `chunk-${index}`,
            text: ("text" in r ? r.text : "") || metadata.text || "",
            score: ("score" in r ? r.score : 0) || 0,
            metadata,
        };
    });
    if (orderByRelevance) {
        // `items` is a fresh array, so sorting does not touch the caller's input.
        items.sort((a, b) => b.score - a.score);
    }
    if (deduplicate) {
        items = deduplicateChunks(items, dedupeThreshold);
    }
    // Number citations in the exact order the chunks will be emitted.
    const citations = items.map((item, position) => `[${position + 1}] ${item.metadata.source || item.id}`);
    const context = assembleContext(items, {
        ...options,
        // Already applied above; repeating them could only reorder or drop
        // chunks again and break the numbering.
        orderByRelevance: false,
        deduplicate: false,
        citationFormat: "numbered",
        includeSectionHeaders: true,
        headerTemplate: "[{index}]",
    });
    return { context, citations };
}
|
|
140
|
+
/**
 * Concatenate result texts into a bounded window and report what was used.
 *
 * Chunks are joined with a blank line. The budget is `options.maxChars`,
 * or `options.maxTokens * 4` (default 4000 tokens) when `maxChars` is
 * falsy. A chunk that would overflow the budget is added in truncated form
 * (suffixed with "...") when more than 100 characters remain, otherwise it
 * is skipped; either way it counts towards `truncatedChunks`, and later
 * chunks are still considered.
 *
 * @param results - Retrieved results
 * @param options - Assembly options (`maxTokens`, `maxChars`)
 * @returns `{ text, chunkCount, charCount, tokenCount, truncatedChunks, citations }`
 *   where `citations` is a Map from chunk id to a `"[n] source"` label and
 *   `tokenCount` is estimated as `ceil(charCount / 4)`
 */
export function createContextWindow(results, options) {
    const tokenBudget = options?.maxTokens || 4000;
    const charBudget = options?.maxChars || tokenBudget * 4;
    const citations = new Map();
    let text = "";
    let chunkCount = 0;
    let truncatedChunks = 0;
    // Append a body to the window and record its numbered citation.
    const include = (entry, body, note) => {
        text += (text ? "\n\n" : "") + body;
        citations.set(entry.id, `[${chunkCount + 1}] ${entry.metadata?.source || entry.id}${note}`);
        chunkCount += 1;
    };
    const entries = results.map((result, position) => {
        const id = "id" in result ? result.id : `chunk-${position}`;
        const body = ("text" in result ? result.text : "") ||
            result.metadata?.text ||
            "";
        const metadata = "metadata" in result ? result.metadata : {};
        return { id, text: body, metadata };
    });
    for (const entry of entries) {
        const projected = text.length + entry.text.length + 10; // Buffer for separators
        if (projected > charBudget) {
            truncatedChunks += 1;
            // Partial inclusion only when a meaningful amount of room is left.
            const room = charBudget - text.length - 20;
            if (room > 100) {
                include(entry, truncateText(entry.text, room) + "...", " (truncated)");
            }
            continue;
        }
        include(entry, entry.text, "");
    }
    return {
        text,
        chunkCount,
        charCount: text.length,
        tokenCount: Math.ceil(text.length / 4),
        truncatedChunks,
        citations,
    };
}
|
|
192
|
+
/**
 * Shorten a context string, using an LLM provider when one is supplied.
 *
 * Without a provider the context is simply truncated to `maxLength * 4`
 * characters. With a provider, `provider.generate()` is asked for a summary
 * of at most `maxLength` words; an empty result or any thrown error falls
 * back to the same truncation (errors are logged as warnings).
 *
 * @param context - Context to summarize
 * @param maxLength - Maximum summary length (words in the prompt; default 500)
 * @param provider - Optional object exposing an async `generate()` method
 * @returns The trimmed summary, or the truncated context as a fallback
 */
export async function summarizeContext(context, maxLength = 500, provider) {
    // Shared fallback: plain truncation at roughly four characters per unit.
    const truncated = () => truncateText(context, maxLength * 4);
    if (!provider) {
        return truncated();
    }
    try {
        const request = {
            prompt: `Summarize the following context in no more than ${maxLength} words, preserving the key information:\n\n${context}\n\nSummary:`,
            maxTokens: Math.ceil(maxLength * 1.5),
            temperature: 0.3,
        };
        const response = await provider.generate(request);
        const summary = response?.content?.trim();
        return summary || truncated();
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.warn("[ContextAssembly] Summarization failed, using truncation", {
            error: reason,
        });
        return truncated();
    }
}
|
|
220
|
+
// ============================================================================
|
|
221
|
+
// Helper Functions
|
|
222
|
+
// ============================================================================
|
|
223
|
+
/**
 * Format a section header by filling `{index}`, `{source}` and `{score}`
 * placeholders in a template.
 *
 * All placeholders are substituted in a single pass with a replacer
 * function, so:
 * - every occurrence is replaced, not just the first;
 * - `$`-sequences inside a source name (e.g. "$$", "$&") are inserted
 *   literally instead of being treated as replacement patterns;
 * - placeholder-looking text inside a substituted value is not expanded again.
 *
 * @param template - Header template containing the placeholders
 * @param vars - `{ index, source, score }`; `score` must be a number when
 *   the template uses `{score}` (rendered with four decimals)
 * @returns The formatted header
 */
function formatHeader(template, vars) {
    return template.replace(/\{(index|source|score)\}/g, (placeholder, key) => {
        switch (key) {
            case "index":
                return String(vars.index);
            case "source":
                return String(vars.source);
            default:
                return vars.score.toFixed(4);
        }
    });
}
|
|
232
|
+
/**
 * Render the citation marker for one chunk.
 *
 * - "inline":   `(Source: <metadata.source or #index>)`
 * - "footnote": `[^index]`
 * - "numbered": `[index]`
 * - "none" or anything else: empty string
 *
 * @param format - Citation style name
 * @param index - 1-based position of the chunk
 * @param metadata - Chunk metadata (may be undefined)
 * @returns The citation marker, or "" when no citation is wanted
 */
function formatCitation(format, index, metadata) {
    if (format === "inline") {
        const label = metadata?.source || `#${index}`;
        return `(Source: ${label})`;
    }
    if (format === "footnote") {
        return `[^${index}]`;
    }
    if (format === "numbered") {
        return `[${index}]`;
    }
    return "";
}
|
|
248
|
+
/**
 * Render selected metadata fields as a bracketed one-line footer.
 *
 * Only `source`, `title`, `author`, `date` and `page` are considered, in
 * that order, and only when their value is truthy.
 *
 * @param metadata - Chunk metadata (may be undefined)
 * @returns `"\n[key: value | ...]"`, or "" when nothing is worth showing
 */
function formatMetadata(metadata) {
    if (!metadata) {
        return "";
    }
    const fields = ["source", "title", "author", "date", "page"]
        .filter((key) => metadata[key])
        .map((key) => `${key}: ${metadata[key]}`);
    if (fields.length === 0) {
        return "";
    }
    return `\n[${fields.join(" | ")}]`;
}
|
|
264
|
+
/**
 * Cut text down to at most `maxLength` characters, preferring a word boundary.
 *
 * The cut happens at the last space at or before `maxLength`, unless there
 * is none or it lies in the first 70% of the allowed length, in which case
 * the text is cut hard at `maxLength`. The result is trimmed.
 *
 * @param text - Text to shorten
 * @param maxLength - Maximum number of characters to keep
 * @returns The original text when it already fits, otherwise the shortened text
 */
function truncateText(text, maxLength) {
    if (text.length <= maxLength) {
        return text;
    }
    const lastSpace = text.lastIndexOf(" ", maxLength);
    // A boundary that is missing or too early would waste most of the budget.
    const cutAt = lastSpace === -1 || lastSpace < maxLength * 0.7 ? maxLength : lastSpace;
    return text.slice(0, cutAt).trim();
}
|
|
278
|
+
/**
 * Drop chunks whose text is too similar to one that was already kept.
 *
 * Items are visited in order; an item is kept only if its similarity to
 * every previously kept item is not greater than `threshold`.
 *
 * @param items - Chunks with a `text` property
 * @param threshold - Similarity above which an item counts as a duplicate
 * @returns The kept items, in their original relative order
 */
function deduplicateChunks(items, threshold) {
    const kept = [];
    for (const candidate of items) {
        const isDistinct = kept.every((existing) => !(textSimilarity(candidate.text, existing.text) > threshold));
        if (isDistinct) {
            kept.push(candidate);
        }
    }
    return kept;
}
|
|
292
|
+
/**
 * Word-level text similarity using the Jaccard index
 * (|shared words| / |all distinct words|), case-insensitive.
 *
 * Input is trimmed before splitting on whitespace so that leading or
 * trailing whitespace does not add an empty-string "word" that would lower
 * the score of otherwise identical texts. Two empty (or whitespace-only)
 * texts compare as identical (1).
 *
 * @param a - First text
 * @param b - Second text
 * @returns Similarity in the range [0, 1]
 */
function textSimilarity(a, b) {
    const tokenize = (text) => new Set(text.toLowerCase().trim().split(/\s+/));
    const wordsA = tokenize(a);
    const wordsB = tokenize(b);
    let shared = 0;
    for (const word of wordsA) {
        if (wordsB.has(word)) {
            shared += 1;
        }
    }
    // split() always yields at least one token, so the union is never empty.
    const unionSize = wordsA.size + wordsB.size - shared;
    return shared / unionSize;
}
|
|
302
|
+
/**
 * Order chunks by document structure (if available).
 *
 * Chunks are grouped by `metadata.documentId` (groups appear in order of
 * first occurrence) and sorted within each group by `metadata.chunkIndex`
 * (missing index counts as 0). Chunks without metadata are tolerated and
 * grouped together under an undefined document id instead of throwing.
 * The input array is not modified.
 *
 * @param chunks - Chunks, each optionally carrying `metadata`
 * @returns A new array with each document's chunks kept together, in position order
 */
export function orderByDocumentStructure(chunks) {
    // Group by document
    const byDocument = new Map();
    for (const chunk of chunks) {
        const docId = chunk.metadata?.documentId;
        const group = byDocument.get(docId);
        if (group) {
            group.push(chunk);
        }
        else {
            byDocument.set(docId, [chunk]);
        }
    }
    // Sort each document's chunks by position
    for (const docChunks of byDocument.values()) {
        docChunks.sort((a, b) => (a.metadata?.chunkIndex || 0) - (b.metadata?.chunkIndex || 0));
    }
    // Flatten, keeping documents together
    return [...byDocument.values()].flat();
}
|
|
322
|
+
/**
 * Pick the highest-scoring sentences from a text.
 *
 * The text is split on runs of `.`, `!` and `?`; fragments of 20 characters
 * or fewer are ignored. Each remaining sentence is scored as
 * `length * number of distinct lower-cased words`, and the top `count`
 * sentences are returned in descending score order.
 *
 * @param text - Text to scan
 * @param count - Maximum number of sentences to return (default 3)
 * @returns The selected sentences, without their terminating punctuation
 */
export function extractKeySentences(text, count = 3) {
    const ranked = [];
    for (const fragment of text.split(/[.!?]+/)) {
        const sentence = fragment.trim();
        if (sentence.length > 20) {
            // Simple scoring: longer sentences with more unique words
            const distinctWords = new Set(sentence.toLowerCase().split(/\s+/));
            ranked.push({ text: sentence, score: sentence.length * distinctWords.size });
        }
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, count).map((entry) => entry.text);
}
|
|
338
|
+
//# sourceMappingURL=contextAssembly.js.map
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline Module Exports
|
|
3
|
+
*/
|
|
4
|
+
export { assembleContext, type CitationFormat, type ContextAssemblyOptions, type ContextWindow, createContextWindow, extractKeySentences, formatContextWithCitations, orderByDocumentStructure, summarizeContext, } from "./contextAssembly.js";
|
|
5
|
+
export { createRAGPipeline, type EmbeddingModelConfig, type GenerationModelConfig, type IngestOptions, type PipelineStats, type QueryOptions, RAGPipeline, type RAGPipelineConfig, type RAGResponse, } from "./RAGPipeline.js";
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline Module Exports
|
|
3
|
+
*/
|
|
4
|
+
export { assembleContext, createContextWindow, extractKeySentences, formatContextWithCitations, orderByDocumentStructure, summarizeContext, } from "./contextAssembly.js";
|
|
5
|
+
export { createRAGPipeline, RAGPipeline, } from "./RAGPipeline.js";
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Integration for generate() and stream()
|
|
3
|
+
*
|
|
4
|
+
* Provides automatic RAG pipeline setup when `rag` config is provided
|
|
5
|
+
* in GenerateOptions or StreamOptions. Handles file loading, chunking,
|
|
6
|
+
* embedding generation, vector storage, and tool creation internally
|
|
7
|
+
* so developers only need to pass `rag: { files: [...] }`.
|
|
8
|
+
*/
|
|
9
|
+
import type { Tool } from "ai";
|
|
10
|
+
import type { RAGConfig } from "./types.js";
|
|
11
|
+
/**
 * What prepareRAGTool() hands back: the search tool plus indexing statistics.
 */
export type RAGPreparedTool = {
    /** Tool object to add to the tools Record passed to the model */
    tool: Tool;
    /** Key under which `tool` should be registered in the tools Record */
    toolName: string;
    /** How many chunks were written to the vector store */
    chunksIndexed: number;
    /** How many of the requested files were actually read */
    filesLoaded: number;
};
|
|
24
|
+
/**
 * Build a document-search tool from a RAG configuration.
 *
 * Steps performed:
 * 1. Read every file listed in the config (unreadable files are skipped)
 * 2. Split each file with the configured or extension-derived chunking strategy
 * 3. Compute an embedding vector for every chunk
 * 4. Store the vectors in an in-memory vector store
 * 5. Wrap a query function over that store as a tool for the model
 *
 * @param ragConfig - RAG configuration from generate/stream options
 * @param fallbackProvider - Provider name used for the query tool's embedding
 *   model settings when ragConfig does not name an embedding provider
 * @returns The prepared tool together with its name and indexing counts
 */
export declare function prepareRAGTool(ragConfig: RAGConfig, fallbackProvider?: string): Promise<RAGPreparedTool>;
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Integration for generate() and stream()
|
|
3
|
+
*
|
|
4
|
+
* Provides automatic RAG pipeline setup when `rag` config is provided
|
|
5
|
+
* in GenerateOptions or StreamOptions. Handles file loading, chunking,
|
|
6
|
+
* embedding generation, vector storage, and tool creation internally
|
|
7
|
+
* so developers only need to pass `rag: { files: [...] }`.
|
|
8
|
+
*/
|
|
9
|
+
import { existsSync, readFileSync } from "fs";
|
|
10
|
+
import { extname, resolve } from "path";
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
import { logger } from "../utils/logger.js";
|
|
13
|
+
import { ChunkerRegistry } from "./chunking/index.js";
|
|
14
|
+
import { createVectorQueryTool, InMemoryVectorStore, } from "./retrieval/vectorQueryTool.js";
|
|
15
|
+
/**
 * Lookup table from lower-case file extension (with leading dot) to the
 * chunking strategy used for that kind of file. Built from per-strategy
 * extension lists so each strategy name is written once.
 */
const EXTENSION_TO_STRATEGY = Object.fromEntries([
    ["markdown", [".md", ".mdx"]],
    ["html", [".html", ".htm"]],
    ["json", [".json"]],
    ["latex", [".tex", ".latex"]],
    [
        "recursive",
        [
            ".txt", ".csv", ".xml", ".yaml", ".yml",
            ".ts", ".js", ".py", ".java", ".go", ".rs",
            ".c", ".cpp", ".rb", ".php", ".swift", ".kt",
        ],
    ],
].flatMap(([strategy, extensions]) => extensions.map((extension) => [extension, strategy])));
/**
 * Choose a chunking strategy from a file path's extension
 * (case-insensitive); unknown or missing extensions use "recursive".
 */
function detectStrategy(filePath) {
    const extension = extname(filePath).toLowerCase();
    const known = EXTENSION_TO_STRATEGY[extension];
    return known ? known : "recursive";
}
|
|
51
|
+
/**
 * Produce a deterministic embedding from character frequencies.
 *
 * Every UTF-16 code unit of the text increments the bucket at
 * `charCode % dimension`; the resulting count vector is scaled to unit
 * length. Text that yields an all-zero vector (e.g. the empty string) is
 * returned as zeros.
 *
 * @param text - Text to embed
 * @param dimension - Length of the returned vector
 * @returns Array of `dimension` numbers
 */
function generateSimpleEmbedding(text, dimension) {
    const buckets = new Array(dimension).fill(0);
    // Count code units per bucket.
    for (let position = 0; position < text.length; position += 1) {
        buckets[text.charCodeAt(position) % dimension] += 1;
    }
    // Euclidean norm of the count vector.
    let sumOfSquares = 0;
    for (const count of buckets) {
        sumOfSquares += count * count;
    }
    const magnitude = Math.sqrt(sumOfSquares);
    if (!(magnitude > 0)) {
        return buckets;
    }
    return buckets.map((count) => count / magnitude);
}
|
|
73
|
+
/**
 * Build a document-search tool from a RAG configuration.
 *
 * Steps performed:
 * 1. Read each listed file synchronously as UTF-8; missing or unreadable
 *    files are logged and skipped
 * 2. Chunk every file with the user-supplied strategy, or one detected from
 *    the file extension; if chunking throws, the first `chunkSize`
 *    characters of the file are indexed as a single fallback chunk
 * 3. Embed each chunk with generateSimpleEmbedding() (128 dimensions)
 * 4. Upsert the vectors into a fresh InMemoryVectorStore under "rag-index"
 * 5. Return a tool whose `execute({ query })` embeds the query the same way
 *    and returns the top-K matches with their sources
 *
 * @param ragConfig - RAG configuration from generate/stream options
 * @param fallbackProvider - Provider name passed to the vector query tool's
 *   embedding model settings when ragConfig.embeddingProvider is not set
 * @returns `{ tool, toolName, chunksIndexed, filesLoaded }`
 * @throws Error when `files` is missing/empty or none of the files could be read
 */
export async function prepareRAGTool(ragConfig, fallbackProvider) {
    const {
        files,
        strategy: userStrategy,
        chunkSize = 1000,
        chunkOverlap = 200,
        topK = 5,
        toolName = "search_knowledge_base",
        toolDescription = "REQUIRED: Search through pre-loaded local documents to find relevant information. Use this tool FIRST before any web search or other tools. This searches an indexed knowledge base of documents the user has provided.",
        embeddingProvider,
        embeddingModel,
    } = ragConfig;
    if (!files || files.length === 0) {
        throw new Error("RAG config requires at least one file path in 'files'");
    }
    const reasonOf = (error) => (error instanceof Error ? error.message : String(error));
    // 1. Load files
    const loadedFiles = [];
    for (const filePath of files) {
        const absolutePath = resolve(filePath);
        if (existsSync(absolutePath)) {
            try {
                const content = readFileSync(absolutePath, "utf-8");
                const strategy = userStrategy || detectStrategy(absolutePath);
                loadedFiles.push({ path: absolutePath, content, strategy });
            }
            catch (error) {
                logger.warn(`[RAG] Failed to read file: ${absolutePath}: ${reasonOf(error)}`);
            }
        }
        else {
            logger.warn(`[RAG] File not found, skipping: ${absolutePath}`);
        }
    }
    if (loadedFiles.length === 0) {
        throw new Error("RAG: No files could be loaded. Check that file paths exist and are readable.");
    }
    logger.info(`[RAG] Loaded ${loadedFiles.length} files for indexing`);
    // 2. Chunk all files
    const allChunks = [];
    for (const file of loadedFiles) {
        const { path, content, strategy } = file;
        try {
            const chunker = ChunkerRegistry.get(strategy);
            const pieces = await chunker.chunk(content, {
                maxSize: chunkSize,
                overlap: chunkOverlap,
                metadata: { source: path },
            });
            for (const piece of pieces) {
                const metadata = { ...piece.metadata, source: path };
                allChunks.push({ text: piece.text, metadata });
            }
        }
        catch (error) {
            logger.warn(`[RAG] Chunking failed for ${path}, using fallback: ${reasonOf(error)}`);
            // Fallback: index the leading `chunkSize` characters as one chunk
            allChunks.push({
                text: content.slice(0, chunkSize),
                metadata: { source: path, fallback: true },
            });
        }
    }
    logger.info(`[RAG] Created ${allChunks.length} chunks from ${loadedFiles.length} files`);
    // 3. Generate embeddings and store in vector store
    const EMBEDDING_DIMENSION = 128;
    const indexName = "rag-index";
    const vectorStore = new InMemoryVectorStore();
    const items = allChunks.map((chunk, position) => {
        const vector = generateSimpleEmbedding(chunk.text, EMBEDDING_DIMENSION);
        return {
            id: `rag-chunk-${position}`,
            vector,
            metadata: { text: chunk.text, ...chunk.metadata },
        };
    });
    await vectorStore.upsert(indexName, items);
    logger.info(`[RAG] Indexed ${items.length} chunks in vector store`);
    // 4. Create the search tool
    // Embedding provider/model names handed to the query tool configuration
    const provider = embeddingProvider || fallbackProvider || "vertex";
    const model = embeddingModel || "gemini-2.5-flash";
    const queryTool = createVectorQueryTool({
        id: toolName,
        description: toolDescription,
        indexName,
        embeddingModel: { provider, modelName: model },
        topK,
        includeSources: true,
    }, vectorStore);
    // Text of a hit lives in metadata.text, with the hit's own text as backup.
    const textOf = (hit) => hit.metadata?.text || hit.text || "";
    // Search callback: the query is embedded with the same local method as
    // the chunks so both live in the same vector space.
    const runSearch = async ({ query }) => {
        const queryVector = generateSimpleEmbedding(query, EMBEDDING_DIMENSION);
        const hits = await vectorStore.query({
            indexName,
            queryVector,
            topK,
        });
        if (hits.length === 0) {
            return {
                relevantContext: "No relevant documents found for the query.",
                sources: [],
                totalResults: 0,
            };
        }
        const numbered = hits.map((hit, position) => `[${position + 1}] ${textOf(hit)}`);
        const sources = hits.map((hit) => ({
            id: hit.id,
            score: hit.score,
            source: hit.metadata?.source,
            text: textOf(hit).slice(0, 200),
        }));
        return {
            relevantContext: numbered.join("\n\n"),
            sources,
            totalResults: hits.length,
        };
    };
    // Convert to Vercel AI SDK Tool format
    const aiTool = {
        description: queryTool.description,
        parameters: z.object({
            query: z
                .string()
                .describe("The search query to find relevant information"),
        }),
        execute: runSearch,
    };
    return {
        tool: aiTool,
        toolName,
        chunksIndexed: allChunks.length,
        filesLoaded: loadedFiles.length,
    };
}
|
|
212
|
+
//# sourceMappingURL=ragIntegration.js.map
|