@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,756 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG CLI Commands for NeuroLink
|
|
3
|
+
*
|
|
4
|
+
* Implements commands for RAG document processing:
|
|
5
|
+
* - neurolink rag chunk <file> - Chunk a document
|
|
6
|
+
* - neurolink rag index <file> - Index a document for retrieval
|
|
7
|
+
* - neurolink rag query <query> - Query indexed documents
|
|
8
|
+
*/
|
|
9
|
+
import chalk from "chalk";
|
|
10
|
+
import { existsSync } from "fs";
|
|
11
|
+
import { readFile, writeFile } from "fs/promises";
|
|
12
|
+
import ora from "ora";
|
|
13
|
+
import { basename, extname, resolve } from "path";
|
|
14
|
+
import { ProviderFactory } from "../../lib/factories/providerFactory.js";
|
|
15
|
+
import { ProviderRegistry } from "../../lib/factories/providerRegistry.js";
|
|
16
|
+
import { ChunkerRegistry } from "../../lib/rag/chunking/chunkerRegistry.js";
|
|
17
|
+
import { GraphRAG } from "../../lib/rag/graphRag/graphRAG.js";
|
|
18
|
+
import { LLMMetadataExtractor } from "../../lib/rag/metadata/metadataExtractor.js";
|
|
19
|
+
import { createHybridSearch, InMemoryBM25Index, } from "../../lib/rag/retrieval/hybridSearch.js";
|
|
20
|
+
import { InMemoryVectorStore } from "../../lib/rag/retrieval/vectorQueryTool.js";
|
|
21
|
+
import { globalSession } from "../../lib/session/globalSessionState.js";
|
|
22
|
+
import { logger } from "../../lib/utils/logger.js";
|
|
23
|
+
import { getBestProvider } from "../../lib/utils/providerUtils.js";
|
|
24
|
+
/**
|
|
25
|
+
* Ensure the NeuroLink SDK is initialized: calls globalSession.getOrCreateNeuroLink(), then registers all providers when ProviderRegistry.isRegistered() returns a falsy value
|
|
26
|
+
* This follows the same pattern as the 'generate' command (per the original author; that command is not visible here). Resolves with no value.
|
|
27
|
+
*/
|
|
28
|
+
async function ensureSDKInitialized() {
|
|
29
|
+
// Getting or creating the NeuroLink instance ensures proper SDK initialization; the return value is not used here
|
|
30
|
+
// Per the original note, this registers all providers via the ProviderRegistry. NOTE(review): the call is not awaited - confirm getOrCreateNeuroLink() is synchronous
|
|
31
|
+
globalSession.getOrCreateNeuroLink();
|
|
32
|
+
// Also ensure providers are registered (belt and suspenders approach): registerAllProviders() is awaited only when isRegistered() is falsy
|
|
33
|
+
if (!ProviderRegistry.isRegistered()) {
|
|
34
|
+
await ProviderRegistry.registerAllProviders();
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Default embedding model name for each provider key. Keys in the same family (vertex / google / google-vertex, openai / azure / azure-openai, bedrock / amazon-bedrock) share one model name.
|
|
39
|
+
* These are dedicated embedding models that support the embed() method (per the original author; embed() is not visible in this chunk).
|
|
40
|
+
*/
|
|
41
|
+
const DEFAULT_EMBEDDING_MODELS = {
|
|
42
|
+
vertex: "text-embedding-004",
|
|
43
|
+
google: "text-embedding-004",
|
|
44
|
+
"google-vertex": "text-embedding-004",
|
|
45
|
+
openai: "text-embedding-3-small",
|
|
46
|
+
azure: "text-embedding-3-small",
|
|
47
|
+
"azure-openai": "text-embedding-3-small",
|
|
48
|
+
bedrock: "amazon.titan-embed-text-v2:0",
|
|
49
|
+
"amazon-bedrock": "amazon.titan-embed-text-v2:0",
|
|
50
|
+
};
|
|
51
|
+
/**
 * Environment variables that may name an embedding model, per provider.
 *
 * Each list is in lookup order: the first variable that is set wins. Aliases
 * of the same vendor share the same variables but may prefer them in a
 * different order. Every key gets its own array instance.
 */
const EMBEDDING_ENV_VARS = (() => {
    const vertexFirst = ["VERTEX_EMBEDDING_MODEL", "GOOGLE_EMBEDDING_MODEL"];
    const azureFirst = ["AZURE_EMBEDDING_MODEL", "AZURE_OPENAI_EMBEDDING_MODEL"];
    const bedrockVars = ["BEDROCK_EMBEDDING_MODEL", "AWS_EMBEDDING_MODEL"];
    return {
        vertex: [...vertexFirst],
        google: [...vertexFirst].reverse(),
        "google-vertex": [...vertexFirst],
        openai: ["OPENAI_EMBEDDING_MODEL"],
        azure: [...azureFirst],
        "azure-openai": [...azureFirst].reverse(),
        bedrock: [...bedrockVars],
        "amazon-bedrock": [...bedrockVars],
    };
})();
|
|
65
|
+
/**
 * Environment variables that hold each provider's default (generation) model.
 *
 * These are consulted in case the user stored an embedding model in one of
 * them. Each row is `[providerName, ...envVarNames]`, turned into a
 * provider-name -> env-var-list lookup.
 */
const PROVIDER_MODEL_ENV_VARS = Object.fromEntries([
    ["vertex", "VERTEX_MODEL"],
    ["google", "GOOGLE_AI_MODEL"],
    ["google-vertex", "VERTEX_MODEL"],
    ["openai", "OPENAI_MODEL"],
    ["azure", "AZURE_OPENAI_MODEL"],
    ["azure-openai", "AZURE_OPENAI_MODEL"],
    ["bedrock", "BEDROCK_MODEL", "BEDROCK_MODEL_ID"],
    ["amazon-bedrock", "BEDROCK_MODEL", "BEDROCK_MODEL_ID"],
].map(([providerName, ...envVarNames]) => [providerName, envVarNames]));
|
|
79
|
+
/**
 * Decide, from its name alone, whether a model looks like an embedding model.
 *
 * @param {string} modelName - Model identifier to inspect
 * @returns {boolean} true when the name matches one of the known
 *   case-insensitive embedding naming hints
 */
function isEmbeddingModel(modelName) {
    const nameHints = [/embed/i, /text-embedding/i, /titan-embed/i, /gecko/i];
    for (const hint of nameHints) {
        if (hint.test(modelName)) {
            return true;
        }
    }
    return false;
}
|
|
91
|
+
/**
 * Resolve the provider/model pair to use for generating embeddings.
 *
 * The provider is the one passed in, or the result of getBestProvider() when
 * none is given. The model is then picked in this order:
 * 1. CLI --model flag (only if its name looks like an embedding model)
 * 2. NEUROLINK_EMBEDDING_MODEL env var
 * 3. Provider-specific embedding env vars (e.g., VERTEX_EMBEDDING_MODEL)
 * 4. Provider's default model env var (only if it looks like an embedding model)
 * 5. Provider-specific default embedding model
 * 6. Fallback to OpenAI text-embedding-3-small (this also switches the provider)
 *
 * @param {string} [provider] - Provider name requested by the user, if any
 * @param {string} [model] - Model name requested by the user, if any
 * @returns {Promise<{provider: string, model: string}>} Provider and model to embed with
 * @throws {Error} When no provider was given and auto-detection fails; the
 *   underlying failure is attached as `cause`
 */
async function getEmbeddingModel(provider, model) {
    // The tables are plain objects indexed with user input: only own keys count,
    // otherwise a name such as "constructor" would resolve to an inherited member
    const ownEntry = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    // Resolve provider using the same logic as generate/stream commands
    let resolvedProvider = provider;
    if (!resolvedProvider) {
        try {
            resolvedProvider = await getBestProvider();
            logger.debug(`Auto-detected best available provider: ${resolvedProvider}`);
        }
        catch (error) {
            // Keep the original failure reachable for debugging via `cause`
            throw new Error(`No AI providers available for embeddings. Please configure at least one provider:\n` +
                ` - OpenAI: Set OPENAI_API_KEY\n` +
                ` - Google Vertex: Set GOOGLE_CLOUD_PROJECT_ID and authenticate with gcloud\n` +
                ` - Amazon Bedrock: Configure AWS credentials\n` +
                `Or specify a provider explicitly with --provider`, { cause: error });
        }
    }
    const normalizedProvider = resolvedProvider.toLowerCase();
    // Priority 1: CLI --model flag (if it's an embedding model)
    if (model) {
        if (isEmbeddingModel(model)) {
            logger.debug(`Using CLI-provided embedding model: ${model}`);
            return { provider: resolvedProvider, model };
        }
        // Say so instead of dropping the user's choice without a trace
        logger.warn(`Ignoring --model ${model}: it does not look like an embedding model`);
    }
    // Priority 2: Global NEUROLINK_EMBEDDING_MODEL env var
    const globalEmbeddingModel = process.env.NEUROLINK_EMBEDDING_MODEL;
    if (globalEmbeddingModel) {
        logger.debug(`Using NEUROLINK_EMBEDDING_MODEL: ${globalEmbeddingModel}`);
        return { provider: resolvedProvider, model: globalEmbeddingModel };
    }
    // Priority 3: Provider-specific embedding env vars
    for (const envVar of ownEntry(EMBEDDING_ENV_VARS, normalizedProvider) ?? []) {
        const envModel = process.env[envVar];
        if (envModel) {
            logger.debug(`Using ${envVar}: ${envModel}`);
            return { provider: resolvedProvider, model: envModel };
        }
    }
    // Priority 4: Check if provider's default model is an embedding model
    for (const envVar of ownEntry(PROVIDER_MODEL_ENV_VARS, normalizedProvider) ?? []) {
        const envModel = process.env[envVar];
        if (envModel && isEmbeddingModel(envModel)) {
            logger.debug(`Using ${envVar} (detected as embedding model): ${envModel}`);
            return { provider: resolvedProvider, model: envModel };
        }
    }
    // Priority 5: Provider-specific default embedding model
    const defaultEmbeddingModel = ownEntry(DEFAULT_EMBEDDING_MODELS, normalizedProvider);
    if (defaultEmbeddingModel) {
        logger.debug(`Using default embedding model for ${resolvedProvider}: ${defaultEmbeddingModel}`);
        return { provider: resolvedProvider, model: defaultEmbeddingModel };
    }
    // Priority 6: Fallback to OpenAI's embedding model if provider not found
    logger.warn(`No default embedding model for provider ${resolvedProvider}, falling back to OpenAI text-embedding-3-small`);
    return { provider: "openai", model: "text-embedding-3-small" };
}
|
|
170
|
+
/**
 * In-memory storage for indexed documents
 * In production, this would be persisted to a vector database
 *
 * Keyed by index name; the `index` subcommand stores an object holding the
 * vector store, BM25 index, GraphRAG instance and chunks for that index, and
 * the `query` subcommand reads it back.
 *
 * NOTE(review): this Map lives at module scope and nothing in this file writes
 * it to disk, so its contents presumably do not survive between separate CLI
 * invocations - confirm whether `rag query` can see an index built by an
 * earlier `rag index` run.
 */
const indexedDocuments = new Map();
|
|
175
|
+
/**
 * Pick a chunking strategy from a file's extension.
 *
 * The extension is compared case-insensitively. Markdown, HTML, JSON and LaTeX
 * files get their dedicated strategy; everything else (including files without
 * an extension) uses "recursive".
 *
 * @param {string} filePath - Path or file name to inspect
 * @returns {string} Name of the chunking strategy
 */
function detectDocumentType(filePath) {
    switch (extname(filePath).toLowerCase()) {
        case ".md":
        case ".markdown":
            return "markdown";
        case ".html":
        case ".htm":
            return "html";
        case ".json":
            return "json";
        case ".tex":
        case ".latex":
            return "latex";
        default:
            // .txt, .csv, .pdf and any unknown extension
            return "recursive";
    }
}
|
|
194
|
+
/**
 * Render chunks for display.
 *
 * - "json": the chunks pretty-printed with JSON.stringify (2-space indent)
 * - "table": a fixed-column text table (#, ID, Length, Preview); the header
 *   and separator are emitted even when there are no chunks
 * - anything else: each chunk's full text under a "--- Chunk N ---" banner
 *
 * @param {Array<{id: string, text: string}>} chunks - Chunks to render
 * @param {string} format - "json", "table" or "text"
 * @returns {string} The formatted output
 */
function formatChunks(chunks, format) {
    if (format === "json") {
        return JSON.stringify(chunks, null, 2);
    }
    if (format === "table") {
        const PREVIEW_LENGTH = 50;
        // Fixed header list so an empty chunk list still yields a valid table
        const headers = ["#", "ID", "Length", "Preview"];
        const rows = chunks.map((chunk, i) => {
            // Line breaks (including CR) would break the row layout
            const preview = chunk.text.slice(0, PREVIEW_LENGTH).replace(/\r\n|\r|\n/g, " ");
            return {
                "#": i + 1,
                ID: chunk.id.slice(0, 8),
                Length: chunk.text.length,
                // Only mark the preview as cut when text was actually dropped
                Preview: chunk.text.length > PREVIEW_LENGTH ? `${preview}...` : preview,
            };
        });
        // reduce instead of Math.max(...spread): no argument-count limit on large inputs
        const colWidths = headers.map((h) => rows.reduce((width, row) => Math.max(width, String(row[h]).length), h.length));
        const renderRow = (cells) => cells.map((cell, i) => String(cell).padEnd(colWidths[i])).join(" | ");
        return [
            renderRow(headers),
            colWidths.map((w) => "-".repeat(w)).join("-+-"),
            ...rows.map((row) => renderRow(headers.map((h) => row[h]))),
        ].join("\n");
    }
    // Default text format
    return chunks
        .map((chunk, i) => `--- Chunk ${i + 1} (${chunk.text.length} chars) ---\n${chunk.text}\n`)
        .join("\n");
}
|
|
225
|
+
/**
 * Create the chunk subcommand
 *
 * The returned yargs command module reads the given file as UTF-8, splits it
 * with the chunker registered for the chosen strategy (or the one derived from
 * the file extension), optionally merges LLM-extracted title/summary/keywords
 * into each chunk's metadata, and prints the formatted chunks or writes them
 * to --output. Any failure is reported on the spinner and ends the process
 * with exit code 1.
 *
 * @returns {{command: string, describe: string, builder: Function, handler: Function}}
 */
function createChunkCommand() {
    return {
        command: "chunk <file>",
        describe: "Chunk a document into smaller pieces for processing",
        builder: (yargs) => yargs
            .positional("file", {
            describe: "Path to the file to chunk",
            type: "string",
            demandOption: true,
        })
            .option("strategy", {
            alias: "s",
            describe: "Chunking strategy to use",
            choices: [
                "character",
                "recursive",
                "sentence",
                "token",
                "markdown",
                "html",
                "json",
                "latex",
                "semantic",
                "semantic-markdown",
            ],
            type: "string",
        })
            .option("maxSize", {
            alias: "m",
            describe: "Maximum chunk size",
            type: "number",
            default: 1000,
        })
            .option("overlap", {
            alias: "o",
            describe: "Overlap between chunks",
            type: "number",
            default: 200,
        })
            .option("format", {
            alias: "f",
            describe: "Output format",
            choices: ["json", "text", "table"],
            default: "text",
        })
            .option("output", {
            describe: "Output file path (optional)",
            type: "string",
        })
            .option("extract", {
            alias: "e",
            describe: "Extract metadata (title, summary, keywords)",
            type: "boolean",
            default: false,
        })
            .option("provider", {
            alias: "p",
            describe: "Provider for semantic chunking/metadata extraction (uses default from config/env if not specified)",
            type: "string",
        })
            .option("model", {
            describe: "Model for semantic chunking/metadata extraction (uses default from config/env if not specified)",
            type: "string",
        })
            .option("verbose", {
            alias: "v",
            describe: "Enable verbose output",
            type: "boolean",
            default: false,
        }),
        handler: async (args) => {
            const spinner = ora("Processing document...").start();
            try {
                // Validate file exists
                const filePath = resolve(args.file);
                if (!existsSync(filePath)) {
                    spinner.fail(chalk.red(`File not found: ${filePath}`));
                    process.exit(1);
                }
                // Read file content
                const content = await readFile(filePath, "utf-8");
                const fileName = basename(filePath);
                // Determine strategy
                const strategy = args.strategy || detectDocumentType(filePath);
                spinner.text = `Chunking with ${strategy} strategy...`;
                // Validate chunk parameters. A non-numeric flag value arrives as NaN,
                // which passes plain comparisons, so finiteness is checked explicitly.
                const maxSize = args.maxSize ?? 1000;
                const overlap = args.overlap ?? 200;
                if (!Number.isFinite(maxSize) || maxSize <= 0) {
                    spinner.fail(chalk.red("maxSize must be greater than 0"));
                    process.exit(1);
                }
                if (!Number.isFinite(overlap) || overlap < 0) {
                    spinner.fail(chalk.red("overlap must be a non-negative number"));
                    process.exit(1);
                }
                if (overlap >= maxSize) {
                    spinner.fail(chalk.red("overlap must be less than maxSize"));
                    process.exit(1);
                }
                // Get chunker and chunk the document
                const chunker = ChunkerRegistry.get(strategy);
                const chunks = await chunker.chunk(content, {
                    maxSize,
                    overlap,
                    metadata: { source: fileName },
                });
                spinner.succeed(chalk.green(`Created ${chunks.length} chunks from ${fileName}`));
                // Extract metadata if requested
                if (args.extract) {
                    // Ensure providers are registered for metadata extraction
                    await ensureSDKInitialized();
                    spinner.start("Extracting metadata...");
                    const extractor = new LLMMetadataExtractor({
                        provider: args.provider,
                        modelName: args.model,
                    });
                    const results = await extractor.extract(chunks, {
                        title: true,
                        summary: true,
                        keywords: true,
                    });
                    // Merge metadata into chunks; results are matched to chunks by position
                    for (let i = 0; i < chunks.length && i < results.length; i++) {
                        const result = results[i];
                        if (result.title) {
                            chunks[i].metadata.title = result.title;
                        }
                        if (result.summary) {
                            chunks[i].metadata.summary = result.summary;
                        }
                        if (result.keywords) {
                            chunks[i].metadata.keywords = result.keywords;
                        }
                    }
                    spinner.succeed(chalk.green("Metadata extracted"));
                }
                // Format output
                const output = formatChunks(chunks, args.format || "text");
                // Write to file or stdout
                if (args.output) {
                    await writeFile(args.output, output, "utf-8");
                    logger.always(chalk.green(`Output written to ${args.output}`));
                }
                else {
                    logger.always("\n" + output);
                }
                // Show summary
                if (args.verbose) {
                    const totalChars = chunks.reduce((sum, c) => sum + c.text.length, 0);
                    // An empty document yields zero chunks; avoid printing "NaN chars"
                    const avgChunkSize = chunks.length > 0 ? Math.round(totalChars / chunks.length) : 0;
                    logger.always(chalk.dim("\n--- Summary ---"));
                    logger.always(chalk.dim(`Strategy: ${strategy}`));
                    logger.always(chalk.dim(`Total chunks: ${chunks.length}`));
                    logger.always(chalk.dim(`Avg chunk size: ${avgChunkSize} chars`));
                }
            }
            catch (error) {
                spinner.fail(chalk.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
                process.exit(1);
            }
        },
    };
}
|
|
386
|
+
/**
 * Create the index subcommand
 *
 * The returned yargs command module chunks the given file, embeds every chunk
 * one at a time with the provider/model chosen by getEmbeddingModel(), loads
 * the chunks into an in-memory vector store and BM25 index (plus a GraphRAG
 * graph when --graph is set), and stores the result in `indexedDocuments`
 * under the index name (defaults to the file name without its last extension).
 * Any failure is reported on the spinner and ends the process with exit code 1.
 *
 * @returns {{command: string, describe: string, builder: Function, handler: Function}}
 */
function createIndexCommand() {
    return {
        command: "index <file>",
        describe: "Index a document for semantic search",
        builder: (yargs) => yargs
            .positional("file", {
            describe: "Path to the file to index",
            type: "string",
            demandOption: true,
        })
            .option("indexName", {
            alias: "n",
            describe: "Name for the index",
            type: "string",
        })
            .option("strategy", {
            alias: "s",
            describe: "Chunking strategy to use",
            choices: [
                "character",
                "recursive",
                "sentence",
                "token",
                "markdown",
                "html",
                "json",
                "latex",
                "semantic",
                "semantic-markdown",
            ],
            type: "string",
        })
            .option("maxSize", {
            alias: "m",
            describe: "Maximum chunk size",
            type: "number",
            default: 1000,
        })
            .option("overlap", {
            alias: "o",
            describe: "Overlap between chunks",
            type: "number",
            default: 200,
        })
            .option("provider", {
            alias: "p",
            describe: "Provider for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("model", {
            describe: "Model for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("graph", {
            alias: "g",
            describe: "Build Graph RAG index",
            type: "boolean",
            default: false,
        })
            .option("verbose", {
            alias: "v",
            describe: "Enable verbose output",
            type: "boolean",
            default: false,
        }),
        handler: async (args) => {
            const spinner = ora("Indexing document...").start();
            try {
                // Ensure providers are registered before use
                await ensureSDKInitialized();
                // Validate file exists
                const filePath = resolve(args.file);
                if (!existsSync(filePath)) {
                    spinner.fail(chalk.red(`File not found: ${filePath}`));
                    process.exit(1);
                }
                // Read file content
                const content = await readFile(filePath, "utf-8");
                const fileName = basename(filePath);
                const indexName = args.indexName || fileName.replace(/\.[^.]+$/, "");
                // Determine strategy
                const strategy = args.strategy || detectDocumentType(filePath);
                spinner.text = `Chunking with ${strategy} strategy...`;
                // Validate chunk parameters. A non-numeric flag value arrives as NaN,
                // which passes plain comparisons, so finiteness is checked explicitly.
                const maxSize = args.maxSize ?? 1000;
                const overlap = args.overlap ?? 200;
                if (!Number.isFinite(maxSize) || maxSize <= 0) {
                    spinner.fail(chalk.red("maxSize must be greater than 0"));
                    process.exit(1);
                }
                if (!Number.isFinite(overlap) || overlap < 0) {
                    spinner.fail(chalk.red("overlap must be a non-negative number"));
                    process.exit(1);
                }
                if (overlap >= maxSize) {
                    spinner.fail(chalk.red("overlap must be less than maxSize"));
                    process.exit(1);
                }
                // Chunk the document
                const chunker = ChunkerRegistry.get(strategy);
                const chunks = await chunker.chunk(content, {
                    maxSize,
                    overlap,
                    metadata: { source: fileName },
                });
                spinner.text = `Generating embeddings for ${chunks.length} chunks...`;
                // Get embedding provider with smart model detection
                // Automatically uses the appropriate embedding model for the provider
                // Uses getBestProvider() to auto-detect available providers (same as generate/stream)
                const { provider: embeddingProviderName, model: embeddingModelName } = await getEmbeddingModel(args.provider, args.model);
                if (args.verbose) {
                    logger.always(chalk.dim(`Using embedding provider: ${embeddingProviderName}, model: ${embeddingModelName}`));
                }
                const embeddingProvider = await ProviderFactory.createProvider(embeddingProviderName, embeddingModelName);
                // Verify the provider has an embed method
                if (typeof embeddingProvider.embed !== "function") {
                    spinner.fail(chalk.red(`Provider ${embeddingProviderName} with model ${embeddingModelName} does not support embeddings. ` +
                        `Please use an embedding model like text-embedding-004 (Vertex) or text-embedding-3-small (OpenAI).`));
                    process.exit(1);
                }
                // Generate embeddings one chunk at a time; embeddings[i] belongs to chunks[i]
                const embeddings = [];
                for (const chunk of chunks) {
                    const embedding = await embeddingProvider.embed(chunk.text);
                    embeddings.push(embedding);
                    chunk.embedding = embedding;
                }
                // Create indices
                const vectorStore = new InMemoryVectorStore();
                const bm25Index = new InMemoryBM25Index();
                const graphRag = new GraphRAG({ threshold: 0.7 });
                // Index in vector store; the chunk text rides along in the metadata
                await vectorStore.upsert(indexName, chunks.map((chunk, i) => ({
                    id: chunk.id,
                    vector: embeddings[i],
                    metadata: { ...chunk.metadata, text: chunk.text },
                })));
                // Index in BM25
                await bm25Index.addDocuments(chunks.map((chunk) => ({
                    id: chunk.id,
                    text: chunk.text,
                    metadata: chunk.metadata,
                })));
                // Build Graph RAG if requested
                if (args.graph) {
                    spinner.text = "Building knowledge graph...";
                    graphRag.createGraph(chunks.map((c) => ({ text: c.text, metadata: c.metadata })), embeddings.map((v) => ({ vector: v })));
                }
                // Store in memory
                indexedDocuments.set(indexName, {
                    vectorStore,
                    bm25Index,
                    graphRag,
                    chunks,
                });
                spinner.succeed(chalk.green(`Indexed ${chunks.length} chunks as "${indexName}"${args.graph ? " with Graph RAG" : ""}`));
                if (args.verbose) {
                    logger.always(chalk.dim("\n--- Index Summary ---"));
                    logger.always(chalk.dim(`Index name: ${indexName}`));
                    logger.always(chalk.dim(`Total chunks: ${chunks.length}`));
                    logger.always(chalk.dim(`Embedding dimension: ${embeddings[0]?.length || 0}`));
                    if (args.graph) {
                        const stats = graphRag.getStats();
                        logger.always(chalk.dim(`Graph nodes: ${stats.nodeCount}`));
                        logger.always(chalk.dim(`Graph edges: ${stats.edgeCount}`));
                    }
                }
            }
            catch (error) {
                spinner.fail(chalk.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
                process.exit(1);
            }
        },
    };
}
|
|
561
|
+
/**
 * Create the query subcommand
 *
 * The returned yargs command module looks up an index in `indexedDocuments`
 * (the named one, or the first stored one), embeds the query with the
 * provider/model chosen by getEmbeddingModel(), runs a Graph RAG, hybrid or
 * plain vector search, and prints the hits as JSON, a short preview list, or
 * full text. Hits from every search type are normalised to
 * `{ id, score, text }` with a numeric score and string text, so formatting
 * cannot fail on a missing field. Any failure is reported on the spinner and
 * ends the process with exit code 1.
 *
 * @returns {{command: string, describe: string, builder: Function, handler: Function}}
 */
function createQueryCommand() {
    return {
        command: "query <query>",
        describe: "Query indexed documents",
        builder: (yargs) => yargs
            .positional("query", {
            describe: "Search query",
            type: "string",
            demandOption: true,
        })
            .option("indexName", {
            alias: "n",
            describe: "Name of the index to query",
            type: "string",
        })
            .option("topK", {
            alias: "k",
            describe: "Number of results to return",
            type: "number",
            default: 5,
        })
            // NOTE(review): "-h" is commonly bound to --help in yargs CLIs - confirm
            // this alias does not clash with the parent parser's configuration
            .option("hybrid", {
            alias: "h",
            describe: "Use hybrid search (vector + BM25)",
            type: "boolean",
            default: false,
        })
            .option("graph", {
            alias: "g",
            describe: "Use Graph RAG search",
            type: "boolean",
            default: false,
        })
            .option("provider", {
            alias: "p",
            describe: "Provider for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("model", {
            describe: "Model for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("format", {
            alias: "f",
            describe: "Output format",
            choices: ["json", "text", "table"],
            default: "text",
        })
            .option("verbose", {
            alias: "v",
            describe: "Enable verbose output",
            type: "boolean",
            default: false,
        }),
        handler: async (args) => {
            const spinner = ora("Searching...").start();
            try {
                // Ensure providers are registered before use
                await ensureSDKInitialized();
                // Find index
                const indexName = args.indexName || Array.from(indexedDocuments.keys())[0];
                if (!indexName) {
                    spinner.fail(chalk.red("No indexed documents found. Run 'neurolink rag index' first."));
                    process.exit(1);
                }
                const indexed = indexedDocuments.get(indexName);
                if (!indexed) {
                    spinner.fail(chalk.red(`Index "${indexName}" not found.`));
                    process.exit(1);
                }
                const { vectorStore, bm25Index, graphRag } = indexed;
                // Generate query embedding with smart model detection
                // Uses getBestProvider() to auto-detect available providers (same as generate/stream)
                const { provider: embeddingProviderName, model: embeddingModelName } = await getEmbeddingModel(args.provider, args.model);
                if (args.verbose) {
                    logger.always(chalk.dim(`Using embedding provider: ${embeddingProviderName}, model: ${embeddingModelName}`));
                }
                const embeddingProvider = await ProviderFactory.createProvider(embeddingProviderName, embeddingModelName);
                // Verify the provider has an embed method
                if (typeof embeddingProvider.embed !== "function") {
                    spinner.fail(chalk.red(`Provider ${embeddingProviderName} with model ${embeddingModelName} does not support embeddings. ` +
                        `Please use an embedding model like text-embedding-004 (Vertex) or text-embedding-3-small (OpenAI).`));
                    process.exit(1);
                }
                const queryEmbedding = await embeddingProvider.embed(args.query);
                const topK = args.topK || 5;
                // Single place where a backend hit becomes a printable result, so the
                // formatting code below can rely on a numeric score and a string text
                const toResult = (id, score, text) => ({
                    id,
                    score: Number.isFinite(score) ? score : 0,
                    text: typeof text === "string" ? text : "",
                });
                let results;
                if (args.graph) {
                    // Graph RAG search
                    spinner.text = "Searching knowledge graph...";
                    const graphResults = graphRag.query({
                        query: queryEmbedding,
                        topK,
                    });
                    results = graphResults.map((r) => toResult(r.id, r.score, r.content));
                }
                else if (args.hybrid) {
                    // Hybrid search
                    spinner.text = "Performing hybrid search...";
                    const hybridSearch = createHybridSearch({
                        vectorStore,
                        bm25Index,
                        indexName,
                        embeddingModel: {
                            provider: embeddingProviderName,
                            modelName: embeddingModelName,
                        },
                    });
                    const hybridResults = await hybridSearch(args.query, {
                        topK,
                    });
                    results = hybridResults.map((r) => toResult(r.id, r.score, r.text));
                }
                else {
                    // Vector search
                    spinner.text = "Performing vector search...";
                    const vectorResults = await vectorStore.query({
                        indexName,
                        queryVector: queryEmbedding,
                        topK,
                    });
                    // The chunk text was stored in the metadata at index time
                    results = vectorResults.map((r) => toResult(r.id, r.score, r.metadata?.text || r.text));
                }
                spinner.succeed(chalk.green(`Found ${results.length} results`));
                // Format and display results
                if (args.format === "json") {
                    logger.always(JSON.stringify(results, null, 2));
                }
                else if (args.format === "table") {
                    const PREVIEW_LENGTH = 200;
                    logger.always("\n" + chalk.bold("Search Results:"));
                    results.forEach((r, i) => {
                        logger.always(chalk.cyan(`\n[${i + 1}] Score: ${r.score.toFixed(4)}`));
                        // Only mark the preview as cut when text was actually dropped
                        logger.always(r.text.length > PREVIEW_LENGTH ? r.text.slice(0, PREVIEW_LENGTH) + "..." : r.text);
                    });
                }
                else {
                    logger.always("\n" + chalk.bold("Search Results:"));
                    results.forEach((r, i) => {
                        logger.always(chalk.cyan(`\n--- Result ${i + 1} (Score: ${r.score.toFixed(4)}) ---`));
                        logger.always(r.text);
                    });
                }
                if (args.verbose) {
                    logger.always(chalk.dim("\n--- Query Info ---"));
                    logger.always(chalk.dim(`Index: ${indexName}`));
                    logger.always(chalk.dim(`Query: ${args.query}`));
                    logger.always(chalk.dim(`Search type: ${args.graph ? "Graph RAG" : args.hybrid ? "Hybrid" : "Vector"}`));
                }
            }
            catch (error) {
                spinner.fail(chalk.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
                process.exit(1);
            }
        },
    };
}
|
|
732
|
+
/**
 * Factory for the `rag` CLI command tree.
 */
export class RAGCommandFactory {
    /**
     * Build the parent `rag` command module.
     *
     * Its builder registers the chunk, index and query subcommands, in that
     * order, and requires that one of them is given.
     *
     * @returns {{command: string, describe: string, builder: Function, handler: Function}}
     */
    static createRAGCommands() {
        const registerSubcommands = (yargs) => {
            const subcommandFactories = [
                createChunkCommand,
                createIndexCommand,
                createQueryCommand,
            ];
            // Each subcommand is created right before it is registered
            const parser = subcommandFactories.reduce((chain, makeCommand) => chain.command(makeCommand()), yargs);
            return parser.demandCommand(1, "Please specify a subcommand");
        };
        // The parent itself does nothing; yargs dispatches to the subcommand
        const noop = () => { };
        return {
            command: "rag <subcommand>",
            describe: "RAG document processing commands",
            builder: registerSubcommands,
            handler: noop,
        };
    }
}
|
|
754
|
+
// Export for CLI registration: the `rag` command module, built once when this
// module is first evaluated
export const ragCommand = RAGCommandFactory.createRAGCommands();
|
|
756
|
+
//# sourceMappingURL=rag.js.map
|