@juspay/neurolink 9.2.0 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +52 -30
- package/dist/agent/directTools.d.ts +8 -8
- package/dist/cli/commands/config.d.ts +3 -3
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +5 -5
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +8 -8
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/modelTypes.d.ts +2 -2
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +2 -2
- package/dist/types/common.d.ts +0 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/modelTypes.d.ts +20 -20
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/package.json +1 -1
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Search Implementation
|
|
3
|
+
*
|
|
4
|
+
* Combines vector (dense) search with BM25 (sparse) search for improved retrieval.
|
|
5
|
+
* Supports multiple fusion methods: Reciprocal Rank Fusion (RRF) and Linear Combination.
|
|
6
|
+
*/
|
|
7
|
+
import { ProviderFactory } from "../../factories/providerFactory.js";
|
|
8
|
+
import { logger } from "../../utils/logger.js";
|
|
9
|
+
import { rerank } from "../reranker/reranker.js";
|
|
10
|
+
/**
 * In-memory BM25 implementation for testing and development.
 *
 * Documents are tokenized when they are added; queries are tokenized the same
 * way and every stored document is scored with the BM25 formula using the
 * `k1` and `b` fields of the instance.
 */
export class InMemoryBM25Index {
    /** Indexed documents keyed by id; each value is `{ text, tokens, metadata }`. */
    documents = new Map();
    /** Mean token count over all indexed documents (0 while the index is empty). */
    avgDocLength = 0;
    k1 = 1.5; // BM25 parameter
    b = 0.75; // BM25 parameter
    /**
     * Score every indexed document against the query and return the best matches.
     *
     * @param query - Free-text query; tokenized with `tokenize()`
     * @param topK - Maximum number of results (default: 10)
     * @returns Array of `{ id, score, text, metadata }` sorted by score descending;
     *   documents that share no term with the query are omitted
     */
    async search(query, topK = 10) {
        const queryTokens = this.tokenize(query);
        if (queryTokens.length === 0 || this.documents.size === 0) {
            return [];
        }
        const queryTerms = new Set(queryTokens);
        // Single pass over each document: count only the terms that occur in
        // the query, and derive document frequency from the same counts.
        const documentFrequency = new Map();
        const candidates = [];
        for (const [id, doc] of this.documents) {
            const termCounts = new Map();
            for (const token of doc.tokens) {
                if (queryTerms.has(token)) {
                    termCounts.set(token, (termCounts.get(token) || 0) + 1);
                }
            }
            for (const term of termCounts.keys()) {
                documentFrequency.set(term, (documentFrequency.get(term) || 0) + 1);
            }
            candidates.push({ id, doc, termCounts });
        }
        // Calculate IDF for each query term
        const idfValues = new Map();
        for (const term of queryTerms) {
            const docCount = documentFrequency.get(term) || 0;
            const idf = Math.log((this.documents.size - docCount + 0.5) / (docCount + 0.5) + 1);
            idfValues.set(term, idf);
        }
        // Calculate BM25 score for each document
        const scores = [];
        for (const { id, doc, termCounts } of candidates) {
            // The length-normalization term does not depend on the query token.
            const lengthNorm = this.k1 * (1 - this.b + this.b * (doc.tokens.length / this.avgDocLength));
            let score = 0;
            // Iterate the raw token list (not the set) so a term repeated in the
            // query contributes once per occurrence.
            for (const token of queryTokens) {
                const tf = termCounts.get(token) || 0;
                const idf = idfValues.get(token) || 0;
                score += idf * ((tf * (this.k1 + 1)) / (tf + lengthNorm));
            }
            if (score > 0) {
                scores.push({
                    id,
                    score,
                    text: doc.text,
                    metadata: doc.metadata,
                });
            }
        }
        // Sort by score descending
        scores.sort((a, b) => b.score - a.score);
        return scores.slice(0, topK);
    }
    /**
     * Add (or replace, when the id already exists) documents in the index and
     * recompute the average document length.
     *
     * @param documents - Iterable of `{ id, text, metadata? }`; missing metadata
     *   is stored as an empty object
     */
    async addDocuments(documents) {
        for (const doc of documents) {
            const tokens = this.tokenize(doc.text);
            this.documents.set(doc.id, {
                text: doc.text,
                tokens,
                metadata: doc.metadata || {},
            });
        }
        // Recalculate average document length
        let totalLength = 0;
        for (const doc of this.documents.values()) {
            totalLength += doc.tokens.length;
        }
        this.avgDocLength =
            this.documents.size > 0 ? totalLength / this.documents.size : 0;
    }
    /**
     * Lower-case the text, replace every character that is not a letter,
     * combining mark, digit, underscore or whitespace with a space, and split
     * on whitespace.
     *
     * Unicode property escapes are used instead of `\w` so that non-ASCII
     * words (accented Latin, CJK, ...) are kept as tokens instead of being
     * erased.
     *
     * @param text - Text to tokenize
     * @returns Array of non-empty lower-cased tokens
     */
    tokenize(text) {
        return text
            .toLowerCase()
            .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
            .split(/\s+/)
            .filter((t) => t.length > 0);
    }
    /** Count how many times `term` occurs in `tokens`. */
    countTermFrequency(tokens, term) {
        return tokens.filter((t) => t === term).length;
    }
    /** Count how many indexed documents contain `term` at least once. */
    countDocumentsWithTerm(term) {
        let count = 0;
        for (const doc of this.documents.values()) {
            if (doc.tokens.includes(term)) {
                count++;
            }
        }
        return count;
    }
}
|
|
94
|
+
/**
 * Reciprocal Rank Fusion
 *
 * Merges several ranked lists into one score per document id. Every
 * appearance of an id adds `1 / (k + rank)` to that id's running total.
 *
 * @param rankings - Array of ranking lists; each entry carries `id` and `rank`
 * @param k - RRF constant (default: 60)
 * @returns Map of document IDs to fused scores
 */
export function reciprocalRankFusion(rankings, k = 60) {
    const fused = new Map();
    for (const list of rankings) {
        for (const entry of list) {
            const contribution = 1 / (k + entry.rank);
            const runningTotal = fused.get(entry.id) || 0;
            fused.set(entry.id, runningTotal + contribution);
        }
    }
    return fused;
}
|
|
112
|
+
/**
 * Linear Combination of normalized scores
 *
 * Both score maps are normalized independently, then blended per id as
 * `alpha * vector + (1 - alpha) * bm25`. An id missing from one map
 * contributes 0 for that side.
 *
 * @param vectorScores - Vector search scores (Map of id to score)
 * @param bm25Scores - BM25 search scores (Map of id to score)
 * @param alpha - Weight for vector scores (0-1), bm25 gets 1-alpha
 * @returns Map of document IDs to combined scores
 */
export function linearCombination(vectorScores, bm25Scores, alpha = 0.5) {
    // Union of ids, vector ids first, in insertion order.
    const ids = new Set([...vectorScores.keys(), ...bm25Scores.keys()]);
    const scaledVector = normalizeScores(vectorScores);
    const scaledBM25 = normalizeScores(bm25Scores);
    const blended = new Map();
    ids.forEach((id) => {
        const dense = scaledVector.get(id) || 0;
        const sparse = scaledBM25.get(id) || 0;
        blended.set(id, alpha * dense + (1 - alpha) * sparse);
    });
    return blended;
}
|
|
134
|
+
/**
 * Normalize scores to 0-1 range (min-max scaling).
 *
 * The minimum and maximum are found with a running loop rather than
 * `Math.min(...values)`, because spreading a very large array into call
 * arguments throws a RangeError.
 *
 * When every score is identical the range falls back to 1, so all ids map
 * to 0.
 *
 * @param scores - Map of id to raw score
 * @returns New Map of id to normalized score; empty when `scores` is empty
 */
function normalizeScores(scores) {
    let min = Infinity;
    let max = -Infinity;
    let count = 0;
    for (const value of scores.values()) {
        min = Math.min(min, value);
        max = Math.max(max, value);
        count++;
    }
    if (count === 0) {
        return new Map();
    }
    const range = max - min || 1;
    const normalized = new Map();
    for (const [id, score] of scores) {
        normalized.set(id, (score - min) / range);
    }
    return normalized;
}
|
|
151
|
+
/**
 * Create a hybrid search function
 *
 * The returned function embeds the query, runs the vector-store query and the
 * BM25 search in parallel, fuses both result lists (Reciprocal Rank Fusion
 * when `fusionMethod` is "rrf", linear combination otherwise) and optionally
 * reranks the fused list.
 *
 * @param options - Search options: `vectorStore`, `bm25Index`, `indexName`,
 *   `embeddingModel`, and an optional `defaultConfig` consulted whenever a
 *   per-call setting is omitted
 * @returns Hybrid search function
 */
export function createHybridSearch(options) {
    const { vectorStore, bm25Index, indexName, embeddingModel, defaultConfig = {}, } = options;
    /**
     * Execute hybrid search combining vector and BM25 retrieval
     *
     * @param query - Search query
     * @param config - Search configuration (overrides `defaultConfig` per call)
     * @returns Hybrid search results: `{ id, score, text, metadata, scores }`
     * @throws Error when the embedding provider has no `embed()` method; any
     *   error from retrieval or reranking is logged and rethrown
     */
    return async function hybridSearch(query, config) {
        const startTime = Date.now();
        const { vectorWeight = defaultConfig.vectorWeight ?? 0.5, bm25Weight = defaultConfig.bm25Weight ?? 0.5, fusionMethod = defaultConfig.fusionMethod ?? "rrf", rrfK = defaultConfig.rrfK ?? 60, topK = defaultConfig.topK ?? 10, enableReranking = defaultConfig.enableReranking ?? false, reranker: rerankerConfig = defaultConfig.reranker, } = config || {};
        try {
            // Generate query embedding
            const embeddingProvider = await ProviderFactory.createProvider(embeddingModel?.provider, embeddingModel?.modelName);
            // Optional chaining: a null/undefined provider gets the same
            // descriptive error instead of a bare TypeError.
            if (typeof embeddingProvider?.embed !== "function") {
                throw new Error(`Embedding provider does not support the embed() method. ` +
                    `Please use a provider that supports embeddings (e.g., OpenAI text-embedding-3-small, Vertex text-embedding-004).`);
            }
            const queryEmbedding = await embeddingProvider.embed(query);
            // Parallel retrieval
            const [vectorResults, bm25Results] = await Promise.all([
                vectorStore.query({
                    indexName,
                    queryVector: queryEmbedding,
                    topK: topK * 2, // Get more for fusion
                }),
                bm25Index.search(query, topK * 2),
            ]);
            // Fuse results
            let fusedResults;
            if (fusionMethod === "rrf") {
                // Reciprocal Rank Fusion: rank is the 1-based position in each list
                const vectorRanking = vectorResults.map((r, i) => ({
                    id: r.id,
                    rank: i + 1,
                }));
                const bm25Ranking = bm25Results.map((r, i) => ({
                    id: r.id,
                    rank: i + 1,
                }));
                const rrfScores = reciprocalRankFusion([vectorRanking, bm25Ranking], rrfK);
                // Combine with original data; vector results win when an id is in both lists
                const resultMap = new Map();
                for (const r of vectorResults) {
                    resultMap.set(r.id, { text: r.text || "", metadata: r.metadata });
                }
                for (const r of bm25Results) {
                    if (!resultMap.has(r.id)) {
                        resultMap.set(r.id, { text: r.text, metadata: r.metadata });
                    }
                }
                fusedResults = Array.from(rrfScores.entries())
                    .sort((a, b) => b[1] - a[1])
                    .slice(0, topK)
                    .map(([id, score]) => {
                    const data = resultMap.get(id);
                    return {
                        id,
                        score,
                        text: data?.text || "",
                        metadata: data?.metadata,
                        scores: {
                            combined: score,
                        },
                    };
                });
            }
            else {
                // Linear combination
                const vectorScoreMap = new Map(vectorResults.map((r) => [r.id, r.score || 0]));
                const bm25ScoreMap = new Map(bm25Results.map((r) => [r.id, r.score]));
                // Adjust weights based on config. When the weights do not sum to a
                // positive finite number, fall back to an even split instead of
                // producing NaN scores from a division by zero.
                const totalWeight = vectorWeight + bm25Weight;
                const normalizedVectorWeight = Number.isFinite(totalWeight) && totalWeight > 0
                    ? vectorWeight / totalWeight
                    : 0.5;
                const combinedScores = linearCombination(vectorScoreMap, bm25ScoreMap, normalizedVectorWeight);
                // Combine with original data
                const resultMap = new Map();
                for (const r of vectorResults) {
                    resultMap.set(r.id, {
                        text: r.text || "",
                        metadata: r.metadata,
                        vectorScore: r.score,
                    });
                }
                for (const r of bm25Results) {
                    const existing = resultMap.get(r.id);
                    if (existing) {
                        existing.bm25Score = r.score;
                    }
                    else {
                        resultMap.set(r.id, {
                            text: r.text,
                            metadata: r.metadata,
                            bm25Score: r.score,
                        });
                    }
                }
                fusedResults = Array.from(combinedScores.entries())
                    .sort((a, b) => b[1] - a[1])
                    .slice(0, topK)
                    .map(([id, score]) => {
                    const data = resultMap.get(id);
                    return {
                        id,
                        score,
                        text: data?.text || "",
                        metadata: data?.metadata,
                        scores: {
                            vector: data?.vectorScore,
                            bm25: data?.bm25Score,
                            combined: score,
                        },
                    };
                });
            }
            // Apply reranking if configured
            if (enableReranking && rerankerConfig && fusedResults.length > 0) {
                const rerankerModel = await ProviderFactory.createProvider(rerankerConfig.model.provider, rerankerConfig.model.modelName);
                const rerankedResults = await rerank(fusedResults.map((r) => ({
                    id: r.id,
                    text: r.text,
                    score: r.score,
                    metadata: r.metadata,
                })), query, rerankerModel, {
                    weights: rerankerConfig.weights,
                    topK: rerankerConfig.topK || topK,
                });
                // Index the pre-rerank score breakdown by id so each reranked
                // item can carry it forward without a linear scan per item.
                const priorScores = new Map(fusedResults.map((f) => [f.id, f.scores]));
                fusedResults = rerankedResults.map((r) => ({
                    id: r.result.id,
                    score: r.score,
                    text: r.result.text || "",
                    metadata: r.result.metadata,
                    scores: {
                        ...(priorScores.get(r.result.id) || {}),
                        reranked: r.score,
                    },
                }));
            }
            const queryTime = Date.now() - startTime;
            logger.info("[HybridSearch] Search completed", {
                query: query.slice(0, 50),
                vectorResults: vectorResults.length,
                bm25Results: bm25Results.length,
                fusedResults: fusedResults.length,
                fusionMethod,
                queryTime,
            });
            return fusedResults;
        }
        catch (error) {
            logger.error("[HybridSearch] Search failed", {
                query: query.slice(0, 50),
                error: error instanceof Error ? error.message : String(error),
            });
            throw error;
        }
    };
}
|
|
314
|
+
//# sourceMappingURL=hybridSearch.js.map
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval Module Exports
|
|
3
|
+
*/
|
|
4
|
+
export { createVectorQueryTool, InMemoryVectorStore, type VectorStore, } from "./vectorQueryTool.js";
|
|
5
|
+
export { createHybridSearch, InMemoryBM25Index, reciprocalRankFusion, linearCombination, type BM25Index, type HybridSearchOptions, } from "./hybridSearch.js";
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval Module Exports
|
|
3
|
+
*/
|
|
4
|
+
export { createVectorQueryTool, InMemoryVectorStore, } from "./vectorQueryTool.js";
|
|
5
|
+
export { createHybridSearch, InMemoryBM25Index, reciprocalRankFusion, linearCombination, } from "./hybridSearch.js";
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector Query Tool
|
|
3
|
+
*
|
|
4
|
+
* Provides semantic search capabilities for RAG pipelines.
|
|
5
|
+
* Integrates with vector stores and supports metadata filtering and reranking.
|
|
6
|
+
*/
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import type { MetadataFilter, RequestContext, VectorQueryResponse, VectorQueryResult, VectorQueryToolConfig } from "../types.js";
|
|
9
|
+
/**
|
|
10
|
+
* Abstract vector store interface
|
|
11
|
+
* Vector stores should implement this interface to work with the query tool
|
|
12
|
+
*/
|
|
13
|
+
export interface VectorStore {
|
|
14
|
+
query(params: { // single options object; resolves to an array of VectorQueryResult
|
|
15
|
+
indexName: string; // required: name of the index to query
|
|
16
|
+
queryVector: number[]; // required: query embedding as a plain number array
|
|
17
|
+
topK?: number; // optional result limit; the default when omitted is up to the implementation (not visible here)
|
|
18
|
+
filter?: MetadataFilter; // optional; filter semantics are defined by MetadataFilter in ../types.js
|
|
19
|
+
includeVectors?: boolean; // optional; presumably requests stored vectors in the results — confirm with implementations
|
|
20
|
+
}): Promise<VectorQueryResult[]>;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Creates a vector query tool for semantic search
|
|
24
|
+
* Follows NeuroLink's factory pattern
|
|
25
|
+
*
|
|
26
|
+
* @param config - Tool configuration
|
|
27
|
+
* @param vectorStore - Vector store instance or resolver function
|
|
28
|
+
* @returns Tool object with execute method
|
|
29
|
+
*/
|
|
30
|
+
export declare function createVectorQueryTool(config: VectorQueryToolConfig, vectorStore: VectorStore | ((context: RequestContext) => VectorStore)): { // returns a tool object: name, description, parameters, execute
|
|
31
|
+
name: string;
|
|
32
|
+
description: string;
|
|
33
|
+
parameters: z.ZodObject<{ // zod input schema: required `query` string, optional `topK` number, optional `filter` record
|
|
34
|
+
topK: z.ZodOptional<z.ZodNumber>;
|
|
35
|
+
filter?: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>> | undefined; // the `filter` key itself is optional in the schema shape
|
|
36
|
+
query: z.ZodString;
|
|
37
|
+
}, "strip", z.ZodTypeAny, { // NOTE(review): in zod v3 the next two type arguments are the parsed output and raw input types — confirm zod version
|
|
38
|
+
query: string;
|
|
39
|
+
filter?: unknown;
|
|
40
|
+
topK?: number | undefined;
|
|
41
|
+
}, {
|
|
42
|
+
query: string;
|
|
43
|
+
filter?: unknown;
|
|
44
|
+
topK?: number | undefined;
|
|
45
|
+
}>;
|
|
46
|
+
/**
|
|
47
|
+
* Execute the vector query
|
|
48
|
+
* @param params - Query parameters
|
|
49
|
+
* @param context - Optional request context
|
|
50
|
+
* @returns Query results with relevant context
|
|
51
|
+
*/
|
|
52
|
+
execute: (params: { // `filter` is typed as MetadataFilter here, while the zod schema above types it as unknown
|
|
53
|
+
query: string;
|
|
54
|
+
filter?: MetadataFilter;
|
|
55
|
+
topK?: number;
|
|
56
|
+
}, context?: RequestContext) => Promise<VectorQueryResponse>;
|
|
57
|
+
};
|
|
58
|
+
/**
|
|
59
|
+
* In-memory vector store implementation for testing and development
|
|
60
|
+
*/
|
|
61
|
+
export declare class InMemoryVectorStore implements VectorStore {
|
|
62
|
+
private vectors; // private storage; its type is elided in the emitted declaration
|
|
63
|
+
/**
|
|
64
|
+
* Add vectors to an index
|
|
65
|
+
*/
|
|
66
|
+
upsert(indexName: string, items: Array<{ // each item: required id and vector, optional metadata
|
|
67
|
+
id: string;
|
|
68
|
+
vector: number[];
|
|
69
|
+
metadata?: Record<string, unknown>;
|
|
70
|
+
}>): Promise<void>;
|
|
71
|
+
/**
|
|
72
|
+
* Query vectors by similarity
|
|
73
|
+
*/
|
|
74
|
+
query(params: { // same parameter shape as VectorStore.query
|
|
75
|
+
indexName: string;
|
|
76
|
+
queryVector: number[];
|
|
77
|
+
topK?: number;
|
|
78
|
+
filter?: MetadataFilter;
|
|
79
|
+
includeVectors?: boolean;
|
|
80
|
+
}): Promise<VectorQueryResult[]>;
|
|
81
|
+
/**
|
|
82
|
+
* Delete vectors from an index
|
|
83
|
+
*/
|
|
84
|
+
delete(indexName: string, ids: string[]): Promise<void>;
|
|
85
|
+
/**
|
|
86
|
+
* Check if metadata matches filter
|
|
87
|
+
*/
|
|
88
|
+
private matchesFilter; // private; signature elided in the declaration
|
|
89
|
+
/**
|
|
90
|
+
* Calculate cosine similarity between two vectors
|
|
91
|
+
*/
|
|
92
|
+
private cosineSimilarity; // private; signature elided in the declaration
|
|
93
|
+
}
|