@juspay/neurolink 9.2.0 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +52 -30
- package/dist/agent/directTools.d.ts +8 -8
- package/dist/cli/commands/config.d.ts +3 -3
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +5 -5
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +8 -8
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/modelTypes.d.ts +2 -2
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +2 -2
- package/dist/types/common.d.ts +0 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/modelTypes.d.ts +20 -20
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/package.json +1 -1
|
@@ -0,0 +1,768 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Document Processing Types
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive type definitions for RAG (Retrieval-Augmented Generation)
|
|
5
|
+
* document processing, including chunking strategies, metadata extraction,
|
|
6
|
+
* vector queries, and Graph RAG support.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Supported document types for processing
|
|
10
|
+
*/
|
|
11
|
+
export type DocumentType = "text" | "markdown" | "html" | "json" | "latex" | "csv" | "pdf";
|
|
12
|
+
/**
|
|
13
|
+
* Chunk metadata for tracking source and position
|
|
14
|
+
*/
|
|
15
|
+
export type ChunkMetadata = {
|
|
16
|
+
/** Source document identifier */
|
|
17
|
+
documentId: string;
|
|
18
|
+
/** Original document filename or URL */
|
|
19
|
+
source?: string;
|
|
20
|
+
/** Position in the original document (0-indexed) */
|
|
21
|
+
chunkIndex: number;
|
|
22
|
+
/** Total number of chunks from the document */
|
|
23
|
+
totalChunks?: number;
|
|
24
|
+
/** Start character position in original text */
|
|
25
|
+
startPosition?: number;
|
|
26
|
+
/** End character position in original text */
|
|
27
|
+
endPosition?: number;
|
|
28
|
+
/** Document type (markdown, html, json, etc.) */
|
|
29
|
+
documentType?: DocumentType;
|
|
30
|
+
/** Custom metadata from extraction */
|
|
31
|
+
custom?: Record<string, unknown>;
|
|
32
|
+
/** Extracted title (from metadata extraction) */
|
|
33
|
+
title?: string;
|
|
34
|
+
/** Extracted summary (from metadata extraction) */
|
|
35
|
+
summary?: string;
|
|
36
|
+
/** Extracted keywords (from metadata extraction) */
|
|
37
|
+
keywords?: string[];
|
|
38
|
+
/** Header level for markdown/html chunks */
|
|
39
|
+
headerLevel?: number;
|
|
40
|
+
/** Header text for structured documents */
|
|
41
|
+
header?: string;
|
|
42
|
+
/** JSON path for JSON chunks */
|
|
43
|
+
jsonPath?: string;
|
|
44
|
+
/** LaTeX environment name */
|
|
45
|
+
latexEnvironment?: string;
|
|
46
|
+
};
|
|
47
|
+
/**
|
|
48
|
+
* Base chunk result with text and metadata
|
|
49
|
+
*/
|
|
50
|
+
export type Chunk = {
|
|
51
|
+
/** Unique identifier for the chunk */
|
|
52
|
+
id: string;
|
|
53
|
+
/** The text content of the chunk */
|
|
54
|
+
text: string;
|
|
55
|
+
/** Metadata associated with the chunk */
|
|
56
|
+
metadata: ChunkMetadata;
|
|
57
|
+
/** Optional embedding vector (populated after embedding) */
|
|
58
|
+
embedding?: number[];
|
|
59
|
+
};
|
|
60
|
+
/**
|
|
61
|
+
* Available chunking strategy types
|
|
62
|
+
*/
|
|
63
|
+
export type ChunkingStrategy = "character" | "recursive" | "sentence" | "token" | "markdown" | "html" | "json" | "latex" | "semantic" | "semantic-markdown";
|
|
64
|
+
/**
|
|
65
|
+
* Validation result for chunker configuration
|
|
66
|
+
*/
|
|
67
|
+
export type ChunkerValidationResult = {
|
|
68
|
+
valid: boolean;
|
|
69
|
+
errors: string[];
|
|
70
|
+
warnings: string[];
|
|
71
|
+
};
|
|
72
|
+
/**
|
|
73
|
+
* Base configuration for all chunkers
|
|
74
|
+
*/
|
|
75
|
+
export type BaseChunkerConfig = {
|
|
76
|
+
/** Maximum chunk size (interpretation varies by strategy) */
|
|
77
|
+
maxSize?: number;
|
|
78
|
+
/** Minimum chunk size */
|
|
79
|
+
minSize?: number;
|
|
80
|
+
/** Overlap between consecutive chunks */
|
|
81
|
+
overlap?: number;
|
|
82
|
+
/** Whether to trim whitespace from chunks */
|
|
83
|
+
trimWhitespace?: boolean;
|
|
84
|
+
/** Custom metadata to add to all chunks */
|
|
85
|
+
metadata?: Record<string, unknown>;
|
|
86
|
+
/** Whether to preserve metadata from source document */
|
|
87
|
+
preserveMetadata?: boolean;
|
|
88
|
+
};
|
|
89
|
+
/**
|
|
90
|
+
* Character chunker configuration
|
|
91
|
+
* Simple character-based splitting
|
|
92
|
+
*/
|
|
93
|
+
export type CharacterChunkerConfig = BaseChunkerConfig & {
|
|
94
|
+
/** Character separator (default: "") */
|
|
95
|
+
separator?: string;
|
|
96
|
+
/** Keep separator in chunks */
|
|
97
|
+
keepSeparator?: boolean;
|
|
98
|
+
};
|
|
99
|
+
/**
|
|
100
|
+
* Recursive chunker configuration
|
|
101
|
+
* Smart splitting based on content structure
|
|
102
|
+
*/
|
|
103
|
+
export type RecursiveChunkerConfig = BaseChunkerConfig & {
|
|
104
|
+
/** Ordered list of separators to try (default: ["\n\n", "\n", " ", ""]) */
|
|
105
|
+
separators?: string[];
|
|
106
|
+
/** Whether separators are regex patterns */
|
|
107
|
+
isSeparatorRegex?: boolean;
|
|
108
|
+
/** Whether to keep separators in the output chunks */
|
|
109
|
+
keepSeparators?: boolean;
|
|
110
|
+
};
|
|
111
|
+
/**
|
|
112
|
+
* Sentence chunker configuration
|
|
113
|
+
* Sentence-aware splitting
|
|
114
|
+
*/
|
|
115
|
+
export type SentenceChunkerConfig = BaseChunkerConfig & {
|
|
116
|
+
/** Sentence ending characters (default: [".", "!", "?", "\n"]) */
|
|
117
|
+
sentenceEnders?: string[];
|
|
118
|
+
/** Minimum sentences per chunk */
|
|
119
|
+
minSentences?: number;
|
|
120
|
+
/** Maximum sentences per chunk */
|
|
121
|
+
maxSentences?: number;
|
|
122
|
+
};
|
|
123
|
+
/**
|
|
124
|
+
* Token chunker configuration
|
|
125
|
+
* Token-aware splitting using tokenizer
|
|
126
|
+
*/
|
|
127
|
+
export type TokenChunkerConfig = BaseChunkerConfig & {
|
|
128
|
+
/** Tokenizer to use (default: "cl100k_base" for GPT models) */
|
|
129
|
+
tokenizer?: string;
|
|
130
|
+
/** Model name for token counting (alternative to tokenizer) */
|
|
131
|
+
modelName?: string;
|
|
132
|
+
/** Maximum tokens per chunk */
|
|
133
|
+
maxTokens?: number;
|
|
134
|
+
/** Token overlap between chunks */
|
|
135
|
+
tokenOverlap?: number;
|
|
136
|
+
};
|
|
137
|
+
/**
|
|
138
|
+
* Markdown chunker configuration
|
|
139
|
+
* Structure-aware markdown splitting
|
|
140
|
+
*/
|
|
141
|
+
export type MarkdownChunkerConfig = BaseChunkerConfig & {
|
|
142
|
+
/** Header levels to split on (default: [1, 2, 3]) */
|
|
143
|
+
headerLevels?: number[];
|
|
144
|
+
/** Include code blocks as single chunks */
|
|
145
|
+
preserveCodeBlocks?: boolean;
|
|
146
|
+
/** Include the header in the chunk content */
|
|
147
|
+
includeHeader?: boolean;
|
|
148
|
+
/** Strip markdown formatting from output */
|
|
149
|
+
stripFormatting?: boolean;
|
|
150
|
+
};
|
|
151
|
+
/**
|
|
152
|
+
* HTML chunker configuration
|
|
153
|
+
* HTML structure-aware splitting
|
|
154
|
+
*/
|
|
155
|
+
export type HTMLChunkerConfig = BaseChunkerConfig & {
|
|
156
|
+
/** Tags to split on (default: ["div", "p", "section", "article"]) */
|
|
157
|
+
splitTags?: string[];
|
|
158
|
+
/** Tags to preserve as single chunks */
|
|
159
|
+
preserveTags?: string[];
|
|
160
|
+
/** Extract text only (strip HTML tags) */
|
|
161
|
+
extractTextOnly?: boolean;
|
|
162
|
+
/** Include tag metadata in chunks */
|
|
163
|
+
includeTagMetadata?: boolean;
|
|
164
|
+
};
|
|
165
|
+
/**
|
|
166
|
+
* JSON chunker configuration
|
|
167
|
+
* JSON structure-aware splitting
|
|
168
|
+
*/
|
|
169
|
+
export type JSONChunkerConfig = BaseChunkerConfig & {
|
|
170
|
+
/** Maximum depth to traverse */
|
|
171
|
+
maxDepth?: number;
|
|
172
|
+
/** Keys to split on (arrays/objects at these keys become chunks) */
|
|
173
|
+
splitKeys?: string[];
|
|
174
|
+
/** Keys to preserve as single units */
|
|
175
|
+
preserveKeys?: string[];
|
|
176
|
+
/** Include JSON path in metadata */
|
|
177
|
+
includeJsonPath?: boolean;
|
|
178
|
+
};
|
|
179
|
+
/**
|
|
180
|
+
* LaTeX chunker configuration
|
|
181
|
+
* LaTeX structure-aware splitting
|
|
182
|
+
*/
|
|
183
|
+
export type LaTeXChunkerConfig = BaseChunkerConfig & {
|
|
184
|
+
/** Environments to split on (default: ["section", "subsection", "chapter"]) */
|
|
185
|
+
splitEnvironments?: string[];
|
|
186
|
+
/** Preserve math environments as single chunks */
|
|
187
|
+
preserveMath?: boolean;
|
|
188
|
+
/** Include preamble as separate chunk */
|
|
189
|
+
includePreamble?: boolean;
|
|
190
|
+
};
|
|
191
|
+
/**
|
|
192
|
+
* Semantic chunker configuration
|
|
193
|
+
* LLM-based semantic splitting
|
|
194
|
+
*/
|
|
195
|
+
export type SemanticChunkerConfig = BaseChunkerConfig & {
|
|
196
|
+
/** Minimum tokens before considering a split */
|
|
197
|
+
joinThreshold?: number;
|
|
198
|
+
/** Model for semantic analysis */
|
|
199
|
+
modelName?: string;
|
|
200
|
+
/** Provider for the model */
|
|
201
|
+
provider?: string;
|
|
202
|
+
/** Custom prompt for semantic grouping */
|
|
203
|
+
semanticPrompt?: string;
|
|
204
|
+
/** Maximum header depth to consider for grouping */
|
|
205
|
+
maxHeaderDepth?: number;
|
|
206
|
+
/** Similarity threshold for grouping (0-1) */
|
|
207
|
+
similarityThreshold?: number;
|
|
208
|
+
};
|
|
209
|
+
/**
|
|
210
|
+
* Union type for all chunker configurations
|
|
211
|
+
*/
|
|
212
|
+
export type ChunkerConfig = CharacterChunkerConfig | RecursiveChunkerConfig | SentenceChunkerConfig | TokenChunkerConfig | MarkdownChunkerConfig | HTMLChunkerConfig | JSONChunkerConfig | LaTeXChunkerConfig | SemanticChunkerConfig;
|
|
213
|
+
/**
|
|
214
|
+
* Chunker interface - all chunking strategies implement this
|
|
215
|
+
*/
|
|
216
|
+
export interface Chunker {
|
|
217
|
+
/** Strategy name for identification */
|
|
218
|
+
readonly strategy: ChunkingStrategy;
|
|
219
|
+
/**
|
|
220
|
+
* Split text into chunks
|
|
221
|
+
* @param text - The text to chunk
|
|
222
|
+
* @param config - Strategy-specific configuration
|
|
223
|
+
* @returns Array of chunks
|
|
224
|
+
*/
|
|
225
|
+
chunk(text: string, config?: BaseChunkerConfig): Promise<Chunk[]>;
|
|
226
|
+
}
|
|
227
|
+
/**
|
|
228
|
+
* Chunker metadata for factory registration
|
|
229
|
+
*/
|
|
230
|
+
export type ChunkerMetadata = {
|
|
231
|
+
/** Human-readable description */
|
|
232
|
+
description: string;
|
|
233
|
+
/** Supported document types */
|
|
234
|
+
supportedTypes?: DocumentType[];
|
|
235
|
+
/** Whether the chunker requires external dependencies */
|
|
236
|
+
requiresExternalDeps?: boolean;
|
|
237
|
+
/** Default configuration (can be any chunker-specific config) */
|
|
238
|
+
defaultConfig?: Record<string, unknown>;
|
|
239
|
+
/** Supported configuration options */
|
|
240
|
+
supportedOptions?: string[];
|
|
241
|
+
/** Use cases where this chunker excels */
|
|
242
|
+
useCases?: string[];
|
|
243
|
+
/** Alternative names/aliases for this chunker */
|
|
244
|
+
aliases?: string[];
|
|
245
|
+
};
|
|
246
|
+
/**
|
|
247
|
+
* Metadata extraction types
|
|
248
|
+
*/
|
|
249
|
+
export type ExtractorType = "title" | "summary" | "keywords" | "questions" | "custom";
|
|
250
|
+
/**
|
|
251
|
+
* Base configuration for metadata extractors
|
|
252
|
+
*/
|
|
253
|
+
export type BaseExtractorConfig = {
|
|
254
|
+
/** Language model to use for extraction */
|
|
255
|
+
modelName?: string;
|
|
256
|
+
/** Provider for the model */
|
|
257
|
+
provider?: string;
|
|
258
|
+
/** Custom prompt template */
|
|
259
|
+
promptTemplate?: string;
|
|
260
|
+
/** Maximum tokens for LLM response */
|
|
261
|
+
maxTokens?: number;
|
|
262
|
+
/** Temperature for LLM generation */
|
|
263
|
+
temperature?: number;
|
|
264
|
+
};
|
|
265
|
+
/**
|
|
266
|
+
* Title extractor configuration
|
|
267
|
+
*/
|
|
268
|
+
export type TitleExtractorConfig = BaseExtractorConfig & {
|
|
269
|
+
/** Number of nodes to use for title extraction */
|
|
270
|
+
nodes?: number;
|
|
271
|
+
/** Template for processing individual nodes */
|
|
272
|
+
nodeTemplate?: string;
|
|
273
|
+
/** Template for combining node results */
|
|
274
|
+
combineTemplate?: string;
|
|
275
|
+
};
|
|
276
|
+
/**
|
|
277
|
+
* Summary extractor configuration
|
|
278
|
+
*/
|
|
279
|
+
export type SummaryExtractorConfig = BaseExtractorConfig & {
|
|
280
|
+
/** Summary types to generate */
|
|
281
|
+
summaryTypes?: ("current" | "previous" | "next")[];
|
|
282
|
+
/** Maximum summary length in words */
|
|
283
|
+
maxWords?: number;
|
|
284
|
+
};
|
|
285
|
+
/**
|
|
286
|
+
* Keyword extractor configuration
|
|
287
|
+
*/
|
|
288
|
+
export type KeywordExtractorConfig = BaseExtractorConfig & {
|
|
289
|
+
/** Maximum number of keywords to extract */
|
|
290
|
+
maxKeywords?: number;
|
|
291
|
+
/** Minimum keyword relevance score (0-1) */
|
|
292
|
+
minRelevance?: number;
|
|
293
|
+
};
|
|
294
|
+
/**
|
|
295
|
+
* Question-Answer extractor configuration
|
|
296
|
+
*/
|
|
297
|
+
export type QuestionExtractorConfig = BaseExtractorConfig & {
|
|
298
|
+
/** Number of Q&A pairs to generate */
|
|
299
|
+
numQuestions?: number;
|
|
300
|
+
/** Include answers in output */
|
|
301
|
+
includeAnswers?: boolean;
|
|
302
|
+
/** Generate embedding-only questions (shorter, more focused) */
|
|
303
|
+
embeddingOnly?: boolean;
|
|
304
|
+
};
|
|
305
|
+
/**
|
|
306
|
+
* Custom schema extractor configuration
|
|
307
|
+
*/
|
|
308
|
+
export type CustomSchemaExtractorConfig = BaseExtractorConfig & {
|
|
309
|
+
/** Zod schema for structured extraction */
|
|
310
|
+
schema: unknown;
|
|
311
|
+
/** Description of what to extract */
|
|
312
|
+
description?: string;
|
|
313
|
+
};
|
|
314
|
+
/**
|
|
315
|
+
* Combined extraction parameters
|
|
316
|
+
*/
|
|
317
|
+
export type ExtractParams = {
|
|
318
|
+
/** Extract document title */
|
|
319
|
+
title?: boolean | TitleExtractorConfig;
|
|
320
|
+
/** Extract document summary */
|
|
321
|
+
summary?: boolean | SummaryExtractorConfig;
|
|
322
|
+
/** Extract keywords */
|
|
323
|
+
keywords?: boolean | KeywordExtractorConfig;
|
|
324
|
+
/** Generate Q&A pairs */
|
|
325
|
+
questions?: boolean | QuestionExtractorConfig;
|
|
326
|
+
/** Custom schema extraction */
|
|
327
|
+
custom?: CustomSchemaExtractorConfig;
|
|
328
|
+
};
|
|
329
|
+
/**
|
|
330
|
+
* Extraction result for a single chunk
|
|
331
|
+
*/
|
|
332
|
+
export type ExtractionResult = {
|
|
333
|
+
/** Extracted title */
|
|
334
|
+
title?: string;
|
|
335
|
+
/** Extracted summary */
|
|
336
|
+
summary?: string;
|
|
337
|
+
/** Extracted keywords */
|
|
338
|
+
keywords?: string[];
|
|
339
|
+
/** Generated Q&A pairs */
|
|
340
|
+
questions?: Array<{
|
|
341
|
+
question: string;
|
|
342
|
+
answer?: string;
|
|
343
|
+
}>;
|
|
344
|
+
/** Custom schema extraction result */
|
|
345
|
+
custom?: Record<string, unknown>;
|
|
346
|
+
};
|
|
347
|
+
/**
|
|
348
|
+
* Request context for dynamic configuration
|
|
349
|
+
*/
|
|
350
|
+
export type RequestContext = {
|
|
351
|
+
userId?: string;
|
|
352
|
+
tenantId?: string;
|
|
353
|
+
environment?: string;
|
|
354
|
+
custom?: Record<string, unknown>;
|
|
355
|
+
};
|
|
356
|
+
/**
 * Metadata filter using MongoDB/Sift query syntax.
 *
 * The `$`-prefixed members are the operators this type names explicitly.
 * The logical operators (`$and`, `$or`, `$not`, `$nor`) nest further
 * `MetadataFilter` values, so filters compose recursively. Any other key is
 * accepted through the index signature with a value of type `unknown`.
 */
export type MetadataFilter = {
    $eq?: unknown;
    $ne?: unknown;
    $gt?: number;
    $gte?: number;
    $lt?: number;
    $lte?: number;
    $in?: unknown[];
    $nin?: unknown[];
    $and?: MetadataFilter[];
    $or?: MetadataFilter[];
    $not?: MetadataFilter;
    $nor?: MetadataFilter[];
    $exists?: boolean;
    $contains?: string;
    $regex?: string;
    $size?: number;
    /** Arbitrary (non-operator) keys; values are unconstrained by this type */
    [field: string]: unknown;
};
|
|
378
|
+
/**
 * Vector store query result.
 *
 * Only `id` is required; every other member may be absent.
 */
export type VectorQueryResult = {
    /** Unique identifier */
    id: string;
    /** Text content */
    text?: string;
    /** Similarity/relevance score */
    score?: number;
    /** Associated metadata */
    metadata?: Record<string, unknown>;
    /** Embedding vector (if requested) */
    vector?: number[];
};
|
|
393
|
+
/**
 * Reranker configuration.
 *
 * `model` is required and identifies the language model by provider and
 * model name; `weights` and `topK` are optional.
 */
export type RerankerConfig = {
    /** Language model for reranking */
    model: {
        provider: string;
        modelName: string;
    };
    /** Scoring weights */
    weights?: {
        semantic?: number;
        vector?: number;
        position?: number;
    };
    /** Number of results after reranking */
    topK?: number;
};
|
|
411
|
+
/**
 * Provider-specific query options.
 *
 * One optional member per provider named here (`pinecone`, `pgVector`,
 * `chroma`); each groups that provider's own optional settings.
 */
export type VectorProviderOptions = {
    /** Pinecone options */
    pinecone?: {
        namespace?: string;
        sparseVector?: number[];
    };
    /** pgVector options */
    pgVector?: {
        minScore?: number;
        ef?: number;
        probes?: number;
    };
    /** Chroma options */
    chroma?: {
        where?: Record<string, unknown>;
        whereDocument?: Record<string, unknown>;
    };
};
|
|
432
|
+
/**
 * Vector query tool configuration.
 *
 * `indexName` and `embeddingModel` are required; every other member is
 * optional.
 */
export type VectorQueryToolConfig = {
    /** Tool identifier */
    id?: string;
    /** Tool description for AI agents */
    description?: string;
    /** Index name within the vector store */
    indexName: string;
    /** Embedding model specification */
    embeddingModel: {
        provider: string;
        modelName: string;
    };
    /** Enable metadata filtering */
    enableFilter?: boolean;
    /** Include embedding vectors in results */
    includeVectors?: boolean;
    /** Include full source objects in results */
    includeSources?: boolean;
    /** Number of results to return */
    topK?: number;
    /** Reranker configuration */
    reranker?: RerankerConfig;
    /** Provider-specific options */
    providerOptions?: VectorProviderOptions;
};
|
|
460
|
+
/**
 * Vector query result wrapper.
 *
 * Bundles the formatted context string with the underlying source results
 * and metadata about the query. All members are required.
 */
export type VectorQueryResponse = {
    /** Formatted relevant context string */
    relevantContext: string;
    /** Source query results */
    sources: VectorQueryResult[];
    /** Total results found */
    totalResults: number;
    /** Query metadata */
    metadata: {
        // NOTE(review): the unit of `queryTime` is not stated in this declaration — confirm.
        queryTime: number;
        reranked: boolean;
        filtered: boolean;
    };
};
|
|
477
|
+
/**
 * BM25 search result.
 *
 * `id`, `score` and `text` are required; `metadata` is optional.
 */
export type BM25Result = {
    /** Document ID */
    id: string;
    /** BM25 score */
    score: number;
    /** Document text */
    text: string;
    /** Associated metadata */
    metadata?: Record<string, unknown>;
};
|
|
490
|
+
/**
 * Hybrid search configuration.
 *
 * Every member is optional. `fusionMethod` is restricted to the literals
 * `"rrf"` and `"linear"`.
 */
export type HybridSearchConfig = {
    /** Weight for vector search (0-1) */
    vectorWeight?: number;
    /** Weight for BM25 search (0-1) */
    bm25Weight?: number;
    /** Fusion method */
    fusionMethod?: "rrf" | "linear";
    /** RRF k parameter */
    rrfK?: number;
    /** Number of results to return */
    topK?: number;
    /** Enable reranking */
    enableReranking?: boolean;
    /** Reranker configuration */
    reranker?: RerankerConfig;
};
|
|
509
|
+
/**
 * Hybrid search result.
 *
 * `id`, `score` and `text` are required. `scores` optionally breaks the
 * combined score down into its individual components.
 */
export type HybridSearchResult = {
    /** Document ID */
    id: string;
    /** Combined score */
    score: number;
    /** Document text */
    text: string;
    /** Associated metadata */
    metadata?: Record<string, unknown>;
    /** Score breakdown */
    scores?: {
        vector?: number;
        bm25?: number;
        combined?: number;
        reranked?: number;
    };
};
|
|
529
|
+
/**
 * Graph node representing a document chunk.
 *
 * `id`, `content` and `metadata` are required; `embedding` is optional.
 */
export type GraphNode = {
    /** Unique node identifier */
    id: string;
    /** Text content of the node */
    content: string;
    /** Node metadata */
    metadata: Record<string, unknown>;
    /** Embedding vector */
    embedding?: number[];
};
|
|
542
|
+
/**
 * Graph edge representing semantic relationship.
 *
 * Connects two nodes by ID; `type` is the only optional member.
 */
export type GraphEdge = {
    /** Source node ID */
    source: string;
    /** Target node ID */
    target: string;
    /** Edge weight (similarity score) */
    weight: number;
    /** Edge type */
    type?: string;
};
|
|
555
|
+
/**
 * Chunk input for graph creation.
 *
 * `text` is required; `metadata` is optional.
 */
export type GraphChunk = {
    /** Chunk text content */
    text: string;
    /** Chunk metadata */
    metadata?: Record<string, unknown>;
};
|
|
564
|
+
/**
 * Embedding input for graph creation.
 *
 * Wraps a single numeric vector.
 */
export type GraphEmbedding = {
    /** Embedding vector */
    vector: number[];
};
|
|
571
|
+
/**
 * Ranked node result from graph query.
 *
 * All members are required: the node's ID, content and metadata together
 * with its relevance score.
 */
export type RankedNode = {
    /** Node ID */
    id: string;
    /** Node content */
    content: string;
    /** Node metadata */
    metadata: Record<string, unknown>;
    /** Relevance score */
    score: number;
};
|
|
584
|
+
/**
 * Graph RAG configuration.
 *
 * Both members are optional. The defaults quoted below are documented
 * values; they are not applied by this type itself.
 */
export type GraphRAGConfig = {
    /** Embedding vector dimension (default: 1536) */
    dimension?: number;
    /** Similarity threshold for edge creation (default: 0.7) */
    threshold?: number;
};
|
|
593
|
+
/**
 * Graph query parameters.
 *
 * Only `query` is required. The defaults quoted below are documented
 * values; they are not applied by this type itself.
 */
export type GraphQueryParams = {
    /** Query embedding vector */
    query: number[];
    /** Number of results to return (default: 10) */
    topK?: number;
    /** Random walk steps (default: 100) */
    randomWalkSteps?: number;
    /** Restart probability for random walk (default: 0.15) */
    restartProb?: number;
};
|
|
606
|
+
/**
 * Graph statistics.
 *
 * Four required numeric members: node and edge counts, the average degree,
 * and a threshold value.
 */
export type GraphStats = {
    nodeCount: number;
    edgeCount: number;
    avgDegree: number;
    threshold: number;
};
|
|
615
|
+
/**
 * Reranker type options.
 *
 * A closed union of four string literals.
 */
export type RerankerType = "cross-encoder" | "colbert" | "cohere" | "llm";
|
|
619
|
+
/**
 * Reranker options.
 *
 * Every member is optional.
 */
export type RerankerOptions = {
    /** Pre-computed query embedding */
    queryEmbedding?: number[];
    /** Number of results to return after reranking */
    topK?: number;
    /**
     * Scoring weights (must sum to 1.0).
     * The sum constraint is a documented expectation only; the type does not
     * enforce it.
     */
    weights?: {
        semantic?: number;
        vector?: number;
        position?: number;
    };
};
|
|
634
|
+
/**
 * Reranked result with detailed scoring.
 *
 * Pairs the original query result with its combined score and the
 * per-component values that make up the breakdown.
 */
export type RerankResult = {
    /** Original query result */
    result: VectorQueryResult;
    /** Combined reranking score (0-1) */
    score: number;
    /** Detailed score breakdown; only `queryAnalysis` is optional */
    details: {
        semantic: number;
        vector: number;
        position: number;
        queryAnalysis?: string;
    };
};
|
|
650
|
+
/**
 * MDocument configuration.
 *
 * `type` is required; `metadata` is optional.
 */
export type MDocumentConfig = {
    /** Document type */
    type: DocumentType;
    /** Custom metadata */
    metadata?: Record<string, unknown>;
};
|
|
659
|
+
/**
 * Chunk parameters for MDocument.
 *
 * Every member is optional.
 */
export type ChunkParams = {
    /** Chunking strategy to use */
    strategy?: ChunkingStrategy;
    /** Strategy-specific configuration */
    config?: ChunkerConfig;
    /** Metadata extraction options */
    extract?: ExtractParams;
};
|
|
670
|
+
/**
 * RAG CLI command arguments.
 *
 * Every member is optional. `format` is restricted to the literals
 * `"json"`, `"text"` and `"table"`.
 */
export type RAGCommandArgs = {
    /** Input file path */
    file?: string;
    /** Query string */
    query?: string;
    /** Chunking strategy */
    strategy?: ChunkingStrategy;
    /** Maximum chunk size */
    maxSize?: number;
    /** Chunk overlap */
    overlap?: number;
    /** Output format */
    format?: "json" | "text" | "table";
    /** Enable verbose output */
    verbose?: boolean;
    /** Provider for embeddings */
    provider?: string;
    /** Model for embeddings */
    model?: string;
    /** Number of results */
    topK?: number;
    /** Index name */
    index?: string;
    /** Enable hybrid search */
    hybrid?: boolean;
    /** Use Graph RAG */
    graph?: boolean;
};
|
|
701
|
+
/**
 * RAG configuration for generate() and stream() APIs.
 *
 * When provided, NeuroLink automatically:
 * 1. Loads the specified files
 * 2. Chunks them using the selected strategy
 * 3. Generates embeddings
 * 4. Stores in an in-memory vector store
 * 5. Creates a search tool the AI can invoke on demand
 *
 * Only `files` is required; every other member is optional. The `@default`
 * tags below are documented values and are not applied by this type itself.
 *
 * @example
 * ```typescript
 * const result = await neurolink.generate({
 *   input: { text: "What is RAG?" },
 *   provider: "vertex",
 *   rag: {
 *     files: ["./docs/guide.md", "./docs/api.md"],
 *     strategy: "markdown",
 *     chunkSize: 512,
 *     topK: 5,
 *   }
 * });
 * ```
 */
export type RAGConfig = {
    /** File paths to load and index for retrieval */
    files: string[];
    /**
     * Chunking strategy to use. If not specified, auto-detected from file extension.
     *
     * NOTE(review): "auto-detected" and the `@default` tag below read as two
     * different behaviors when the option is omitted; presumably "recursive" is
     * the fallback when detection finds no match. Confirm against the implementation.
     * @default "recursive"
     */
    strategy?: ChunkingStrategy;
    /**
     * Maximum chunk size in characters.
     * @default 1000
     */
    chunkSize?: number;
    /**
     * Overlap between adjacent chunks in characters.
     * @default 200
     */
    chunkOverlap?: number;
    /**
     * Number of top results to retrieve per query.
     * @default 5
     */
    topK?: number;
    /**
     * Tool name visible to the AI model.
     * @default "search_knowledge_base"
     */
    toolName?: string;
    /**
     * Tool description for the AI model explaining what the knowledge base contains.
     * @default "Search the loaded documents for relevant information to answer the user's question"
     */
    toolDescription?: string;
    /**
     * Embedding model provider for generating embeddings.
     * Defaults to the same provider used for generation.
     */
    embeddingProvider?: string;
    /**
     * Embedding model name.
     * Defaults to the provider's default embedding model.
     */
    embeddingModel?: string;
};
|