@juspay/neurolink 9.2.0 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +52 -30
- package/dist/agent/directTools.d.ts +8 -8
- package/dist/cli/commands/config.d.ts +3 -3
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +5 -5
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +8 -8
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/modelTypes.d.ts +2 -2
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +2 -2
- package/dist/types/common.d.ts +0 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/modelTypes.d.ts +20 -20
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/package.json +1 -1
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata Extractor Registry
|
|
3
|
+
*
|
|
4
|
+
* Centralized registry for all metadata extractor implementations with metadata
|
|
5
|
+
* and discovery capabilities. Follows the BaseRegistry pattern.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseRegistry } from "../../core/infrastructure/index.js";
|
|
8
|
+
import { logger } from "../../utils/logger.js";
|
|
9
|
+
import { MetadataExtractionError, RAGErrorCodes } from "../errors/RAGError.js";
|
|
10
|
+
/**
 * Built-in extractor catalogue, keyed by extractor type.
 *
 * Each entry carries a human-readable description, the default model
 * configuration, the option names the extractor accepts, example use cases,
 * the aliases that resolve to the type, and the extraction types it produces.
 * Key order matters: it is the order in which the registry registers types.
 */
const DEFAULT_EXTRACTOR_METADATA = (() => {
    // Every entry gets its own copy of these fragments (via spread) so that
    // no two entries ever share an array or object instance.
    const baseModel = { provider: "openai", modelName: "gpt-4o-mini" };
    const promptOptions = ["provider", "modelName", "promptTemplate"];
    const everyExtractionType = ["title", "summary", "keywords", "questions", "custom"];
    return {
        llm: {
            description: "Full LLM-powered metadata extraction supporting all extraction types",
            defaultConfig: { ...baseModel, temperature: 0.3 },
            supportedOptions: [...promptOptions, "maxTokens", "temperature"],
            useCases: [
                "Comprehensive metadata extraction",
                "Multi-type extraction in single pass",
                "Custom schema extraction",
            ],
            aliases: ["full", "comprehensive", "all"],
            requiresModel: true,
            extractionTypes: [...everyExtractionType],
        },
        title: {
            description: "Extracts concise, descriptive titles from document content",
            defaultConfig: { ...baseModel, maxTokens: 100 },
            supportedOptions: [...promptOptions, "maxTokens"],
            useCases: ["Document indexing", "Content organization", "Navigation systems"],
            aliases: ["header", "heading"],
            requiresModel: true,
            extractionTypes: ["title"],
        },
        summary: {
            description: "Generates concise summaries of document chunks",
            defaultConfig: { ...baseModel, maxTokens: 200 },
            supportedOptions: [...promptOptions, "maxTokens", "maxWords"],
            useCases: ["Document previews", "Search result snippets", "Content condensation"],
            aliases: ["summarize", "abstract"],
            requiresModel: true,
            extractionTypes: ["summary"],
        },
        keywords: {
            description: "Extracts key terms and phrases from content",
            defaultConfig: { ...baseModel, maxTokens: 100 },
            supportedOptions: [...promptOptions, "maxKeywords"],
            useCases: ["Tag generation", "Topic modeling", "Search optimization"],
            aliases: ["tags", "terms", "keyphrase"],
            requiresModel: true,
            extractionTypes: ["keywords"],
        },
        questions: {
            description: "Generates Q&A pairs from content for training or FAQs",
            defaultConfig: { ...baseModel, maxTokens: 500 },
            supportedOptions: [...promptOptions, "numQuestions", "includeAnswers"],
            useCases: ["FAQ generation", "Training data creation", "Knowledge base building"],
            aliases: ["qa", "faq", "questions-answers"],
            requiresModel: true,
            extractionTypes: ["questions"],
        },
        custom: {
            description: "Extracts structured data according to custom schema",
            defaultConfig: { ...baseModel, maxTokens: 500 },
            supportedOptions: [...promptOptions, "schema", "description"],
            useCases: ["Structured data extraction", "Entity extraction", "Custom field extraction"],
            aliases: ["schema", "structured", "entity"],
            requiresModel: true,
            extractionTypes: ["custom"],
        },
        composite: {
            description: "Combines multiple extraction types in a single pass",
            defaultConfig: { ...baseModel },
            supportedOptions: ["provider", "modelName", "extractors"],
            useCases: ["Multi-field extraction", "Complete document processing", "Pipeline integration"],
            aliases: ["multi", "combined", "batch"],
            requiresModel: true,
            extractionTypes: [...everyExtractionType],
        },
    };
})();
|
|
158
|
+
/**
 * Metadata Extractor Registry
 *
 * Singleton registry of metadata extractor factories, built on BaseRegistry.
 * On top of the base registration it keeps an alias table so an extractor can
 * be looked up by its canonical type or by any of its aliases
 * (case-insensitively), and it offers discovery helpers over the extractors'
 * descriptive metadata.
 */
export class MetadataExtractorRegistry extends BaseRegistry {
    /** Lazily created singleton; `null` until `getInstance()` is first called. */
    static instance = null;
    /** Lower-cased alias -> canonical extractor type. */
    aliasMap = new Map();
    constructor() {
        super();
    }
    /**
     * Get singleton instance, creating it on first use.
     * @returns The shared MetadataExtractorRegistry.
     */
    static getInstance() {
        if (!MetadataExtractorRegistry.instance) {
            MetadataExtractorRegistry.instance = new MetadataExtractorRegistry();
        }
        return MetadataExtractorRegistry.instance;
    }
    /**
     * Reset singleton (for testing).
     *
     * Clears the current instance (items and aliases) and drops the reference
     * so the next `getInstance()` builds a fresh registry. References obtained
     * earlier keep pointing at the old, now-empty instance.
     */
    static resetInstance() {
        if (MetadataExtractorRegistry.instance) {
            MetadataExtractorRegistry.instance.clear();
            MetadataExtractorRegistry.instance = null;
        }
    }
    /**
     * Register all built-in extractors.
     *
     * Loads `LLMMetadataExtractor` lazily and registers one factory per entry
     * of DEFAULT_EXTRACTOR_METADATA. Every built-in type is backed by the same
     * extractor class; the type is only recorded on the returned wrapper.
     */
    async registerAll() {
        const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
        // Register all extractor types
        for (const [type, metadata] of Object.entries(DEFAULT_EXTRACTOR_METADATA)) {
            this.registerExtractor(type, async () => this.createExtractorInstance(LLMMetadataExtractor, type), metadata);
        }
        logger.debug(`[MetadataExtractorRegistry] Registered ${this.items.size} extractor types`);
    }
    /**
     * Create extractor instance wrapper.
     *
     * @param ExtractorClass - Constructor invoked with no arguments.
     * @param type - Extractor type recorded on the wrapper.
     * @returns An object exposing `type` and an `extract(chunks, params)` method
     *          that forwards to the instance, substituting `{}` for missing params.
     */
    createExtractorInstance(ExtractorClass, type) {
        const extractor = new ExtractorClass();
        return {
            type,
            async extract(chunks, params) {
                return extractor.extract(chunks, params ?? {});
            },
        };
    }
    /**
     * Register an extractor with aliases.
     *
     * @param type - Canonical extractor type.
     * @param factory - Factory passed through to the base `register`.
     * @param metadata - Descriptive metadata; `aliases` is optional.
     */
    registerExtractor(type, factory, metadata) {
        this.register(type, factory, metadata);
        // Metadata without an alias list is valid: treat it as "no aliases"
        // instead of throwing after the type has already been registered.
        const aliases = Array.isArray(metadata?.aliases) ? metadata.aliases : [];
        for (const alias of aliases) {
            this.aliasMap.set(alias.toLowerCase(), type);
            logger.debug(`[MetadataExtractorRegistry] Registered alias '${alias}' -> '${type}'`);
        }
    }
    /**
     * Resolve a canonical type from a type name or alias.
     *
     * Lookup order: lower-cased name as a registered type, lower-cased name as
     * an alias, then the name exactly as given (so types registered with
     * upper-case characters remain reachable).
     *
     * @param nameOrAlias - Type name or alias.
     * @returns The canonical registered type.
     * @throws MetadataExtractionError when nothing matches, including when
     *         `nameOrAlias` is not a string.
     */
    resolveType(nameOrAlias) {
        if (typeof nameOrAlias === "string") {
            const lower = nameOrAlias.toLowerCase();
            // Check if it's a direct type
            if (this.items.has(lower)) {
                return lower;
            }
            // Check aliases
            const resolved = this.aliasMap.get(lower);
            if (resolved) {
                return resolved;
            }
            // Fall back to the exact spelling for mixed-case registrations.
            if (this.items.has(nameOrAlias)) {
                return nameOrAlias;
            }
        }
        const availableTypes = this.getAvailableExtractors();
        throw new MetadataExtractionError(`Unknown metadata extractor type: '${String(nameOrAlias)}'. Available types: ${availableTypes.join(", ")}`, {
            code: RAGErrorCodes.METADATA_EXTRACTOR_NOT_FOUND,
            extractorType: nameOrAlias,
            details: {
                requestedType: nameOrAlias,
                availableTypes,
            },
        });
    }
    /**
     * Get an extractor by type or alias.
     *
     * Awaits `ensureInitialized()` (not defined in this class; presumably
     * inherited from BaseRegistry) before resolving.
     *
     * @param typeOrAlias - Type name or alias.
     * @returns The extractor produced by the registered factory.
     * @throws MetadataExtractionError when the type is unknown or the registry
     *         yields no extractor for it.
     */
    async getExtractor(typeOrAlias) {
        await this.ensureInitialized();
        const type = this.resolveType(typeOrAlias);
        const extractor = await this.get(type);
        if (!extractor) {
            throw new MetadataExtractionError(`Metadata extractor not found: ${type}`, {
                code: RAGErrorCodes.METADATA_EXTRACTOR_NOT_FOUND,
                extractorType: type,
                details: { type },
            });
        }
        return extractor;
    }
    /**
     * Get list of available extractor types.
     * @returns Ids of the currently registered extractors.
     */
    getAvailableExtractors() {
        return this.list().map((item) => item.id);
    }
    /**
     * Get metadata for a specific extractor.
     *
     * @param typeOrAlias - Type name or alias.
     * @returns The metadata stored with the registration, or `undefined`.
     * @throws MetadataExtractionError when the type cannot be resolved.
     */
    getExtractorMetadata(typeOrAlias) {
        const type = this.resolveType(typeOrAlias);
        const entry = this.list().find((item) => item.id === type);
        return entry?.metadata;
    }
    /**
     * Collect descriptive metadata for every known extractor type.
     *
     * Starts from the built-in defaults (so discovery works before
     * `registerAll()` has run) and overlays whatever is currently registered,
     * which makes extractors added through `registerExtractor` discoverable.
     *
     * @returns Map of type -> metadata; built-ins first, in their default order.
     */
    knownExtractorMetadata() {
        const merged = new Map(Object.entries(DEFAULT_EXTRACTOR_METADATA));
        for (const item of this.list()) {
            if (item?.metadata) {
                merged.set(item.id, item.metadata);
            }
        }
        return merged;
    }
    /**
     * Get all aliases for a type.
     *
     * @param type - Canonical extractor type (aliases are not resolved here).
     * @returns The alias list, or an empty array for unknown types.
     */
    getAliasesForType(type) {
        const metadata = this.knownExtractorMetadata().get(type);
        return metadata?.aliases ?? [];
    }
    /**
     * Get all registered aliases.
     * @returns A copy of the alias table (lower-cased alias -> type).
     */
    getAllAliases() {
        return new Map(this.aliasMap);
    }
    /**
     * Check if a type or alias exists.
     * @param typeOrAlias - Type name or alias.
     * @returns `true` when `resolveType` succeeds, `false` otherwise.
     */
    hasExtractor(typeOrAlias) {
        try {
            this.resolveType(typeOrAlias);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Get extractors by use case.
     *
     * @param useCase - Case-insensitive substring matched against each
     *                  extractor's `useCases` entries.
     * @returns Types with at least one matching use case.
     */
    getExtractorsByUseCase(useCase) {
        const matches = [];
        const useCaseLower = useCase.toLowerCase();
        for (const [type, metadata] of this.knownExtractorMetadata()) {
            const useCases = metadata?.useCases ?? [];
            const hasMatchingUseCase = useCases.some((uc) => uc.toLowerCase().includes(useCaseLower));
            if (hasMatchingUseCase) {
                matches.push(type);
            }
        }
        return matches;
    }
    /**
     * Get extractors that can produce a specific extraction type.
     *
     * @param extractionType - Exact extraction type to look for.
     * @returns Types whose `extractionTypes` include it.
     */
    getExtractorsByExtractionType(extractionType) {
        const matches = [];
        for (const [type, metadata] of this.knownExtractorMetadata()) {
            const extractionTypes = metadata?.extractionTypes ?? [];
            if (extractionTypes.includes(extractionType)) {
                matches.push(type);
            }
        }
        return matches;
    }
    /**
     * Get default configuration for an extractor.
     *
     * @param typeOrAlias - Type name or alias.
     * @returns The `defaultConfig` of the registered metadata, or `undefined`.
     * @throws MetadataExtractionError when the type cannot be resolved.
     */
    getDefaultConfig(typeOrAlias) {
        const metadata = this.getExtractorMetadata(typeOrAlias);
        return metadata?.defaultConfig;
    }
    /**
     * Clear the registry (also clears aliases).
     */
    clear() {
        super.clear();
        this.aliasMap.clear();
    }
}
|
|
341
|
+
/**
 * Process-wide metadata extractor registry.
 *
 * Bound once, at module load, to the instance returned by
 * `MetadataExtractorRegistry.getInstance()`.
 */
export const metadataExtractorRegistry = MetadataExtractorRegistry.getInstance();
|
|
345
|
+
/**
 * List the extractor types currently available in the global registry.
 *
 * @returns Whatever `metadataExtractorRegistry.getAvailableExtractors()` returns.
 */
export function getAvailableExtractors() {
    const availableTypes = metadataExtractorRegistry.getAvailableExtractors();
    return availableTypes;
}
|
|
351
|
+
/**
 * Look up an extractor in the global registry by type or alias.
 *
 * @param typeOrAlias - Extractor type name or one of its aliases.
 * @returns Promise for the extractor; rejects when the registry lookup fails.
 */
export async function getExtractor(typeOrAlias) {
    // Hand the registry's promise straight back; no extra await is needed.
    const pendingExtractor = metadataExtractorRegistry.getExtractor(typeOrAlias);
    return pendingExtractor;
}
|
|
357
|
+
/**
 * Fetch the metadata the global registry holds for an extractor.
 *
 * @param typeOrAlias - Extractor type name or one of its aliases.
 * @returns Whatever `metadataExtractorRegistry.getExtractorMetadata()` returns.
 */
export function getRegisteredExtractorMetadata(typeOrAlias) {
    const registeredMetadata = metadataExtractorRegistry.getExtractorMetadata(typeOrAlias);
    return registeredMetadata;
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata Extraction Module Exports
|
|
3
|
+
*/
|
|
4
|
+
export { createMetadataExtractor, getAvailableExtractorTypes, getExtractorDefaultConfig, getExtractorMetadata, type MetadataExtractor, type MetadataExtractorConfig, MetadataExtractorFactory, type MetadataExtractorMetadata, type MetadataExtractorType, metadataExtractorFactory, } from "./MetadataExtractorFactory.js";
|
|
5
|
+
export { getAvailableExtractors, getExtractor, getRegisteredExtractorMetadata, MetadataExtractorRegistry, metadataExtractorRegistry, } from "./MetadataExtractorRegistry.js";
|
|
6
|
+
export { extractMetadata, LLMMetadataExtractor } from "./metadataExtractor.js";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata Extraction Module Exports
|
|
3
|
+
*/
|
|
4
|
+
// Factory pattern exports
|
|
5
|
+
export { createMetadataExtractor, getAvailableExtractorTypes, getExtractorDefaultConfig, getExtractorMetadata, MetadataExtractorFactory, metadataExtractorFactory, } from "./MetadataExtractorFactory.js";
|
|
6
|
+
// Registry pattern exports
|
|
7
|
+
export { getAvailableExtractors, getExtractor, getRegisteredExtractorMetadata, MetadataExtractorRegistry, metadataExtractorRegistry, } from "./MetadataExtractorRegistry.js";
|
|
8
|
+
// Core metadata extractor
|
|
9
|
+
export { extractMetadata, LLMMetadataExtractor } from "./metadataExtractor.js";
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-powered Metadata Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts structured metadata from document chunks using language models.
|
|
5
|
+
* Supports title, summary, keywords, Q&A pairs, and custom schema extraction.
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunk, ExtractParams, ExtractionResult } from "../types.js";
|
|
8
|
+
/**
 * Metadata extractor backed by a language model.
 *
 * Produces titles, summaries, keywords, Q&A pairs and custom-schema data
 * for document chunks.
 */
export declare class LLMMetadataExtractor {
    private provider;
    private modelName;
    /**
     * @param options - Optional `provider` and `modelName` overrides.
     */
    constructor(options?: {
        provider?: string;
        modelName?: string;
    });
    /**
     * Run the extractions requested in `params` over the given chunks.
     *
     * @param chunks - Chunks to extract metadata from
     * @param params - Extraction parameters
     * @returns One extraction result per chunk
     */
    extract(chunks: Chunk[], params: ExtractParams): Promise<ExtractionResult[]>;
    /** Groups chunks by document ID. */
    private groupByDocument;
    /** Extracts a title from document chunks. */
    private extractTitle;
    /** Extracts a summary from a chunk. */
    private extractSummary;
    /** Extracts keywords from a chunk. */
    private extractKeywords;
    /** Extracts Q&A pairs from a chunk. */
    private extractQuestions;
    /** Extracts custom schema data from a chunk. */
    private extractCustom;
    /** Parses Q&A pairs out of an LLM response. */
    private parseQAPairs;
    /** Calls the LLM with a prompt. */
    private callLLM;
}
|
|
59
|
+
/**
 * One-call helper for extracting metadata from chunks.
 *
 * @param chunks - Chunks to process
 * @param params - Extraction parameters
 * @param options - Optional extractor options (`provider`, `modelName`)
 * @returns Promise of the extraction results
 */
export declare function extractMetadata(chunks: Chunk[], params: ExtractParams, options?: {
    provider?: string;
    modelName?: string;
}): Promise<ExtractionResult[]>;
|