@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata Extractor Factory
|
|
3
|
+
*
|
|
4
|
+
* Factory for creating metadata extractor instances with configuration.
|
|
5
|
+
* Follows the BaseFactory pattern for consistent lifecycle management.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseFactory } from "../../core/infrastructure/index.js";
|
|
8
|
+
import type { Chunk, ExtractionResult, ExtractParams } from "../types.js";
|
|
9
|
+
/**
|
|
10
|
+
* Supported metadata extractor types: a closed union of seven string-literal identifiers.
|
|
11
|
+
*/
|
|
12
|
+
export type MetadataExtractorType = "llm" | "title" | "summary" | "keywords" | "questions" | "custom" | "composite";
|
|
13
|
+
/**
|
|
14
|
+
* Metadata Extractor interface - all extractors implement this: a read-only `type` tag plus one asynchronous `extract` method.
|
|
15
|
+
*/
|
|
16
|
+
export interface MetadataExtractor {
|
|
17
|
+
/** Extractor type identifier (read-only; one of the MetadataExtractorType literals) */
|
|
18
|
+
readonly type: MetadataExtractorType;
|
|
19
|
+
/**
|
|
20
|
+
* Extract metadata from chunks
|
|
21
|
+
|
+
* @param chunks - Array of chunks to extract metadata from
|
|
22
|
+
* @param params - Optional extraction parameters; may be omitted
|
|
23
|
+
* @returns Promise resolving to an array of extraction results
|
|
24
|
+
*/
|
|
25
|
+
extract(chunks: Chunk[], params?: ExtractParams): Promise<ExtractionResult[]>;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Metadata extractor configuration; only `type` is required, every other field is optional.
|
|
29
|
+
*/
|
|
30
|
+
export interface MetadataExtractorConfig {
|
|
31
|
+
/** Extractor type (required) */
|
|
32
|
+
type: MetadataExtractorType;
|
|
33
|
+
/** Language model provider (optional) */
|
|
34
|
+
provider?: string;
|
|
35
|
+
/** Model name for LLM-based extraction (optional) */
|
|
36
|
+
modelName?: string;
|
|
37
|
+
/** Custom prompt template (optional) */
|
|
38
|
+
promptTemplate?: string;
|
|
39
|
+
/** Maximum tokens for LLM response (optional) */
|
|
40
|
+
maxTokens?: number;
|
|
41
|
+
/** Temperature for LLM generation (optional) */
|
|
42
|
+
temperature?: number;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Metadata extractor metadata for discovery and documentation
|
|
46
|
+
*/
|
|
47
|
+
/**
 * Descriptive record attached to each registered extractor, used for
 * discovery and documentation.
 */
export interface MetadataExtractorMetadata {
    /** Human-readable description of the extractor. */
    description: string;

    /** Default configuration values for the extractor. */
    defaultConfig: Partial<MetadataExtractorConfig>;

    /** Names of the configuration options the extractor supports. */
    supportedOptions: string[];

    /** Recommended use cases, as free-form text. */
    useCases: string[];

    /** Alternative names that resolve to this extractor. */
    aliases: string[];

    /** Whether the extractor needs an AI model to operate. */
    requiresModel: boolean;

    /** Extraction types the extractor is able to produce. */
    extractionTypes: string[];
}
|
|
63
|
+
/**
|
|
64
|
+
* Metadata Extractor Factory
|
|
65
|
+
*
|
|
66
|
+
* Creates metadata extractor instances based on type with configuration support.
|
|
67
|
+
* Uses lazy loading via dynamic imports to avoid circular dependencies.
|
|
68
|
+
*/
|
|
69
|
+
/**
 * Singleton factory that builds {@link MetadataExtractor} instances by type
 * or alias and keeps a descriptive metadata record per registered type.
 */
export declare class MetadataExtractorFactory extends BaseFactory<
    MetadataExtractor,
    MetadataExtractorConfig
> {
    private static instance;
    private metadataMap;
    private constructor();

    /** Returns the shared factory, creating it on first use. */
    static getInstance(): MetadataExtractorFactory;

    /** Clears and discards the shared factory (intended for tests). */
    static resetInstance(): void;

    /** Registers all built-in extractor types. */
    protected registerAll(): Promise<void>;

    /** Adapts an LLM extractor to the {@link MetadataExtractor} shape. */
    private wrapExtractor;

    /** Builds an extractor whose calls carry preset default parameters. */
    private createSpecializedExtractor;

    /**
     * Registers an extractor together with its metadata and aliases.
     *
     * @param type - Canonical extractor type.
     * @param factory - Async function producing the extractor.
     * @param metadata - Descriptive record; its `aliases` are registered too.
     */
    registerExtractor(
        type: MetadataExtractorType,
        factory: (config?: MetadataExtractorConfig) => Promise<MetadataExtractor>,
        metadata: MetadataExtractorMetadata,
    ): void;

    /**
     * Creates an extractor for the given type or alias.
     *
     * @param typeOrAlias - Canonical type or one of its aliases.
     * @param config - Optional configuration forwarded to the factory function.
     */
    createExtractor(
        typeOrAlias: string,
        config?: MetadataExtractorConfig,
    ): Promise<MetadataExtractor>;

    /** Returns the metadata record for a type or alias, if registered. */
    getExtractorMetadata(
        typeOrAlias: string,
    ): MetadataExtractorMetadata | undefined;

    /** Returns the default configuration for a type or alias, if registered. */
    getDefaultConfig(
        typeOrAlias: string,
    ): Partial<MetadataExtractorConfig> | undefined;

    /** Lists the available extractor types (aliases excluded). */
    getAvailableTypes(): MetadataExtractorType[];

    /** Returns every alias mapped to the type it stands for. */
    getTypeAliases(): Map<string, string>;

    /** Tells whether a type or alias is registered. */
    hasType(typeOrAlias: string): boolean;

    /** Lists extractor types whose use cases mention the given text. */
    getExtractorsForUseCase(useCase: string): MetadataExtractorType[];

    /** Lists extractor types able to produce the given extraction type. */
    getExtractorsForExtractionType(
        extractionType: string,
    ): MetadataExtractorType[];

    /** Returns a copy of the type-to-metadata map. */
    getAllMetadata(): Map<MetadataExtractorType, MetadataExtractorMetadata>;

    /** Clears the factory registrations and the metadata map. */
    clear(): void;
}
|
|
138
|
+
/**
|
|
139
|
+
* Global metadata extractor factory singleton
|
|
140
|
+
*/
|
|
141
|
+
/**
 * Shared factory instance exported by the module; the implementation obtains
 * it from `MetadataExtractorFactory.getInstance()`.
 */
export declare const metadataExtractorFactory: MetadataExtractorFactory;
|
|
142
|
+
/**
|
|
143
|
+
* Convenience function to create a metadata extractor
|
|
144
|
+
*/
|
|
145
|
+
/**
 * Shortcut for `metadataExtractorFactory.createExtractor(typeOrAlias, config)`.
 *
 * @param typeOrAlias - Canonical extractor type or one of its aliases.
 * @param config - Optional extractor configuration.
 * @returns A promise resolving to the created extractor.
 */
export declare function createMetadataExtractor(typeOrAlias: string, config?: MetadataExtractorConfig): Promise<MetadataExtractor>;
|
|
146
|
+
/**
|
|
147
|
+
* Convenience function to get available extractor types
|
|
148
|
+
*/
|
|
149
|
+
/**
 * Shortcut for `metadataExtractorFactory.getAvailableTypes()`.
 *
 * NOTE(review): the implementation does not await factory initialization
 * before answering; confirm whether the list can be empty before the first
 * extractor has been created.
 *
 * @returns The available extractor types (aliases excluded).
 */
export declare function getAvailableExtractorTypes(): MetadataExtractorType[];
|
|
150
|
+
/**
|
|
151
|
+
* Convenience function to get extractor metadata
|
|
152
|
+
*/
|
|
153
|
+
/**
 * Shortcut for `metadataExtractorFactory.getExtractorMetadata(typeOrAlias)`.
 *
 * @param typeOrAlias - Canonical extractor type or one of its aliases.
 * @returns The metadata record, or `undefined` when none is stored.
 */
export declare function getExtractorMetadata(typeOrAlias: string): MetadataExtractorMetadata | undefined;
|
|
154
|
+
/**
|
|
155
|
+
* Convenience function to get default config
|
|
156
|
+
*/
|
|
157
|
+
/**
 * Shortcut for `metadataExtractorFactory.getDefaultConfig(typeOrAlias)`.
 *
 * @param typeOrAlias - Canonical extractor type or one of its aliases.
 * @returns The default configuration, or `undefined` when no metadata is stored.
 */
export declare function getExtractorDefaultConfig(typeOrAlias: string): Partial<MetadataExtractorConfig> | undefined;
|
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata Extractor Factory
|
|
3
|
+
*
|
|
4
|
+
* Factory for creating metadata extractor instances with configuration.
|
|
5
|
+
* Follows the BaseFactory pattern for consistent lifecycle management.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseFactory } from "../../core/infrastructure/index.js";
|
|
8
|
+
import { logger } from "../../utils/logger.js";
|
|
9
|
+
import { MetadataExtractionError, RAGErrorCodes } from "../errors/RAGError.js";
|
|
10
|
+
/**
|
|
11
|
+
* Default metadata extractor metadata entries
|
|
12
|
+
*/
|
|
13
|
+
const DEFAULT_EXTRACTOR_METADATA = {
|
|
14
|
+
llm: {
|
|
15
|
+
description: "Full LLM-powered metadata extraction supporting all extraction types",
|
|
16
|
+
defaultConfig: {
|
|
17
|
+
provider: "openai",
|
|
18
|
+
modelName: "gpt-4o-mini",
|
|
19
|
+
temperature: 0.3,
|
|
20
|
+
},
|
|
21
|
+
supportedOptions: [
|
|
22
|
+
"provider",
|
|
23
|
+
"modelName",
|
|
24
|
+
"promptTemplate",
|
|
25
|
+
"maxTokens",
|
|
26
|
+
"temperature",
|
|
27
|
+
],
|
|
28
|
+
useCases: [
|
|
29
|
+
"Comprehensive metadata extraction",
|
|
30
|
+
"Multi-type extraction in single pass",
|
|
31
|
+
"Custom schema extraction",
|
|
32
|
+
],
|
|
33
|
+
aliases: ["full", "comprehensive", "all"],
|
|
34
|
+
requiresModel: true,
|
|
35
|
+
extractionTypes: ["title", "summary", "keywords", "questions", "custom"],
|
|
36
|
+
},
|
|
37
|
+
title: {
|
|
38
|
+
description: "Extracts concise, descriptive titles from document content",
|
|
39
|
+
defaultConfig: {
|
|
40
|
+
provider: "openai",
|
|
41
|
+
modelName: "gpt-4o-mini",
|
|
42
|
+
maxTokens: 100,
|
|
43
|
+
},
|
|
44
|
+
supportedOptions: ["provider", "modelName", "promptTemplate", "maxTokens"],
|
|
45
|
+
useCases: [
|
|
46
|
+
"Document indexing",
|
|
47
|
+
"Content organization",
|
|
48
|
+
"Navigation systems",
|
|
49
|
+
],
|
|
50
|
+
aliases: ["header", "heading"],
|
|
51
|
+
requiresModel: true,
|
|
52
|
+
extractionTypes: ["title"],
|
|
53
|
+
},
|
|
54
|
+
summary: {
|
|
55
|
+
description: "Generates concise summaries of document chunks",
|
|
56
|
+
defaultConfig: {
|
|
57
|
+
provider: "openai",
|
|
58
|
+
modelName: "gpt-4o-mini",
|
|
59
|
+
maxTokens: 200,
|
|
60
|
+
},
|
|
61
|
+
supportedOptions: [
|
|
62
|
+
"provider",
|
|
63
|
+
"modelName",
|
|
64
|
+
"promptTemplate",
|
|
65
|
+
"maxTokens",
|
|
66
|
+
"maxWords",
|
|
67
|
+
],
|
|
68
|
+
useCases: [
|
|
69
|
+
"Document previews",
|
|
70
|
+
"Search result snippets",
|
|
71
|
+
"Content condensation",
|
|
72
|
+
],
|
|
73
|
+
aliases: ["summarize", "abstract"],
|
|
74
|
+
requiresModel: true,
|
|
75
|
+
extractionTypes: ["summary"],
|
|
76
|
+
},
|
|
77
|
+
keywords: {
|
|
78
|
+
description: "Extracts key terms and phrases from content",
|
|
79
|
+
defaultConfig: {
|
|
80
|
+
provider: "openai",
|
|
81
|
+
modelName: "gpt-4o-mini",
|
|
82
|
+
maxTokens: 100,
|
|
83
|
+
},
|
|
84
|
+
supportedOptions: [
|
|
85
|
+
"provider",
|
|
86
|
+
"modelName",
|
|
87
|
+
"promptTemplate",
|
|
88
|
+
"maxKeywords",
|
|
89
|
+
],
|
|
90
|
+
useCases: ["Tag generation", "Topic modeling", "Search optimization"],
|
|
91
|
+
aliases: ["tags", "terms", "keyphrase"],
|
|
92
|
+
requiresModel: true,
|
|
93
|
+
extractionTypes: ["keywords"],
|
|
94
|
+
},
|
|
95
|
+
questions: {
|
|
96
|
+
description: "Generates Q&A pairs from content for training or FAQs",
|
|
97
|
+
defaultConfig: {
|
|
98
|
+
provider: "openai",
|
|
99
|
+
modelName: "gpt-4o-mini",
|
|
100
|
+
maxTokens: 500,
|
|
101
|
+
},
|
|
102
|
+
supportedOptions: [
|
|
103
|
+
"provider",
|
|
104
|
+
"modelName",
|
|
105
|
+
"promptTemplate",
|
|
106
|
+
"numQuestions",
|
|
107
|
+
"includeAnswers",
|
|
108
|
+
],
|
|
109
|
+
useCases: [
|
|
110
|
+
"FAQ generation",
|
|
111
|
+
"Training data creation",
|
|
112
|
+
"Knowledge base building",
|
|
113
|
+
],
|
|
114
|
+
aliases: ["qa", "faq", "questions-answers"],
|
|
115
|
+
requiresModel: true,
|
|
116
|
+
extractionTypes: ["questions"],
|
|
117
|
+
},
|
|
118
|
+
custom: {
|
|
119
|
+
description: "Extracts structured data according to custom schema",
|
|
120
|
+
defaultConfig: {
|
|
121
|
+
provider: "openai",
|
|
122
|
+
modelName: "gpt-4o-mini",
|
|
123
|
+
maxTokens: 500,
|
|
124
|
+
},
|
|
125
|
+
supportedOptions: [
|
|
126
|
+
"provider",
|
|
127
|
+
"modelName",
|
|
128
|
+
"promptTemplate",
|
|
129
|
+
"schema",
|
|
130
|
+
"description",
|
|
131
|
+
],
|
|
132
|
+
useCases: [
|
|
133
|
+
"Structured data extraction",
|
|
134
|
+
"Entity extraction",
|
|
135
|
+
"Custom field extraction",
|
|
136
|
+
],
|
|
137
|
+
aliases: ["schema", "structured", "entity"],
|
|
138
|
+
requiresModel: true,
|
|
139
|
+
extractionTypes: ["custom"],
|
|
140
|
+
},
|
|
141
|
+
composite: {
|
|
142
|
+
description: "Combines multiple extraction types in a single pass",
|
|
143
|
+
defaultConfig: {
|
|
144
|
+
provider: "openai",
|
|
145
|
+
modelName: "gpt-4o-mini",
|
|
146
|
+
},
|
|
147
|
+
supportedOptions: ["provider", "modelName", "extractors"],
|
|
148
|
+
useCases: [
|
|
149
|
+
"Multi-field extraction",
|
|
150
|
+
"Complete document processing",
|
|
151
|
+
"Pipeline integration",
|
|
152
|
+
],
|
|
153
|
+
aliases: ["multi", "combined", "batch"],
|
|
154
|
+
requiresModel: true,
|
|
155
|
+
extractionTypes: ["title", "summary", "keywords", "questions", "custom"],
|
|
156
|
+
},
|
|
157
|
+
};
|
|
158
|
+
/**
|
|
159
|
+
* Metadata Extractor Factory
|
|
160
|
+
*
|
|
161
|
+
* Creates metadata extractor instances based on type with configuration support.
|
|
162
|
+
* Uses lazy loading via dynamic imports to avoid circular dependencies.
|
|
163
|
+
*/
|
|
164
|
+
/**
 * Singleton factory that builds metadata extractors by type or alias.
 *
 * The built-in extractors are registered in `registerAll()`. Each registered
 * factory function loads `./metadataExtractor.js` through a dynamic import at
 * creation time, so that module is only evaluated when an extractor is
 * actually requested.
 */
export class MetadataExtractorFactory extends BaseFactory {
    /** The shared instance, or `null` when none has been created yet. */
    static instance = null;
    /** Metadata record per registered extractor type. */
    metadataMap = new Map();
    constructor() {
        super();
    }
    /**
     * Returns the shared factory, creating it on first use.
     */
    static getInstance() {
        const existing = MetadataExtractorFactory.instance;
        if (existing) {
            return existing;
        }
        const created = new MetadataExtractorFactory();
        MetadataExtractorFactory.instance = created;
        return created;
    }
    /**
     * Clears and discards the shared factory (for testing).
     * Does nothing when no instance exists.
     */
    static resetInstance() {
        const current = MetadataExtractorFactory.instance;
        if (!current) {
            return;
        }
        current.clear();
        MetadataExtractorFactory.instance = null;
    }
    /**
     * Registers the built-in extractors in a fixed order:
     * llm, title, summary, keywords, questions, custom, composite.
     */
    async registerAll() {
        // Loads the LLM extractor module lazily and builds one instance from
        // the provider/model given in the config (both may be undefined).
        const loadLLMExtractor = async (config) => {
            const loaded = await import("./metadataExtractor.js");
            return new loaded.LLMMetadataExtractor({
                provider: config?.provider,
                modelName: config?.modelName,
            });
        };
        // Extractors that forward call parameters untouched.
        const registerPassthrough = (type) => {
            this.registerExtractor(type, async (config) => this.wrapExtractor(await loadLLMExtractor(config), type), DEFAULT_EXTRACTOR_METADATA[type]);
        };
        // Extractors that switch on exactly one extraction type by default,
        // e.g. "title" -> { title: true }.
        const registerSpecialized = (type) => {
            this.registerExtractor(type, async (config) => this.createSpecializedExtractor(await loadLLMExtractor(config), type, { [type]: true }), DEFAULT_EXTRACTOR_METADATA[type]);
        };
        registerPassthrough("llm");
        registerSpecialized("title");
        registerSpecialized("summary");
        registerSpecialized("keywords");
        registerSpecialized("questions");
        registerPassthrough("custom");
        registerPassthrough("composite");
        logger.debug(`[MetadataExtractorFactory] Registered ${this.items.size} extractor types`);
    }
    /**
     * Adapts an LLM extractor to the MetadataExtractor shape.
     * A missing `params` argument is replaced by an empty object.
     */
    wrapExtractor(extractor, type) {
        return {
            type,
            extract: async (chunks, params) => extractor.extract(chunks, params ?? {}),
        };
    }
    /**
     * Builds an extractor whose calls start from `defaultParams`; parameters
     * supplied by the caller override the defaults key by key.
     */
    createSpecializedExtractor(extractor, type, defaultParams) {
        return {
            type,
            extract: async (chunks, params) => extractor.extract(chunks, { ...defaultParams, ...params }),
        };
    }
    /**
     * Stores the metadata for `type`, then registers the factory function
     * under the type and its aliases.
     */
    registerExtractor(type, factory, metadata) {
        this.metadataMap.set(type, metadata);
        const aliases = metadata.aliases;
        this.register(type, factory, aliases, { metadata });
        logger.debug(`[MetadataExtractorFactory] Registered extractor '${type}' with aliases: ${aliases.join(", ")}`);
    }
    /**
     * Creates an extractor for a type or alias.
     *
     * Throws MetadataExtractionError when the resolved name is not
     * registered, or when creation fails; an error that already is a
     * MetadataExtractionError is rethrown unchanged.
     */
    async createExtractor(typeOrAlias, config) {
        await this.ensureInitialized();
        const canonicalType = this.resolveName(typeOrAlias);
        if (!this.has(canonicalType)) {
            const knownTypes = this.getAvailable();
            throw new MetadataExtractionError(`Unknown metadata extractor type: '${typeOrAlias}'. Available types: ${knownTypes.join(", ")}`, {
                code: RAGErrorCodes.METADATA_EXTRACTOR_NOT_FOUND,
                extractorType: typeOrAlias,
                details: {
                    requestedType: typeOrAlias,
                    availableTypes: knownTypes,
                },
            });
        }
        let created;
        try {
            created = await this.create(canonicalType, config);
            logger.debug(`[MetadataExtractorFactory] Created extractor '${canonicalType}' with config:`, config);
        }
        catch (error) {
            if (error instanceof MetadataExtractionError) {
                throw error;
            }
            const isError = error instanceof Error;
            const reason = isError ? error.message : String(error);
            throw new MetadataExtractionError(`Failed to create extractor '${canonicalType}': ${reason}`, {
                extractorType: canonicalType,
                cause: isError ? error : undefined,
                details: { type: canonicalType, config },
            });
        }
        return created;
    }
    /**
     * Returns the metadata stored for a type or alias, or undefined.
     */
    getExtractorMetadata(typeOrAlias) {
        return this.metadataMap.get(this.resolveName(typeOrAlias));
    }
    /**
     * Returns the default configuration for a type or alias, or undefined
     * when no metadata is stored for it.
     */
    getDefaultConfig(typeOrAlias) {
        return this.getExtractorMetadata(typeOrAlias)?.defaultConfig;
    }
    /**
     * Lists the available extractor types (not including aliases).
     */
    getAvailableTypes() {
        return this.getAvailable();
    }
    /**
     * Returns all aliases mapped to their types.
     */
    getTypeAliases() {
        return this.getAliases();
    }
    /**
     * Tells whether a type or alias resolves to a registered extractor.
     */
    hasType(typeOrAlias) {
        return this.has(this.resolveName(typeOrAlias));
    }
    /**
     * Lists the types having at least one use case that contains `useCase`
     * (case-insensitive substring match), in metadata insertion order.
     */
    getExtractorsForUseCase(useCase) {
        const needle = useCase.toLowerCase();
        return [...this.metadataMap]
            .filter(([, meta]) => meta.useCases.some((candidate) => candidate.toLowerCase().includes(needle)))
            .map(([type]) => type);
    }
    /**
     * Lists the types whose `extractionTypes` include `extractionType`
     * exactly, in metadata insertion order.
     */
    getExtractorsForExtractionType(extractionType) {
        return [...this.metadataMap]
            .filter(([, meta]) => meta.extractionTypes.includes(extractionType))
            .map(([type]) => type);
    }
    /**
     * Returns a new Map holding the same type-to-metadata entries.
     */
    getAllMetadata() {
        return new Map(this.metadataMap.entries());
    }
    /**
     * Clears the base factory state and the metadata map.
     */
    clear() {
        super.clear();
        this.metadataMap.clear();
    }
}
|
|
391
|
+
/**
|
|
392
|
+
* Global metadata extractor factory singleton
|
|
393
|
+
*/
|
|
394
|
+
/**
 * Shared factory instance, obtained once at module load.
 * The convenience functions below delegate to this binding.
 */
export const metadataExtractorFactory = MetadataExtractorFactory.getInstance();
|
|
395
|
+
/**
|
|
396
|
+
* Convenience function to create a metadata extractor
|
|
397
|
+
*/
|
|
398
|
+
/**
 * Creates a metadata extractor through the shared factory.
 *
 * @param typeOrAlias - Canonical extractor type or one of its aliases.
 * @param config - Optional extractor configuration.
 * @returns A promise resolving to the created extractor.
 */
export async function createMetadataExtractor(typeOrAlias, config) {
    const factory = metadataExtractorFactory;
    return factory.createExtractor(typeOrAlias, config);
}
|
|
401
|
+
/**
|
|
402
|
+
* Convenience function to get available extractor types
|
|
403
|
+
*/
|
|
404
|
+
/**
 * Lists the extractor types available from the shared factory.
 *
 * @returns The available extractor types (aliases excluded).
 */
export function getAvailableExtractorTypes() {
    const factory = metadataExtractorFactory;
    return factory.getAvailableTypes();
}
|
|
407
|
+
/**
|
|
408
|
+
* Convenience function to get extractor metadata
|
|
409
|
+
*/
|
|
410
|
+
/**
 * Looks up extractor metadata through the shared factory.
 *
 * @param typeOrAlias - Canonical extractor type or one of its aliases.
 * @returns The metadata record, or undefined when none is stored.
 */
export function getExtractorMetadata(typeOrAlias) {
    const factory = metadataExtractorFactory;
    return factory.getExtractorMetadata(typeOrAlias);
}
|
|
413
|
+
/**
|
|
414
|
+
* Convenience function to get default config
|
|
415
|
+
*/
|
|
416
|
+
/**
 * Looks up an extractor's default configuration through the shared factory.
 *
 * @param typeOrAlias - Canonical extractor type or one of its aliases.
 * @returns The default configuration, or undefined when no metadata is stored.
 */
export function getExtractorDefaultConfig(typeOrAlias) {
    const factory = metadataExtractorFactory;
    return factory.getDefaultConfig(typeOrAlias);
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata Extractor Registry
|
|
3
|
+
*
|
|
4
|
+
* Centralized registry for all metadata extractor implementations with metadata
|
|
5
|
+
* and discovery capabilities. Follows the BaseRegistry pattern.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseRegistry } from "../../core/infrastructure/index.js";
|
|
8
|
+
import type { MetadataExtractor, MetadataExtractorConfig, MetadataExtractorMetadata, MetadataExtractorType } from "./MetadataExtractorFactory.js";
|
|
9
|
+
/**
|
|
10
|
+
* Metadata Extractor Registry
|
|
11
|
+
*
|
|
12
|
+
* Manages registration and discovery of all metadata extractor implementations.
|
|
13
|
+
* Extends BaseRegistry for consistent lifecycle management.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Registry of metadata extractor implementations with alias support and
 * discovery helpers. Declaration only: the member notes below restate the
 * declared contract, not implementation details.
 */
export declare class MetadataExtractorRegistry extends BaseRegistry<
    MetadataExtractor,
    MetadataExtractorMetadata
> {
    private static instance;
    private aliasMap;
    private constructor();

    /** Returns the singleton registry. */
    static getInstance(): MetadataExtractorRegistry;

    /** Resets the singleton (for testing). */
    static resetInstance(): void;

    /** Registers all built-in extractors. */
    protected registerAll(): Promise<void>;

    /** Creates the extractor instance wrapper. */
    private createExtractorInstance;

    /**
     * Registers an extractor with its aliases.
     *
     * @param type - Canonical extractor type.
     * @param factory - Async function producing the extractor.
     * @param metadata - Descriptive record for the extractor.
     */
    registerExtractor(
        type: MetadataExtractorType,
        factory: () => Promise<MetadataExtractor>,
        metadata: MetadataExtractorMetadata,
    ): void;

    /** Resolves a name or alias to an extractor type. */
    resolveType(nameOrAlias: string): MetadataExtractorType;

    /** Gets an extractor by type or alias. */
    getExtractor(typeOrAlias: string): Promise<MetadataExtractor>;

    /** Lists the available extractor types. */
    getAvailableExtractors(): MetadataExtractorType[];

    /** Gets the metadata for a type or alias, if any. */
    getExtractorMetadata(
        typeOrAlias: string,
    ): MetadataExtractorMetadata | undefined;

    /** Lists all aliases of a type. */
    getAliasesForType(type: MetadataExtractorType): string[];

    /** Returns every registered alias with the type it maps to. */
    getAllAliases(): Map<string, MetadataExtractorType>;

    /** Tells whether a type or alias exists. */
    hasExtractor(typeOrAlias: string): boolean;

    /** Lists extractor types by use case. */
    getExtractorsByUseCase(useCase: string): MetadataExtractorType[];

    /** Lists extractor types able to produce the given extraction type. */
    getExtractorsByExtractionType(
        extractionType: string,
    ): MetadataExtractorType[];

    /** Gets the default configuration for a type or alias, if any. */
    getDefaultConfig(
        typeOrAlias: string,
    ): Partial<MetadataExtractorConfig> | undefined;

    /** Clears the registry, including its aliases. */
    clear(): void;
}
|
|
84
|
+
/**
|
|
85
|
+
* Global metadata extractor registry singleton
|
|
86
|
+
*/
|
|
87
|
+
/**
 * Module-level registry binding shared by the convenience functions below.
 * NOTE(review): presumably the result of `MetadataExtractorRegistry.getInstance()`;
 * the implementation is not visible in this declaration file.
 */
export declare const metadataExtractorRegistry: MetadataExtractorRegistry;
|
|
88
|
+
/**
|
|
89
|
+
* Convenience function to get available extractors
|
|
90
|
+
*/
|
|
91
|
+
/**
 * Lists the extractor types available from the registry.
 *
 * @returns The available extractor types.
 */
export declare function getAvailableExtractors(): MetadataExtractorType[];
|
|
92
|
+
/**
|
|
93
|
+
* Convenience function to get extractor by type
|
|
94
|
+
*/
|
|
95
|
+
/**
 * Gets an extractor by type or alias.
 *
 * @param typeOrAlias - Extractor type or alias to look up.
 * @returns A promise resolving to the extractor.
 */
export declare function getExtractor(typeOrAlias: string): Promise<MetadataExtractor>;
|
|
96
|
+
/**
|
|
97
|
+
* Convenience function to get extractor metadata
|
|
98
|
+
*/
|
|
99
|
+
/**
 * Gets the metadata registered for an extractor type or alias.
 *
 * @param typeOrAlias - Extractor type or alias to look up.
 * @returns The metadata record, or `undefined` when there is none.
 */
export declare function getRegisteredExtractorMetadata(typeOrAlias: string): MetadataExtractorMetadata | undefined;
|