@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token-based Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits text based on approximate token counts using simple word-based tokenization.
|
|
5
|
+
* Best for controlling context window usage with LLMs.
|
|
6
|
+
*/
|
|
7
|
+
import { randomUUID } from "crypto";
|
|
8
|
+
/**
|
|
9
|
+
* Token-aware chunker implementation
|
|
10
|
+
* Splits text based on approximate token counts
|
|
11
|
+
*
|
|
12
|
+
* Note: Uses simple word-based tokenization as an approximation.
|
|
13
|
+
* For exact token counts, integrate with tiktoken or model-specific tokenizers.
|
|
14
|
+
*/
|
|
15
|
+
export class TokenChunker {
|
|
16
|
+
strategy = "token";
|
|
17
|
+
// Approximate characters per token for different tokenizers
|
|
18
|
+
CHARS_PER_TOKEN = {
|
|
19
|
+
cl100k_base: 4, // GPT-4, GPT-3.5-turbo
|
|
20
|
+
p50k_base: 4, // Codex
|
|
21
|
+
r50k_base: 4, // GPT-3
|
|
22
|
+
default: 4,
|
|
23
|
+
};
|
|
24
|
+
async chunk(text, config) {
|
|
25
|
+
const { maxSize, overlap = 0, tokenizer = "cl100k_base", maxTokens = 512, tokenOverlap, trimWhitespace = true, metadata = {}, } = config || {};
|
|
26
|
+
const chunks = [];
|
|
27
|
+
const documentId = randomUUID();
|
|
28
|
+
if (!text || text.length === 0) {
|
|
29
|
+
return chunks;
|
|
30
|
+
}
|
|
31
|
+
// Determine effective overlap
|
|
32
|
+
const effectiveOverlap = tokenOverlap ?? Math.floor(overlap / this.getCharsPerToken(tokenizer));
|
|
33
|
+
// Use maxSize if provided, otherwise calculate from maxTokens
|
|
34
|
+
const _effectiveMaxChars = maxSize ?? maxTokens * this.getCharsPerToken(tokenizer);
|
|
35
|
+
// Tokenize text (simple word-based approximation)
|
|
36
|
+
const words = this.tokenize(text);
|
|
37
|
+
const _tokensPerWord = this.estimateTokensPerWord(tokenizer);
|
|
38
|
+
let currentWords = [];
|
|
39
|
+
let currentTokenCount = 0;
|
|
40
|
+
let chunkIndex = 0;
|
|
41
|
+
let startPosition = 0;
|
|
42
|
+
let charPosition = 0;
|
|
43
|
+
for (const word of words) {
|
|
44
|
+
const wordTokens = Math.ceil(word.length / this.getCharsPerToken(tokenizer));
|
|
45
|
+
// Check if adding this word would exceed the limit
|
|
46
|
+
if (currentTokenCount + wordTokens > maxTokens &&
|
|
47
|
+
currentWords.length > 0) {
|
|
48
|
+
// Save current chunk
|
|
49
|
+
const chunkText = currentWords.join(" ");
|
|
50
|
+
const finalText = trimWhitespace ? chunkText.trim() : chunkText;
|
|
51
|
+
if (finalText.length > 0) {
|
|
52
|
+
chunks.push({
|
|
53
|
+
id: randomUUID(),
|
|
54
|
+
text: finalText,
|
|
55
|
+
metadata: {
|
|
56
|
+
documentId,
|
|
57
|
+
chunkIndex,
|
|
58
|
+
startPosition,
|
|
59
|
+
endPosition: charPosition,
|
|
60
|
+
documentType: "text",
|
|
61
|
+
custom: {
|
|
62
|
+
...metadata,
|
|
63
|
+
estimatedTokens: currentTokenCount,
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
});
|
|
67
|
+
chunkIndex++;
|
|
68
|
+
}
|
|
69
|
+
// Handle token overlap
|
|
70
|
+
if (effectiveOverlap > 0 && currentWords.length > 0) {
|
|
71
|
+
// Keep some words for overlap
|
|
72
|
+
let overlapTokens = 0;
|
|
73
|
+
const overlapWords = [];
|
|
74
|
+
for (let i = currentWords.length - 1; i >= 0; i--) {
|
|
75
|
+
const w = currentWords[i];
|
|
76
|
+
const wTokens = Math.ceil(w.length / this.getCharsPerToken(tokenizer));
|
|
77
|
+
if (overlapTokens + wTokens <= effectiveOverlap) {
|
|
78
|
+
overlapWords.unshift(w);
|
|
79
|
+
overlapTokens += wTokens;
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
break;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
currentWords = overlapWords;
|
|
86
|
+
currentTokenCount = overlapTokens;
|
|
87
|
+
// Adjust start position for overlap
|
|
88
|
+
const overlapChars = overlapWords.join(" ").length + 1;
|
|
89
|
+
startPosition = charPosition - overlapChars;
|
|
90
|
+
}
|
|
91
|
+
else {
|
|
92
|
+
currentWords = [];
|
|
93
|
+
currentTokenCount = 0;
|
|
94
|
+
startPosition = charPosition;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
currentWords.push(word);
|
|
98
|
+
currentTokenCount += wordTokens;
|
|
99
|
+
charPosition += word.length + 1; // +1 for space
|
|
100
|
+
}
|
|
101
|
+
// Don't forget the last chunk
|
|
102
|
+
if (currentWords.length > 0) {
|
|
103
|
+
const chunkText = currentWords.join(" ");
|
|
104
|
+
const finalText = trimWhitespace ? chunkText.trim() : chunkText;
|
|
105
|
+
if (finalText.length > 0) {
|
|
106
|
+
chunks.push({
|
|
107
|
+
id: randomUUID(),
|
|
108
|
+
text: finalText,
|
|
109
|
+
metadata: {
|
|
110
|
+
documentId,
|
|
111
|
+
chunkIndex,
|
|
112
|
+
startPosition,
|
|
113
|
+
endPosition: charPosition,
|
|
114
|
+
documentType: "text",
|
|
115
|
+
custom: {
|
|
116
|
+
...metadata,
|
|
117
|
+
estimatedTokens: currentTokenCount,
|
|
118
|
+
},
|
|
119
|
+
},
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
// Update total chunks count
|
|
124
|
+
chunks.forEach((chunk) => {
|
|
125
|
+
chunk.metadata.totalChunks = chunks.length;
|
|
126
|
+
});
|
|
127
|
+
return chunks;
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Simple word-based tokenization
|
|
131
|
+
*/
|
|
132
|
+
tokenize(text) {
|
|
133
|
+
// Split on whitespace and filter empty strings
|
|
134
|
+
return text.split(/\s+/).filter((w) => w.length > 0);
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* Get characters per token for a tokenizer
|
|
138
|
+
*/
|
|
139
|
+
getCharsPerToken(tokenizer) {
|
|
140
|
+
return this.CHARS_PER_TOKEN[tokenizer] ?? this.CHARS_PER_TOKEN.default;
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* Estimate average tokens per word
|
|
144
|
+
*/
|
|
145
|
+
estimateTokensPerWord(_tokenizer) {
|
|
146
|
+
// Average English word is ~5 characters, so roughly 1.25 tokens
|
|
147
|
+
return 1.25;
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Estimate token count for text
|
|
151
|
+
*/
|
|
152
|
+
estimateTokenCount(text, tokenizer = "cl100k_base") {
|
|
153
|
+
return Math.ceil(text.length / this.getCharsPerToken(tokenizer));
|
|
154
|
+
}
|
|
155
|
+
validateConfig(config) {
|
|
156
|
+
const errors = [];
|
|
157
|
+
const warnings = [];
|
|
158
|
+
const tokenConfig = config;
|
|
159
|
+
if (tokenConfig.maxTokens !== undefined && tokenConfig.maxTokens <= 0) {
|
|
160
|
+
errors.push("maxTokens must be greater than 0");
|
|
161
|
+
}
|
|
162
|
+
if (tokenConfig.tokenOverlap !== undefined &&
|
|
163
|
+
tokenConfig.tokenOverlap < 0) {
|
|
164
|
+
errors.push("tokenOverlap must be non-negative");
|
|
165
|
+
}
|
|
166
|
+
if (tokenConfig.tokenOverlap !== undefined &&
|
|
167
|
+
tokenConfig.maxTokens !== undefined) {
|
|
168
|
+
if (tokenConfig.tokenOverlap >= tokenConfig.maxTokens) {
|
|
169
|
+
errors.push("tokenOverlap must be less than maxTokens");
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
if (tokenConfig.maxSize !== undefined && tokenConfig.maxSize <= 0) {
|
|
173
|
+
errors.push("maxSize must be greater than 0");
|
|
174
|
+
}
|
|
175
|
+
// Warn about tokenizer approximation
|
|
176
|
+
warnings.push("Token counts are approximated. For exact counts, integrate with tiktoken.");
|
|
177
|
+
return {
|
|
178
|
+
valid: errors.length === 0,
|
|
179
|
+
errors,
|
|
180
|
+
warnings,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
//# sourceMappingURL=tokenChunker.js.map
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MDocument - Main Document Processing Class
|
|
3
|
+
*
|
|
4
|
+
* Provides a fluent interface for document processing using the Factory + Registry pattern.
|
|
5
|
+
* Supports various document types, chunking strategies, and metadata extraction.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* const doc = MDocument.fromText(content);
|
|
10
|
+
* const chunked = await doc.chunk({
|
|
11
|
+
* strategy: 'recursive',
|
|
12
|
+
* config: { maxSize: 1000, overlap: 200 }
|
|
13
|
+
* });
|
|
14
|
+
* const enriched = await doc.extractMetadata({
|
|
15
|
+
* title: true,
|
|
16
|
+
* summary: true,
|
|
17
|
+
* keywords: true
|
|
18
|
+
* });
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
import type { Chunk, ChunkParams, DocumentType, ExtractParams, MDocumentConfig } from "../types.js";
|
|
22
|
+
/**
 * Document wrapper exposing a chainable processing API.
 *
 * An instance can be:
 * - built from text, markdown, HTML, JSON, LaTeX or CSV content
 * - chunked with a selectable strategy
 * - enriched with LLM-extracted metadata
 * - embedded chunk by chunk
 */
export declare class MDocument {
    private state;
    private documentId;
    /**
     * Build a document around raw content.
     * @param content - Document content
     * @param config - Optional document configuration
     */
    constructor(content: string, config?: MDocumentConfig);
    /**
     * Factory for plain-text documents.
     * @param text - Plain text content
     * @param metadata - Optional metadata to attach
     * @returns A new MDocument
     */
    static fromText(text: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Factory for markdown documents.
     * @param markdown - Markdown content
     * @param metadata - Optional metadata to attach
     * @returns A new MDocument
     */
    static fromMarkdown(markdown: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Factory for HTML documents.
     * @param html - HTML content
     * @param metadata - Optional metadata to attach
     * @returns A new MDocument
     */
    static fromHTML(html: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Factory for JSON documents.
     * @param json - JSON given either as a string or as an object
     * @param metadata - Optional metadata to attach
     * @returns A new MDocument
     */
    static fromJSONContent(json: string | object, metadata?: Record<string, unknown>): MDocument;
    /**
     * Factory for LaTeX documents.
     * @param latex - LaTeX content
     * @param metadata - Optional metadata to attach
     * @returns A new MDocument
     */
    static fromLaTeX(latex: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Factory for CSV documents.
     * @param csv - CSV content
     * @param metadata - Optional metadata to attach
     * @returns A new MDocument
     */
    static fromCSV(csv: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Split the document into chunks with the requested strategy.
     * @param params - Chunking parameters
     * @returns Promise resolving to this MDocument, so calls can be chained
     */
    chunk(params?: ChunkParams): Promise<MDocument>;
    /**
     * Run LLM-based metadata extraction over the chunks.
     * @param params - What to extract
     * @param options - Extractor options (provider and model name)
     * @returns Promise resolving to this MDocument, so calls can be chained
     */
    extractMetadata(params: ExtractParams, options?: {
        provider?: string;
        modelName?: string;
    }): Promise<MDocument>;
    /**
     * Produce embeddings for every chunk.
     * @param provider - Embedding provider name
     * @param modelName - Embedding model name
     * @returns Promise resolving to this MDocument, so calls can be chained
     */
    embed(provider?: string, modelName?: string): Promise<MDocument>;
    /** Document ID accessor. */
    getId(): string;
    /** Raw document content accessor. */
    getContent(): string;
    /** Document type accessor. */
    getType(): DocumentType;
    /** Document metadata accessor. */
    getMetadata(): Record<string, unknown>;
    /** Processed chunks accessor. */
    getChunks(): Chunk[];
    /** Chunk embeddings accessor. */
    getEmbeddings(): number[][];
    /** Processing history accessor. */
    getHistory(): string[];
    /** Whether the document has been chunked. */
    isChunked(): boolean;
    /** Whether the document has embeddings. */
    hasEmbeddings(): boolean;
    /** Number of chunks. */
    getChunkCount(): number;
    /**
     * Set a single metadata entry.
     * @param key - Metadata key
     * @param value - Metadata value
     * @returns This MDocument, so calls can be chained
     */
    setMetadata(key: string, value: unknown): MDocument;
    /**
     * Merge a metadata record into the document.
     * @param metadata - Entries to merge
     * @returns This MDocument, so calls can be chained
     */
    mergeMetadata(metadata: Record<string, unknown>): MDocument;
    /**
     * Keep only the chunks accepted by a predicate.
     * @param predicate - Filter function applied to each chunk
     * @returns A new MDocument holding the filtered chunks
     */
    filterChunks(predicate: (chunk: Chunk) => boolean): MDocument;
    /**
     * Apply a transformation to every chunk.
     * @param transform - Function mapping a chunk to its replacement
     * @returns A new MDocument holding the transformed chunks
     */
    mapChunks(transform: (chunk: Chunk) => Chunk): MDocument;
    /**
     * Plain-object form of the document, suitable for serialization.
     */
    toJSON(): {
        id: string;
        content: string;
        type: DocumentType;
        metadata: Record<string, unknown>;
        chunks: Chunk[];
        history: string[];
    };
    /**
     * Rebuild an MDocument from its serialized form.
     * @param json - Serialized document data; only `content` and `type` are required
     * @returns A new MDocument
     */
    static fromJSON(json: {
        id?: string;
        content: string;
        type: DocumentType;
        metadata?: Record<string, unknown>;
        chunks?: Chunk[];
        history?: string[];
    }): MDocument;
    /**
     * Default chunking strategy for the document's type.
     */
    private getDefaultStrategy;
}
|