@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sentence Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits text by sentence boundaries for semantically meaningful chunks.
|
|
5
|
+
*/
|
|
6
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
/**
|
|
8
|
+
* Sentence Chunker
|
|
9
|
+
*/
|
|
10
|
+
export class SentenceChunker extends BaseChunker {
    /** Strategy identifier reported by this chunker. */
    strategy = "sentence";
    /**
     * Build the default configuration: the shared chunker defaults with a
     * 1000-character size limit and an overlap value of 1.
     *
     * NOTE(review): `overlap` is declared here (in sentences) but doChunk
     * below does not apply it — confirm whether the base class or a caller
     * is expected to consume it.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 1, // Overlap in sentences
        };
    }
    /**
     * Greedily pack consecutive sentences into chunks no longer than
     * `config.maxSize` characters (1000 when unset).
     *
     * A single sentence longer than `maxSize` is emitted as its own
     * oversized chunk rather than being split mid-sentence.
     *
     * @param content Text to split.
     * @param config  Chunker configuration; only `maxSize` is read here.
     * @returns Chunks in document order, each built via `this.createChunk`
     *          (inherited; presumably (text, index, startOffset, endOffset)
     *          — verify against BaseChunker).
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        // Simple sentence splitting (can be enhanced with NLP)
        const sentences = this.splitIntoSentences(content);
        const chunks = [];
        let currentChunk = "";
        let chunkIndex = 0;
        // Offset of `currentChunk` within `content`. The sentence pieces tile
        // the content without gaps, so offsets follow from accumulated lengths.
        // Searching with indexOf instead can latch onto an earlier occurrence
        // of repeated text and report the wrong position.
        let chunkStart = 0;
        for (const sentence of sentences) {
            const wouldOverflow = currentChunk.length + sentence.length > maxSize;
            if (wouldOverflow && currentChunk.length > 0) {
                const chunkEnd = chunkStart + currentChunk.length;
                chunks.push(this.createChunk(currentChunk, chunkIndex++, chunkStart, chunkEnd));
                chunkStart = chunkEnd;
                currentChunk = "";
            }
            currentChunk += sentence;
        }
        // Add remaining chunk
        if (currentChunk.length > 0) {
            chunks.push(this.createChunk(currentChunk, chunkIndex, chunkStart, chunkStart + currentChunk.length));
        }
        return chunks;
    }
    /**
     * Split content into sentence-like pieces.
     *
     * A piece ends at a run of `.`, `!` or `?` that is followed by whitespace
     * or the end of the input. This is a plain regex heuristic: it has no
     * special handling for abbreviations.
     *
     * The returned pieces are contiguous and concatenate back to `content`
     * exactly. Text the regex steps over (for example the "3." in
     * "3.14 is pi.", where the terminator is not followed by whitespace) is
     * kept at the front of the next piece instead of being dropped.
     *
     * @param content Text to split.
     * @returns Sentence pieces in order; empty array for empty input.
     */
    splitIntoSentences(content) {
        // Simple regex-based sentence splitting
        const sentencePattern = /[^.!?]*[.!?]+(?:\s|$)/g;
        const sentences = [];
        // End offset of the last piece emitted so far.
        let cursor = 0;
        let match;
        while ((match = sentencePattern.exec(content)) !== null) {
            // Slice from the cursor, not from match.index, so any text the
            // regex skipped before this match is preserved.
            const end = match.index + match[0].length;
            sentences.push(content.slice(cursor, end));
            cursor = end;
        }
        // Handle remaining content without sentence ending
        if (cursor < content.length) {
            sentences.push(content.slice(cursor));
        }
        return sentences;
    }
}
|
|
67
|
+
//# sourceMappingURL=SentenceChunker.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits text by token count using a tokenizer.
|
|
5
|
+
* Useful for precise token budget management.
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
8
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
9
|
+
/**
 * Token Chunker
 *
 * Word-count based approximation of token-sized splitting. The declaration
 * notes that a real tokenizer (tiktoken, etc.) should be integrated for
 * production use.
 */
export declare class TokenChunker extends BaseChunker {
    /** Strategy identifier of this chunker. */
    readonly strategy: ChunkingStrategy;
    /** Returns the configuration used when the caller supplies none. */
    getDefaultConfig(): ChunkerConfig;
    /**
     * Splits `content` into chunks according to `config`.
     *
     * @param content - Text to split.
     * @param config - Chunker configuration.
     * @returns The produced chunks.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits text by token count using a tokenizer.
|
|
5
|
+
* Useful for precise token budget management.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
8
|
+
/**
 * Token Chunker
 *
 * Approximates token-based splitting using word count.
 * For production, integrate with a proper tokenizer (tiktoken, etc.)
 */
export class TokenChunker extends BaseChunker {
    strategy = "token";
    /**
     * Default configuration: the shared chunker defaults with a 512-token
     * budget and a 50-token overlap.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 512, // Tokens
            overlap: 50, // Tokens
        };
    }
    /**
     * Split `content` into windows of whitespace-separated words whose
     * estimated token count (1.3 tokens per word) reaches `maxSize`.
     *
     * Chunk text is the window's words joined by single spaces. Offsets point
     * into the original `content`: start of the first word to end of the last
     * word, so they stay correct when words are separated by several spaces or
     * newlines.
     *
     * @param content - Text to split.
     * @param config - Reads `maxSize` (tokens, default 512) and `overlap` (tokens, default 50).
     * @returns Chunks built with `createChunk(text, index, startOffset, endOffset)`.
     */
    async doChunk(content, config) {
        const maxTokens = config.maxSize ?? 512;
        const overlapTokens = config.overlap ?? 50;
        // Estimate tokens (roughly 1.3 tokens per word on average)
        const tokensPerWord = 1.3;
        const overlapWords = Math.max(0, Math.ceil(overlapTokens / tokensPerWord));
        const chunks = [];
        let chunkIndex = 0;
        // Each entry remembers where the word starts in `content`.
        let window = [];
        const flush = () => {
            const first = window[0];
            const last = window[window.length - 1];
            const chunkText = window.map((entry) => entry.text).join(" ");
            chunks.push(this.createChunk(chunkText, chunkIndex++, first.start, last.start + last.text.length));
        };
        // Approximate tokenization using words
        // In production, use a proper tokenizer like tiktoken
        for (const match of content.matchAll(/\S+/g)) {
            const windowIsFull = Math.ceil(window.length * tokensPerWord) >= maxTokens;
            // Never flush an empty window (would emit an empty chunk when maxSize <= 0).
            if (window.length > 0 && windowIsFull) {
                flush();
                // Keep overlap words, but always drop at least one word so the window
                // advances. `slice(-0)` would keep the whole array, hence the
                // explicit zero case.
                const keep = Math.min(overlapWords, window.length - 1);
                window = keep > 0 ? window.slice(-keep) : [];
            }
            window.push({ text: match[0], start: match.index ?? 0 });
        }
        // Add remaining chunk
        if (window.length > 0) {
            flush();
        }
        return chunks;
    }
}
|
|
62
|
+
//# sourceMappingURL=TokenChunker.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunkers Index
|
|
3
|
+
*
|
|
4
|
+
* Exports all chunker implementations.
|
|
5
|
+
*/
|
|
6
|
+
export { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
export { CharacterChunker } from "./CharacterChunker.js";
|
|
8
|
+
export { RecursiveChunker } from "./RecursiveChunker.js";
|
|
9
|
+
export { SentenceChunker } from "./SentenceChunker.js";
|
|
10
|
+
export { TokenChunker } from "./TokenChunker.js";
|
|
11
|
+
export { MarkdownChunker } from "./MarkdownChunker.js";
|
|
12
|
+
export { HTMLChunker } from "./HTMLChunker.js";
|
|
13
|
+
export { JSONChunker } from "./JSONChunker.js";
|
|
14
|
+
export { LaTeXChunker } from "./LaTeXChunker.js";
|
|
15
|
+
export { SemanticMarkdownChunker } from "./SemanticMarkdownChunker.js";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunkers Index
|
|
3
|
+
*
|
|
4
|
+
* Exports all chunker implementations.
|
|
5
|
+
*/
|
|
6
|
+
export { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
export { CharacterChunker } from "./CharacterChunker.js";
|
|
8
|
+
export { RecursiveChunker } from "./RecursiveChunker.js";
|
|
9
|
+
export { SentenceChunker } from "./SentenceChunker.js";
|
|
10
|
+
export { TokenChunker } from "./TokenChunker.js";
|
|
11
|
+
export { MarkdownChunker } from "./MarkdownChunker.js";
|
|
12
|
+
export { HTMLChunker } from "./HTMLChunker.js";
|
|
13
|
+
export { JSONChunker } from "./JSONChunker.js";
|
|
14
|
+
export { LaTeXChunker } from "./LaTeXChunker.js";
|
|
15
|
+
export { SemanticMarkdownChunker } from "./SemanticMarkdownChunker.js";
|
|
16
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character-based Chunker
|
|
3
|
+
*
|
|
4
|
+
* Simple character-based text splitting with configurable separator and overlap.
|
|
5
|
+
* Best for unstructured text where character count is the primary concern.
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunker, Chunk, ChunkerValidationResult, CharacterChunkerConfig, BaseChunkerConfig } from "../types.js";
|
|
8
|
+
/**
 * Character-based chunker implementation
 *
 * Splits text by character count, optionally pre-splitting on a separator.
 */
export declare class CharacterChunker implements Chunker {
    /** Strategy identifier; always the literal `"character"`. */
    readonly strategy: "character";
    /**
     * Splits `text` into chunks.
     *
     * @param text - Text to split.
     * @param config - Optional character-chunker settings.
     * @returns The produced chunks.
     */
    chunk(text: string, config?: CharacterChunkerConfig): Promise<Chunk[]>;
    /**
     * Checks a configuration object and reports problems.
     *
     * @param config - Configuration to validate.
     * @returns The validation result.
     */
    validateConfig(config: BaseChunkerConfig): ChunkerValidationResult;
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character-based Chunker
|
|
3
|
+
*
|
|
4
|
+
* Simple character-based text splitting with configurable separator and overlap.
|
|
5
|
+
* Best for unstructured text where character count is the primary concern.
|
|
6
|
+
*/
|
|
7
|
+
import { randomUUID } from "crypto";
|
|
8
|
+
/**
 * Character-based chunker implementation
 * Splits text by character count with optional separator
 */
export class CharacterChunker {
    strategy = "character";
    /**
     * Split `text` into chunks of at most `maxSize` characters.
     *
     * The text is first cut on `separator` (when given); segments are then
     * packed greedily into chunks. A segment longer than `maxSize` is cut into
     * `maxSize`-sized windows. With `overlap > 0`, each new chunk starts with
     * the last `overlap` characters of the previous one.
     *
     * `startPosition` / `endPosition` are offsets into the concatenation of the
     * segments, measured before trimming. When `keepSeparator` is false the
     * separators are not part of that concatenation, so they are not counted.
     *
     * @param text - Text to split; empty input yields an empty array.
     * @param config - Optional settings: `maxSize` (1000), `overlap` (0),
     *   `separator` (""), `keepSeparator` (false), `trimWhitespace` (true),
     *   `metadata` ({}), stored under `metadata.custom` of every chunk.
     * @returns Chunks in document order, each carrying `totalChunks`.
     * @throws Error if `maxSize` is not greater than 0 (for non-empty text).
     */
    async chunk(text, config) {
        const { maxSize = 1000, overlap = 0, separator = "", keepSeparator = false, trimWhitespace = true, metadata = {}, } = config || {};
        const chunks = [];
        if (!text || text.length === 0) {
            return chunks;
        }
        if (maxSize <= 0) {
            // Same rule validateConfig reports; without it the split loop below
            // could never shrink its buffer.
            throw new Error("maxSize must be greater than 0");
        }
        const documentId = randomUUID();
        // Split by separator if provided
        let segments = [text];
        if (separator) {
            const parts = text.split(separator);
            segments = keepSeparator
                ? parts.map((part, i) => (i < parts.length - 1 ? part + separator : part))
                : parts;
        }
        let currentChunk = "";
        let chunkIndex = 0;
        let startPosition = 0;
        // Record `raw` as a chunk starting at `startPosition`; text that is empty
        // after optional trimming is skipped and does not consume a chunk index.
        const emit = (raw) => {
            const chunkText = trimWhitespace ? raw.trim() : raw;
            if (chunkText.length === 0) {
                return;
            }
            chunks.push({
                id: randomUUID(),
                text: chunkText,
                metadata: {
                    documentId,
                    chunkIndex,
                    startPosition,
                    endPosition: startPosition + raw.length,
                    documentType: "text",
                    custom: metadata,
                },
            });
            chunkIndex++;
        };
        // Window advance when cutting an oversized buffer. Clamped to
        // [1, maxSize]: at least 1 so the loop terminates when overlap >= maxSize,
        // at most maxSize so a negative overlap cannot skip characters.
        const step = Math.max(1, Math.min(maxSize, maxSize - overlap));
        for (const segment of segments) {
            if (currentChunk.length + segment.length <= maxSize) {
                currentChunk += segment;
                continue;
            }
            // Save current chunk if it has content
            emit(currentChunk);
            // Handle overlap
            if (overlap > 0 && currentChunk.length > overlap) {
                // Advance using the length of the chunk just emitted, before the
                // buffer is replaced.
                startPosition += currentChunk.length - overlap;
                currentChunk = currentChunk.slice(-overlap) + segment;
            }
            else {
                startPosition += currentChunk.length;
                currentChunk = segment;
            }
            // If segment is larger than maxSize, split it further
            while (currentChunk.length > maxSize) {
                emit(currentChunk.slice(0, maxSize));
                currentChunk = currentChunk.slice(step);
                startPosition += step;
            }
        }
        // Don't forget the last chunk
        emit(currentChunk);
        // Update total chunks count
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }
    /**
     * Validate a chunker configuration.
     *
     * Errors: non-positive `maxSize`, negative `overlap`, `overlap >= maxSize`.
     * Warning: `minSize` greater than `maxSize`.
     *
     * @param config - Configuration to check.
     * @returns `{ valid, errors, warnings }`; `valid` is true when there are no errors.
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        const { maxSize, overlap, minSize } = config;
        if (maxSize !== undefined && maxSize <= 0) {
            errors.push("maxSize must be greater than 0");
        }
        if (overlap !== undefined && overlap < 0) {
            errors.push("overlap must be non-negative");
        }
        if (overlap !== undefined && maxSize !== undefined && overlap >= maxSize) {
            errors.push("overlap must be less than maxSize");
        }
        if (minSize !== undefined && maxSize !== undefined && minSize > maxSize) {
            warnings.push("minSize is greater than maxSize, some chunks may be smaller than minSize");
        }
        return {
            valid: errors.length === 0,
            errors,
            warnings,
        };
    }
}
|
|
143
|
+
//# sourceMappingURL=characterChunker.js.map
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunker Registry
|
|
3
|
+
*
|
|
4
|
+
* Central registry for all chunking strategies following NeuroLink's registry pattern.
|
|
5
|
+
* Provides factory methods for creating chunker instances.
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunker, ChunkingStrategy } from "../types.js";
|
|
8
|
+
/**
 * Registry for chunking strategies
 *
 * Static registry mapping strategy names to chunker factories, following
 * NeuroLink's factory pattern with lazy initialization.
 */
export declare class ChunkerRegistry {
    private static chunkers;
    private static initialized;
    /**
     * Registers every built-in chunker.
     */
    static initialize(): void;
    /**
     * Adds a custom chunker.
     *
     * @param strategy - Name to register the chunker under
     * @param factory - Function that builds a chunker instance
     */
    static register(strategy: ChunkingStrategy, factory: () => Chunker): void;
    /**
     * Looks up a chunker by strategy name.
     *
     * @param strategy - Name of the chunking strategy
     * @returns A chunker instance
     * @throws Error when the strategy has not been registered
     */
    static get(strategy: ChunkingStrategy): Chunker;
    /**
     * Lists the registered strategies.
     *
     * @returns The strategy names
     */
    static getAvailableStrategies(): ChunkingStrategy[];
    /**
     * Tells whether a strategy has been registered.
     *
     * @param strategy - Name to look for
     * @returns True when the strategy is registered
     */
    static has(strategy: ChunkingStrategy): boolean;
    /**
     * Suggests a strategy for a kind of content.
     *
     * @param contentType - Document type or MIME type
     * @returns The recommended chunking strategy
     */
    static getRecommendedStrategy(contentType: string): ChunkingStrategy;
    /**
     * Returns the default configuration of a strategy.
     *
     * @param strategy - Chunking strategy
     * @returns The default configuration object
     */
    static getDefaultConfig(strategy: ChunkingStrategy): Record<string, unknown>;
    /**
     * Clears the registry (useful for testing).
     */
    static reset(): void;
}
|
|
60
|
+
/**
 * Chunks text with the chosen strategy in a single call.
 *
 * @param text - The text to split
 * @param strategy - Which chunking strategy to use (default: "recursive")
 * @param config - Configuration specific to that strategy
 * @returns The resulting array of chunks
 */
export declare function chunkText(text: string, strategy?: ChunkingStrategy, config?: Record<string, unknown>): Promise<import("../types.js").Chunk[]>;
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunker Registry
|
|
3
|
+
*
|
|
4
|
+
* Central registry for all chunking strategies following NeuroLink's registry pattern.
|
|
5
|
+
* Provides factory methods for creating chunker instances.
|
|
6
|
+
*/
|
|
7
|
+
import { SemanticMarkdownChunker } from "../chunkers/SemanticMarkdownChunker.js";
|
|
8
|
+
import { CharacterChunker } from "./characterChunker.js";
|
|
9
|
+
import { HTMLChunker } from "./htmlChunker.js";
|
|
10
|
+
import { JSONChunker } from "./jsonChunker.js";
|
|
11
|
+
import { LaTeXChunker } from "./latexChunker.js";
|
|
12
|
+
import { MarkdownChunker } from "./markdownChunker.js";
|
|
13
|
+
import { RecursiveChunker } from "./recursiveChunker.js";
|
|
14
|
+
import { SemanticChunker } from "./semanticChunker.js";
|
|
15
|
+
import { SentenceChunker } from "./sentenceChunker.js";
|
|
16
|
+
import { TokenChunker } from "./tokenChunker.js";
|
|
17
|
+
/**
 * Registry for chunking strategies
 * Follows NeuroLink's factory pattern with lazy initialization
 *
 * Strategy names map to factory functions; every `get` call invokes the
 * factory and returns a new chunker instance.
 */
export class ChunkerRegistry {
    static chunkers = new Map();
    static initialized = false;
    /**
     * Initialize all built-in chunkers.
     *
     * Runs once (until `reset`). A factory that was registered under a
     * built-in name before initialization is kept, so custom overrides survive
     * the lazy initialization triggered by `get` / `has` /
     * `getAvailableStrategies`.
     */
    static initialize() {
        if (ChunkerRegistry.initialized) {
            return;
        }
        const builtIns = [
            ["character", () => new CharacterChunker()],
            ["recursive", () => new RecursiveChunker()],
            ["sentence", () => new SentenceChunker()],
            ["token", () => new TokenChunker()],
            ["markdown", () => new MarkdownChunker()],
            ["html", () => new HTMLChunker()],
            ["json", () => new JSONChunker()],
            ["latex", () => new LaTeXChunker()],
            ["semantic", () => new SemanticChunker()],
            ["semantic-markdown", () => new SemanticMarkdownChunker()],
        ];
        for (const [strategy, factory] of builtIns) {
            // Do not clobber a caller's earlier registration.
            if (!ChunkerRegistry.chunkers.has(strategy)) {
                ChunkerRegistry.register(strategy, factory);
            }
        }
        ChunkerRegistry.initialized = true;
    }
    /**
     * Register a custom chunker (replaces any factory already stored under
     * the same name).
     * @param strategy - Strategy name
     * @param factory - Factory function that creates chunker instance
     */
    static register(strategy, factory) {
        ChunkerRegistry.chunkers.set(strategy, factory);
    }
    /**
     * Get a chunker by strategy name
     * @param strategy - Chunking strategy name
     * @returns Chunker instance
     * @throws Error if strategy is not registered
     */
    static get(strategy) {
        ChunkerRegistry.initialize();
        const factory = ChunkerRegistry.chunkers.get(strategy);
        if (!factory) {
            throw new Error(`Unknown chunking strategy: ${strategy}. Available strategies: ${ChunkerRegistry.getAvailableStrategies().join(", ")}`);
        }
        return factory();
    }
    /**
     * Get all available chunking strategies
     * @returns Array of strategy names
     */
    static getAvailableStrategies() {
        ChunkerRegistry.initialize();
        return Array.from(ChunkerRegistry.chunkers.keys());
    }
    /**
     * Check if a strategy is registered
     * @param strategy - Strategy name to check
     * @returns True if strategy is registered
     */
    static has(strategy) {
        ChunkerRegistry.initialize();
        return ChunkerRegistry.chunkers.has(strategy);
    }
    /**
     * Get strategy recommendation based on content type.
     *
     * Matching is case-insensitive and checked in this order: markdown, html,
     * json, latex, code, plain document/text; anything else is "recursive".
     * @param contentType - Document type or MIME type
     * @returns Recommended chunking strategy
     */
    static getRecommendedStrategy(contentType) {
        const normalized = contentType.toLowerCase();
        if (normalized.includes("markdown") || normalized === "md") {
            return "markdown";
        }
        // "htm" also covers "html"
        if (normalized.includes("htm")) {
            return "html";
        }
        if (normalized.includes("json")) {
            return "json";
        }
        // Check for latex specifically - don't match "text" which contains "tex"
        if (normalized.includes("latex") ||
            normalized === "tex" ||
            normalized.endsWith("/tex")) {
            return "latex";
        }
        if (normalized.includes("code") || normalized.includes("programming")) {
            return "recursive";
        }
        if (normalized.includes("document") || normalized.includes("text")) {
            return "sentence";
        }
        // Default to recursive for general text
        return "recursive";
    }
    /**
     * Get default configuration for a strategy
     * @param strategy - Chunking strategy
     * @returns Default configuration object; `{ maxSize: 1000 }` for a
     *   strategy without an entry in the table below
     */
    static getDefaultConfig(strategy) {
        const defaults = {
            character: {
                maxSize: 1000,
                overlap: 0,
                separator: "",
                keepSeparator: false,
            },
            recursive: {
                maxSize: 1000,
                overlap: 200,
                separators: ["\n\n", "\n", ". ", " ", ""],
            },
            sentence: {
                maxSize: 1000,
                overlap: 0,
                minSentences: 1,
                sentenceEnders: [".", "!", "?"],
            },
            token: {
                maxTokens: 512,
                tokenOverlap: 50,
                tokenizer: "cl100k_base",
            },
            markdown: {
                maxSize: 1000,
                headerLevels: [1, 2, 3],
                preserveCodeBlocks: true,
                includeHeader: true,
            },
            html: {
                maxSize: 1000,
                splitTags: ["div", "p", "section", "article"],
                extractTextOnly: false,
            },
            json: {
                maxSize: 1000,
                maxDepth: 10,
                includeJsonPath: true,
            },
            latex: {
                maxSize: 1000,
                splitEnvironments: ["section", "subsection", "chapter"],
                preserveMath: true,
            },
            semantic: {
                maxSize: 1000,
                similarityThreshold: 0.7,
                joinThreshold: 100,
            },
            "semantic-markdown": {
                maxSize: 1000,
                overlap: 100,
                similarityThreshold: 0.7,
            },
        };
        // Own-property check so names such as "constructor" do not resolve to
        // members inherited from Object.prototype.
        return Object.prototype.hasOwnProperty.call(defaults, strategy)
            ? defaults[strategy]
            : { maxSize: 1000 };
    }
    /**
     * Reset the registry (useful for testing): removes every registered
     * factory, custom ones included, and re-arms lazy initialization.
     */
    static reset() {
        ChunkerRegistry.chunkers.clear();
        ChunkerRegistry.initialized = false;
    }
}
|
|
184
|
+
/**
 * One-call helper: resolves the chunker registered for `strategy` and runs it
 * on `text`.
 * @param text - The text to split
 * @param strategy - Which chunking strategy to use (default: "recursive")
 * @param config - Configuration specific to that strategy
 * @returns The resulting array of chunks
 */
export async function chunkText(text, strategy = "recursive", config) {
    return ChunkerRegistry.get(strategy).chunk(text, config);
}
|
|
195
|
+
//# sourceMappingURL=chunkerRegistry.js.map
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML-aware Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits HTML documents based on tag structure while preserving semantics.
|
|
5
|
+
* Best for web pages, email templates, and structured HTML content.
|
|
6
|
+
*/
|
|
7
|
+
import type { BaseChunkerConfig, Chunk, Chunker, ChunkerValidationResult, HTMLChunkerConfig } from "../types.js";
|
|
8
|
+
/**
 * HTML-aware chunker implementation
 *
 * Produces chunks by following the HTML structure (tags, elements).
 */
export declare class HTMLChunker implements Chunker {
    /** Strategy identifier; always the literal `"html"`. */
    readonly strategy: "html";
    private readonly defaultSplitTags;
    private readonly defaultPreserveTags;
    /**
     * Splits an HTML document into chunks.
     *
     * @param text - HTML text to split.
     * @param config - Optional HTML-chunker settings.
     * @returns The produced chunks.
     */
    chunk(text: string, config?: HTMLChunkerConfig): Promise<Chunk[]>;
    /**
     * Splits HTML on structural tags.
     */
    private splitByTags;
    /**
     * Parses HTML attributes out of a string.
     */
    private parseAttributes;
    /**
     * Extracts the plain text of an HTML fragment.
     */
    private extractText;
    /**
     * Splits content that is larger than the maximum size.
     */
    private splitContent;
    /**
     * Checks a configuration object and reports problems.
     *
     * @param config - Configuration to validate.
     * @returns The validation result.
     */
    validateConfig(config: BaseChunkerConfig): ChunkerValidationResult;
}
|