@juspay/neurolink 9.1.1 → 9.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +106 -37
- package/dist/agent/directTools.d.ts +11 -11
- package/dist/cli/commands/config.d.ts +6 -6
- package/dist/cli/commands/rag.d.ts +19 -0
- package/dist/cli/commands/rag.js +756 -0
- package/dist/cli/factories/commandFactory.js +146 -83
- package/dist/cli/parser.js +4 -1
- package/dist/core/baseProvider.d.ts +43 -30
- package/dist/core/baseProvider.js +98 -138
- package/dist/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/core/conversationMemoryFactory.js +2 -2
- package/dist/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/core/conversationMemoryInitializer.js +2 -2
- package/dist/core/infrastructure/baseError.d.ts +21 -0
- package/dist/core/infrastructure/baseError.js +22 -0
- package/dist/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/core/infrastructure/baseFactory.js +54 -0
- package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/core/infrastructure/baseRegistry.js +49 -0
- package/dist/core/infrastructure/index.d.ts +5 -0
- package/dist/core/infrastructure/index.js +5 -0
- package/dist/core/infrastructure/retry.d.ts +7 -0
- package/dist/core/infrastructure/retry.js +20 -0
- package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/core/infrastructure/typedEventEmitter.js +23 -0
- package/dist/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/core/redisConversationMemoryManager.js +7 -19
- package/dist/factories/providerFactory.d.ts +5 -3
- package/dist/factories/providerFactory.js +31 -24
- package/dist/image-gen/ImageGenService.d.ts +143 -0
- package/dist/image-gen/ImageGenService.js +345 -0
- package/dist/image-gen/imageGenTools.d.ts +126 -0
- package/dist/image-gen/imageGenTools.js +304 -0
- package/dist/image-gen/index.d.ts +46 -0
- package/dist/image-gen/index.js +48 -0
- package/dist/image-gen/types.d.ts +237 -0
- package/dist/image-gen/types.js +24 -0
- package/dist/index.d.ts +46 -12
- package/dist/index.js +88 -36
- package/dist/lib/agent/directTools.d.ts +8 -8
- package/dist/lib/core/baseProvider.d.ts +43 -30
- package/dist/lib/core/baseProvider.js +98 -138
- package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
- package/dist/lib/core/conversationMemoryFactory.js +2 -2
- package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
- package/dist/lib/core/conversationMemoryInitializer.js +2 -2
- package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseError.js +23 -0
- package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseFactory.js +55 -0
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
- package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
- package/dist/lib/core/infrastructure/index.d.ts +5 -0
- package/dist/lib/core/infrastructure/index.js +6 -0
- package/dist/lib/core/infrastructure/retry.d.ts +7 -0
- package/dist/lib/core/infrastructure/retry.js +21 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
- package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
- package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
- package/dist/lib/core/redisConversationMemoryManager.js +7 -19
- package/dist/lib/factories/providerFactory.d.ts +5 -3
- package/dist/lib/factories/providerFactory.js +31 -24
- package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
- package/dist/lib/image-gen/ImageGenService.js +346 -0
- package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
- package/dist/lib/image-gen/imageGenTools.js +305 -0
- package/dist/lib/image-gen/index.d.ts +46 -0
- package/dist/lib/image-gen/index.js +49 -0
- package/dist/lib/image-gen/types.d.ts +237 -0
- package/dist/lib/image-gen/types.js +25 -0
- package/dist/lib/index.d.ts +46 -12
- package/dist/lib/index.js +88 -36
- package/dist/lib/mcp/index.d.ts +6 -5
- package/dist/lib/mcp/index.js +7 -5
- package/dist/lib/neurolink.d.ts +11 -13
- package/dist/lib/neurolink.js +95 -29
- package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
- package/dist/lib/processors/base/index.d.ts +14 -0
- package/dist/lib/processors/base/index.js +20 -0
- package/dist/lib/processors/base/types.d.ts +593 -0
- package/dist/lib/processors/base/types.js +77 -0
- package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
- package/dist/lib/processors/cli/index.d.ts +37 -0
- package/dist/lib/processors/cli/index.js +50 -0
- package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/lib/processors/code/ConfigProcessor.js +401 -0
- package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
- package/dist/lib/processors/code/index.d.ts +44 -0
- package/dist/lib/processors/code/index.js +61 -0
- package/dist/lib/processors/config/fileTypes.d.ts +283 -0
- package/dist/lib/processors/config/fileTypes.js +521 -0
- package/dist/lib/processors/config/index.d.ts +32 -0
- package/dist/lib/processors/config/index.js +93 -0
- package/dist/lib/processors/config/languageMap.d.ts +66 -0
- package/dist/lib/processors/config/languageMap.js +411 -0
- package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
- package/dist/lib/processors/config/mimeTypes.js +339 -0
- package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
- package/dist/lib/processors/config/sizeLimits.js +247 -0
- package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/lib/processors/data/JsonProcessor.js +204 -0
- package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/lib/processors/data/XmlProcessor.js +284 -0
- package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/lib/processors/data/YamlProcessor.js +295 -0
- package/dist/lib/processors/data/index.d.ts +49 -0
- package/dist/lib/processors/data/index.js +77 -0
- package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/lib/processors/document/ExcelProcessor.js +520 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
- package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/lib/processors/document/RtfProcessor.js +362 -0
- package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
- package/dist/lib/processors/document/WordProcessor.js +354 -0
- package/dist/lib/processors/document/index.d.ts +54 -0
- package/dist/lib/processors/document/index.js +91 -0
- package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/lib/processors/errors/FileErrorCode.js +256 -0
- package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/lib/processors/errors/errorHelpers.js +379 -0
- package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/lib/processors/errors/errorSerializer.js +508 -0
- package/dist/lib/processors/errors/index.d.ts +46 -0
- package/dist/lib/processors/errors/index.js +50 -0
- package/dist/lib/processors/index.d.ts +76 -0
- package/dist/lib/processors/index.js +113 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
- package/dist/lib/processors/integration/index.d.ts +42 -0
- package/dist/lib/processors/integration/index.js +45 -0
- package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
- package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
- package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/lib/processors/markup/SvgProcessor.js +241 -0
- package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/lib/processors/markup/TextProcessor.js +189 -0
- package/dist/lib/processors/markup/index.d.ts +66 -0
- package/dist/lib/processors/markup/index.js +103 -0
- package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
- package/dist/lib/processors/registry/index.d.ts +12 -0
- package/dist/lib/processors/registry/index.js +17 -0
- package/dist/lib/processors/registry/types.d.ts +53 -0
- package/dist/lib/processors/registry/types.js +11 -0
- package/dist/lib/providers/amazonBedrock.d.ts +15 -2
- package/dist/lib/providers/amazonBedrock.js +65 -8
- package/dist/lib/providers/anthropic.d.ts +3 -3
- package/dist/lib/providers/anthropic.js +10 -7
- package/dist/lib/providers/googleAiStudio.d.ts +5 -5
- package/dist/lib/providers/googleAiStudio.js +10 -7
- package/dist/lib/providers/googleVertex.d.ts +16 -4
- package/dist/lib/providers/googleVertex.js +72 -16
- package/dist/lib/providers/litellm.d.ts +3 -3
- package/dist/lib/providers/litellm.js +10 -10
- package/dist/lib/providers/mistral.d.ts +3 -3
- package/dist/lib/providers/mistral.js +7 -6
- package/dist/lib/providers/ollama.d.ts +3 -4
- package/dist/lib/providers/ollama.js +7 -8
- package/dist/lib/providers/openAI.d.ts +14 -2
- package/dist/lib/providers/openAI.js +60 -6
- package/dist/lib/providers/openRouter.d.ts +2 -2
- package/dist/lib/providers/openRouter.js +10 -6
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
- package/dist/lib/rag/ChunkerFactory.js +321 -0
- package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/lib/rag/ChunkerRegistry.js +422 -0
- package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
- package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
- package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
- package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
- package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
- package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
- package/dist/lib/rag/chunkers/index.d.ts +15 -0
- package/dist/lib/rag/chunkers/index.js +16 -0
- package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/characterChunker.js +143 -0
- package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
- package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/lib/rag/chunking/htmlChunker.js +248 -0
- package/dist/lib/rag/chunking/index.d.ts +15 -0
- package/dist/lib/rag/chunking/index.js +18 -0
- package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/lib/rag/chunking/jsonChunker.js +282 -0
- package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/lib/rag/chunking/latexChunker.js +252 -0
- package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/markdownChunker.js +202 -0
- package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
- package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/lib/rag/chunking/semanticChunker.js +307 -0
- package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
- package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/lib/rag/chunking/tokenChunker.js +184 -0
- package/dist/lib/rag/document/MDocument.d.ts +198 -0
- package/dist/lib/rag/document/MDocument.js +393 -0
- package/dist/lib/rag/document/index.d.ts +5 -0
- package/dist/lib/rag/document/index.js +6 -0
- package/dist/lib/rag/document/loaders.d.ts +201 -0
- package/dist/lib/rag/document/loaders.js +501 -0
- package/dist/lib/rag/errors/RAGError.d.ts +244 -0
- package/dist/lib/rag/errors/RAGError.js +275 -0
- package/dist/lib/rag/errors/index.d.ts +6 -0
- package/dist/lib/rag/errors/index.js +7 -0
- package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/lib/rag/graphRag/graphRAG.js +385 -0
- package/dist/lib/rag/graphRag/index.d.ts +4 -0
- package/dist/lib/rag/graphRag/index.js +5 -0
- package/dist/lib/rag/index.d.ts +103 -0
- package/dist/lib/rag/index.js +142 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
- package/dist/lib/rag/metadata/index.d.ts +6 -0
- package/dist/lib/rag/metadata/index.js +10 -0
- package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
- package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
- package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
- package/dist/lib/rag/pipeline/index.d.ts +5 -0
- package/dist/lib/rag/pipeline/index.js +6 -0
- package/dist/lib/rag/ragIntegration.d.ts +38 -0
- package/dist/lib/rag/ragIntegration.js +212 -0
- package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
- package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
- package/dist/lib/rag/reranker/index.d.ts +6 -0
- package/dist/lib/rag/reranker/index.js +10 -0
- package/dist/lib/rag/reranker/reranker.d.ts +71 -0
- package/dist/lib/rag/reranker/reranker.js +278 -0
- package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
- package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/lib/rag/resilience/RetryHandler.js +301 -0
- package/dist/lib/rag/resilience/index.d.ts +7 -0
- package/dist/lib/rag/resilience/index.js +8 -0
- package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
- package/dist/lib/rag/retrieval/index.d.ts +5 -0
- package/dist/lib/rag/retrieval/index.js +6 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
- package/dist/lib/rag/types.d.ts +768 -0
- package/dist/lib/rag/types.js +9 -0
- package/dist/lib/server/index.d.ts +15 -11
- package/dist/lib/server/index.js +55 -51
- package/dist/lib/server/utils/validation.d.ts +2 -2
- package/dist/lib/types/common.d.ts +0 -1
- package/dist/lib/types/fileTypes.d.ts +1 -1
- package/dist/lib/types/generateTypes.d.ts +42 -8
- package/dist/lib/types/generateTypes.js +1 -1
- package/dist/lib/types/index.d.ts +25 -24
- package/dist/lib/types/index.js +21 -20
- package/dist/lib/types/modelTypes.d.ts +16 -16
- package/dist/lib/types/pptTypes.d.ts +14 -2
- package/dist/lib/types/pptTypes.js +16 -0
- package/dist/lib/types/streamTypes.d.ts +28 -8
- package/dist/lib/types/streamTypes.js +1 -1
- package/dist/lib/utils/async/delay.d.ts +40 -0
- package/dist/lib/utils/async/delay.js +43 -0
- package/dist/lib/utils/async/index.d.ts +23 -0
- package/dist/lib/utils/async/index.js +24 -0
- package/dist/lib/utils/async/retry.d.ts +141 -0
- package/dist/lib/utils/async/retry.js +172 -0
- package/dist/lib/utils/async/withTimeout.d.ts +73 -0
- package/dist/lib/utils/async/withTimeout.js +97 -0
- package/dist/lib/utils/fileDetector.d.ts +7 -1
- package/dist/lib/utils/fileDetector.js +91 -18
- package/dist/lib/utils/json/extract.d.ts +103 -0
- package/dist/lib/utils/json/extract.js +249 -0
- package/dist/lib/utils/json/index.d.ts +36 -0
- package/dist/lib/utils/json/index.js +37 -0
- package/dist/lib/utils/json/safeParse.d.ts +137 -0
- package/dist/lib/utils/json/safeParse.js +191 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -2
- package/dist/lib/utils/messageBuilder.js +15 -7
- package/dist/lib/utils/modelRouter.d.ts +4 -4
- package/dist/lib/utils/modelRouter.js +4 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
- package/dist/lib/utils/sanitizers/filename.js +366 -0
- package/dist/lib/utils/sanitizers/html.d.ts +170 -0
- package/dist/lib/utils/sanitizers/html.js +326 -0
- package/dist/lib/utils/sanitizers/index.d.ts +26 -0
- package/dist/lib/utils/sanitizers/index.js +30 -0
- package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
- package/dist/lib/utils/sanitizers/svg.js +483 -0
- package/dist/mcp/index.d.ts +6 -5
- package/dist/mcp/index.js +7 -5
- package/dist/neurolink.d.ts +11 -13
- package/dist/neurolink.js +95 -29
- package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
- package/dist/processors/base/BaseFileProcessor.js +613 -0
- package/dist/processors/base/index.d.ts +14 -0
- package/dist/processors/base/index.js +19 -0
- package/dist/processors/base/types.d.ts +593 -0
- package/dist/processors/base/types.js +76 -0
- package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
- package/dist/processors/cli/fileProcessorCli.js +388 -0
- package/dist/processors/cli/index.d.ts +37 -0
- package/dist/processors/cli/index.js +49 -0
- package/dist/processors/code/ConfigProcessor.d.ts +171 -0
- package/dist/processors/code/ConfigProcessor.js +400 -0
- package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
- package/dist/processors/code/SourceCodeProcessor.js +304 -0
- package/dist/processors/code/index.d.ts +44 -0
- package/dist/processors/code/index.js +60 -0
- package/dist/processors/config/fileTypes.d.ts +283 -0
- package/dist/processors/config/fileTypes.js +520 -0
- package/dist/processors/config/index.d.ts +32 -0
- package/dist/processors/config/index.js +92 -0
- package/dist/processors/config/languageMap.d.ts +66 -0
- package/dist/processors/config/languageMap.js +410 -0
- package/dist/processors/config/mimeTypes.d.ts +376 -0
- package/dist/processors/config/mimeTypes.js +338 -0
- package/dist/processors/config/sizeLimits.d.ts +194 -0
- package/dist/processors/config/sizeLimits.js +246 -0
- package/dist/processors/data/JsonProcessor.d.ts +122 -0
- package/dist/processors/data/JsonProcessor.js +203 -0
- package/dist/processors/data/XmlProcessor.d.ts +160 -0
- package/dist/processors/data/XmlProcessor.js +283 -0
- package/dist/processors/data/YamlProcessor.d.ts +163 -0
- package/dist/processors/data/YamlProcessor.js +294 -0
- package/dist/processors/data/index.d.ts +49 -0
- package/dist/processors/data/index.js +76 -0
- package/dist/processors/document/ExcelProcessor.d.ts +238 -0
- package/dist/processors/document/ExcelProcessor.js +519 -0
- package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
- package/dist/processors/document/OpenDocumentProcessor.js +210 -0
- package/dist/processors/document/RtfProcessor.d.ts +152 -0
- package/dist/processors/document/RtfProcessor.js +361 -0
- package/dist/processors/document/WordProcessor.d.ts +168 -0
- package/dist/processors/document/WordProcessor.js +353 -0
- package/dist/processors/document/index.d.ts +54 -0
- package/dist/processors/document/index.js +90 -0
- package/dist/processors/errors/FileErrorCode.d.ts +98 -0
- package/dist/processors/errors/FileErrorCode.js +255 -0
- package/dist/processors/errors/errorHelpers.d.ts +151 -0
- package/dist/processors/errors/errorHelpers.js +378 -0
- package/dist/processors/errors/errorSerializer.d.ts +139 -0
- package/dist/processors/errors/errorSerializer.js +507 -0
- package/dist/processors/errors/index.d.ts +46 -0
- package/dist/processors/errors/index.js +49 -0
- package/dist/processors/index.d.ts +76 -0
- package/dist/processors/index.js +112 -0
- package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
- package/dist/processors/integration/FileProcessorIntegration.js +272 -0
- package/dist/processors/integration/index.d.ts +42 -0
- package/dist/processors/integration/index.js +44 -0
- package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
- package/dist/processors/markup/HtmlProcessor.js +249 -0
- package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
- package/dist/processors/markup/MarkdownProcessor.js +244 -0
- package/dist/processors/markup/SvgProcessor.d.ts +156 -0
- package/dist/processors/markup/SvgProcessor.js +240 -0
- package/dist/processors/markup/TextProcessor.d.ts +135 -0
- package/dist/processors/markup/TextProcessor.js +188 -0
- package/dist/processors/markup/index.d.ts +66 -0
- package/dist/processors/markup/index.js +102 -0
- package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
- package/dist/processors/registry/ProcessorRegistry.js +608 -0
- package/dist/processors/registry/index.d.ts +12 -0
- package/dist/processors/registry/index.js +16 -0
- package/dist/processors/registry/types.d.ts +53 -0
- package/dist/processors/registry/types.js +10 -0
- package/dist/providers/amazonBedrock.d.ts +15 -2
- package/dist/providers/amazonBedrock.js +65 -8
- package/dist/providers/anthropic.d.ts +3 -3
- package/dist/providers/anthropic.js +10 -7
- package/dist/providers/googleAiStudio.d.ts +5 -5
- package/dist/providers/googleAiStudio.js +10 -7
- package/dist/providers/googleVertex.d.ts +16 -4
- package/dist/providers/googleVertex.js +72 -16
- package/dist/providers/litellm.d.ts +3 -3
- package/dist/providers/litellm.js +10 -10
- package/dist/providers/mistral.d.ts +3 -3
- package/dist/providers/mistral.js +7 -6
- package/dist/providers/ollama.d.ts +3 -4
- package/dist/providers/ollama.js +7 -8
- package/dist/providers/openAI.d.ts +14 -2
- package/dist/providers/openAI.js +60 -6
- package/dist/providers/openRouter.d.ts +2 -2
- package/dist/providers/openRouter.js +10 -6
- package/dist/rag/ChunkerFactory.d.ts +91 -0
- package/dist/rag/ChunkerFactory.js +320 -0
- package/dist/rag/ChunkerRegistry.d.ts +91 -0
- package/dist/rag/ChunkerRegistry.js +421 -0
- package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
- package/dist/rag/chunkers/BaseChunker.js +143 -0
- package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
- package/dist/rag/chunkers/CharacterChunker.js +28 -0
- package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
- package/dist/rag/chunkers/HTMLChunker.js +38 -0
- package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
- package/dist/rag/chunkers/JSONChunker.js +68 -0
- package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
- package/dist/rag/chunkers/LaTeXChunker.js +63 -0
- package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
- package/dist/rag/chunkers/MarkdownChunker.js +102 -0
- package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
- package/dist/rag/chunkers/RecursiveChunker.js +139 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
- package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
- package/dist/rag/chunkers/SentenceChunker.js +66 -0
- package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
- package/dist/rag/chunkers/TokenChunker.js +61 -0
- package/dist/rag/chunkers/index.d.ts +15 -0
- package/dist/rag/chunkers/index.js +15 -0
- package/dist/rag/chunking/characterChunker.d.ts +16 -0
- package/dist/rag/chunking/characterChunker.js +142 -0
- package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
- package/dist/rag/chunking/chunkerRegistry.js +194 -0
- package/dist/rag/chunking/htmlChunker.d.ts +34 -0
- package/dist/rag/chunking/htmlChunker.js +247 -0
- package/dist/rag/chunking/index.d.ts +15 -0
- package/dist/rag/chunking/index.js +17 -0
- package/dist/rag/chunking/jsonChunker.d.ts +20 -0
- package/dist/rag/chunking/jsonChunker.js +281 -0
- package/dist/rag/chunking/latexChunker.d.ts +26 -0
- package/dist/rag/chunking/latexChunker.js +251 -0
- package/dist/rag/chunking/markdownChunker.d.ts +19 -0
- package/dist/rag/chunking/markdownChunker.js +201 -0
- package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
- package/dist/rag/chunking/recursiveChunker.js +148 -0
- package/dist/rag/chunking/semanticChunker.d.ts +41 -0
- package/dist/rag/chunking/semanticChunker.js +306 -0
- package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
- package/dist/rag/chunking/sentenceChunker.js +230 -0
- package/dist/rag/chunking/tokenChunker.d.ts +36 -0
- package/dist/rag/chunking/tokenChunker.js +183 -0
- package/dist/rag/document/MDocument.d.ts +198 -0
- package/dist/rag/document/MDocument.js +392 -0
- package/dist/rag/document/index.d.ts +5 -0
- package/dist/rag/document/index.js +5 -0
- package/dist/rag/document/loaders.d.ts +201 -0
- package/dist/rag/document/loaders.js +500 -0
- package/dist/rag/errors/RAGError.d.ts +244 -0
- package/dist/rag/errors/RAGError.js +274 -0
- package/dist/rag/errors/index.d.ts +6 -0
- package/dist/rag/errors/index.js +6 -0
- package/dist/rag/graphRag/graphRAG.d.ts +115 -0
- package/dist/rag/graphRag/graphRAG.js +384 -0
- package/dist/rag/graphRag/index.d.ts +4 -0
- package/dist/rag/graphRag/index.js +4 -0
- package/dist/rag/index.d.ts +103 -0
- package/dist/rag/index.js +141 -0
- package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
- package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
- package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
- package/dist/rag/metadata/index.d.ts +6 -0
- package/dist/rag/metadata/index.js +9 -0
- package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
- package/dist/rag/metadata/metadataExtractor.js +277 -0
- package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
- package/dist/rag/pipeline/RAGPipeline.js +401 -0
- package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
- package/dist/rag/pipeline/contextAssembly.js +337 -0
- package/dist/rag/pipeline/index.d.ts +5 -0
- package/dist/rag/pipeline/index.js +5 -0
- package/dist/rag/ragIntegration.d.ts +38 -0
- package/dist/rag/ragIntegration.js +211 -0
- package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
- package/dist/rag/reranker/RerankerFactory.js +430 -0
- package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
- package/dist/rag/reranker/RerankerRegistry.js +402 -0
- package/dist/rag/reranker/index.d.ts +6 -0
- package/dist/rag/reranker/index.js +9 -0
- package/dist/rag/reranker/reranker.d.ts +71 -0
- package/dist/rag/reranker/reranker.js +277 -0
- package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
- package/dist/rag/resilience/CircuitBreaker.js +431 -0
- package/dist/rag/resilience/RetryHandler.d.ts +115 -0
- package/dist/rag/resilience/RetryHandler.js +300 -0
- package/dist/rag/resilience/index.d.ts +7 -0
- package/dist/rag/resilience/index.js +7 -0
- package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
- package/dist/rag/retrieval/hybridSearch.js +313 -0
- package/dist/rag/retrieval/index.d.ts +5 -0
- package/dist/rag/retrieval/index.js +5 -0
- package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
- package/dist/rag/retrieval/vectorQueryTool.js +289 -0
- package/dist/rag/types.d.ts +768 -0
- package/dist/rag/types.js +8 -0
- package/dist/server/index.d.ts +15 -11
- package/dist/server/index.js +55 -51
- package/dist/server/utils/validation.d.ts +8 -8
- package/dist/types/common.d.ts +0 -1
- package/dist/types/fileTypes.d.ts +1 -1
- package/dist/types/generateTypes.d.ts +42 -8
- package/dist/types/generateTypes.js +1 -1
- package/dist/types/index.d.ts +25 -24
- package/dist/types/index.js +21 -20
- package/dist/types/modelTypes.d.ts +10 -10
- package/dist/types/pptTypes.d.ts +14 -2
- package/dist/types/pptTypes.js +16 -0
- package/dist/types/streamTypes.d.ts +28 -8
- package/dist/types/streamTypes.js +1 -1
- package/dist/utils/async/delay.d.ts +40 -0
- package/dist/utils/async/delay.js +42 -0
- package/dist/utils/async/index.d.ts +23 -0
- package/dist/utils/async/index.js +23 -0
- package/dist/utils/async/retry.d.ts +141 -0
- package/dist/utils/async/retry.js +171 -0
- package/dist/utils/async/withTimeout.d.ts +73 -0
- package/dist/utils/async/withTimeout.js +96 -0
- package/dist/utils/fileDetector.d.ts +7 -1
- package/dist/utils/fileDetector.js +91 -18
- package/dist/utils/json/extract.d.ts +103 -0
- package/dist/utils/json/extract.js +248 -0
- package/dist/utils/json/index.d.ts +36 -0
- package/dist/utils/json/index.js +36 -0
- package/dist/utils/json/safeParse.d.ts +137 -0
- package/dist/utils/json/safeParse.js +190 -0
- package/dist/utils/messageBuilder.d.ts +2 -2
- package/dist/utils/messageBuilder.js +15 -7
- package/dist/utils/modelRouter.d.ts +4 -4
- package/dist/utils/modelRouter.js +4 -4
- package/dist/utils/sanitizers/filename.d.ts +137 -0
- package/dist/utils/sanitizers/filename.js +365 -0
- package/dist/utils/sanitizers/html.d.ts +170 -0
- package/dist/utils/sanitizers/html.js +325 -0
- package/dist/utils/sanitizers/index.d.ts +26 -0
- package/dist/utils/sanitizers/index.js +29 -0
- package/dist/utils/sanitizers/svg.d.ts +81 -0
- package/dist/utils/sanitizers/svg.js +482 -0
- package/package.json +2 -2
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits HTML content by semantic tags.
|
|
5
|
+
*/
|
|
6
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
/**
 * HTML Chunker
 *
 * Reduces an HTML document to plain text (scripts, styles, comments and tags
 * removed, whitespace collapsed) and then cuts that text into pieces of at
 * most `maxSize` characters.
 */
export class HTMLChunker extends BaseChunker {
    strategy = "html";
    /**
     * Default configuration for this strategy: the shared chunker defaults
     * with `maxSize` set to 1000 and `overlap` set to 0.
     *
     * @returns The default configuration object.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Chunk HTML content.
     *
     * @param content - Raw HTML markup.
     * @param config - Chunker configuration; only `maxSize` is read here
     *   (falls back to 1000 when it is null or undefined).
     * @returns One chunk per size-based segment of the stripped text.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        // Strip HTML tags for text content. Note that the start/end offsets
        // handed to createChunk below index into this stripped text, not into
        // the original markup.
        const textContent = this.stripHtml(content);
        // Use simple character-based splitting for now; the overlap argument
        // is fixed at 0 regardless of config.overlap.
        const segments = this.splitBySizeWithOverlap(textContent, maxSize, 0);
        return segments.map((segment, index) => this.createChunk(segment.text, index, segment.start, segment.end));
    }
    /**
     * Strip HTML markup from content, leaving whitespace-normalized text.
     *
     * Order matters: script and style elements are removed together with
     * their bodies first, then comments, then any remaining tags.
     *
     * @param html - Raw HTML markup.
     * @returns The text with markup removed, runs of whitespace collapsed to
     *   a single space, and leading/trailing whitespace trimmed.
     */
    stripHtml(html) {
        return html
            // Closing tags may legally carry trailing whitespace
            // (e.g. "</script >"), so accept anything up to the next ">".
            // The \b keeps names such as <scripts> from being treated as
            // script elements.
            .replace(/<script\b[^>]*>[\s\S]*?<\/script\b[^>]*>/gi, "")
            .replace(/<style\b[^>]*>[\s\S]*?<\/style\b[^>]*>/gi, "")
            // Comments need their own pass: the generic tag pattern below
            // stops at the first ">", which would leave the tail of a comment
            // such as "<!-- a > b -->" in the output.
            .replace(/<!--[\s\S]*?-->/g, " ")
            .replace(/<[^>]+>/g, " ")
            .replace(/\s+/g, " ")
            .trim();
    }
}
|
|
39
|
+
//# sourceMappingURL=HTMLChunker.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits JSON documents by object boundaries.
|
|
5
|
+
*/
|
|
6
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
7
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
8
|
+
/**
 * JSON Chunker
 *
 * Type declaration for the chunker that handles the JSON chunking strategy.
 * It builds on {@link BaseChunker} and produces {@link Chunk} objects from a
 * JSON document supplied as a string.
 */
export declare class JSONChunker extends BaseChunker {
    /** Identifier of the chunking strategy implemented by this class. */
    readonly strategy: ChunkingStrategy;

    /**
     * Configuration applied when the caller does not supply one.
     *
     * @returns The default {@link ChunkerConfig} for this strategy.
     */
    getDefaultConfig(): ChunkerConfig;

    /**
     * Strategy-specific chunking step.
     *
     * @param content - The JSON document as text.
     * @param config - Configuration to chunk with.
     * @returns A promise resolving to the chunks produced from `content`.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;

    /**
     * Internal helper that flattens parsed JSON into an array of items.
     * Private: not part of the public surface.
     */
    private flattenJson;
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits JSON documents by object boundaries.
|
|
5
|
+
*/
|
|
6
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
import { ChunkingError, RAGErrorCodes } from "../errors/RAGError.js";
|
|
8
|
+
/**
 * JSON Chunker
 *
 * Parses the document, treats each element of a top-level array (or the
 * single top-level value) as one item, and emits each item pretty-printed
 * as a chunk. Items longer than `maxSize` are split by size.
 */
export class JSONChunker extends BaseChunker {
    strategy = "json";
    /**
     * Default configuration for the JSON strategy: the shared chunker
     * defaults with a 1000-character size limit and no overlap.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Chunk a JSON document item by item.
     *
     * @param content - JSON text.
     * @param config - Chunker configuration; only `maxSize` is read here
     *   (1000 when unset).
     * @returns Chunks numbered consecutively in emission order.
     * @throws ChunkingError when `content` cannot be parsed as JSON.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        let parsed;
        try {
            parsed = JSON.parse(content);
        }
        catch {
            throw new ChunkingError("Invalid JSON content", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                strategy: this.strategy,
            });
        }
        const chunks = [];
        const items = this.flattenJson(parsed);
        for (let i = 0; i < items.length; i++) {
            const item = items[i];
            // Skip only missing entries. A truthiness test here would also
            // drop the valid JSON values 0, false and "".
            if (item === undefined || item === null) {
                continue;
            }
            const jsonString = JSON.stringify(item, null, 2);
            if (jsonString.length > maxSize) {
                // Oversized item: fall back to plain size-based splitting of
                // its pretty-printed form (offsets are those of the segments).
                for (const segment of this.splitBySizeWithOverlap(jsonString, maxSize, 0)) {
                    chunks.push(this.createChunk(segment.text, chunks.length, segment.start, segment.end));
                }
                continue;
            }
            // Best-effort source position: look for the first 20 characters of
            // the pretty-printed item in the raw text. When the source uses a
            // different layout this misses, and a slot derived from the item
            // position is used instead.
            const startOffset = content.indexOf(jsonString.slice(0, 20));
            const located = startOffset >= 0;
            // Number chunks by emission order in both branches so indices stay
            // unique even after an oversized item produced several chunks.
            chunks.push(this.createChunk(jsonString, chunks.length, located ? startOffset : i * maxSize, located ? startOffset + jsonString.length : (i + 1) * maxSize));
        }
        return chunks;
    }
    /**
     * Normalise parsed JSON to a list of items.
     *
     * @param data - Any value returned by `JSON.parse`.
     * @returns The array itself for arrays, a one-element list for a
     *   non-null object, and `[{ value: data }]` for primitives and null.
     */
    flattenJson(data) {
        if (Array.isArray(data)) {
            return data;
        }
        if (typeof data === "object" && data !== null) {
            return [data];
        }
        return [{ value: data }];
    }
}
|
|
69
|
+
//# sourceMappingURL=JSONChunker.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LaTeX Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits LaTeX documents by sections and environments.
|
|
5
|
+
*/
|
|
6
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
7
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
8
|
+
/**
 * LaTeX Chunker
 *
 * Chunking strategy for LaTeX documents, built on {@link BaseChunker}.
 */
export declare class LaTeXChunker extends BaseChunker {
    /** Identifier of the chunking strategy this class implements. */
    readonly strategy: ChunkingStrategy;
    /** Default configuration for this strategy. */
    getDefaultConfig(): ChunkerConfig;
    /**
     * Produce the chunks for a LaTeX document.
     *
     * @param content - LaTeX source to chunk.
     * @param config - Chunker configuration to apply.
     * @returns The chunks, resolved asynchronously.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LaTeX Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits LaTeX documents by sections and environments.
|
|
5
|
+
*/
|
|
6
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
/**
 * LaTeX Chunker
 *
 * Cuts a LaTeX document at its sectioning commands (\chapter, \section,
 * \subsection, \subsubsection, \paragraph). Each section becomes one chunk,
 * or several size-bounded chunks when it is longer than `maxSize`.
 */
export class LaTeXChunker extends BaseChunker {
    strategy = "latex";
    /**
     * Default configuration for the LaTeX strategy: the shared chunker
     * defaults with a 1000-character size limit and no overlap.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Chunk a LaTeX document section by section.
     *
     * Text before the first heading forms its own section. Section text is
     * trimmed, and whitespace-only sections are skipped.
     *
     * @param content - LaTeX source.
     * @param config - Chunker configuration; only `maxSize` is read here
     *   (1000 when unset).
     * @returns Chunks numbered consecutively, with offsets into `content`.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        // A heading is the command name, an optional `*` (unnumbered form),
        // an optional [short title], then the {title}. Only the match
        // position is used, so a title with nested braces still works.
        const sectionPattern = /\\(?:section|subsection|subsubsection|chapter|paragraph)\*?(?:\[[^\]]*\])?\{[^}]+\}/g;
        // Source positions where a section begins. Index 0 covers any text
        // before the first heading.
        const sectionStarts = [0];
        let match;
        while ((match = sectionPattern.exec(content)) !== null) {
            if (match.index > 0) {
                sectionStarts.push(match.index);
            }
        }
        const chunks = [];
        for (let i = 0; i < sectionStarts.length; i++) {
            const sectionStart = sectionStarts[i];
            const sectionEnd = i + 1 < sectionStarts.length ? sectionStarts[i + 1] : content.length;
            const raw = content.slice(sectionStart, sectionEnd);
            const trimmed = raw.trim();
            if (!trimmed) {
                continue;
            }
            // `trimmed` starts with a non-whitespace character, so its first
            // occurrence in `raw` is right after the leading whitespace.
            const base = sectionStart + raw.indexOf(trimmed);
            if (trimmed.length <= maxSize) {
                chunks.push(this.createChunk(trimmed, chunks.length, base, base + trimmed.length));
                continue;
            }
            // Oversized section: split by size and shift the segment offsets
            // from section-relative to document-relative.
            // NOTE(review): assumes segment.start/end are relative to the
            // string passed to splitBySizeWithOverlap - confirm in BaseChunker.
            for (const segment of this.splitBySizeWithOverlap(trimmed, maxSize, 0)) {
                chunks.push(this.createChunk(segment.text, chunks.length, base + segment.start, base + segment.end));
            }
        }
        return chunks;
    }
}
|
|
64
|
+
//# sourceMappingURL=LaTeXChunker.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits markdown content by headers and structural elements.
|
|
5
|
+
*/
|
|
6
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
7
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
8
|
+
/**
 * Markdown Chunker
 *
 * Chunking strategy for Markdown documents, built on {@link BaseChunker}.
 */
export declare class MarkdownChunker extends BaseChunker {
    /** Identifier of the chunking strategy this class implements. */
    readonly strategy: ChunkingStrategy;
    /** Default configuration for this strategy. */
    getDefaultConfig(): ChunkerConfig;
    /**
     * Produce the chunks for a Markdown document.
     *
     * @param content - Markdown text to chunk.
     * @param config - Chunker configuration to apply.
     * @returns The chunks, resolved asynchronously.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits markdown content by headers and structural elements.
|
|
5
|
+
*/
|
|
6
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
/**
 * Markdown Chunker
 *
 * Cuts a Markdown document at its ATX headings (`#` to `######`). Each
 * heading and the text below it become one chunk, or several size-bounded
 * chunks when the section is longer than `maxSize`.
 */
export class MarkdownChunker extends BaseChunker {
    strategy = "markdown";
    /**
     * Default configuration for the Markdown strategy: the shared chunker
     * defaults with a 1000-character size limit and no overlap.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 0,
        };
    }
    /**
     * Chunk a Markdown document heading by heading.
     *
     * Chunk text is the heading line, a blank line, then the trimmed section
     * body (body only for text before the first heading). Every chunk carries
     * its heading as `sectionContext` metadata. Offsets are computed from the
     * positions recorded while scanning, so they point into `content` even
     * when the source spacing differs from the normalised chunk text.
     *
     * @param content - Markdown text.
     * @param config - Chunker configuration; only `maxSize` is read here
     *   (1000 when unset).
     * @returns Chunks numbered consecutively, with offsets into `content`.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const headerPattern = /^(#{1,6})\s+(.+)$/gm;
        // Each section records its heading, its body text, the source index
        // where it starts and the source index where its trimmed body starts.
        const sections = [];
        // Attach the trimmed text of content[from, to) to the latest section,
        // or open a heading-less section when no heading has been seen yet.
        const attachBody = (from, to, separator) => {
            const raw = content.slice(from, to);
            const text = raw.trim();
            if (!text) {
                return;
            }
            // First occurrence of the trimmed text is right after the
            // leading whitespace of the raw slice.
            const bodyStart = from + raw.indexOf(text);
            const owner = sections[sections.length - 1];
            if (owner) {
                owner.content += separator + text;
                owner.bodyStart = bodyStart;
            }
            else {
                sections.push({ header: "", content: text, level: 0, start: bodyStart, bodyStart });
            }
        };
        let lastIndex = 0;
        let match = headerPattern.exec(content);
        while (match !== null) {
            // Text between the previous heading and this one belongs to the
            // previous section.
            attachBody(lastIndex, match.index, "\n\n");
            lastIndex = match.index + match[0].length;
            sections.push({
                header: match[0],
                content: "",
                level: match[1]?.length ?? 1,
                start: match.index,
                // Until a body is attached, the body position is the heading end.
                bodyStart: lastIndex,
            });
            match = headerPattern.exec(content);
        }
        // Text after the last heading (or the whole document when there is none).
        attachBody(lastIndex, content.length, "");
        const chunks = [];
        for (const section of sections) {
            const body = section.content.trim();
            const fullContent = section.header ? section.header + "\n\n" + body : body;
            if (!fullContent) {
                continue;
            }
            // Length of the heading plus the "\n\n" inserted after it.
            const headerSpan = section.header ? section.header.length + 2 : 0;
            // Map a position in `fullContent` to a position in `content`.
            // Heading and body are verbatim source text; only the gap between
            // them was normalised.
            const toSource = (pos) => {
                if (section.header && pos <= section.header.length) {
                    return section.start + pos;
                }
                return section.bodyStart + Math.max(0, pos - headerSpan);
            };
            if (fullContent.length > maxSize) {
                // NOTE(review): assumes each piece's start/end are relative to
                // the string passed to splitBySizeWithOverlap - confirm in
                // BaseChunker.
                for (const sub of this.splitBySizeWithOverlap(fullContent, maxSize, 0)) {
                    chunks.push(this.createChunk(sub.text, chunks.length, toSource(sub.start), toSource(sub.end), "unknown", { sectionContext: section.header }));
                }
            }
            else {
                chunks.push(this.createChunk(fullContent, chunks.length, toSource(0), toSource(fullContent.length), "unknown", { sectionContext: section.header }));
            }
        }
        return chunks;
    }
}
|
|
103
|
+
//# sourceMappingURL=MarkdownChunker.js.map
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recursive Chunker
|
|
3
|
+
*
|
|
4
|
+
* Recursively splits text using an ordered list of separators.
|
|
5
|
+
* Tries each separator in order until chunks are small enough.
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
8
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
9
|
+
/**
 * Recursive Chunker
 *
 * Works through an ordered list of separators, breaking the text down
 * recursively until every piece satisfies the size requirement.
 */
export declare class RecursiveChunker extends BaseChunker {
    /** Identifier of the chunking strategy this class implements. */
    readonly strategy: ChunkingStrategy;
    /** Default configuration for this strategy. */
    getDefaultConfig(): ChunkerConfig;
    /**
     * Produce the chunks for a document.
     *
     * @param content - Text to chunk.
     * @param config - Chunker configuration to apply.
     * @returns The chunks, resolved asynchronously.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
    /**
     * Recursively split text using separators
     */
    private recursiveSplit;
    /**
     * Apply overlap between chunks
     */
    private applyOverlap;
}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recursive Chunker
|
|
3
|
+
*
|
|
4
|
+
* Recursively splits text using an ordered list of separators.
|
|
5
|
+
* Tries each separator in order until chunks are small enough.
|
|
6
|
+
*/
|
|
7
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
8
|
+
/**
 * Default separators for recursive splitting, tried in order: paragraph
 * break, line break, sentence end, space, and finally "" (plain size split).
 */
const DEFAULT_SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
/**
 * Recursive Chunker
 *
 * Splits content using ordered separators, recursively breaking
 * down text until chunks meet size requirements.
 */
export class RecursiveChunker extends BaseChunker {
    strategy = "recursive";
    /**
     * Default configuration for the recursive strategy: the shared chunker
     * defaults with a 1000-character size limit, 100 characters of overlap
     * and the default separator list.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 100,
            separators: DEFAULT_SEPARATORS,
        };
    }
    /**
     * Split `content` recursively and wrap each piece in a chunk.
     *
     * @param content - Text to chunk.
     * @param config - Reads `maxSize` (default 1000), `overlap` (default 100),
     *   `separators` (default DEFAULT_SEPARATORS) and `keepSeparators`
     *   (default true).
     * @returns Chunks in document order. Start offsets are located by
     *   searching `content`; a piece that does not occur verbatim (for
     *   example when separators were dropped) is placed at the current
     *   search position instead of at -1.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const overlap = config.overlap ?? 100;
        const separators = config.separators ?? DEFAULT_SEPARATORS;
        const keepSeparators = config.keepSeparators ?? true;
        const pieces = this.recursiveSplit(content, separators, maxSize, overlap, keepSeparators);
        const chunks = [];
        // Search position; it only moves forward so repeated text resolves to
        // successive occurrences.
        let cursor = 0;
        for (let i = 0; i < pieces.length; i++) {
            const text = pieces[i];
            if (!text) {
                continue;
            }
            const found = content.indexOf(text, cursor);
            const startOffset = found >= 0 ? found : cursor;
            chunks.push(this.createChunk(text, i, startOffset, startOffset + text.length));
            if (found >= 0) {
                // Advance by one only: with overlap the next piece begins
                // before this one ends.
                cursor = Math.max(cursor, found + 1);
            }
        }
        return chunks;
    }
    /**
     * Recursively split text using separators.
     *
     * The first separator found in `text` is used to split it; parts are then
     * packed greedily into pieces of at most `maxSize` characters. A part
     * that is still too long is split again with the remaining separators.
     * With no usable separator the text is cut into fixed-size windows.
     *
     * @param text - Text to split.
     * @param separators - Separators in priority order; "" means "split by size".
     * @param maxSize - Maximum piece length before overlap is added.
     * @param overlap - Characters shared between neighbouring pieces.
     * @param keepSeparators - Whether each part keeps its trailing separator.
     * @returns The pieces in order. When overlap is applied by prefixing,
     *   a piece may be up to `maxSize + overlap` characters long.
     */
    recursiveSplit(text, separators, maxSize, overlap, keepSeparators) {
        if (text.length <= maxSize) {
            return [text];
        }
        // Pick the first separator that occurs in the text ("" always matches).
        let separator = "";
        for (const sep of separators) {
            if (sep === "" || text.includes(sep)) {
                separator = sep;
                break;
            }
        }
        if (separator === "") {
            // Sliding window: each window starts `overlap` characters before
            // the previous one ended, but always advances by at least one.
            const windows = [];
            const stride = Math.max(1, maxSize - overlap);
            for (let start = 0; start < text.length; start += stride) {
                const end = Math.min(start + maxSize, text.length);
                windows.push(text.slice(start, end));
                // Stop once the end of the text is covered; continuing would
                // only emit ever-shorter copies of the tail.
                if (end >= text.length) {
                    break;
                }
            }
            return windows;
        }
        const parts = text.split(separator);
        const remainingSeparators = separators.slice(separators.indexOf(separator) + 1);
        const result = [];
        let currentChunk = "";
        for (let i = 0; i < parts.length; i++) {
            const isLast = i === parts.length - 1;
            const toAdd = parts[i] + (keepSeparators && !isLast ? separator : "");
            if (currentChunk.length + toAdd.length <= maxSize) {
                currentChunk += toAdd;
                continue;
            }
            // Current piece is full: flush it before placing this part.
            if (currentChunk.length > 0) {
                result.push(currentChunk);
            }
            if (toAdd.length > maxSize) {
                // Split the oversized part with the finer separators. Overlap
                // is disabled here and added once below; applying it at every
                // level would repeat the same overlap text.
                result.push(...this.recursiveSplit(toAdd, remainingSeparators, maxSize, 0, keepSeparators));
                currentChunk = "";
            }
            else {
                currentChunk = toAdd;
            }
        }
        if (currentChunk.length > 0) {
            result.push(currentChunk);
        }
        if (overlap > 0 && result.length > 1) {
            return this.applyOverlap(result, overlap);
        }
        return result;
    }
    /**
     * Apply overlap between chunks.
     *
     * @param chunks - Pieces in document order.
     * @param overlap - Number of trailing characters of the previous piece to
     *   prepend to each piece (the whole previous piece when it is shorter).
     * @returns A new list; the first piece is unchanged. The input list is
     *   returned as-is when it holds at most one piece.
     */
    applyOverlap(chunks, overlap) {
        if (chunks.length <= 1) {
            return chunks;
        }
        return chunks.map((piece, i) => {
            const current = piece ?? "";
            const previous = i > 0 ? chunks[i - 1] : undefined;
            if (!previous) {
                return current;
            }
            return previous.slice(-Math.min(overlap, previous.length)) + current;
        });
    }
}
|
|
140
|
+
//# sourceMappingURL=RecursiveChunker.js.map
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Markdown Chunker
|
|
3
|
+
*
|
|
4
|
+
* Combines markdown splitting with semantic similarity for intelligent merging.
|
|
5
|
+
*/
|
|
6
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
7
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
8
|
+
/**
 * Semantic Markdown Chunker
 *
 * Markdown chunking with an extra pass that merges small sections.
 * Can be enhanced with embedding-based similarity.
 */
export declare class SemanticMarkdownChunker extends BaseChunker {
    /** Identifier of the chunking strategy this class implements. */
    readonly strategy: ChunkingStrategy;
    /** Default configuration for this strategy. */
    getDefaultConfig(): ChunkerConfig;
    /**
     * Produce the chunks for a Markdown document.
     *
     * @param content - Markdown text to chunk.
     * @param config - Chunker configuration to apply.
     * @returns The chunks, resolved asynchronously.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
    /**
     * Merge small sections to optimize chunk sizes
     */
    private mergeSmallSections;
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Markdown Chunker
|
|
3
|
+
*
|
|
4
|
+
* Combines markdown splitting with semantic similarity for intelligent merging.
|
|
5
|
+
*/
|
|
6
|
+
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
+
/**
 * Semantic Markdown Chunker
 *
 * Splits Markdown at its headings, then merges neighbouring sections while
 * the merged text still fits in `maxSize`. Merging is purely size-based here;
 * it can be enhanced with embedding-based similarity.
 */
export class SemanticMarkdownChunker extends BaseChunker {
    strategy = "semantic-markdown";
    /**
     * Default configuration for this strategy: the shared chunker defaults
     * with a 1000-character size limit and 100 characters of overlap.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 100,
        };
    }
    /**
     * Chunk a Markdown document by (merged) heading sections.
     *
     * Every chunk carries the heading of the first section it contains as
     * `sectionContext` metadata. Offsets are derived from positions recorded
     * while scanning, so they point into `content` even though chunk text is
     * normalised (heading, blank line, trimmed body).
     *
     * @param content - Markdown text.
     * @param config - Reads `maxSize` (default 1000) and `overlap`
     *   (default 100, used only when an oversized section is split by size).
     * @returns Chunks numbered consecutively, with offsets into `content`.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const overlap = config.overlap ?? 100;
        const headerPattern = /^(#{1,6})\s+(.+)$/gm;
        // Each section records heading and body text plus three source
        // positions: where it starts, where its trimmed body starts, and
        // where it ends.
        const sections = [];
        // Attach the trimmed text of content[from, to) to the latest section,
        // or open a heading-less section when no heading has been seen yet.
        const attachBody = (from, to, separator) => {
            const raw = content.slice(from, to);
            const text = raw.trim();
            if (!text) {
                return;
            }
            // First occurrence of the trimmed text is right after the
            // leading whitespace of the raw slice.
            const bodyStart = from + raw.indexOf(text);
            const end = bodyStart + text.length;
            const owner = sections[sections.length - 1];
            if (owner) {
                owner.content += separator + text;
                owner.bodyStart = bodyStart;
                owner.end = end;
            }
            else {
                sections.push({ header: "", content: text, start: bodyStart, bodyStart, end });
            }
        };
        let lastIndex = 0;
        let match = headerPattern.exec(content);
        while (match !== null) {
            // Text between the previous heading and this one belongs to the
            // previous section.
            attachBody(lastIndex, match.index, "\n\n");
            lastIndex = match.index + match[0].length;
            sections.push({
                header: match[0],
                content: "",
                start: match.index,
                // Until a body is attached, body position and section end
                // are both the heading end.
                bodyStart: lastIndex,
                end: lastIndex,
            });
            match = headerPattern.exec(content);
        }
        // Text after the last heading (or the whole document when there is none).
        attachBody(lastIndex, content.length, "");
        const mergedSections = this.mergeSmallSections(sections, maxSize);
        const chunks = [];
        for (const section of mergedSections) {
            if (!section) {
                continue;
            }
            const body = section.content.trim();
            const fullContent = section.header ? section.header + "\n\n" + body : body;
            if (!fullContent) {
                continue;
            }
            if (fullContent.length <= maxSize) {
                // The chunk covers the section's whole source span; for a
                // merged section that runs from its first to its last member.
                chunks.push(this.createChunk(fullContent, chunks.length, section.start, section.end, "unknown", { sectionContext: section.header }));
                continue;
            }
            // Only a single, un-merged section can be oversized (merging never
            // exceeds maxSize), so heading and body are verbatim source text
            // and positions can be mapped back directly.
            const headerSpan = section.header ? section.header.length + 2 : 0;
            const toSource = (pos) => {
                const mapped = section.header && pos <= section.header.length
                    ? section.start + pos
                    : section.bodyStart + Math.max(0, pos - headerSpan);
                return Math.min(mapped, section.end);
            };
            // NOTE(review): assumes segment.start/end are relative to the
            // string passed to splitBySizeWithOverlap - confirm in BaseChunker.
            for (const segment of this.splitBySizeWithOverlap(fullContent, maxSize, overlap)) {
                chunks.push(this.createChunk(segment.text, chunks.length, toSource(segment.start), toSource(segment.end), "unknown", { sectionContext: section.header }));
            }
        }
        return chunks;
    }
    /**
     * Merge small sections to optimize chunk sizes.
     *
     * Walks the sections in order and folds each one into the running section
     * while the resulting chunk text (heading, blank line, trimmed body) stays
     * within `maxSize`. The size test measures the text that would actually be
     * emitted, joiners included, so a merge never pushes a section over the
     * limit.
     *
     * @param sections - Sections in document order; input objects are not
     *   modified.
     * @param maxSize - Maximum length of a merged section's chunk text.
     * @returns The merged sections. A merged section keeps the first member's
     *   heading and start, and takes the last member's end.
     */
    mergeSmallSections(sections, maxSize) {
        const result = [];
        // Chunk text a section would produce.
        const render = (candidate) => candidate.header
            ? candidate.header + "\n\n" + candidate.content.trim()
            : candidate.content.trim();
        let current = null;
        for (const section of sections) {
            if (!current) {
                current = { ...section };
                continue;
            }
            const appended = section.header
                ? "\n\n" + section.header + "\n" + section.content
                : "\n\n" + section.content;
            const candidate = { ...current, content: current.content + appended };
            if (render(candidate).length <= maxSize) {
                current = { ...candidate, end: section.end ?? current.end };
            }
            else {
                result.push(current);
                current = { ...section };
            }
        }
        if (current) {
            result.push(current);
        }
        return result;
    }
}
|
|
139
|
+
//# sourceMappingURL=SemanticMarkdownChunker.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sentence Chunker
|
|
3
|
+
*
|
|
4
|
+
* Splits text by sentence boundaries for semantically meaningful chunks.
|
|
5
|
+
*/
|
|
6
|
+
import type { Chunk, ChunkerConfig, ChunkingStrategy } from "../types.js";
|
|
7
|
+
import { BaseChunker } from "./BaseChunker.js";
|
|
8
|
+
/**
 * Sentence Chunker
 *
 * Chunking strategy that splits text by sentence boundaries, built on
 * {@link BaseChunker}.
 */
export declare class SentenceChunker extends BaseChunker {
    /** Identifier of the chunking strategy this class implements. */
    readonly strategy: ChunkingStrategy;
    /** Default configuration for this strategy. */
    getDefaultConfig(): ChunkerConfig;
    /**
     * Produce the chunks for a document.
     *
     * @param content - Text to chunk.
     * @param config - Chunker configuration to apply.
     * @returns The chunks, resolved asynchronously.
     */
    protected doChunk(content: string, config: ChunkerConfig): Promise<Chunk[]>;
    /**
     * Split content into sentences
     */
    private splitIntoSentences;
}
|