@framers/agentos 0.1.119 → 0.1.121
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/dist/api/agency.d.ts.map +1 -1
- package/dist/api/agency.js +227 -84
- package/dist/api/agency.js.map +1 -1
- package/dist/api/analyzeVideo.d.ts +127 -0
- package/dist/api/analyzeVideo.d.ts.map +1 -0
- package/dist/api/analyzeVideo.js +136 -0
- package/dist/api/analyzeVideo.js.map +1 -0
- package/dist/api/detectScenes.d.ts +82 -0
- package/dist/api/detectScenes.d.ts.map +1 -0
- package/dist/api/detectScenes.js +67 -0
- package/dist/api/detectScenes.js.map +1 -0
- package/dist/api/generateImage.d.ts +7 -0
- package/dist/api/generateImage.d.ts.map +1 -1
- package/dist/api/generateImage.js +133 -9
- package/dist/api/generateImage.js.map +1 -1
- package/dist/api/generateMusic.d.ts +98 -0
- package/dist/api/generateMusic.d.ts.map +1 -0
- package/dist/api/generateMusic.js +319 -0
- package/dist/api/generateMusic.js.map +1 -0
- package/dist/api/generateSFX.d.ts +96 -0
- package/dist/api/generateSFX.d.ts.map +1 -0
- package/dist/api/generateSFX.js +317 -0
- package/dist/api/generateSFX.js.map +1 -0
- package/dist/api/generateVideo.d.ts +113 -0
- package/dist/api/generateVideo.d.ts.map +1 -0
- package/dist/api/generateVideo.js +342 -0
- package/dist/api/generateVideo.js.map +1 -0
- package/dist/api/model.d.ts.map +1 -1
- package/dist/api/model.js +8 -4
- package/dist/api/model.js.map +1 -1
- package/dist/api/performOCR.d.ts +169 -0
- package/dist/api/performOCR.d.ts.map +1 -0
- package/dist/api/performOCR.js +198 -0
- package/dist/api/performOCR.js.map +1 -0
- package/dist/api/provider-defaults.d.ts +7 -5
- package/dist/api/provider-defaults.d.ts.map +1 -1
- package/dist/api/provider-defaults.js +32 -10
- package/dist/api/provider-defaults.js.map +1 -1
- package/dist/api/strategies/debate.d.ts.map +1 -1
- package/dist/api/strategies/debate.js +1 -0
- package/dist/api/strategies/debate.js.map +1 -1
- package/dist/api/strategies/graph.d.ts.map +1 -1
- package/dist/api/strategies/graph.js +69 -13
- package/dist/api/strategies/graph.js.map +1 -1
- package/dist/api/strategies/hierarchical.d.ts.map +1 -1
- package/dist/api/strategies/hierarchical.js +1 -0
- package/dist/api/strategies/hierarchical.js.map +1 -1
- package/dist/api/strategies/parallel.d.ts.map +1 -1
- package/dist/api/strategies/parallel.js +1 -0
- package/dist/api/strategies/parallel.js.map +1 -1
- package/dist/api/strategies/review-loop.d.ts.map +1 -1
- package/dist/api/strategies/review-loop.js +1 -0
- package/dist/api/strategies/review-loop.js.map +1 -1
- package/dist/api/strategies/sequential.d.ts.map +1 -1
- package/dist/api/strategies/sequential.js +54 -48
- package/dist/api/strategies/sequential.js.map +1 -1
- package/dist/api/streamBuffer.d.ts +20 -0
- package/dist/api/streamBuffer.d.ts.map +1 -0
- package/dist/api/streamBuffer.js +81 -0
- package/dist/api/streamBuffer.js.map +1 -0
- package/dist/api/types.d.ts +145 -5
- package/dist/api/types.d.ts.map +1 -1
- package/dist/api/types.js.map +1 -1
- package/dist/channels/adapters/RedditChannelAdapter.js.map +1 -1
- package/dist/core/audio/AudioProcessor.d.ts.map +1 -1
- package/dist/core/audio/AudioProcessor.js +1 -0
- package/dist/core/audio/AudioProcessor.js.map +1 -1
- package/dist/core/audio/EnvironmentalCalibrator.d.ts.map +1 -1
- package/dist/core/audio/EnvironmentalCalibrator.js +1 -0
- package/dist/core/audio/EnvironmentalCalibrator.js.map +1 -1
- package/dist/core/audio/FallbackAudioProxy.d.ts +169 -0
- package/dist/core/audio/FallbackAudioProxy.d.ts.map +1 -0
- package/dist/core/audio/FallbackAudioProxy.js +236 -0
- package/dist/core/audio/FallbackAudioProxy.js.map +1 -0
- package/dist/core/audio/IAudioGenerator.d.ts +103 -0
- package/dist/core/audio/IAudioGenerator.d.ts.map +1 -0
- package/dist/core/audio/IAudioGenerator.js +24 -0
- package/dist/core/audio/IAudioGenerator.js.map +1 -0
- package/dist/core/audio/index.d.ts +54 -0
- package/dist/core/audio/index.d.ts.map +1 -1
- package/dist/core/audio/index.js +93 -0
- package/dist/core/audio/index.js.map +1 -1
- package/dist/core/audio/providers/AudioGenLocalProvider.d.ts +136 -0
- package/dist/core/audio/providers/AudioGenLocalProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/AudioGenLocalProvider.js +235 -0
- package/dist/core/audio/providers/AudioGenLocalProvider.js.map +1 -0
- package/dist/core/audio/providers/ElevenLabsSFXProvider.d.ts +107 -0
- package/dist/core/audio/providers/ElevenLabsSFXProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/ElevenLabsSFXProvider.js +154 -0
- package/dist/core/audio/providers/ElevenLabsSFXProvider.js.map +1 -0
- package/dist/core/audio/providers/FalAudioProvider.d.ts +207 -0
- package/dist/core/audio/providers/FalAudioProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/FalAudioProvider.js +315 -0
- package/dist/core/audio/providers/FalAudioProvider.js.map +1 -0
- package/dist/core/audio/providers/MusicGenLocalProvider.d.ts +136 -0
- package/dist/core/audio/providers/MusicGenLocalProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/MusicGenLocalProvider.js +235 -0
- package/dist/core/audio/providers/MusicGenLocalProvider.js.map +1 -0
- package/dist/core/audio/providers/ReplicateAudioProvider.d.ts +200 -0
- package/dist/core/audio/providers/ReplicateAudioProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/ReplicateAudioProvider.js +346 -0
- package/dist/core/audio/providers/ReplicateAudioProvider.js.map +1 -0
- package/dist/core/audio/providers/StableAudioProvider.d.ts +138 -0
- package/dist/core/audio/providers/StableAudioProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/StableAudioProvider.js +192 -0
- package/dist/core/audio/providers/StableAudioProvider.js.map +1 -0
- package/dist/core/audio/providers/SunoProvider.d.ts +182 -0
- package/dist/core/audio/providers/SunoProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/SunoProvider.js +312 -0
- package/dist/core/audio/providers/SunoProvider.js.map +1 -0
- package/dist/core/audio/providers/UdioProvider.d.ts +177 -0
- package/dist/core/audio/providers/UdioProvider.d.ts.map +1 -0
- package/dist/core/audio/providers/UdioProvider.js +305 -0
- package/dist/core/audio/providers/UdioProvider.js.map +1 -0
- package/dist/core/audio/types.d.ts +257 -0
- package/dist/core/audio/types.d.ts.map +1 -0
- package/dist/core/audio/types.js +21 -0
- package/dist/core/audio/types.js.map +1 -0
- package/dist/core/images/FallbackImageProxy.d.ts +183 -0
- package/dist/core/images/FallbackImageProxy.d.ts.map +1 -0
- package/dist/core/images/FallbackImageProxy.js +283 -0
- package/dist/core/images/FallbackImageProxy.js.map +1 -0
- package/dist/core/images/IImageProvider.d.ts +1 -1
- package/dist/core/images/IImageProvider.d.ts.map +1 -1
- package/dist/core/images/index.d.ts +1 -0
- package/dist/core/images/index.d.ts.map +1 -1
- package/dist/core/images/index.js +1 -0
- package/dist/core/images/index.js.map +1 -1
- package/dist/core/llm/providers/AIModelProviderManager.d.ts +3 -1
- package/dist/core/llm/providers/AIModelProviderManager.d.ts.map +1 -1
- package/dist/core/llm/providers/AIModelProviderManager.js +8 -0
- package/dist/core/llm/providers/AIModelProviderManager.js.map +1 -1
- package/dist/core/llm/providers/errors/ClaudeCodeProviderError.d.ts +52 -0
- package/dist/core/llm/providers/errors/ClaudeCodeProviderError.d.ts.map +1 -0
- package/dist/core/llm/providers/errors/ClaudeCodeProviderError.js +36 -0
- package/dist/core/llm/providers/errors/ClaudeCodeProviderError.js.map +1 -0
- package/dist/core/llm/providers/errors/GeminiCLIProviderError.d.ts +32 -0
- package/dist/core/llm/providers/errors/GeminiCLIProviderError.d.ts.map +1 -0
- package/dist/core/llm/providers/errors/GeminiCLIProviderError.js +27 -0
- package/dist/core/llm/providers/errors/GeminiCLIProviderError.js.map +1 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.d.ts +38 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.d.ts.map +1 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.js +128 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeCLIBridge.js.map +1 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeProvider.d.ts +107 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeProvider.d.ts.map +1 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeProvider.js +504 -0
- package/dist/core/llm/providers/implementations/ClaudeCodeProvider.js.map +1 -0
- package/dist/core/llm/providers/implementations/GeminiCLIBridge.d.ts +60 -0
- package/dist/core/llm/providers/implementations/GeminiCLIBridge.d.ts.map +1 -0
- package/dist/core/llm/providers/implementations/GeminiCLIBridge.js +177 -0
- package/dist/core/llm/providers/implementations/GeminiCLIBridge.js.map +1 -0
- package/dist/core/llm/providers/implementations/GeminiCLIProvider.d.ts +55 -0
- package/dist/core/llm/providers/implementations/GeminiCLIProvider.d.ts.map +1 -0
- package/dist/core/llm/providers/implementations/GeminiCLIProvider.js +447 -0
- package/dist/core/llm/providers/implementations/GeminiCLIProvider.js.map +1 -0
- package/dist/core/media/ProviderPreferences.d.ts +158 -0
- package/dist/core/media/ProviderPreferences.d.ts.map +1 -0
- package/dist/core/media/ProviderPreferences.js +183 -0
- package/dist/core/media/ProviderPreferences.js.map +1 -0
- package/dist/core/subprocess/CLIRegistry.d.ts +71 -0
- package/dist/core/subprocess/CLIRegistry.d.ts.map +1 -0
- package/dist/core/subprocess/CLIRegistry.js +210 -0
- package/dist/core/subprocess/CLIRegistry.js.map +1 -0
- package/dist/core/subprocess/CLISubprocessBridge.d.ts +117 -0
- package/dist/core/subprocess/CLISubprocessBridge.d.ts.map +1 -0
- package/dist/core/subprocess/CLISubprocessBridge.js +199 -0
- package/dist/core/subprocess/CLISubprocessBridge.js.map +1 -0
- package/dist/core/subprocess/errors.d.ts +76 -0
- package/dist/core/subprocess/errors.d.ts.map +1 -0
- package/dist/core/subprocess/errors.js +75 -0
- package/dist/core/subprocess/errors.js.map +1 -0
- package/dist/core/subprocess/index.d.ts +11 -0
- package/dist/core/subprocess/index.d.ts.map +1 -0
- package/dist/core/subprocess/index.js +10 -0
- package/dist/core/subprocess/index.js.map +1 -0
- package/dist/core/subprocess/types.d.ts +100 -0
- package/dist/core/subprocess/types.d.ts.map +1 -0
- package/dist/core/subprocess/types.js +9 -0
- package/dist/core/subprocess/types.js.map +1 -0
- package/dist/core/video/FallbackVideoProxy.d.ts +166 -0
- package/dist/core/video/FallbackVideoProxy.d.ts.map +1 -0
- package/dist/core/video/FallbackVideoProxy.js +228 -0
- package/dist/core/video/FallbackVideoProxy.js.map +1 -0
- package/dist/core/video/IVideoAnalyzer.d.ts +29 -0
- package/dist/core/video/IVideoAnalyzer.d.ts.map +1 -0
- package/dist/core/video/IVideoAnalyzer.js +12 -0
- package/dist/core/video/IVideoAnalyzer.js.map +1 -0
- package/dist/core/video/IVideoGenerator.d.ts +76 -0
- package/dist/core/video/IVideoGenerator.d.ts.map +1 -0
- package/dist/core/video/IVideoGenerator.js +13 -0
- package/dist/core/video/IVideoGenerator.js.map +1 -0
- package/dist/core/video/VideoAnalyzer.d.ts +278 -0
- package/dist/core/video/VideoAnalyzer.d.ts.map +1 -0
- package/dist/core/video/VideoAnalyzer.js +648 -0
- package/dist/core/video/VideoAnalyzer.js.map +1 -0
- package/dist/core/video/index.d.ts +55 -0
- package/dist/core/video/index.d.ts.map +1 -0
- package/dist/core/video/index.js +78 -0
- package/dist/core/video/index.js.map +1 -0
- package/dist/core/video/providers/FalVideoProvider.d.ts +195 -0
- package/dist/core/video/providers/FalVideoProvider.d.ts.map +1 -0
- package/dist/core/video/providers/FalVideoProvider.js +322 -0
- package/dist/core/video/providers/FalVideoProvider.js.map +1 -0
- package/dist/core/video/providers/ReplicateVideoProvider.d.ts +194 -0
- package/dist/core/video/providers/ReplicateVideoProvider.d.ts.map +1 -0
- package/dist/core/video/providers/ReplicateVideoProvider.js +356 -0
- package/dist/core/video/providers/ReplicateVideoProvider.js.map +1 -0
- package/dist/core/video/providers/RunwayVideoProvider.d.ts +175 -0
- package/dist/core/video/providers/RunwayVideoProvider.d.ts.map +1 -0
- package/dist/core/video/providers/RunwayVideoProvider.js +293 -0
- package/dist/core/video/providers/RunwayVideoProvider.js.map +1 -0
- package/dist/core/video/types.d.ts +441 -0
- package/dist/core/video/types.d.ts.map +1 -0
- package/dist/core/video/types.js +10 -0
- package/dist/core/video/types.js.map +1 -0
- package/dist/core/vision/SceneDetector.d.ts +180 -0
- package/dist/core/vision/SceneDetector.d.ts.map +1 -0
- package/dist/core/vision/SceneDetector.js +366 -0
- package/dist/core/vision/SceneDetector.js.map +1 -0
- package/dist/core/vision/index.d.ts +2 -1
- package/dist/core/vision/index.d.ts.map +1 -1
- package/dist/core/vision/index.js +1 -0
- package/dist/core/vision/index.js.map +1 -1
- package/dist/core/vision/types.d.ts +125 -0
- package/dist/core/vision/types.d.ts.map +1 -1
- package/dist/discovery/CapabilityDiscoveryEngine.d.ts +32 -0
- package/dist/discovery/CapabilityDiscoveryEngine.d.ts.map +1 -1
- package/dist/discovery/CapabilityDiscoveryEngine.js +46 -0
- package/dist/discovery/CapabilityDiscoveryEngine.js.map +1 -1
- package/dist/extensions/MultiRegistryLoader.js.map +1 -1
- package/dist/index.d.ts +17 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -1
- package/dist/memory/CognitiveMemoryManager.d.ts +40 -0
- package/dist/memory/CognitiveMemoryManager.d.ts.map +1 -1
- package/dist/memory/CognitiveMemoryManager.js +54 -1
- package/dist/memory/CognitiveMemoryManager.js.map +1 -1
- package/dist/memory/facade/Memory.d.ts +4 -0
- package/dist/memory/facade/Memory.d.ts.map +1 -1
- package/dist/memory/facade/Memory.js +140 -4
- package/dist/memory/facade/Memory.js.map +1 -1
- package/dist/memory/facade/types.d.ts +30 -2
- package/dist/memory/facade/types.d.ts.map +1 -1
- package/dist/memory/index.d.ts +1 -0
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +1 -0
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/store/HnswSidecar.d.ts +115 -0
- package/dist/memory/store/HnswSidecar.d.ts.map +1 -0
- package/dist/memory/store/HnswSidecar.js +256 -0
- package/dist/memory/store/HnswSidecar.js.map +1 -0
- package/dist/memory/types.d.ts +15 -0
- package/dist/memory/types.d.ts.map +1 -1
- package/dist/query-router/QueryClassifier.d.ts +192 -21
- package/dist/query-router/QueryClassifier.d.ts.map +1 -1
- package/dist/query-router/QueryClassifier.js +604 -23
- package/dist/query-router/QueryClassifier.js.map +1 -1
- package/dist/query-router/QueryDispatcher.d.ts +106 -8
- package/dist/query-router/QueryDispatcher.d.ts.map +1 -1
- package/dist/query-router/QueryDispatcher.js +387 -8
- package/dist/query-router/QueryDispatcher.js.map +1 -1
- package/dist/query-router/QueryRouter.d.ts +198 -14
- package/dist/query-router/QueryRouter.d.ts.map +1 -1
- package/dist/query-router/QueryRouter.js +738 -50
- package/dist/query-router/QueryRouter.js.map +1 -1
- package/dist/query-router/index.d.ts +1 -1
- package/dist/query-router/index.d.ts.map +1 -1
- package/dist/query-router/index.js +1 -1
- package/dist/query-router/index.js.map +1 -1
- package/dist/query-router/types.d.ts +396 -3
- package/dist/query-router/types.d.ts.map +1 -1
- package/dist/query-router/types.js +35 -0
- package/dist/query-router/types.js.map +1 -1
- package/dist/rag/HydeRetriever.d.ts +108 -0
- package/dist/rag/HydeRetriever.d.ts.map +1 -1
- package/dist/rag/HydeRetriever.js +184 -0
- package/dist/rag/HydeRetriever.js.map +1 -1
- package/dist/rag/IRetrievalAugmentor.d.ts +15 -0
- package/dist/rag/IRetrievalAugmentor.d.ts.map +1 -1
- package/dist/rag/RetrievalAugmentor.d.ts +58 -0
- package/dist/rag/RetrievalAugmentor.d.ts.map +1 -1
- package/dist/rag/RetrievalAugmentor.js +200 -32
- package/dist/rag/RetrievalAugmentor.js.map +1 -1
- package/dist/rag/VectorStoreManager.js +1 -1
- package/dist/rag/audit/RAGAuditCollector.d.ts +7 -0
- package/dist/rag/audit/RAGAuditCollector.d.ts.map +1 -1
- package/dist/rag/audit/RAGAuditCollector.js +10 -0
- package/dist/rag/audit/RAGAuditCollector.js.map +1 -1
- package/dist/rag/audit/RAGAuditTypes.d.ts +10 -1
- package/dist/rag/audit/RAGAuditTypes.d.ts.map +1 -1
- package/dist/rag/chunking/SemanticChunker.d.ts +210 -0
- package/dist/rag/chunking/SemanticChunker.d.ts.map +1 -0
- package/dist/rag/chunking/SemanticChunker.js +460 -0
- package/dist/rag/chunking/SemanticChunker.js.map +1 -0
- package/dist/rag/chunking/index.d.ts +10 -0
- package/dist/rag/chunking/index.d.ts.map +1 -0
- package/dist/rag/chunking/index.js +10 -0
- package/dist/rag/chunking/index.js.map +1 -0
- package/dist/rag/implementations/vector_stores/PineconeVectorStore.d.ts +103 -0
- package/dist/rag/implementations/vector_stores/PineconeVectorStore.d.ts.map +1 -0
- package/dist/rag/implementations/vector_stores/PineconeVectorStore.js +315 -0
- package/dist/rag/implementations/vector_stores/PineconeVectorStore.js.map +1 -0
- package/dist/rag/implementations/vector_stores/PostgresVectorStore.d.ts +107 -0
- package/dist/rag/implementations/vector_stores/PostgresVectorStore.d.ts.map +1 -0
- package/dist/rag/implementations/vector_stores/PostgresVectorStore.js +438 -0
- package/dist/rag/implementations/vector_stores/PostgresVectorStore.js.map +1 -0
- package/dist/rag/index.d.ts +15 -1
- package/dist/rag/index.d.ts.map +1 -1
- package/dist/rag/index.js +32 -0
- package/dist/rag/index.js.map +1 -1
- package/dist/rag/migration/MigrationEngine.d.ts +47 -0
- package/dist/rag/migration/MigrationEngine.d.ts.map +1 -0
- package/dist/rag/migration/MigrationEngine.js +168 -0
- package/dist/rag/migration/MigrationEngine.js.map +1 -0
- package/dist/rag/migration/adapters/PineconeSourceAdapter.d.ts +23 -0
- package/dist/rag/migration/adapters/PineconeSourceAdapter.d.ts.map +1 -0
- package/dist/rag/migration/adapters/PineconeSourceAdapter.js +63 -0
- package/dist/rag/migration/adapters/PineconeSourceAdapter.js.map +1 -0
- package/dist/rag/migration/adapters/PostgresSourceAdapter.d.ts +30 -0
- package/dist/rag/migration/adapters/PostgresSourceAdapter.d.ts.map +1 -0
- package/dist/rag/migration/adapters/PostgresSourceAdapter.js +71 -0
- package/dist/rag/migration/adapters/PostgresSourceAdapter.js.map +1 -0
- package/dist/rag/migration/adapters/PostgresTargetAdapter.d.ts +38 -0
- package/dist/rag/migration/adapters/PostgresTargetAdapter.d.ts.map +1 -0
- package/dist/rag/migration/adapters/PostgresTargetAdapter.js +114 -0
- package/dist/rag/migration/adapters/PostgresTargetAdapter.js.map +1 -0
- package/dist/rag/migration/adapters/QdrantSourceAdapter.d.ts +36 -0
- package/dist/rag/migration/adapters/QdrantSourceAdapter.d.ts.map +1 -0
- package/dist/rag/migration/adapters/QdrantSourceAdapter.js +109 -0
- package/dist/rag/migration/adapters/QdrantSourceAdapter.js.map +1 -0
- package/dist/rag/migration/adapters/QdrantTargetAdapter.d.ts +35 -0
- package/dist/rag/migration/adapters/QdrantTargetAdapter.d.ts.map +1 -0
- package/dist/rag/migration/adapters/QdrantTargetAdapter.js +110 -0
- package/dist/rag/migration/adapters/QdrantTargetAdapter.js.map +1 -0
- package/dist/rag/migration/adapters/SqliteSourceAdapter.d.ts +37 -0
- package/dist/rag/migration/adapters/SqliteSourceAdapter.d.ts.map +1 -0
- package/dist/rag/migration/adapters/SqliteSourceAdapter.js +72 -0
- package/dist/rag/migration/adapters/SqliteSourceAdapter.js.map +1 -0
- package/dist/rag/migration/adapters/SqliteTargetAdapter.d.ts +47 -0
- package/dist/rag/migration/adapters/SqliteTargetAdapter.d.ts.map +1 -0
- package/dist/rag/migration/adapters/SqliteTargetAdapter.js +93 -0
- package/dist/rag/migration/adapters/SqliteTargetAdapter.js.map +1 -0
- package/dist/rag/migration/types.d.ts +108 -0
- package/dist/rag/migration/types.d.ts.map +1 -0
- package/dist/rag/migration/types.js +11 -0
- package/dist/rag/migration/types.js.map +1 -0
- package/dist/rag/multimodal/MultimodalIndexer.d.ts +35 -0
- package/dist/rag/multimodal/MultimodalIndexer.d.ts.map +1 -1
- package/dist/rag/multimodal/MultimodalIndexer.js +66 -1
- package/dist/rag/multimodal/MultimodalIndexer.js.map +1 -1
- package/dist/rag/multimodal/types.d.ts +24 -0
- package/dist/rag/multimodal/types.d.ts.map +1 -1
- package/dist/rag/raptor/RaptorTree.d.ts +268 -0
- package/dist/rag/raptor/RaptorTree.d.ts.map +1 -0
- package/dist/rag/raptor/RaptorTree.js +443 -0
- package/dist/rag/raptor/RaptorTree.js.map +1 -0
- package/dist/rag/raptor/index.d.ts +11 -0
- package/dist/rag/raptor/index.d.ts.map +1 -0
- package/dist/rag/raptor/index.js +11 -0
- package/dist/rag/raptor/index.js.map +1 -0
- package/dist/rag/reranking/providers/CohereReranker.js.map +1 -1
- package/dist/rag/search/BM25Index.d.ts +282 -0
- package/dist/rag/search/BM25Index.d.ts.map +1 -0
- package/dist/rag/search/BM25Index.js +344 -0
- package/dist/rag/search/BM25Index.js.map +1 -0
- package/dist/rag/search/HybridSearcher.d.ts +198 -0
- package/dist/rag/search/HybridSearcher.d.ts.map +1 -0
- package/dist/rag/search/HybridSearcher.js +316 -0
- package/dist/rag/search/HybridSearcher.js.map +1 -0
- package/dist/rag/search/index.d.ts +12 -0
- package/dist/rag/search/index.d.ts.map +1 -0
- package/dist/rag/search/index.js +12 -0
- package/dist/rag/search/index.js.map +1 -0
- package/dist/rag/setup/DockerDetector.d.ts +67 -0
- package/dist/rag/setup/DockerDetector.d.ts.map +1 -0
- package/dist/rag/setup/DockerDetector.js +125 -0
- package/dist/rag/setup/DockerDetector.js.map +1 -0
- package/dist/rag/setup/PostgresSetup.d.ts +20 -0
- package/dist/rag/setup/PostgresSetup.d.ts.map +1 -0
- package/dist/rag/setup/PostgresSetup.js +133 -0
- package/dist/rag/setup/PostgresSetup.js.map +1 -0
- package/dist/rag/setup/QdrantSetup.d.ts +26 -0
- package/dist/rag/setup/QdrantSetup.d.ts.map +1 -0
- package/dist/rag/setup/QdrantSetup.js +96 -0
- package/dist/rag/setup/QdrantSetup.js.map +1 -0
- package/dist/rag/setup/types.d.ts +55 -0
- package/dist/rag/setup/types.d.ts.map +1 -0
- package/dist/rag/setup/types.js +6 -0
- package/dist/rag/setup/types.js.map +1 -0
- package/dist/rag/unified/UnifiedRetriever.d.ts +472 -0
- package/dist/rag/unified/UnifiedRetriever.d.ts.map +1 -0
- package/dist/rag/unified/UnifiedRetriever.js +887 -0
- package/dist/rag/unified/UnifiedRetriever.js.map +1 -0
- package/dist/rag/unified/index.d.ts +24 -0
- package/dist/rag/unified/index.d.ts.map +1 -0
- package/dist/rag/unified/index.js +23 -0
- package/dist/rag/unified/index.js.map +1 -0
- package/dist/rag/unified/types.d.ts +546 -0
- package/dist/rag/unified/types.d.ts.map +1 -0
- package/dist/rag/unified/types.js +177 -0
- package/dist/rag/unified/types.js.map +1 -0
- package/dist/speech/providers/AssemblyAISTTProvider.js.map +1 -1
- package/dist/speech/providers/AzureSpeechSTTProvider.js.map +1 -1
- package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts +1 -1
- package/dist/speech/providers/DeepgramBatchSTTProvider.js.map +1 -1
- package/package.json +5 -1
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Semantic text chunker that splits on natural boundaries instead
|
|
3
|
+
* of fixed character counts.
|
|
4
|
+
*
|
|
5
|
+
* Four-tier splitting strategy:
|
|
6
|
+
* 1. **Heading boundaries** — Markdown headings (`# ## ###` etc.) start new chunks
|
|
7
|
+
* 2. **Paragraph boundaries** — Double newlines are the preferred split point
|
|
8
|
+
* 3. **Sentence boundaries** — Period/exclamation/question followed by whitespace
|
|
9
|
+
* 4. **Fixed-size fallback** — Only when paragraphs/sentences exceed `maxSize`
|
|
10
|
+
*
|
|
11
|
+
* Each chunk preserves complete sentences/paragraphs and includes configurable
|
|
12
|
+
* overlap context from the previous chunk for retrieval continuity.
|
|
13
|
+
*
|
|
14
|
+
* Special handling:
|
|
15
|
+
* - **Code blocks** (fenced with triple backticks) are kept intact when possible
|
|
16
|
+
* - **Markdown headings** always start new chunks for better section-level retrieval
|
|
17
|
+
* - **Small fragments** below `minSize` are merged with the previous chunk
|
|
18
|
+
*
|
|
19
|
+
* @module agentos/rag/chunking/SemanticChunker
|
|
20
|
+
* @see RetrievalAugmentor for integration with the RAG pipeline
|
|
21
|
+
*/
|
|
22
|
+
/**
|
|
23
|
+
* Configuration for the semantic chunker.
|
|
24
|
+
*
|
|
25
|
+
* @interface SemanticChunkerConfig
|
|
26
|
+
*/
|
|
27
|
+
export interface SemanticChunkerConfig {
|
|
28
|
+
/** Target chunk size in characters. Default: 1000. */
|
|
29
|
+
targetSize?: number;
|
|
30
|
+
/** Maximum chunk size — hard limit before forced splitting. Default: 2000. */
|
|
31
|
+
maxSize?: number;
|
|
32
|
+
/** Minimum chunk size — fragments below this merge with previous. Default: 200. */
|
|
33
|
+
minSize?: number;
|
|
34
|
+
/** Overlap characters from previous chunk prepended for context. Default: 100. */
|
|
35
|
+
overlap?: number;
|
|
36
|
+
/** Whether to detect and preserve fenced code blocks intact. Default: true. */
|
|
37
|
+
preserveCodeBlocks?: boolean;
|
|
38
|
+
/** Whether to detect markdown headings as chunk-start boundaries. Default: true. */
|
|
39
|
+
respectHeadings?: boolean;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* A semantically coherent text chunk produced by the chunker.
|
|
43
|
+
*
|
|
44
|
+
* @interface SemanticChunk
|
|
45
|
+
* @property {string} text - The chunk text content (may include overlap prefix).
|
|
46
|
+
* @property {number} index - 0-based sequence index within the chunked document.
|
|
47
|
+
* @property {number} startOffset - Character offset in the original text where this chunk begins (before overlap).
|
|
48
|
+
* @property {number} endOffset - Character offset in the original text where this chunk ends.
|
|
49
|
+
* @property {BoundaryType} boundaryType - Type of boundary that determined this chunk's split.
|
|
50
|
+
* @property {Record<string, unknown>} [metadata] - Pass-through metadata from the caller.
|
|
51
|
+
*/
|
|
52
|
+
export interface SemanticChunk {
|
|
53
|
+
/** The chunk text content (may include overlap prefix from previous chunk). */
|
|
54
|
+
text: string;
|
|
55
|
+
/** 0-based sequence index within the chunked document. */
|
|
56
|
+
index: number;
|
|
57
|
+
/** Character offset in the original text where this chunk begins (before overlap). */
|
|
58
|
+
startOffset: number;
|
|
59
|
+
/** Character offset in the original text where this chunk ends. */
|
|
60
|
+
endOffset: number;
|
|
61
|
+
/** Type of boundary that determined this chunk's split point. */
|
|
62
|
+
boundaryType: BoundaryType;
|
|
63
|
+
/** Pass-through metadata from the caller. */
|
|
64
|
+
metadata?: Record<string, unknown>;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* The type of boundary used to split a chunk.
|
|
68
|
+
*/
|
|
69
|
+
export type BoundaryType = 'paragraph' | 'sentence' | 'heading' | 'code-block' | 'fixed';
|
|
70
|
+
/**
|
|
71
|
+
* Semantic text chunker that splits on natural boundaries instead of
|
|
72
|
+
* fixed character counts.
|
|
73
|
+
*
|
|
74
|
+
* Produces chunks that are more semantically coherent than fixed-size
|
|
75
|
+
* splitting, improving retrieval quality by keeping related ideas together.
|
|
76
|
+
*
|
|
77
|
+
* @example Basic usage
|
|
78
|
+
* ```typescript
|
|
79
|
+
* const chunker = new SemanticChunker({ targetSize: 800, overlap: 50 });
|
|
80
|
+
* const chunks = chunker.chunk(markdownDocument);
|
|
81
|
+
* for (const c of chunks) {
|
|
82
|
+
* console.log(`Chunk ${c.index} (${c.boundaryType}): ${c.text.length} chars`);
|
|
83
|
+
* }
|
|
84
|
+
* ```
|
|
85
|
+
*
|
|
86
|
+
* @example Preserving code blocks
|
|
87
|
+
* ```typescript
|
|
88
|
+
* const chunker = new SemanticChunker({
|
|
89
|
+
* targetSize: 1000,
|
|
90
|
+
* maxSize: 3000, // Allow larger chunks for code blocks
|
|
91
|
+
* preserveCodeBlocks: true,
|
|
92
|
+
* });
|
|
93
|
+
* const chunks = chunker.chunk(technicalDoc);
|
|
94
|
+
* ```
|
|
95
|
+
*/
|
|
96
|
+
export declare class SemanticChunker {
|
|
97
|
+
/** Resolved configuration with defaults applied. */
|
|
98
|
+
private config;
|
|
99
|
+
/**
|
|
100
|
+
* Creates a new SemanticChunker.
|
|
101
|
+
*
|
|
102
|
+
* @param {SemanticChunkerConfig} [config] - Chunking configuration.
|
|
103
|
+
* @param {number} [config.targetSize=1000] - Target chunk size in characters.
|
|
104
|
+
* @param {number} [config.maxSize=2000] - Maximum chunk size (hard limit).
|
|
105
|
+
* @param {number} [config.minSize=200] - Minimum chunk size before merging.
|
|
106
|
+
* @param {number} [config.overlap=100] - Overlap characters from previous chunk.
|
|
107
|
+
* @param {boolean} [config.preserveCodeBlocks=true] - Keep code blocks intact.
|
|
108
|
+
* @param {boolean} [config.respectHeadings=true] - Start new chunks at headings.
|
|
109
|
+
*
|
|
110
|
+
* @example
|
|
111
|
+
* ```typescript
|
|
112
|
+
* const chunker = new SemanticChunker({
|
|
113
|
+
* targetSize: 800,
|
|
114
|
+
* maxSize: 1500,
|
|
115
|
+
* overlap: 80,
|
|
116
|
+
* });
|
|
117
|
+
* ```
|
|
118
|
+
*/
|
|
119
|
+
constructor(config?: SemanticChunkerConfig);
|
|
120
|
+
/**
|
|
121
|
+
* Splits text into semantically coherent chunks.
|
|
122
|
+
*
|
|
123
|
+
* Pipeline:
|
|
124
|
+
* 1. Pre-process: extract code blocks (if `preserveCodeBlocks`)
|
|
125
|
+
* 2. Split by headings (if `respectHeadings`) — each heading starts a new section
|
|
126
|
+
* 3. Within sections, split by paragraphs (double newline)
|
|
127
|
+
* 4. If a paragraph exceeds `maxSize`, split by sentences
|
|
128
|
+
* 5. If a sentence exceeds `maxSize`, split at word boundaries (fixed fallback)
|
|
129
|
+
* 6. Merge small fragments (< `minSize`) with the previous chunk
|
|
130
|
+
* 7. Add overlap from the end of the previous chunk to each chunk
|
|
131
|
+
*
|
|
132
|
+
* @param {string} text - The full text to chunk.
|
|
133
|
+
* @param {Record<string, unknown>} [metadata] - Optional metadata attached to all chunks.
|
|
134
|
+
* @returns {SemanticChunk[]} Array of chunks in order.
|
|
135
|
+
* @throws {Error} If text is empty.
|
|
136
|
+
*
|
|
137
|
+
* @example
|
|
138
|
+
* ```typescript
|
|
139
|
+
* const chunks = chunker.chunk(
|
|
140
|
+
* '# Introduction\n\nFirst paragraph.\n\n## Details\n\nSecond paragraph.',
|
|
141
|
+
* { source: 'docs/readme.md' },
|
|
142
|
+
* );
|
|
143
|
+
* // chunks[0].boundaryType === 'heading'
|
|
144
|
+
* // chunks[0].text includes "# Introduction\n\nFirst paragraph."
|
|
145
|
+
* ```
|
|
146
|
+
*/
|
|
147
|
+
chunk(text: string, metadata?: Record<string, unknown>): SemanticChunk[];
|
|
148
|
+
/**
|
|
149
|
+
* Splits text into structural segments based on headings and code blocks.
|
|
150
|
+
*
|
|
151
|
+
* This is the first pass that identifies major structural boundaries:
|
|
152
|
+
* - Markdown headings always start new segments
|
|
153
|
+
* - Fenced code blocks are kept as single segments when possible
|
|
154
|
+
* - Remaining text is split by paragraphs (double newline)
|
|
155
|
+
*
|
|
156
|
+
* @param {string} text - Full document text.
|
|
157
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Segments.
|
|
158
|
+
*/
|
|
159
|
+
private splitByStructure;
|
|
160
|
+
/**
|
|
161
|
+
* Further splits an oversized segment by paragraph, sentence, and finally word boundaries.
|
|
162
|
+
*
|
|
163
|
+
* Called when a structural segment exceeds `maxSize`. Tries progressively
|
|
164
|
+
* smaller split granularity:
|
|
165
|
+
* 1. Paragraph splits (double newline)
|
|
166
|
+
* 2. Sentence splits (period/exclamation/question + space + uppercase)
|
|
167
|
+
* 3. Word boundary splits (fixed-size fallback)
|
|
168
|
+
*
|
|
169
|
+
* @param {string} text - Oversized segment text.
|
|
170
|
+
* @param {number} baseOffset - Character offset of this segment in the original text.
|
|
171
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Sub-segments.
|
|
172
|
+
*/
|
|
173
|
+
private splitOversizedSegment;
|
|
174
|
+
/**
|
|
175
|
+
* Splits text by sentence boundaries.
|
|
176
|
+
*
|
|
177
|
+
* Detects sentence endings (`.` `!` `?` followed by whitespace) and accumulates
|
|
178
|
+
* sentences until reaching `targetSize`. Falls back to word-boundary splitting
|
|
179
|
+
* for sentences exceeding `maxSize`.
|
|
180
|
+
*
|
|
181
|
+
* @param {string} text - Text to split by sentences.
|
|
182
|
+
* @param {number} baseOffset - Character offset in the original text.
|
|
183
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Sentence-split chunks.
|
|
184
|
+
*/
|
|
185
|
+
private splitBySentences;
|
|
186
|
+
/**
|
|
187
|
+
* Last-resort fixed-size splitting at word boundaries.
|
|
188
|
+
*
|
|
189
|
+
* Splits text at the last space before `targetSize` to avoid breaking words.
|
|
190
|
+
* This is only used when no paragraph or sentence boundaries are available
|
|
191
|
+
* within a segment that exceeds `maxSize`.
|
|
192
|
+
*
|
|
193
|
+
* @param {string} text - Text to split at word boundaries.
|
|
194
|
+
* @param {number} baseOffset - Character offset in the original text.
|
|
195
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Fixed-size chunks.
|
|
196
|
+
*/
|
|
197
|
+
private splitFixed;
|
|
198
|
+
/**
|
|
199
|
+
* Merges fragments smaller than `minSize` with the previous chunk.
|
|
200
|
+
*
|
|
201
|
+
* Small trailing fragments (e.g., a short concluding sentence) are merged
|
|
202
|
+
* backwards to prevent creating chunks that are too small for meaningful
|
|
203
|
+
* embedding.
|
|
204
|
+
*
|
|
205
|
+
* @param {Array<{ text: string; offset: number; boundary: BoundaryType }>} segments - Input segments.
|
|
206
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Segments with small ones merged.
|
|
207
|
+
*/
|
|
208
|
+
private mergeSmallFragments;
|
|
209
|
+
}
|
|
210
|
+
//# sourceMappingURL=SemanticChunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SemanticChunker.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/SemanticChunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAIH;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACpC,sDAAsD;IACtD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,8EAA8E;IAC9E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,mFAAmF;IACnF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,kFAAkF;IAClF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+EAA+E;IAC/E,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,oFAAoF;IACpF,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,aAAa;IAC5B,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAC;IACd,sFAAsF;IACtF,WAAW,EAAE,MAAM,CAAC;IACpB,mEAAmE;IACnE,SAAS,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,YAAY,EAAE,YAAY,CAAC;IAC3B,6CAA6C;IAC7C,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,WAAW,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,GAAG,OAAO,CAAC;AAuBzF;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,qBAAa,eAAe;IAC1B,oDAAoD;IACpD,OAAO,CAAC,MAAM,CAAkC;IAEhD;;;;;;;;;;;;;;;;;;;OAmBG;gBACS,MAAM,CAAC,EAAE,qBAAqB;IAW1C;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,aAAa,EAAE;IAmDxE;;;;;;;;;;OAUG;IACH,OAAO,CAAC,gBAAgB;IAgHxB;;;;;;;;;;;;OAYG;IACH,OAAO,CAAC,qBAAqB;IAkD7B;;;;;;;;;;OAUG;IACH,OAAO,CAAC,gBAAgB;IA+CxB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,UAAU;IAqClB;;;;;;;;;OASG;IACH,OAAO,CAAC,mBAAmB;CAwB5B"}
|
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Semantic text chunker that splits on natural boundaries instead
|
|
3
|
+
* of fixed character counts.
|
|
4
|
+
*
|
|
5
|
+
* Four-tier splitting strategy:
|
|
6
|
+
* 1. **Heading boundaries** — Markdown headings (`# ## ###` etc.) start new chunks
|
|
7
|
+
* 2. **Paragraph boundaries** — Double newlines are the preferred split point
|
|
8
|
+
* 3. **Sentence boundaries** — Period/exclamation/question followed by whitespace
|
|
9
|
+
* 4. **Fixed-size fallback** — Only when paragraphs/sentences exceed `maxSize`
|
|
10
|
+
*
|
|
11
|
+
* Each chunk preserves complete sentences/paragraphs and includes configurable
|
|
12
|
+
* overlap context from the previous chunk for retrieval continuity.
|
|
13
|
+
*
|
|
14
|
+
* Special handling:
|
|
15
|
+
* - **Code blocks** (fenced with triple backticks) are kept intact when possible
|
|
16
|
+
* - **Markdown headings** always start new chunks for better section-level retrieval
|
|
17
|
+
* - **Small fragments** below `minSize` are merged with the previous chunk
|
|
18
|
+
*
|
|
19
|
+
* @module agentos/rag/chunking/SemanticChunker
|
|
20
|
+
* @see RetrievalAugmentor for integration with the RAG pipeline
|
|
21
|
+
*/
|
|
22
|
+
// ── Internal helpers ──────────────────────────────────────────────────────
|
|
23
|
+
/**
|
|
24
|
+
* Regex matching Markdown heading lines (# through ######).
|
|
25
|
+
* Captures the heading line at the start of a string or after a newline.
|
|
26
|
+
*/
|
|
27
|
+
const HEADING_RE = /(?:^|\n)(#{1,6}\s+.+)/;
|
|
28
|
+
/**
|
|
29
|
+
* Regex for fenced code block start/end markers.
|
|
30
|
+
*/
|
|
31
|
+
const CODE_FENCE_RE = /^```/;
|
|
32
|
+
/**
|
|
33
|
+
* Regex for sentence boundaries: `. ` or `! ` or `? ` followed by an
|
|
34
|
+
* uppercase letter or end of text. Also matches after newline.
|
|
35
|
+
*/
|
|
36
|
+
const SENTENCE_BOUNDARY_RE = /[.!?]\s+(?=[A-Z\n])|[.!?]\s*$/;
|
|
37
|
+
// ── Semantic Chunker ──────────────────────────────────────────────────────
|
|
38
|
+
/**
|
|
39
|
+
* Semantic text chunker that splits on natural boundaries instead of
|
|
40
|
+
* fixed character counts.
|
|
41
|
+
*
|
|
42
|
+
* Produces chunks that are more semantically coherent than fixed-size
|
|
43
|
+
* splitting, improving retrieval quality by keeping related ideas together.
|
|
44
|
+
*
|
|
45
|
+
* @example Basic usage
|
|
46
|
+
* ```typescript
|
|
47
|
+
* const chunker = new SemanticChunker({ targetSize: 800, overlap: 50 });
|
|
48
|
+
* const chunks = chunker.chunk(markdownDocument);
|
|
49
|
+
* for (const c of chunks) {
|
|
50
|
+
* console.log(`Chunk ${c.index} (${c.boundaryType}): ${c.text.length} chars`);
|
|
51
|
+
* }
|
|
52
|
+
* ```
|
|
53
|
+
*
|
|
54
|
+
* @example Preserving code blocks
|
|
55
|
+
* ```typescript
|
|
56
|
+
* const chunker = new SemanticChunker({
|
|
57
|
+
* targetSize: 1000,
|
|
58
|
+
* maxSize: 3000, // Allow larger chunks for code blocks
|
|
59
|
+
* preserveCodeBlocks: true,
|
|
60
|
+
* });
|
|
61
|
+
* const chunks = chunker.chunk(technicalDoc);
|
|
62
|
+
* ```
|
|
63
|
+
*/
|
|
64
|
+
export class SemanticChunker {
|
|
65
|
+
/**
|
|
66
|
+
* Creates a new SemanticChunker.
|
|
67
|
+
*
|
|
68
|
+
* @param {SemanticChunkerConfig} [config] - Chunking configuration.
|
|
69
|
+
* @param {number} [config.targetSize=1000] - Target chunk size in characters.
|
|
70
|
+
* @param {number} [config.maxSize=2000] - Maximum chunk size (hard limit).
|
|
71
|
+
* @param {number} [config.minSize=200] - Minimum chunk size before merging.
|
|
72
|
+
* @param {number} [config.overlap=100] - Overlap characters from previous chunk.
|
|
73
|
+
* @param {boolean} [config.preserveCodeBlocks=true] - Keep code blocks intact.
|
|
74
|
+
* @param {boolean} [config.respectHeadings=true] - Start new chunks at headings.
|
|
75
|
+
*
|
|
76
|
+
* @example
|
|
77
|
+
* ```typescript
|
|
78
|
+
* const chunker = new SemanticChunker({
|
|
79
|
+
* targetSize: 800,
|
|
80
|
+
* maxSize: 1500,
|
|
81
|
+
* overlap: 80,
|
|
82
|
+
* });
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
constructor(config) {
|
|
86
|
+
this.config = {
|
|
87
|
+
targetSize: config?.targetSize ?? 1000,
|
|
88
|
+
maxSize: config?.maxSize ?? 2000,
|
|
89
|
+
minSize: config?.minSize ?? 200,
|
|
90
|
+
overlap: config?.overlap ?? 100,
|
|
91
|
+
preserveCodeBlocks: config?.preserveCodeBlocks ?? true,
|
|
92
|
+
respectHeadings: config?.respectHeadings ?? true,
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Splits text into semantically coherent chunks.
|
|
97
|
+
*
|
|
98
|
+
* Pipeline:
|
|
99
|
+
* 1. Pre-process: extract code blocks (if `preserveCodeBlocks`)
|
|
100
|
+
* 2. Split by headings (if `respectHeadings`) — each heading starts a new section
|
|
101
|
+
* 3. Within sections, split by paragraphs (double newline)
|
|
102
|
+
* 4. If a paragraph exceeds `maxSize`, split by sentences
|
|
103
|
+
* 5. If a sentence exceeds `maxSize`, split at word boundaries (fixed fallback)
|
|
104
|
+
* 6. Merge small fragments (< `minSize`) with the previous chunk
|
|
105
|
+
* 7. Add overlap from the end of the previous chunk to each chunk
|
|
106
|
+
*
|
|
107
|
+
* @param {string} text - The full text to chunk.
|
|
108
|
+
* @param {Record<string, unknown>} [metadata] - Optional metadata attached to all chunks.
|
|
109
|
+
* @returns {SemanticChunk[]} Array of chunks in order.
|
|
110
|
+
* @throws {Error} If text is empty.
|
|
111
|
+
*
|
|
112
|
+
* @example
|
|
113
|
+
* ```typescript
|
|
114
|
+
* const chunks = chunker.chunk(
|
|
115
|
+
* '# Introduction\n\nFirst paragraph.\n\n## Details\n\nSecond paragraph.',
|
|
116
|
+
* { source: 'docs/readme.md' },
|
|
117
|
+
* );
|
|
118
|
+
* // chunks[0].boundaryType === 'heading'
|
|
119
|
+
* // chunks[0].text includes "# Introduction\n\nFirst paragraph."
|
|
120
|
+
* ```
|
|
121
|
+
*/
|
|
122
|
+
chunk(text, metadata) {
|
|
123
|
+
if (!text || text.trim().length === 0) {
|
|
124
|
+
return [];
|
|
125
|
+
}
|
|
126
|
+
// Step 1: Split into raw segments by headings and code blocks
|
|
127
|
+
const rawSegments = this.splitByStructure(text);
|
|
128
|
+
// Step 2: Split oversized segments further by paragraphs, sentences, or fixed
|
|
129
|
+
const refinedSegments = [];
|
|
130
|
+
for (const segment of rawSegments) {
|
|
131
|
+
if (segment.text.length <= this.config.maxSize) {
|
|
132
|
+
refinedSegments.push(segment);
|
|
133
|
+
}
|
|
134
|
+
else {
|
|
135
|
+
// Further split this oversized segment
|
|
136
|
+
const subSegments = this.splitOversizedSegment(segment.text, segment.offset);
|
|
137
|
+
refinedSegments.push(...subSegments);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
// Step 3: Merge fragments smaller than minSize with the previous chunk
|
|
141
|
+
const merged = this.mergeSmallFragments(refinedSegments);
|
|
142
|
+
// Step 4: Build final chunks with overlap
|
|
143
|
+
const chunks = [];
|
|
144
|
+
for (let i = 0; i < merged.length; i++) {
|
|
145
|
+
const segment = merged[i];
|
|
146
|
+
let chunkText = segment.text;
|
|
147
|
+
// Add overlap from previous chunk
|
|
148
|
+
if (i > 0 && this.config.overlap > 0) {
|
|
149
|
+
const prevText = merged[i - 1].text;
|
|
150
|
+
const overlapText = prevText.slice(-this.config.overlap);
|
|
151
|
+
if (overlapText.length > 0) {
|
|
152
|
+
chunkText = overlapText + chunkText;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
chunks.push({
|
|
156
|
+
text: chunkText,
|
|
157
|
+
index: i,
|
|
158
|
+
startOffset: segment.offset,
|
|
159
|
+
endOffset: segment.offset + segment.text.length,
|
|
160
|
+
boundaryType: segment.boundary,
|
|
161
|
+
metadata,
|
|
162
|
+
});
|
|
163
|
+
}
|
|
164
|
+
return chunks;
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Splits text into structural segments based on headings and code blocks.
|
|
168
|
+
*
|
|
169
|
+
* This is the first pass that identifies major structural boundaries:
|
|
170
|
+
* - Markdown headings always start new segments
|
|
171
|
+
* - Fenced code blocks are kept as single segments when possible
|
|
172
|
+
* - Remaining text is split by paragraphs (double newline)
|
|
173
|
+
*
|
|
174
|
+
* @param {string} text - Full document text.
|
|
175
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Segments.
|
|
176
|
+
*/
|
|
177
|
+
splitByStructure(text) {
|
|
178
|
+
const segments = [];
|
|
179
|
+
const lines = text.split('\n');
|
|
180
|
+
let currentSegment = '';
|
|
181
|
+
let currentOffset = 0;
|
|
182
|
+
let segmentStart = 0;
|
|
183
|
+
let currentBoundary = 'paragraph';
|
|
184
|
+
let inCodeBlock = false;
|
|
185
|
+
let codeBlockStart = 0;
|
|
186
|
+
let codeBlockContent = '';
|
|
187
|
+
for (let i = 0; i < lines.length; i++) {
|
|
188
|
+
const line = lines[i];
|
|
189
|
+
const lineOffset = currentOffset;
|
|
190
|
+
currentOffset += line.length + 1; // +1 for the newline
|
|
191
|
+
// Handle code block boundaries
|
|
192
|
+
if (this.config.preserveCodeBlocks && CODE_FENCE_RE.test(line.trim())) {
|
|
193
|
+
if (!inCodeBlock) {
|
|
194
|
+
// Starting a code block — flush current segment first
|
|
195
|
+
if (currentSegment.trim().length > 0) {
|
|
196
|
+
segments.push({
|
|
197
|
+
text: currentSegment,
|
|
198
|
+
offset: segmentStart,
|
|
199
|
+
boundary: currentBoundary,
|
|
200
|
+
});
|
|
201
|
+
}
|
|
202
|
+
inCodeBlock = true;
|
|
203
|
+
codeBlockStart = lineOffset;
|
|
204
|
+
codeBlockContent = line + '\n';
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
else {
|
|
208
|
+
// Ending a code block
|
|
209
|
+
codeBlockContent += line;
|
|
210
|
+
segments.push({
|
|
211
|
+
text: codeBlockContent,
|
|
212
|
+
offset: codeBlockStart,
|
|
213
|
+
boundary: 'code-block',
|
|
214
|
+
});
|
|
215
|
+
inCodeBlock = false;
|
|
216
|
+
codeBlockContent = '';
|
|
217
|
+
currentSegment = '';
|
|
218
|
+
segmentStart = currentOffset;
|
|
219
|
+
currentBoundary = 'paragraph';
|
|
220
|
+
continue;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
if (inCodeBlock) {
|
|
224
|
+
codeBlockContent += line + '\n';
|
|
225
|
+
continue;
|
|
226
|
+
}
|
|
227
|
+
// Handle headings
|
|
228
|
+
if (this.config.respectHeadings && /^#{1,6}\s+/.test(line)) {
|
|
229
|
+
// Flush current segment
|
|
230
|
+
if (currentSegment.trim().length > 0) {
|
|
231
|
+
segments.push({
|
|
232
|
+
text: currentSegment,
|
|
233
|
+
offset: segmentStart,
|
|
234
|
+
boundary: currentBoundary,
|
|
235
|
+
});
|
|
236
|
+
}
|
|
237
|
+
currentSegment = line + '\n';
|
|
238
|
+
segmentStart = lineOffset;
|
|
239
|
+
currentBoundary = 'heading';
|
|
240
|
+
continue;
|
|
241
|
+
}
|
|
242
|
+
// Check for paragraph boundary (empty line)
|
|
243
|
+
if (line.trim() === '' && currentSegment.trim().length > 0) {
|
|
244
|
+
// Check if current segment is at or near target size — if so, split here
|
|
245
|
+
if (currentSegment.length >= this.config.targetSize) {
|
|
246
|
+
segments.push({
|
|
247
|
+
text: currentSegment,
|
|
248
|
+
offset: segmentStart,
|
|
249
|
+
boundary: currentBoundary,
|
|
250
|
+
});
|
|
251
|
+
currentSegment = '';
|
|
252
|
+
segmentStart = currentOffset;
|
|
253
|
+
currentBoundary = 'paragraph';
|
|
254
|
+
continue;
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
// Accumulate into current segment
|
|
258
|
+
currentSegment += line + '\n';
|
|
259
|
+
}
|
|
260
|
+
// Handle unclosed code block
|
|
261
|
+
if (inCodeBlock && codeBlockContent.trim().length > 0) {
|
|
262
|
+
segments.push({
|
|
263
|
+
text: codeBlockContent,
|
|
264
|
+
offset: codeBlockStart,
|
|
265
|
+
boundary: 'code-block',
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
// Flush remaining segment
|
|
269
|
+
if (currentSegment.trim().length > 0) {
|
|
270
|
+
segments.push({
|
|
271
|
+
text: currentSegment,
|
|
272
|
+
offset: segmentStart,
|
|
273
|
+
boundary: currentBoundary,
|
|
274
|
+
});
|
|
275
|
+
}
|
|
276
|
+
return segments;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Further splits an oversized segment by paragraph and sentence boundaries.
|
|
280
|
+
*
|
|
281
|
+
* Called when a structural segment exceeds `maxSize`. Tries progressively
|
|
282
|
+
* smaller split granularity:
|
|
283
|
+
* 1. Paragraph splits (double newline)
|
|
284
|
+
* 2. Sentence splits (period/exclamation/question + space + uppercase)
|
|
285
|
+
* 3. Word boundary splits (fixed-size fallback)
|
|
286
|
+
*
|
|
287
|
+
* @param {string} text - Oversized segment text.
|
|
288
|
+
* @param {number} baseOffset - Character offset of this segment in the original text.
|
|
289
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Sub-segments.
|
|
290
|
+
*/
|
|
291
|
+
splitOversizedSegment(text, baseOffset) {
|
|
292
|
+
// Try paragraph splitting first
|
|
293
|
+
const paragraphs = text.split(/\n\s*\n/);
|
|
294
|
+
if (paragraphs.length > 1) {
|
|
295
|
+
const results = [];
|
|
296
|
+
let accumulated = '';
|
|
297
|
+
let accOffset = baseOffset;
|
|
298
|
+
let runningOffset = baseOffset;
|
|
299
|
+
for (const para of paragraphs) {
|
|
300
|
+
if (accumulated.length > 0 && accumulated.length + para.length + 2 > this.config.targetSize) {
|
|
301
|
+
// Flush accumulated
|
|
302
|
+
if (accumulated.length > this.config.maxSize) {
|
|
303
|
+
// Even accumulated is too large — split by sentences
|
|
304
|
+
results.push(...this.splitBySentences(accumulated, accOffset));
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
results.push({ text: accumulated, offset: accOffset, boundary: 'paragraph' });
|
|
308
|
+
}
|
|
309
|
+
accumulated = para;
|
|
310
|
+
accOffset = runningOffset;
|
|
311
|
+
}
|
|
312
|
+
else {
|
|
313
|
+
if (accumulated.length > 0) {
|
|
314
|
+
accumulated += '\n\n' + para;
|
|
315
|
+
}
|
|
316
|
+
else {
|
|
317
|
+
accumulated = para;
|
|
318
|
+
accOffset = runningOffset;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
runningOffset += para.length + 2; // +2 for the \n\n separator
|
|
322
|
+
}
|
|
323
|
+
// Flush remaining
|
|
324
|
+
if (accumulated.trim().length > 0) {
|
|
325
|
+
if (accumulated.length > this.config.maxSize) {
|
|
326
|
+
results.push(...this.splitBySentences(accumulated, accOffset));
|
|
327
|
+
}
|
|
328
|
+
else {
|
|
329
|
+
results.push({ text: accumulated, offset: accOffset, boundary: 'paragraph' });
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
return results;
|
|
333
|
+
}
|
|
334
|
+
// No paragraph boundaries — try sentences
|
|
335
|
+
return this.splitBySentences(text, baseOffset);
|
|
336
|
+
}
|
|
337
|
+
/**
|
|
338
|
+
* Splits text by sentence boundaries.
|
|
339
|
+
*
|
|
340
|
+
* Detects sentence endings (`.` `!` `?` followed by whitespace) and accumulates
|
|
341
|
+
* sentences until reaching `targetSize`. Falls back to word-boundary splitting
|
|
342
|
+
* for sentences exceeding `maxSize`.
|
|
343
|
+
*
|
|
344
|
+
* @param {string} text - Text to split by sentences.
|
|
345
|
+
* @param {number} baseOffset - Character offset in the original text.
|
|
346
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Sentence-split chunks.
|
|
347
|
+
*/
|
|
348
|
+
splitBySentences(text, baseOffset) {
|
|
349
|
+
// Split on sentence boundaries
|
|
350
|
+
const sentences = text.split(/(?<=[.!?])\s+/);
|
|
351
|
+
if (sentences.length <= 1) {
|
|
352
|
+
// No sentence boundaries — fall back to fixed splitting
|
|
353
|
+
return this.splitFixed(text, baseOffset);
|
|
354
|
+
}
|
|
355
|
+
const results = [];
|
|
356
|
+
let accumulated = '';
|
|
357
|
+
let accOffset = baseOffset;
|
|
358
|
+
let runningOffset = baseOffset;
|
|
359
|
+
for (const sentence of sentences) {
|
|
360
|
+
if (accumulated.length > 0 && accumulated.length + sentence.length + 1 > this.config.targetSize) {
|
|
361
|
+
if (accumulated.length > this.config.maxSize) {
|
|
362
|
+
results.push(...this.splitFixed(accumulated, accOffset));
|
|
363
|
+
}
|
|
364
|
+
else {
|
|
365
|
+
results.push({ text: accumulated, offset: accOffset, boundary: 'sentence' });
|
|
366
|
+
}
|
|
367
|
+
accumulated = sentence;
|
|
368
|
+
accOffset = runningOffset;
|
|
369
|
+
}
|
|
370
|
+
else {
|
|
371
|
+
if (accumulated.length > 0) {
|
|
372
|
+
accumulated += ' ' + sentence;
|
|
373
|
+
}
|
|
374
|
+
else {
|
|
375
|
+
accumulated = sentence;
|
|
376
|
+
accOffset = runningOffset;
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
runningOffset += sentence.length + 1; // +1 for the space separator
|
|
380
|
+
}
|
|
381
|
+
if (accumulated.trim().length > 0) {
|
|
382
|
+
if (accumulated.length > this.config.maxSize) {
|
|
383
|
+
results.push(...this.splitFixed(accumulated, accOffset));
|
|
384
|
+
}
|
|
385
|
+
else {
|
|
386
|
+
results.push({ text: accumulated, offset: accOffset, boundary: 'sentence' });
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
return results;
|
|
390
|
+
}
|
|
391
|
+
/**
|
|
392
|
+
* Last-resort fixed-size splitting at word boundaries.
|
|
393
|
+
*
|
|
394
|
+
* Splits text at the last space before `targetSize` to avoid breaking words.
|
|
395
|
+
* This is only used when no paragraph or sentence boundaries are available
|
|
396
|
+
* within a segment that exceeds `maxSize`.
|
|
397
|
+
*
|
|
398
|
+
* @param {string} text - Text to split at word boundaries.
|
|
399
|
+
* @param {number} baseOffset - Character offset in the original text.
|
|
400
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Fixed-size chunks.
|
|
401
|
+
*/
|
|
402
|
+
splitFixed(text, baseOffset) {
|
|
403
|
+
const results = [];
|
|
404
|
+
let position = 0;
|
|
405
|
+
while (position < text.length) {
|
|
406
|
+
let end = Math.min(position + this.config.targetSize, text.length);
|
|
407
|
+
// If not at the end, try to break at a word boundary
|
|
408
|
+
if (end < text.length) {
|
|
409
|
+
const lastSpace = text.lastIndexOf(' ', end);
|
|
410
|
+
if (lastSpace > position + this.config.minSize) {
|
|
411
|
+
end = lastSpace;
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
const chunk = text.slice(position, end).trim();
|
|
415
|
+
if (chunk.length > 0) {
|
|
416
|
+
results.push({
|
|
417
|
+
text: chunk,
|
|
418
|
+
offset: baseOffset + position,
|
|
419
|
+
boundary: 'fixed',
|
|
420
|
+
});
|
|
421
|
+
}
|
|
422
|
+
position = end;
|
|
423
|
+
// Skip whitespace after split point
|
|
424
|
+
while (position < text.length && text[position] === ' ') {
|
|
425
|
+
position++;
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
return results;
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* Merges fragments smaller than `minSize` with the previous chunk.
|
|
432
|
+
*
|
|
433
|
+
* Small trailing fragments (e.g., a short concluding sentence) are merged
|
|
434
|
+
* backwards to prevent creating chunks that are too small for meaningful
|
|
435
|
+
* embedding.
|
|
436
|
+
*
|
|
437
|
+
* @param {Array<{ text: string; offset: number; boundary: BoundaryType }>} segments - Input segments.
|
|
438
|
+
* @returns {Array<{ text: string; offset: number; boundary: BoundaryType }>} Segments with small ones merged.
|
|
439
|
+
*/
|
|
440
|
+
mergeSmallFragments(segments) {
|
|
441
|
+
if (segments.length <= 1)
|
|
442
|
+
return segments;
|
|
443
|
+
const merged = [];
|
|
444
|
+
for (const segment of segments) {
|
|
445
|
+
if (merged.length > 0 &&
|
|
446
|
+
segment.text.trim().length < this.config.minSize &&
|
|
447
|
+
segment.boundary !== 'heading' &&
|
|
448
|
+
segment.boundary !== 'code-block') {
|
|
449
|
+
// Merge with previous
|
|
450
|
+
const prev = merged[merged.length - 1];
|
|
451
|
+
prev.text += '\n\n' + segment.text;
|
|
452
|
+
}
|
|
453
|
+
else {
|
|
454
|
+
merged.push({ ...segment });
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
return merged;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
//# sourceMappingURL=SemanticChunker.js.map
|