@juspay/neurolink 9.32.0 → 9.32.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/anthropicOAuth.js +1 -1
- package/dist/cli/commands/proxy.js +18 -5
- package/dist/client/aiSdkAdapter.js +1 -1
- package/dist/client/index.js +137 -501
- package/dist/core/factory.js +0 -1
- package/dist/core/redisConversationMemoryManager.js +1 -1
- package/dist/features/ppt/slideGenerator.js +0 -1
- package/dist/features/ppt/utils.js +0 -1
- package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
- package/dist/mcp/elicitationProtocol.js +1 -1
- package/dist/mcp/servers/agent/directToolsServer.js +0 -1
- package/dist/providers/azureOpenai.js +1 -1
- package/dist/providers/huggingFace.js +0 -1
- package/dist/providers/openaiCompatible.js +0 -1
- package/dist/sdk/toolRegistration.js +0 -1
- package/dist/server/openapi/generator.js +1 -1
- package/dist/server/routes/claudeProxyRoutes.js +45 -9
- package/dist/types/configTypes.js +0 -5
- package/dist/types/modelTypes.js +0 -1
- package/dist/types/tools.js +0 -1
- package/dist/types/typeAliases.js +0 -1
- package/dist/types/utilities.js +1 -1
- package/dist/types/workflowTypes.js +0 -1
- package/dist/utils/providerRetry.js +0 -1
- package/dist/utils/providerUtils.js +0 -1
- package/package.json +2 -2
- package/dist/client/adapters/providerImageAdapter.js +0 -588
- package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
- package/dist/client/adapters/video/directorPipeline.js +0 -516
- package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
- package/dist/client/adapters/video/frameExtractor.js +0 -143
- package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
- package/dist/client/adapters/video/videoAnalyzer.js +0 -238
- package/dist/client/adapters/video/videoMerger.js +0 -171
- package/dist/client/agent/directTools.js +0 -840
- package/dist/client/auth/AuthProviderFactory.js +0 -111
- package/dist/client/auth/AuthProviderRegistry.js +0 -190
- package/dist/client/auth/RequestContext.js +0 -78
- package/dist/client/auth/accountPool.js +0 -178
- package/dist/client/auth/anthropicOAuth.js +0 -974
- package/dist/client/auth/authContext.js +0 -314
- package/dist/client/auth/errors.js +0 -39
- package/dist/client/auth/index.js +0 -61
- package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
- package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
- package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
- package/dist/client/auth/providers/CognitoProvider.js +0 -304
- package/dist/client/auth/providers/KeycloakProvider.js +0 -393
- package/dist/client/auth/providers/auth0.js +0 -274
- package/dist/client/auth/providers/betterAuth.js +0 -182
- package/dist/client/auth/providers/clerk.js +0 -317
- package/dist/client/auth/providers/custom.js +0 -112
- package/dist/client/auth/providers/firebase.js +0 -226
- package/dist/client/auth/providers/jwt.js +0 -212
- package/dist/client/auth/providers/oauth2.js +0 -303
- package/dist/client/auth/providers/supabase.js +0 -259
- package/dist/client/auth/providers/workos.js +0 -284
- package/dist/client/auth/serverBridge.js +0 -25
- package/dist/client/auth/sessionManager.js +0 -437
- package/dist/client/auth/tokenStore.js +0 -799
- package/dist/client/client/aiSdkAdapter.js +0 -487
- package/dist/client/client/auth.js +0 -473
- package/dist/client/client/errors.js +0 -552
- package/dist/client/client/httpClient.js +0 -837
- package/dist/client/client/index.js +0 -172
- package/dist/client/client/interceptors.js +0 -601
- package/dist/client/client/sseClient.js +0 -545
- package/dist/client/client/streamingClient.js +0 -917
- package/dist/client/client/wsClient.js +0 -369
- package/dist/client/config/configManager.js +0 -303
- package/dist/client/config/conversationMemory.js +0 -86
- package/dist/client/config/taskClassificationConfig.js +0 -148
- package/dist/client/constants/contextWindows.js +0 -295
- package/dist/client/constants/enums.js +0 -853
- package/dist/client/constants/index.js +0 -207
- package/dist/client/constants/performance.js +0 -389
- package/dist/client/constants/retry.js +0 -266
- package/dist/client/constants/timeouts.js +0 -182
- package/dist/client/constants/tokens.js +0 -380
- package/dist/client/constants/videoErrors.js +0 -46
- package/dist/client/context/budgetChecker.js +0 -98
- package/dist/client/context/contextCompactor.js +0 -205
- package/dist/client/context/emergencyTruncation.js +0 -88
- package/dist/client/context/errorDetection.js +0 -171
- package/dist/client/context/errors.js +0 -21
- package/dist/client/context/fileTokenBudget.js +0 -127
- package/dist/client/context/prompts/summarizationPrompt.js +0 -117
- package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
- package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
- package/dist/client/context/stages/structuredSummarizer.js +0 -99
- package/dist/client/context/stages/toolOutputPruner.js +0 -52
- package/dist/client/context/summarizationEngine.js +0 -136
- package/dist/client/context/toolOutputLimits.js +0 -78
- package/dist/client/context/toolPairRepair.js +0 -66
- package/dist/client/core/analytics.js +0 -88
- package/dist/client/core/baseProvider.js +0 -1385
- package/dist/client/core/constants.js +0 -140
- package/dist/client/core/conversationMemoryFactory.js +0 -141
- package/dist/client/core/conversationMemoryInitializer.js +0 -128
- package/dist/client/core/conversationMemoryManager.js +0 -344
- package/dist/client/core/dynamicModels.js +0 -358
- package/dist/client/core/evaluation.js +0 -309
- package/dist/client/core/evaluationProviders.js +0 -248
- package/dist/client/core/factory.js +0 -412
- package/dist/client/core/infrastructure/baseError.js +0 -22
- package/dist/client/core/infrastructure/baseFactory.js +0 -54
- package/dist/client/core/infrastructure/baseRegistry.js +0 -53
- package/dist/client/core/infrastructure/index.js +0 -5
- package/dist/client/core/infrastructure/retry.js +0 -20
- package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
- package/dist/client/core/modelConfiguration.js +0 -851
- package/dist/client/core/modules/GenerationHandler.js +0 -588
- package/dist/client/core/modules/MessageBuilder.js +0 -273
- package/dist/client/core/modules/StreamHandler.js +0 -185
- package/dist/client/core/modules/TelemetryHandler.js +0 -203
- package/dist/client/core/modules/ToolsManager.js +0 -499
- package/dist/client/core/modules/Utilities.js +0 -331
- package/dist/client/core/redisConversationMemoryManager.js +0 -1435
- package/dist/client/core/streamAnalytics.js +0 -131
- package/dist/client/evaluation/contextBuilder.js +0 -134
- package/dist/client/evaluation/index.js +0 -61
- package/dist/client/evaluation/prompts.js +0 -73
- package/dist/client/evaluation/ragasEvaluator.js +0 -110
- package/dist/client/evaluation/retryManager.js +0 -78
- package/dist/client/evaluation/scoring.js +0 -61
- package/dist/client/factories/providerFactory.js +0 -166
- package/dist/client/factories/providerRegistry.js +0 -166
- package/dist/client/features/ppt/constants.js +0 -896
- package/dist/client/features/ppt/contentPlanner.js +0 -529
- package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
- package/dist/client/features/ppt/slideGenerator.js +0 -532
- package/dist/client/features/ppt/slideRenderers.js +0 -2383
- package/dist/client/features/ppt/slideTypeInference.js +0 -405
- package/dist/client/features/ppt/types.js +0 -13
- package/dist/client/features/ppt/utils.js +0 -443
- package/dist/client/files/fileReferenceRegistry.js +0 -1543
- package/dist/client/files/fileTools.js +0 -450
- package/dist/client/files/streamingReader.js +0 -321
- package/dist/client/files/types.js +0 -23
- package/dist/client/hitl/hitlErrors.js +0 -54
- package/dist/client/hitl/hitlManager.js +0 -460
- package/dist/client/mcp/agentExposure.js +0 -356
- package/dist/client/mcp/auth/index.js +0 -11
- package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
- package/dist/client/mcp/auth/tokenStorage.js +0 -134
- package/dist/client/mcp/batching/index.js +0 -10
- package/dist/client/mcp/batching/requestBatcher.js +0 -441
- package/dist/client/mcp/caching/index.js +0 -10
- package/dist/client/mcp/caching/toolCache.js +0 -433
- package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
- package/dist/client/mcp/elicitation/index.js +0 -11
- package/dist/client/mcp/elicitation/types.js +0 -10
- package/dist/client/mcp/elicitationProtocol.js +0 -375
- package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
- package/dist/client/mcp/externalServerManager.js +0 -1478
- package/dist/client/mcp/factory.js +0 -161
- package/dist/client/mcp/flexibleToolValidator.js +0 -161
- package/dist/client/mcp/httpRateLimiter.js +0 -391
- package/dist/client/mcp/httpRetryHandler.js +0 -178
- package/dist/client/mcp/index.js +0 -74
- package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
- package/dist/client/mcp/mcpClientFactory.js +0 -708
- package/dist/client/mcp/mcpRegistryClient.js +0 -488
- package/dist/client/mcp/mcpServerBase.js +0 -373
- package/dist/client/mcp/multiServerManager.js +0 -579
- package/dist/client/mcp/registry.js +0 -158
- package/dist/client/mcp/routing/index.js +0 -10
- package/dist/client/mcp/routing/toolRouter.js +0 -416
- package/dist/client/mcp/serverCapabilities.js +0 -502
- package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
- package/dist/client/mcp/toolAnnotations.js +0 -239
- package/dist/client/mcp/toolConverter.js +0 -258
- package/dist/client/mcp/toolDiscoveryService.js +0 -798
- package/dist/client/mcp/toolIntegration.js +0 -334
- package/dist/client/mcp/toolRegistry.js +0 -729
- package/dist/client/memory/hippocampusInitializer.js +0 -19
- package/dist/client/memory/memoryRetrievalTools.js +0 -166
- package/dist/client/middleware/builtin/analytics.js +0 -132
- package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
- package/dist/client/middleware/builtin/guardrails.js +0 -109
- package/dist/client/middleware/builtin/lifecycle.js +0 -168
- package/dist/client/middleware/factory.js +0 -327
- package/dist/client/middleware/registry.js +0 -295
- package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
- package/dist/client/models/anthropicModels.js +0 -527
- package/dist/client/neurolink.js +0 -8233
- package/dist/client/observability/exporterRegistry.js +0 -413
- package/dist/client/observability/exporters/arizeExporter.js +0 -138
- package/dist/client/observability/exporters/baseExporter.js +0 -190
- package/dist/client/observability/exporters/braintrustExporter.js +0 -154
- package/dist/client/observability/exporters/datadogExporter.js +0 -196
- package/dist/client/observability/exporters/laminarExporter.js +0 -302
- package/dist/client/observability/exporters/langfuseExporter.js +0 -209
- package/dist/client/observability/exporters/langsmithExporter.js +0 -143
- package/dist/client/observability/exporters/otelExporter.js +0 -164
- package/dist/client/observability/exporters/posthogExporter.js +0 -287
- package/dist/client/observability/exporters/sentryExporter.js +0 -165
- package/dist/client/observability/index.js +0 -31
- package/dist/client/observability/metricsAggregator.js +0 -556
- package/dist/client/observability/otelBridge.js +0 -131
- package/dist/client/observability/retryPolicy.js +0 -383
- package/dist/client/observability/sampling/samplers.js +0 -216
- package/dist/client/observability/spanProcessor.js +0 -303
- package/dist/client/observability/tokenTracker.js +0 -413
- package/dist/client/observability/types/exporterTypes.js +0 -5
- package/dist/client/observability/types/index.js +0 -4
- package/dist/client/observability/types/spanTypes.js +0 -92
- package/dist/client/observability/utils/safeMetadata.js +0 -25
- package/dist/client/observability/utils/spanSerializer.js +0 -292
- package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
- package/dist/client/processors/base/BaseFileProcessor.js +0 -614
- package/dist/client/processors/base/types.js +0 -82
- package/dist/client/processors/config/fileTypes.js +0 -520
- package/dist/client/processors/config/index.js +0 -92
- package/dist/client/processors/config/languageMap.js +0 -410
- package/dist/client/processors/config/mimeTypes.js +0 -363
- package/dist/client/processors/config/sizeLimits.js +0 -258
- package/dist/client/processors/document/ExcelProcessor.js +0 -590
- package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
- package/dist/client/processors/document/PptxProcessor.js +0 -157
- package/dist/client/processors/document/RtfProcessor.js +0 -361
- package/dist/client/processors/document/WordProcessor.js +0 -353
- package/dist/client/processors/errors/FileErrorCode.js +0 -255
- package/dist/client/processors/errors/errorHelpers.js +0 -386
- package/dist/client/processors/errors/errorSerializer.js +0 -507
- package/dist/client/processors/errors/index.js +0 -49
- package/dist/client/processors/markup/SvgProcessor.js +0 -240
- package/dist/client/processors/media/AudioProcessor.js +0 -707
- package/dist/client/processors/media/VideoProcessor.js +0 -1045
- package/dist/client/providers/amazonBedrock.js +0 -1512
- package/dist/client/providers/amazonSagemaker.js +0 -162
- package/dist/client/providers/anthropic.js +0 -831
- package/dist/client/providers/azureOpenai.js +0 -143
- package/dist/client/providers/googleAiStudio.js +0 -1200
- package/dist/client/providers/googleNativeGemini3.js +0 -543
- package/dist/client/providers/googleVertex.js +0 -2936
- package/dist/client/providers/huggingFace.js +0 -315
- package/dist/client/providers/litellm.js +0 -488
- package/dist/client/providers/mistral.js +0 -157
- package/dist/client/providers/ollama.js +0 -1579
- package/dist/client/providers/openAI.js +0 -627
- package/dist/client/providers/openRouter.js +0 -543
- package/dist/client/providers/openaiCompatible.js +0 -290
- package/dist/client/providers/providerTypeUtils.js +0 -46
- package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
- package/dist/client/providers/sagemaker/client.js +0 -472
- package/dist/client/providers/sagemaker/config.js +0 -317
- package/dist/client/providers/sagemaker/detection.js +0 -606
- package/dist/client/providers/sagemaker/error-constants.js +0 -227
- package/dist/client/providers/sagemaker/errors.js +0 -299
- package/dist/client/providers/sagemaker/language-model.js +0 -775
- package/dist/client/providers/sagemaker/parsers.js +0 -634
- package/dist/client/providers/sagemaker/streaming.js +0 -331
- package/dist/client/providers/sagemaker/structured-parser.js +0 -625
- package/dist/client/proxy/accountQuota.js +0 -162
- package/dist/client/proxy/claudeFormat.js +0 -595
- package/dist/client/proxy/modelRouter.js +0 -29
- package/dist/client/proxy/oauthFetch.js +0 -367
- package/dist/client/proxy/proxyFetch.js +0 -586
- package/dist/client/proxy/requestLogger.js +0 -207
- package/dist/client/proxy/tokenRefresh.js +0 -124
- package/dist/client/proxy/usageStats.js +0 -74
- package/dist/client/proxy/utils/noProxyUtils.js +0 -149
- package/dist/client/rag/ChunkerFactory.js +0 -320
- package/dist/client/rag/ChunkerRegistry.js +0 -421
- package/dist/client/rag/chunkers/BaseChunker.js +0 -143
- package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
- package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
- package/dist/client/rag/chunkers/JSONChunker.js +0 -68
- package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
- package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
- package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
- package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
- package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
- package/dist/client/rag/chunkers/TokenChunker.js +0 -61
- package/dist/client/rag/chunkers/index.js +0 -15
- package/dist/client/rag/chunking/characterChunker.js +0 -142
- package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
- package/dist/client/rag/chunking/htmlChunker.js +0 -247
- package/dist/client/rag/chunking/index.js +0 -17
- package/dist/client/rag/chunking/jsonChunker.js +0 -281
- package/dist/client/rag/chunking/latexChunker.js +0 -251
- package/dist/client/rag/chunking/markdownChunker.js +0 -373
- package/dist/client/rag/chunking/recursiveChunker.js +0 -148
- package/dist/client/rag/chunking/semanticChunker.js +0 -306
- package/dist/client/rag/chunking/sentenceChunker.js +0 -230
- package/dist/client/rag/chunking/tokenChunker.js +0 -183
- package/dist/client/rag/document/MDocument.js +0 -392
- package/dist/client/rag/document/index.js +0 -5
- package/dist/client/rag/document/loaders.js +0 -500
- package/dist/client/rag/errors/RAGError.js +0 -274
- package/dist/client/rag/errors/index.js +0 -6
- package/dist/client/rag/graphRag/graphRAG.js +0 -401
- package/dist/client/rag/graphRag/index.js +0 -4
- package/dist/client/rag/index.js +0 -141
- package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
- package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
- package/dist/client/rag/metadata/index.js +0 -9
- package/dist/client/rag/metadata/metadataExtractor.js +0 -280
- package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
- package/dist/client/rag/pipeline/contextAssembly.js +0 -341
- package/dist/client/rag/pipeline/index.js +0 -5
- package/dist/client/rag/ragIntegration.js +0 -321
- package/dist/client/rag/reranker/RerankerFactory.js +0 -430
- package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
- package/dist/client/rag/reranker/index.js +0 -9
- package/dist/client/rag/reranker/reranker.js +0 -277
- package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
- package/dist/client/rag/resilience/RetryHandler.js +0 -304
- package/dist/client/rag/resilience/index.js +0 -7
- package/dist/client/rag/retrieval/hybridSearch.js +0 -335
- package/dist/client/rag/retrieval/index.js +0 -5
- package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
- package/dist/client/rag/types.js +0 -8
- package/dist/client/sdk/toolRegistration.js +0 -377
- package/dist/client/server/abstract/baseServerAdapter.js +0 -575
- package/dist/client/server/adapters/expressAdapter.js +0 -486
- package/dist/client/server/adapters/fastifyAdapter.js +0 -472
- package/dist/client/server/adapters/honoAdapter.js +0 -632
- package/dist/client/server/adapters/koaAdapter.js +0 -510
- package/dist/client/server/errors.js +0 -486
- package/dist/client/server/factory/serverAdapterFactory.js +0 -160
- package/dist/client/server/index.js +0 -108
- package/dist/client/server/middleware/abortSignal.js +0 -111
- package/dist/client/server/middleware/auth.js +0 -388
- package/dist/client/server/middleware/cache.js +0 -359
- package/dist/client/server/middleware/common.js +0 -281
- package/dist/client/server/middleware/deprecation.js +0 -190
- package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
- package/dist/client/server/middleware/rateLimit.js +0 -227
- package/dist/client/server/middleware/validation.js +0 -388
- package/dist/client/server/openapi/generator.js +0 -398
- package/dist/client/server/openapi/index.js +0 -36
- package/dist/client/server/openapi/schemas.js +0 -695
- package/dist/client/server/openapi/templates.js +0 -374
- package/dist/client/server/routes/agentRoutes.js +0 -189
- package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
- package/dist/client/server/routes/healthRoutes.js +0 -187
- package/dist/client/server/routes/index.js +0 -57
- package/dist/client/server/routes/mcpRoutes.js +0 -342
- package/dist/client/server/routes/memoryRoutes.js +0 -350
- package/dist/client/server/routes/openApiRoutes.js +0 -126
- package/dist/client/server/routes/toolRoutes.js +0 -199
- package/dist/client/server/streaming/dataStream.js +0 -486
- package/dist/client/server/streaming/index.js +0 -11
- package/dist/client/server/types.js +0 -67
- package/dist/client/server/utils/redaction.js +0 -334
- package/dist/client/server/utils/validation.js +0 -243
- package/dist/client/server/websocket/WebSocketHandler.js +0 -383
- package/dist/client/server/websocket/index.js +0 -4
- package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
- package/dist/client/telemetry/attributes.js +0 -100
- package/dist/client/telemetry/index.js +0 -26
- package/dist/client/telemetry/telemetryService.js +0 -308
- package/dist/client/telemetry/tracers.js +0 -17
- package/dist/client/telemetry/withSpan.js +0 -34
- package/dist/client/types/actionTypes.js +0 -6
- package/dist/client/types/analytics.js +0 -5
- package/dist/client/types/authTypes.js +0 -9
- package/dist/client/types/circuitBreakerErrors.js +0 -34
- package/dist/client/types/cli.js +0 -21
- package/dist/client/types/clientTypes.js +0 -10
- package/dist/client/types/common.js +0 -51
- package/dist/client/types/configTypes.js +0 -49
- package/dist/client/types/content.js +0 -19
- package/dist/client/types/contextTypes.js +0 -400
- package/dist/client/types/conversation.js +0 -47
- package/dist/client/types/conversationMemoryInterface.js +0 -6
- package/dist/client/types/domainTypes.js +0 -5
- package/dist/client/types/errors.js +0 -167
- package/dist/client/types/evaluation.js +0 -5
- package/dist/client/types/evaluationProviders.js +0 -5
- package/dist/client/types/evaluationTypes.js +0 -1
- package/dist/client/types/externalMcp.js +0 -6
- package/dist/client/types/fileReferenceTypes.js +0 -8
- package/dist/client/types/fileTypes.js +0 -4
- package/dist/client/types/generateTypes.js +0 -1
- package/dist/client/types/guardrails.js +0 -1
- package/dist/client/types/hitlTypes.js +0 -8
- package/dist/client/types/index.js +0 -57
- package/dist/client/types/mcpTypes.js +0 -5
- package/dist/client/types/middlewareTypes.js +0 -1
- package/dist/client/types/modelTypes.js +0 -30
- package/dist/client/types/multimodal.js +0 -135
- package/dist/client/types/observability.js +0 -6
- package/dist/client/types/pptTypes.js +0 -82
- package/dist/client/types/providers.js +0 -111
- package/dist/client/types/proxyTypes.js +0 -16
- package/dist/client/types/ragTypes.js +0 -7
- package/dist/client/types/sdkTypes.js +0 -8
- package/dist/client/types/serviceTypes.js +0 -5
- package/dist/client/types/streamTypes.js +0 -1
- package/dist/client/types/subscriptionTypes.js +0 -9
- package/dist/client/types/taskClassificationTypes.js +0 -5
- package/dist/client/types/tools.js +0 -24
- package/dist/client/types/ttsTypes.js +0 -57
- package/dist/client/types/typeAliases.js +0 -48
- package/dist/client/types/utilities.js +0 -4
- package/dist/client/types/workflowTypes.js +0 -30
- package/dist/client/utils/async/withTimeout.js +0 -98
- package/dist/client/utils/asyncMutex.js +0 -60
- package/dist/client/utils/conversationMemory.js +0 -431
- package/dist/client/utils/csvProcessor.js +0 -846
- package/dist/client/utils/errorHandling.js +0 -936
- package/dist/client/utils/evaluationUtils.js +0 -131
- package/dist/client/utils/factoryProcessing.js +0 -589
- package/dist/client/utils/fileDetector.js +0 -2161
- package/dist/client/utils/imageCache.js +0 -376
- package/dist/client/utils/imageProcessor.js +0 -704
- package/dist/client/utils/logger.js +0 -491
- package/dist/client/utils/mcpDefaults.js +0 -134
- package/dist/client/utils/messageBuilder.js +0 -1653
- package/dist/client/utils/modelAliasResolver.js +0 -54
- package/dist/client/utils/modelDetection.js +0 -80
- package/dist/client/utils/modelRouter.js +0 -292
- package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
- package/dist/client/utils/observabilityHelpers.js +0 -47
- package/dist/client/utils/parameterValidation.js +0 -966
- package/dist/client/utils/pdfProcessor.js +0 -410
- package/dist/client/utils/performance.js +0 -222
- package/dist/client/utils/pricing.js +0 -340
- package/dist/client/utils/promptRedaction.js +0 -62
- package/dist/client/utils/providerConfig.js +0 -1009
- package/dist/client/utils/providerHealth.js +0 -1237
- package/dist/client/utils/providerRetry.js +0 -112
- package/dist/client/utils/providerUtils.js +0 -434
- package/dist/client/utils/rateLimiter.js +0 -200
- package/dist/client/utils/redis.js +0 -368
- package/dist/client/utils/retryHandler.js +0 -269
- package/dist/client/utils/retryability.js +0 -22
- package/dist/client/utils/sanitizers/svg.js +0 -481
- package/dist/client/utils/schemaConversion.js +0 -255
- package/dist/client/utils/taskClassificationUtils.js +0 -149
- package/dist/client/utils/taskClassifier.js +0 -94
- package/dist/client/utils/thinkingConfig.js +0 -104
- package/dist/client/utils/timeout.js +0 -359
- package/dist/client/utils/tokenEstimation.js +0 -142
- package/dist/client/utils/tokenLimits.js +0 -125
- package/dist/client/utils/tokenUtils.js +0 -239
- package/dist/client/utils/toolUtils.js +0 -75
- package/dist/client/utils/transformationUtils.js +0 -554
- package/dist/client/utils/ttsProcessor.js +0 -286
- package/dist/client/utils/typeUtils.js +0 -97
- package/dist/client/utils/videoAnalysisProcessor.js +0 -67
- package/dist/client/workflow/config.js +0 -398
- package/dist/client/workflow/core/ensembleExecutor.js +0 -407
- package/dist/client/workflow/core/judgeScorer.js +0 -544
- package/dist/client/workflow/core/responseConditioner.js +0 -225
- package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
- package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
- package/dist/client/workflow/core/types/index.js +0 -7
- package/dist/client/workflow/core/types/judgeTypes.js +0 -7
- package/dist/client/workflow/core/types/layerTypes.js +0 -7
- package/dist/client/workflow/core/types/registryTypes.js +0 -7
- package/dist/client/workflow/core/workflowRegistry.js +0 -304
- package/dist/client/workflow/core/workflowRunner.js +0 -586
- package/dist/client/workflow/index.js +0 -50
- package/dist/client/workflow/types.js +0 -9
- package/dist/client/workflow/utils/types/index.js +0 -7
- package/dist/client/workflow/utils/workflowMetrics.js +0 -311
- package/dist/client/workflow/utils/workflowValidation.js +0 -420
- package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
- package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
- package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
- package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
- /package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LaTeX Chunker
|
|
3
|
-
*
|
|
4
|
-
* Splits LaTeX documents by sections and environments.
|
|
5
|
-
*/
|
|
6
|
-
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
-
/**
|
|
8
|
-
* LaTeX Chunker
|
|
9
|
-
*/
|
|
10
|
-
export class LaTeXChunker extends BaseChunker {
|
|
11
|
-
strategy = "latex";
|
|
12
|
-
getDefaultConfig() {
|
|
13
|
-
return {
|
|
14
|
-
...DEFAULT_CHUNKER_CONFIG,
|
|
15
|
-
maxSize: 1000,
|
|
16
|
-
overlap: 0,
|
|
17
|
-
};
|
|
18
|
-
}
|
|
19
|
-
async doChunk(content, config) {
|
|
20
|
-
const maxSize = config.maxSize ?? 1000;
|
|
21
|
-
// Split by sections
|
|
22
|
-
const sectionPattern = /\\(?:section|subsection|subsubsection|chapter|paragraph)\{[^}]+\}/g;
|
|
23
|
-
const sections = [];
|
|
24
|
-
let lastIndex = 0;
|
|
25
|
-
let match;
|
|
26
|
-
while ((match = sectionPattern.exec(content)) !== null) {
|
|
27
|
-
if (match.index > lastIndex) {
|
|
28
|
-
sections.push(content.slice(lastIndex, match.index));
|
|
29
|
-
}
|
|
30
|
-
lastIndex = match.index;
|
|
31
|
-
}
|
|
32
|
-
if (lastIndex < content.length) {
|
|
33
|
-
sections.push(content.slice(lastIndex));
|
|
34
|
-
}
|
|
35
|
-
if (sections.length === 0) {
|
|
36
|
-
sections.push(content);
|
|
37
|
-
}
|
|
38
|
-
const chunks = [];
|
|
39
|
-
let offset = 0;
|
|
40
|
-
for (const section of sections) {
|
|
41
|
-
const trimmed = section.trim();
|
|
42
|
-
if (!trimmed) {
|
|
43
|
-
continue;
|
|
44
|
-
}
|
|
45
|
-
if (trimmed.length <= maxSize) {
|
|
46
|
-
const startOffset = content.indexOf(trimmed, offset);
|
|
47
|
-
chunks.push(this.createChunk(trimmed, chunks.length, startOffset >= 0 ? startOffset : offset, startOffset >= 0
|
|
48
|
-
? startOffset + trimmed.length
|
|
49
|
-
: offset + trimmed.length));
|
|
50
|
-
if (startOffset >= 0) {
|
|
51
|
-
offset = startOffset + 1;
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
else {
|
|
55
|
-
const segments = this.splitBySizeWithOverlap(trimmed, maxSize, 0);
|
|
56
|
-
for (const segment of segments) {
|
|
57
|
-
chunks.push(this.createChunk(segment.text, chunks.length, segment.start, segment.end));
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
return chunks;
|
|
62
|
-
}
|
|
63
|
-
}
|
|
@@ -1,306 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Markdown Chunker
|
|
3
|
-
*
|
|
4
|
-
* Splits markdown content by headers and structural elements.
|
|
5
|
-
* Preserves markdown tables by detecting table boundaries and splitting
|
|
6
|
-
* on row boundaries when a table exceeds the max chunk size.
|
|
7
|
-
*/
|
|
8
|
-
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
9
|
-
/** Matches a markdown table separator row like |---|---| or |:--:|---:| */
|
|
10
|
-
const TABLE_SEPARATOR_RE = /^\|[\s:]*-+[\s:]*(\|[\s:]*-+[\s:]*)*\|?\s*$/;
|
|
11
|
-
/** Matches a line that looks like a table row (starts with |) */
|
|
12
|
-
const TABLE_ROW_RE = /^\|.+\|?\s*$/;
|
|
13
|
-
/**
|
|
14
|
-
* Detect contiguous table blocks in text.
|
|
15
|
-
* Returns an array of { start, end } line index ranges (inclusive).
|
|
16
|
-
* A table is a sequence of lines where the second line is a separator.
|
|
17
|
-
*/
|
|
18
|
-
function detectTableRanges(lines) {
|
|
19
|
-
const ranges = [];
|
|
20
|
-
let i = 0;
|
|
21
|
-
while (i < lines.length) {
|
|
22
|
-
// A table needs at least a header row + separator
|
|
23
|
-
if (i + 1 < lines.length &&
|
|
24
|
-
TABLE_ROW_RE.test(lines[i]) &&
|
|
25
|
-
TABLE_SEPARATOR_RE.test(lines[i + 1])) {
|
|
26
|
-
const start = i;
|
|
27
|
-
// Advance past header + separator
|
|
28
|
-
i += 2;
|
|
29
|
-
// Consume remaining data rows
|
|
30
|
-
while (i < lines.length && TABLE_ROW_RE.test(lines[i])) {
|
|
31
|
-
i++;
|
|
32
|
-
}
|
|
33
|
-
ranges.push({ start, end: i - 1 });
|
|
34
|
-
}
|
|
35
|
-
else {
|
|
36
|
-
i++;
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
return ranges;
|
|
40
|
-
}
|
|
41
|
-
/**
 * Markdown Chunker
 *
 * Splits markdown into sections at ATX headers (`#` .. `######`) and emits one
 * chunk per section. Sections longer than `maxSize` are split further with a
 * table-aware strategy so that markdown tables are kept intact where possible.
 */
export class MarkdownChunker extends BaseChunker {
    strategy = "markdown";
    /**
     * Default configuration: the shared chunker defaults with a 1000-character
     * budget and a 50-character overlap.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 50,
        };
    }
    /**
     * Chunk `content` by markdown headers.
     *
     * @param {string} content - Markdown source text.
     * @param {object} config - Chunker configuration; only `maxSize` is read here.
     * @returns {Promise<Array>} Chunks built through `this.createChunk`
     *   (defined outside this class — presumably on BaseChunker).
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        // Pass 1: cut the document into { header, content, level } sections.
        const headerPattern = /^(#{1,6})\s+(.+)$/gm;
        const sections = [];
        let lastIndex = 0;
        let match = headerPattern.exec(content);
        while (match !== null) {
            // Text between the previous header and this one belongs to the
            // previous section, or to a header-less preamble section.
            if (match.index > lastIndex) {
                const prevContent = content.slice(lastIndex, match.index).trim();
                if (prevContent) {
                    const lastSection = sections[sections.length - 1];
                    if (lastSection) {
                        lastSection.content += "\n\n" + prevContent;
                    }
                    else {
                        sections.push({ header: "", content: prevContent, level: 0 });
                    }
                }
            }
            sections.push({
                header: match[0],
                content: "",
                level: match[1]?.length ?? 1,
            });
            lastIndex = match.index + match[0].length;
            match = headerPattern.exec(content);
        }
        // Text after the final header (or the whole document when there is none).
        const remaining = content.slice(lastIndex).trim();
        if (remaining) {
            const lastSection = sections[sections.length - 1];
            if (lastSection) {
                lastSection.content += remaining;
            }
            else {
                sections.push({ header: "", content: remaining, level: 0 });
            }
        }
        // Pass 2: turn sections into chunks.
        const chunks = [];
        let offset = 0;
        // Locate `text` in the source at or after `offset` and record a chunk.
        // When the text is not found verbatim the running offset is used as a
        // best-effort position and is not advanced.
        // NOTE(review): overlapping sub-chunks start before `offset`, so this
        // search can miss them and fall back to the running offset.
        const emit = (text, sectionHeader) => {
            const found = content.indexOf(text, offset);
            const startOffset = found >= 0 ? found : offset;
            chunks.push(this.createChunk(text, chunks.length, startOffset, startOffset + text.length, "unknown", { sectionContext: sectionHeader }));
            if (found >= 0) {
                offset = found + text.length;
            }
        };
        for (const section of sections) {
            if (!section) {
                continue;
            }
            const body = section.content.trim();
            const fullContent = section.header
                ? section.header + "\n\n" + body
                : body;
            if (!fullContent) {
                continue;
            }
            if (fullContent.length > maxSize) {
                // Too large for one chunk — use table-aware splitting.
                for (const sub of this.splitContentTableAware(fullContent, maxSize)) {
                    emit(sub, section.header);
                }
            }
            else {
                emit(fullContent, section.header);
            }
        }
        return chunks;
    }
    /**
     * Split content while preserving markdown tables.
     *
     * Strategy:
     * 1. Identify table blocks in the content.
     * 2. Split content into segments: non-table text and table blocks.
     * 3. Non-table text is split using paragraph/sentence boundaries.
     * 4. Tables that fit in a chunk are kept intact.
     * 5. Oversized tables are split on row boundaries, repeating the header row.
     *
     * @param {string} content - Section text to split.
     * @param {number} maxSize - Maximum chunk length in characters.
     * @returns {string[]} Chunk texts; `[content]` when nothing was produced.
     */
    splitContentTableAware(content, maxSize) {
        const lines = content.split("\n");
        const tableRanges = detectTableRanges(lines);
        // NOTE(review): `this.config` is not set in this class; presumably
        // BaseChunker provides it — confirm.
        const overlap = this.config.overlap ?? 0;
        // No tables: plain paragraph/sentence splitting is enough.
        if (tableRanges.length === 0) {
            return this.splitPlainContent(content, maxSize, overlap);
        }
        // Build segments: alternating non-table and table blocks.
        const segments = [];
        let lineIdx = 0;
        for (const range of tableRanges) {
            if (lineIdx < range.start) {
                const text = lines.slice(lineIdx, range.start).join("\n").trim();
                if (text) {
                    segments.push({ text, isTable: false });
                }
            }
            const tableText = lines.slice(range.start, range.end + 1).join("\n");
            segments.push({ text: tableText, isTable: true });
            lineIdx = range.end + 1;
        }
        // Trailing non-table text.
        if (lineIdx < lines.length) {
            const text = lines.slice(lineIdx).join("\n").trim();
            if (text) {
                segments.push({ text, isTable: false });
            }
        }
        // Pack segments together up to maxSize.
        const result = [];
        let current = "";
        // Append `text` to the chunk being built, or flush and start a new one
        // when the joined text would exceed the budget.
        const pack = (text, joiner) => {
            if (current.length === 0) {
                current = text;
            }
            else if (current.length + joiner.length + text.length <= maxSize) {
                current += joiner + text;
            }
            else {
                result.push(current);
                current = text;
            }
        };
        for (const seg of segments) {
            if (!seg.isTable) {
                // Plain text pieces are joined with a single newline.
                for (const piece of this.splitPlainContent(seg.text, maxSize, overlap)) {
                    pack(piece, "\n");
                }
            }
            else if (seg.text.length <= maxSize) {
                // A table that fits is separated from preceding text by a blank line.
                pack(seg.text, "\n\n");
            }
            else {
                // Oversized table — flush current, then split on row boundaries.
                if (current) {
                    result.push(current);
                    current = "";
                }
                result.push(...this.splitTableByRows(seg.text, maxSize));
            }
        }
        if (current) {
            result.push(current);
        }
        return result.length > 0 ? result : [content];
    }
    /**
     * Split a table on row boundaries, repeating header + separator in each chunk.
     *
     * @param {string} tableText - Table source, one row per line.
     * @param {number} maxSize - Maximum chunk length in characters.
     * @returns {string[]} Table fragments, each starting with the header block.
     *   A single row that cannot fit is emitted on its own (over budget).
     */
    splitTableByRows(tableText, maxSize) {
        const rows = tableText.split("\n");
        if (rows.length < 3) {
            // Not a proper table (need header + separator + at least 1 data row).
            return [tableText];
        }
        const headerBlock = rows[0] + "\n" + rows[1];
        const dataRows = rows.slice(2);
        // If even the header doesn't fit, fall back to size-based split.
        if (headerBlock.length > maxSize) {
            return this.splitPlainContent(tableText, maxSize, this.config.overlap ?? 0);
        }
        const chunks = [];
        let currentChunk = headerBlock;
        // A chunk is only worth emitting once it holds more than the header.
        const flush = () => {
            if (currentChunk.length > headerBlock.length) {
                chunks.push(currentChunk);
            }
        };
        for (const row of dataRows) {
            const singleRowChunk = `${headerBlock}\n${row}`;
            if (singleRowChunk.length > maxSize) {
                // This row alone exceeds the budget: emit it standalone.
                flush();
                chunks.push(singleRowChunk);
                currentChunk = headerBlock;
                continue;
            }
            const candidate = currentChunk + "\n" + row;
            if (candidate.length <= maxSize) {
                currentChunk = candidate;
            }
            else {
                flush();
                // Start a new chunk with the header repeated.
                currentChunk = singleRowChunk;
            }
        }
        flush();
        return chunks.length > 0 ? chunks : [tableText];
    }
    /**
     * Split non-table text using paragraph and sentence boundaries.
     *
     * Each window is at most `maxSize` characters. When a window does not reach
     * the end of the text, its end is pulled back to the last paragraph break
     * (or, failing that, the first sentence break) found in its final 200
     * characters. Consecutive windows overlap by `overlap` characters.
     *
     * @param {string} content - Text to split.
     * @param {number} maxSize - Maximum chunk length in characters.
     * @param {number} [overlap=0] - Characters shared between adjacent chunks.
     * @returns {string[]} Chunk texts in document order.
     */
    splitPlainContent(content, maxSize, overlap = 0) {
        if (content.length <= maxSize) {
            return [content];
        }
        const chunks = [];
        let start = 0;
        while (start < content.length) {
            let end = Math.min(start + maxSize, content.length);
            if (end < content.length) {
                const searchStart = Math.max(start, end - 200);
                const searchText = content.slice(searchStart, end);
                // Look for paragraph break first.
                const paragraphBreak = searchText.lastIndexOf("\n\n");
                if (paragraphBreak > 0) {
                    end = searchStart + paragraphBreak;
                }
                else {
                    // Look for sentence break.
                    const sentenceBreak = searchText.search(/[.!?]\s+[A-Z]/);
                    if (sentenceBreak > 0) {
                        end = searchStart + sentenceBreak + 1;
                    }
                }
            }
            chunks.push(content.slice(start, end));
            // Once a window reaches the end of the text we are done. Stepping
            // back by `overlap` here would re-emit ever-shorter copies of the
            // tail, one per remaining character.
            if (end >= content.length) {
                break;
            }
            // Always advance by at least one character so the loop terminates.
            start = Math.max(start + 1, end - overlap);
        }
        return chunks;
    }
}
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Recursive Chunker
|
|
3
|
-
*
|
|
4
|
-
* Recursively splits text using an ordered list of separators.
|
|
5
|
-
* Tries each separator in order until chunks are small enough.
|
|
6
|
-
*/
|
|
7
|
-
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
8
|
-
/**
|
|
9
|
-
* Default separators for recursive splitting
|
|
10
|
-
*/
|
|
11
|
-
const DEFAULT_SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
|
|
12
|
-
/**
 * Recursive Chunker
 *
 * Splits content using an ordered list of separators, recursively breaking
 * down text with the next separator until chunks meet the size requirement.
 */
export class RecursiveChunker extends BaseChunker {
    strategy = "recursive";
    /**
     * Default configuration: the shared chunker defaults with a 1000-character
     * budget, a 100-character overlap and the default separator list.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 100,
            separators: DEFAULT_SEPARATORS,
        };
    }
    /**
     * Chunk `content` by recursive separator splitting.
     *
     * @param {string} content - Text to chunk.
     * @param {object} config - Reads `maxSize`, `overlap`, `separators` and
     *   `keepSeparators`, each with a default.
     * @returns {Promise<Array>} Chunks built through `this.createChunk`
     *   (defined outside this class — presumably on BaseChunker).
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const overlap = config.overlap ?? 100;
        const separators = config.separators ?? DEFAULT_SEPARATORS;
        const keepSeparators = config.keepSeparators ?? true;
        const chunks = [];
        let offset = 0;
        const textChunks = this.recursiveSplit(content, separators, maxSize, overlap, keepSeparators);
        for (let i = 0; i < textChunks.length; i++) {
            const text = textChunks[i];
            if (!text) {
                continue;
            }
            // A chunk may not appear verbatim in the source (for example when
            // separators were dropped). Fall back to the running offset rather
            // than recording a negative start position.
            const found = content.indexOf(text, offset);
            const startOffset = found >= 0 ? found : offset;
            chunks.push(this.createChunk(text, i, startOffset, startOffset + text.length));
            if (found >= 0) {
                // Advance by one only: the next chunk may overlap this one.
                offset = Math.max(offset, found + 1);
            }
        }
        return chunks;
    }
    /**
     * Recursively split text using separators.
     *
     * @param {string} text - Text to split.
     * @param {string[]} separators - Separators in priority order; `""` means
     *   "split by size".
     * @param {number} maxSize - Maximum chunk length in characters.
     * @param {number} overlap - Characters shared between adjacent chunks.
     * @param {boolean} keepSeparators - Whether each part keeps its trailing separator.
     * @returns {string[]} Chunk texts in document order.
     */
    recursiveSplit(text, separators, maxSize, overlap, keepSeparators) {
        if (text.length <= maxSize) {
            return [text];
        }
        // Use the first separator that occurs in the text; "" always matches.
        let separator = "";
        for (const sep of separators) {
            if (sep === "" || text.includes(sep)) {
                separator = sep;
                break;
            }
        }
        // No usable separator: split by size.
        if (separator === "") {
            const windows = [];
            let start = 0;
            while (start < text.length) {
                const end = Math.min(start + maxSize, text.length);
                windows.push(text.slice(start, end));
                // Stop once a window reaches the end. Stepping back by
                // `overlap` from there would only re-emit shrinking tail copies.
                if (end >= text.length) {
                    break;
                }
                // Step back by `overlap`, but always advance at least one character.
                start = Math.max(end - overlap, start + 1);
            }
            return windows;
        }
        // Split by separator and greedily pack parts up to maxSize.
        const parts = text.split(separator);
        const result = [];
        let currentChunk = "";
        for (let i = 0; i < parts.length; i++) {
            const part = parts[i];
            const addSeparator = keepSeparators && i < parts.length - 1;
            const toAdd = part + (addSeparator ? separator : "");
            if (currentChunk.length + toAdd.length <= maxSize) {
                currentChunk += toAdd;
                continue;
            }
            // Current chunk is full.
            if (currentChunk.length > 0) {
                result.push(currentChunk);
            }
            if (toAdd.length > maxSize) {
                // The part itself is too large: retry with the finer separators.
                const remainingSeparators = separators.slice(separators.indexOf(separator) + 1);
                result.push(...this.recursiveSplit(toAdd, remainingSeparators, maxSize, overlap, keepSeparators));
                currentChunk = "";
            }
            else {
                currentChunk = toAdd;
            }
        }
        if (currentChunk.length > 0) {
            result.push(currentChunk);
        }
        // Apply overlap between chunks.
        if (overlap > 0 && result.length > 1) {
            return this.applyOverlap(result, overlap);
        }
        return result;
    }
    /**
     * Apply overlap between chunks by prefixing each chunk (except the first)
     * with the last `overlap` characters of the chunk before it.
     *
     * @param {string[]} chunks - Chunk texts in order.
     * @param {number} overlap - Number of trailing characters to carry over.
     * @returns {string[]} A new array when there is more than one chunk;
     *   otherwise the input array itself.
     */
    applyOverlap(chunks, overlap) {
        if (chunks.length <= 1) {
            return chunks;
        }
        return chunks.map((entry, i) => {
            const chunk = entry ?? "";
            const prevChunk = i > 0 ? chunks[i - 1] : undefined;
            if (!prevChunk) {
                return chunk;
            }
            return prevChunk.slice(-Math.min(overlap, prevChunk.length)) + chunk;
        });
    }
}
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Semantic Markdown Chunker
|
|
3
|
-
*
|
|
4
|
-
* Combines markdown splitting with semantic similarity for intelligent merging.
|
|
5
|
-
*/
|
|
6
|
-
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
-
/**
 * Semantic Markdown Chunker
 *
 * Splits markdown at ATX headers, merges adjacent sections while the merged
 * text still fits in `maxSize`, and emits one chunk per merged section.
 * Sections that remain too large are split by size with overlap.
 */
export class SemanticMarkdownChunker extends BaseChunker {
    strategy = "semantic-markdown";
    /**
     * Default configuration: the shared chunker defaults with a 1000-character
     * budget and a 100-character overlap.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 100,
        };
    }
    /**
     * Chunk `content` by markdown headers, merging small neighbouring sections.
     *
     * @param {string} content - Markdown source text.
     * @param {object} config - Reads `maxSize` and `overlap`, each with a default.
     * @returns {Promise<Array>} Chunks built through `this.createChunk`
     *   (defined outside this class — presumably on BaseChunker).
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const overlap = config.overlap ?? 100;
        // Pass 1: cut the document into { header, content } sections.
        const headerPattern = /^(#{1,6})\s+(.+)$/gm;
        const sections = [];
        let lastIndex = 0;
        let match = headerPattern.exec(content);
        while (match !== null) {
            // Text between the previous header and this one belongs to the
            // previous section, or to a header-less preamble section.
            if (match.index > lastIndex) {
                const prevContent = content.slice(lastIndex, match.index).trim();
                if (prevContent) {
                    const lastSection = sections[sections.length - 1];
                    if (lastSection) {
                        lastSection.content += "\n\n" + prevContent;
                    }
                    else {
                        sections.push({ header: "", content: prevContent });
                    }
                }
            }
            sections.push({ header: match[0], content: "" });
            lastIndex = match.index + match[0].length;
            match = headerPattern.exec(content);
        }
        // Text after the final header (or the whole document when there is none).
        const remaining = content.slice(lastIndex).trim();
        if (remaining) {
            const lastSection = sections[sections.length - 1];
            if (lastSection) {
                lastSection.content += remaining;
            }
            else {
                sections.push({ header: "", content: remaining });
            }
        }
        // Pass 2: merge neighbouring sections that fit together.
        const mergedSections = this.mergeSmallSections(sections, maxSize);
        // Pass 3: convert to chunks.
        const chunks = [];
        let offset = 0;
        // Locate `text` by its first 50 characters at or after `offset` and
        // record a chunk. When not found, the running offset is used as a
        // best-effort position and is not advanced.
        const emit = (text, sectionHeader) => {
            const found = content.indexOf(text.slice(0, 50), offset);
            const startOffset = found >= 0 ? found : offset;
            chunks.push(this.createChunk(text, chunks.length, startOffset, startOffset + text.length, "unknown", { sectionContext: sectionHeader }));
            if (found >= 0) {
                // Advance by one only: the next chunk may overlap this one.
                offset = found + 1;
            }
        };
        for (const section of mergedSections) {
            if (!section) {
                continue;
            }
            const body = section.content.trim();
            const fullContent = section.header
                ? section.header + "\n\n" + body
                : body;
            if (!fullContent) {
                continue;
            }
            if (fullContent.length > maxSize) {
                // NOTE(review): splitBySizeWithOverlap is not defined in this
                // class; presumably inherited from BaseChunker. Only the
                // `text` property of each returned segment is used here.
                for (const segment of this.splitBySizeWithOverlap(fullContent, maxSize, overlap)) {
                    emit(segment.text, section.header);
                }
            }
            else {
                emit(fullContent, section.header);
            }
        }
        return chunks;
    }
    /**
     * Merge small sections to optimize chunk sizes.
     *
     * Consecutive sections are folded into the first one of the run as long as
     * the merged section, rendered the way `doChunk` renders it
     * (`header + "\n\n" + content.trim()`), stays within `maxSize`. The length
     * check includes the blank-line separator inserted between merged
     * sections, so a merge never pushes a section over the budget.
     *
     * @param {Array<{header: string, content: string}>} sections - Sections in
     *   document order; the input objects are not modified.
     * @param {number} maxSize - Maximum rendered section length in characters.
     * @returns {Array<{header: string, content: string}>} Merged sections.
     */
    mergeSmallSections(sections, maxSize) {
        const result = [];
        let current = null;
        for (const section of sections) {
            if (!current) {
                current = { ...section };
                continue;
            }
            // Text this section would contribute to the running section.
            const appended = section.header
                ? section.header + "\n" + section.content
                : section.content;
            const candidate = current.content + "\n\n" + appended;
            // Measure exactly what doChunk would emit for the merged section.
            const candidateBody = candidate.trim();
            const renderedLength = current.header
                ? current.header.length + 2 + candidateBody.length
                : candidateBody.length;
            if (renderedLength <= maxSize) {
                current.content = candidate;
            }
            else {
                result.push(current);
                current = { ...section };
            }
        }
        if (current) {
            result.push(current);
        }
        return result;
    }
}
|