@juspay/neurolink 9.32.0 → 9.32.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/anthropicOAuth.js +1 -1
- package/dist/cli/commands/proxy.js +18 -5
- package/dist/client/aiSdkAdapter.js +1 -1
- package/dist/client/index.js +137 -501
- package/dist/core/factory.js +0 -1
- package/dist/core/redisConversationMemoryManager.js +1 -1
- package/dist/features/ppt/slideGenerator.js +0 -1
- package/dist/features/ppt/utils.js +0 -1
- package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
- package/dist/mcp/elicitationProtocol.js +1 -1
- package/dist/mcp/servers/agent/directToolsServer.js +0 -1
- package/dist/providers/azureOpenai.js +1 -1
- package/dist/providers/huggingFace.js +0 -1
- package/dist/providers/openaiCompatible.js +0 -1
- package/dist/sdk/toolRegistration.js +0 -1
- package/dist/server/openapi/generator.js +1 -1
- package/dist/server/routes/claudeProxyRoutes.js +45 -9
- package/dist/types/configTypes.js +0 -5
- package/dist/types/modelTypes.js +0 -1
- package/dist/types/tools.js +0 -1
- package/dist/types/typeAliases.js +0 -1
- package/dist/types/utilities.js +1 -1
- package/dist/types/workflowTypes.js +0 -1
- package/dist/utils/providerRetry.js +0 -1
- package/dist/utils/providerUtils.js +0 -1
- package/package.json +2 -2
- package/dist/client/adapters/providerImageAdapter.js +0 -588
- package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
- package/dist/client/adapters/video/directorPipeline.js +0 -516
- package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
- package/dist/client/adapters/video/frameExtractor.js +0 -143
- package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
- package/dist/client/adapters/video/videoAnalyzer.js +0 -238
- package/dist/client/adapters/video/videoMerger.js +0 -171
- package/dist/client/agent/directTools.js +0 -840
- package/dist/client/auth/AuthProviderFactory.js +0 -111
- package/dist/client/auth/AuthProviderRegistry.js +0 -190
- package/dist/client/auth/RequestContext.js +0 -78
- package/dist/client/auth/accountPool.js +0 -178
- package/dist/client/auth/anthropicOAuth.js +0 -974
- package/dist/client/auth/authContext.js +0 -314
- package/dist/client/auth/errors.js +0 -39
- package/dist/client/auth/index.js +0 -61
- package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
- package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
- package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
- package/dist/client/auth/providers/CognitoProvider.js +0 -304
- package/dist/client/auth/providers/KeycloakProvider.js +0 -393
- package/dist/client/auth/providers/auth0.js +0 -274
- package/dist/client/auth/providers/betterAuth.js +0 -182
- package/dist/client/auth/providers/clerk.js +0 -317
- package/dist/client/auth/providers/custom.js +0 -112
- package/dist/client/auth/providers/firebase.js +0 -226
- package/dist/client/auth/providers/jwt.js +0 -212
- package/dist/client/auth/providers/oauth2.js +0 -303
- package/dist/client/auth/providers/supabase.js +0 -259
- package/dist/client/auth/providers/workos.js +0 -284
- package/dist/client/auth/serverBridge.js +0 -25
- package/dist/client/auth/sessionManager.js +0 -437
- package/dist/client/auth/tokenStore.js +0 -799
- package/dist/client/client/aiSdkAdapter.js +0 -487
- package/dist/client/client/auth.js +0 -473
- package/dist/client/client/errors.js +0 -552
- package/dist/client/client/httpClient.js +0 -837
- package/dist/client/client/index.js +0 -172
- package/dist/client/client/interceptors.js +0 -601
- package/dist/client/client/sseClient.js +0 -545
- package/dist/client/client/streamingClient.js +0 -917
- package/dist/client/client/wsClient.js +0 -369
- package/dist/client/config/configManager.js +0 -303
- package/dist/client/config/conversationMemory.js +0 -86
- package/dist/client/config/taskClassificationConfig.js +0 -148
- package/dist/client/constants/contextWindows.js +0 -295
- package/dist/client/constants/enums.js +0 -853
- package/dist/client/constants/index.js +0 -207
- package/dist/client/constants/performance.js +0 -389
- package/dist/client/constants/retry.js +0 -266
- package/dist/client/constants/timeouts.js +0 -182
- package/dist/client/constants/tokens.js +0 -380
- package/dist/client/constants/videoErrors.js +0 -46
- package/dist/client/context/budgetChecker.js +0 -98
- package/dist/client/context/contextCompactor.js +0 -205
- package/dist/client/context/emergencyTruncation.js +0 -88
- package/dist/client/context/errorDetection.js +0 -171
- package/dist/client/context/errors.js +0 -21
- package/dist/client/context/fileTokenBudget.js +0 -127
- package/dist/client/context/prompts/summarizationPrompt.js +0 -117
- package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
- package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
- package/dist/client/context/stages/structuredSummarizer.js +0 -99
- package/dist/client/context/stages/toolOutputPruner.js +0 -52
- package/dist/client/context/summarizationEngine.js +0 -136
- package/dist/client/context/toolOutputLimits.js +0 -78
- package/dist/client/context/toolPairRepair.js +0 -66
- package/dist/client/core/analytics.js +0 -88
- package/dist/client/core/baseProvider.js +0 -1385
- package/dist/client/core/constants.js +0 -140
- package/dist/client/core/conversationMemoryFactory.js +0 -141
- package/dist/client/core/conversationMemoryInitializer.js +0 -128
- package/dist/client/core/conversationMemoryManager.js +0 -344
- package/dist/client/core/dynamicModels.js +0 -358
- package/dist/client/core/evaluation.js +0 -309
- package/dist/client/core/evaluationProviders.js +0 -248
- package/dist/client/core/factory.js +0 -412
- package/dist/client/core/infrastructure/baseError.js +0 -22
- package/dist/client/core/infrastructure/baseFactory.js +0 -54
- package/dist/client/core/infrastructure/baseRegistry.js +0 -53
- package/dist/client/core/infrastructure/index.js +0 -5
- package/dist/client/core/infrastructure/retry.js +0 -20
- package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
- package/dist/client/core/modelConfiguration.js +0 -851
- package/dist/client/core/modules/GenerationHandler.js +0 -588
- package/dist/client/core/modules/MessageBuilder.js +0 -273
- package/dist/client/core/modules/StreamHandler.js +0 -185
- package/dist/client/core/modules/TelemetryHandler.js +0 -203
- package/dist/client/core/modules/ToolsManager.js +0 -499
- package/dist/client/core/modules/Utilities.js +0 -331
- package/dist/client/core/redisConversationMemoryManager.js +0 -1435
- package/dist/client/core/streamAnalytics.js +0 -131
- package/dist/client/evaluation/contextBuilder.js +0 -134
- package/dist/client/evaluation/index.js +0 -61
- package/dist/client/evaluation/prompts.js +0 -73
- package/dist/client/evaluation/ragasEvaluator.js +0 -110
- package/dist/client/evaluation/retryManager.js +0 -78
- package/dist/client/evaluation/scoring.js +0 -61
- package/dist/client/factories/providerFactory.js +0 -166
- package/dist/client/factories/providerRegistry.js +0 -166
- package/dist/client/features/ppt/constants.js +0 -896
- package/dist/client/features/ppt/contentPlanner.js +0 -529
- package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
- package/dist/client/features/ppt/slideGenerator.js +0 -532
- package/dist/client/features/ppt/slideRenderers.js +0 -2383
- package/dist/client/features/ppt/slideTypeInference.js +0 -405
- package/dist/client/features/ppt/types.js +0 -13
- package/dist/client/features/ppt/utils.js +0 -443
- package/dist/client/files/fileReferenceRegistry.js +0 -1543
- package/dist/client/files/fileTools.js +0 -450
- package/dist/client/files/streamingReader.js +0 -321
- package/dist/client/files/types.js +0 -23
- package/dist/client/hitl/hitlErrors.js +0 -54
- package/dist/client/hitl/hitlManager.js +0 -460
- package/dist/client/mcp/agentExposure.js +0 -356
- package/dist/client/mcp/auth/index.js +0 -11
- package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
- package/dist/client/mcp/auth/tokenStorage.js +0 -134
- package/dist/client/mcp/batching/index.js +0 -10
- package/dist/client/mcp/batching/requestBatcher.js +0 -441
- package/dist/client/mcp/caching/index.js +0 -10
- package/dist/client/mcp/caching/toolCache.js +0 -433
- package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
- package/dist/client/mcp/elicitation/index.js +0 -11
- package/dist/client/mcp/elicitation/types.js +0 -10
- package/dist/client/mcp/elicitationProtocol.js +0 -375
- package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
- package/dist/client/mcp/externalServerManager.js +0 -1478
- package/dist/client/mcp/factory.js +0 -161
- package/dist/client/mcp/flexibleToolValidator.js +0 -161
- package/dist/client/mcp/httpRateLimiter.js +0 -391
- package/dist/client/mcp/httpRetryHandler.js +0 -178
- package/dist/client/mcp/index.js +0 -74
- package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
- package/dist/client/mcp/mcpClientFactory.js +0 -708
- package/dist/client/mcp/mcpRegistryClient.js +0 -488
- package/dist/client/mcp/mcpServerBase.js +0 -373
- package/dist/client/mcp/multiServerManager.js +0 -579
- package/dist/client/mcp/registry.js +0 -158
- package/dist/client/mcp/routing/index.js +0 -10
- package/dist/client/mcp/routing/toolRouter.js +0 -416
- package/dist/client/mcp/serverCapabilities.js +0 -502
- package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
- package/dist/client/mcp/toolAnnotations.js +0 -239
- package/dist/client/mcp/toolConverter.js +0 -258
- package/dist/client/mcp/toolDiscoveryService.js +0 -798
- package/dist/client/mcp/toolIntegration.js +0 -334
- package/dist/client/mcp/toolRegistry.js +0 -729
- package/dist/client/memory/hippocampusInitializer.js +0 -19
- package/dist/client/memory/memoryRetrievalTools.js +0 -166
- package/dist/client/middleware/builtin/analytics.js +0 -132
- package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
- package/dist/client/middleware/builtin/guardrails.js +0 -109
- package/dist/client/middleware/builtin/lifecycle.js +0 -168
- package/dist/client/middleware/factory.js +0 -327
- package/dist/client/middleware/registry.js +0 -295
- package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
- package/dist/client/models/anthropicModels.js +0 -527
- package/dist/client/neurolink.js +0 -8233
- package/dist/client/observability/exporterRegistry.js +0 -413
- package/dist/client/observability/exporters/arizeExporter.js +0 -138
- package/dist/client/observability/exporters/baseExporter.js +0 -190
- package/dist/client/observability/exporters/braintrustExporter.js +0 -154
- package/dist/client/observability/exporters/datadogExporter.js +0 -196
- package/dist/client/observability/exporters/laminarExporter.js +0 -302
- package/dist/client/observability/exporters/langfuseExporter.js +0 -209
- package/dist/client/observability/exporters/langsmithExporter.js +0 -143
- package/dist/client/observability/exporters/otelExporter.js +0 -164
- package/dist/client/observability/exporters/posthogExporter.js +0 -287
- package/dist/client/observability/exporters/sentryExporter.js +0 -165
- package/dist/client/observability/index.js +0 -31
- package/dist/client/observability/metricsAggregator.js +0 -556
- package/dist/client/observability/otelBridge.js +0 -131
- package/dist/client/observability/retryPolicy.js +0 -383
- package/dist/client/observability/sampling/samplers.js +0 -216
- package/dist/client/observability/spanProcessor.js +0 -303
- package/dist/client/observability/tokenTracker.js +0 -413
- package/dist/client/observability/types/exporterTypes.js +0 -5
- package/dist/client/observability/types/index.js +0 -4
- package/dist/client/observability/types/spanTypes.js +0 -92
- package/dist/client/observability/utils/safeMetadata.js +0 -25
- package/dist/client/observability/utils/spanSerializer.js +0 -292
- package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
- package/dist/client/processors/base/BaseFileProcessor.js +0 -614
- package/dist/client/processors/base/types.js +0 -82
- package/dist/client/processors/config/fileTypes.js +0 -520
- package/dist/client/processors/config/index.js +0 -92
- package/dist/client/processors/config/languageMap.js +0 -410
- package/dist/client/processors/config/mimeTypes.js +0 -363
- package/dist/client/processors/config/sizeLimits.js +0 -258
- package/dist/client/processors/document/ExcelProcessor.js +0 -590
- package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
- package/dist/client/processors/document/PptxProcessor.js +0 -157
- package/dist/client/processors/document/RtfProcessor.js +0 -361
- package/dist/client/processors/document/WordProcessor.js +0 -353
- package/dist/client/processors/errors/FileErrorCode.js +0 -255
- package/dist/client/processors/errors/errorHelpers.js +0 -386
- package/dist/client/processors/errors/errorSerializer.js +0 -507
- package/dist/client/processors/errors/index.js +0 -49
- package/dist/client/processors/markup/SvgProcessor.js +0 -240
- package/dist/client/processors/media/AudioProcessor.js +0 -707
- package/dist/client/processors/media/VideoProcessor.js +0 -1045
- package/dist/client/providers/amazonBedrock.js +0 -1512
- package/dist/client/providers/amazonSagemaker.js +0 -162
- package/dist/client/providers/anthropic.js +0 -831
- package/dist/client/providers/azureOpenai.js +0 -143
- package/dist/client/providers/googleAiStudio.js +0 -1200
- package/dist/client/providers/googleNativeGemini3.js +0 -543
- package/dist/client/providers/googleVertex.js +0 -2936
- package/dist/client/providers/huggingFace.js +0 -315
- package/dist/client/providers/litellm.js +0 -488
- package/dist/client/providers/mistral.js +0 -157
- package/dist/client/providers/ollama.js +0 -1579
- package/dist/client/providers/openAI.js +0 -627
- package/dist/client/providers/openRouter.js +0 -543
- package/dist/client/providers/openaiCompatible.js +0 -290
- package/dist/client/providers/providerTypeUtils.js +0 -46
- package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
- package/dist/client/providers/sagemaker/client.js +0 -472
- package/dist/client/providers/sagemaker/config.js +0 -317
- package/dist/client/providers/sagemaker/detection.js +0 -606
- package/dist/client/providers/sagemaker/error-constants.js +0 -227
- package/dist/client/providers/sagemaker/errors.js +0 -299
- package/dist/client/providers/sagemaker/language-model.js +0 -775
- package/dist/client/providers/sagemaker/parsers.js +0 -634
- package/dist/client/providers/sagemaker/streaming.js +0 -331
- package/dist/client/providers/sagemaker/structured-parser.js +0 -625
- package/dist/client/proxy/accountQuota.js +0 -162
- package/dist/client/proxy/claudeFormat.js +0 -595
- package/dist/client/proxy/modelRouter.js +0 -29
- package/dist/client/proxy/oauthFetch.js +0 -367
- package/dist/client/proxy/proxyFetch.js +0 -586
- package/dist/client/proxy/requestLogger.js +0 -207
- package/dist/client/proxy/tokenRefresh.js +0 -124
- package/dist/client/proxy/usageStats.js +0 -74
- package/dist/client/proxy/utils/noProxyUtils.js +0 -149
- package/dist/client/rag/ChunkerFactory.js +0 -320
- package/dist/client/rag/ChunkerRegistry.js +0 -421
- package/dist/client/rag/chunkers/BaseChunker.js +0 -143
- package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
- package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
- package/dist/client/rag/chunkers/JSONChunker.js +0 -68
- package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
- package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
- package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
- package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
- package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
- package/dist/client/rag/chunkers/TokenChunker.js +0 -61
- package/dist/client/rag/chunkers/index.js +0 -15
- package/dist/client/rag/chunking/characterChunker.js +0 -142
- package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
- package/dist/client/rag/chunking/htmlChunker.js +0 -247
- package/dist/client/rag/chunking/index.js +0 -17
- package/dist/client/rag/chunking/jsonChunker.js +0 -281
- package/dist/client/rag/chunking/latexChunker.js +0 -251
- package/dist/client/rag/chunking/markdownChunker.js +0 -373
- package/dist/client/rag/chunking/recursiveChunker.js +0 -148
- package/dist/client/rag/chunking/semanticChunker.js +0 -306
- package/dist/client/rag/chunking/sentenceChunker.js +0 -230
- package/dist/client/rag/chunking/tokenChunker.js +0 -183
- package/dist/client/rag/document/MDocument.js +0 -392
- package/dist/client/rag/document/index.js +0 -5
- package/dist/client/rag/document/loaders.js +0 -500
- package/dist/client/rag/errors/RAGError.js +0 -274
- package/dist/client/rag/errors/index.js +0 -6
- package/dist/client/rag/graphRag/graphRAG.js +0 -401
- package/dist/client/rag/graphRag/index.js +0 -4
- package/dist/client/rag/index.js +0 -141
- package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
- package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
- package/dist/client/rag/metadata/index.js +0 -9
- package/dist/client/rag/metadata/metadataExtractor.js +0 -280
- package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
- package/dist/client/rag/pipeline/contextAssembly.js +0 -341
- package/dist/client/rag/pipeline/index.js +0 -5
- package/dist/client/rag/ragIntegration.js +0 -321
- package/dist/client/rag/reranker/RerankerFactory.js +0 -430
- package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
- package/dist/client/rag/reranker/index.js +0 -9
- package/dist/client/rag/reranker/reranker.js +0 -277
- package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
- package/dist/client/rag/resilience/RetryHandler.js +0 -304
- package/dist/client/rag/resilience/index.js +0 -7
- package/dist/client/rag/retrieval/hybridSearch.js +0 -335
- package/dist/client/rag/retrieval/index.js +0 -5
- package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
- package/dist/client/rag/types.js +0 -8
- package/dist/client/sdk/toolRegistration.js +0 -377
- package/dist/client/server/abstract/baseServerAdapter.js +0 -575
- package/dist/client/server/adapters/expressAdapter.js +0 -486
- package/dist/client/server/adapters/fastifyAdapter.js +0 -472
- package/dist/client/server/adapters/honoAdapter.js +0 -632
- package/dist/client/server/adapters/koaAdapter.js +0 -510
- package/dist/client/server/errors.js +0 -486
- package/dist/client/server/factory/serverAdapterFactory.js +0 -160
- package/dist/client/server/index.js +0 -108
- package/dist/client/server/middleware/abortSignal.js +0 -111
- package/dist/client/server/middleware/auth.js +0 -388
- package/dist/client/server/middleware/cache.js +0 -359
- package/dist/client/server/middleware/common.js +0 -281
- package/dist/client/server/middleware/deprecation.js +0 -190
- package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
- package/dist/client/server/middleware/rateLimit.js +0 -227
- package/dist/client/server/middleware/validation.js +0 -388
- package/dist/client/server/openapi/generator.js +0 -398
- package/dist/client/server/openapi/index.js +0 -36
- package/dist/client/server/openapi/schemas.js +0 -695
- package/dist/client/server/openapi/templates.js +0 -374
- package/dist/client/server/routes/agentRoutes.js +0 -189
- package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
- package/dist/client/server/routes/healthRoutes.js +0 -187
- package/dist/client/server/routes/index.js +0 -57
- package/dist/client/server/routes/mcpRoutes.js +0 -342
- package/dist/client/server/routes/memoryRoutes.js +0 -350
- package/dist/client/server/routes/openApiRoutes.js +0 -126
- package/dist/client/server/routes/toolRoutes.js +0 -199
- package/dist/client/server/streaming/dataStream.js +0 -486
- package/dist/client/server/streaming/index.js +0 -11
- package/dist/client/server/types.js +0 -67
- package/dist/client/server/utils/redaction.js +0 -334
- package/dist/client/server/utils/validation.js +0 -243
- package/dist/client/server/websocket/WebSocketHandler.js +0 -383
- package/dist/client/server/websocket/index.js +0 -4
- package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
- package/dist/client/telemetry/attributes.js +0 -100
- package/dist/client/telemetry/index.js +0 -26
- package/dist/client/telemetry/telemetryService.js +0 -308
- package/dist/client/telemetry/tracers.js +0 -17
- package/dist/client/telemetry/withSpan.js +0 -34
- package/dist/client/types/actionTypes.js +0 -6
- package/dist/client/types/analytics.js +0 -5
- package/dist/client/types/authTypes.js +0 -9
- package/dist/client/types/circuitBreakerErrors.js +0 -34
- package/dist/client/types/cli.js +0 -21
- package/dist/client/types/clientTypes.js +0 -10
- package/dist/client/types/common.js +0 -51
- package/dist/client/types/configTypes.js +0 -49
- package/dist/client/types/content.js +0 -19
- package/dist/client/types/contextTypes.js +0 -400
- package/dist/client/types/conversation.js +0 -47
- package/dist/client/types/conversationMemoryInterface.js +0 -6
- package/dist/client/types/domainTypes.js +0 -5
- package/dist/client/types/errors.js +0 -167
- package/dist/client/types/evaluation.js +0 -5
- package/dist/client/types/evaluationProviders.js +0 -5
- package/dist/client/types/evaluationTypes.js +0 -1
- package/dist/client/types/externalMcp.js +0 -6
- package/dist/client/types/fileReferenceTypes.js +0 -8
- package/dist/client/types/fileTypes.js +0 -4
- package/dist/client/types/generateTypes.js +0 -1
- package/dist/client/types/guardrails.js +0 -1
- package/dist/client/types/hitlTypes.js +0 -8
- package/dist/client/types/index.js +0 -57
- package/dist/client/types/mcpTypes.js +0 -5
- package/dist/client/types/middlewareTypes.js +0 -1
- package/dist/client/types/modelTypes.js +0 -30
- package/dist/client/types/multimodal.js +0 -135
- package/dist/client/types/observability.js +0 -6
- package/dist/client/types/pptTypes.js +0 -82
- package/dist/client/types/providers.js +0 -111
- package/dist/client/types/proxyTypes.js +0 -16
- package/dist/client/types/ragTypes.js +0 -7
- package/dist/client/types/sdkTypes.js +0 -8
- package/dist/client/types/serviceTypes.js +0 -5
- package/dist/client/types/streamTypes.js +0 -1
- package/dist/client/types/subscriptionTypes.js +0 -9
- package/dist/client/types/taskClassificationTypes.js +0 -5
- package/dist/client/types/tools.js +0 -24
- package/dist/client/types/ttsTypes.js +0 -57
- package/dist/client/types/typeAliases.js +0 -48
- package/dist/client/types/utilities.js +0 -4
- package/dist/client/types/workflowTypes.js +0 -30
- package/dist/client/utils/async/withTimeout.js +0 -98
- package/dist/client/utils/asyncMutex.js +0 -60
- package/dist/client/utils/conversationMemory.js +0 -431
- package/dist/client/utils/csvProcessor.js +0 -846
- package/dist/client/utils/errorHandling.js +0 -936
- package/dist/client/utils/evaluationUtils.js +0 -131
- package/dist/client/utils/factoryProcessing.js +0 -589
- package/dist/client/utils/fileDetector.js +0 -2161
- package/dist/client/utils/imageCache.js +0 -376
- package/dist/client/utils/imageProcessor.js +0 -704
- package/dist/client/utils/logger.js +0 -491
- package/dist/client/utils/mcpDefaults.js +0 -134
- package/dist/client/utils/messageBuilder.js +0 -1653
- package/dist/client/utils/modelAliasResolver.js +0 -54
- package/dist/client/utils/modelDetection.js +0 -80
- package/dist/client/utils/modelRouter.js +0 -292
- package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
- package/dist/client/utils/observabilityHelpers.js +0 -47
- package/dist/client/utils/parameterValidation.js +0 -966
- package/dist/client/utils/pdfProcessor.js +0 -410
- package/dist/client/utils/performance.js +0 -222
- package/dist/client/utils/pricing.js +0 -340
- package/dist/client/utils/promptRedaction.js +0 -62
- package/dist/client/utils/providerConfig.js +0 -1009
- package/dist/client/utils/providerHealth.js +0 -1237
- package/dist/client/utils/providerRetry.js +0 -112
- package/dist/client/utils/providerUtils.js +0 -434
- package/dist/client/utils/rateLimiter.js +0 -200
- package/dist/client/utils/redis.js +0 -368
- package/dist/client/utils/retryHandler.js +0 -269
- package/dist/client/utils/retryability.js +0 -22
- package/dist/client/utils/sanitizers/svg.js +0 -481
- package/dist/client/utils/schemaConversion.js +0 -255
- package/dist/client/utils/taskClassificationUtils.js +0 -149
- package/dist/client/utils/taskClassifier.js +0 -94
- package/dist/client/utils/thinkingConfig.js +0 -104
- package/dist/client/utils/timeout.js +0 -359
- package/dist/client/utils/tokenEstimation.js +0 -142
- package/dist/client/utils/tokenLimits.js +0 -125
- package/dist/client/utils/tokenUtils.js +0 -239
- package/dist/client/utils/toolUtils.js +0 -75
- package/dist/client/utils/transformationUtils.js +0 -554
- package/dist/client/utils/ttsProcessor.js +0 -286
- package/dist/client/utils/typeUtils.js +0 -97
- package/dist/client/utils/videoAnalysisProcessor.js +0 -67
- package/dist/client/workflow/config.js +0 -398
- package/dist/client/workflow/core/ensembleExecutor.js +0 -407
- package/dist/client/workflow/core/judgeScorer.js +0 -544
- package/dist/client/workflow/core/responseConditioner.js +0 -225
- package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
- package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
- package/dist/client/workflow/core/types/index.js +0 -7
- package/dist/client/workflow/core/types/judgeTypes.js +0 -7
- package/dist/client/workflow/core/types/layerTypes.js +0 -7
- package/dist/client/workflow/core/types/registryTypes.js +0 -7
- package/dist/client/workflow/core/workflowRegistry.js +0 -304
- package/dist/client/workflow/core/workflowRunner.js +0 -586
- package/dist/client/workflow/index.js +0 -50
- package/dist/client/workflow/types.js +0 -9
- package/dist/client/workflow/utils/types/index.js +0 -7
- package/dist/client/workflow/utils/workflowMetrics.js +0 -311
- package/dist/client/workflow/utils/workflowValidation.js +0 -420
- package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
- package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
- package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
- package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
- /package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Sentence Chunker
|
|
3
|
-
*
|
|
4
|
-
* Splits text by sentence boundaries for semantically meaningful chunks.
|
|
5
|
-
*/
|
|
6
|
-
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
-
/**
 * Sentence Chunker
 *
 * Groups consecutive sentences into chunks of at most `maxSize` characters.
 * A single sentence longer than `maxSize` is emitted as its own (oversized)
 * chunk; sentences are never split.
 */
export class SentenceChunker extends BaseChunker {
    strategy = "sentence";
    /**
     * Default configuration: the shared chunker defaults with a 1000-character
     * chunk size and an overlap of one sentence.
     *
     * @returns {object} Default configuration for this strategy.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 1, // Overlap in sentences
        };
    }
    /**
     * Split `content` into sentence-aligned chunks.
     *
     * @param {string} content - Text to chunk.
     * @param {object} config - Chunker configuration; only `maxSize`
     *   (maximum characters per chunk, default 1000) is read here.
     * @returns {Promise<Array>} Chunks built with `this.createChunk(text, index, start, end)`.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const sentences = this.splitIntoSentences(content);
        const chunks = [];
        let currentChunk = "";
        // Offset of `currentChunk` inside `content`. splitIntoSentences returns
        // contiguous slices that cover the whole input, so offsets can be
        // advanced arithmetically. Searching with indexOf is unreliable when
        // the same text occurs more than once.
        let currentStart = 0;
        let chunkIndex = 0;
        const flush = () => {
            const endOffset = currentStart + currentChunk.length;
            chunks.push(this.createChunk(currentChunk, chunkIndex++, currentStart, endOffset));
            currentStart = endOffset;
        };
        for (const sentence of sentences) {
            if (currentChunk.length + sentence.length <= maxSize) {
                currentChunk += sentence;
                continue;
            }
            if (currentChunk.length > 0) {
                flush();
            }
            currentChunk = sentence;
        }
        // Add remaining chunk
        if (currentChunk.length > 0) {
            flush();
        }
        return chunks;
    }
    /**
     * Split content into sentences.
     *
     * A sentence ends at a run of `.`, `!` or `?` that is followed by a
     * whitespace character or the end of the input. Punctuation followed by
     * any other character (for example the dot in "3.14") does not end a
     * sentence. The returned slices are contiguous: concatenating them
     * reproduces `content` exactly, and trailing text without a terminator
     * becomes the last sentence.
     *
     * @param {string} content - Text to split.
     * @returns {string[]} Sentences in order, each including its terminator
     *   and the single whitespace character that follows it, if any.
     */
    splitIntoSentences(content) {
        const terminatorPattern = /[.!?]+(?:\s|$)/g;
        const sentences = [];
        let cursor = 0;
        for (const match of content.matchAll(terminatorPattern)) {
            const end = match.index + match[0].length;
            sentences.push(content.slice(cursor, end));
            cursor = end;
        }
        // Handle remaining content without sentence ending
        if (cursor < content.length) {
            sentences.push(content.slice(cursor));
        }
        return sentences;
    }
}
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Token Chunker
|
|
3
|
-
*
|
|
4
|
-
* Splits text by token count using a tokenizer.
|
|
5
|
-
* Useful for precise token budget management.
|
|
6
|
-
*/
|
|
7
|
-
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
8
|
-
/**
 * Token Chunker
 *
 * Approximates token-based splitting using word count (about 1.3 tokens per
 * whitespace-separated word). For production, integrate with a proper
 * tokenizer (tiktoken, etc.)
 */
export class TokenChunker extends BaseChunker {
    strategy = "token";
    /**
     * Default configuration: the shared chunker defaults with a 512-token
     * chunk size and a 50-token overlap.
     *
     * @returns {object} Default configuration for this strategy.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 512, // Tokens
            overlap: 50, // Tokens
        };
    }
    /**
     * Split `content` into chunks of roughly `maxSize` estimated tokens,
     * repeating roughly `overlap` estimated tokens between neighbours.
     *
     * Chunk text is the chunk's words joined by single spaces. The start and
     * end offsets are the real positions of the first and last word in
     * `content`, so they stay correct when the input contains repeated words
     * or runs of whitespace.
     *
     * @param {string} content - Text to chunk.
     * @param {object} config - Chunker configuration; `maxSize` (default 512)
     *   and `overlap` (default 50) are read, both in estimated tokens.
     * @returns {Promise<Array>} Chunks built with `this.createChunk(text, index, start, end)`.
     */
    async doChunk(content, config) {
        const maxTokens = config.maxSize ?? 512;
        const overlapTokens = config.overlap ?? 50;
        // Number of trailing words carried into the next chunk.
        const overlapCount = Math.max(0, Math.ceil(overlapTokens / 1.3));
        // Approximate tokenization using words, remembering where each word
        // sits in the original text.
        // In production, use a proper tokenizer like tiktoken
        const words = Array.from(content.matchAll(/\S+/g), (match) => ({
            text: match[0],
            start: match.index,
            end: match.index + match[0].length,
        }));
        const chunks = [];
        let currentWords = [];
        let chunkIndex = 0;
        const flush = () => {
            const chunkText = currentWords.map((word) => word.text).join(" ");
            const startOffset = currentWords[0].start;
            const endOffset = currentWords[currentWords.length - 1].end;
            chunks.push(this.createChunk(chunkText, chunkIndex++, startOffset, endOffset));
        };
        for (const word of words) {
            // Estimate tokens (roughly 1.3 tokens per word on average)
            const estimatedTokens = Math.ceil(currentWords.length * 1.3);
            if (currentWords.length > 0 && estimatedTokens >= maxTokens) {
                flush();
                // Keep overlap words. Always drop at least one word so the
                // window advances, and never call slice(-0), which would
                // keep the whole array instead of none of it.
                const keep = Math.min(overlapCount, currentWords.length - 1);
                currentWords = keep > 0 ? currentWords.slice(-keep) : [];
            }
            currentWords.push(word);
        }
        // Add remaining chunk
        if (currentWords.length > 0) {
            flush();
        }
        return chunks;
    }
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Chunkers Index
|
|
3
|
-
*
|
|
4
|
-
* Exports all chunker implementations.
|
|
5
|
-
*/
|
|
6
|
-
export { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
|
|
7
|
-
export { CharacterChunker } from "./CharacterChunker.js";
|
|
8
|
-
export { RecursiveChunker } from "./RecursiveChunker.js";
|
|
9
|
-
export { SentenceChunker } from "./SentenceChunker.js";
|
|
10
|
-
export { TokenChunker } from "./TokenChunker.js";
|
|
11
|
-
export { MarkdownChunker } from "./MarkdownChunker.js";
|
|
12
|
-
export { HTMLChunker } from "./HTMLChunker.js";
|
|
13
|
-
export { JSONChunker } from "./JSONChunker.js";
|
|
14
|
-
export { LaTeXChunker } from "./LaTeXChunker.js";
|
|
15
|
-
export { SemanticMarkdownChunker } from "./SemanticMarkdownChunker.js";
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Character-based Chunker
|
|
3
|
-
*
|
|
4
|
-
* Simple character-based text splitting with configurable separator and overlap.
|
|
5
|
-
* Best for unstructured text where character count is the primary concern.
|
|
6
|
-
*/
|
|
7
|
-
import { randomUUID } from "crypto";
|
|
8
|
-
/**
 * Character-based chunker implementation
 * Splits text by character count with optional separator
 */
export class CharacterChunker {
    strategy = "character";
    /**
     * Split text into chunks of at most `maxSize` characters.
     *
     * When `separator` is set, the text is first split on it and the segments
     * are packed greedily into chunks; a segment longer than `maxSize` is
     * hard-split at `maxSize` boundaries. Chunks whose text is empty after
     * optional trimming are dropped.
     *
     * @param text - Text to chunk
     * @param config - Options: `maxSize` (default 1000), `overlap` (default 0),
     *   `separator` (default ""), `keepSeparator` (default false),
     *   `trimWhitespace` (default true), `metadata` (default {})
     * @returns Array of chunk records `{ id, text, metadata }`
     * @throws RangeError if a hard split is needed but `maxSize - overlap` is not
     *   positive (the split could never advance)
     */
    async chunk(text, config) {
        const { maxSize = 1000, overlap = 0, separator = "", keepSeparator = false, trimWhitespace = true, metadata = {}, } = config ?? {};
        const chunks = [];
        const documentId = randomUUID();
        if (!text || text.length === 0) {
            return chunks;
        }
        // Append one chunk record for `raw`, which starts at offset `start`.
        // Positions describe the untrimmed span; empty results are skipped.
        const emit = (raw, start) => {
            const chunkText = trimWhitespace ? raw.trim() : raw;
            if (chunkText.length === 0) {
                return;
            }
            chunks.push({
                id: randomUUID(),
                text: chunkText,
                metadata: {
                    documentId,
                    chunkIndex: chunks.length,
                    startPosition: start,
                    endPosition: start + raw.length,
                    documentType: "text",
                    custom: metadata,
                },
            });
        };
        // Split by separator if provided
        const pieces = separator ? text.split(separator) : [text];
        const segments = separator && keepSeparator
            ? pieces.map((piece, i) => (i < pieces.length - 1 ? piece + separator : piece))
            : pieces;
        // How far a hard split advances through an oversized chunk.
        const hardSplitStep = maxSize - overlap;
        let currentChunk = "";
        let startPosition = 0;
        for (const segment of segments) {
            if (currentChunk.length + segment.length <= maxSize) {
                currentChunk += segment;
                continue;
            }
            // Save current chunk if it has content
            emit(currentChunk, startPosition);
            // Handle overlap: advance the start offset BEFORE replacing
            // currentChunk, so the offset is derived from the chunk just saved.
            if (overlap > 0 && currentChunk.length > overlap) {
                startPosition += currentChunk.length - overlap;
                currentChunk = currentChunk.slice(-overlap) + segment;
            }
            else {
                startPosition += currentChunk.length;
                currentChunk = segment;
            }
            // If segment is larger than maxSize, split it further
            while (currentChunk.length > maxSize) {
                if (hardSplitStep <= 0) {
                    // Without forward progress this loop would never terminate.
                    throw new RangeError(maxSize <= 0
                        ? "maxSize must be greater than 0"
                        : "overlap must be less than maxSize");
                }
                emit(currentChunk.slice(0, maxSize), startPosition);
                currentChunk = currentChunk.slice(hardSplitStep);
                startPosition += hardSplitStep;
            }
        }
        // Don't forget the last chunk
        emit(currentChunk, startPosition);
        // Update total chunks count
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }
    /**
     * Validate a chunking configuration without running the chunker.
     *
     * @param config - Configuration to check (`maxSize`, `overlap`, `minSize`)
     * @returns `{ valid, errors, warnings }`; `valid` is true when there are no errors
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        const { maxSize, overlap, minSize } = config ?? {};
        if (maxSize !== undefined && maxSize <= 0) {
            errors.push("maxSize must be greater than 0");
        }
        if (overlap !== undefined && overlap < 0) {
            errors.push("overlap must be non-negative");
        }
        if (overlap !== undefined && maxSize !== undefined && overlap >= maxSize) {
            errors.push("overlap must be less than maxSize");
        }
        if (minSize !== undefined && maxSize !== undefined && minSize > maxSize) {
            warnings.push("minSize is greater than maxSize, some chunks may be smaller than minSize");
        }
        return {
            valid: errors.length === 0,
            errors,
            warnings,
        };
    }
}
|
|
@@ -1,194 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Chunker Registry
|
|
3
|
-
*
|
|
4
|
-
* Central registry for all chunking strategies following NeuroLink's registry pattern.
|
|
5
|
-
* Provides factory methods for creating chunker instances.
|
|
6
|
-
*/
|
|
7
|
-
import { SemanticMarkdownChunker } from "../chunkers/SemanticMarkdownChunker.js";
|
|
8
|
-
import { CharacterChunker } from "./characterChunker.js";
|
|
9
|
-
import { HTMLChunker } from "./htmlChunker.js";
|
|
10
|
-
import { JSONChunker } from "./jsonChunker.js";
|
|
11
|
-
import { LaTeXChunker } from "./latexChunker.js";
|
|
12
|
-
import { MarkdownChunker } from "./markdownChunker.js";
|
|
13
|
-
import { RecursiveChunker } from "./recursiveChunker.js";
|
|
14
|
-
import { SemanticChunker } from "./semanticChunker.js";
|
|
15
|
-
import { SentenceChunker } from "./sentenceChunker.js";
|
|
16
|
-
import { TokenChunker } from "./tokenChunker.js";
|
|
17
|
-
/**
 * Registry for chunking strategies
 * Follows NeuroLink's factory pattern with lazy initialization
 */
export class ChunkerRegistry {
    static chunkers = new Map();
    static initialized = false;
    /**
     * Initialize all built-in chunkers
     *
     * Runs at most once until `reset()` is called. A factory that was already
     * registered under a built-in name is kept, so custom overrides registered
     * before first use are not clobbered.
     */
    static initialize() {
        if (ChunkerRegistry.initialized) {
            return;
        }
        const builtIns = [
            ["character", () => new CharacterChunker()],
            ["recursive", () => new RecursiveChunker()],
            ["sentence", () => new SentenceChunker()],
            ["token", () => new TokenChunker()],
            ["markdown", () => new MarkdownChunker()],
            ["html", () => new HTMLChunker()],
            ["json", () => new JSONChunker()],
            ["latex", () => new LaTeXChunker()],
            ["semantic", () => new SemanticChunker()],
            ["semantic-markdown", () => new SemanticMarkdownChunker()],
        ];
        for (const [strategy, factory] of builtIns) {
            if (!ChunkerRegistry.chunkers.has(strategy)) {
                ChunkerRegistry.register(strategy, factory);
            }
        }
        ChunkerRegistry.initialized = true;
    }
    /**
     * Register a custom chunker
     * @param strategy - Strategy name
     * @param factory - Factory function that creates chunker instance
     */
    static register(strategy, factory) {
        ChunkerRegistry.chunkers.set(strategy, factory);
    }
    /**
     * Get a chunker by strategy name
     * @param strategy - Chunking strategy name
     * @returns Chunker instance (the factory is invoked on every call)
     * @throws Error if strategy is not registered
     */
    static get(strategy) {
        ChunkerRegistry.initialize();
        const factory = ChunkerRegistry.chunkers.get(strategy);
        if (!factory) {
            throw new Error(`Unknown chunking strategy: ${strategy}. Available strategies: ${ChunkerRegistry.getAvailableStrategies().join(", ")}`);
        }
        return factory();
    }
    /**
     * Get all available chunking strategies
     * @returns Array of strategy names
     */
    static getAvailableStrategies() {
        ChunkerRegistry.initialize();
        return Array.from(ChunkerRegistry.chunkers.keys());
    }
    /**
     * Check if a strategy is registered
     * @param strategy - Strategy name to check
     * @returns True if strategy is registered
     */
    static has(strategy) {
        ChunkerRegistry.initialize();
        return ChunkerRegistry.chunkers.has(strategy);
    }
    /**
     * Get strategy recommendation based on content type
     * @param contentType - Document type or MIME type
     * @returns Recommended chunking strategy
     */
    static getRecommendedStrategy(contentType) {
        const normalized = contentType.toLowerCase();
        if (normalized.includes("markdown") || normalized === "md") {
            return "markdown";
        }
        // "htm" also covers "html".
        if (normalized.includes("htm")) {
            return "html";
        }
        if (normalized.includes("json")) {
            return "json";
        }
        // Check for latex specifically - don't match "text" which contains "tex".
        // "/x-tex" covers the common TeX MIME types (application/x-tex, text/x-tex).
        if (normalized.includes("latex") ||
            normalized === "tex" ||
            normalized.endsWith("/tex") ||
            normalized.endsWith("/x-tex")) {
            return "latex";
        }
        if (normalized.includes("code") || normalized.includes("programming")) {
            return "recursive";
        }
        if (normalized.includes("document") || normalized.includes("text")) {
            return "sentence";
        }
        // Default to recursive for general text
        return "recursive";
    }
    /**
     * Get default configuration for a strategy
     * @param strategy - Chunking strategy
     * @returns Default configuration object (`{ maxSize: 1000 }` for unknown strategies)
     */
    static getDefaultConfig(strategy) {
        const defaults = {
            character: {
                maxSize: 1000,
                overlap: 0,
                separator: "",
                keepSeparator: false,
            },
            recursive: {
                maxSize: 1000,
                overlap: 200,
                separators: ["\n\n", "\n", ". ", " ", ""],
            },
            sentence: {
                maxSize: 1000,
                overlap: 0,
                minSentences: 1,
                sentenceEnders: [".", "!", "?"],
            },
            token: {
                maxTokens: 512,
                tokenOverlap: 50,
                tokenizer: "cl100k_base",
            },
            markdown: {
                maxSize: 1000,
                headerLevels: [1, 2, 3],
                preserveCodeBlocks: true,
                includeHeader: true,
            },
            html: {
                maxSize: 1000,
                splitTags: ["div", "p", "section", "article"],
                extractTextOnly: false,
            },
            json: {
                maxSize: 1000,
                maxDepth: 10,
                includeJsonPath: true,
            },
            latex: {
                maxSize: 1000,
                splitEnvironments: ["section", "subsection", "chapter"],
                preserveMath: true,
            },
            semantic: {
                maxSize: 1000,
                similarityThreshold: 0.7,
                joinThreshold: 100,
            },
            "semantic-markdown": {
                maxSize: 1000,
                overlap: 100,
                similarityThreshold: 0.7,
            },
        };
        // Own-property check so names such as "constructor" or "toString" do not
        // resolve to inherited Object.prototype members.
        return Object.prototype.hasOwnProperty.call(defaults, strategy)
            ? defaults[strategy]
            : { maxSize: 1000 };
    }
    /**
     * Reset the registry (useful for testing)
     *
     * Removes every registered factory, including custom ones; built-ins are
     * re-registered lazily on the next lookup.
     */
    static reset() {
        ChunkerRegistry.chunkers.clear();
        ChunkerRegistry.initialized = false;
    }
}
|
|
184
|
-
/**
 * Chunk text in one call by looking up the chunker for a strategy.
 * @param text - Text to chunk
 * @param strategy - Chunking strategy name (default: "recursive")
 * @param config - Strategy-specific configuration, passed through to the chunker
 * @returns Array of chunks produced by the selected chunker
 */
export async function chunkText(text, strategy = "recursive", config) {
    return ChunkerRegistry.get(strategy).chunk(text, config);
}
|