@juspay/neurolink 9.32.0 → 9.32.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/anthropicOAuth.js +1 -1
- package/dist/cli/commands/proxy.js +18 -5
- package/dist/client/aiSdkAdapter.js +1 -1
- package/dist/client/index.js +137 -501
- package/dist/core/factory.js +0 -1
- package/dist/core/redisConversationMemoryManager.js +1 -1
- package/dist/features/ppt/slideGenerator.js +0 -1
- package/dist/features/ppt/utils.js +0 -1
- package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
- package/dist/mcp/elicitationProtocol.js +1 -1
- package/dist/mcp/servers/agent/directToolsServer.js +0 -1
- package/dist/providers/azureOpenai.js +1 -1
- package/dist/providers/huggingFace.js +0 -1
- package/dist/providers/openaiCompatible.js +0 -1
- package/dist/sdk/toolRegistration.js +0 -1
- package/dist/server/openapi/generator.js +1 -1
- package/dist/server/routes/claudeProxyRoutes.js +45 -9
- package/dist/types/configTypes.js +0 -5
- package/dist/types/modelTypes.js +0 -1
- package/dist/types/tools.js +0 -1
- package/dist/types/typeAliases.js +0 -1
- package/dist/types/utilities.js +1 -1
- package/dist/types/workflowTypes.js +0 -1
- package/dist/utils/providerRetry.js +0 -1
- package/dist/utils/providerUtils.js +0 -1
- package/package.json +2 -2
- package/dist/client/adapters/providerImageAdapter.js +0 -588
- package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
- package/dist/client/adapters/video/directorPipeline.js +0 -516
- package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
- package/dist/client/adapters/video/frameExtractor.js +0 -143
- package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
- package/dist/client/adapters/video/videoAnalyzer.js +0 -238
- package/dist/client/adapters/video/videoMerger.js +0 -171
- package/dist/client/agent/directTools.js +0 -840
- package/dist/client/auth/AuthProviderFactory.js +0 -111
- package/dist/client/auth/AuthProviderRegistry.js +0 -190
- package/dist/client/auth/RequestContext.js +0 -78
- package/dist/client/auth/accountPool.js +0 -178
- package/dist/client/auth/anthropicOAuth.js +0 -974
- package/dist/client/auth/authContext.js +0 -314
- package/dist/client/auth/errors.js +0 -39
- package/dist/client/auth/index.js +0 -61
- package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
- package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
- package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
- package/dist/client/auth/providers/CognitoProvider.js +0 -304
- package/dist/client/auth/providers/KeycloakProvider.js +0 -393
- package/dist/client/auth/providers/auth0.js +0 -274
- package/dist/client/auth/providers/betterAuth.js +0 -182
- package/dist/client/auth/providers/clerk.js +0 -317
- package/dist/client/auth/providers/custom.js +0 -112
- package/dist/client/auth/providers/firebase.js +0 -226
- package/dist/client/auth/providers/jwt.js +0 -212
- package/dist/client/auth/providers/oauth2.js +0 -303
- package/dist/client/auth/providers/supabase.js +0 -259
- package/dist/client/auth/providers/workos.js +0 -284
- package/dist/client/auth/serverBridge.js +0 -25
- package/dist/client/auth/sessionManager.js +0 -437
- package/dist/client/auth/tokenStore.js +0 -799
- package/dist/client/client/aiSdkAdapter.js +0 -487
- package/dist/client/client/auth.js +0 -473
- package/dist/client/client/errors.js +0 -552
- package/dist/client/client/httpClient.js +0 -837
- package/dist/client/client/index.js +0 -172
- package/dist/client/client/interceptors.js +0 -601
- package/dist/client/client/sseClient.js +0 -545
- package/dist/client/client/streamingClient.js +0 -917
- package/dist/client/client/wsClient.js +0 -369
- package/dist/client/config/configManager.js +0 -303
- package/dist/client/config/conversationMemory.js +0 -86
- package/dist/client/config/taskClassificationConfig.js +0 -148
- package/dist/client/constants/contextWindows.js +0 -295
- package/dist/client/constants/enums.js +0 -853
- package/dist/client/constants/index.js +0 -207
- package/dist/client/constants/performance.js +0 -389
- package/dist/client/constants/retry.js +0 -266
- package/dist/client/constants/timeouts.js +0 -182
- package/dist/client/constants/tokens.js +0 -380
- package/dist/client/constants/videoErrors.js +0 -46
- package/dist/client/context/budgetChecker.js +0 -98
- package/dist/client/context/contextCompactor.js +0 -205
- package/dist/client/context/emergencyTruncation.js +0 -88
- package/dist/client/context/errorDetection.js +0 -171
- package/dist/client/context/errors.js +0 -21
- package/dist/client/context/fileTokenBudget.js +0 -127
- package/dist/client/context/prompts/summarizationPrompt.js +0 -117
- package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
- package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
- package/dist/client/context/stages/structuredSummarizer.js +0 -99
- package/dist/client/context/stages/toolOutputPruner.js +0 -52
- package/dist/client/context/summarizationEngine.js +0 -136
- package/dist/client/context/toolOutputLimits.js +0 -78
- package/dist/client/context/toolPairRepair.js +0 -66
- package/dist/client/core/analytics.js +0 -88
- package/dist/client/core/baseProvider.js +0 -1385
- package/dist/client/core/constants.js +0 -140
- package/dist/client/core/conversationMemoryFactory.js +0 -141
- package/dist/client/core/conversationMemoryInitializer.js +0 -128
- package/dist/client/core/conversationMemoryManager.js +0 -344
- package/dist/client/core/dynamicModels.js +0 -358
- package/dist/client/core/evaluation.js +0 -309
- package/dist/client/core/evaluationProviders.js +0 -248
- package/dist/client/core/factory.js +0 -412
- package/dist/client/core/infrastructure/baseError.js +0 -22
- package/dist/client/core/infrastructure/baseFactory.js +0 -54
- package/dist/client/core/infrastructure/baseRegistry.js +0 -53
- package/dist/client/core/infrastructure/index.js +0 -5
- package/dist/client/core/infrastructure/retry.js +0 -20
- package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
- package/dist/client/core/modelConfiguration.js +0 -851
- package/dist/client/core/modules/GenerationHandler.js +0 -588
- package/dist/client/core/modules/MessageBuilder.js +0 -273
- package/dist/client/core/modules/StreamHandler.js +0 -185
- package/dist/client/core/modules/TelemetryHandler.js +0 -203
- package/dist/client/core/modules/ToolsManager.js +0 -499
- package/dist/client/core/modules/Utilities.js +0 -331
- package/dist/client/core/redisConversationMemoryManager.js +0 -1435
- package/dist/client/core/streamAnalytics.js +0 -131
- package/dist/client/evaluation/contextBuilder.js +0 -134
- package/dist/client/evaluation/index.js +0 -61
- package/dist/client/evaluation/prompts.js +0 -73
- package/dist/client/evaluation/ragasEvaluator.js +0 -110
- package/dist/client/evaluation/retryManager.js +0 -78
- package/dist/client/evaluation/scoring.js +0 -61
- package/dist/client/factories/providerFactory.js +0 -166
- package/dist/client/factories/providerRegistry.js +0 -166
- package/dist/client/features/ppt/constants.js +0 -896
- package/dist/client/features/ppt/contentPlanner.js +0 -529
- package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
- package/dist/client/features/ppt/slideGenerator.js +0 -532
- package/dist/client/features/ppt/slideRenderers.js +0 -2383
- package/dist/client/features/ppt/slideTypeInference.js +0 -405
- package/dist/client/features/ppt/types.js +0 -13
- package/dist/client/features/ppt/utils.js +0 -443
- package/dist/client/files/fileReferenceRegistry.js +0 -1543
- package/dist/client/files/fileTools.js +0 -450
- package/dist/client/files/streamingReader.js +0 -321
- package/dist/client/files/types.js +0 -23
- package/dist/client/hitl/hitlErrors.js +0 -54
- package/dist/client/hitl/hitlManager.js +0 -460
- package/dist/client/mcp/agentExposure.js +0 -356
- package/dist/client/mcp/auth/index.js +0 -11
- package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
- package/dist/client/mcp/auth/tokenStorage.js +0 -134
- package/dist/client/mcp/batching/index.js +0 -10
- package/dist/client/mcp/batching/requestBatcher.js +0 -441
- package/dist/client/mcp/caching/index.js +0 -10
- package/dist/client/mcp/caching/toolCache.js +0 -433
- package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
- package/dist/client/mcp/elicitation/index.js +0 -11
- package/dist/client/mcp/elicitation/types.js +0 -10
- package/dist/client/mcp/elicitationProtocol.js +0 -375
- package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
- package/dist/client/mcp/externalServerManager.js +0 -1478
- package/dist/client/mcp/factory.js +0 -161
- package/dist/client/mcp/flexibleToolValidator.js +0 -161
- package/dist/client/mcp/httpRateLimiter.js +0 -391
- package/dist/client/mcp/httpRetryHandler.js +0 -178
- package/dist/client/mcp/index.js +0 -74
- package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
- package/dist/client/mcp/mcpClientFactory.js +0 -708
- package/dist/client/mcp/mcpRegistryClient.js +0 -488
- package/dist/client/mcp/mcpServerBase.js +0 -373
- package/dist/client/mcp/multiServerManager.js +0 -579
- package/dist/client/mcp/registry.js +0 -158
- package/dist/client/mcp/routing/index.js +0 -10
- package/dist/client/mcp/routing/toolRouter.js +0 -416
- package/dist/client/mcp/serverCapabilities.js +0 -502
- package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
- package/dist/client/mcp/toolAnnotations.js +0 -239
- package/dist/client/mcp/toolConverter.js +0 -258
- package/dist/client/mcp/toolDiscoveryService.js +0 -798
- package/dist/client/mcp/toolIntegration.js +0 -334
- package/dist/client/mcp/toolRegistry.js +0 -729
- package/dist/client/memory/hippocampusInitializer.js +0 -19
- package/dist/client/memory/memoryRetrievalTools.js +0 -166
- package/dist/client/middleware/builtin/analytics.js +0 -132
- package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
- package/dist/client/middleware/builtin/guardrails.js +0 -109
- package/dist/client/middleware/builtin/lifecycle.js +0 -168
- package/dist/client/middleware/factory.js +0 -327
- package/dist/client/middleware/registry.js +0 -295
- package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
- package/dist/client/models/anthropicModels.js +0 -527
- package/dist/client/neurolink.js +0 -8233
- package/dist/client/observability/exporterRegistry.js +0 -413
- package/dist/client/observability/exporters/arizeExporter.js +0 -138
- package/dist/client/observability/exporters/baseExporter.js +0 -190
- package/dist/client/observability/exporters/braintrustExporter.js +0 -154
- package/dist/client/observability/exporters/datadogExporter.js +0 -196
- package/dist/client/observability/exporters/laminarExporter.js +0 -302
- package/dist/client/observability/exporters/langfuseExporter.js +0 -209
- package/dist/client/observability/exporters/langsmithExporter.js +0 -143
- package/dist/client/observability/exporters/otelExporter.js +0 -164
- package/dist/client/observability/exporters/posthogExporter.js +0 -287
- package/dist/client/observability/exporters/sentryExporter.js +0 -165
- package/dist/client/observability/index.js +0 -31
- package/dist/client/observability/metricsAggregator.js +0 -556
- package/dist/client/observability/otelBridge.js +0 -131
- package/dist/client/observability/retryPolicy.js +0 -383
- package/dist/client/observability/sampling/samplers.js +0 -216
- package/dist/client/observability/spanProcessor.js +0 -303
- package/dist/client/observability/tokenTracker.js +0 -413
- package/dist/client/observability/types/exporterTypes.js +0 -5
- package/dist/client/observability/types/index.js +0 -4
- package/dist/client/observability/types/spanTypes.js +0 -92
- package/dist/client/observability/utils/safeMetadata.js +0 -25
- package/dist/client/observability/utils/spanSerializer.js +0 -292
- package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
- package/dist/client/processors/base/BaseFileProcessor.js +0 -614
- package/dist/client/processors/base/types.js +0 -82
- package/dist/client/processors/config/fileTypes.js +0 -520
- package/dist/client/processors/config/index.js +0 -92
- package/dist/client/processors/config/languageMap.js +0 -410
- package/dist/client/processors/config/mimeTypes.js +0 -363
- package/dist/client/processors/config/sizeLimits.js +0 -258
- package/dist/client/processors/document/ExcelProcessor.js +0 -590
- package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
- package/dist/client/processors/document/PptxProcessor.js +0 -157
- package/dist/client/processors/document/RtfProcessor.js +0 -361
- package/dist/client/processors/document/WordProcessor.js +0 -353
- package/dist/client/processors/errors/FileErrorCode.js +0 -255
- package/dist/client/processors/errors/errorHelpers.js +0 -386
- package/dist/client/processors/errors/errorSerializer.js +0 -507
- package/dist/client/processors/errors/index.js +0 -49
- package/dist/client/processors/markup/SvgProcessor.js +0 -240
- package/dist/client/processors/media/AudioProcessor.js +0 -707
- package/dist/client/processors/media/VideoProcessor.js +0 -1045
- package/dist/client/providers/amazonBedrock.js +0 -1512
- package/dist/client/providers/amazonSagemaker.js +0 -162
- package/dist/client/providers/anthropic.js +0 -831
- package/dist/client/providers/azureOpenai.js +0 -143
- package/dist/client/providers/googleAiStudio.js +0 -1200
- package/dist/client/providers/googleNativeGemini3.js +0 -543
- package/dist/client/providers/googleVertex.js +0 -2936
- package/dist/client/providers/huggingFace.js +0 -315
- package/dist/client/providers/litellm.js +0 -488
- package/dist/client/providers/mistral.js +0 -157
- package/dist/client/providers/ollama.js +0 -1579
- package/dist/client/providers/openAI.js +0 -627
- package/dist/client/providers/openRouter.js +0 -543
- package/dist/client/providers/openaiCompatible.js +0 -290
- package/dist/client/providers/providerTypeUtils.js +0 -46
- package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
- package/dist/client/providers/sagemaker/client.js +0 -472
- package/dist/client/providers/sagemaker/config.js +0 -317
- package/dist/client/providers/sagemaker/detection.js +0 -606
- package/dist/client/providers/sagemaker/error-constants.js +0 -227
- package/dist/client/providers/sagemaker/errors.js +0 -299
- package/dist/client/providers/sagemaker/language-model.js +0 -775
- package/dist/client/providers/sagemaker/parsers.js +0 -634
- package/dist/client/providers/sagemaker/streaming.js +0 -331
- package/dist/client/providers/sagemaker/structured-parser.js +0 -625
- package/dist/client/proxy/accountQuota.js +0 -162
- package/dist/client/proxy/claudeFormat.js +0 -595
- package/dist/client/proxy/modelRouter.js +0 -29
- package/dist/client/proxy/oauthFetch.js +0 -367
- package/dist/client/proxy/proxyFetch.js +0 -586
- package/dist/client/proxy/requestLogger.js +0 -207
- package/dist/client/proxy/tokenRefresh.js +0 -124
- package/dist/client/proxy/usageStats.js +0 -74
- package/dist/client/proxy/utils/noProxyUtils.js +0 -149
- package/dist/client/rag/ChunkerFactory.js +0 -320
- package/dist/client/rag/ChunkerRegistry.js +0 -421
- package/dist/client/rag/chunkers/BaseChunker.js +0 -143
- package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
- package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
- package/dist/client/rag/chunkers/JSONChunker.js +0 -68
- package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
- package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
- package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
- package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
- package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
- package/dist/client/rag/chunkers/TokenChunker.js +0 -61
- package/dist/client/rag/chunkers/index.js +0 -15
- package/dist/client/rag/chunking/characterChunker.js +0 -142
- package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
- package/dist/client/rag/chunking/htmlChunker.js +0 -247
- package/dist/client/rag/chunking/index.js +0 -17
- package/dist/client/rag/chunking/jsonChunker.js +0 -281
- package/dist/client/rag/chunking/latexChunker.js +0 -251
- package/dist/client/rag/chunking/markdownChunker.js +0 -373
- package/dist/client/rag/chunking/recursiveChunker.js +0 -148
- package/dist/client/rag/chunking/semanticChunker.js +0 -306
- package/dist/client/rag/chunking/sentenceChunker.js +0 -230
- package/dist/client/rag/chunking/tokenChunker.js +0 -183
- package/dist/client/rag/document/MDocument.js +0 -392
- package/dist/client/rag/document/index.js +0 -5
- package/dist/client/rag/document/loaders.js +0 -500
- package/dist/client/rag/errors/RAGError.js +0 -274
- package/dist/client/rag/errors/index.js +0 -6
- package/dist/client/rag/graphRag/graphRAG.js +0 -401
- package/dist/client/rag/graphRag/index.js +0 -4
- package/dist/client/rag/index.js +0 -141
- package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
- package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
- package/dist/client/rag/metadata/index.js +0 -9
- package/dist/client/rag/metadata/metadataExtractor.js +0 -280
- package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
- package/dist/client/rag/pipeline/contextAssembly.js +0 -341
- package/dist/client/rag/pipeline/index.js +0 -5
- package/dist/client/rag/ragIntegration.js +0 -321
- package/dist/client/rag/reranker/RerankerFactory.js +0 -430
- package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
- package/dist/client/rag/reranker/index.js +0 -9
- package/dist/client/rag/reranker/reranker.js +0 -277
- package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
- package/dist/client/rag/resilience/RetryHandler.js +0 -304
- package/dist/client/rag/resilience/index.js +0 -7
- package/dist/client/rag/retrieval/hybridSearch.js +0 -335
- package/dist/client/rag/retrieval/index.js +0 -5
- package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
- package/dist/client/rag/types.js +0 -8
- package/dist/client/sdk/toolRegistration.js +0 -377
- package/dist/client/server/abstract/baseServerAdapter.js +0 -575
- package/dist/client/server/adapters/expressAdapter.js +0 -486
- package/dist/client/server/adapters/fastifyAdapter.js +0 -472
- package/dist/client/server/adapters/honoAdapter.js +0 -632
- package/dist/client/server/adapters/koaAdapter.js +0 -510
- package/dist/client/server/errors.js +0 -486
- package/dist/client/server/factory/serverAdapterFactory.js +0 -160
- package/dist/client/server/index.js +0 -108
- package/dist/client/server/middleware/abortSignal.js +0 -111
- package/dist/client/server/middleware/auth.js +0 -388
- package/dist/client/server/middleware/cache.js +0 -359
- package/dist/client/server/middleware/common.js +0 -281
- package/dist/client/server/middleware/deprecation.js +0 -190
- package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
- package/dist/client/server/middleware/rateLimit.js +0 -227
- package/dist/client/server/middleware/validation.js +0 -388
- package/dist/client/server/openapi/generator.js +0 -398
- package/dist/client/server/openapi/index.js +0 -36
- package/dist/client/server/openapi/schemas.js +0 -695
- package/dist/client/server/openapi/templates.js +0 -374
- package/dist/client/server/routes/agentRoutes.js +0 -189
- package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
- package/dist/client/server/routes/healthRoutes.js +0 -187
- package/dist/client/server/routes/index.js +0 -57
- package/dist/client/server/routes/mcpRoutes.js +0 -342
- package/dist/client/server/routes/memoryRoutes.js +0 -350
- package/dist/client/server/routes/openApiRoutes.js +0 -126
- package/dist/client/server/routes/toolRoutes.js +0 -199
- package/dist/client/server/streaming/dataStream.js +0 -486
- package/dist/client/server/streaming/index.js +0 -11
- package/dist/client/server/types.js +0 -67
- package/dist/client/server/utils/redaction.js +0 -334
- package/dist/client/server/utils/validation.js +0 -243
- package/dist/client/server/websocket/WebSocketHandler.js +0 -383
- package/dist/client/server/websocket/index.js +0 -4
- package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
- package/dist/client/telemetry/attributes.js +0 -100
- package/dist/client/telemetry/index.js +0 -26
- package/dist/client/telemetry/telemetryService.js +0 -308
- package/dist/client/telemetry/tracers.js +0 -17
- package/dist/client/telemetry/withSpan.js +0 -34
- package/dist/client/types/actionTypes.js +0 -6
- package/dist/client/types/analytics.js +0 -5
- package/dist/client/types/authTypes.js +0 -9
- package/dist/client/types/circuitBreakerErrors.js +0 -34
- package/dist/client/types/cli.js +0 -21
- package/dist/client/types/clientTypes.js +0 -10
- package/dist/client/types/common.js +0 -51
- package/dist/client/types/configTypes.js +0 -49
- package/dist/client/types/content.js +0 -19
- package/dist/client/types/contextTypes.js +0 -400
- package/dist/client/types/conversation.js +0 -47
- package/dist/client/types/conversationMemoryInterface.js +0 -6
- package/dist/client/types/domainTypes.js +0 -5
- package/dist/client/types/errors.js +0 -167
- package/dist/client/types/evaluation.js +0 -5
- package/dist/client/types/evaluationProviders.js +0 -5
- package/dist/client/types/evaluationTypes.js +0 -1
- package/dist/client/types/externalMcp.js +0 -6
- package/dist/client/types/fileReferenceTypes.js +0 -8
- package/dist/client/types/fileTypes.js +0 -4
- package/dist/client/types/generateTypes.js +0 -1
- package/dist/client/types/guardrails.js +0 -1
- package/dist/client/types/hitlTypes.js +0 -8
- package/dist/client/types/index.js +0 -57
- package/dist/client/types/mcpTypes.js +0 -5
- package/dist/client/types/middlewareTypes.js +0 -1
- package/dist/client/types/modelTypes.js +0 -30
- package/dist/client/types/multimodal.js +0 -135
- package/dist/client/types/observability.js +0 -6
- package/dist/client/types/pptTypes.js +0 -82
- package/dist/client/types/providers.js +0 -111
- package/dist/client/types/proxyTypes.js +0 -16
- package/dist/client/types/ragTypes.js +0 -7
- package/dist/client/types/sdkTypes.js +0 -8
- package/dist/client/types/serviceTypes.js +0 -5
- package/dist/client/types/streamTypes.js +0 -1
- package/dist/client/types/subscriptionTypes.js +0 -9
- package/dist/client/types/taskClassificationTypes.js +0 -5
- package/dist/client/types/tools.js +0 -24
- package/dist/client/types/ttsTypes.js +0 -57
- package/dist/client/types/typeAliases.js +0 -48
- package/dist/client/types/utilities.js +0 -4
- package/dist/client/types/workflowTypes.js +0 -30
- package/dist/client/utils/async/withTimeout.js +0 -98
- package/dist/client/utils/asyncMutex.js +0 -60
- package/dist/client/utils/conversationMemory.js +0 -431
- package/dist/client/utils/csvProcessor.js +0 -846
- package/dist/client/utils/errorHandling.js +0 -936
- package/dist/client/utils/evaluationUtils.js +0 -131
- package/dist/client/utils/factoryProcessing.js +0 -589
- package/dist/client/utils/fileDetector.js +0 -2161
- package/dist/client/utils/imageCache.js +0 -376
- package/dist/client/utils/imageProcessor.js +0 -704
- package/dist/client/utils/logger.js +0 -491
- package/dist/client/utils/mcpDefaults.js +0 -134
- package/dist/client/utils/messageBuilder.js +0 -1653
- package/dist/client/utils/modelAliasResolver.js +0 -54
- package/dist/client/utils/modelDetection.js +0 -80
- package/dist/client/utils/modelRouter.js +0 -292
- package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
- package/dist/client/utils/observabilityHelpers.js +0 -47
- package/dist/client/utils/parameterValidation.js +0 -966
- package/dist/client/utils/pdfProcessor.js +0 -410
- package/dist/client/utils/performance.js +0 -222
- package/dist/client/utils/pricing.js +0 -340
- package/dist/client/utils/promptRedaction.js +0 -62
- package/dist/client/utils/providerConfig.js +0 -1009
- package/dist/client/utils/providerHealth.js +0 -1237
- package/dist/client/utils/providerRetry.js +0 -112
- package/dist/client/utils/providerUtils.js +0 -434
- package/dist/client/utils/rateLimiter.js +0 -200
- package/dist/client/utils/redis.js +0 -368
- package/dist/client/utils/retryHandler.js +0 -269
- package/dist/client/utils/retryability.js +0 -22
- package/dist/client/utils/sanitizers/svg.js +0 -481
- package/dist/client/utils/schemaConversion.js +0 -255
- package/dist/client/utils/taskClassificationUtils.js +0 -149
- package/dist/client/utils/taskClassifier.js +0 -94
- package/dist/client/utils/thinkingConfig.js +0 -104
- package/dist/client/utils/timeout.js +0 -359
- package/dist/client/utils/tokenEstimation.js +0 -142
- package/dist/client/utils/tokenLimits.js +0 -125
- package/dist/client/utils/tokenUtils.js +0 -239
- package/dist/client/utils/toolUtils.js +0 -75
- package/dist/client/utils/transformationUtils.js +0 -554
- package/dist/client/utils/ttsProcessor.js +0 -286
- package/dist/client/utils/typeUtils.js +0 -97
- package/dist/client/utils/videoAnalysisProcessor.js +0 -67
- package/dist/client/workflow/config.js +0 -398
- package/dist/client/workflow/core/ensembleExecutor.js +0 -407
- package/dist/client/workflow/core/judgeScorer.js +0 -544
- package/dist/client/workflow/core/responseConditioner.js +0 -225
- package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
- package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
- package/dist/client/workflow/core/types/index.js +0 -7
- package/dist/client/workflow/core/types/judgeTypes.js +0 -7
- package/dist/client/workflow/core/types/layerTypes.js +0 -7
- package/dist/client/workflow/core/types/registryTypes.js +0 -7
- package/dist/client/workflow/core/workflowRegistry.js +0 -304
- package/dist/client/workflow/core/workflowRunner.js +0 -586
- package/dist/client/workflow/index.js +0 -50
- package/dist/client/workflow/types.js +0 -9
- package/dist/client/workflow/utils/types/index.js +0 -7
- package/dist/client/workflow/utils/workflowMetrics.js +0 -311
- package/dist/client/workflow/utils/workflowValidation.js +0 -420
- package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
- package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
- package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
- package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
- /package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
|
@@ -1,251 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LaTeX-aware Chunker
|
|
3
|
-
*
|
|
4
|
-
* Splits LaTeX documents based on structure (sections, environments, math).
|
|
5
|
-
* Best for academic papers, scientific documents, and mathematical content.
|
|
6
|
-
*/
|
|
7
|
-
import { randomUUID } from "crypto";
|
|
8
|
-
/**
|
|
9
|
-
* LaTeX-aware chunker implementation
|
|
10
|
-
* Splits based on LaTeX structure (sections, environments)
|
|
11
|
-
*/
|
|
12
|
-
export class LaTeXChunker {
|
|
13
|
-
strategy = "latex";
|
|
14
|
-
defaultSplitEnvironments = [
|
|
15
|
-
"section",
|
|
16
|
-
"subsection",
|
|
17
|
-
"subsubsection",
|
|
18
|
-
"chapter",
|
|
19
|
-
"part",
|
|
20
|
-
];
|
|
21
|
-
mathEnvironments = [
|
|
22
|
-
"equation",
|
|
23
|
-
"equation*",
|
|
24
|
-
"align",
|
|
25
|
-
"align*",
|
|
26
|
-
"gather",
|
|
27
|
-
"gather*",
|
|
28
|
-
"multline",
|
|
29
|
-
"multline*",
|
|
30
|
-
"displaymath",
|
|
31
|
-
];
|
|
32
|
-
async chunk(text, config) {
|
|
33
|
-
const { maxSize = 1000, overlap = 0, splitEnvironments = this.defaultSplitEnvironments, preserveMath = true, includePreamble = true, trimWhitespace = true, metadata = {}, } = config || {};
|
|
34
|
-
const documentId = randomUUID();
|
|
35
|
-
const chunks = [];
|
|
36
|
-
if (!text || text.length === 0) {
|
|
37
|
-
return chunks;
|
|
38
|
-
}
|
|
39
|
-
// Extract preamble if present
|
|
40
|
-
const preambleMatch = text.match(/^([\s\S]*?)\\begin\{document\}([\s\S]*?)\\end\{document\}/);
|
|
41
|
-
let preamble = "";
|
|
42
|
-
let documentContent = text;
|
|
43
|
-
if (preambleMatch) {
|
|
44
|
-
preamble = preambleMatch[1].trim();
|
|
45
|
-
documentContent = preambleMatch[2];
|
|
46
|
-
// Add preamble as first chunk if requested
|
|
47
|
-
if (includePreamble && preamble.length > 0) {
|
|
48
|
-
chunks.push({
|
|
49
|
-
id: randomUUID(),
|
|
50
|
-
text: preamble,
|
|
51
|
-
metadata: {
|
|
52
|
-
documentId,
|
|
53
|
-
chunkIndex: 0,
|
|
54
|
-
startPosition: 0,
|
|
55
|
-
endPosition: preamble.length,
|
|
56
|
-
documentType: "latex",
|
|
57
|
-
latexEnvironment: "preamble",
|
|
58
|
-
custom: metadata,
|
|
59
|
-
},
|
|
60
|
-
});
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
// Protect math environments
|
|
64
|
-
let processedContent = documentContent;
|
|
65
|
-
const mathBlocks = [];
|
|
66
|
-
if (preserveMath) {
|
|
67
|
-
// Protect display math environments
|
|
68
|
-
for (const env of this.mathEnvironments) {
|
|
69
|
-
const envPattern = new RegExp(`\\\\begin\\{${env}\\}[\\s\\S]*?\\\\end\\{${env}\\}`, "g");
|
|
70
|
-
processedContent = processedContent.replace(envPattern, (match) => {
|
|
71
|
-
const placeholder = `__MATH_${mathBlocks.length}__`;
|
|
72
|
-
mathBlocks.push({ placeholder, content: match });
|
|
73
|
-
return placeholder;
|
|
74
|
-
});
|
|
75
|
-
}
|
|
76
|
-
// Protect inline math
|
|
77
|
-
processedContent = processedContent.replace(/\$\$[\s\S]*?\$\$/g, (match) => {
|
|
78
|
-
const placeholder = `__MATH_${mathBlocks.length}__`;
|
|
79
|
-
mathBlocks.push({ placeholder, content: match });
|
|
80
|
-
return placeholder;
|
|
81
|
-
});
|
|
82
|
-
processedContent = processedContent.replace(/\$[^$]+\$/g, (match) => {
|
|
83
|
-
const placeholder = `__MATH_${mathBlocks.length}__`;
|
|
84
|
-
mathBlocks.push({ placeholder, content: match });
|
|
85
|
-
return placeholder;
|
|
86
|
-
});
|
|
87
|
-
// Protect \[ \] math
|
|
88
|
-
processedContent = processedContent.replace(/\\\[[\s\S]*?\\\]/g, (match) => {
|
|
89
|
-
const placeholder = `__MATH_${mathBlocks.length}__`;
|
|
90
|
-
mathBlocks.push({ placeholder, content: match });
|
|
91
|
-
return placeholder;
|
|
92
|
-
});
|
|
93
|
-
}
|
|
94
|
-
// Split by sectioning commands
|
|
95
|
-
const sections = this.splitBySections(processedContent, splitEnvironments);
|
|
96
|
-
let chunkIndex = chunks.length;
|
|
97
|
-
let currentPosition = includePreamble && preamble.length > 0 ? preamble.length : 0;
|
|
98
|
-
for (const section of sections) {
|
|
99
|
-
const { title, content, environment } = section;
|
|
100
|
-
// Restore math blocks
|
|
101
|
-
let restoredContent = content;
|
|
102
|
-
for (const { placeholder, content: mathContent } of mathBlocks) {
|
|
103
|
-
restoredContent = restoredContent.replace(placeholder, mathContent);
|
|
104
|
-
}
|
|
105
|
-
// Split if content is too large
|
|
106
|
-
const contentChunks = this.splitContent(restoredContent, maxSize, overlap);
|
|
107
|
-
for (let i = 0; i < contentChunks.length; i++) {
|
|
108
|
-
let chunkText = contentChunks[i];
|
|
109
|
-
// Include section command in first chunk
|
|
110
|
-
if (i === 0 && title && environment) {
|
|
111
|
-
chunkText = `\\${environment}{${title}}\n${chunkText}`;
|
|
112
|
-
}
|
|
113
|
-
const finalText = trimWhitespace ? chunkText.trim() : chunkText;
|
|
114
|
-
if (finalText.length > 0) {
|
|
115
|
-
chunks.push({
|
|
116
|
-
id: randomUUID(),
|
|
117
|
-
text: finalText,
|
|
118
|
-
metadata: {
|
|
119
|
-
documentId,
|
|
120
|
-
chunkIndex,
|
|
121
|
-
startPosition: currentPosition,
|
|
122
|
-
endPosition: currentPosition + chunkText.length,
|
|
123
|
-
documentType: "latex",
|
|
124
|
-
latexEnvironment: environment ?? undefined,
|
|
125
|
-
header: title ?? undefined,
|
|
126
|
-
custom: metadata,
|
|
127
|
-
},
|
|
128
|
-
});
|
|
129
|
-
chunkIndex++;
|
|
130
|
-
}
|
|
131
|
-
currentPosition += chunkText.length;
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
// Update total chunks count
|
|
135
|
-
chunks.forEach((chunk) => {
|
|
136
|
-
chunk.metadata.totalChunks = chunks.length;
|
|
137
|
-
});
|
|
138
|
-
return chunks;
|
|
139
|
-
}
|
|
140
|
-
/**
|
|
141
|
-
* Split LaTeX by sectioning commands
|
|
142
|
-
*/
|
|
143
|
-
splitBySections(content, splitEnvironments) {
|
|
144
|
-
const sections = [];
|
|
145
|
-
// Build pattern for sectioning commands
|
|
146
|
-
const envPattern = splitEnvironments.join("|");
|
|
147
|
-
const sectionPattern = new RegExp(`\\\\(${envPattern})\\*?\\{([^}]*)\\}`, "g");
|
|
148
|
-
let lastIndex = 0;
|
|
149
|
-
let lastTitle = null;
|
|
150
|
-
let lastEnvironment = null;
|
|
151
|
-
let match;
|
|
152
|
-
// Reset regex
|
|
153
|
-
sectionPattern.lastIndex = 0;
|
|
154
|
-
while ((match = sectionPattern.exec(content)) !== null) {
|
|
155
|
-
// Content before this section
|
|
156
|
-
if (match.index > lastIndex) {
|
|
157
|
-
const sectionContent = content.slice(lastIndex, match.index);
|
|
158
|
-
if (sectionContent.trim()) {
|
|
159
|
-
sections.push({
|
|
160
|
-
title: lastTitle,
|
|
161
|
-
content: sectionContent.trim(),
|
|
162
|
-
environment: lastEnvironment,
|
|
163
|
-
});
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
lastEnvironment = match[1];
|
|
167
|
-
lastTitle = match[2];
|
|
168
|
-
lastIndex = match.index + match[0].length;
|
|
169
|
-
}
|
|
170
|
-
// Don't forget content after the last section
|
|
171
|
-
if (lastIndex < content.length) {
|
|
172
|
-
const remaining = content.slice(lastIndex);
|
|
173
|
-
if (remaining.trim()) {
|
|
174
|
-
sections.push({
|
|
175
|
-
title: lastTitle,
|
|
176
|
-
content: remaining.trim(),
|
|
177
|
-
environment: lastEnvironment,
|
|
178
|
-
});
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
// If no sections found, return entire content
|
|
182
|
-
if (sections.length === 0 && content.trim()) {
|
|
183
|
-
sections.push({
|
|
184
|
-
title: null,
|
|
185
|
-
content: content.trim(),
|
|
186
|
-
environment: null,
|
|
187
|
-
});
|
|
188
|
-
}
|
|
189
|
-
return sections;
|
|
190
|
-
}
|
|
191
|
-
/**
|
|
192
|
-
* Split content that exceeds max size
|
|
193
|
-
*/
|
|
194
|
-
splitContent(content, maxSize, overlap) {
|
|
195
|
-
const effectiveMaxSize = Math.max(maxSize, 1);
|
|
196
|
-
const effectiveOverlap = Math.min(Math.max(overlap, 0), effectiveMaxSize - 1);
|
|
197
|
-
if (content.length <= effectiveMaxSize) {
|
|
198
|
-
return [content];
|
|
199
|
-
}
|
|
200
|
-
const chunks = [];
|
|
201
|
-
let start = 0;
|
|
202
|
-
while (start < content.length) {
|
|
203
|
-
let end = Math.min(start + effectiveMaxSize, content.length);
|
|
204
|
-
// Try to break at paragraph boundary
|
|
205
|
-
if (end < content.length) {
|
|
206
|
-
const searchStart = Math.max(start, end - 200);
|
|
207
|
-
const searchText = content.slice(searchStart, end);
|
|
208
|
-
// Look for paragraph break
|
|
209
|
-
const paragraphBreak = searchText.lastIndexOf("\n\n");
|
|
210
|
-
if (paragraphBreak > 0) {
|
|
211
|
-
end = searchStart + paragraphBreak;
|
|
212
|
-
}
|
|
213
|
-
else {
|
|
214
|
-
// Look for sentence break
|
|
215
|
-
const sentenceBreak = searchText.search(/[.!?]\s+[A-Z\\]/);
|
|
216
|
-
if (sentenceBreak > 0) {
|
|
217
|
-
end = searchStart + sentenceBreak + 1;
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
chunks.push(content.slice(start, end));
|
|
222
|
-
start = Math.max(start + 1, end - effectiveOverlap);
|
|
223
|
-
}
|
|
224
|
-
return chunks;
|
|
225
|
-
}
|
|
226
|
-
validateConfig(config) {
|
|
227
|
-
const errors = [];
|
|
228
|
-
const warnings = [];
|
|
229
|
-
const latexConfig = config;
|
|
230
|
-
if (latexConfig.maxSize !== undefined && latexConfig.maxSize <= 0) {
|
|
231
|
-
errors.push("maxSize must be greater than 0");
|
|
232
|
-
}
|
|
233
|
-
if (latexConfig.overlap !== undefined && latexConfig.overlap < 0) {
|
|
234
|
-
errors.push("overlap must be non-negative");
|
|
235
|
-
}
|
|
236
|
-
if (latexConfig.overlap !== undefined &&
|
|
237
|
-
latexConfig.maxSize !== undefined &&
|
|
238
|
-
latexConfig.overlap >= latexConfig.maxSize) {
|
|
239
|
-
errors.push("overlap must be less than maxSize");
|
|
240
|
-
}
|
|
241
|
-
if (latexConfig.splitEnvironments !== undefined &&
|
|
242
|
-
latexConfig.splitEnvironments.length === 0) {
|
|
243
|
-
warnings.push("No split environments specified, using defaults");
|
|
244
|
-
}
|
|
245
|
-
return {
|
|
246
|
-
valid: errors.length === 0,
|
|
247
|
-
errors,
|
|
248
|
-
warnings,
|
|
249
|
-
};
|
|
250
|
-
}
|
|
251
|
-
}
|
|
@@ -1,373 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Markdown-aware Chunker
|
|
3
|
-
*
|
|
4
|
-
* Splits markdown documents based on header structure while preserving formatting.
|
|
5
|
-
* Best for documentation, README files, and structured markdown content.
|
|
6
|
-
*/
|
|
7
|
-
import { randomUUID } from "crypto";
|
|
8
|
-
/**
|
|
9
|
-
* Markdown-aware chunker implementation
|
|
10
|
-
* Splits based on markdown structure (headers, code blocks, etc.)
|
|
11
|
-
*/
|
|
12
|
-
export class MarkdownChunker {
    strategy = "markdown";
    /**
     * Split markdown text into chunks scoped by header sections.
     *
     * @param text - markdown source; an empty or missing value yields `[]`.
     * @param config - optional settings: `maxSize` (default 1000), `overlap` (0),
     *   `headerLevels` ([1, 2, 3]), `preserveCodeBlocks` (true),
     *   `includeHeader` (true), `stripFormatting` (false),
     *   `trimWhitespace` (true) and `metadata` ({}).
     * @returns array of `{ id, text, metadata }` chunk objects. The
     *   `startPosition`/`endPosition` values are running offsets accumulated
     *   from emitted chunk lengths, not offsets into the original text.
     */
    async chunk(text, config) {
        const { maxSize = 1000, overlap = 0, headerLevels = [1, 2, 3], preserveCodeBlocks = true, includeHeader = true, stripFormatting = false, trimWhitespace = true, metadata = {}, } = config || {};
        const documentId = randomUUID();
        const chunks = [];
        if (!text || text.length === 0) {
            return chunks;
        }
        // Match exactly the requested levels. A min..max range would also
        // split on levels that were not requested (e.g. level 2 for [1, 3]).
        const levels = [...new Set(headerLevels ?? [])]
            .filter((lvl) => Number.isInteger(lvl) && lvl >= 1)
            .sort((a, b) => a - b);
        const headerPattern = levels.length > 0
            ? new RegExp(`^(${levels.map((lvl) => `#{${lvl}}`).join("|")})\\s+(.+)$`, "gm")
            : /(?!)/gm; // never matches: the whole text becomes one section
        const sections = this.splitByHeaders(text, headerPattern, includeHeader);
        let chunkIndex = 0;
        let currentPosition = 0;
        for (const { header, content, level } of sections) {
            // Swap code spans and fences for short placeholders so the size-based
            // splitter does not cut through them.
            const codeBlocks = [];
            let processedContent = content;
            if (preserveCodeBlocks) {
                processedContent = content.replace(/```[\s\S]*?```|`[^`]+`/g, (match) => {
                    const placeholder = `__CODE_BLOCK_${codeBlocks.length}__`;
                    codeBlocks.push({ placeholder, code: match });
                    return placeholder;
                });
            }
            // Leave room for the header that is prepended to every chunk, but
            // never shrink the budget below 100 characters.
            const effectiveMaxSize = Math.max(maxSize - (header?.length || 0), 100);
            const contentChunks = this.splitContent(processedContent, effectiveMaxSize, overlap);
            for (const contentChunk of contentChunks) {
                let chunkText = header && includeHeader
                    ? `${header}\n\n${contentChunk}`
                    : contentChunk;
                // Restore code literally. String.replace with a string
                // replacement would interpret "$$", "$&", "$1" ... inside the
                // code as replacement patterns and corrupt it.
                for (const { placeholder, code } of codeBlocks) {
                    chunkText = chunkText.split(placeholder).join(code);
                }
                if (stripFormatting) {
                    chunkText = this.stripMarkdown(chunkText);
                }
                const finalText = trimWhitespace ? chunkText.trim() : chunkText;
                if (finalText.length > 0) {
                    chunks.push({
                        id: randomUUID(),
                        text: finalText,
                        metadata: {
                            documentId,
                            chunkIndex,
                            startPosition: currentPosition,
                            endPosition: currentPosition + chunkText.length,
                            documentType: "markdown",
                            headerLevel: level ?? undefined,
                            header: header?.replace(/^#+\s*/, "") ?? undefined,
                            custom: metadata,
                        },
                    });
                    chunkIndex++;
                }
                currentPosition += chunkText.length;
            }
        }
        // totalChunks is only known once every section has been processed.
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }
    /**
     * Cut `text` into sections at every match of `headerPattern`.
     *
     * @param text - markdown source.
     * @param headerPattern - global regex whose group 1 is the run of `#` characters.
     * @param _includeHeader - unused; kept for signature compatibility.
     * @returns array of `{ header, content, level }`. `header` is the full
     *   header line (or null before the first header), `content` is trimmed,
     *   and sections with blank content are dropped.
     */
    splitByHeaders(text, headerPattern, _includeHeader) {
        const sections = [];
        const addSection = (rawText, header, level) => {
            const trimmed = rawText.trim();
            if (trimmed) {
                sections.push({ header, content: trimmed, level });
            }
        };
        let lastIndex = 0;
        let currentHeader = null;
        let currentLevel = null;
        let match;
        // A global regex keeps state between calls; start from the beginning.
        headerPattern.lastIndex = 0;
        while ((match = headerPattern.exec(text)) !== null) {
            if (match.index > lastIndex) {
                addSection(text.slice(lastIndex, match.index), currentHeader, currentLevel);
            }
            currentHeader = match[0];
            currentLevel = match[1].length; // number of '#' characters
            lastIndex = match.index + match[0].length;
        }
        if (lastIndex < text.length) {
            addSection(text.slice(lastIndex), currentHeader, currentLevel);
        }
        // Nothing emitted (e.g. only header lines): return the text as one section.
        if (sections.length === 0) {
            addSection(text, null, null);
        }
        return sections;
    }
    /**
     * Split section content that exceeds `maxSize`, keeping markdown tables
     * on row boundaries when any are present.
     *
     * `maxSize` is clamped to at least 1 and `overlap` to [0, maxSize - 1].
     *
     * @returns array of strings; the input alone when it already fits.
     */
    splitContent(content, maxSize, overlap) {
        const effectiveMaxSize = Math.max(maxSize, 1);
        const effectiveOverlap = Math.min(Math.max(overlap, 0), effectiveMaxSize - 1);
        if (content.length <= effectiveMaxSize) {
            return [content];
        }
        const lines = content.split("\n");
        const tableRanges = this.detectTableRanges(lines);
        if (tableRanges.length > 0) {
            return this.splitContentTableAware(content, lines, tableRanges, effectiveMaxSize, effectiveOverlap);
        }
        return this.splitPlainContent(content, effectiveMaxSize, effectiveOverlap);
    }
    /**
     * Detect contiguous table blocks in lines.
     * A table starts at a pipe-prefixed row directly followed by a separator
     * row and extends over every following pipe-prefixed row.
     * Returns array of { start, end } line index ranges (inclusive).
     */
    detectTableRanges(lines) {
        // Simple pipe-prefixed line check (single character class — no backtracking)
        const TABLE_ROW_RE = /^\|[^\r\n]{1,10000}/;
        // Per-cell separator regex applied AFTER splitting on "|" — safe because
        // each cell is short and bounded by pipe delimiters (CodeQL: js/polynomial-redos)
        const SEPARATOR_CELL_RE = /^[\t ]*:?-+:?[\t ]*$/;
        const ranges = [];
        let i = 0;
        while (i < lines.length) {
            const startsTable = i + 1 < lines.length &&
                TABLE_ROW_RE.test(lines[i]) &&
                this.isTableSeparator(lines[i + 1], SEPARATOR_CELL_RE);
            if (!startsTable) {
                i++;
                continue;
            }
            const start = i;
            i += 2; // skip the header row and the separator row
            while (i < lines.length && TABLE_ROW_RE.test(lines[i])) {
                i++;
            }
            ranges.push({ start, end: i - 1 });
        }
        return ranges;
    }
    /** Check if a line is a markdown table separator (e.g. |---|---|). */
    isTableSeparator(line, cellRe) {
        const trimmed = line.trimEnd();
        if (!trimmed.startsWith("|")) {
            return false;
        }
        // Split by "|" → ["", "---", "---", ""] for "|---|---|"
        const cells = trimmed.split("|");
        cells.shift(); // remove leading empty element
        if (cells.length > 0 && cells[cells.length - 1].trim() === "") {
            cells.pop(); // remove trailing empty element
        }
        if (cells.length === 0) {
            return false;
        }
        return cells.every((cell) => cellRe.test(cell));
    }
    /**
     * Split content while preserving markdown tables.
     *
     * Text is cut into alternating prose and table segments. Prose goes
     * through `splitPlainContent`; a table that fits stays whole, and a larger
     * one is split by rows. Adjacent pieces are merged while the result stays
     * within `maxSize`.
     *
     * @returns array of strings, or `[content]` if nothing was produced.
     */
    splitContentTableAware(content, lines, tableRanges, maxSize, overlap) {
        // Build segments: alternating non-table and table blocks
        const segments = [];
        const addProse = (from, to) => {
            const prose = lines.slice(from, to).join("\n").trim();
            if (prose) {
                segments.push({ text: prose, isTable: false });
            }
        };
        let lineIdx = 0;
        for (const range of tableRanges) {
            if (lineIdx < range.start) {
                addProse(lineIdx, range.start);
            }
            segments.push({
                text: lines.slice(range.start, range.end + 1).join("\n"),
                isTable: true,
            });
            lineIdx = range.end + 1;
        }
        if (lineIdx < lines.length) {
            addProse(lineIdx, lines.length);
        }
        const result = [];
        let current = "";
        // Append `piece` to the pending chunk when it fits (joined by `joiner`),
        // otherwise flush the pending chunk and start a new one with `piece`.
        const accumulate = (piece, joiner) => {
            if (current.length === 0) {
                current = piece;
            }
            else if (current.length + joiner.length + piece.length <= maxSize) {
                current += joiner + piece;
            }
            else {
                result.push(current);
                current = piece;
            }
        };
        for (const seg of segments) {
            if (!seg.isTable) {
                for (const piece of this.splitPlainContent(seg.text, maxSize, overlap)) {
                    accumulate(piece, "\n");
                }
            }
            else if (seg.text.length <= maxSize) {
                accumulate(seg.text, "\n\n");
            }
            else {
                // Oversized table: flush pending text, then split on row boundaries.
                if (current) {
                    result.push(current);
                    current = "";
                }
                result.push(...this.splitTableByRows(seg.text, maxSize));
            }
        }
        if (current) {
            result.push(current);
        }
        return result.length > 0 ? result : [content];
    }
    /**
     * Split a table on row boundaries, repeating header + separator in each chunk.
     *
     * A table with fewer than three rows is returned whole. If the header
     * block alone exceeds `maxSize` the table is split as plain text. A single
     * row that does not fit is emitted as its own (oversized) chunk.
     */
    splitTableByRows(tableText, maxSize) {
        const rows = tableText.split("\n");
        if (rows.length < 3) {
            return [tableText];
        }
        const [headerRow, separatorRow, ...dataRows] = rows;
        const headerBlock = headerRow + "\n" + separatorRow;
        if (headerBlock.length > maxSize) {
            return this.splitPlainContent(tableText, maxSize, 0);
        }
        const chunks = [];
        let currentChunk = headerBlock;
        // Emit the pending chunk only if it holds at least one data row.
        const flush = () => {
            if (currentChunk.length > headerBlock.length) {
                chunks.push(currentChunk);
            }
        };
        for (const row of dataRows) {
            const singleRowChunk = `${headerBlock}\n${row}`;
            // Guard: single row exceeds budget — flush and emit as standalone chunk
            if (singleRowChunk.length > maxSize) {
                flush();
                chunks.push(singleRowChunk);
                currentChunk = headerBlock;
                continue;
            }
            const candidate = currentChunk + "\n" + row;
            if (candidate.length <= maxSize) {
                currentChunk = candidate;
            }
            else {
                flush();
                currentChunk = singleRowChunk;
            }
        }
        flush();
        return chunks.length > 0 ? chunks : [tableText];
    }
    /**
     * Split plain text into pieces of at most `maxSize` characters where
     * consecutive pieces share up to `overlap` characters.
     *
     * Within the last 200 characters of each window a blank-line paragraph
     * break is preferred as the cut point, then a sentence break.
     *
     * @returns array of strings; the input alone when it already fits.
     */
    splitPlainContent(content, maxSize, overlap) {
        if (content.length <= maxSize) {
            return [content];
        }
        const chunks = [];
        let start = 0;
        while (start < content.length) {
            let end = Math.min(start + maxSize, content.length);
            if (end < content.length) {
                const searchStart = Math.max(start, end - 200);
                const searchText = content.slice(searchStart, end);
                const paragraphBreak = searchText.lastIndexOf("\n\n");
                if (paragraphBreak > 0) {
                    end = searchStart + paragraphBreak;
                }
                else {
                    const sentenceBreak = searchText.search(/[.!?]\s+[A-Z]/);
                    if (sentenceBreak > 0) {
                        // +1 keeps the terminating punctuation in this piece.
                        end = searchStart + sentenceBreak + 1;
                    }
                }
            }
            chunks.push(content.slice(start, end));
            if (end >= content.length) {
                // Final piece emitted. Without this exit a positive overlap
                // rewinds `start` below the end of the text and the loop emits
                // one redundant, ever-shorter tail piece per character.
                break;
            }
            // Step back by the overlap but always advance at least one character.
            start = Math.max(start + 1, end - overlap);
        }
        return chunks;
    }
    /**
     * Remove common markdown syntax, keeping the visible text.
     *
     * Order matters: fenced code blocks are removed before inline code is
     * unwrapped (otherwise the inline rule eats the fence markers), and images
     * are unwrapped before links (otherwise the link rule consumes the
     * `[alt](url)` part of an image and leaves a stray "!").
     */
    stripMarkdown(text) {
        return text
            .replace(/```[\s\S]*?```/g, "") // Code blocks
            .replace(/^#+\s+/gm, "") // Headers
            .replace(/\*\*(.+?)\*\*/g, "$1") // Bold
            .replace(/\*(.+?)\*/g, "$1") // Italic
            .replace(/__(.+?)__/g, "$1") // Bold (underscore)
            .replace(/_(.+?)_/g, "$1") // Italic (underscore)
            .replace(/`(.+?)`/g, "$1") // Inline code
            .replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1") // Images
            .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1"); // Links
    }
    /**
     * Check a markdown chunker configuration for inconsistent values.
     *
     * @param config - object that may carry `maxSize`, `overlap` and
     *   `headerLevels`; a missing config is treated as empty, as in `chunk`.
     * @returns `{ valid, errors, warnings }`; `valid` is true when `errors` is empty.
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        const mdConfig = config ?? {};
        if (mdConfig.maxSize !== undefined && mdConfig.maxSize <= 0) {
            errors.push("maxSize must be greater than 0");
        }
        if (mdConfig.headerLevels !== undefined) {
            if (mdConfig.headerLevels.length === 0) {
                errors.push("headerLevels must not be empty");
            }
            for (const level of mdConfig.headerLevels) {
                if (level < 1 || level > 6) {
                    errors.push(`Invalid header level: ${level}. Must be between 1 and 6`);
                }
            }
        }
        if (mdConfig.overlap !== undefined && mdConfig.overlap < 0) {
            errors.push("overlap must be non-negative");
        }
        if (mdConfig.overlap !== undefined &&
            mdConfig.maxSize !== undefined &&
            mdConfig.overlap >= mdConfig.maxSize) {
            errors.push("overlap must be less than maxSize");
        }
        return {
            valid: errors.length === 0,
            errors,
            warnings,
        };
    }
}
|