@juspay/neurolink 9.54.5 → 9.54.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/dist/action/actionInputs.d.ts +1 -1
- package/dist/adapters/video/directorPipeline.js +6 -0
- package/dist/adapters/video/vertexVideoHandler.js +6 -0
- package/dist/agent/directTools.d.ts +3 -23
- package/dist/auth/AuthProviderFactory.d.ts +1 -3
- package/dist/auth/anthropicOAuth.d.ts +4 -7
- package/dist/auth/anthropicOAuth.js +23 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/index.d.ts +11 -0
- package/dist/auth/index.js +14 -0
- package/dist/auth/middleware/AuthMiddleware.d.ts +5 -60
- package/dist/auth/middleware/AuthMiddleware.js +3 -0
- package/dist/auth/middleware/rateLimitByUser.d.ts +4 -93
- package/dist/auth/middleware/rateLimitByUser.js +4 -0
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/auth/providers/CognitoProvider.js +3 -0
- package/dist/auth/providers/KeycloakProvider.js +3 -0
- package/dist/auth/providers/auth0.d.ts +1 -1
- package/dist/auth/sessionManager.d.ts +2 -0
- package/dist/auth/sessionManager.js +53 -11
- package/dist/auth/tokenStore.d.ts +2 -1
- package/dist/auth/tokenStore.js +45 -4
- package/dist/autoresearch/tools.d.ts +1 -16
- package/dist/browser/neurolink.min.js +353 -353
- package/dist/cli/commands/config.d.ts +3 -123
- package/dist/cli/commands/config.js +4 -2
- package/dist/cli/commands/evaluate.d.ts +1 -19
- package/dist/cli/commands/proxy.d.ts +1 -1
- package/dist/cli/commands/proxy.js +3 -0
- package/dist/cli/commands/rag.js +3 -0
- package/dist/cli/commands/setup-anthropic.d.ts +2 -6
- package/dist/cli/commands/setup-anthropic.js +1 -1
- package/dist/cli/commands/setup-azure.d.ts +2 -6
- package/dist/cli/commands/setup-azure.js +1 -1
- package/dist/cli/commands/setup-bedrock.d.ts +2 -6
- package/dist/cli/commands/setup-bedrock.js +1 -1
- package/dist/cli/commands/setup-gcp.d.ts +2 -6
- package/dist/cli/commands/setup-google-ai.d.ts +2 -6
- package/dist/cli/commands/setup-google-ai.js +1 -1
- package/dist/cli/commands/setup-huggingface.d.ts +1 -5
- package/dist/cli/commands/setup-mistral.d.ts +1 -5
- package/dist/cli/commands/setup-openai.d.ts +2 -6
- package/dist/cli/commands/setup-openai.js +1 -1
- package/dist/cli/commands/setup.d.ts +1 -8
- package/dist/cli/commands/task.js +1 -0
- package/dist/cli/commands/voiceServer.d.ts +1 -4
- package/dist/cli/loop/session.js +31 -10
- package/dist/cli/utils/interactiveSetup.d.ts +2 -15
- package/dist/cli/utils/videoFileUtils.d.ts +1 -15
- package/dist/client/aiSdkAdapter.d.ts +1 -1
- package/dist/client/aiSdkAdapter.js +1 -0
- package/dist/client/httpClient.d.ts +2 -2
- package/dist/client/httpClient.js +13 -0
- package/dist/client/sseClient.d.ts +2 -2
- package/dist/client/sseClient.js +29 -0
- package/dist/client/streamingClient.d.ts +2 -0
- package/dist/client/streamingClient.js +19 -0
- package/dist/client/wsClient.d.ts +7 -2
- package/dist/client/wsClient.js +90 -10
- package/dist/context/budgetChecker.js +3 -1
- package/dist/context/contextCompactor.js +163 -143
- package/dist/context/fileSummarizationService.d.ts +1 -9
- package/dist/context/summarizationEngine.js +29 -16
- package/dist/core/baseProvider.js +124 -153
- package/dist/core/infrastructure/baseRegistry.d.ts +1 -7
- package/dist/core/modules/GenerationHandler.d.ts +3 -2
- package/dist/core/modules/GenerationHandler.js +9 -1
- package/dist/core/modules/StreamHandler.js +9 -0
- package/dist/core/modules/ToolsManager.js +18 -2
- package/dist/core/toolEvents.d.ts +0 -1
- package/dist/evaluation/BatchEvaluator.d.ts +1 -97
- package/dist/evaluation/EvaluationAggregator.d.ts +1 -118
- package/dist/evaluation/EvaluatorFactory.d.ts +1 -13
- package/dist/evaluation/EvaluatorRegistry.d.ts +1 -50
- package/dist/evaluation/errors/EvaluationError.d.ts +2 -27
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +1 -39
- package/dist/evaluation/hooks/observabilityHooks.d.ts +3 -55
- package/dist/evaluation/hooks/observabilityHooks.js +3 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +7 -61
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +7 -7
- package/dist/evaluation/ragasEvaluator.js +54 -37
- package/dist/evaluation/reporting/metricsCollector.d.ts +1 -60
- package/dist/evaluation/reporting/reportGenerator.d.ts +1 -17
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +1 -29
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +1 -42
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +1 -19
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +1 -33
- package/dist/factories/providerFactory.d.ts +1 -16
- package/dist/factories/providerFactory.js +2 -0
- package/dist/image-gen/ImageGenService.d.ts +3 -0
- package/dist/image-gen/ImageGenService.js +3 -0
- package/dist/lib/action/actionInputs.d.ts +1 -1
- package/dist/lib/adapters/video/directorPipeline.js +6 -0
- package/dist/lib/adapters/video/vertexVideoHandler.js +6 -0
- package/dist/lib/agent/directTools.d.ts +3 -23
- package/dist/lib/auth/AuthProviderFactory.d.ts +1 -3
- package/dist/lib/auth/anthropicOAuth.d.ts +4 -7
- package/dist/lib/auth/anthropicOAuth.js +23 -0
- package/dist/lib/auth/errors.d.ts +1 -1
- package/dist/lib/auth/index.d.ts +11 -0
- package/dist/lib/auth/index.js +14 -0
- package/dist/lib/auth/middleware/AuthMiddleware.d.ts +5 -60
- package/dist/lib/auth/middleware/AuthMiddleware.js +3 -0
- package/dist/lib/auth/middleware/rateLimitByUser.d.ts +4 -93
- package/dist/lib/auth/middleware/rateLimitByUser.js +4 -0
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/auth/providers/CognitoProvider.js +3 -0
- package/dist/lib/auth/providers/KeycloakProvider.js +3 -0
- package/dist/lib/auth/providers/auth0.d.ts +1 -1
- package/dist/lib/auth/sessionManager.d.ts +2 -0
- package/dist/lib/auth/sessionManager.js +53 -11
- package/dist/lib/auth/tokenStore.d.ts +2 -1
- package/dist/lib/auth/tokenStore.js +45 -4
- package/dist/lib/autoresearch/tools.d.ts +1 -16
- package/dist/lib/client/aiSdkAdapter.d.ts +1 -1
- package/dist/lib/client/aiSdkAdapter.js +1 -0
- package/dist/lib/client/httpClient.d.ts +2 -2
- package/dist/lib/client/httpClient.js +13 -0
- package/dist/lib/client/sseClient.d.ts +2 -2
- package/dist/lib/client/sseClient.js +29 -0
- package/dist/lib/client/streamingClient.d.ts +2 -0
- package/dist/lib/client/streamingClient.js +19 -0
- package/dist/lib/client/wsClient.d.ts +7 -2
- package/dist/lib/client/wsClient.js +90 -10
- package/dist/lib/context/budgetChecker.js +3 -1
- package/dist/lib/context/contextCompactor.js +163 -143
- package/dist/lib/context/fileSummarizationService.d.ts +1 -9
- package/dist/lib/context/summarizationEngine.js +29 -16
- package/dist/lib/core/baseProvider.js +124 -153
- package/dist/lib/core/infrastructure/baseRegistry.d.ts +1 -7
- package/dist/lib/core/modules/GenerationHandler.d.ts +3 -2
- package/dist/lib/core/modules/GenerationHandler.js +9 -1
- package/dist/lib/core/modules/StreamHandler.js +9 -0
- package/dist/lib/core/modules/ToolsManager.js +18 -2
- package/dist/lib/core/toolEvents.d.ts +0 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +1 -97
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +1 -118
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +1 -13
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +1 -50
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +2 -27
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +1 -39
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +3 -55
- package/dist/lib/evaluation/hooks/observabilityHooks.js +3 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +7 -61
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +7 -7
- package/dist/lib/evaluation/ragasEvaluator.js +54 -37
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +1 -60
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +1 -17
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +1 -29
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +1 -42
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +1 -19
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +1 -33
- package/dist/lib/factories/providerFactory.d.ts +1 -16
- package/dist/lib/factories/providerFactory.js +2 -0
- package/dist/lib/image-gen/ImageGenService.d.ts +3 -0
- package/dist/lib/image-gen/ImageGenService.js +3 -0
- package/dist/lib/mcp/batching/requestBatcher.js +99 -73
- package/dist/lib/mcp/httpRateLimiter.js +3 -1
- package/dist/lib/mcp/httpRetryHandler.js +3 -1
- package/dist/lib/mcp/mcpClientFactory.js +3 -1
- package/dist/lib/mcp/multiServerManager.d.ts +1 -14
- package/dist/lib/mcp/servers/aiProviders/aiAnalysisTools.js +5 -1
- package/dist/lib/mcp/servers/aiProviders/aiWorkflowTools.js +1 -0
- package/dist/lib/mcp/toolDiscoveryService.js +70 -57
- package/dist/lib/mcp/toolRegistry.js +11 -1
- package/dist/lib/memory/hippocampusInitializer.d.ts +1 -2
- package/dist/lib/memory/hippocampusInitializer.js +1 -1
- package/dist/lib/memory/memoryRetrievalTools.js +182 -141
- package/dist/lib/models/anthropicModels.d.ts +0 -1
- package/dist/lib/models/anthropicModels.js +1 -0
- package/dist/lib/neurolink.js +237 -41
- package/dist/lib/observability/exporterRegistry.d.ts +3 -22
- package/dist/lib/observability/exporters/sentryExporter.js +1 -0
- package/dist/lib/observability/metricsAggregator.d.ts +1 -31
- package/dist/lib/observability/retryPolicy.d.ts +0 -1
- package/dist/lib/observability/sampling/samplers.d.ts +0 -1
- package/dist/lib/observability/spanProcessor.d.ts +0 -1
- package/dist/lib/observability/tokenTracker.d.ts +7 -16
- package/dist/lib/observability/tokenTracker.js +6 -4
- package/dist/lib/observability/utils/spanSerializer.d.ts +5 -1
- package/dist/lib/observability/utils/spanSerializer.js +24 -4
- package/dist/lib/processors/base/BaseFileProcessor.js +66 -53
- package/dist/lib/processors/document/ExcelProcessor.d.ts +1 -1
- package/dist/lib/processors/errors/errorHelpers.d.ts +1 -31
- package/dist/lib/processors/errors/errorSerializer.d.ts +1 -45
- package/dist/lib/processors/registry/ProcessorRegistry.js +17 -6
- package/dist/lib/providers/amazonBedrock.js +189 -15
- package/dist/lib/providers/amazonSagemaker.js +25 -11
- package/dist/lib/providers/anthropic.js +13 -0
- package/dist/lib/providers/azureOpenai.js +2 -0
- package/dist/lib/providers/googleAiStudio.js +82 -0
- package/dist/lib/providers/googleNativeGemini3.d.ts +1 -2
- package/dist/lib/providers/googleVertex.js +52 -0
- package/dist/lib/providers/huggingFace.js +2 -0
- package/dist/lib/providers/litellm.js +2 -0
- package/dist/lib/providers/mistral.js +2 -0
- package/dist/lib/providers/ollama.js +84 -5
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +17 -6
- package/dist/lib/providers/openRouter.js +2 -0
- package/dist/lib/providers/openaiCompatible.js +2 -0
- package/dist/lib/providers/sagemaker/detection.d.ts +1 -33
- package/dist/lib/providers/sagemaker/diagnostics.d.ts +1 -25
- package/dist/lib/providers/sagemaker/language-model.d.ts +1 -1
- package/dist/lib/proxy/proxyConfig.js +4 -0
- package/dist/lib/proxy/proxyEnv.d.ts +1 -17
- package/dist/lib/proxy/proxyHealth.d.ts +0 -1
- package/dist/lib/proxy/proxyTracer.d.ts +1 -36
- package/dist/lib/proxy/proxyTracer.js +9 -0
- package/dist/lib/proxy/quietDetector.d.ts +1 -7
- package/dist/lib/proxy/rawStreamCapture.d.ts +1 -10
- package/dist/lib/proxy/requestLogger.d.ts +1 -21
- package/dist/lib/proxy/routingPolicy.d.ts +1 -2
- package/dist/lib/proxy/sseInterceptor.d.ts +1 -66
- package/dist/lib/proxy/sseInterceptor.js +6 -0
- package/dist/lib/proxy/updateChecker.d.ts +1 -6
- package/dist/lib/proxy/updateState.d.ts +1 -12
- package/dist/lib/rag/chunkers/BaseChunker.js +36 -22
- package/dist/lib/rag/chunking/jsonChunker.d.ts +1 -1
- package/dist/lib/rag/errors/RAGError.d.ts +1 -2
- package/dist/lib/rag/ragIntegration.js +45 -32
- package/dist/lib/rag/reranker/reranker.js +151 -122
- package/dist/lib/rag/retrieval/vectorQueryTool.js +79 -65
- package/dist/lib/sdk/toolRegistration.d.ts +10 -44
- package/dist/lib/sdk/toolRegistration.js +1 -1
- package/dist/lib/server/middleware/abortSignal.d.ts +1 -11
- package/dist/lib/server/middleware/auth.d.ts +1 -21
- package/dist/lib/server/middleware/auth.js +12 -0
- package/dist/lib/server/middleware/common.js +48 -32
- package/dist/lib/server/middleware/deprecation.d.ts +1 -20
- package/dist/lib/server/middleware/rateLimit.d.ts +1 -75
- package/dist/lib/server/middleware/validation.d.ts +3 -81
- package/dist/lib/server/middleware/validation.js +3 -0
- package/dist/lib/server/openapi/generator.d.ts +1 -47
- package/dist/lib/server/routes/agentRoutes.js +112 -57
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +1 -6
- package/dist/lib/server/routes/claudeProxyRoutes.js +127 -13
- package/dist/lib/server/routes/healthRoutes.js +58 -12
- package/dist/lib/server/routes/index.d.ts +1 -26
- package/dist/lib/server/routes/mcpRoutes.js +40 -7
- package/dist/lib/server/routes/memoryRoutes.js +22 -7
- package/dist/lib/server/routes/openApiRoutes.js +30 -6
- package/dist/lib/server/routes/toolRoutes.js +140 -68
- package/dist/lib/server/streaming/dataStream.d.ts +1 -35
- package/dist/lib/server/streaming/dataStream.js +15 -0
- package/dist/lib/services/server/ai/observability/instrumentation.js +114 -14
- package/dist/lib/session/globalSessionState.d.ts +1 -10
- package/dist/lib/tasks/tools/taskTools.d.ts +2 -2
- package/dist/lib/telemetry/traceContext.d.ts +9 -0
- package/dist/lib/telemetry/traceContext.js +19 -0
- package/dist/lib/telemetry/tracers.d.ts +2 -0
- package/dist/lib/telemetry/tracers.js +2 -0
- package/dist/lib/types/action.d.ts +2 -0
- package/dist/lib/types/artifact.d.ts +7 -0
- package/dist/lib/types/auth.d.ts +125 -18
- package/dist/lib/types/autoresearch.d.ts +12 -0
- package/dist/lib/types/cli.d.ts +415 -0
- package/dist/lib/types/client.d.ts +34 -0
- package/dist/lib/types/common.d.ts +12 -41
- package/dist/lib/types/context.d.ts +5 -0
- package/dist/lib/types/evaluation.d.ts +332 -1
- package/dist/lib/types/file.d.ts +4 -0
- package/dist/lib/types/mcp.d.ts +102 -2
- package/dist/lib/types/middleware.d.ts +116 -0
- package/dist/lib/types/multimodal.d.ts +65 -0
- package/dist/lib/types/observability.d.ts +81 -0
- package/dist/lib/types/processor.d.ts +47 -0
- package/dist/lib/types/providers.d.ts +120 -16
- package/dist/lib/types/proxy.d.ts +321 -1
- package/dist/lib/types/rag.d.ts +22 -0
- package/dist/lib/types/scorer.d.ts +141 -0
- package/dist/lib/types/server.d.ts +99 -0
- package/dist/lib/types/span.d.ts +2 -1
- package/dist/lib/types/span.js +1 -0
- package/dist/lib/types/tools.d.ts +44 -0
- package/dist/lib/types/tts.d.ts +6 -0
- package/dist/lib/types/utilities.d.ts +22 -0
- package/dist/lib/types/workflow.d.ts +18 -0
- package/dist/lib/utils/async/retry.d.ts +2 -8
- package/dist/lib/utils/async/retry.js +9 -9
- package/dist/lib/utils/imageCompressor.d.ts +1 -21
- package/dist/lib/utils/imageCompressor.js +5 -1
- package/dist/lib/utils/messageBuilder.d.ts +1 -1
- package/dist/lib/utils/redis.d.ts +1 -4
- package/dist/lib/utils/sanitizers/filename.d.ts +0 -1
- package/dist/lib/utils/toolEndEmitter.d.ts +25 -0
- package/dist/lib/utils/toolEndEmitter.js +65 -0
- package/dist/lib/workflow/config.d.ts +7 -32
- package/dist/lib/workflow/core/ensembleExecutor.js +28 -0
- package/dist/lib/workflow/core/judgeScorer.js +23 -0
- package/dist/lib/workflow/core/responseConditioner.js +17 -0
- package/dist/lib/workflow/core/workflowRunner.d.ts +1 -19
- package/dist/lib/workflow/core/workflowRunner.js +202 -147
- package/dist/mcp/batching/requestBatcher.js +99 -73
- package/dist/mcp/httpRateLimiter.js +3 -1
- package/dist/mcp/httpRetryHandler.js +3 -1
- package/dist/mcp/mcpClientFactory.js +3 -1
- package/dist/mcp/multiServerManager.d.ts +1 -14
- package/dist/mcp/servers/aiProviders/aiAnalysisTools.js +5 -1
- package/dist/mcp/servers/aiProviders/aiWorkflowTools.js +1 -0
- package/dist/mcp/toolDiscoveryService.js +70 -57
- package/dist/mcp/toolRegistry.js +11 -1
- package/dist/memory/hippocampusInitializer.d.ts +1 -2
- package/dist/memory/hippocampusInitializer.js +1 -1
- package/dist/memory/memoryRetrievalTools.js +182 -141
- package/dist/models/anthropicModels.d.ts +0 -1
- package/dist/models/anthropicModels.js +1 -0
- package/dist/neurolink.js +237 -41
- package/dist/observability/exporterRegistry.d.ts +3 -22
- package/dist/observability/exporters/sentryExporter.js +1 -0
- package/dist/observability/metricsAggregator.d.ts +1 -31
- package/dist/observability/retryPolicy.d.ts +0 -1
- package/dist/observability/sampling/samplers.d.ts +0 -1
- package/dist/observability/spanProcessor.d.ts +0 -1
- package/dist/observability/tokenTracker.d.ts +7 -16
- package/dist/observability/tokenTracker.js +6 -4
- package/dist/observability/utils/spanSerializer.d.ts +5 -1
- package/dist/observability/utils/spanSerializer.js +24 -4
- package/dist/processors/base/BaseFileProcessor.js +66 -53
- package/dist/processors/document/ExcelProcessor.d.ts +1 -1
- package/dist/processors/errors/errorHelpers.d.ts +1 -31
- package/dist/processors/errors/errorSerializer.d.ts +1 -45
- package/dist/processors/registry/ProcessorRegistry.js +17 -6
- package/dist/providers/amazonBedrock.js +189 -15
- package/dist/providers/amazonSagemaker.js +25 -11
- package/dist/providers/anthropic.js +13 -0
- package/dist/providers/azureOpenai.js +2 -0
- package/dist/providers/googleAiStudio.js +82 -0
- package/dist/providers/googleNativeGemini3.d.ts +1 -2
- package/dist/providers/googleVertex.js +52 -0
- package/dist/providers/huggingFace.js +2 -0
- package/dist/providers/litellm.js +2 -0
- package/dist/providers/mistral.js +2 -0
- package/dist/providers/ollama.js +84 -5
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +17 -6
- package/dist/providers/openRouter.js +2 -0
- package/dist/providers/openaiCompatible.js +2 -0
- package/dist/providers/sagemaker/detection.d.ts +1 -33
- package/dist/providers/sagemaker/diagnostics.d.ts +1 -25
- package/dist/providers/sagemaker/language-model.d.ts +1 -1
- package/dist/proxy/proxyConfig.js +4 -0
- package/dist/proxy/proxyEnv.d.ts +1 -17
- package/dist/proxy/proxyHealth.d.ts +0 -1
- package/dist/proxy/proxyTracer.d.ts +1 -36
- package/dist/proxy/proxyTracer.js +9 -0
- package/dist/proxy/quietDetector.d.ts +1 -7
- package/dist/proxy/rawStreamCapture.d.ts +1 -10
- package/dist/proxy/requestLogger.d.ts +1 -21
- package/dist/proxy/routingPolicy.d.ts +1 -2
- package/dist/proxy/sseInterceptor.d.ts +1 -66
- package/dist/proxy/sseInterceptor.js +6 -0
- package/dist/proxy/updateChecker.d.ts +1 -6
- package/dist/proxy/updateState.d.ts +1 -12
- package/dist/rag/chunkers/BaseChunker.js +36 -22
- package/dist/rag/chunking/jsonChunker.d.ts +1 -1
- package/dist/rag/errors/RAGError.d.ts +1 -2
- package/dist/rag/ragIntegration.js +45 -32
- package/dist/rag/reranker/reranker.js +151 -122
- package/dist/rag/retrieval/vectorQueryTool.js +79 -65
- package/dist/sdk/toolRegistration.d.ts +10 -44
- package/dist/sdk/toolRegistration.js +1 -1
- package/dist/server/middleware/abortSignal.d.ts +1 -11
- package/dist/server/middleware/auth.d.ts +1 -21
- package/dist/server/middleware/auth.js +12 -0
- package/dist/server/middleware/common.js +48 -32
- package/dist/server/middleware/deprecation.d.ts +1 -20
- package/dist/server/middleware/rateLimit.d.ts +1 -75
- package/dist/server/middleware/validation.d.ts +3 -81
- package/dist/server/middleware/validation.js +3 -0
- package/dist/server/openapi/generator.d.ts +1 -47
- package/dist/server/routes/agentRoutes.js +112 -57
- package/dist/server/routes/claudeProxyRoutes.d.ts +1 -6
- package/dist/server/routes/claudeProxyRoutes.js +127 -13
- package/dist/server/routes/healthRoutes.js +58 -12
- package/dist/server/routes/index.d.ts +1 -26
- package/dist/server/routes/mcpRoutes.js +40 -7
- package/dist/server/routes/memoryRoutes.js +22 -7
- package/dist/server/routes/openApiRoutes.js +30 -6
- package/dist/server/routes/toolRoutes.js +140 -68
- package/dist/server/streaming/dataStream.d.ts +1 -35
- package/dist/server/streaming/dataStream.js +15 -0
- package/dist/services/server/ai/observability/instrumentation.js +114 -14
- package/dist/session/globalSessionState.d.ts +1 -10
- package/dist/tasks/tools/taskTools.d.ts +2 -2
- package/dist/telemetry/traceContext.d.ts +9 -0
- package/dist/telemetry/traceContext.js +18 -0
- package/dist/telemetry/tracers.d.ts +2 -0
- package/dist/telemetry/tracers.js +2 -0
- package/dist/types/action.d.ts +2 -0
- package/dist/types/artifact.d.ts +7 -0
- package/dist/types/auth.d.ts +125 -18
- package/dist/types/autoresearch.d.ts +12 -0
- package/dist/types/cli.d.ts +415 -0
- package/dist/types/client.d.ts +34 -0
- package/dist/types/common.d.ts +12 -41
- package/dist/types/context.d.ts +5 -0
- package/dist/types/evaluation.d.ts +332 -1
- package/dist/types/file.d.ts +4 -0
- package/dist/types/mcp.d.ts +102 -2
- package/dist/types/middleware.d.ts +116 -0
- package/dist/types/multimodal.d.ts +65 -0
- package/dist/types/observability.d.ts +81 -0
- package/dist/types/processor.d.ts +47 -0
- package/dist/types/providers.d.ts +120 -16
- package/dist/types/proxy.d.ts +321 -1
- package/dist/types/rag.d.ts +22 -0
- package/dist/types/scorer.d.ts +141 -0
- package/dist/types/server.d.ts +99 -0
- package/dist/types/span.d.ts +2 -1
- package/dist/types/span.js +1 -0
- package/dist/types/tools.d.ts +44 -0
- package/dist/types/tts.d.ts +6 -0
- package/dist/types/utilities.d.ts +22 -0
- package/dist/types/workflow.d.ts +18 -0
- package/dist/utils/async/retry.d.ts +2 -8
- package/dist/utils/async/retry.js +9 -9
- package/dist/utils/imageCompressor.d.ts +1 -21
- package/dist/utils/imageCompressor.js +5 -1
- package/dist/utils/messageBuilder.d.ts +1 -1
- package/dist/utils/redis.d.ts +1 -4
- package/dist/utils/sanitizers/filename.d.ts +0 -1
- package/dist/utils/toolEndEmitter.d.ts +25 -0
- package/dist/utils/toolEndEmitter.js +64 -0
- package/dist/workflow/config.d.ts +4 -29
- package/dist/workflow/core/ensembleExecutor.js +28 -0
- package/dist/workflow/core/judgeScorer.js +23 -0
- package/dist/workflow/core/responseConditioner.js +17 -0
- package/dist/workflow/core/workflowRunner.d.ts +1 -19
- package/dist/workflow/core/workflowRunner.js +202 -147
- package/package.json +2 -1
|
@@ -2,123 +2,7 @@
|
|
|
2
2
|
* @file EvaluationAggregator - Aggregates and analyzes evaluation results.
|
|
3
3
|
* Provides statistical analysis, trend detection, and summary generation.
|
|
4
4
|
*/
|
|
5
|
-
import type { EvaluationData } from "../types/index.js";
|
|
6
|
-
/**
|
|
7
|
-
* Statistical summary of evaluation scores.
|
|
8
|
-
*/
|
|
9
|
-
type ScoreStatistics = {
|
|
10
|
-
/** Minimum score */
|
|
11
|
-
min: number;
|
|
12
|
-
/** Maximum score */
|
|
13
|
-
max: number;
|
|
14
|
-
/** Mean (average) score */
|
|
15
|
-
mean: number;
|
|
16
|
-
/** Median score */
|
|
17
|
-
median: number;
|
|
18
|
-
/** Standard deviation */
|
|
19
|
-
stdDev: number;
|
|
20
|
-
/** Variance */
|
|
21
|
-
variance: number;
|
|
22
|
-
/** 25th percentile */
|
|
23
|
-
p25: number;
|
|
24
|
-
/** 75th percentile */
|
|
25
|
-
p75: number;
|
|
26
|
-
/** 90th percentile */
|
|
27
|
-
p90: number;
|
|
28
|
-
/** 95th percentile */
|
|
29
|
-
p95: number;
|
|
30
|
-
};
|
|
31
|
-
/**
|
|
32
|
-
* Score distribution across ranges.
|
|
33
|
-
*/
|
|
34
|
-
type ScoreDistribution = {
|
|
35
|
-
/** Items scoring 1-3 (poor) */
|
|
36
|
-
poor: number;
|
|
37
|
-
/** Items scoring 4-5 (below average) */
|
|
38
|
-
belowAverage: number;
|
|
39
|
-
/** Items scoring 6-7 (average) */
|
|
40
|
-
average: number;
|
|
41
|
-
/** Items scoring 8-9 (good) */
|
|
42
|
-
good: number;
|
|
43
|
-
/** Items scoring 10 (excellent) */
|
|
44
|
-
excellent: number;
|
|
45
|
-
};
|
|
46
|
-
/**
|
|
47
|
-
* Trend analysis results.
|
|
48
|
-
*/
|
|
49
|
-
type TrendAnalysis = {
|
|
50
|
-
/** Direction of the trend */
|
|
51
|
-
direction: "improving" | "declining" | "stable";
|
|
52
|
-
/** Slope of the linear regression */
|
|
53
|
-
slope: number;
|
|
54
|
-
/** R-squared value (fit quality) */
|
|
55
|
-
rSquared: number;
|
|
56
|
-
/** Percentage change from first to last */
|
|
57
|
-
percentChange: number;
|
|
58
|
-
/** Moving average of last N evaluations */
|
|
59
|
-
movingAverage: number;
|
|
60
|
-
};
|
|
61
|
-
/**
|
|
62
|
-
* Dimension-specific analysis for RAGAS metrics.
|
|
63
|
-
*/
|
|
64
|
-
type DimensionAnalysis = {
|
|
65
|
-
/** Relevance score statistics */
|
|
66
|
-
relevance: ScoreStatistics;
|
|
67
|
-
/** Accuracy score statistics */
|
|
68
|
-
accuracy: ScoreStatistics;
|
|
69
|
-
/** Completeness score statistics */
|
|
70
|
-
completeness: ScoreStatistics;
|
|
71
|
-
/** Overall score statistics */
|
|
72
|
-
overall: ScoreStatistics;
|
|
73
|
-
/** Correlation matrix between dimensions */
|
|
74
|
-
correlations: {
|
|
75
|
-
relevanceAccuracy: number;
|
|
76
|
-
relevanceCompleteness: number;
|
|
77
|
-
accuracyCompleteness: number;
|
|
78
|
-
};
|
|
79
|
-
};
|
|
80
|
-
/**
|
|
81
|
-
* Quality alerts summary.
|
|
82
|
-
*/
|
|
83
|
-
type AlertSummary = {
|
|
84
|
-
/** Total number of alerts */
|
|
85
|
-
total: number;
|
|
86
|
-
/** Number of high severity alerts */
|
|
87
|
-
high: number;
|
|
88
|
-
/** Number of medium severity alerts */
|
|
89
|
-
medium: number;
|
|
90
|
-
/** Number of items marked as off-topic */
|
|
91
|
-
offTopic: number;
|
|
92
|
-
/** Alert rate as percentage */
|
|
93
|
-
alertRate: number;
|
|
94
|
-
};
|
|
95
|
-
/**
|
|
96
|
-
* Comprehensive aggregation result.
|
|
97
|
-
*/
|
|
98
|
-
type AggregationResult = {
|
|
99
|
-
/** Number of evaluations aggregated */
|
|
100
|
-
count: number;
|
|
101
|
-
/** Statistics for overall scores */
|
|
102
|
-
statistics: ScoreStatistics;
|
|
103
|
-
/** Score distribution */
|
|
104
|
-
distribution: ScoreDistribution;
|
|
105
|
-
/** Dimension-specific analysis */
|
|
106
|
-
dimensions: DimensionAnalysis;
|
|
107
|
-
/** Sequence trend analysis based on insertion order (not time-based) */
|
|
108
|
-
sequenceTrend?: TrendAnalysis;
|
|
109
|
-
/** Alert summary */
|
|
110
|
-
alerts: AlertSummary;
|
|
111
|
-
/** Passing rate based on threshold */
|
|
112
|
-
passingRate: number;
|
|
113
|
-
/** Average evaluation time */
|
|
114
|
-
avgEvaluationTime: number;
|
|
115
|
-
/** Aggregation metadata */
|
|
116
|
-
metadata: {
|
|
117
|
-
aggregatedAt: string;
|
|
118
|
-
threshold: number;
|
|
119
|
-
evaluationModels: string[];
|
|
120
|
-
};
|
|
121
|
-
};
|
|
5
|
+
import type { AggregationResult, EvaluationData, ScoreDistribution, ScoreStatistics, TrendAnalysis } from "../types/index.js";
|
|
122
6
|
/**
|
|
123
7
|
* EvaluationAggregator - Aggregates evaluation results and provides analytics.
|
|
124
8
|
* Supports statistical analysis, trend detection, and quality monitoring.
|
|
@@ -270,4 +154,3 @@ export declare class EvaluationAggregator {
|
|
|
270
154
|
*/
|
|
271
155
|
generateSummary(threshold?: number): string;
|
|
272
156
|
}
|
|
273
|
-
export {};
|
|
@@ -3,19 +3,8 @@
|
|
|
3
3
|
* Extends BaseFactory to provide dynamic evaluator creation with configuration support.
|
|
4
4
|
*/
|
|
5
5
|
import { BaseFactory } from "../core/infrastructure/index.js";
|
|
6
|
-
import type { EvaluationConfig } from "../types/index.js";
|
|
6
|
+
import type { EvaluationConfig, EvaluatorPreset } from "../types/index.js";
|
|
7
7
|
import { Evaluator } from "./index.js";
|
|
8
|
-
/**
|
|
9
|
-
* Configuration presets for common evaluation scenarios.
|
|
10
|
-
*/
|
|
11
|
-
type EvaluatorPreset = {
|
|
12
|
-
/** Preset name for identification */
|
|
13
|
-
name: string;
|
|
14
|
-
/** Description of the preset use case */
|
|
15
|
-
description: string;
|
|
16
|
-
/** The underlying evaluation configuration (optional for built-in presets) */
|
|
17
|
-
config?: EvaluationConfig;
|
|
18
|
-
};
|
|
19
8
|
/**
|
|
20
9
|
* Factory for creating Evaluator instances with various configurations.
|
|
21
10
|
* Supports presets for common use cases and custom configurations.
|
|
@@ -111,4 +100,3 @@ export declare class EvaluatorFactory extends BaseFactory<Evaluator, EvaluationC
|
|
|
111
100
|
unregisterPreset(name: string): boolean;
|
|
112
101
|
}
|
|
113
102
|
export declare const getEvaluatorFactory: () => EvaluatorFactory;
|
|
114
|
-
export {};
|
|
@@ -3,55 +3,7 @@
|
|
|
3
3
|
* Extends BaseRegistry to provide dynamic strategy registration and lookup.
|
|
4
4
|
*/
|
|
5
5
|
import { BaseRegistry } from "../core/infrastructure/index.js";
|
|
6
|
-
import type {
|
|
7
|
-
import type { GenerateResult, EvaluationResult, EnhancedEvaluationContext } from "../types/index.js";
|
|
8
|
-
/**
|
|
9
|
-
* A function that performs evaluation and returns results.
|
|
10
|
-
*/
|
|
11
|
-
type EvaluationStrategyFunction = (options: LanguageModelV3CallOptions, result: GenerateResult, config?: EvaluationStrategyConfig) => Promise<{
|
|
12
|
-
evaluationResult: EvaluationResult;
|
|
13
|
-
evalContext: EnhancedEvaluationContext;
|
|
14
|
-
}>;
|
|
15
|
-
/**
|
|
16
|
-
* Configuration for evaluation strategies.
|
|
17
|
-
*/
|
|
18
|
-
type EvaluationStrategyConfig = {
|
|
19
|
-
/** The model to use for evaluation */
|
|
20
|
-
evaluationModel?: string;
|
|
21
|
-
/** The provider to use for evaluation */
|
|
22
|
-
provider?: string;
|
|
23
|
-
/** The passing threshold (1-10) */
|
|
24
|
-
threshold?: number;
|
|
25
|
-
/** Custom prompt generator */
|
|
26
|
-
promptGenerator?: (context: {
|
|
27
|
-
userQuery: string;
|
|
28
|
-
history: string;
|
|
29
|
-
tools: string;
|
|
30
|
-
retryInfo: string;
|
|
31
|
-
aiResponse: string;
|
|
32
|
-
}) => string;
|
|
33
|
-
/** Additional strategy-specific options */
|
|
34
|
-
options?: Record<string, unknown>;
|
|
35
|
-
};
|
|
36
|
-
/**
|
|
37
|
-
* Metadata for registered evaluation strategies.
|
|
38
|
-
*/
|
|
39
|
-
type EvaluationStrategyMetadata = {
|
|
40
|
-
/** Human-readable name for the strategy */
|
|
41
|
-
name: string;
|
|
42
|
-
/** Description of what the strategy does */
|
|
43
|
-
description: string;
|
|
44
|
-
/** Whether the strategy requires an external LLM */
|
|
45
|
-
requiresLLM: boolean;
|
|
46
|
-
/** Default model for the strategy (if requiresLLM is true) */
|
|
47
|
-
defaultModel?: string;
|
|
48
|
-
/** Default provider for the strategy (if requiresLLM is true) */
|
|
49
|
-
defaultProvider?: string;
|
|
50
|
-
/** Version of the strategy */
|
|
51
|
-
version: string;
|
|
52
|
-
/** Supported features */
|
|
53
|
-
features: string[];
|
|
54
|
-
};
|
|
6
|
+
import type { EvaluationStrategyFunction, EvaluationStrategyMetadata } from "../types/index.js";
|
|
55
7
|
/**
|
|
56
8
|
* Registry for evaluation strategies.
|
|
57
9
|
* Allows dynamic registration and retrieval of evaluation strategies.
|
|
@@ -157,4 +109,3 @@ export declare class EvaluatorRegistry extends BaseRegistry<EvaluationStrategyFu
|
|
|
157
109
|
getStrategiesByProvider(provider: string): Promise<string[]>;
|
|
158
110
|
}
|
|
159
111
|
export declare const getEvaluatorRegistry: () => EvaluatorRegistry;
|
|
160
|
-
export {};
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Uses NeuroLinkFeatureError and createErrorFactory from core infrastructure.
|
|
4
4
|
*/
|
|
5
5
|
import { NeuroLinkFeatureError } from "../../core/infrastructure/index.js";
|
|
6
|
-
import type { EnhancedEvaluationContext } from "../../types/index.js";
|
|
6
|
+
import type { EnhancedEvaluationContext, EvaluationErrorContext } from "../../types/index.js";
|
|
7
7
|
/**
|
|
8
8
|
* Error codes for the Evaluation feature.
|
|
9
9
|
* These codes help identify specific error scenarios for proper handling.
|
|
@@ -65,36 +65,12 @@ export declare const evaluationErrors: {
|
|
|
65
65
|
/** Rate limit hit during evaluation */
|
|
66
66
|
readonly RATE_LIMIT_ERROR: "RATE_LIMIT_ERROR";
|
|
67
67
|
};
|
|
68
|
-
create: (code: "
|
|
68
|
+
create: (code: "CONFIGURATION_ERROR" | "PROVIDER_ERROR" | "EVALUATION_FAILED" | "PARSE_ERROR" | "STRATEGY_NOT_FOUND" | "CUSTOM_EVALUATOR_ERROR" | "BATCH_EVALUATION_ERROR" | "AGGREGATION_ERROR" | "REGISTRY_ERROR" | "MAX_RETRIES_EXCEEDED" | "TIMEOUT_ERROR" | "RATE_LIMIT_ERROR", message: string, options?: {
|
|
69
69
|
retryable?: boolean;
|
|
70
70
|
details?: Record<string, unknown>;
|
|
71
71
|
cause?: Error;
|
|
72
72
|
} | undefined) => NeuroLinkFeatureError;
|
|
73
73
|
};
|
|
74
|
-
/**
|
|
75
|
-
* Extended evaluation context for error details.
|
|
76
|
-
* Provides rich debugging information when errors occur.
|
|
77
|
-
*/
|
|
78
|
-
type EvaluationErrorContext = {
|
|
79
|
-
/** Length of the user query (redacted for safety) */
|
|
80
|
-
userQueryLength?: number;
|
|
81
|
-
/** Length of the AI response (redacted for safety) */
|
|
82
|
-
aiResponseLength?: number;
|
|
83
|
-
/** The current attempt number */
|
|
84
|
-
attemptNumber?: number;
|
|
85
|
-
/** Previous evaluation scores if any */
|
|
86
|
-
previousScores?: number[];
|
|
87
|
-
/** The evaluation strategy in use */
|
|
88
|
-
strategy?: string;
|
|
89
|
-
/** The evaluation model being used */
|
|
90
|
-
evaluationModel?: string;
|
|
91
|
-
/** The provider being used */
|
|
92
|
-
provider?: string;
|
|
93
|
-
/** Length of the raw response (redacted for safety) */
|
|
94
|
-
rawResponseLength?: number;
|
|
95
|
-
/** Any additional context */
|
|
96
|
-
additionalContext?: Record<string, unknown>;
|
|
97
|
-
};
|
|
98
74
|
/**
|
|
99
75
|
* Checks if an error is retryable based on its code.
|
|
100
76
|
* Transient errors (timeout, rate limit, some provider errors) are retryable.
|
|
@@ -183,4 +159,3 @@ export declare function createConfigurationError(message: string, configIssue: s
|
|
|
183
159
|
* @returns An EvaluationErrorContext
|
|
184
160
|
*/
|
|
185
161
|
export declare function contextToErrorContext(context: EnhancedEvaluationContext): EvaluationErrorContext;
|
|
186
|
-
export {};
|
|
@@ -2,44 +2,7 @@
|
|
|
2
2
|
* @file Langfuse Adapter
|
|
3
3
|
* Integration with Langfuse for LLM observability
|
|
4
4
|
*/
|
|
5
|
-
|
|
6
|
-
* Langfuse client interface (minimal for type safety)
|
|
7
|
-
*/
|
|
8
|
-
type LangfuseClient = {
|
|
9
|
-
score: (params: {
|
|
10
|
-
name: string;
|
|
11
|
-
value: number;
|
|
12
|
-
traceId?: string;
|
|
13
|
-
observationId?: string;
|
|
14
|
-
comment?: string;
|
|
15
|
-
metadata?: Record<string, unknown>;
|
|
16
|
-
}) => Promise<unknown>;
|
|
17
|
-
trace?: (params: {
|
|
18
|
-
name: string;
|
|
19
|
-
metadata?: Record<string, unknown>;
|
|
20
|
-
tags?: string[];
|
|
21
|
-
}) => {
|
|
22
|
-
id: string;
|
|
23
|
-
};
|
|
24
|
-
shutdown?: () => Promise<void>;
|
|
25
|
-
};
|
|
26
|
-
/**
|
|
27
|
-
* Langfuse adapter configuration
|
|
28
|
-
*/
|
|
29
|
-
type LangfuseAdapterConfig = {
|
|
30
|
-
/** Langfuse client instance */
|
|
31
|
-
client: LangfuseClient;
|
|
32
|
-
/** Prefix for score names */
|
|
33
|
-
scorePrefix?: string;
|
|
34
|
-
/** Include detailed metadata */
|
|
35
|
-
includeMetadata?: boolean;
|
|
36
|
-
/** Tags to add to all scores */
|
|
37
|
-
tags?: string[];
|
|
38
|
-
/** Whether to send pipeline-level scores */
|
|
39
|
-
sendPipelineScores?: boolean;
|
|
40
|
-
/** Whether to send individual scorer scores */
|
|
41
|
-
sendScorerScores?: boolean;
|
|
42
|
-
};
|
|
5
|
+
import type { LangfuseAdapterConfig, LangfuseClient } from "../../types/index.js";
|
|
43
6
|
/**
|
|
44
7
|
* Langfuse adapter for evaluation observability
|
|
45
8
|
*/
|
|
@@ -97,4 +60,3 @@ export declare function createMockLangfuseClient(): LangfuseClient & {
|
|
|
97
60
|
metadata?: Record<string, unknown>;
|
|
98
61
|
}>;
|
|
99
62
|
};
|
|
100
|
-
export {};
|
|
@@ -2,57 +2,7 @@
|
|
|
2
2
|
* @file Observability Hooks
|
|
3
3
|
* OpenTelemetry integration for evaluation tracing
|
|
4
4
|
*/
|
|
5
|
-
import type { EvaluationTraceContext,
|
|
6
|
-
/**
|
|
7
|
-
* Event handler type
|
|
8
|
-
*/
|
|
9
|
-
type EventHandler<T> = (event: T) => void | Promise<void>;
|
|
10
|
-
/**
|
|
11
|
-
* Evaluation events
|
|
12
|
-
*/
|
|
13
|
-
type EvaluationEvents = {
|
|
14
|
-
"scorer:start": {
|
|
15
|
-
scorerId: string;
|
|
16
|
-
scorerName: string;
|
|
17
|
-
timestamp: number;
|
|
18
|
-
traceContext?: EvaluationTraceContext;
|
|
19
|
-
};
|
|
20
|
-
"scorer:end": {
|
|
21
|
-
scorerId: string;
|
|
22
|
-
scorerName: string;
|
|
23
|
-
result: ScoreResult;
|
|
24
|
-
timestamp: number;
|
|
25
|
-
duration: number;
|
|
26
|
-
traceContext?: EvaluationTraceContext;
|
|
27
|
-
};
|
|
28
|
-
"scorer:error": {
|
|
29
|
-
scorerId: string;
|
|
30
|
-
scorerName: string;
|
|
31
|
-
error: string;
|
|
32
|
-
timestamp: number;
|
|
33
|
-
traceContext?: EvaluationTraceContext;
|
|
34
|
-
};
|
|
35
|
-
"pipeline:start": {
|
|
36
|
-
pipelineName: string;
|
|
37
|
-
scorerCount: number;
|
|
38
|
-
timestamp: number;
|
|
39
|
-
correlationId: string;
|
|
40
|
-
traceContext?: EvaluationTraceContext;
|
|
41
|
-
};
|
|
42
|
-
"pipeline:end": {
|
|
43
|
-
pipelineName: string;
|
|
44
|
-
result: PipelineResult;
|
|
45
|
-
timestamp: number;
|
|
46
|
-
duration: number;
|
|
47
|
-
traceContext?: EvaluationTraceContext;
|
|
48
|
-
};
|
|
49
|
-
"pipeline:error": {
|
|
50
|
-
pipelineName: string;
|
|
51
|
-
error: string;
|
|
52
|
-
timestamp: number;
|
|
53
|
-
traceContext?: EvaluationTraceContext;
|
|
54
|
-
};
|
|
55
|
-
};
|
|
5
|
+
import type { EvaluationEvents, EvaluationSpanAttributes, EvaluationTraceContext, EventHandler, PipelineResult, ScoreResult } from "../../types/index.js";
|
|
56
6
|
/**
|
|
57
7
|
* Observability hooks manager
|
|
58
8
|
*/
|
|
@@ -117,13 +67,11 @@ export declare function createMetricsCollectorHook(collector: {
|
|
|
117
67
|
/**
|
|
118
68
|
* OpenTelemetry span attributes
|
|
119
69
|
*/
|
|
120
|
-
type SpanAttributes = Record<string, string | number | boolean>;
|
|
121
70
|
/**
|
|
122
71
|
* Create span attributes from scorer result
|
|
123
72
|
*/
|
|
124
|
-
export declare function scorerToSpanAttributes(result: ScoreResult):
|
|
73
|
+
export declare function scorerToSpanAttributes(result: ScoreResult): EvaluationSpanAttributes;
|
|
125
74
|
/**
|
|
126
75
|
* Create span attributes from pipeline result
|
|
127
76
|
*/
|
|
128
|
-
export declare function pipelineToSpanAttributes(result: PipelineResult):
|
|
129
|
-
export {};
|
|
77
|
+
export declare function pipelineToSpanAttributes(result: PipelineResult): EvaluationSpanAttributes;
|
|
@@ -2,72 +2,19 @@
|
|
|
2
2
|
* @file Batch Strategy
|
|
3
3
|
* Batch processing for evaluation pipelines
|
|
4
4
|
*/
|
|
5
|
-
import type { ScorerInput, PipelineExecutionOptions,
|
|
5
|
+
import type { ScorerInput, PipelineExecutionOptions, BatchEvaluationConfig, BatchEvaluationResult, BatchItemResult } from "../../../types/index.js";
|
|
6
6
|
import type { EvaluationPipeline } from "../evaluationPipeline.js";
|
|
7
|
-
/**
|
|
8
|
-
* Batch processing configuration
|
|
9
|
-
*/
|
|
10
|
-
type BatchConfig = {
|
|
11
|
-
/** Maximum concurrent evaluations */
|
|
12
|
-
concurrency?: number;
|
|
13
|
-
/** Delay between batches (ms) */
|
|
14
|
-
batchDelay?: number;
|
|
15
|
-
/** Continue on individual failures */
|
|
16
|
-
continueOnError?: boolean;
|
|
17
|
-
/** Progress callback */
|
|
18
|
-
onProgress?: (progress: BatchProgress) => void;
|
|
19
|
-
/** Individual result callback */
|
|
20
|
-
onResult?: (result: BatchItemResult) => void;
|
|
21
|
-
};
|
|
22
|
-
/**
|
|
23
|
-
* Batch progress information
|
|
24
|
-
*/
|
|
25
|
-
type BatchProgress = {
|
|
26
|
-
total: number;
|
|
27
|
-
completed: number;
|
|
28
|
-
failed: number;
|
|
29
|
-
remaining: number;
|
|
30
|
-
percentComplete: number;
|
|
31
|
-
estimatedTimeRemaining?: number;
|
|
32
|
-
};
|
|
33
|
-
/**
|
|
34
|
-
* Individual batch item result
|
|
35
|
-
*/
|
|
36
|
-
type BatchItemResult = {
|
|
37
|
-
index: number;
|
|
38
|
-
input: ScorerInput;
|
|
39
|
-
result?: PipelineResult;
|
|
40
|
-
error?: string;
|
|
41
|
-
duration: number;
|
|
42
|
-
};
|
|
43
|
-
/**
|
|
44
|
-
* Batch evaluation result
|
|
45
|
-
*/
|
|
46
|
-
type BatchResult = {
|
|
47
|
-
/** All individual results */
|
|
48
|
-
results: BatchItemResult[];
|
|
49
|
-
/** Summary statistics */
|
|
50
|
-
summary: {
|
|
51
|
-
total: number;
|
|
52
|
-
successful: number;
|
|
53
|
-
failed: number;
|
|
54
|
-
averageScore: number;
|
|
55
|
-
passRate: number;
|
|
56
|
-
totalDuration: number;
|
|
57
|
-
averageDuration: number;
|
|
58
|
-
};
|
|
59
|
-
};
|
|
60
7
|
/**
|
|
61
8
|
* Batch evaluation strategy
|
|
62
9
|
*/
|
|
63
10
|
export declare class BatchStrategy {
|
|
64
11
|
private _pipeline;
|
|
65
12
|
private _config;
|
|
66
|
-
constructor(pipeline: EvaluationPipeline, config?:
|
|
13
|
+
constructor(pipeline: EvaluationPipeline, config?: BatchEvaluationConfig);
|
|
67
14
|
/**
|
|
68
15
|
* Evaluate a batch of inputs
|
|
69
16
|
*/
|
|
70
|
-
evaluate(inputs: ScorerInput[], options?: PipelineExecutionOptions): Promise<
|
|
17
|
+
evaluate(inputs: ScorerInput[], options?: PipelineExecutionOptions): Promise<BatchEvaluationResult>;
|
|
71
18
|
/**
|
|
72
19
|
* Evaluate a single item
|
|
73
20
|
*/
|
|
@@ -83,18 +30,17 @@ export declare class BatchStrategy {
|
|
|
83
30
|
/**
|
|
84
31
|
* Update configuration
|
|
85
32
|
*/
|
|
86
|
-
configure(config: Partial<
|
|
33
|
+
configure(config: Partial<BatchEvaluationConfig>): void;
|
|
87
34
|
}
|
|
88
35
|
/**
|
|
89
36
|
* Create a batch strategy for a pipeline
|
|
90
37
|
*/
|
|
91
|
-
export declare function createBatchStrategy(pipeline: EvaluationPipeline, config?:
|
|
38
|
+
export declare function createBatchStrategy(pipeline: EvaluationPipeline, config?: BatchEvaluationConfig): BatchStrategy;
|
|
92
39
|
/**
|
|
93
40
|
* Evaluate a batch of inputs using a pipeline
|
|
94
41
|
*/
|
|
95
|
-
export declare function evaluateBatch(pipeline: EvaluationPipeline, inputs: ScorerInput[], config?:
|
|
42
|
+
export declare function evaluateBatch(pipeline: EvaluationPipeline, inputs: ScorerInput[], config?: BatchEvaluationConfig): Promise<BatchEvaluationResult>;
|
|
96
43
|
/**
|
|
97
44
|
* Stream batch evaluation results
|
|
98
45
|
*/
|
|
99
|
-
export declare function streamBatchEvaluation(pipeline: EvaluationPipeline, inputs: ScorerInput[], config?: Omit<
|
|
100
|
-
export {};
|
|
46
|
+
export declare function streamBatchEvaluation(pipeline: EvaluationPipeline, inputs: ScorerInput[], config?: Omit<BatchEvaluationConfig, "onResult" | "onProgress">): AsyncGenerator<BatchItemResult, BatchEvaluationResult["summary"], void>;
|
|
@@ -57,7 +57,7 @@ export class BatchStrategy {
|
|
|
57
57
|
total: inputs.length,
|
|
58
58
|
completed: results.length,
|
|
59
59
|
failed: results.filter((r) => r.error).length,
|
|
60
|
-
|
|
60
|
+
pending: inputs.length - results.length,
|
|
61
61
|
percentComplete: (results.length / inputs.length) * 100,
|
|
62
62
|
estimatedTimeRemaining: this._estimateRemainingTime(durations, inputs.length - results.length),
|
|
63
63
|
});
|
|
@@ -76,12 +76,12 @@ export class BatchStrategy {
|
|
|
76
76
|
results,
|
|
77
77
|
summary: {
|
|
78
78
|
total: inputs.length,
|
|
79
|
-
|
|
79
|
+
succeeded: successfulResults.length,
|
|
80
80
|
failed: results.length - successfulResults.length,
|
|
81
81
|
averageScore: scores.length > 0
|
|
82
82
|
? scores.reduce((a, b) => a + b, 0) / scores.length
|
|
83
83
|
: 0,
|
|
84
|
-
|
|
84
|
+
passingRate: successfulResults.length > 0
|
|
85
85
|
? passed.length / successfulResults.length
|
|
86
86
|
: 0,
|
|
87
87
|
totalDuration,
|
|
@@ -200,12 +200,12 @@ export async function* streamBatchEvaluation(pipeline, inputs, config) {
|
|
|
200
200
|
const earlyPassed = successfulResults.filter((r) => r.result.passed);
|
|
201
201
|
return {
|
|
202
202
|
total: inputs.length,
|
|
203
|
-
|
|
203
|
+
succeeded: successfulResults.length,
|
|
204
204
|
failed: results.length - successfulResults.length,
|
|
205
205
|
averageScore: earlyScores.length > 0
|
|
206
206
|
? earlyScores.reduce((a, b) => a + b, 0) / earlyScores.length
|
|
207
207
|
: 0,
|
|
208
|
-
|
|
208
|
+
passingRate: successfulResults.length > 0
|
|
209
209
|
? earlyPassed.length / successfulResults.length
|
|
210
210
|
: 0,
|
|
211
211
|
totalDuration: Date.now() - startTime,
|
|
@@ -227,10 +227,10 @@ export async function* streamBatchEvaluation(pipeline, inputs, config) {
|
|
|
227
227
|
const passed = successfulResults.filter((r) => r.result.passed);
|
|
228
228
|
return {
|
|
229
229
|
total: inputs.length,
|
|
230
|
-
|
|
230
|
+
succeeded: successfulResults.length,
|
|
231
231
|
failed: results.length - successfulResults.length,
|
|
232
232
|
averageScore: scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0,
|
|
233
|
-
|
|
233
|
+
passingRate: successfulResults.length > 0
|
|
234
234
|
? passed.length / successfulResults.length
|
|
235
235
|
: 0,
|
|
236
236
|
totalDuration: Date.now() - startTime,
|
|
@@ -2,6 +2,8 @@ import { AIProviderFactory } from "../core/factory.js";
|
|
|
2
2
|
import { PromptBuilder } from "./prompts.js";
|
|
3
3
|
import { logger } from "../utils/logger.js";
|
|
4
4
|
import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../observability/index.js";
|
|
5
|
+
import { withSpan } from "../telemetry/withSpan.js";
|
|
6
|
+
import { tracers } from "../telemetry/tracers.js";
|
|
5
7
|
/**
|
|
6
8
|
* Implements a RAGAS-style evaluator that uses a "judge" LLM to score the
|
|
7
9
|
* quality of an AI response based on rich, contextual information.
|
|
@@ -33,45 +35,60 @@ export class RAGASEvaluator {
|
|
|
33
35
|
* @returns A promise that resolves to a detailed `EvaluationResult`.
|
|
34
36
|
*/
|
|
35
37
|
async evaluate(context) {
|
|
36
|
-
|
|
37
|
-
"evaluation.
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
const
|
|
45
|
-
|
|
46
|
-
|
|
38
|
+
return withSpan({
|
|
39
|
+
name: "neurolink.evaluation.ragas",
|
|
40
|
+
tracer: tracers.sdk,
|
|
41
|
+
attributes: {
|
|
42
|
+
"evaluation.provider": this.providerName,
|
|
43
|
+
"evaluation.model": this.evaluationModel,
|
|
44
|
+
},
|
|
45
|
+
}, async (otelSpan) => {
|
|
46
|
+
const span = SpanSerializer.createSpan(SpanType.EVALUATION, "evaluation.ragas", {
|
|
47
|
+
"evaluation.dimension": "relevance|accuracy|completeness",
|
|
48
|
+
"ai.provider": this.providerName,
|
|
49
|
+
"ai.model": this.evaluationModel,
|
|
47
50
|
});
|
|
48
|
-
|
|
49
|
-
|
|
51
|
+
const startTime = Date.now();
|
|
52
|
+
try {
|
|
53
|
+
const prompt = this.promptBuilder.buildEvaluationPrompt(context, this.promptGenerator);
|
|
54
|
+
const provider = await AIProviderFactory.createProvider(this.providerName, this.evaluationModel);
|
|
55
|
+
const result = await provider.generate({
|
|
56
|
+
input: { text: prompt },
|
|
57
|
+
});
|
|
58
|
+
if (!result) {
|
|
59
|
+
throw new Error("Evaluation generation failed to return a result.");
|
|
60
|
+
}
|
|
61
|
+
const rawEvaluationResponse = result.content;
|
|
62
|
+
const parsedResult = this.parseEvaluationResponse(rawEvaluationResponse);
|
|
63
|
+
const evaluationTime = Date.now() - startTime;
|
|
64
|
+
const finalResult = {
|
|
65
|
+
...parsedResult,
|
|
66
|
+
isPassing: parsedResult.finalScore >= this.threshold, // This will be recalculated, but is needed for the type
|
|
67
|
+
evaluationModel: this.evaluationModel,
|
|
68
|
+
evaluationTime,
|
|
69
|
+
attemptNumber: context.attemptNumber,
|
|
70
|
+
rawEvaluationResponse,
|
|
71
|
+
};
|
|
72
|
+
// Write evaluation scores to OTel span for Langfuse visibility
|
|
73
|
+
otelSpan.setAttribute("evaluation.relevance_score", finalResult.relevanceScore);
|
|
74
|
+
otelSpan.setAttribute("evaluation.accuracy_score", finalResult.accuracyScore);
|
|
75
|
+
otelSpan.setAttribute("evaluation.completeness_score", finalResult.completenessScore);
|
|
76
|
+
otelSpan.setAttribute("evaluation.final_score", finalResult.finalScore);
|
|
77
|
+
otelSpan.setAttribute("evaluation.is_passing", finalResult.isPassing);
|
|
78
|
+
span.durationMs = Date.now() - startTime;
|
|
79
|
+
const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
|
|
80
|
+
getMetricsAggregator().recordSpan(endedSpan);
|
|
81
|
+
return finalResult;
|
|
50
82
|
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
rawEvaluationResponse,
|
|
61
|
-
};
|
|
62
|
-
span.durationMs = Date.now() - startTime;
|
|
63
|
-
const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
|
|
64
|
-
getMetricsAggregator().recordSpan(endedSpan);
|
|
65
|
-
return finalResult;
|
|
66
|
-
}
|
|
67
|
-
catch (error) {
|
|
68
|
-
span.durationMs = Date.now() - startTime;
|
|
69
|
-
const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR);
|
|
70
|
-
endedSpan.statusMessage =
|
|
71
|
-
error instanceof Error ? error.message : String(error);
|
|
72
|
-
getMetricsAggregator().recordSpan(endedSpan);
|
|
73
|
-
throw error;
|
|
74
|
-
}
|
|
83
|
+
catch (error) {
|
|
84
|
+
span.durationMs = Date.now() - startTime;
|
|
85
|
+
const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR);
|
|
86
|
+
endedSpan.statusMessage =
|
|
87
|
+
error instanceof Error ? error.message : String(error);
|
|
88
|
+
getMetricsAggregator().recordSpan(endedSpan);
|
|
89
|
+
throw error;
|
|
90
|
+
}
|
|
91
|
+
}); // end withSpan
|
|
75
92
|
}
|
|
76
93
|
/**
|
|
77
94
|
* Parses the raw JSON string from the judge LLM into a structured `EvaluationResult` object.
|