@inkeep/agents-api 0.0.0-dev-20260121145510
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +56 -0
- package/SUPPLEMENTAL_TERMS.md +40 -0
- package/dist/.well-known/workflow/v1/flow.cjs +46383 -0
- package/dist/.well-known/workflow/v1/flow.cjs.debug.json +6 -0
- package/dist/.well-known/workflow/v1/manifest.debug.json +55 -0
- package/dist/.well-known/workflow/v1/step.cjs +218683 -0
- package/dist/.well-known/workflow/v1/step.cjs.debug.json +6 -0
- package/dist/.well-known/workflow/v1/webhook.mjs +29 -0
- package/dist/createApp.d.ts +10 -0
- package/dist/createApp.js +170 -0
- package/dist/data/db/index.d.ts +4 -0
- package/dist/data/db/index.js +5 -0
- package/dist/data/db/manageDbClient.d.ts +6 -0
- package/dist/data/db/manageDbClient.js +9 -0
- package/dist/data/db/manageDbPool.d.ts +6 -0
- package/dist/data/db/manageDbPool.js +9 -0
- package/dist/data/db/runDbClient.d.ts +6 -0
- package/dist/data/db/runDbClient.js +9 -0
- package/dist/domains/evals/index.d.ts +13 -0
- package/dist/domains/evals/index.js +13 -0
- package/dist/domains/evals/routes/datasetTriggers.d.ts +7 -0
- package/dist/domains/evals/routes/datasetTriggers.js +65 -0
- package/dist/domains/evals/routes/evaluationTriggers.d.ts +11 -0
- package/dist/domains/evals/routes/evaluationTriggers.js +311 -0
- package/dist/domains/evals/routes/index.d.ts +7 -0
- package/dist/domains/evals/routes/index.js +12 -0
- package/dist/domains/evals/scripts/build-workflow.d.ts +1 -0
- package/dist/domains/evals/scripts/build-workflow.js +31 -0
- package/dist/domains/evals/services/EvaluationService.d.ts +96 -0
- package/dist/domains/evals/services/EvaluationService.js +863 -0
- package/dist/domains/evals/services/conversationEvaluation.d.ts +15 -0
- package/dist/domains/evals/services/conversationEvaluation.js +102 -0
- package/dist/domains/evals/services/datasetRun.d.ts +16 -0
- package/dist/domains/evals/services/datasetRun.js +43 -0
- package/dist/domains/evals/services/evaluationJob.d.ts +17 -0
- package/dist/domains/evals/services/evaluationJob.js +65 -0
- package/dist/domains/evals/services/startEvaluation.d.ts +19 -0
- package/dist/domains/evals/services/startEvaluation.js +18 -0
- package/dist/domains/evals/workflow/functions/evaluateConversation.d.ts +31 -0
- package/dist/domains/evals/workflow/functions/evaluateConversation.js +135 -0
- package/dist/domains/evals/workflow/functions/runDatasetItem.d.ts +39 -0
- package/dist/domains/evals/workflow/functions/runDatasetItem.js +205 -0
- package/dist/domains/evals/workflow/index.d.ts +4 -0
- package/dist/domains/evals/workflow/index.js +5 -0
- package/dist/domains/evals/workflow/routes.d.ts +7 -0
- package/dist/domains/evals/workflow/routes.js +106 -0
- package/dist/domains/evals/workflow/world.d.ts +4 -0
- package/dist/domains/evals/workflow/world.js +36 -0
- package/dist/domains/index.d.ts +4 -0
- package/dist/domains/index.js +5 -0
- package/dist/domains/manage/index.d.ts +12 -0
- package/dist/domains/manage/index.js +31 -0
- package/dist/domains/manage/routes/agent.d.ts +9 -0
- package/dist/domains/manage/routes/agent.js +266 -0
- package/dist/domains/manage/routes/agentFull.d.ts +9 -0
- package/dist/domains/manage/routes/agentFull.js +207 -0
- package/dist/domains/manage/routes/agentToolRelations.d.ts +9 -0
- package/dist/domains/manage/routes/agentToolRelations.js +289 -0
- package/dist/domains/manage/routes/apiKeys.d.ts +9 -0
- package/dist/domains/manage/routes/apiKeys.js +217 -0
- package/dist/domains/manage/routes/artifactComponents.d.ts +9 -0
- package/dist/domains/manage/routes/artifactComponents.js +210 -0
- package/dist/domains/manage/routes/branches.d.ts +9 -0
- package/dist/domains/manage/routes/branches.js +182 -0
- package/dist/domains/manage/routes/cliAuth.d.ts +9 -0
- package/dist/domains/manage/routes/cliAuth.js +60 -0
- package/dist/domains/manage/routes/contextConfigs.d.ts +9 -0
- package/dist/domains/manage/routes/contextConfigs.js +189 -0
- package/dist/domains/manage/routes/conversations.d.ts +7 -0
- package/dist/domains/manage/routes/conversations.js +59 -0
- package/dist/domains/manage/routes/credentialStores.d.ts +9 -0
- package/dist/domains/manage/routes/credentialStores.js +86 -0
- package/dist/domains/manage/routes/credentials.d.ts +9 -0
- package/dist/domains/manage/routes/credentials.js +207 -0
- package/dist/domains/manage/routes/dataComponents.d.ts +9 -0
- package/dist/domains/manage/routes/dataComponents.js +192 -0
- package/dist/domains/manage/routes/evals/datasetItems.d.ts +9 -0
- package/dist/domains/manage/routes/evals/datasetItems.js +310 -0
- package/dist/domains/manage/routes/evals/datasetRunConfigs.d.ts +9 -0
- package/dist/domains/manage/routes/evals/datasetRunConfigs.js +402 -0
- package/dist/domains/manage/routes/evals/datasetRuns.d.ts +9 -0
- package/dist/domains/manage/routes/evals/datasetRuns.js +256 -0
- package/dist/domains/manage/routes/evals/datasets.d.ts +9 -0
- package/dist/domains/manage/routes/evals/datasets.js +238 -0
- package/dist/domains/manage/routes/evals/evaluationJobConfigEvaluatorRelations.d.ts +9 -0
- package/dist/domains/manage/routes/evals/evaluationJobConfigEvaluatorRelations.js +146 -0
- package/dist/domains/manage/routes/evals/evaluationJobConfigs.d.ts +9 -0
- package/dist/domains/manage/routes/evals/evaluationJobConfigs.js +364 -0
- package/dist/domains/manage/routes/evals/evaluationResults.d.ts +7 -0
- package/dist/domains/manage/routes/evals/evaluationResults.js +192 -0
- package/dist/domains/manage/routes/evals/evaluationRunConfigs.d.ts +9 -0
- package/dist/domains/manage/routes/evals/evaluationRunConfigs.js +403 -0
- package/dist/domains/manage/routes/evals/evaluationSuiteConfigEvaluatorRelations.d.ts +9 -0
- package/dist/domains/manage/routes/evals/evaluationSuiteConfigEvaluatorRelations.js +146 -0
- package/dist/domains/manage/routes/evals/evaluationSuiteConfigs.d.ts +9 -0
- package/dist/domains/manage/routes/evals/evaluationSuiteConfigs.js +246 -0
- package/dist/domains/manage/routes/evals/evaluators.d.ts +9 -0
- package/dist/domains/manage/routes/evals/evaluators.js +281 -0
- package/dist/domains/manage/routes/evals/index.d.ts +9 -0
- package/dist/domains/manage/routes/evals/index.js +26 -0
- package/dist/domains/manage/routes/externalAgents.d.ts +9 -0
- package/dist/domains/manage/routes/externalAgents.js +199 -0
- package/dist/domains/manage/routes/functionTools.d.ts +9 -0
- package/dist/domains/manage/routes/functionTools.js +256 -0
- package/dist/domains/manage/routes/functions.d.ts +9 -0
- package/dist/domains/manage/routes/functions.js +285 -0
- package/dist/domains/manage/routes/index.d.ts +7 -0
- package/dist/domains/manage/routes/index.js +68 -0
- package/dist/domains/manage/routes/invitations.d.ts +9 -0
- package/dist/domains/manage/routes/invitations.js +41 -0
- package/dist/domains/manage/routes/mcp.d.ts +7 -0
- package/dist/domains/manage/routes/mcp.js +45 -0
- package/dist/domains/manage/routes/mcpCatalog.d.ts +9 -0
- package/dist/domains/manage/routes/mcpCatalog.js +454 -0
- package/dist/domains/manage/routes/oauth.d.ts +10 -0
- package/dist/domains/manage/routes/oauth.js +327 -0
- package/dist/domains/manage/routes/playgroundToken.d.ts +9 -0
- package/dist/domains/manage/routes/playgroundToken.js +127 -0
- package/dist/domains/manage/routes/projectFull.d.ts +9 -0
- package/dist/domains/manage/routes/projectFull.js +304 -0
- package/dist/domains/manage/routes/projectMembers.d.ts +9 -0
- package/dist/domains/manage/routes/projectMembers.js +201 -0
- package/dist/domains/manage/routes/projectPermissions.d.ts +9 -0
- package/dist/domains/manage/routes/projectPermissions.js +68 -0
- package/dist/domains/manage/routes/projects.d.ts +9 -0
- package/dist/domains/manage/routes/projects.js +279 -0
- package/dist/domains/manage/routes/ref.d.ts +9 -0
- package/dist/domains/manage/routes/ref.js +33 -0
- package/dist/domains/manage/routes/signoz.d.ts +10 -0
- package/dist/domains/manage/routes/signoz.js +159 -0
- package/dist/domains/manage/routes/subAgentArtifactComponents.d.ts +9 -0
- package/dist/domains/manage/routes/subAgentArtifactComponents.js +202 -0
- package/dist/domains/manage/routes/subAgentDataComponents.d.ts +9 -0
- package/dist/domains/manage/routes/subAgentDataComponents.js +201 -0
- package/dist/domains/manage/routes/subAgentExternalAgentRelations.d.ts +9 -0
- package/dist/domains/manage/routes/subAgentExternalAgentRelations.js +216 -0
- package/dist/domains/manage/routes/subAgentFunctionTools.d.ts +9 -0
- package/dist/domains/manage/routes/subAgentFunctionTools.js +205 -0
- package/dist/domains/manage/routes/subAgentRelations.d.ts +9 -0
- package/dist/domains/manage/routes/subAgentRelations.js +263 -0
- package/dist/domains/manage/routes/subAgentTeamAgentRelations.d.ts +9 -0
- package/dist/domains/manage/routes/subAgentTeamAgentRelations.js +216 -0
- package/dist/domains/manage/routes/subAgentToolRelations.d.ts +9 -0
- package/dist/domains/manage/routes/subAgentToolRelations.js +289 -0
- package/dist/domains/manage/routes/subAgents.d.ts +9 -0
- package/dist/domains/manage/routes/subAgents.js +220 -0
- package/dist/domains/manage/routes/thirdPartyMCPServers.d.ts +9 -0
- package/dist/domains/manage/routes/thirdPartyMCPServers.js +72 -0
- package/dist/domains/manage/routes/tools.d.ts +9 -0
- package/dist/domains/manage/routes/tools.js +261 -0
- package/dist/domains/manage/routes/triggers.d.ts +9 -0
- package/dist/domains/manage/routes/triggers.js +455 -0
- package/dist/domains/manage/routes/userOrganizations.d.ts +9 -0
- package/dist/domains/manage/routes/userOrganizations.js +58 -0
- package/dist/domains/run/a2a/client.d.ts +186 -0
- package/dist/domains/run/a2a/client.js +524 -0
- package/dist/domains/run/a2a/handlers.d.ts +7 -0
- package/dist/domains/run/a2a/handlers.js +574 -0
- package/dist/domains/run/a2a/transfer.d.ts +27 -0
- package/dist/domains/run/a2a/transfer.js +50 -0
- package/dist/domains/run/a2a/types.d.ts +75 -0
- package/dist/domains/run/a2a/types.js +22 -0
- package/dist/domains/run/agents/Agent.d.ts +273 -0
- package/dist/domains/run/agents/Agent.js +2104 -0
- package/dist/domains/run/agents/ModelFactory.d.ts +63 -0
- package/dist/domains/run/agents/ModelFactory.js +194 -0
- package/dist/domains/run/agents/SystemPromptBuilder.d.ts +21 -0
- package/dist/domains/run/agents/SystemPromptBuilder.js +48 -0
- package/dist/domains/run/agents/ToolSessionManager.d.ts +63 -0
- package/dist/domains/run/agents/ToolSessionManager.js +146 -0
- package/dist/domains/run/agents/generateTaskHandler.d.ts +44 -0
- package/dist/domains/run/agents/generateTaskHandler.js +396 -0
- package/dist/domains/run/agents/relationTools.d.ts +64 -0
- package/dist/domains/run/agents/relationTools.js +365 -0
- package/dist/domains/run/agents/types.d.ts +31 -0
- package/dist/domains/run/agents/types.js +1 -0
- package/dist/domains/run/agents/versions/v1/Phase1Config.d.ts +29 -0
- package/dist/domains/run/agents/versions/v1/Phase1Config.js +458 -0
- package/dist/domains/run/agents/versions/v1/Phase2Config.d.ts +33 -0
- package/dist/domains/run/agents/versions/v1/Phase2Config.js +341 -0
- package/dist/domains/run/constants/execution-limits/defaults.d.ts +51 -0
- package/dist/domains/run/constants/execution-limits/defaults.js +52 -0
- package/dist/domains/run/constants/execution-limits/index.d.ts +6 -0
- package/dist/domains/run/constants/execution-limits/index.js +21 -0
- package/dist/domains/run/context/ContextFetcher.d.ts +68 -0
- package/dist/domains/run/context/ContextFetcher.js +276 -0
- package/dist/domains/run/context/ContextResolver.d.ts +56 -0
- package/dist/domains/run/context/ContextResolver.js +273 -0
- package/dist/domains/run/context/context.d.ts +19 -0
- package/dist/domains/run/context/context.js +108 -0
- package/dist/domains/run/context/contextCache.d.ts +56 -0
- package/dist/domains/run/context/contextCache.js +174 -0
- package/dist/domains/run/context/index.d.ts +6 -0
- package/dist/domains/run/context/index.js +7 -0
- package/dist/domains/run/context/validation.d.ts +39 -0
- package/dist/domains/run/context/validation.js +255 -0
- package/dist/domains/run/data/agent.d.ts +7 -0
- package/dist/domains/run/data/agent.js +67 -0
- package/dist/domains/run/data/agents.d.ts +34 -0
- package/dist/domains/run/data/agents.js +131 -0
- package/dist/domains/run/data/conversations.d.ts +129 -0
- package/dist/domains/run/data/conversations.js +517 -0
- package/dist/domains/run/handlers/executionHandler.d.ts +44 -0
- package/dist/domains/run/handlers/executionHandler.js +485 -0
- package/dist/domains/run/index.d.ts +13 -0
- package/dist/domains/run/index.js +21 -0
- package/dist/domains/run/routes/agents.d.ts +13 -0
- package/dist/domains/run/routes/agents.js +141 -0
- package/dist/domains/run/routes/chat.d.ts +14 -0
- package/dist/domains/run/routes/chat.js +300 -0
- package/dist/domains/run/routes/chatDataStream.d.ts +14 -0
- package/dist/domains/run/routes/chatDataStream.js +381 -0
- package/dist/domains/run/routes/mcp.d.ts +14 -0
- package/dist/domains/run/routes/mcp.js +483 -0
- package/dist/domains/run/routes/webhooks.d.ts +15 -0
- package/dist/domains/run/routes/webhooks.js +416 -0
- package/dist/domains/run/services/AgentSession.d.ts +354 -0
- package/dist/domains/run/services/AgentSession.js +1203 -0
- package/dist/domains/run/services/ArtifactParser.d.ts +105 -0
- package/dist/domains/run/services/ArtifactParser.js +338 -0
- package/dist/domains/run/services/ArtifactService.d.ts +122 -0
- package/dist/domains/run/services/ArtifactService.js +629 -0
- package/dist/domains/run/services/BaseCompressor.d.ts +183 -0
- package/dist/domains/run/services/BaseCompressor.js +500 -0
- package/dist/domains/run/services/ConversationCompressor.d.ts +32 -0
- package/dist/domains/run/services/ConversationCompressor.js +91 -0
- package/dist/domains/run/services/IncrementalStreamParser.d.ts +98 -0
- package/dist/domains/run/services/IncrementalStreamParser.js +327 -0
- package/dist/domains/run/services/MidGenerationCompressor.d.ts +63 -0
- package/dist/domains/run/services/MidGenerationCompressor.js +104 -0
- package/dist/domains/run/services/PendingToolApprovalManager.d.ts +62 -0
- package/dist/domains/run/services/PendingToolApprovalManager.js +133 -0
- package/dist/domains/run/services/ResponseFormatter.d.ts +39 -0
- package/dist/domains/run/services/ResponseFormatter.js +152 -0
- package/dist/domains/run/services/evaluationRunConfigMatcher.d.ts +4 -0
- package/dist/domains/run/services/evaluationRunConfigMatcher.js +7 -0
- package/dist/domains/run/tools/NativeSandboxExecutor.d.ts +38 -0
- package/dist/domains/run/tools/NativeSandboxExecutor.js +432 -0
- package/dist/domains/run/tools/SandboxExecutorFactory.d.ts +36 -0
- package/dist/domains/run/tools/SandboxExecutorFactory.js +80 -0
- package/dist/domains/run/tools/VercelSandboxExecutor.d.ts +71 -0
- package/dist/domains/run/tools/VercelSandboxExecutor.js +340 -0
- package/dist/domains/run/tools/distill-conversation-history-tool.d.ts +62 -0
- package/dist/domains/run/tools/distill-conversation-history-tool.js +206 -0
- package/dist/domains/run/tools/distill-conversation-tool.d.ts +41 -0
- package/dist/domains/run/tools/distill-conversation-tool.js +141 -0
- package/dist/domains/run/tools/sandbox-utils.d.ts +18 -0
- package/dist/domains/run/tools/sandbox-utils.js +53 -0
- package/dist/domains/run/types/chat.d.ts +27 -0
- package/dist/domains/run/types/chat.js +1 -0
- package/dist/domains/run/types/executionContext.d.ts +40 -0
- package/dist/domains/run/types/executionContext.js +28 -0
- package/dist/domains/run/types/xml.d.ts +5 -0
- package/dist/domains/run/utils/SchemaProcessor.d.ts +52 -0
- package/dist/domains/run/utils/SchemaProcessor.js +182 -0
- package/dist/domains/run/utils/agent-operations.d.ts +62 -0
- package/dist/domains/run/utils/agent-operations.js +53 -0
- package/dist/domains/run/utils/artifact-component-schema.d.ts +42 -0
- package/dist/domains/run/utils/artifact-component-schema.js +186 -0
- package/dist/domains/run/utils/cleanup.d.ts +21 -0
- package/dist/domains/run/utils/cleanup.js +59 -0
- package/dist/domains/run/utils/data-component-schema.d.ts +2 -0
- package/dist/domains/run/utils/data-component-schema.js +3 -0
- package/dist/domains/run/utils/default-status-schemas.d.ts +20 -0
- package/dist/domains/run/utils/default-status-schemas.js +24 -0
- package/dist/domains/run/utils/json-postprocessor.d.ts +13 -0
- package/dist/domains/run/utils/json-postprocessor.js +19 -0
- package/dist/domains/run/utils/model-context-utils.d.ts +39 -0
- package/dist/domains/run/utils/model-context-utils.js +181 -0
- package/dist/domains/run/utils/model-resolver.d.ts +6 -0
- package/dist/domains/run/utils/model-resolver.js +24 -0
- package/dist/domains/run/utils/project.d.ts +207 -0
- package/dist/domains/run/utils/project.js +315 -0
- package/dist/domains/run/utils/schema-validation.d.ts +44 -0
- package/dist/domains/run/utils/schema-validation.js +97 -0
- package/dist/domains/run/utils/stream-helpers.d.ts +193 -0
- package/dist/domains/run/utils/stream-helpers.js +510 -0
- package/dist/domains/run/utils/stream-registry.d.ts +22 -0
- package/dist/domains/run/utils/stream-registry.js +33 -0
- package/dist/domains/run/utils/token-estimator.d.ts +23 -0
- package/dist/domains/run/utils/token-estimator.js +17 -0
- package/dist/domains/run/utils/tracer.d.ts +7 -0
- package/dist/domains/run/utils/tracer.js +7 -0
- package/dist/env.d.ts +89 -0
- package/dist/env.js +69 -0
- package/dist/factory.d.ts +1535 -0
- package/dist/factory.js +42 -0
- package/dist/index.d.ts +1530 -0
- package/dist/index.js +59 -0
- package/dist/initialization.d.ts +6 -0
- package/dist/initialization.js +65 -0
- package/dist/instrumentation.d.ts +17 -0
- package/dist/instrumentation.js +68 -0
- package/dist/logger.d.ts +2 -0
- package/dist/logger.js +3 -0
- package/dist/middleware/branchScopedDb.d.ts +31 -0
- package/dist/middleware/branchScopedDb.js +137 -0
- package/dist/middleware/cors.d.ts +36 -0
- package/dist/middleware/cors.js +131 -0
- package/dist/middleware/errorHandler.d.ts +12 -0
- package/dist/middleware/errorHandler.js +88 -0
- package/dist/middleware/evalsAuth.d.ts +16 -0
- package/dist/middleware/evalsAuth.js +52 -0
- package/dist/middleware/index.d.ts +8 -0
- package/dist/middleware/index.js +9 -0
- package/dist/middleware/manageAuth.d.ts +25 -0
- package/dist/middleware/manageAuth.js +80 -0
- package/dist/middleware/projectAccess.d.ts +31 -0
- package/dist/middleware/projectAccess.js +118 -0
- package/dist/middleware/projectConfig.d.ts +25 -0
- package/dist/middleware/projectConfig.js +89 -0
- package/dist/middleware/ref.d.ts +61 -0
- package/dist/middleware/ref.js +239 -0
- package/dist/middleware/requirePermission.d.ts +14 -0
- package/dist/middleware/requirePermission.js +80 -0
- package/dist/middleware/runAuth.d.ts +29 -0
- package/dist/middleware/runAuth.js +253 -0
- package/dist/middleware/sessionAuth.d.ts +17 -0
- package/dist/middleware/sessionAuth.js +58 -0
- package/dist/middleware/tenantAccess.d.ts +22 -0
- package/dist/middleware/tenantAccess.js +63 -0
- package/dist/middleware/tracing.d.ts +7 -0
- package/dist/middleware/tracing.js +50 -0
- package/dist/openapi.d.ts +7 -0
- package/dist/openapi.js +156 -0
- package/dist/ssoHelpers.d.ts +20 -0
- package/dist/ssoHelpers.js +51 -0
- package/dist/templates/v1/phase1/system-prompt.js +5 -0
- package/dist/templates/v1/phase1/thinking-preparation.js +5 -0
- package/dist/templates/v1/phase1/tool.js +5 -0
- package/dist/templates/v1/phase2/data-component.js +5 -0
- package/dist/templates/v1/phase2/data-components.js +5 -0
- package/dist/templates/v1/phase2/system-prompt.js +5 -0
- package/dist/templates/v1/shared/artifact-retrieval-guidance.js +5 -0
- package/dist/templates/v1/shared/artifact.js +5 -0
- package/dist/types/app.d.ts +64 -0
- package/dist/types/app.js +1 -0
- package/dist/types/index.d.ts +2 -0
- package/dist/types/index.js +1 -0
- package/dist/types/runExecutionContext.d.ts +25 -0
- package/dist/types/runExecutionContext.js +28 -0
- package/dist/utils/oauthService.d.ts +71 -0
- package/dist/utils/oauthService.js +106 -0
- package/dist/utils/signozHelpers.d.ts +9 -0
- package/dist/utils/signozHelpers.js +33 -0
- package/dist/utils/speakeasy.d.ts +93 -0
- package/dist/utils/speakeasy.js +44 -0
- package/dist/utils/tempApiKeys.d.ts +17 -0
- package/dist/utils/tempApiKeys.js +26 -0
- package/dist/utils/workflowApiHelpers.d.ts +1 -0
- package/dist/utils/workflowApiHelpers.js +1 -0
- package/package.json +125 -0
|
@@ -0,0 +1,863 @@
|
|
|
1
|
+
import { getLogger as getLogger$1 } from "../../../logger.js";
|
|
2
|
+
import { env } from "../../../env.js";
|
|
3
|
+
import manageDbClient_default from "../../../data/db/manageDbClient.js";
|
|
4
|
+
import manageDbPool_default from "../../../data/db/manageDbPool.js";
|
|
5
|
+
import runDbClient_default from "../../../data/db/runDbClient.js";
|
|
6
|
+
import { ModelFactory, createEvaluationResult, createEvaluationRun, filterConversationsForJob, generateId, getConversationHistory, getEvaluationJobConfigById, getEvaluationJobConfigEvaluatorRelations, getEvaluatorById, getFullAgent, getProjectScopedRef, resolveRef, updateEvaluationResult, withRef } from "@inkeep/agents-core";
|
|
7
|
+
import { generateObject, generateText } from "ai";
|
|
8
|
+
import { z } from "zod";
|
|
9
|
+
|
|
10
|
+
//#region src/domains/evals/services/EvaluationService.ts
|
|
11
|
+
const logger = getLogger$1("EvaluationService");
|
|
12
|
+
/**
 * Converts a JSON Schema node into a Zod schema.
 *
 * Supported `type` values are `object`, `array`, `string`, `number`,
 * `integer`, `boolean` and `null`. Objects with `properties` become
 * `z.object(...)`, where every property not listed in `required` is made
 * optional and property descriptions are carried over via `.describe()`.
 * Objects without `properties` become a string-keyed record of unknown values.
 *
 * Fallbacks (each one is logged as a warning):
 * - a missing or non-object schema converts to `z.string()`;
 * - an unrecognised `type` converts to `z.unknown()`.
 *
 * @param {object} jsonSchema - The JSON Schema node to convert.
 * @returns The Zod schema built for the given node.
 */
function jsonSchemaToZod(jsonSchema) {
  if (!jsonSchema || typeof jsonSchema !== "object") {
    logger.warn({ jsonSchema }, "Invalid JSON schema provided, using string fallback");
    return z.string();
  }
  switch (jsonSchema.type) {
    case "object": {
      if (!jsonSchema.properties) return z.record(z.string(), z.unknown());
      const shape = {};
      // A malformed `required` (for example a boolean) must not crash the
      // conversion; treat anything that is not an array as "nothing required".
      const required = Array.isArray(jsonSchema.required) ? jsonSchema.required : [];
      for (const [key, propSchema] of Object.entries(jsonSchema.properties)) {
        let zodType = jsonSchemaToZod(propSchema);
        // The recursive call already tolerates a null/undefined property
        // schema (string fallback), so the description lookup must as well.
        if (propSchema?.description) zodType = zodType.describe(String(propSchema.description));
        if (!required.includes(key)) zodType = zodType.optional();
        shape[key] = zodType;
      }
      return z.object(shape);
    }
    case "array": {
      const itemSchema = jsonSchema.items ? jsonSchemaToZod(jsonSchema.items) : z.unknown();
      return z.array(itemSchema);
    }
    case "string":
      return z.string();
    case "number":
      return z.number();
    case "integer":
      return z.number().int();
    case "boolean":
      return z.boolean();
    case "null":
      return z.null();
    default:
      logger.warn({
        unsupportedType: jsonSchema.type,
        schema: jsonSchema
      }, "Unsupported JSON schema type, using unknown validation");
      return z.unknown();
  }
}
|
|
52
|
+
/**
|
|
53
|
+
* Service for running dataset items through the chat API endpoint
|
|
54
|
+
*/
|
|
55
|
+
var EvaluationService = class {
|
|
56
|
+
agentsApiUrl;
|
|
57
|
+
runApiBypassSecret;
|
|
58
|
+
constructor() {
|
|
59
|
+
this.runApiBypassSecret = env.INKEEP_AGENTS_RUN_API_BYPASS_SECRET ?? void 0;
|
|
60
|
+
this.agentsApiUrl = env.INKEEP_AGENTS_API_URL ?? "";
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Run a dataset item through the chat API endpoint
|
|
64
|
+
* Supports multi-turn conversations with simulation agents
|
|
65
|
+
*/
|
|
66
|
+
async runDatasetItem(options) {
|
|
67
|
+
const { tenantId, projectId, agentId, datasetItem, datasetRunId, apiKey } = options;
|
|
68
|
+
try {
|
|
69
|
+
const initialMessages = this.extractMessagesFromDatasetItem(datasetItem);
|
|
70
|
+
if (!initialMessages || initialMessages.length === 0) return { error: "Dataset item has no valid input messages" };
|
|
71
|
+
const finalConversationId = generateId();
|
|
72
|
+
if (datasetItem.simulationAgent && typeof datasetItem.simulationAgent === "object" && datasetItem.simulationAgent !== null && !Array.isArray(datasetItem.simulationAgent) && datasetItem.simulationAgent.prompt && datasetItem.simulationAgent.model?.model) return await this.runDatasetItemWithSimulation({
|
|
73
|
+
tenantId,
|
|
74
|
+
projectId,
|
|
75
|
+
agentId,
|
|
76
|
+
datasetItem,
|
|
77
|
+
datasetRunId,
|
|
78
|
+
conversationId: finalConversationId,
|
|
79
|
+
apiKey,
|
|
80
|
+
initialMessages,
|
|
81
|
+
simulationAgent: datasetItem.simulationAgent
|
|
82
|
+
});
|
|
83
|
+
return await this.runSingleTurn({
|
|
84
|
+
tenantId,
|
|
85
|
+
projectId,
|
|
86
|
+
agentId,
|
|
87
|
+
datasetItem,
|
|
88
|
+
datasetRunId,
|
|
89
|
+
conversationId: finalConversationId,
|
|
90
|
+
apiKey,
|
|
91
|
+
messages: initialMessages
|
|
92
|
+
});
|
|
93
|
+
} catch (error) {
|
|
94
|
+
logger.error({
|
|
95
|
+
error: error instanceof Error ? error.message : String(error),
|
|
96
|
+
datasetItemId: datasetItem.id
|
|
97
|
+
}, "Error running dataset item through chat API");
|
|
98
|
+
return { error: error instanceof Error ? error.message : "Unknown error occurred" };
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Run a single-turn conversation (original behavior)
|
|
103
|
+
*/
|
|
104
|
+
async runSingleTurn(params) {
|
|
105
|
+
const { tenantId, projectId, agentId, datasetItem, datasetRunId, conversationId, apiKey, messages } = params;
|
|
106
|
+
const chatUrl = `${this.agentsApiUrl}/run/api/chat`;
|
|
107
|
+
const chatPayload = {
|
|
108
|
+
messages,
|
|
109
|
+
conversationId,
|
|
110
|
+
stream: true
|
|
111
|
+
};
|
|
112
|
+
const authToken = apiKey || this.runApiBypassSecret;
|
|
113
|
+
const headers$1 = {
|
|
114
|
+
"Content-Type": "application/json",
|
|
115
|
+
...authToken && { Authorization: `Bearer ${authToken}` },
|
|
116
|
+
"x-inkeep-tenant-id": tenantId,
|
|
117
|
+
"x-inkeep-project-id": projectId,
|
|
118
|
+
"x-inkeep-agent-id": agentId,
|
|
119
|
+
...datasetRunId && { "x-inkeep-dataset-run-id": datasetRunId }
|
|
120
|
+
};
|
|
121
|
+
logger.info({
|
|
122
|
+
tenantId,
|
|
123
|
+
projectId,
|
|
124
|
+
agentId,
|
|
125
|
+
datasetItemId: datasetItem.id,
|
|
126
|
+
datasetRunId,
|
|
127
|
+
conversationId
|
|
128
|
+
}, "Running dataset item through chat API");
|
|
129
|
+
const response = await fetch(chatUrl, {
|
|
130
|
+
method: "POST",
|
|
131
|
+
headers: headers$1,
|
|
132
|
+
body: JSON.stringify(chatPayload)
|
|
133
|
+
});
|
|
134
|
+
if (!response.ok) {
|
|
135
|
+
const errorText = await response.text();
|
|
136
|
+
logger.error({
|
|
137
|
+
status: response.status,
|
|
138
|
+
statusText: response.statusText,
|
|
139
|
+
errorText,
|
|
140
|
+
datasetItemId: datasetItem.id,
|
|
141
|
+
conversationId
|
|
142
|
+
}, "Chat API request failed");
|
|
143
|
+
return {
|
|
144
|
+
conversationId,
|
|
145
|
+
error: `Chat API error: ${response.status} ${response.statusText}`
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
const responseText = await response.text();
|
|
149
|
+
const parseResult = this.parseSSEResponse(responseText);
|
|
150
|
+
if (parseResult.error) {
|
|
151
|
+
logger.error({
|
|
152
|
+
datasetItemId: datasetItem.id,
|
|
153
|
+
conversationId,
|
|
154
|
+
errorMessage: parseResult.error
|
|
155
|
+
}, "Chat API returned error operation");
|
|
156
|
+
return {
|
|
157
|
+
conversationId,
|
|
158
|
+
error: parseResult.error
|
|
159
|
+
};
|
|
160
|
+
}
|
|
161
|
+
logger.info({
|
|
162
|
+
datasetItemId: datasetItem.id,
|
|
163
|
+
conversationId,
|
|
164
|
+
responseLength: parseResult.text?.length || 0
|
|
165
|
+
}, "Successfully processed dataset item");
|
|
166
|
+
return {
|
|
167
|
+
conversationId,
|
|
168
|
+
response: parseResult.text
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
/**
|
|
172
|
+
* Run a multi-turn conversation with simulation agent
|
|
173
|
+
*/
|
|
174
|
+
async runDatasetItemWithSimulation(params) {
|
|
175
|
+
const { tenantId, projectId, agentId, datasetItem, datasetRunId, conversationId, apiKey, initialMessages, simulationAgent } = params;
|
|
176
|
+
logger.info({
|
|
177
|
+
tenantId,
|
|
178
|
+
projectId,
|
|
179
|
+
agentId,
|
|
180
|
+
datasetItemId: datasetItem.id,
|
|
181
|
+
conversationId,
|
|
182
|
+
hasSimulationAgent: true,
|
|
183
|
+
stopWhen: simulationAgent.stopWhen
|
|
184
|
+
}, "Running dataset item with simulation agent");
|
|
185
|
+
const simulationModelConfig = ModelFactory.prepareGenerationConfig(simulationAgent.model);
|
|
186
|
+
const maxSteps = (simulationAgent.stopWhen || {}).stepCountIs ?? 10;
|
|
187
|
+
const conversationHistory = [];
|
|
188
|
+
let stepCount = 0;
|
|
189
|
+
const initialResult = await this.runSingleTurn({
|
|
190
|
+
tenantId,
|
|
191
|
+
projectId,
|
|
192
|
+
agentId,
|
|
193
|
+
datasetItem,
|
|
194
|
+
datasetRunId,
|
|
195
|
+
conversationId,
|
|
196
|
+
apiKey,
|
|
197
|
+
messages: initialMessages
|
|
198
|
+
});
|
|
199
|
+
if (initialResult.error || !initialResult.response) return initialResult;
|
|
200
|
+
const lastUserMessage = initialMessages.filter((m) => m.role === "user").map((m) => typeof m.content === "string" ? m.content : JSON.stringify(m.content)).join("\n");
|
|
201
|
+
conversationHistory.push({
|
|
202
|
+
role: "user",
|
|
203
|
+
content: lastUserMessage
|
|
204
|
+
});
|
|
205
|
+
conversationHistory.push({
|
|
206
|
+
role: "assistant",
|
|
207
|
+
content: initialResult.response
|
|
208
|
+
});
|
|
209
|
+
stepCount++;
|
|
210
|
+
while (stepCount < maxSteps) try {
|
|
211
|
+
const simulationPrompt = this.buildSimulationPrompt(simulationAgent.prompt, conversationHistory);
|
|
212
|
+
logger.debug({
|
|
213
|
+
stepCount,
|
|
214
|
+
maxSteps,
|
|
215
|
+
conversationHistoryLength: conversationHistory.length
|
|
216
|
+
}, "Generating next user message with simulation agent");
|
|
217
|
+
const nextUserMessage = (await generateText({
|
|
218
|
+
...simulationModelConfig,
|
|
219
|
+
prompt: simulationPrompt
|
|
220
|
+
})).text.trim();
|
|
221
|
+
if (!nextUserMessage) {
|
|
222
|
+
logger.warn({
|
|
223
|
+
stepCount,
|
|
224
|
+
datasetItemId: datasetItem.id
|
|
225
|
+
}, "Simulation agent returned empty message, stopping conversation");
|
|
226
|
+
break;
|
|
227
|
+
}
|
|
228
|
+
conversationHistory.push({
|
|
229
|
+
role: "user",
|
|
230
|
+
content: nextUserMessage
|
|
231
|
+
});
|
|
232
|
+
const agentResponse = await this.runSingleTurn({
|
|
233
|
+
tenantId,
|
|
234
|
+
projectId,
|
|
235
|
+
agentId,
|
|
236
|
+
datasetItem,
|
|
237
|
+
datasetRunId,
|
|
238
|
+
conversationId,
|
|
239
|
+
apiKey,
|
|
240
|
+
messages: [{
|
|
241
|
+
role: "user",
|
|
242
|
+
content: nextUserMessage
|
|
243
|
+
}]
|
|
244
|
+
});
|
|
245
|
+
if (agentResponse.error || !agentResponse.response) {
|
|
246
|
+
logger.warn({
|
|
247
|
+
stepCount,
|
|
248
|
+
error: agentResponse.error,
|
|
249
|
+
datasetItemId: datasetItem.id
|
|
250
|
+
}, "Agent response failed, stopping conversation");
|
|
251
|
+
break;
|
|
252
|
+
}
|
|
253
|
+
conversationHistory.push({
|
|
254
|
+
role: "assistant",
|
|
255
|
+
content: agentResponse.response
|
|
256
|
+
});
|
|
257
|
+
stepCount++;
|
|
258
|
+
} catch (error) {
|
|
259
|
+
logger.error({
|
|
260
|
+
error: error instanceof Error ? error.message : String(error),
|
|
261
|
+
stepCount,
|
|
262
|
+
datasetItemId: datasetItem.id
|
|
263
|
+
}, "Error in simulation agent loop");
|
|
264
|
+
break;
|
|
265
|
+
}
|
|
266
|
+
logger.info({
|
|
267
|
+
datasetItemId: datasetItem.id,
|
|
268
|
+
conversationId,
|
|
269
|
+
finalStepCount: stepCount,
|
|
270
|
+
maxSteps,
|
|
271
|
+
conversationHistoryLength: conversationHistory.length
|
|
272
|
+
}, "Completed multi-turn conversation with simulation agent");
|
|
273
|
+
return {
|
|
274
|
+
conversationId,
|
|
275
|
+
response: conversationHistory.filter((m) => m.role === "assistant").pop()?.content || ""
|
|
276
|
+
};
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Build prompt for simulation agent based on persona and conversation history
|
|
280
|
+
*/
|
|
281
|
+
buildSimulationPrompt(personaPrompt, conversationHistory) {
|
|
282
|
+
return `${personaPrompt}
|
|
283
|
+
|
|
284
|
+
You are simulating a user in a conversation. Based on the conversation history below, generate the next user message that would naturally follow. Keep your response concise and realistic.
|
|
285
|
+
|
|
286
|
+
Conversation History:
|
|
287
|
+
${conversationHistory.map((msg) => `${msg.role === "user" ? "User" : "Assistant"}: ${msg.content}`).join("\n\n")}
|
|
288
|
+
|
|
289
|
+
Generate the next user message:`;
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Extract messages from dataset item input
|
|
293
|
+
*/
|
|
294
|
+
extractMessagesFromDatasetItem(datasetItem) {
|
|
295
|
+
if (!datasetItem.input) return null;
|
|
296
|
+
const roleMap = {
|
|
297
|
+
agent: "assistant",
|
|
298
|
+
user: "user",
|
|
299
|
+
system: "system",
|
|
300
|
+
assistant: "assistant",
|
|
301
|
+
function: "function",
|
|
302
|
+
tool: "tool"
|
|
303
|
+
};
|
|
304
|
+
if (typeof datasetItem.input === "object" && "messages" in datasetItem.input) {
|
|
305
|
+
const input = datasetItem.input;
|
|
306
|
+
const validMessages = input.messages.map((msg) => {
|
|
307
|
+
const mappedRole = roleMap[msg.role.toLowerCase()];
|
|
308
|
+
if (!mappedRole) {
|
|
309
|
+
logger.warn({
|
|
310
|
+
datasetItemId: datasetItem.id,
|
|
311
|
+
invalidRole: msg.role
|
|
312
|
+
}, "Invalid message role found, skipping message");
|
|
313
|
+
return null;
|
|
314
|
+
}
|
|
315
|
+
return {
|
|
316
|
+
role: mappedRole,
|
|
317
|
+
content: msg.content
|
|
318
|
+
};
|
|
319
|
+
}).filter((msg) => msg !== null);
|
|
320
|
+
if (validMessages.length === 0) {
|
|
321
|
+
logger.warn({
|
|
322
|
+
datasetItemId: datasetItem.id,
|
|
323
|
+
totalMessages: input.messages.length
|
|
324
|
+
}, "No valid messages found after filtering roles");
|
|
325
|
+
return null;
|
|
326
|
+
}
|
|
327
|
+
return validMessages;
|
|
328
|
+
}
|
|
329
|
+
if (typeof datasetItem.input === "string") try {
|
|
330
|
+
const parsed = JSON.parse(datasetItem.input);
|
|
331
|
+
if (parsed.messages && Array.isArray(parsed.messages)) {
|
|
332
|
+
const validMessages = parsed.messages.map((msg) => {
|
|
333
|
+
const mappedRole = roleMap[msg.role?.toLowerCase()];
|
|
334
|
+
if (!mappedRole) {
|
|
335
|
+
logger.warn({
|
|
336
|
+
datasetItemId: datasetItem.id,
|
|
337
|
+
invalidRole: msg.role
|
|
338
|
+
}, "Invalid message role found in parsed input, skipping message");
|
|
339
|
+
return null;
|
|
340
|
+
}
|
|
341
|
+
return {
|
|
342
|
+
role: mappedRole,
|
|
343
|
+
content: msg.content
|
|
344
|
+
};
|
|
345
|
+
}).filter((msg) => msg !== null);
|
|
346
|
+
return validMessages.length > 0 ? validMessages : null;
|
|
347
|
+
}
|
|
348
|
+
} catch {
|
|
349
|
+
return [{
|
|
350
|
+
role: "user",
|
|
351
|
+
content: datasetItem.input
|
|
352
|
+
}];
|
|
353
|
+
}
|
|
354
|
+
return null;
|
|
355
|
+
}
|
|
356
|
+
/**
|
|
357
|
+
* Parse SSE (Server-Sent Events) response from chat API
|
|
358
|
+
* Handles text deltas, error operations, and other data operations
|
|
359
|
+
*/
|
|
360
|
+
parseSSEResponse(sseText) {
|
|
361
|
+
let textContent = "";
|
|
362
|
+
let hasError = false;
|
|
363
|
+
let errorMessage = "";
|
|
364
|
+
const lines = sseText.split("\n").filter((line) => line.startsWith("data: "));
|
|
365
|
+
for (const line of lines) try {
|
|
366
|
+
const data = JSON.parse(line.slice(6));
|
|
367
|
+
if (data.object === "chat.completion.chunk" && data.choices?.[0]?.delta) {
|
|
368
|
+
const delta = data.choices[0].delta;
|
|
369
|
+
if (delta.content) textContent += delta.content;
|
|
370
|
+
if (delta.content && typeof delta.content === "string") try {
|
|
371
|
+
const parsedContent = JSON.parse(delta.content);
|
|
372
|
+
if (parsedContent.type === "data-operation" && parsedContent.data?.type === "error") {
|
|
373
|
+
hasError = true;
|
|
374
|
+
errorMessage = parsedContent.data.message || "Unknown error occurred";
|
|
375
|
+
logger.warn({
|
|
376
|
+
errorMessage,
|
|
377
|
+
errorData: parsedContent.data
|
|
378
|
+
}, "Received error operation from chat API");
|
|
379
|
+
}
|
|
380
|
+
} catch {}
|
|
381
|
+
} else if (data.type === "text-delta" && data.delta) textContent += data.delta;
|
|
382
|
+
else if (data.type === "data-operation" && data.data?.type === "error") {
|
|
383
|
+
hasError = true;
|
|
384
|
+
errorMessage = data.data.message || "Unknown error occurred";
|
|
385
|
+
logger.warn({
|
|
386
|
+
errorMessage,
|
|
387
|
+
errorData: data.data
|
|
388
|
+
}, "Received error operation from chat API");
|
|
389
|
+
} else if (data.type === "error") {
|
|
390
|
+
hasError = true;
|
|
391
|
+
errorMessage = data.message || "Unknown error occurred";
|
|
392
|
+
logger.warn({
|
|
393
|
+
errorMessage,
|
|
394
|
+
errorData: data
|
|
395
|
+
}, "Received error event from chat API");
|
|
396
|
+
} else if (data.content) textContent += typeof data.content === "string" ? data.content : JSON.stringify(data.content);
|
|
397
|
+
} catch {}
|
|
398
|
+
if (hasError) return {
|
|
399
|
+
text: textContent.trim(),
|
|
400
|
+
error: errorMessage
|
|
401
|
+
};
|
|
402
|
+
return { text: textContent.trim() };
|
|
403
|
+
}
|
|
404
|
+
/**
|
|
405
|
+
* Run an evaluation job based on an evaluation job config
|
|
406
|
+
* Filters conversations based on jobFilters and runs evaluations with configured evaluators
|
|
407
|
+
*/
|
|
408
|
+
async runEvaluationJob(params) {
|
|
409
|
+
const { tenantId, projectId, evaluationJobConfigId, sampleRate } = params;
|
|
410
|
+
const ref = getProjectScopedRef(tenantId, projectId, "main");
|
|
411
|
+
const resolvedRef = await resolveRef(manageDbClient_default)(ref);
|
|
412
|
+
if (!resolvedRef) throw new Error("Failed to resolve ref");
|
|
413
|
+
logger.info({
|
|
414
|
+
tenantId,
|
|
415
|
+
projectId,
|
|
416
|
+
evaluationJobConfigId,
|
|
417
|
+
sampleRate
|
|
418
|
+
}, "Starting evaluation job");
|
|
419
|
+
const config = await withRef(manageDbPool_default, resolvedRef, (db) => getEvaluationJobConfigById(db)({ scopes: {
|
|
420
|
+
tenantId,
|
|
421
|
+
projectId,
|
|
422
|
+
evaluationJobConfigId
|
|
423
|
+
} }));
|
|
424
|
+
if (!config) throw new Error(`Evaluation job config not found: ${evaluationJobConfigId}`);
|
|
425
|
+
const evaluatorRelations = await withRef(manageDbPool_default, resolvedRef, (db) => getEvaluationJobConfigEvaluatorRelations(db)({ scopes: {
|
|
426
|
+
tenantId,
|
|
427
|
+
projectId,
|
|
428
|
+
evaluationJobConfigId
|
|
429
|
+
} }));
|
|
430
|
+
if (evaluatorRelations.length === 0) throw new Error(`No evaluators found for job config: ${evaluationJobConfigId}`);
|
|
431
|
+
const validEvaluators = (await Promise.all(evaluatorRelations.map((relation) => withRef(manageDbPool_default, resolvedRef, (db) => getEvaluatorById(db)({ scopes: {
|
|
432
|
+
tenantId,
|
|
433
|
+
projectId,
|
|
434
|
+
evaluatorId: relation.evaluatorId
|
|
435
|
+
} }))))).filter((e) => e !== null);
|
|
436
|
+
if (validEvaluators.length === 0) throw new Error(`No valid evaluators found for job config: ${evaluationJobConfigId}`);
|
|
437
|
+
logger.info({
|
|
438
|
+
tenantId,
|
|
439
|
+
projectId,
|
|
440
|
+
evaluationJobConfigId,
|
|
441
|
+
evaluatorCount: validEvaluators.length
|
|
442
|
+
}, "Found evaluators for job config");
|
|
443
|
+
let conversationsToEvaluate = await filterConversationsForJob(runDbClient_default)({
|
|
444
|
+
scopes: {
|
|
445
|
+
tenantId,
|
|
446
|
+
projectId
|
|
447
|
+
},
|
|
448
|
+
jobFilters: config.jobFilters
|
|
449
|
+
});
|
|
450
|
+
if (sampleRate !== void 0 && sampleRate !== null) {
|
|
451
|
+
const originalCount = conversationsToEvaluate.length;
|
|
452
|
+
conversationsToEvaluate = this.applySampleRate(conversationsToEvaluate, sampleRate);
|
|
453
|
+
logger.info({
|
|
454
|
+
tenantId,
|
|
455
|
+
projectId,
|
|
456
|
+
evaluationJobConfigId,
|
|
457
|
+
originalCount,
|
|
458
|
+
sampledCount: conversationsToEvaluate.length,
|
|
459
|
+
sampleRate
|
|
460
|
+
}, "Applied sample rate to conversations");
|
|
461
|
+
}
|
|
462
|
+
logger.info({
|
|
463
|
+
tenantId,
|
|
464
|
+
projectId,
|
|
465
|
+
evaluationJobConfigId,
|
|
466
|
+
conversationCount: conversationsToEvaluate.length
|
|
467
|
+
}, "Found conversations for evaluation");
|
|
468
|
+
if (conversationsToEvaluate.length === 0) {
|
|
469
|
+
logger.warn({
|
|
470
|
+
tenantId,
|
|
471
|
+
projectId,
|
|
472
|
+
evaluationJobConfigId
|
|
473
|
+
}, "No conversations found matching job filters");
|
|
474
|
+
return [];
|
|
475
|
+
}
|
|
476
|
+
const evaluationRun = await createEvaluationRun(runDbClient_default)({
|
|
477
|
+
id: generateId(),
|
|
478
|
+
tenantId,
|
|
479
|
+
projectId,
|
|
480
|
+
evaluationJobConfigId
|
|
481
|
+
});
|
|
482
|
+
const results = [];
|
|
483
|
+
for (const conversation of conversationsToEvaluate) for (const evaluator of validEvaluators) try {
|
|
484
|
+
logger.info({
|
|
485
|
+
tenantId,
|
|
486
|
+
conversationId: conversation.id,
|
|
487
|
+
evaluatorId: evaluator.id
|
|
488
|
+
}, "Running evaluation");
|
|
489
|
+
const evalResult = await createEvaluationResult(runDbClient_default)({
|
|
490
|
+
id: generateId(),
|
|
491
|
+
tenantId,
|
|
492
|
+
projectId,
|
|
493
|
+
conversationId: conversation.id,
|
|
494
|
+
evaluatorId: evaluator.id,
|
|
495
|
+
evaluationRunId: evaluationRun.id
|
|
496
|
+
});
|
|
497
|
+
try {
|
|
498
|
+
const evaluationResult = await this.executeEvaluation({
|
|
499
|
+
conversation,
|
|
500
|
+
evaluator,
|
|
501
|
+
tenantId,
|
|
502
|
+
projectId
|
|
503
|
+
});
|
|
504
|
+
const updatedResult = await updateEvaluationResult(runDbClient_default)({
|
|
505
|
+
scopes: {
|
|
506
|
+
tenantId,
|
|
507
|
+
projectId,
|
|
508
|
+
evaluationResultId: evalResult.id
|
|
509
|
+
},
|
|
510
|
+
data: { output: evaluationResult.output }
|
|
511
|
+
});
|
|
512
|
+
if (updatedResult) results.push(updatedResult);
|
|
513
|
+
logger.info({
|
|
514
|
+
tenantId,
|
|
515
|
+
conversationId: conversation.id,
|
|
516
|
+
evaluatorId: evaluator.id,
|
|
517
|
+
resultId: evalResult.id
|
|
518
|
+
}, "Evaluation completed successfully");
|
|
519
|
+
} catch (error) {
|
|
520
|
+
logger.error({
|
|
521
|
+
error,
|
|
522
|
+
tenantId,
|
|
523
|
+
conversationId: conversation.id,
|
|
524
|
+
evaluatorId: evaluator.id,
|
|
525
|
+
resultId: evalResult.id
|
|
526
|
+
}, "Evaluation execution failed");
|
|
527
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
528
|
+
const failedResult = await updateEvaluationResult(runDbClient_default)({
|
|
529
|
+
scopes: {
|
|
530
|
+
tenantId,
|
|
531
|
+
projectId,
|
|
532
|
+
evaluationResultId: evalResult.id
|
|
533
|
+
},
|
|
534
|
+
data: { output: { text: `Evaluation failed: ${errorMessage}` } }
|
|
535
|
+
});
|
|
536
|
+
if (failedResult) results.push(failedResult);
|
|
537
|
+
}
|
|
538
|
+
} catch (error) {
|
|
539
|
+
logger.error({
|
|
540
|
+
error: error instanceof Error ? error.message : String(error),
|
|
541
|
+
tenantId,
|
|
542
|
+
conversationId: conversation.id,
|
|
543
|
+
evaluatorId: evaluator.id
|
|
544
|
+
}, "Failed to create or update eval result");
|
|
545
|
+
}
|
|
546
|
+
logger.info({
|
|
547
|
+
tenantId,
|
|
548
|
+
projectId,
|
|
549
|
+
evaluationJobConfigId,
|
|
550
|
+
resultCount: results.length
|
|
551
|
+
}, "Evaluation job completed");
|
|
552
|
+
return results;
|
|
553
|
+
}
|
|
554
|
+
/**
|
|
555
|
+
* Apply sample rate to conversations
|
|
556
|
+
*/
|
|
557
|
+
applySampleRate(items, sampleRate) {
|
|
558
|
+
if (!sampleRate || sampleRate >= 1) return items;
|
|
559
|
+
if (sampleRate <= 0) return [];
|
|
560
|
+
const targetCount = Math.ceil(items.length * sampleRate);
|
|
561
|
+
const sampled = [];
|
|
562
|
+
const indices = /* @__PURE__ */ new Set();
|
|
563
|
+
while (sampled.length < targetCount && sampled.length < items.length) {
|
|
564
|
+
const randomIndex = Math.floor(Math.random() * items.length);
|
|
565
|
+
if (!indices.has(randomIndex)) {
|
|
566
|
+
indices.add(randomIndex);
|
|
567
|
+
sampled.push(items[randomIndex]);
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
return sampled;
|
|
571
|
+
}
|
|
572
|
+
/**
|
|
573
|
+
* Execute an evaluation by calling the LLM with the evaluator prompt and conversation data
|
|
574
|
+
*/
|
|
575
|
+
async executeEvaluation(params) {
|
|
576
|
+
const { conversation, evaluator, tenantId, projectId, expectedOutput } = params;
|
|
577
|
+
let resolvedRef = null;
|
|
578
|
+
if (conversation.ref) resolvedRef = conversation.ref;
|
|
579
|
+
else {
|
|
580
|
+
const ref = getProjectScopedRef(tenantId, projectId, "main");
|
|
581
|
+
resolvedRef = await resolveRef(manageDbClient_default)(ref);
|
|
582
|
+
}
|
|
583
|
+
if (!resolvedRef) throw new Error("Failed to resolve ref");
|
|
584
|
+
const conversationHistory = await getConversationHistory(runDbClient_default)({
|
|
585
|
+
scopes: {
|
|
586
|
+
tenantId,
|
|
587
|
+
projectId
|
|
588
|
+
},
|
|
589
|
+
conversationId: conversation.id,
|
|
590
|
+
options: {
|
|
591
|
+
includeInternal: false,
|
|
592
|
+
limit: 100
|
|
593
|
+
}
|
|
594
|
+
});
|
|
595
|
+
let agentDefinition = null;
|
|
596
|
+
let agentId = null;
|
|
597
|
+
try {
|
|
598
|
+
agentId = conversation.agentId ?? null;
|
|
599
|
+
if (agentId) {
|
|
600
|
+
const agentIdForLookup = agentId;
|
|
601
|
+
agentDefinition = await withRef(manageDbPool_default, resolvedRef, (db) => getFullAgent(db)({ scopes: {
|
|
602
|
+
tenantId,
|
|
603
|
+
projectId,
|
|
604
|
+
agentId: agentIdForLookup
|
|
605
|
+
} }));
|
|
606
|
+
} else logger.warn({
|
|
607
|
+
conversationId: conversation.id,
|
|
608
|
+
agentId: conversation.agentId
|
|
609
|
+
}, "AgentId not found, cannot get agent definition");
|
|
610
|
+
} catch (error) {
|
|
611
|
+
logger.warn({
|
|
612
|
+
error,
|
|
613
|
+
conversationId: conversation.id,
|
|
614
|
+
agentId: conversation.agentId
|
|
615
|
+
}, "Failed to fetch agent definition for evaluation");
|
|
616
|
+
}
|
|
617
|
+
const prettifiedTrace = await this.fetchTraceFromSigNoz(conversation.id);
|
|
618
|
+
logger.info({
|
|
619
|
+
conversationId: conversation.id,
|
|
620
|
+
hasTrace: !!prettifiedTrace,
|
|
621
|
+
traceActivityCount: prettifiedTrace?.timeline?.length || 0
|
|
622
|
+
}, "Trace fetch completed");
|
|
623
|
+
const conversationText = JSON.stringify(conversationHistory, null, 2);
|
|
624
|
+
const agentDefinitionText = agentDefinition ? JSON.stringify(agentDefinition, null, 2) : "Agent definition not available";
|
|
625
|
+
const traceText = prettifiedTrace ? JSON.stringify(prettifiedTrace, null, 2) : "Trace data not available";
|
|
626
|
+
const modelConfig = evaluator.model ?? {};
|
|
627
|
+
let schemaObj;
|
|
628
|
+
if (typeof evaluator.schema === "string") try {
|
|
629
|
+
schemaObj = JSON.parse(evaluator.schema);
|
|
630
|
+
} catch (error) {
|
|
631
|
+
logger.error({
|
|
632
|
+
error,
|
|
633
|
+
schemaString: evaluator.schema
|
|
634
|
+
}, "Failed to parse evaluator schema string");
|
|
635
|
+
throw new Error("Invalid evaluator schema format");
|
|
636
|
+
}
|
|
637
|
+
else schemaObj = evaluator.schema;
|
|
638
|
+
logger.info({
|
|
639
|
+
evaluatorId: evaluator.id,
|
|
640
|
+
schemaType: typeof schemaObj,
|
|
641
|
+
schemaKeys: schemaObj && typeof schemaObj === "object" ? Object.keys(schemaObj) : []
|
|
642
|
+
}, "Using evaluator schema");
|
|
643
|
+
const expectedOutputText = expectedOutput ? JSON.stringify(expectedOutput, null, 2) : void 0;
|
|
644
|
+
const evaluationPrompt = this.buildEvalInputEvaluationPrompt(evaluator.prompt, agentDefinitionText, conversationText, traceText, schemaObj, expectedOutputText);
|
|
645
|
+
const llmResponse = await this.callLLM({
|
|
646
|
+
prompt: evaluationPrompt,
|
|
647
|
+
modelConfig,
|
|
648
|
+
schema: schemaObj
|
|
649
|
+
});
|
|
650
|
+
return {
|
|
651
|
+
output: llmResponse.result,
|
|
652
|
+
metadata: {
|
|
653
|
+
...llmResponse.metadata,
|
|
654
|
+
model: modelConfig.model || "unknown",
|
|
655
|
+
agentId,
|
|
656
|
+
hasAgentDefinition: !!agentDefinition,
|
|
657
|
+
hasTrace: !!prettifiedTrace,
|
|
658
|
+
traceActivityCount: prettifiedTrace?.timeline?.length || 0
|
|
659
|
+
}
|
|
660
|
+
};
|
|
661
|
+
}
|
|
662
|
+
/**
|
|
663
|
+
* Build evaluation prompt with agent definition, conversation history, trace, and expected output
|
|
664
|
+
*/
|
|
665
|
+
buildEvalInputEvaluationPrompt(evaluatorPrompt, agentDefinitionText, conversationText, traceText, schema, expectedOutputText) {
|
|
666
|
+
const schemaDescription = JSON.stringify(schema, null, 2);
|
|
667
|
+
return `${evaluatorPrompt}
|
|
668
|
+
|
|
669
|
+
Agent Definition:
|
|
670
|
+
|
|
671
|
+
${agentDefinitionText}
|
|
672
|
+
|
|
673
|
+
Conversation History:
|
|
674
|
+
|
|
675
|
+
${conversationText}
|
|
676
|
+
|
|
677
|
+
Execution Trace:
|
|
678
|
+
|
|
679
|
+
${traceText}
|
|
680
|
+
${expectedOutputText ? `
|
|
681
|
+
|
|
682
|
+
Expected Output:
|
|
683
|
+
|
|
684
|
+
${expectedOutputText}
|
|
685
|
+
` : ""}
|
|
686
|
+
Please evaluate this conversation according to the following schema and return your evaluation as JSON:
|
|
687
|
+
|
|
688
|
+
${schemaDescription}
|
|
689
|
+
|
|
690
|
+
Return your evaluation as a JSON object matching the schema above.`;
|
|
691
|
+
}
|
|
692
|
+
/**
|
|
693
|
+
* Call LLM API using AI SDK's generateObject for structured output
|
|
694
|
+
*/
|
|
695
|
+
async callLLM(params) {
|
|
696
|
+
const { prompt, modelConfig, schema } = params;
|
|
697
|
+
const languageModel = ModelFactory.prepareGenerationConfig(modelConfig);
|
|
698
|
+
const providerOptions = modelConfig?.providerOptions || {};
|
|
699
|
+
let resultSchema;
|
|
700
|
+
try {
|
|
701
|
+
resultSchema = jsonSchemaToZod(schema);
|
|
702
|
+
logger.info({
|
|
703
|
+
schemaType: typeof schema,
|
|
704
|
+
schemaKeys: schema && typeof schema === "object" ? Object.keys(schema) : [],
|
|
705
|
+
convertedSchema: "success"
|
|
706
|
+
}, "Converted JSON schema to Zod");
|
|
707
|
+
} catch (error) {
|
|
708
|
+
logger.error({
|
|
709
|
+
error,
|
|
710
|
+
schema
|
|
711
|
+
}, "Failed to convert JSON schema to Zod, using fallback");
|
|
712
|
+
resultSchema = z.record(z.string(), z.unknown());
|
|
713
|
+
}
|
|
714
|
+
const evaluationSchema = resultSchema;
|
|
715
|
+
try {
|
|
716
|
+
logger.info({
|
|
717
|
+
promptLength: prompt.length,
|
|
718
|
+
model: modelConfig.model
|
|
719
|
+
}, "Calling generateObject");
|
|
720
|
+
const result = await generateObject({
|
|
721
|
+
...languageModel,
|
|
722
|
+
schema: evaluationSchema,
|
|
723
|
+
prompt,
|
|
724
|
+
temperature: providerOptions.temperature ?? .3
|
|
725
|
+
});
|
|
726
|
+
return {
|
|
727
|
+
result: result.object,
|
|
728
|
+
metadata: { usage: result.usage }
|
|
729
|
+
};
|
|
730
|
+
} catch (error) {
|
|
731
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
732
|
+
logger.error({
|
|
733
|
+
error: errorMessage,
|
|
734
|
+
schema: JSON.stringify(schema, null, 2),
|
|
735
|
+
promptPreview: prompt.substring(0, 500)
|
|
736
|
+
}, "Evaluation failed with generateObject");
|
|
737
|
+
throw new Error(`Evaluation failed: ${errorMessage}`);
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
/**
|
|
741
|
+
* Fetch trace from SigNoz (similar to the example)
|
|
742
|
+
*/
|
|
743
|
+
async fetchTraceFromSigNoz(conversationId) {
|
|
744
|
+
const manageUIUrl = env.INKEEP_AGENTS_MANAGE_UI_URL;
|
|
745
|
+
const maxRetries = 2;
|
|
746
|
+
const retryDelayMs = 2e4;
|
|
747
|
+
const initialDelayMs = 3e4;
|
|
748
|
+
try {
|
|
749
|
+
logger.info({
|
|
750
|
+
conversationId,
|
|
751
|
+
manageUIUrl,
|
|
752
|
+
initialDelayMs
|
|
753
|
+
}, "Waiting 30s before fetching trace from SigNoz");
|
|
754
|
+
await new Promise((resolve) => setTimeout(resolve, initialDelayMs));
|
|
755
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) try {
|
|
756
|
+
logger.info({
|
|
757
|
+
conversationId,
|
|
758
|
+
attempt: attempt + 1,
|
|
759
|
+
maxRetries: maxRetries + 1
|
|
760
|
+
}, "Fetching trace from SigNoz");
|
|
761
|
+
const traceResponse = await fetch(`${manageUIUrl}/api/signoz/conversations/${conversationId}`);
|
|
762
|
+
if (!traceResponse.ok) {
|
|
763
|
+
logger.warn({
|
|
764
|
+
conversationId,
|
|
765
|
+
status: traceResponse.status,
|
|
766
|
+
statusText: traceResponse.statusText,
|
|
767
|
+
attempt: attempt + 1
|
|
768
|
+
}, "Failed to fetch trace from SigNoz");
|
|
769
|
+
if (attempt < maxRetries) {
|
|
770
|
+
logger.info({
|
|
771
|
+
conversationId,
|
|
772
|
+
retryDelayMs
|
|
773
|
+
}, "Retrying trace fetch after delay");
|
|
774
|
+
await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
|
|
775
|
+
continue;
|
|
776
|
+
}
|
|
777
|
+
return null;
|
|
778
|
+
}
|
|
779
|
+
const conversationDetail = await traceResponse.json();
|
|
780
|
+
logger.debug({
|
|
781
|
+
conversationId,
|
|
782
|
+
activityTypes: conversationDetail.activities?.map((a) => a.type) || [],
|
|
783
|
+
activityCount: conversationDetail.activities?.length || 0
|
|
784
|
+
}, "Checking activities for ai_assistant_message type");
|
|
785
|
+
if (!conversationDetail.activities?.some((activity) => activity.type === "ai_assistant_message")) {
|
|
786
|
+
logger.warn({
|
|
787
|
+
conversationId,
|
|
788
|
+
attempt: attempt + 1,
|
|
789
|
+
activityCount: conversationDetail.activities?.length || 0,
|
|
790
|
+
activityTypes: conversationDetail.activities?.slice(0, 5).map((a) => a.type) || []
|
|
791
|
+
}, "Trace fetched but ai_assistant_message not found in activities");
|
|
792
|
+
if (attempt < maxRetries) {
|
|
793
|
+
logger.info({
|
|
794
|
+
conversationId,
|
|
795
|
+
retryDelayMs
|
|
796
|
+
}, "Retrying trace fetch after delay to wait for assistant message");
|
|
797
|
+
await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
|
|
798
|
+
continue;
|
|
799
|
+
}
|
|
800
|
+
logger.warn({
|
|
801
|
+
conversationId,
|
|
802
|
+
maxRetries,
|
|
803
|
+
activityCount: conversationDetail.activities?.length || 0
|
|
804
|
+
}, "Max retries reached, ai_assistant_message not found - proceeding with available trace data");
|
|
805
|
+
} else logger.info({
|
|
806
|
+
conversationId,
|
|
807
|
+
activityCount: conversationDetail.activities?.length || 0,
|
|
808
|
+
attempt: attempt + 1
|
|
809
|
+
}, "Trace fetched successfully with ai_assistant_message");
|
|
810
|
+
return this.formatConversationAsPrettifiedTrace(conversationDetail);
|
|
811
|
+
} catch (fetchError) {
|
|
812
|
+
logger.warn({
|
|
813
|
+
error: fetchError,
|
|
814
|
+
conversationId,
|
|
815
|
+
attempt: attempt + 1
|
|
816
|
+
}, "Error fetching trace from SigNoz");
|
|
817
|
+
if (attempt < maxRetries) {
|
|
818
|
+
logger.info({
|
|
819
|
+
conversationId,
|
|
820
|
+
retryDelayMs
|
|
821
|
+
}, "Retrying trace fetch after delay");
|
|
822
|
+
await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
|
|
823
|
+
continue;
|
|
824
|
+
}
|
|
825
|
+
return null;
|
|
826
|
+
}
|
|
827
|
+
return null;
|
|
828
|
+
} catch (error) {
|
|
829
|
+
logger.warn({
|
|
830
|
+
error,
|
|
831
|
+
conversationId,
|
|
832
|
+
manageUIUrl
|
|
833
|
+
}, "Failed to fetch trace from SigNoz, will continue without trace");
|
|
834
|
+
return null;
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
/**
|
|
838
|
+
* Format conversation detail as prettified trace
|
|
839
|
+
*/
|
|
840
|
+
formatConversationAsPrettifiedTrace(conversation) {
|
|
841
|
+
return {
|
|
842
|
+
metadata: {
|
|
843
|
+
conversationId: conversation.conversationId,
|
|
844
|
+
traceId: conversation.traceId,
|
|
845
|
+
agentName: conversation.agentName,
|
|
846
|
+
agentId: conversation.agentId,
|
|
847
|
+
exportedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
848
|
+
},
|
|
849
|
+
timing: {
|
|
850
|
+
startTime: conversation.conversationStartTime || "",
|
|
851
|
+
endTime: conversation.conversationEndTime || "",
|
|
852
|
+
durationMs: conversation.duration || 0
|
|
853
|
+
},
|
|
854
|
+
timeline: (conversation.activities || []).map((activity) => {
|
|
855
|
+
const { id: _id, ...rest } = activity;
|
|
856
|
+
return { ...rest };
|
|
857
|
+
})
|
|
858
|
+
};
|
|
859
|
+
}
|
|
860
|
+
};
|
|
861
|
+
|
|
862
|
+
//#endregion
|
|
863
|
+
export { EvaluationService };
|