@agent-native/core 0.7.11 → 0.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/action.js +30 -11
- package/dist/action.js.map +1 -1
- package/dist/agent/engine/ai-sdk-engine.d.ts.map +1 -1
- package/dist/agent/engine/ai-sdk-engine.js +26 -8
- package/dist/agent/engine/ai-sdk-engine.js.map +1 -1
- package/dist/agent/engine/builder-engine.d.ts +19 -0
- package/dist/agent/engine/builder-engine.d.ts.map +1 -0
- package/dist/agent/engine/builder-engine.js +430 -0
- package/dist/agent/engine/builder-engine.js.map +1 -0
- package/dist/agent/engine/builtin.d.ts.map +1 -1
- package/dist/agent/engine/builtin.js +26 -10
- package/dist/agent/engine/builtin.js.map +1 -1
- package/dist/agent/engine/index.d.ts +1 -1
- package/dist/agent/engine/index.d.ts.map +1 -1
- package/dist/agent/engine/index.js +1 -1
- package/dist/agent/engine/index.js.map +1 -1
- package/dist/agent/engine/provider-env-vars.d.ts +21 -0
- package/dist/agent/engine/provider-env-vars.d.ts.map +1 -0
- package/dist/agent/engine/provider-env-vars.js +50 -0
- package/dist/agent/engine/provider-env-vars.js.map +1 -0
- package/dist/agent/engine/registry.d.ts +37 -0
- package/dist/agent/engine/registry.d.ts.map +1 -1
- package/dist/agent/engine/registry.js +102 -4
- package/dist/agent/engine/registry.js.map +1 -1
- package/dist/agent/engine/types.d.ts +30 -0
- package/dist/agent/engine/types.d.ts.map +1 -1
- package/dist/agent/engine/types.js +19 -1
- package/dist/agent/engine/types.js.map +1 -1
- package/dist/agent/production-agent.d.ts +7 -1
- package/dist/agent/production-agent.d.ts.map +1 -1
- package/dist/agent/production-agent.js +67 -15
- package/dist/agent/production-agent.js.map +1 -1
- package/dist/agent/run-manager.d.ts.map +1 -1
- package/dist/agent/run-manager.js +11 -1
- package/dist/agent/run-manager.js.map +1 -1
- package/dist/agent/thread-data-builder.d.ts +4 -0
- package/dist/agent/thread-data-builder.d.ts.map +1 -1
- package/dist/agent/thread-data-builder.js +1 -0
- package/dist/agent/thread-data-builder.js.map +1 -1
- package/dist/agent/types.d.ts +8 -0
- package/dist/agent/types.d.ts.map +1 -1
- package/dist/checkpoints/service.d.ts +1 -0
- package/dist/checkpoints/service.d.ts.map +1 -1
- package/dist/checkpoints/service.js +26 -2
- package/dist/checkpoints/service.js.map +1 -1
- package/dist/cli/create.d.ts +30 -0
- package/dist/cli/create.d.ts.map +1 -1
- package/dist/cli/create.js +127 -10
- package/dist/cli/create.js.map +1 -1
- package/dist/cli/index.js +19 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/templates-meta.d.ts +2 -0
- package/dist/cli/templates-meta.d.ts.map +1 -1
- package/dist/cli/templates-meta.js +2 -0
- package/dist/cli/templates-meta.js.map +1 -1
- package/dist/cli/workspacify.d.ts.map +1 -1
- package/dist/cli/workspacify.js +7 -3
- package/dist/cli/workspacify.js.map +1 -1
- package/dist/client/AgentPanel.js +1 -1
- package/dist/client/AgentPanel.js.map +1 -1
- package/dist/client/AssistantChat.d.ts.map +1 -1
- package/dist/client/AssistantChat.js +93 -37
- package/dist/client/AssistantChat.js.map +1 -1
- package/dist/client/ConnectBuilderCard.d.ts +1 -7
- package/dist/client/ConnectBuilderCard.d.ts.map +1 -1
- package/dist/client/ConnectBuilderCard.js +30 -132
- package/dist/client/ConnectBuilderCard.js.map +1 -1
- package/dist/client/MultiTabAssistantChat.d.ts.map +1 -1
- package/dist/client/MultiTabAssistantChat.js +58 -26
- package/dist/client/MultiTabAssistantChat.js.map +1 -1
- package/dist/client/analytics.d.ts +5 -8
- package/dist/client/analytics.d.ts.map +1 -1
- package/dist/client/analytics.js +53 -11
- package/dist/client/analytics.js.map +1 -1
- package/dist/client/builder-mark.d.ts +9 -0
- package/dist/client/builder-mark.d.ts.map +1 -0
- package/dist/client/builder-mark.js +10 -0
- package/dist/client/builder-mark.js.map +1 -0
- package/dist/client/components/ui/popover.d.ts +8 -0
- package/dist/client/components/ui/popover.d.ts.map +1 -0
- package/dist/client/components/ui/popover.js +11 -0
- package/dist/client/components/ui/popover.js.map +1 -0
- package/dist/client/composer/ComposerPlusMenu.d.ts +2 -0
- package/dist/client/composer/ComposerPlusMenu.d.ts.map +1 -0
- package/dist/client/composer/ComposerPlusMenu.js +244 -0
- package/dist/client/composer/ComposerPlusMenu.js.map +1 -0
- package/dist/client/composer/TiptapComposer.d.ts.map +1 -1
- package/dist/client/composer/TiptapComposer.js +25 -7
- package/dist/client/composer/TiptapComposer.js.map +1 -1
- package/dist/client/composer/useVoiceDictation.d.ts.map +1 -1
- package/dist/client/composer/useVoiceDictation.js +4 -2
- package/dist/client/composer/useVoiceDictation.js.map +1 -1
- package/dist/client/error-format.d.ts +2 -0
- package/dist/client/error-format.d.ts.map +1 -0
- package/dist/client/error-format.js +31 -0
- package/dist/client/error-format.js.map +1 -0
- package/dist/client/index.d.ts +3 -1
- package/dist/client/index.d.ts.map +1 -1
- package/dist/client/index.js +3 -1
- package/dist/client/index.js.map +1 -1
- package/dist/client/observability/ObservabilityDashboard.d.ts +5 -0
- package/dist/client/observability/ObservabilityDashboard.d.ts.map +1 -0
- package/dist/client/observability/ObservabilityDashboard.js +169 -0
- package/dist/client/observability/ObservabilityDashboard.js.map +1 -0
- package/dist/client/observability/ThumbsFeedback.d.ts +8 -0
- package/dist/client/observability/ThumbsFeedback.d.ts.map +1 -0
- package/dist/client/observability/ThumbsFeedback.js +64 -0
- package/dist/client/observability/ThumbsFeedback.js.map +1 -0
- package/dist/client/observability/index.d.ts +4 -0
- package/dist/client/observability/index.d.ts.map +1 -0
- package/dist/client/observability/index.js +4 -0
- package/dist/client/observability/index.js.map +1 -0
- package/dist/client/observability/useObservability.d.ts +128 -0
- package/dist/client/observability/useObservability.d.ts.map +1 -0
- package/dist/client/observability/useObservability.js +109 -0
- package/dist/client/observability/useObservability.js.map +1 -0
- package/dist/client/onboarding/OnboardingPanel.d.ts.map +1 -1
- package/dist/client/onboarding/OnboardingPanel.js +34 -92
- package/dist/client/onboarding/OnboardingPanel.js.map +1 -1
- package/dist/client/org/RequireActiveOrg.d.ts +33 -0
- package/dist/client/org/RequireActiveOrg.d.ts.map +1 -0
- package/dist/client/org/RequireActiveOrg.js +63 -0
- package/dist/client/org/RequireActiveOrg.js.map +1 -0
- package/dist/client/org/hooks.d.ts.map +1 -1
- package/dist/client/org/hooks.js +50 -15
- package/dist/client/org/hooks.js.map +1 -1
- package/dist/client/org/index.d.ts +1 -0
- package/dist/client/org/index.d.ts.map +1 -1
- package/dist/client/org/index.js +1 -0
- package/dist/client/org/index.js.map +1 -1
- package/dist/client/resources/ResourcesPanel.js +3 -3
- package/dist/client/resources/ResourcesPanel.js.map +1 -1
- package/dist/client/settings/AutomationsSection.js +1 -1
- package/dist/client/settings/AutomationsSection.js.map +1 -1
- package/dist/client/settings/BrowserSection.js +1 -1
- package/dist/client/settings/BrowserSection.js.map +1 -1
- package/dist/client/settings/SettingsPanel.d.ts.map +1 -1
- package/dist/client/settings/SettingsPanel.js +251 -42
- package/dist/client/settings/SettingsPanel.js.map +1 -1
- package/dist/client/settings/VoiceTranscriptionSection.d.ts.map +1 -1
- package/dist/client/settings/VoiceTranscriptionSection.js +10 -4
- package/dist/client/settings/VoiceTranscriptionSection.js.map +1 -1
- package/dist/client/settings/useBuilderStatus.d.ts +26 -0
- package/dist/client/settings/useBuilderStatus.d.ts.map +1 -1
- package/dist/client/settings/useBuilderStatus.js +128 -4
- package/dist/client/settings/useBuilderStatus.js.map +1 -1
- package/dist/client/sse-event-processor.d.ts +2 -0
- package/dist/client/sse-event-processor.d.ts.map +1 -1
- package/dist/client/sse-event-processor.js +6 -2
- package/dist/client/sse-event-processor.js.map +1 -1
- package/dist/client/transcription/BuilderTranscriptionCta.d.ts +9 -0
- package/dist/client/transcription/BuilderTranscriptionCta.d.ts.map +1 -0
- package/dist/client/transcription/BuilderTranscriptionCta.js +18 -0
- package/dist/client/transcription/BuilderTranscriptionCta.js.map +1 -0
- package/dist/client/transcription/use-live-transcription.d.ts +29 -0
- package/dist/client/transcription/use-live-transcription.d.ts.map +1 -0
- package/dist/client/transcription/use-live-transcription.js +156 -0
- package/dist/client/transcription/use-live-transcription.js.map +1 -0
- package/dist/client/use-builder-enabled.d.ts +17 -0
- package/dist/client/use-builder-enabled.d.ts.map +1 -0
- package/dist/client/use-builder-enabled.js +36 -0
- package/dist/client/use-builder-enabled.js.map +1 -0
- package/dist/client/use-db-sync.d.ts.map +1 -1
- package/dist/client/use-db-sync.js +4 -2
- package/dist/client/use-db-sync.js.map +1 -1
- package/dist/client/useProductionAgent.d.ts.map +1 -1
- package/dist/client/useProductionAgent.js +3 -1
- package/dist/client/useProductionAgent.js.map +1 -1
- package/dist/db/migrations.d.ts +9 -0
- package/dist/db/migrations.d.ts.map +1 -1
- package/dist/db/migrations.js +75 -10
- package/dist/db/migrations.js.map +1 -1
- package/dist/file-upload/builder.d.ts.map +1 -1
- package/dist/file-upload/builder.js +11 -4
- package/dist/file-upload/builder.js.map +1 -1
- package/dist/jobs/tools.d.ts.map +1 -1
- package/dist/jobs/tools.js +137 -161
- package/dist/jobs/tools.js.map +1 -1
- package/dist/notifications/actions.d.ts +2 -2
- package/dist/notifications/actions.d.ts.map +1 -1
- package/dist/notifications/actions.js +77 -69
- package/dist/notifications/actions.js.map +1 -1
- package/dist/observability/evals.d.ts +22 -0
- package/dist/observability/evals.d.ts.map +1 -0
- package/dist/observability/evals.js +371 -0
- package/dist/observability/evals.js.map +1 -0
- package/dist/observability/experiments.d.ts +24 -0
- package/dist/observability/experiments.d.ts.map +1 -0
- package/dist/observability/experiments.js +274 -0
- package/dist/observability/experiments.js.map +1 -0
- package/dist/observability/feedback.d.ts +14 -0
- package/dist/observability/feedback.d.ts.map +1 -0
- package/dist/observability/feedback.js +256 -0
- package/dist/observability/feedback.js.map +1 -0
- package/dist/observability/index.d.ts +6 -0
- package/dist/observability/index.d.ts.map +1 -0
- package/dist/observability/index.js +5 -0
- package/dist/observability/index.js.map +1 -0
- package/dist/observability/plugin.d.ts +2 -0
- package/dist/observability/plugin.d.ts.map +1 -0
- package/dist/observability/plugin.js +12 -0
- package/dist/observability/plugin.js.map +1 -0
- package/dist/observability/routes.d.ts +68 -0
- package/dist/observability/routes.d.ts.map +1 -0
- package/dist/observability/routes.js +301 -0
- package/dist/observability/routes.js.map +1 -0
- package/dist/observability/store.d.ts +77 -0
- package/dist/observability/store.d.ts.map +1 -0
- package/dist/observability/store.js +976 -0
- package/dist/observability/store.js.map +1 -0
- package/dist/observability/traces.d.ts +37 -0
- package/dist/observability/traces.d.ts.map +1 -0
- package/dist/observability/traces.js +182 -0
- package/dist/observability/traces.js.map +1 -0
- package/dist/observability/types.d.ts +159 -0
- package/dist/observability/types.d.ts.map +1 -0
- package/dist/observability/types.js +16 -0
- package/dist/observability/types.js.map +1 -0
- package/dist/onboarding/default-steps.d.ts.map +1 -1
- package/dist/onboarding/default-steps.js +68 -24
- package/dist/onboarding/default-steps.js.map +1 -1
- package/dist/onboarding/types.d.ts +10 -1
- package/dist/onboarding/types.d.ts.map +1 -1
- package/dist/org/context.d.ts +8 -1
- package/dist/org/context.d.ts.map +1 -1
- package/dist/org/context.js +163 -6
- package/dist/org/context.js.map +1 -1
- package/dist/org/handlers.d.ts.map +1 -1
- package/dist/org/handlers.js +49 -30
- package/dist/org/handlers.js.map +1 -1
- package/dist/progress/actions.d.ts +3 -0
- package/dist/progress/actions.d.ts.map +1 -1
- package/dist/progress/actions.js +86 -110
- package/dist/progress/actions.js.map +1 -1
- package/dist/progress/routes.d.ts +1 -1
- package/dist/progress/routes.js +1 -1
- package/dist/scripts/agent-engines/list-agent-engines.d.ts.map +1 -1
- package/dist/scripts/agent-engines/list-agent-engines.js +20 -6
- package/dist/scripts/agent-engines/list-agent-engines.js.map +1 -1
- package/dist/scripts/agent-engines/manage-agent-engine.d.ts +10 -0
- package/dist/scripts/agent-engines/manage-agent-engine.d.ts.map +1 -0
- package/dist/scripts/agent-engines/manage-agent-engine.js +47 -0
- package/dist/scripts/agent-engines/manage-agent-engine.js.map +1 -0
- package/dist/scripts/agent-engines/set-agent-engine.d.ts.map +1 -1
- package/dist/scripts/agent-engines/set-agent-engine.js +10 -9
- package/dist/scripts/agent-engines/set-agent-engine.js.map +1 -1
- package/dist/server/agent-chat-plugin.d.ts +39 -0
- package/dist/server/agent-chat-plugin.d.ts.map +1 -1
- package/dist/server/agent-chat-plugin.js +743 -452
- package/dist/server/agent-chat-plugin.js.map +1 -1
- package/dist/server/agent-teams.d.ts.map +1 -1
- package/dist/server/agent-teams.js +18 -7
- package/dist/server/agent-teams.js.map +1 -1
- package/dist/server/analytics.d.ts +9 -7
- package/dist/server/analytics.d.ts.map +1 -1
- package/dist/server/analytics.js +12 -11
- package/dist/server/analytics.js.map +1 -1
- package/dist/server/app-name.d.ts +5 -2
- package/dist/server/app-name.d.ts.map +1 -1
- package/dist/server/app-name.js +14 -3
- package/dist/server/app-name.js.map +1 -1
- package/dist/server/app-url.d.ts.map +1 -1
- package/dist/server/app-url.js +17 -5
- package/dist/server/app-url.js.map +1 -1
- package/dist/server/auth.d.ts +2 -0
- package/dist/server/auth.d.ts.map +1 -1
- package/dist/server/auth.js +153 -2
- package/dist/server/auth.js.map +1 -1
- package/dist/server/better-auth-instance.d.ts +2 -0
- package/dist/server/better-auth-instance.d.ts.map +1 -1
- package/dist/server/better-auth-instance.js +4 -0
- package/dist/server/better-auth-instance.js.map +1 -1
- package/dist/server/builder-browser.d.ts +59 -1
- package/dist/server/builder-browser.d.ts.map +1 -1
- package/dist/server/builder-browser.js +127 -11
- package/dist/server/builder-browser.js.map +1 -1
- package/dist/server/core-routes-plugin.d.ts.map +1 -1
- package/dist/server/core-routes-plugin.js +278 -13
- package/dist/server/core-routes-plugin.js.map +1 -1
- package/dist/server/credential-provider.d.ts +7 -0
- package/dist/server/credential-provider.d.ts.map +1 -1
- package/dist/server/credential-provider.js +10 -0
- package/dist/server/credential-provider.js.map +1 -1
- package/dist/server/onboarding-html.d.ts.map +1 -1
- package/dist/server/onboarding-html.js +29 -4
- package/dist/server/onboarding-html.js.map +1 -1
- package/dist/server/poll.d.ts.map +1 -1
- package/dist/server/poll.js +46 -5
- package/dist/server/poll.js.map +1 -1
- package/dist/server/ssr-handler.d.ts.map +1 -1
- package/dist/server/ssr-handler.js +2 -1
- package/dist/server/ssr-handler.js.map +1 -1
- package/dist/server/transcribe-voice.d.ts.map +1 -1
- package/dist/server/transcribe-voice.js +44 -5
- package/dist/server/transcribe-voice.js.map +1 -1
- package/dist/styles/agent-native.css +11 -2
- package/dist/templates/default/.agents/skills/progress/SKILL.md +14 -12
- package/dist/templates/default/app/root.tsx +7 -0
- package/dist/templates/workspace-root/pnpm-workspace.yaml +6 -0
- package/dist/transcription/builder-transcription.d.ts +27 -0
- package/dist/transcription/builder-transcription.d.ts.map +1 -0
- package/dist/transcription/builder-transcription.js +41 -0
- package/dist/transcription/builder-transcription.js.map +1 -0
- package/dist/triggers/actions.d.ts +3 -0
- package/dist/triggers/actions.d.ts.map +1 -1
- package/dist/triggers/actions.js +189 -213
- package/dist/triggers/actions.js.map +1 -1
- package/docs/content/agent-mentions.md +1 -1
- package/docs/content/automations.md +22 -19
- package/docs/content/cloneable-saas.md +2 -2
- package/docs/content/deployment.md +21 -61
- package/docs/content/getting-started.md +1 -1
- package/docs/content/key-concepts.md +1 -1
- package/docs/content/{enterprise-workspace.md → multi-app-workspace.md} +3 -3
- package/docs/content/multi-tenancy.md +1 -1
- package/docs/content/progress.md +11 -11
- package/docs/content/template-dispatch.md +3 -3
- package/docs/content/workspace-management.md +1 -1
- package/package.json +13 -1
- package/src/templates/default/.agents/skills/progress/SKILL.md +14 -12
- package/src/templates/default/app/root.tsx +7 -0
- package/src/templates/workspace-root/pnpm-workspace.yaml +6 -0
- package/dist/client/settings/LLMSection.d.ts +0 -2
- package/dist/client/settings/LLMSection.d.ts.map +0 -1
- package/dist/client/settings/LLMSection.js +0 -191
- package/dist/client/settings/LLMSection.js.map +0 -1
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
import { getTraceSummary, insertEvalResult, getEvalDataset } from "./store.js";
|
|
2
|
+
import { getRunById, getRunEventsSince } from "../agent/run-store.js";
|
|
3
|
+
import { resolveEngine, getStoredModelForEngine, } from "../agent/engine/index.js";
|
|
4
|
+
const LATENCY_BASELINE_PER_TOOL_MS = 10_000;
|
|
5
|
+
const COST_BASELINE_PER_TOOL_CX100 = 50;
|
|
6
|
+
const LLM_JUDGE_TIMEOUT_MS = 30_000;
|
|
7
|
+
/**
 * Assemble one eval-result record from the fields a scorer supplies.
 *
 * A fresh UUID and a millisecond timestamp are generated here, the score is
 * clamped into the closed range [0, 1], and an absent `reasoning` or
 * `metadata` is stored as `null`.
 *
 * @param opts Scorer output: `runId`, `threadId`, `userId`, `evalType`,
 *   `criteria`, `score`, and optionally `reasoning` / `metadata`.
 * @returns The record in the shape handed to `insertEvalResult`.
 */
function makeEvalResult(opts) {
    const { runId, threadId, userId, evalType, criteria } = opts;
    // Cap at 1 first, then floor at 0 — same clamp, spelled out in two steps.
    const cappedAtOne = Math.min(1, opts.score);
    const clampedScore = Math.max(0, cappedAtOne);
    const record = {
        id: crypto.randomUUID(),
        runId,
        threadId,
        userId,
        evalType,
        criteria,
        score: clampedScore,
        reasoning: opts.reasoning ?? null,
        metadata: opts.metadata ?? null,
        createdAt: Date.now(),
    };
    return record;
}
|
|
21
|
+
/**
 * Pick the identifying (runId, threadId, userId) fields out of a trace
 * summary so a scorer can spread them straight into an eval result.
 *
 * @param summary Trace summary for a run.
 * @returns A new object holding only those three fields.
 */
function fromSummary(summary) {
    const { runId, threadId, userId } = summary;
    return { runId, threadId, userId };
}
|
|
30
|
+
// ─── Layer 1: Automated deterministic scorers ────────────────────────
|
|
31
|
+
/**
 * Automated scorer: fraction of tool calls that succeeded.
 *
 * A run with no tool calls scores 1.0. The raw counts are recorded in the
 * result metadata.
 *
 * @param summary Trace summary providing `toolCalls`, `successfulTools`
 *   and `failedTools`.
 * @returns An eval result with criteria `tool_success_rate`.
 */
function scoreToolSuccessRate(summary) {
    const { toolCalls, successfulTools, failedTools } = summary;
    let ratio = 1.0;
    if (toolCalls > 0) {
        ratio = successfulTools / toolCalls;
    }
    return makeEvalResult({
        ...fromSummary(summary),
        evalType: "automated",
        criteria: "tool_success_rate",
        score: ratio,
        metadata: { totalTools: toolCalls, successfulTools, failedTools },
    });
}
|
|
46
|
+
/**
 * Automated scorer: how many LLM iterations were spent per tool call.
 *
 * The score is `toolCalls / llmCalls`, capped at 1. A run with no tool
 * calls (plain Q&A), or with no recorded LLM calls, scores 1.0.
 *
 * @param summary Trace summary providing `toolCalls` and `llmCalls`.
 * @returns An eval result with criteria `step_efficiency`.
 */
function scoreStepEfficiency(summary) {
    const { llmCalls, toolCalls } = summary;
    let efficiency = 1.0;
    // Only penalise when tools were used AND there are LLM calls to compare against.
    if (toolCalls !== 0 && llmCalls > 0) {
        efficiency = Math.min(1, toolCalls / llmCalls);
    }
    return makeEvalResult({
        ...fromSummary(summary),
        evalType: "automated",
        criteria: "step_efficiency",
        score: efficiency,
        metadata: { llmCalls, toolCalls },
    });
}
|
|
62
|
+
/**
 * Automated scorer: wall-clock duration relative to a per-tool time budget.
 *
 * The budget is `LATENCY_BASELINE_PER_TOOL_MS` per tool call, with a floor
 * of one baseline. The score is `1 - actual / budget`, floored at 0, so a
 * run that uses its whole budget (or more) scores 0.
 *
 * @param summary Trace summary providing `toolCalls` and `totalDurationMs`.
 * @returns An eval result with criteria `latency_score`.
 */
function scoreLatency(summary) {
    const perToolBudgetMs = summary.toolCalls * LATENCY_BASELINE_PER_TOOL_MS;
    const expectedMs = Math.max(LATENCY_BASELINE_PER_TOOL_MS, perToolBudgetMs);
    const fractionUsed = summary.totalDurationMs / expectedMs;
    return makeEvalResult({
        ...fromSummary(summary),
        evalType: "automated",
        criteria: "latency_score",
        score: Math.max(0, 1 - fractionUsed),
        metadata: { actualMs: summary.totalDurationMs, expectedMs },
    });
}
|
|
73
|
+
/**
 * Automated scorer: spend relative to a per-tool cost budget.
 *
 * The budget is `COST_BASELINE_PER_TOOL_CX100` per tool call, with a floor
 * of one baseline, compared against `summary.totalCostCentsX100`. The score
 * is `1 - actual / budget`, floored at 0.
 *
 * @param summary Trace summary providing `toolCalls` and `totalCostCentsX100`.
 * @returns An eval result with criteria `cost_efficiency`.
 */
function scoreCostEfficiency(summary) {
    const perToolBudget = summary.toolCalls * COST_BASELINE_PER_TOOL_CX100;
    const expectedCx100 = Math.max(COST_BASELINE_PER_TOOL_CX100, perToolBudget);
    const fractionSpent = summary.totalCostCentsX100 / expectedCx100;
    return makeEvalResult({
        ...fromSummary(summary),
        evalType: "automated",
        criteria: "cost_efficiency",
        score: Math.max(0, 1 - fractionSpent),
        metadata: { actualCx100: summary.totalCostCentsX100, expectedCx100 },
    });
}
|
|
84
|
+
/**
 * Automated scorer: did the run recover from tool failures?
 *
 * Scores 1.0 when no tool failed, or when tools failed but the run still
 * ended with status `"completed"`; otherwise 0.
 *
 * @param summary Trace summary providing `failedTools`.
 * @param runStatus Final status string of the run.
 * @returns An eval result with criteria `error_recovery`.
 */
function scoreErrorRecovery(summary, runStatus) {
    const hadErrors = summary.failedTools > 0;
    const recovered = !hadErrors || runStatus === "completed";
    return makeEvalResult({
        ...fromSummary(summary),
        evalType: "automated",
        criteria: "error_recovery",
        score: recovered ? 1.0 : 0,
        metadata: { hadErrors, runStatus },
    });
}
|
|
104
|
+
/**
 * Run every deterministic scorer against a finished run.
 *
 * The trace summary and the run record are loaded concurrently. With no
 * trace summary there is nothing to score and an empty array is returned.
 * Each result is persisted with `insertEvalResult` as fire-and-forget:
 * the write is not awaited and a rejected write is deliberately ignored.
 *
 * @param runId Identifier of the run to score.
 * @returns The five automated eval results, or `[]` when no summary exists.
 */
export async function runAutomatedEvals(runId) {
    const summaryLookup = getTraceSummary(runId);
    const runLookup = getRunById(runId);
    const [summary, run] = await Promise.all([summaryLookup, runLookup]);
    if (!summary) {
        return [];
    }
    // A missing run record still lets the summary-only scorers run.
    const runStatus = run?.status ?? "unknown";
    const summaryOnlyScorers = [
        scoreToolSuccessRate,
        scoreStepEfficiency,
        scoreLatency,
        scoreCostEfficiency,
    ];
    const results = summaryOnlyScorers.map((scorer) => scorer(summary));
    results.push(scoreErrorRecovery(summary, runStatus));
    results.forEach((result) => {
        insertEvalResult(result).catch(() => { });
    });
    return results;
}
|
|
124
|
+
// ─── Layer 2: LLM-as-Judge ───────────────────────────────────────────
|
|
125
|
+
function buildConversationTranscript(events) {
|
|
126
|
+
const lines = [];
|
|
127
|
+
for (const { eventData } of events) {
|
|
128
|
+
try {
|
|
129
|
+
const event = JSON.parse(eventData);
|
|
130
|
+
if (event.type === "user-message") {
|
|
131
|
+
lines.push(`[User]: ${event.text ?? JSON.stringify(event.content)}`);
|
|
132
|
+
}
|
|
133
|
+
else if (event.type === "text-delta" || event.type === "text") {
|
|
134
|
+
lines.push(`[Agent]: ${event.text}`);
|
|
135
|
+
}
|
|
136
|
+
else if (event.type === "tool-call") {
|
|
137
|
+
lines.push(`[Tool Call: ${event.name}] ${JSON.stringify(event.input)}`);
|
|
138
|
+
}
|
|
139
|
+
else if (event.type === "tool-result") {
|
|
140
|
+
const snippet = typeof event.content === "string"
|
|
141
|
+
? event.content.slice(0, 500)
|
|
142
|
+
: JSON.stringify(event.content).slice(0, 500);
|
|
143
|
+
lines.push(`[Tool Result${event.isError ? " (ERROR)" : ""}]: ${snippet}`);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
catch {
|
|
147
|
+
// Skip unparseable events
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
return lines.join("\n");
|
|
151
|
+
}
|
|
152
|
+
/**
 * Build the user prompt sent to the judge model.
 *
 * The prompt lists the criterion (name, description, and rubric when one is
 * given), embeds the transcript, and instructs the model to answer with a
 * single JSON object `{"score", "reasoning"}` whose score lies within
 * `criteria.scoreRange` (default 0..1).
 *
 * @param transcript Plain-text conversation to be judged.
 * @param criteria Criterion with `name`, `description`, and optional
 *   `rubric` / `scoreRange`.
 * @returns The full prompt text.
 */
function buildJudgePrompt(transcript, criteria) {
    const sections = [
        "You are an expert evaluator. Assess the following agent conversation against the given criteria.",
        "",
        "## Criteria",
        `Name: ${criteria.name}`,
        `Description: ${criteria.description}`,
    ];
    if (criteria.rubric) {
        sections.push(`Rubric: ${criteria.rubric}`);
    }
    const lowest = criteria.scoreRange?.min ?? 0;
    const highest = criteria.scoreRange?.max ?? 1;
    sections.push(
        "",
        "## Conversation Transcript",
        `${transcript}`,
        "",
        "## Instructions",
        "Evaluate the conversation and respond with ONLY a JSON object (no markdown, no explanation outside the JSON):",
        `{"score": <number between ${lowest} and ${highest}>, "reasoning": "<brief explanation>"}`,
    );
    return sections.join("\n");
}
|
|
173
|
+
/**
 * Score a recorded run against one criterion, using an LLM as the judge.
 *
 * The run's stored events are rendered into a transcript, wrapped in a
 * judge prompt, and streamed through the engine with no tools, temperature
 * 0 and a 512-token output cap. The request is aborted after
 * `LLM_JUDGE_TIMEOUT_MS`. The first `{...}` span in the reply is parsed as
 * JSON, and its `score` is normalised from `criteria.scoreRange`
 * (default 0..1) onto 0..1.
 *
 * The result is persisted with `insertEvalResult` as fire-and-forget; a
 * failed write is ignored.
 *
 * @param runId Identifier of the run to judge.
 * @param criteria Criterion with `name`, `description`, and optional
 *   `rubric` / `scoreRange`.
 * @param opts Optional overrides: `engine`, `model`, `userId`.
 * @returns The eval result, or `null` when the run has no events, the
 *   transcript is empty, the judge reply holds no JSON object or no usable
 *   numeric score, or any step throws (including the timeout abort).
 */
export async function runLlmJudgeEval(runId, criteria, opts) {
    try {
        const [events, run] = await Promise.all([
            getRunEventsSince(runId, 0),
            getRunById(runId),
        ]);
        if (events.length === 0)
            return null;
        const transcript = buildConversationTranscript(events);
        if (!transcript.trim())
            return null;
        const engine = opts?.engine ?? (await resolveEngine({ engineOption: undefined }));
        const model = opts?.model ??
            (await getStoredModelForEngine(engine)) ??
            engine.defaultModel;
        const judgePrompt = buildJudgePrompt(transcript, criteria);
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), LLM_JUDGE_TIMEOUT_MS);
        let responseText = "";
        try {
            const stream = engine.stream({
                model,
                systemPrompt: "You are an evaluation judge. Respond only with valid JSON.",
                messages: [
                    { role: "user", content: [{ type: "text", text: judgePrompt }] },
                ],
                tools: [],
                abortSignal: controller.signal,
                maxOutputTokens: 512,
                temperature: 0,
            });
            for await (const event of stream) {
                if (event.type === "text-delta") {
                    responseText += event.text;
                }
            }
        }
        finally {
            // Always disarm the timer so it cannot fire after the stream ends.
            clearTimeout(timeout);
        }
        const jsonMatch = responseText.match(/\{[\s\S]*\}/);
        if (!jsonMatch)
            return null;
        const parsed = JSON.parse(jsonMatch[0]);
        // A missing or non-numeric score would normalise to NaN, which passes
        // through the [0, 1] clamp unchanged and would be stored as a bogus
        // result. Treat it as "no verdict". Numeric strings are still accepted.
        const rawScore = typeof parsed.score === "string" && parsed.score.trim() !== ""
            ? Number(parsed.score)
            : parsed.score;
        if (typeof rawScore !== "number" || !Number.isFinite(rawScore))
            return null;
        const min = criteria.scoreRange?.min ?? 0;
        const max = criteria.scoreRange?.max ?? 1;
        // A degenerate range (max <= min) cannot be rescaled; use the raw score.
        const normalizedScore = max > min ? (rawScore - min) / (max - min) : rawScore;
        const result = makeEvalResult({
            runId,
            threadId: run?.threadId ?? null,
            userId: opts?.userId ?? null,
            evalType: "llm_judge",
            criteria: criteria.name,
            score: normalizedScore,
            reasoning: parsed.reasoning,
            metadata: { model, rawScore, scoreRange: { min, max } },
        });
        insertEvalResult(result).catch(() => { });
        return result;
    }
    catch {
        // Judge evals are best-effort: any failure yields "no result".
        return null;
    }
}
|
|
237
|
+
// ─── Layer 3: Dataset evaluation ─────────────────────────────────────
|
|
238
|
+
/**
 * Judge every entry of a stored eval dataset against one or more criteria.
 *
 * Entries and criteria are processed sequentially, one judge call at a
 * time. Entries whose judge call yields no result are left out of
 * `results` and of the average.
 *
 * NOTE(review): the transcript built per entry contains only the stored
 * input / expected output / context. `buildTestCaseTranscript` ignores the
 * engine and model it is given, so no fresh agent response appears to be
 * generated here; confirm this is intended.
 *
 * @param datasetId Identifier of the dataset to evaluate.
 * @param opts Optional overrides: `engine`, `model`, `criteria`.
 * @returns `{ datasetId, totalCases, avgScore, results }`; all-zero / empty
 *   when the dataset does not exist.
 */
export async function runDatasetEval(datasetId, opts) {
    const dataset = await getEvalDataset(datasetId);
    if (!dataset) {
        return { datasetId, totalCases: 0, avgScore: 0, results: [] };
    }
    const engine = opts?.engine ?? (await resolveEngine({ engineOption: undefined }));
    const model = opts?.model ??
        (await getStoredModelForEngine(engine)) ??
        engine.defaultModel;
    const fallbackCriteria = [
        {
            name: "response_quality",
            description: "How well the agent's response addresses the user's input, considering accuracy, completeness, and helpfulness.",
        },
    ];
    const criteria = opts?.criteria ?? fallbackCriteria;
    const collected = [];
    for (const testCase of dataset.entries) {
        const transcript = buildTestCaseTranscript(testCase, engine, model);
        for (const criterion of criteria) {
            const outcome = await evaluateTestCase(datasetId, testCase, transcript, criterion, engine, model);
            if (outcome) {
                collected.push(outcome);
            }
        }
    }
    let scoreSum = 0;
    for (const { score } of collected) {
        scoreSum = scoreSum + score;
    }
    const avgScore = collected.length > 0 ? scoreSum / collected.length : 0;
    return {
        datasetId,
        totalCases: dataset.entries.length,
        avgScore,
        results: collected,
    };
}
|
|
272
|
+
/**
 * Render a dataset test case as a short transcript for the judge prompt.
 *
 * The user input always appears. The expected output and the JSON-encoded
 * context are appended on their own lines when they are truthy.
 *
 * @param testCase Entry with `input` and optional `expectedOutput` / `context`.
 * @param _engine Unused.
 * @param _model Unused.
 * @returns The transcript text, lines joined with `\n`.
 */
function buildTestCaseTranscript(testCase, _engine, _model) {
    const parts = [`[User]: ${testCase.input}`];
    if (testCase.expectedOutput) {
        parts.push(`[Expected Output]: ${testCase.expectedOutput}`);
    }
    if (testCase.context) {
        parts.push(`[Context]: ${JSON.stringify(testCase.context)}`);
    }
    return parts.join("\n");
}
|
|
282
|
+
/**
 * Judge a single dataset test case against one criterion.
 *
 * The prepared transcript is wrapped in a judge prompt and streamed through
 * the engine with no tools, temperature 0 and a 512-token output cap. The
 * request is aborted after `LLM_JUDGE_TIMEOUT_MS`. The first `{...}` span
 * in the reply is parsed as JSON, and its `score` is normalised from
 * `criteria.scoreRange` (default 0..1) onto 0..1.
 *
 * The result is persisted with `insertEvalResult` as fire-and-forget; a
 * failed write is ignored.
 *
 * @param datasetId Identifier of the dataset the case belongs to.
 * @param testCase Entry with `input` and optional `expectedOutput` / `tags`.
 * @param transcript Pre-rendered transcript for this case.
 * @param criteria Criterion with `name`, `description`, and optional
 *   `rubric` / `scoreRange`.
 * @param engine Engine whose `stream()` is used for the judge call.
 * @param model Model identifier passed to the engine.
 * @returns The eval result, or `null` when the reply holds no JSON object
 *   or no usable numeric score, or when any step throws (including the
 *   timeout abort).
 */
async function evaluateTestCase(datasetId, testCase, transcript, criteria, engine, model) {
    try {
        const judgePrompt = buildJudgePrompt(transcript, criteria);
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), LLM_JUDGE_TIMEOUT_MS);
        let responseText = "";
        try {
            const stream = engine.stream({
                model,
                systemPrompt: "You are an evaluation judge. Respond only with valid JSON.",
                messages: [
                    { role: "user", content: [{ type: "text", text: judgePrompt }] },
                ],
                tools: [],
                abortSignal: controller.signal,
                maxOutputTokens: 512,
                temperature: 0,
            });
            for await (const event of stream) {
                if (event.type === "text-delta") {
                    responseText += event.text;
                }
            }
        }
        finally {
            // Always disarm the timer so it cannot fire after the stream ends.
            clearTimeout(timeout);
        }
        const jsonMatch = responseText.match(/\{[\s\S]*\}/);
        if (!jsonMatch)
            return null;
        const parsed = JSON.parse(jsonMatch[0]);
        // A missing or non-numeric score would normalise to NaN, which passes
        // through the [0, 1] clamp unchanged and would turn the dataset
        // average into NaN. Treat it as "no verdict". Numeric strings are
        // still accepted.
        const rawScore = typeof parsed.score === "string" && parsed.score.trim() !== ""
            ? Number(parsed.score)
            : parsed.score;
        if (typeof rawScore !== "number" || !Number.isFinite(rawScore))
            return null;
        const min = criteria.scoreRange?.min ?? 0;
        const max = criteria.scoreRange?.max ?? 1;
        // A degenerate range (max <= min) cannot be rescaled; use the raw score.
        const normalizedScore = max > min ? (rawScore - min) / (max - min) : rawScore;
        // Dataset evals use a synthetic runId since there's no real run
        const syntheticRunId = `dataset:${datasetId}:${crypto.randomUUID()}`;
        // Dataset evals are administrative — there's no per-user runId, so
        // we leave userId null. Per-user reads filter null rows out, which
        // is the right default; admins can fetch dataset evals via the
        // unfiltered call path.
        const result = makeEvalResult({
            runId: syntheticRunId,
            threadId: null,
            userId: null,
            evalType: "llm_judge",
            criteria: criteria.name,
            score: normalizedScore,
            reasoning: parsed.reasoning,
            metadata: {
                datasetId,
                model,
                testCaseInput: testCase.input,
                expectedOutput: testCase.expectedOutput ?? null,
                tags: testCase.tags ?? [],
                rawScore,
                scoreRange: { min, max },
            },
        });
        insertEvalResult(result).catch(() => { });
        return result;
    }
    catch {
        // Judge evals are best-effort: any failure yields "no result".
        return null;
    }
}
|
|
347
|
+
// ─── Orchestrator ────────────────────────────────────────────────────
|
|
348
|
+
/**
 * Evaluate a run: always run the automated scorers, and for a random
 * sample of runs also ask the LLM judge.
 *
 * The judge runs only when `opts.sampleRate` is positive and a random draw
 * falls below it. Two built-in criteria (`overall_quality` and
 * `task_completion`) are then judged concurrently, and each non-null
 * verdict is appended to the automated results. The `userId` passed to the
 * judge is taken from the first automated result, or `null` if there is none.
 *
 * @param runId Identifier of the run to evaluate.
 * @param opts Optional settings; `sampleRate` defaults to 0 (judge never runs).
 * @returns The automated results followed by any judge results.
 */
export async function evaluateRun(runId, opts) {
    const results = await runAutomatedEvals(runId);
    const userId = results[0]?.userId ?? null;
    const sampleRate = opts?.sampleRate ?? 0;
    // Short-circuit keeps the random draw from happening when sampling is off.
    const sampled = sampleRate > 0 && Math.random() < sampleRate;
    if (!sampled) {
        return results;
    }
    const judgeCriteria = [
        {
            name: "overall_quality",
            description: "Overall quality of the agent's response, considering helpfulness, accuracy, and appropriate tool usage.",
        },
        {
            name: "task_completion",
            description: "Whether the agent successfully completed the user's requested task.",
        },
    ];
    const pendingVerdicts = judgeCriteria.map((criterion) => runLlmJudgeEval(runId, criterion, { userId }));
    const verdicts = await Promise.all(pendingVerdicts);
    for (const verdict of verdicts) {
        if (!verdict) {
            continue;
        }
        results.push(verdict);
    }
    return results;
}
|
|
371
|
+
//# sourceMappingURL=evals.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evals.js","sourceRoot":"","sources":["../../src/observability/evals.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAC/E,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAEtE,OAAO,EACL,aAAa,EACb,uBAAuB,GACxB,MAAM,0BAA0B,CAAC;AAElC,MAAM,4BAA4B,GAAG,MAAM,CAAC;AAC5C,MAAM,4BAA4B,GAAG,EAAE,CAAC;AACxC,MAAM,oBAAoB,GAAG,MAAM,CAAC;AAapC,SAAS,cAAc,CAAC,IAAwB;IAC9C,OAAO;QACL,EAAE,EAAE,MAAM,CAAC,UAAU,EAAE;QACvB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI;QACjC,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI;QAC/B,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;KACtB,CAAC;AACJ,CAAC;AAED;mDACmD;AACnD,SAAS,WAAW,CAAC,OAAqB;IAKxC,OAAO;QACL,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,MAAM,EAAE,OAAO,CAAC,MAAM;KACvB,CAAC;AACJ,CAAC;AAED,wEAAwE;AAExE,SAAS,oBAAoB,CAAC,OAAqB;IACjD,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAChC,MAAM,KAAK,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,eAAe,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC;IAChE,OAAO,cAAc,CAAC;QACpB,GAAG,WAAW,CAAC,OAAO,CAAC;QACvB,QAAQ,EAAE,WAAW;QACrB,QAAQ,EAAE,mBAAmB;QAC7B,KAAK;QACL,QAAQ,EAAE;YACR,UAAU,EAAE,KAAK;YACjB,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,WAAW,EAAE,OAAO,CAAC,WAAW;SACjC;KACF,CAAC,CAAC;AACL,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAqB;IAChD,mDAAmD;IACnD,wEAAwE;IACxE,MAAM,KAAK,GACT,OAAO,CAAC,SAAS,KAAK,CAAC;QACrB,CAAC,CAAC,GAAG;QACL,CAAC,CAAC,OAAO,CAAC,QAAQ,GAAG,CAAC;YACpB,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC;YACnD,CAAC,CAAC,GAAG,CAAC;IACZ,OAAO,cAAc,CAAC;QACpB,GAAG,WAAW,CAAC,OAAO,CAAC;QACvB,QAAQ,EAAE,WAAW;QACrB,QAAQ,EAAE,iBAAiB;QAC3B,KAAK;QACL,QAAQ,EAAE,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,EAAE;KACvE,CAAC,CAAC;AACL,CAAC;AAED,SAAS,YAAY,CAAC,OAAqB;IACzC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CACzB,4BAA4B,EA
C5B,OAAO,CAAC,SAAS,GAAG,4BAA4B,CACjD,CAAC;IACF,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,eAAe,GAAG,UAAU,CAAC,CAAC;IACpE,OAAO,cAAc,CAAC;QACpB,GAAG,WAAW,CAAC,OAAO,CAAC;QACvB,QAAQ,EAAE,WAAW;QACrB,QAAQ,EAAE,eAAe;QACzB,KAAK;QACL,QAAQ,EAAE,EAAE,QAAQ,EAAE,OAAO,CAAC,eAAe,EAAE,UAAU,EAAE;KAC5D,CAAC,CAAC;AACL,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAqB;IAChD,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAC5B,4BAA4B,EAC5B,OAAO,CAAC,SAAS,GAAG,4BAA4B,CACjD,CAAC;IACF,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,kBAAkB,GAAG,aAAa,CAAC,CAAC;IAC1E,OAAO,cAAc,CAAC;QACpB,GAAG,WAAW,CAAC,OAAO,CAAC;QACvB,QAAQ,EAAE,WAAW;QACrB,QAAQ,EAAE,iBAAiB;QAC3B,KAAK;QACL,QAAQ,EAAE,EAAE,WAAW,EAAE,OAAO,CAAC,kBAAkB,EAAE,aAAa,EAAE;KACrE,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CACzB,OAAqB,EACrB,SAAiB;IAEjB,MAAM,SAAS,GAAG,OAAO,CAAC,WAAW,GAAG,CAAC,CAAC;IAC1C,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,SAAS,KAAK,WAAW,EAAE,CAAC;QACrC,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,CAAC,CAAC;IACZ,CAAC;IACD,OAAO,cAAc,CAAC;QACpB,GAAG,WAAW,CAAC,OAAO,CAAC;QACvB,QAAQ,EAAE,WAAW;QACrB,QAAQ,EAAE,gBAAgB;QAC1B,KAAK;QACL,QAAQ,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE;KACnC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,KAAa;IACnD,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACvC,eAAe,CAAC,KAAK,CAAC;QACtB,UAAU,CAAC,KAAK,CAAC;KAClB,CAAC,CAAC;IAEH,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,SAAS,GAAG,GAAG,EAAE,MAAM,IAAI,SAAS,CAAC;IAC3C,MAAM,OAAO,GAAG;QACd,oBAAoB,CAAC,OAAO,CAAC;QAC7B,mBAAmB,CAAC,OAAO,CAAC;QAC5B,YAAY,CAAC,OAAO,CAAC;QACrB,mBAAmB,CAAC,OAAO,CAAC;QAC5B,kBAAkB,CAAC,OAAO,EAAE,SAAS,CAAC;KACvC,CAAC;IAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,gBAAgB,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,wEAAwE;AAExE,SAAS,2BAA2B,CAClC,MAAiD;IAEjD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,EAAE,SAAS,EAAE,IAAI,MAAM,EAAE,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,C
AAC;YACpC,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;gBAClC,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YACvE,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAChE,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACtC,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAC1E,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBACxC,MAAM,OAAO,GACX,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ;oBAC/B,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;oBAC7B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAClD,KAAK,CAAC,IAAI,CACR,eAAe,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,MAAM,OAAO,EAAE,CAC9D,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,0BAA0B;QAC5B,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,gBAAgB,CAAC,UAAkB,EAAE,QAAsB;IAClE,IAAI,MAAM,GAAG;;;QAGP,QAAQ,CAAC,IAAI;eACN,QAAQ,CAAC,WAAW,EAAE,CAAC;IAEpC,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,IAAI,aAAa,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC3C,CAAC;IAED,MAAM,GAAG,GAAG,QAAQ,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;IAC1C,MAAM,GAAG,GAAG,QAAQ,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;IAE1C,MAAM,IAAI;;;EAGV,UAAU;;;;4BAIgB,GAAG,QAAQ,GAAG,wCAAwC,CAAC;IAEjF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAa,EACb,QAAsB,EACtB,IAAuE;IAEvE,IAAI,CAAC;QACH,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YACtC,iBAAiB,CAAC,KAAK,EAAE,CAAC,CAAC;YAC3B,UAAU,CAAC,KAAK,CAAC;SAClB,CAAC,CAAC;QAEH,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAErC,MAAM,UAAU,GAAG,2BAA2B,CAAC,MAAM,CAAC,CAAC;QACvD,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE;YAAE,OAAO,IAAI,CAAC;QAEpC,MAAM,MAAM,GACV,IAAI,EAAE,MAAM,IAAI,CAAC,MAAM,aAAa,CAAC,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;QACrE,MAAM,KAAK,GACT,IAAI,EAAE,KAAK;YACX,CAAC,MAAM,uBAAuB,CAAC,MAAM,CAAC,CAAC;YACvC,MAAM,CAAC,YAAY,CAAC;QAEtB,MA
AM,WAAW,GAAG,gBAAgB,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAE3D,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,oBAAoB,CAAC,CAAC;QAE3E,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;gBAC3B,KAAK;gBACL,YAAY,EACV,4DAA4D;gBAC9D,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE;iBACjE;gBACD,KAAK,EAAE,EAAE;gBACT,WAAW,EAAE,UAAU,CAAC,MAAM;gBAC9B,eAAe,EAAE,GAAG;gBACpB,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;YAEH,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBACjC,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBAChC,YAAY,IAAI,KAAK,CAAC,IAAI,CAAC;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,YAAY,CAAC,OAAO,CAAC,CAAC;QACxB,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QACpD,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAE5B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAGrC,CAAC;QAEF,MAAM,GAAG,GAAG,QAAQ,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,QAAQ,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;QAC1C,MAAM,eAAe,GACnB,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;QAEhE,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,KAAK;YACL,QAAQ,EAAE,GAAG,EAAE,QAAQ,IAAI,IAAI;YAC/B,MAAM,EAAE,IAAI,EAAE,MAAM,IAAI,IAAI;YAC5B,QAAQ,EAAE,WAAW;YACrB,QAAQ,EAAE,QAAQ,CAAC,IAAI;YACvB,KAAK,EAAE,eAAe;YACtB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,QAAQ,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;SACtE,CAAC,CAAC;QAEH,gBAAgB,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACzC,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,wEAAwE;AAExE,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,SAAiB,EACjB,IAA0E;IAO1E,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,CAAC;IAChD,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAChE,CAAC;IAED,MAAM,MAAM,GACV,IAAI,EAAE,MAAM,IAAI,CAAC,MAAM,a
AAa,CAAC,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;IACrE,MAAM,KAAK,GACT,IAAI,EAAE,KAAK;QACX,CAAC,MAAM,uBAAuB,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,YAAY,CAAC;IAEtB,MAAM,QAAQ,GAAG,IAAI,EAAE,QAAQ,IAAI;QACjC;YACE,IAAI,EAAE,kBAAkB;YACxB,WAAW,EACT,gHAAgH;SACnH;KACF,CAAC;IAEF,MAAM,UAAU,GAAiB,EAAE,CAAC;IAEpC,KAAK,MAAM,QAAQ,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACvC,MAAM,UAAU,GAAG,uBAAuB,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;QAEpE,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,MAAM,GAAG,MAAM,gBAAgB,CACnC,SAAS,EACT,QAAQ,EACR,UAAU,EACV,CAAC,EACD,MAAM,EACN,KAAK,CACN,CAAC;YACF,IAAI,MAAM;gBAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GACZ,UAAU,CAAC,MAAM,GAAG,CAAC;QACnB,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM;QACrE,CAAC,CAAC,CAAC,CAAC;IAER,OAAO;QACL,SAAS;QACT,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,MAAM;QAClC,QAAQ;QACR,OAAO,EAAE,UAAU;KACpB,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAC9B,QAAsB,EACtB,OAAoB,EACpB,MAAc;IAEd,IAAI,UAAU,GAAG,WAAW,QAAQ,CAAC,KAAK,EAAE,CAAC;IAC7C,IAAI,QAAQ,CAAC,cAAc,EAAE,CAAC;QAC5B,UAAU,IAAI,wBAAwB,QAAQ,CAAC,cAAc,EAAE,CAAC;IAClE,CAAC;IACD,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;QACrB,UAAU,IAAI,gBAAgB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;IACnE,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAC7B,SAAiB,EACjB,QAAsB,EACtB,UAAkB,EAClB,QAAsB,EACtB,MAAmB,EACnB,KAAa;IAEb,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,gBAAgB,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAE3D,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,oBAAoB,CAAC,CAAC;QAE3E,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;gBAC3B,KAAK;gBACL,YAAY,EACV,4DAA4D;gBAC9D,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE;iBACjE;gBACD,KAAK,EAAE,EAAE;gBACT,WAAW,EAAE,UAAU,CAAC,MAAM;gBAC9B,eAAe,EAAE,GAAG;gBACpB,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;YAEH,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC
jC,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBAChC,YAAY,IAAI,KAAK,CAAC,IAAI,CAAC;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,YAAY,CAAC,OAAO,CAAC,CAAC;QACxB,CAAC;QAED,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QACpD,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAE5B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAGrC,CAAC;QAEF,MAAM,GAAG,GAAG,QAAQ,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,QAAQ,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,CAAC;QAC1C,MAAM,eAAe,GACnB,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;QAEhE,gEAAgE;QAChE,MAAM,cAAc,GAAG,WAAW,SAAS,IAAI,MAAM,CAAC,UAAU,EAAE,EAAE,CAAC;QAErE,mEAAmE;QACnE,mEAAmE;QACnE,+DAA+D;QAC/D,wBAAwB;QACxB,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,KAAK,EAAE,cAAc;YACrB,QAAQ,EAAE,IAAI;YACd,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,WAAW;YACrB,QAAQ,EAAE,QAAQ,CAAC,IAAI;YACvB,KAAK,EAAE,eAAe;YACtB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,QAAQ,EAAE;gBACR,SAAS;gBACT,KAAK;gBACL,aAAa,EAAE,QAAQ,CAAC,KAAK;gBAC7B,cAAc,EAAE,QAAQ,CAAC,cAAc,IAAI,IAAI;gBAC/C,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;gBACzB,QAAQ,EAAE,MAAM,CAAC,KAAK;gBACtB,UAAU,EAAE,EAAE,GAAG,EAAE,GAAG,EAAE;aACzB;SACF,CAAC,CAAC;QAEH,gBAAgB,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACzC,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,wEAAwE;AAExE,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,KAAa,EACb,IAA8B;IAE9B,MAAM,OAAO,GAAG,MAAM,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,IAAI,CAAC;IAE1C,MAAM,UAAU,GAAG,IAAI,EAAE,UAAU,IAAI,CAAC,CAAC;IACzC,IAAI,UAAU,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,EAAE,CAAC;QACjD,MAAM,eAAe,GAAmB;YACtC;gBACE,IAAI,EAAE,iBAAiB;gBACvB,WAAW,EACT,yGAAyG;aAC5G;YACD;gBACE,IAAI,EAAE,iBAAiB;gBACvB,WAAW,EACT,qEAAqE;aACxE;SACF,CAAC;QAEF,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,CAClE,CAAC;QAEF,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;YAC7B,IAA
I,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { Experiment, ExperimentVariant, ExperimentMetricResult } from "./types.js";
|
|
2
|
+
/**
 * Creates an experiment from the supplied options and resolves with the
 * resulting `Experiment`.
 *
 * @param opts.name Name of the experiment.
 * @param opts.variants Variants of the experiment; each has an `id`, a numeric
 *   `weight` (presumably its relative share of assignments — TODO confirm
 *   against the implementation) and an arbitrary `config` record.
 * @param opts.metrics Names of the metrics associated with the experiment.
 * @param opts.assignmentLevel Optional; either "user" or "session". The
 *   default applied when omitted is not visible from this declaration.
 * @returns A promise for the `Experiment`.
 */
export declare function createExperiment(opts: {
|
|
3
|
+
name: string;
|
|
4
|
+
variants: Array<{
|
|
5
|
+
id: string;
|
|
6
|
+
weight: number;
|
|
7
|
+
config: Record<string, unknown>;
|
|
8
|
+
}>;
|
|
9
|
+
metrics: string[];
|
|
10
|
+
assignmentLevel?: "user" | "session";
|
|
11
|
+
}): Promise<Experiment>;
|
|
12
|
+
/**
 * Starts the experiment identified by `id`.
 * NOTE(review): the resulting status value and the behaviour for an unknown
 * id are not visible from this declaration — confirm in the implementation.
 *
 * @param id Experiment identifier.
 * @returns A promise that resolves with no value.
 */
export declare function startExperiment(id: string): Promise<void>;
|
|
13
|
+
/**
 * Pauses the experiment identified by `id`.
 * NOTE(review): how a paused experiment affects variant resolution is not
 * visible from this declaration — confirm in the implementation.
 *
 * @param id Experiment identifier.
 * @returns A promise that resolves with no value.
 */
export declare function pauseExperiment(id: string): Promise<void>;
|
|
14
|
+
/**
 * Marks the experiment identified by `id` as complete.
 * NOTE(review): whether completion is reversible is not visible from this
 * declaration — confirm in the implementation.
 *
 * @param id Experiment identifier.
 * @returns A promise that resolves with no value.
 */
export declare function completeExperiment(id: string): Promise<void>;
|
|
15
|
+
/**
 * Resolves the variant of an experiment for a given user.
 * NOTE(review): whether the assignment is persisted or deterministic per user,
 * and what happens for an unknown experiment, cannot be told from this
 * declaration — confirm in the implementation.
 *
 * @param experimentId Identifier of the experiment.
 * @param userId Identifier of the user to resolve a variant for.
 * @returns A promise for the resolved `ExperimentVariant`.
 */
export declare function resolveVariant(experimentId: string, userId: string): Promise<ExperimentVariant>;
|
|
16
|
+
/**
 * Resolves experiment configuration for a user.
 *
 * The resolved value is either `null` or an object holding a `configs` record
 * and the list of `{ experimentId, variantId }` assignments.
 * NOTE(review): `null` presumably means no experiment applies to the user, and
 * `configs` presumably merges the assigned variants' configs — confirm both
 * against the implementation.
 *
 * @param userId Identifier of the user.
 * @returns A promise for the config/assignment object, or `null`.
 */
export declare function resolveActiveExperimentConfig(userId: string): Promise<{
|
|
17
|
+
configs: Record<string, unknown>;
|
|
18
|
+
assignments: Array<{
|
|
19
|
+
experimentId: string;
|
|
20
|
+
variantId: string;
|
|
21
|
+
}>;
|
|
22
|
+
} | null>;
|
|
23
|
+
/**
 * Computes metric results for an experiment.
 * NOTE(review): the granularity of the returned entries (per metric, per
 * variant, or both) is defined by `ExperimentMetricResult` in ./types.js and
 * is not visible here.
 *
 * @param experimentId Identifier of the experiment.
 * @returns A promise for an array of `ExperimentMetricResult`.
 */
export declare function computeExperimentResults(experimentId: string): Promise<ExperimentMetricResult[]>;
|
|
24
|
+
//# sourceMappingURL=experiments.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"experiments.d.ts","sourceRoot":"","sources":["../../src/observability/experiments.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,UAAU,EACV,iBAAiB,EACjB,sBAAsB,EACvB,MAAM,YAAY,CAAC;AAkDpB,wBAAsB,gBAAgB,CAAC,IAAI,EAAE;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,KAAK,CAAC;QACd,EAAE,EAAE,MAAM,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACjC,CAAC,CAAC;IACH,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACtC,GAAG,OAAO,CAAC,UAAU,CAAC,CActB;AAED,wBAAsB,eAAe,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAG/D;AAED,wBAAsB,eAAe,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAG/D;AAED,wBAAsB,kBAAkB,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGlE;AAID,wBAAsB,cAAc,CAClC,YAAY,EAAE,MAAM,EACpB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,iBAAiB,CAAC,CA8C5B;AAED,wBAAsB,6BAA6B,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC;IAC3E,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,WAAW,EAAE,KAAK,CAAC;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACjE,GAAG,IAAI,CAAC,CAmBR;AAID,wBAAsB,wBAAwB,CAC5C,YAAY,EAAE,MAAM,GACnB,OAAO,CAAC,sBAAsB,EAAE,CAAC,CA2JnC"}
|