@absolutejs/voice 0.0.22-beta.42 → 0.0.22-beta.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,17 +28,95 @@ export type VoiceEvalReport = {
28
28
  total: number;
29
29
  trend: VoiceEvalTrendBucket[];
30
30
  };
31
/**
 * Condensed pass/fail picture of a single eval report; one is produced for
 * each side of a baseline comparison.
 *
 * - `failed`, `passed`, `total`: counts copied from the source report.
 * - `failedSessionIds`: sorted ids of the report's sessions whose status is "fail".
 * - `passRate`: `passed` divided by `total` as a fraction (not a percentage);
 *   a `total` of 0 is divided as 1.
 */
+ export type VoiceEvalBaselineSummary = {
32
+ failed: number;
33
+ failedSessionIds: string[];
34
+ passRate: number;
35
+ passed: number;
36
+ total: number;
37
+ };
38
/**
 * Result of comparing a current eval report against a stored baseline.
 *
 * - `baseline` / `current`: summaries of the two reports.
 * - `checkedAt`: `Date.now()` at the moment the comparison was built.
 * - `deltas`: each field is the current value minus the baseline value.
 * - `newFailedSessionIds`: sessions failing now that are not in the baseline's failed list.
 * - `recoveredSessionIds`: sessions in the baseline's failed list that are not failing now.
 * - `reasons`: one human-readable message per exceeded limit; empty when nothing regressed.
 * - `status`: "fail" when `reasons` is non-empty, otherwise "pass".
 */
+ export type VoiceEvalBaselineComparison = {
39
+ baseline: VoiceEvalBaselineSummary;
40
+ checkedAt: number;
41
+ current: VoiceEvalBaselineSummary;
42
+ deltas: {
43
+ failed: number;
44
+ passRate: number;
45
+ passed: number;
46
+ total: number;
47
+ };
48
+ newFailedSessionIds: string[];
49
+ recoveredSessionIds: string[];
50
+ reasons: string[];
51
+ status: VoiceEvalStatus;
52
+ };
53
/**
 * Limits applied when comparing a report against a baseline.
 *
 * - `failOnNewFailedSessions`: when true (the default), any session that fails
 *   now but is not in the baseline's failed list is a regression.
 * - `maxFailedDelta`: largest tolerated increase in the failed count; defaults to 0.
 * - `maxPassRateDrop`: largest tolerated decrease in pass rate, as a fraction; defaults to 0.
 */
+ export type VoiceEvalBaselineComparisonOptions = {
54
+ failOnNewFailedSessions?: boolean;
55
+ maxFailedDelta?: number;
56
+ maxPassRateDrop?: number;
57
+ };
58
/**
 * Persistence contract for the eval report used as a baseline.
 *
 * - `get`: resolves to the stored report, or `undefined` when none is available.
 * - `set`: stores the given report as the new baseline.
 */
+ export type VoiceEvalBaselineStore = {
59
+ get: () => Promise<VoiceEvalReport | undefined>;
60
+ set: (report: VoiceEvalReport) => Promise<void>;
61
+ };
62
/**
 * Declarative expectations checked against every matching session's trace events.
 *
 * - `id`: scenario identifier, copied to the result.
 * - `label`: display name; the result falls back to `id` when omitted.
 * - `description`: free text copied to the result.
 * - `scenarioId`: when set, only sessions with at least one event carrying this
 *   `scenarioId` are evaluated; when omitted, every session is evaluated.
 * - `minSessions`: minimum number of matching sessions; defaults to 1.
 * - `minTurns`: minimum number of `turn.committed` events per session.
 * - `maxSessionErrors`: maximum number of `session.error` events per session.
 * - `maxProviderErrors`: maximum number of `session.error` events whose payload has
 *   `providerStatus === "error"` or a string `provider`.
 * - `requiredTranscriptIncludes` / `requiredAssistantIncludes`: substrings that must
 *   appear (trimmed, case-insensitive) in the joined `turn.committed` /
 *   `turn.assistant` payload text.
 * - `requiredLifecycleTypes` / `forbiddenLifecycleTypes`: `payload.type` values of
 *   `call.lifecycle` events that must / must not occur.
 * - `requiredHandoffActions` / `forbiddenHandoffActions`: `payload.action` values of
 *   `call.handoff` events that must / must not occur.
 * - `requiredDisposition`: `payload.disposition` that some `call.lifecycle` event must carry.
 * - `requiredPayloadPaths`: dot-separated paths that must resolve to a value other
 *   than `undefined` in at least one event payload.
 */
+ export type VoiceScenarioEvalDefinition = {
63
+ description?: string;
64
+ forbiddenHandoffActions?: string[];
65
+ forbiddenLifecycleTypes?: string[];
66
+ id: string;
67
+ label?: string;
68
+ maxProviderErrors?: number;
69
+ maxSessionErrors?: number;
70
+ minSessions?: number;
71
+ minTurns?: number;
72
+ requiredAssistantIncludes?: string[];
73
+ requiredDisposition?: string;
74
+ requiredHandoffActions?: string[];
75
+ requiredLifecycleTypes?: string[];
76
+ requiredPayloadPaths?: string[];
77
+ requiredTranscriptIncludes?: string[];
78
+ scenarioId?: string;
79
+ };
80
/**
 * Outcome of checking one session against one scenario.
 *
 * - `eventCount`: number of events evaluated for the session.
 * - `issues`: one message per unmet expectation.
 * - `sessionId`: the session the events belong to.
 * - `status`: "fail" when `issues` is non-empty, otherwise "pass".
 */
+ export type VoiceScenarioEvalSessionResult = {
81
+ eventCount: number;
82
+ issues: string[];
83
+ sessionId: string;
84
+ status: VoiceEvalStatus;
85
+ };
86
/**
 * Outcome of one scenario across all of its matching sessions.
 *
 * - `id`, `description`: copied from the scenario definition.
 * - `label`: the definition's label, or its `id` when no label was given.
 * - `matchedSessions`: number of sessions evaluated for this scenario.
 * - `passed` / `failed`: how many of those sessions passed / failed.
 * - `issues`: scenario-level problems (for example too few matching sessions).
 * - `sessions`: per-session results, ordered by `sessionId`.
 * - `status`: "fail" when there is a scenario-level issue or any failed session.
 */
+ export type VoiceScenarioEvalResult = {
87
+ description?: string;
88
+ failed: number;
89
+ id: string;
90
+ issues: string[];
91
+ label: string;
92
+ matchedSessions: number;
93
+ passed: number;
94
+ sessions: VoiceScenarioEvalSessionResult[];
95
+ status: VoiceEvalStatus;
96
+ };
97
/**
 * Aggregate result of a scenario eval run.
 *
 * - `checkedAt`: `Date.now()` when the report was built.
 * - `failed` / `passed` / `total`: counts of scenarios (not sessions).
 * - `scenarios`: one result per configured scenario.
 * - `status`: "fail" when at least one scenario failed, otherwise "pass".
 */
+ export type VoiceScenarioEvalReport = {
98
+ checkedAt: number;
99
+ failed: number;
100
+ passed: number;
101
+ scenarios: VoiceScenarioEvalResult[];
102
+ status: VoiceEvalStatus;
103
+ total: number;
104
+ };
31
105
  /**
   * Navigation link shown at the top of the rendered eval pages: `href` becomes
   * the anchor target and `label` the anchor text (both HTML-escaped).
   */
  export type VoiceEvalLink = {
32
106
  href: string;
33
107
  label: string;
34
108
  };
35
109
  export type VoiceEvalRoutesOptions = {
110
+ baseline?: VoiceEvalReport | (() => Promise<VoiceEvalReport | undefined>);
111
+ baselineComparison?: VoiceEvalBaselineComparisonOptions;
112
+ baselineStore?: VoiceEvalBaselineStore;
36
113
  events?: StoredVoiceTraceEvent[];
37
114
  headers?: HeadersInit;
38
115
  links?: VoiceEvalLink[];
39
116
  limit?: number;
40
117
  name?: string;
41
118
  path?: string;
119
+ scenarios?: VoiceScenarioEvalDefinition[];
42
120
  store?: VoiceTraceEventStore;
43
121
  thresholds?: VoiceQualityThresholds;
44
122
  title?: string;
@@ -49,10 +127,25 @@ export declare const runVoiceSessionEvals: (options?: {
49
127
  store?: VoiceTraceEventStore;
50
128
  thresholds?: VoiceQualityThresholds;
51
129
  }) => Promise<VoiceEvalReport>;
130
/**
 * Evaluates every scenario definition against trace events grouped by session id.
 *
 * `events` is used when provided; otherwise `store.list()` is awaited; with
 * neither, the run sees no events. `scenarios` defaults to an empty list, which
 * yields a report with `total` 0 and status "pass".
 */
+ export declare const runVoiceScenarioEvals: (options?: {
131
+ events?: StoredVoiceTraceEvent[];
132
+ scenarios?: VoiceScenarioEvalDefinition[];
133
+ store?: VoiceTraceEventStore;
134
+ }) => Promise<VoiceScenarioEvalReport>;
135
/**
 * Compares `currentReport` with `baselineReport` and reports regressions: a
 * failed-count increase above `maxFailedDelta`, a pass-rate drop above
 * `maxPassRateDrop`, and (unless disabled) sessions that fail now but did not
 * fail in the baseline. The result's status is "fail" when any limit is exceeded.
 */
+ export declare const compareVoiceEvalBaseline: (currentReport: VoiceEvalReport, baselineReport: VoiceEvalReport, options?: VoiceEvalBaselineComparisonOptions) => VoiceEvalBaselineComparison;
136
/**
 * Creates a baseline store backed by one JSON file at `filePath`.
 * `get` resolves to `undefined` when the file is missing or blank, otherwise to
 * the parsed JSON; `set` creates the parent directory if needed and writes the
 * report as indented JSON. The implementation uses the Bun runtime's file APIs.
 */
+ export declare const createVoiceFileEvalBaselineStore: (filePath: string) => VoiceEvalBaselineStore;
52
137
  /**
   * Renders a session eval report as a complete HTML document string.
   * `title` defaults to "AbsoluteJS Voice Evals"; `links`, when non-empty, are
   * rendered as a navigation bar above the heading.
   */
  export declare const renderVoiceEvalHTML: (report: VoiceEvalReport, options?: {
53
138
  links?: VoiceEvalLink[];
54
139
  title?: string;
55
140
  }) => string;
141
/**
 * Renders a baseline comparison as a complete HTML document string.
 * `title` defaults to "AbsoluteJS Voice Eval Baseline"; `links`, when non-empty,
 * are rendered as a navigation bar above the heading.
 */
+ export declare const renderVoiceEvalBaselineHTML: (comparison: VoiceEvalBaselineComparison, options?: {
142
+ links?: VoiceEvalLink[];
143
+ title?: string;
144
+ }) => string;
145
/**
 * Renders a scenario eval report as a complete HTML document string, with one
 * section per scenario. `title` defaults to "AbsoluteJS Voice Scenario Evals";
 * `links`, when non-empty, are rendered as a navigation bar above the heading.
 */
+ export declare const renderVoiceScenarioEvalHTML: (report: VoiceScenarioEvalReport, options?: {
146
+ links?: VoiceEvalLink[];
147
+ title?: string;
148
+ }) => string;
56
149
  export declare const createVoiceEvalRoutes: (options: VoiceEvalRoutesOptions) => Elysia<"", {
57
150
  decorator: {};
58
151
  store: {};
package/dist/index.d.ts CHANGED
@@ -2,7 +2,7 @@ export { voice } from './plugin';
2
2
  export { createVoiceAssistant, createVoiceExperiment, summarizeVoiceAssistantRuns } from './assistant';
3
3
  export { createVoiceAssistantHealthHTMLHandler, createVoiceAssistantHealthJSONHandler, createVoiceAssistantHealthRoutes, renderVoiceAssistantHealthHTML, summarizeVoiceAssistantHealth } from './assistantHealth';
4
4
  export { buildVoiceDiagnosticsMarkdown, createVoiceDiagnosticsRoutes, resolveVoiceDiagnosticsTraceFilter } from './diagnosticsRoutes';
5
- export { createVoiceEvalRoutes, renderVoiceEvalHTML, runVoiceSessionEvals } from './evalRoutes';
5
+ export { compareVoiceEvalBaseline, createVoiceFileEvalBaselineStore, createVoiceEvalRoutes, renderVoiceEvalBaselineHTML, renderVoiceEvalHTML, renderVoiceScenarioEvalHTML, runVoiceScenarioEvals, runVoiceSessionEvals } from './evalRoutes';
6
6
  export { createVoiceSessionListRoutes, createVoiceSessionReplayHTMLHandler, createVoiceSessionReplayJSONHandler, createVoiceSessionReplayRoutes, createVoiceSessionsHTMLHandler, createVoiceSessionsJSONHandler, renderVoiceSessionsHTML, summarizeVoiceSessions, summarizeVoiceSessionReplay } from './sessionReplay';
7
7
  export { createVoiceAgent, createVoiceAgentSquad, createVoiceAgentTool } from './agent';
8
8
  export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore } from './fileStore';
@@ -40,7 +40,7 @@ export type { VoiceAssistant, VoiceAssistantArtifactPlan, VoiceAssistantExperime
40
40
  export type { VoiceAssistantHealthFailure, VoiceAssistantHealthHTMLHandlerOptions, VoiceAssistantHealthRoutesOptions, VoiceAssistantHealthSummary, VoiceAssistantHealthSummaryOptions } from './assistantHealth';
41
41
  export type { VoiceAssistantMemoryBinding, VoiceAssistantMemoryHandle, VoiceAssistantMemoryOptions, VoiceAssistantMemoryRecord, VoiceAssistantMemoryStore } from './assistantMemory';
42
42
  export type { VoiceDiagnosticsRoutesOptions } from './diagnosticsRoutes';
43
- export type { VoiceEvalLink, VoiceEvalReport, VoiceEvalRoutesOptions, VoiceEvalSessionReport, VoiceEvalStatus, VoiceEvalTrendBucket } from './evalRoutes';
43
+ export type { VoiceEvalBaselineComparison, VoiceEvalBaselineComparisonOptions, VoiceEvalBaselineStore, VoiceEvalBaselineSummary, VoiceEvalLink, VoiceEvalReport, VoiceEvalRoutesOptions, VoiceEvalSessionReport, VoiceEvalStatus, VoiceEvalTrendBucket, VoiceScenarioEvalDefinition, VoiceScenarioEvalReport, VoiceScenarioEvalResult, VoiceScenarioEvalSessionResult } from './evalRoutes';
44
44
  export type { VoiceSessionListHTMLHandlerOptions, VoiceSessionListItem, VoiceSessionListOptions, VoiceSessionListRoutesOptions, VoiceSessionListStatus, VoiceSessionReplay, VoiceSessionReplayHTMLHandlerOptions, VoiceSessionReplayOptions, VoiceSessionReplayRoutesOptions, VoiceSessionReplayTurn } from './sessionReplay';
45
45
  export type { AnthropicVoiceAssistantModelOptions, GeminiVoiceAssistantModelOptions, OpenAIVoiceAssistantModelOptions, VoiceProviderRouterEvent, VoiceProviderRouterFallbackMode, VoiceProviderRouterHealthOptions, VoiceProviderRouterOptions, VoiceProviderRouterPolicy, VoiceProviderRouterProviderHealth, VoiceProviderRouterProviderProfile, VoiceJSONAssistantModelHandler, VoiceJSONAssistantModelOptions } from './modelAdapters';
46
46
  export type { VoiceProviderHealthStatus, VoiceProviderHealthSummary, VoiceProviderHealthSummaryOptions } from './providerHealth';
package/dist/index.js CHANGED
@@ -7625,6 +7625,8 @@ var createVoiceDiagnosticsRoutes = (options) => {
7625
7625
  };
7626
7626
  // src/evalRoutes.ts
7627
7627
  import { Elysia as Elysia7 } from "elysia";
7628
+ import { mkdir } from "fs/promises";
7629
+ import { dirname } from "path";
7628
7630
 
7629
7631
  // src/qualityRoutes.ts
7630
7632
  import { Elysia as Elysia6 } from "elysia";
@@ -7974,6 +7976,25 @@ var createVoiceQualityRoutes = (options) => {
7974
7976
 
7975
7977
  // src/evalRoutes.ts
7976
7978
  var escapeHtml9 = (value) => value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
7979
/** Returns `count / total`, treating a `total` below 1 (e.g. 0) as 1 so an empty report yields 0 instead of NaN. */
var rate2 = (count, total) => count / Math.max(1, total);

/** Canonical form used for substring matching: surrounding whitespace removed, lower-cased. */
var normalizeSearchText = (value) => value.trim().toLowerCase();

/** Returns `value` when it is a string, otherwise `undefined`. */
var getString6 = (value) => typeof value === "string" ? value : undefined;

/** Returns `value` when it is a non-array object; otherwise `undefined` (or the falsy input itself). */
var getObject = (value) => value && typeof value === "object" && !Array.isArray(value) ? value : undefined;

/**
 * Resolves a dot-separated `path` (empty segments are ignored) inside `value`.
 *
 * Only own properties of plain (non-array) objects are followed. Inherited
 * names such as `constructor` or `toString` are deliberately not treated as
 * present, so a "required payload path" check cannot be satisfied by
 * `Object.prototype`.
 *
 * @returns the value at the path, or `undefined` when any segment is missing.
 */
var getPathValue = (value, path) => {
  let current = value;
  for (const part of path.split(".").filter(Boolean)) {
    const record = getObject(current);
    if (!record || !Object.prototype.hasOwnProperty.call(record, part)) {
      return;
    }
    current = record[part];
  }
  return current;
};

/**
 * Returns the entries of `needles` that do NOT occur in `haystack`
 * (comparison is trimmed and case-insensitive); an empty result means every
 * needle was found.
 */
var includesAll = (haystack, needles) => {
  const normalized = normalizeSearchText(haystack);
  return needles.filter((needle) => !normalized.includes(normalizeSearchText(needle)));
};
7977
7998
  var sessionTime = (events) => {
7978
7999
  const sorted = filterVoiceTraceEvents(events);
7979
8000
  return {
@@ -8044,7 +8065,171 @@ var runVoiceSessionEvals = async (options = {}) => {
8044
8065
  trend: buildTrend(limitedSessions)
8045
8066
  };
8046
8067
  };
8068
/**
 * Collects the string `payload.text` of every event of the given `type`,
 * skipping blank values, and joins them with newlines.
 */
var getSessionText = (events, type) => {
  const lines = [];
  for (const event of events) {
    if (event.type !== type) {
      continue;
    }
    const text = getString6(event.payload.text);
    if (text?.trim()) {
      lines.push(text);
    }
  }
  return lines.join("\n");
};

/**
 * Counts `session.error` events attributable to a provider: those whose
 * payload has `providerStatus === "error"` or a string `provider`.
 */
var countProviderErrors = (events) => {
  let total = 0;
  for (const event of events) {
    if (event.type !== "session.error") {
      continue;
    }
    if (event.payload.providerStatus === "error" || typeof event.payload.provider === "string") {
      total += 1;
    }
  }
  return total;
};
8071
/**
 * Checks one session's events against a scenario definition.
 *
 * Issues are reported in a fixed order: transcript text, assistant text,
 * required/forbidden lifecycle types, required/forbidden handoff actions,
 * disposition, turn count, session errors, provider errors, payload paths.
 *
 * @returns the event count, the collected issues, the session id, and "fail"
 *   when any issue was recorded (otherwise "pass").
 */
var evaluateScenarioSession = (scenario, sessionId, events) => {
  const eventsOfType = (eventType) => events.filter((event) => event.type === eventType);
  // Non-empty string values of payload[key] across all events of one type.
  const payloadStrings = (eventType, key) => {
    const found = [];
    for (const event of eventsOfType(eventType)) {
      const candidate = getString6(event.payload[key]);
      if (candidate) {
        found.push(candidate);
      }
    }
    return found;
  };

  const transcript = getSessionText(events, "turn.committed");
  const assistantReplies = getSessionText(events, "turn.assistant");
  const seenLifecycleTypes = payloadStrings("call.lifecycle", "type");
  const seenDispositions = payloadStrings("call.lifecycle", "disposition");
  const seenHandoffActions = payloadStrings("call.handoff", "action");
  const turns = eventsOfType("turn.committed").length;
  const sessionErrors = eventsOfType("session.error").length;
  const providerErrors = countProviderErrors(events);

  const issues = [];
  const report = (message) => {
    issues.push(message);
  };

  includesAll(transcript, scenario.requiredTranscriptIncludes ?? []).forEach((missing) => report(`Missing transcript text: ${missing}`));
  includesAll(assistantReplies, scenario.requiredAssistantIncludes ?? []).forEach((missing) => report(`Missing assistant text: ${missing}`));
  (scenario.requiredLifecycleTypes ?? []).filter((type) => !seenLifecycleTypes.includes(type)).forEach((type) => report(`Missing lifecycle event: ${type}`));
  (scenario.forbiddenLifecycleTypes ?? []).filter((type) => seenLifecycleTypes.includes(type)).forEach((type) => report(`Forbidden lifecycle event occurred: ${type}`));
  (scenario.requiredHandoffActions ?? []).filter((action) => !seenHandoffActions.includes(action)).forEach((action) => report(`Missing handoff action: ${action}`));
  (scenario.forbiddenHandoffActions ?? []).filter((action) => seenHandoffActions.includes(action)).forEach((action) => report(`Forbidden handoff action occurred: ${action}`));

  if (scenario.requiredDisposition && !seenDispositions.includes(scenario.requiredDisposition)) {
    report(`Missing disposition: ${scenario.requiredDisposition}`);
  }
  if (scenario.minTurns !== undefined && turns < scenario.minTurns) {
    report(`Expected at least ${scenario.minTurns} turn(s), saw ${turns}.`);
  }
  if (scenario.maxSessionErrors !== undefined && sessionErrors > scenario.maxSessionErrors) {
    report(`Expected at most ${scenario.maxSessionErrors} session error(s), saw ${sessionErrors}.`);
  }
  if (scenario.maxProviderErrors !== undefined && providerErrors > scenario.maxProviderErrors) {
    report(`Expected at most ${scenario.maxProviderErrors} provider error(s), saw ${providerErrors}.`);
  }

  for (const path of scenario.requiredPayloadPaths ?? []) {
    // A path is satisfied as soon as any event payload resolves it.
    const present = events.some((event) => getPathValue(event.payload, path) !== undefined);
    if (!present) {
      report(`Missing payload path: ${path}`);
    }
  }

  return {
    eventCount: events.length,
    issues,
    sessionId,
    status: issues.length === 0 ? "pass" : "fail"
  };
};
8131
/**
 * Runs every configured scenario against trace events grouped by session.
 *
 * Events come from `options.events` when given, otherwise from
 * `options.store.list()`; with neither, no events are evaluated. A scenario
 * with a `scenarioId` only considers sessions containing at least one event
 * with that `scenarioId`; other scenarios consider every session.
 *
 * @returns a report whose status is "fail" when any scenario failed.
 */
var runVoiceScenarioEvals = async (options = {}) => {
  const scenarios = options.scenarios ?? [];
  const events = filterVoiceTraceEvents(options.events ?? await options.store?.list() ?? []);
  // Group by session, appending in place. Re-copying the array for every
  // event made grouping quadratic in the number of events per session.
  const grouped = new Map();
  for (const event of events) {
    const bucket = grouped.get(event.sessionId);
    if (bucket) {
      bucket.push(event);
    } else {
      grouped.set(event.sessionId, [event]);
    }
  }
  const results = scenarios.map((scenario) => {
    const sessions = [...grouped.entries()]
      .filter(([, sessionEvents]) => scenario.scenarioId ? sessionEvents.some((event) => event.scenarioId === scenario.scenarioId) : true)
      .map(([sessionId, sessionEvents]) => evaluateScenarioSession(scenario, sessionId, filterVoiceTraceEvents(sessionEvents)))
      .sort((left, right) => left.sessionId.localeCompare(right.sessionId));
    const issues = [];
    // A scenario that matches no session fails unless minSessions is lowered to 0.
    const minSessions = scenario.minSessions ?? 1;
    if (sessions.length < minSessions) {
      issues.push(`Expected at least ${minSessions} matching session(s), saw ${sessions.length}.`);
    }
    const failed2 = sessions.filter((session) => session.status === "fail").length;
    const passed2 = sessions.length - failed2;
    return {
      description: scenario.description,
      failed: failed2,
      id: scenario.id,
      issues,
      label: scenario.label ?? scenario.id,
      matchedSessions: sessions.length,
      passed: passed2,
      sessions,
      status: issues.length > 0 || failed2 > 0 ? "fail" : "pass"
    };
  });
  const failed = results.filter((scenario) => scenario.status === "fail").length;
  const passed = results.length - failed;
  return {
    checkedAt: Date.now(),
    failed,
    passed,
    scenarios: results,
    status: failed > 0 ? "fail" : "pass",
    total: results.length
  };
};
8170
/**
 * Reduces an eval report to the figures used for baseline comparison: the
 * report's own counts, the sorted ids of its failed sessions, and the pass
 * rate as a fraction (a total of 0 is divided as 1, giving 0 rather than NaN).
 */
var summarizeEvalBaseline = (report) => {
  const failedSessionIds = report.sessions.filter((session) => session.status === "fail").map((session) => session.sessionId).sort();
  return {
    failed: report.failed,
    failedSessionIds,
    passRate: report.passed / Math.max(1, report.total),
    passed: report.passed,
    total: report.total
  };
};

/**
 * Compares the current eval report with a baseline and lists regressions.
 *
 * A regression reason is recorded when:
 * - the failed count grew by more than `options.maxFailedDelta` (default 0);
 * - the pass rate fell by more than `options.maxPassRateDrop` (default 0);
 * - `options.failOnNewFailedSessions` (default true) is on and some session
 *   fails now that is not in the baseline's failed list.
 *
 * @returns both summaries, current-minus-baseline deltas, new/recovered
 *   session ids, the reasons, and "fail" when any reason was recorded.
 */
var compareVoiceEvalBaseline = (currentReport, baselineReport, options = {}) => {
  const baseline = summarizeEvalBaseline(baselineReport);
  const current = summarizeEvalBaseline(currentReport);
  const maxFailedDelta = options.maxFailedDelta ?? 0;
  const maxPassRateDrop = options.maxPassRateDrop ?? 0;
  const failOnNewFailedSessions = options.failOnNewFailedSessions ?? true;
  // Pass rates are binary fractions, so a drop that equals the limit on paper
  // (0.8 -> 0.7 with a limit of 0.1) computes as 0.10000000000000009. The
  // tolerance keeps such rounding noise from being reported as a regression.
  const passRateTolerance = 1e-9;
  const baselineFailed = new Set(baseline.failedSessionIds);
  const currentFailed = new Set(current.failedSessionIds);
  const newFailedSessionIds = current.failedSessionIds.filter((sessionId) => !baselineFailed.has(sessionId));
  const recoveredSessionIds = baseline.failedSessionIds.filter((sessionId) => !currentFailed.has(sessionId));
  const deltas = {
    failed: current.failed - baseline.failed,
    passRate: current.passRate - baseline.passRate,
    passed: current.passed - baseline.passed,
    total: current.total - baseline.total
  };
  const reasons = [];
  if (deltas.failed > maxFailedDelta) {
    reasons.push(`Failed sessions increased by ${deltas.failed}, above allowed delta ${maxFailedDelta}.`);
  }
  const passRateDrop = -deltas.passRate;
  if (passRateDrop - maxPassRateDrop > passRateTolerance) {
    reasons.push(`Pass rate dropped by ${Math.abs(deltas.passRate).toFixed(4)}, above allowed drop ${maxPassRateDrop}.`);
  }
  if (failOnNewFailedSessions && newFailedSessionIds.length > 0) {
    reasons.push(`${newFailedSessionIds.length} session(s) failed that were not failing in the baseline.`);
  }
  return {
    baseline,
    checkedAt: Date.now(),
    current,
    deltas,
    newFailedSessionIds,
    recoveredSessionIds,
    reasons,
    status: reasons.length > 0 ? "fail" : "pass"
  };
};
8217
/**
 * Builds a baseline store that keeps the report in one JSON file.
 *
 * `get` resolves to `undefined` when the file does not exist or contains only
 * whitespace, otherwise to the parsed JSON. `set` ensures the parent directory
 * exists and writes the report as 2-space-indented JSON.
 */
var createVoiceFileEvalBaselineStore = (filePath) => {
  const get = async () => {
    const file = Bun.file(filePath);
    const present = await file.exists();
    if (!present) {
      return undefined;
    }
    const text = await file.text();
    if (!text.trim()) {
      return undefined;
    }
    return JSON.parse(text);
  };
  const set = async (report) => {
    await mkdir(dirname(filePath), { recursive: true });
    await Bun.write(filePath, JSON.stringify(report, null, 2));
  };
  return { get, set };
};
8047
8231
  var formatTime = (value) => value === undefined ? "unknown" : new Date(value).toLocaleString();
8232
+ var formatPercent = (value) => `${(value * 100).toFixed(2)}%`;
8048
8233
  var renderVoiceEvalHTML = (report, options = {}) => {
8049
8234
  const title = options.title ?? "AbsoluteJS Voice Evals";
8050
8235
  const links = options.links?.length ? `<nav>${options.links.map((link) => `<a href="${escapeHtml9(link.href)}">${escapeHtml9(link.label)}</a>`).join("")}</nav>` : "";
@@ -8055,6 +8240,24 @@ var renderVoiceEvalHTML = (report, options = {}) => {
8055
8240
  }).join("") : '<tr><td colspan="7">No sessions found.</td></tr>';
8056
8241
  return `<!doctype html><html lang="en"><head><meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" /><title>${escapeHtml9(title)}</title><style>body{font-family:ui-sans-serif,system-ui,sans-serif;margin:2rem;background:#f8f7f2;color:#181713}main{max-width:1180px;margin:auto}nav{display:flex;gap:.5rem;flex-wrap:wrap;margin-bottom:1rem}nav a{background:#181713;border-radius:999px;color:white;padding:.35rem .7rem;text-decoration:none}.status{border-radius:999px;display:inline-flex;font-weight:800;padding:.35rem .75rem}.pass{color:#166534}.fail{color:#991b1b}.status.pass{background:#dcfce7}.status.fail{background:#fee2e2}.grid{display:grid;gap:1rem;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));margin:1rem 0}.card{background:white;border:1px solid #e7e5e4;border-radius:1rem;padding:1rem}.card strong{display:block;font-size:2rem}table{border-collapse:collapse;background:white;width:100%;margin:1rem 0 2rem}td,th{border-bottom:1px solid #eee;padding:.75rem;text-align:left}tr.fail td{border-left:4px solid #dc2626}tr.pass td{border-left:4px solid #16a34a}</style></head><body><main>${links}<h1>${escapeHtml9(title)}</h1><p class="status ${report.status}">${report.status}</p><div class="grid"><article class="card"><span>Total</span><strong>${report.total}</strong></article><article class="card"><span>Passed</span><strong>${report.passed}</strong></article><article class="card"><span>Failed</span><strong>${report.failed}</strong></article></div><h2>Trend</h2><table><thead><tr><th>Day</th><th>Total</th><th>Passed</th><th>Failed</th></tr></thead><tbody>${trend}</tbody></table><h2>Session Eval Results</h2><table><thead><tr><th>Session</th><th>Status</th><th>Events</th><th>Turns</th><th>Errors</th><th>Last event</th><th>Failed metrics</th></tr></thead><tbody>${sessions}</tbody></table></main></body></html>`;
8057
8242
  };
8243
/**
 * Renders a baseline comparison as a standalone HTML page.
 *
 * The title defaults to "AbsoluteJS Voice Eval Baseline" and a nav bar is
 * emitted only when `options.links` is non-empty. Titles, link fields, reasons,
 * session ids and the formatted percentages pass through `escapeHtml9`; the
 * status and the numeric failed delta are interpolated as-is. Empty lists
 * render a placeholder item ("No baseline regressions detected." / "none").
 */
+ var renderVoiceEvalBaselineHTML = (comparison, options = {}) => {
8244
+ const title = options.title ?? "AbsoluteJS Voice Eval Baseline";
8245
+ const links = options.links?.length ? `<nav>${options.links.map((link) => `<a href="${escapeHtml9(link.href)}">${escapeHtml9(link.label)}</a>`).join("")}</nav>` : "";
8246
+ const reasons = comparison.reasons.length ? comparison.reasons.map((reason) => `<li>${escapeHtml9(reason)}</li>`).join("") : "<li>No baseline regressions detected.</li>";
8247
+ const newFailures = comparison.newFailedSessionIds.length ? comparison.newFailedSessionIds.map((id) => `<li>${escapeHtml9(id)}</li>`).join("") : "<li>none</li>";
8248
+ const recovered = comparison.recoveredSessionIds.length ? comparison.recoveredSessionIds.map((id) => `<li>${escapeHtml9(id)}</li>`).join("") : "<li>none</li>";
8249
+ return `<!doctype html><html lang="en"><head><meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" /><title>${escapeHtml9(title)}</title><style>body{font-family:ui-sans-serif,system-ui,sans-serif;margin:2rem;background:#f8f7f2;color:#181713}main{max-width:1000px;margin:auto}nav{display:flex;gap:.5rem;flex-wrap:wrap;margin-bottom:1rem}nav a{background:#181713;border-radius:999px;color:white;padding:.35rem .7rem;text-decoration:none}.status{border-radius:999px;display:inline-flex;font-weight:800;padding:.35rem .75rem}.pass{background:#dcfce7;color:#166534}.fail{background:#fee2e2;color:#991b1b}.grid{display:grid;gap:1rem;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));margin:1rem 0}.card{background:white;border:1px solid #e7e5e4;border-radius:1rem;padding:1rem}.card strong{display:block;font-size:2rem}section{background:white;border:1px solid #e7e5e4;border-radius:1rem;margin:1rem 0;padding:1rem}</style></head><body><main>${links}<h1>${escapeHtml9(title)}</h1><p class="status ${comparison.status}">${comparison.status}</p><div class="grid"><article class="card"><span>Baseline pass rate</span><strong>${escapeHtml9(formatPercent(comparison.baseline.passRate))}</strong></article><article class="card"><span>Current pass rate</span><strong>${escapeHtml9(formatPercent(comparison.current.passRate))}</strong></article><article class="card"><span>Failed delta</span><strong>${comparison.deltas.failed}</strong></article><article class="card"><span>Pass rate delta</span><strong>${escapeHtml9(formatPercent(comparison.deltas.passRate))}</strong></article></div><section><h2>Regression Reasons</h2><ul>${reasons}</ul></section><section><h2>New Failed Sessions</h2><ul>${newFailures}</ul></section><section><h2>Recovered Sessions</h2><ul>${recovered}</ul></section></main></body></html>`;
8250
+ };
8251
/**
 * Renders a scenario eval report as a standalone HTML page.
 *
 * The title defaults to "AbsoluteJS Voice Scenario Evals" and a nav bar is
 * emitted only when `options.links` is non-empty. Each scenario becomes a
 * section with its label, optional description, status, counts, scenario-level
 * issues and a table of per-session results; a scenario without sessions shows
 * "No matching sessions." and a report without scenarios shows
 * "No scenarios configured.". Text fields pass through `escapeHtml9`; statuses
 * used as CSS class names and numeric counts are interpolated as-is.
 */
+ var renderVoiceScenarioEvalHTML = (report, options = {}) => {
8252
+ const title = options.title ?? "AbsoluteJS Voice Scenario Evals";
8253
+ const links = options.links?.length ? `<nav>${options.links.map((link) => `<a href="${escapeHtml9(link.href)}">${escapeHtml9(link.label)}</a>`).join("")}</nav>` : "";
8254
+ const scenarios = report.scenarios.length ? report.scenarios.map((scenario) => {
8255
+ const scenarioIssues = scenario.issues.length ? `<ul>${scenario.issues.map((issue) => `<li>${escapeHtml9(issue)}</li>`).join("")}</ul>` : "";
8256
+ const sessions = scenario.sessions.length ? scenario.sessions.map((session) => `<tr class="${session.status}"><td>${escapeHtml9(session.sessionId)}</td><td>${escapeHtml9(session.status)}</td><td>${session.eventCount}</td><td>${escapeHtml9(session.issues.join(", ") || "none")}</td></tr>`).join("") : '<tr><td colspan="4">No matching sessions.</td></tr>';
8257
+ return `<section class="scenario ${scenario.status}"><h2>${escapeHtml9(scenario.label)}</h2>${scenario.description ? `<p>${escapeHtml9(scenario.description)}</p>` : ""}<p class="status ${scenario.status}">${scenario.status}</p><p>${scenario.passed} passed, ${scenario.failed} failed, ${scenario.matchedSessions} matched.</p>${scenarioIssues}<table><thead><tr><th>Session</th><th>Status</th><th>Events</th><th>Issues</th></tr></thead><tbody>${sessions}</tbody></table></section>`;
8258
+ }).join("") : "<section><p>No scenarios configured.</p></section>";
8259
+ return `<!doctype html><html lang="en"><head><meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" /><title>${escapeHtml9(title)}</title><style>body{font-family:ui-sans-serif,system-ui,sans-serif;margin:2rem;background:#f8f7f2;color:#181713}main{max-width:1180px;margin:auto}nav{display:flex;gap:.5rem;flex-wrap:wrap;margin-bottom:1rem}nav a{background:#181713;border-radius:999px;color:white;padding:.35rem .7rem;text-decoration:none}.status{border-radius:999px;display:inline-flex;font-weight:800;padding:.35rem .75rem}.status.pass{background:#dcfce7;color:#166534}.status.fail{background:#fee2e2;color:#991b1b}.grid{display:grid;gap:1rem;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));margin:1rem 0}.card,section{background:white;border:1px solid #e7e5e4;border-radius:1rem;padding:1rem}.card strong{display:block;font-size:2rem}section{margin:1rem 0}table{border-collapse:collapse;width:100%;margin-top:1rem}td,th{border-bottom:1px solid #eee;padding:.75rem;text-align:left}tr.fail td{border-left:4px solid #dc2626}tr.pass td{border-left:4px solid #16a34a}</style></head><body><main>${links}<h1>${escapeHtml9(title)}</h1><p class="status ${report.status}">${report.status}</p><div class="grid"><article class="card"><span>Total</span><strong>${report.total}</strong></article><article class="card"><span>Passed</span><strong>${report.passed}</strong></article><article class="card"><span>Failed</span><strong>${report.failed}</strong></article></div>${scenarios}</main></body></html>`;
8260
+ };
8058
8261
  var createVoiceEvalRoutes = (options) => {
8059
8262
  const path = options.path ?? "/evals";
8060
8263
  const routes = new Elysia7({
@@ -8066,6 +8269,16 @@ var createVoiceEvalRoutes = (options) => {
8066
8269
  store: options.store,
8067
8270
  thresholds: options.thresholds
8068
8271
  });
8272
// Resolves the baseline report: a function-valued `options.baseline` is called,
// a report-valued one is used directly, and otherwise the baseline store (if
// any) is consulted.
const getBaseline = async () => {
  if (typeof options.baseline === "function") {
    return options.baseline();
  }
  return options.baseline ?? await options.baselineStore?.get();
};
// Loads the current report and the baseline concurrently; yields `undefined`
// when there is no baseline to compare against.
const getBaselineComparison = async () => {
  const [current, baseline] = await Promise.all([getReport(), getBaseline()]);
  if (!baseline) {
    return undefined;
  }
  return compareVoiceEvalBaseline(current, baseline, options.baselineComparison);
};
// Runs the configured scenarios against the same event source as the session evals.
const getScenarioReport = () => {
  const { events, scenarios, store } = options;
  return runVoiceScenarioEvals({ events, scenarios, store });
};
8069
8282
  routes.get(path, async () => {
8070
8283
  const report = await getReport();
8071
8284
  return new Response(renderVoiceEvalHTML(report, {
@@ -8086,11 +8299,78 @@ var createVoiceEvalRoutes = (options) => {
8086
8299
  }
8087
8300
  return report;
8088
8301
  });
8302
// HTML view of the baseline comparison; answers 404 (JSON body) when no baseline exists.
routes.get(`${path}/baseline`, async ({ set }) => {
  const comparison = await getBaselineComparison();
  if (!comparison) {
    set.status = 404;
    // The status is set on the Response itself: a hand-built Response carries
    // its own status (200 by default), so `set.status` alone is not a reliable
    // way to make this branch answer 404.
    return Response.json({ error: "No voice eval baseline found." }, { status: 404 });
  }
  // `options.headers` is a HeadersInit, which may be a Headers instance or an
  // array of pairs; object spread would silently drop or mangle those, so the
  // headers are normalised through the Headers constructor. Caller-supplied
  // Content-Type still wins, as before.
  const headers = new Headers(options.headers);
  if (!headers.has("Content-Type")) {
    headers.set("Content-Type", "text/html; charset=utf-8");
  }
  return new Response(renderVoiceEvalBaselineHTML(comparison, {
    links: options.links,
    title: `${options.title ?? "AbsoluteJS Voice Evals"} Baseline`
  }), { headers });
});
8318
// JSON view of the baseline comparison; 404 with an error object when no baseline exists.
routes.get(`${path}/baseline/json`, async ({ set }) => {
  const comparison = await getBaselineComparison();
  if (comparison) {
    return comparison;
  }
  set.status = 404;
  return { error: "No voice eval baseline found." };
});
8326
// Health-check style endpoint: 404 without a baseline, 503 when the comparison
// failed, default status otherwise; the comparison is the body in the last two cases.
routes.get(`${path}/baseline/status`, async ({ set }) => {
  const comparison = await getBaselineComparison();
  if (comparison) {
    if (comparison.status === "fail") {
      set.status = 503;
    }
    return comparison;
  }
  set.status = 404;
  return { error: "No voice eval baseline found." };
});
8337
// Saves the current eval report as the new baseline; 501 when no store is configured.
routes.post(`${path}/baseline`, async ({ set }) => {
  const baselineStore = options.baselineStore;
  if (baselineStore) {
    const report = await getReport();
    await baselineStore.set(report);
    return { baseline: report, status: "saved" };
  }
  set.status = 501;
  return { error: "No voice eval baseline store configured." };
});
8349
// HTML view of the scenario eval report.
routes.get(`${path}/scenarios`, async () => {
  const report = await getScenarioReport();
  // `options.headers` is a HeadersInit, which may be a Headers instance or an
  // array of pairs; object spread would silently drop or mangle those, so the
  // headers are normalised through the Headers constructor. Caller-supplied
  // Content-Type still wins, as before.
  const headers = new Headers(options.headers);
  if (!headers.has("Content-Type")) {
    headers.set("Content-Type", "text/html; charset=utf-8");
  }
  return new Response(renderVoiceScenarioEvalHTML(report, {
    links: options.links,
    title: `${options.title ?? "AbsoluteJS Voice Evals"} Scenarios`
  }), { headers });
});
8361
// JSON view of the scenario eval report.
routes.get(`${path}/scenarios/json`, async () => {
  return getScenarioReport();
});
// Health-check style endpoint: same body as the JSON view, answered with 503
// when the report's status is "fail".
routes.get(`${path}/scenarios/status`, async ({ set }) => {
  const report = await getScenarioReport();
  const failing = report.status === "fail";
  if (failing) {
    set.status = 503;
  }
  return report;
});
8089
8369
  return routes;
8090
8370
  };
8091
8371
  // src/sessionReplay.ts
8092
8372
  import { Elysia as Elysia8 } from "elysia";
8093
- var getString6 = (value) => typeof value === "string" ? value : undefined;
8373
+ var getString7 = (value) => typeof value === "string" ? value : undefined;
8094
8374
  var escapeHtml10 = (value) => value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
8095
8375
  var increment3 = (record, key) => {
8096
8376
  record[key] = (record[key] ?? 0) + 1;
@@ -8120,14 +8400,14 @@ var buildReplayTurns = (events) => {
8120
8400
  case "turn.transcript":
8121
8401
  turn.transcripts.push({
8122
8402
  isFinal: event.payload.isFinal === true,
8123
- text: getString6(event.payload.text)
8403
+ text: getString7(event.payload.text)
8124
8404
  });
8125
8405
  break;
8126
8406
  case "turn.committed":
8127
- turn.committedText = getString6(event.payload.text);
8407
+ turn.committedText = getString7(event.payload.text);
8128
8408
  break;
8129
8409
  case "turn.assistant": {
8130
- const text = getString6(event.payload.text);
8410
+ const text = getString7(event.payload.text);
8131
8411
  if (text) {
8132
8412
  turn.assistantReplies.push(text);
8133
8413
  }
@@ -8196,7 +8476,7 @@ var summarizeVoiceSessions = async (options = {}) => {
8196
8476
  let latestOutcome;
8197
8477
  let errorCount = 0;
8198
8478
  for (const event of sorted) {
8199
- const provider = getString6(event.payload.provider);
8479
+ const provider = getString7(event.payload.provider);
8200
8480
  if (provider) {
8201
8481
  providers.add(provider);
8202
8482
  }
@@ -8204,7 +8484,7 @@ var summarizeVoiceSessions = async (options = {}) => {
8204
8484
  errorCount += 1;
8205
8485
  increment3(providerErrors, provider ?? "unknown");
8206
8486
  }
8207
- const outcome = getString6(event.payload.outcome);
8487
+ const outcome = getString7(event.payload.outcome);
8208
8488
  if (outcome) {
8209
8489
  latestOutcome = outcome;
8210
8490
  }
@@ -8331,7 +8611,7 @@ var createVoiceSessionReplayRoutes = (options) => {
8331
8611
  return routes;
8332
8612
  };
8333
8613
  // src/fileStore.ts
8334
- import { mkdir, readFile, readdir, rename, rm, writeFile } from "fs/promises";
8614
+ import { mkdir as mkdir2, readFile, readdir, rename, rm, writeFile } from "fs/promises";
8335
8615
  import { join } from "path";
8336
8616
  var listJsonFiles = async (directory) => {
8337
8617
  try {
@@ -8351,7 +8631,7 @@ var resolveFilePath = (directory, id) => join(directory, encodeStoreId(id));
8351
8631
  var createMemoryStoreId = (input) => `${input.assistantId}:${input.namespace}:${input.key}`;
8352
8632
  var readJsonFile = async (path) => JSON.parse(await readFile(path, "utf8"));
8353
8633
  var writeJsonFile = async (path, value, options) => {
8354
- await mkdir(options.directory, {
8634
+ await mkdir2(options.directory, {
8355
8635
  recursive: true
8356
8636
  });
8357
8637
  const tempPath = `${path}.${crypto.randomUUID()}.tmp`;
@@ -9464,7 +9744,7 @@ import { Elysia as Elysia10 } from "elysia";
9464
9744
  // src/resilienceRoutes.ts
9465
9745
  import { Elysia as Elysia9 } from "elysia";
9466
9746
  var escapeHtml11 = (value) => value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
9467
- var getString7 = (value) => typeof value === "string" ? value : undefined;
9747
+ var getString8 = (value) => typeof value === "string" ? value : undefined;
9468
9748
  var getNumber4 = (value) => typeof value === "number" && Number.isFinite(value) ? value : undefined;
9469
9749
  var getBoolean2 = (value) => value === true;
9470
9750
  var isProviderStatus2 = (value) => value === "error" || value === "fallback" || value === "success";
@@ -9474,23 +9754,23 @@ var listVoiceRoutingEvents = (events) => {
9474
9754
  if (event.type !== "session.error") {
9475
9755
  continue;
9476
9756
  }
9477
- const provider = getString7(event.payload.provider);
9757
+ const provider = getString8(event.payload.provider);
9478
9758
  const providerStatus = isProviderStatus2(event.payload.providerStatus) ? event.payload.providerStatus : undefined;
9479
9759
  if (!provider || !providerStatus) {
9480
9760
  continue;
9481
9761
  }
9482
- const kind = getString7(event.payload.kind);
9762
+ const kind = getString8(event.payload.kind);
9483
9763
  routingEvents.push({
9484
9764
  at: event.at,
9485
9765
  attempt: getNumber4(event.payload.attempt),
9486
9766
  elapsedMs: getNumber4(event.payload.elapsedMs),
9487
- error: getString7(event.payload.error),
9488
- fallbackProvider: getString7(event.payload.fallbackProvider),
9767
+ error: getString8(event.payload.error),
9768
+ fallbackProvider: getString8(event.payload.fallbackProvider),
9489
9769
  kind: kind === "stt" || kind === "tts" ? kind : "llm",
9490
9770
  latencyBudgetMs: getNumber4(event.payload.latencyBudgetMs),
9491
- operation: getString7(event.payload.operation),
9771
+ operation: getString8(event.payload.operation),
9492
9772
  provider,
9493
- selectedProvider: getString7(event.payload.selectedProvider),
9773
+ selectedProvider: getString8(event.payload.selectedProvider),
9494
9774
  sessionId: event.sessionId,
9495
9775
  status: providerStatus,
9496
9776
  timedOut: getBoolean2(event.payload.timedOut),
@@ -13032,6 +13312,7 @@ export {
13032
13312
  shapeTelephonyAssistantText,
13033
13313
  selectVoiceTraceEventsForPrune,
13034
13314
  runVoiceSessionEvals,
13315
+ runVoiceScenarioEvals,
13035
13316
  resolveVoiceTraceRedactionOptions,
13036
13317
  resolveVoiceSTTRoutingStrategy,
13037
13318
  resolveVoiceRuntimePreset,
@@ -13049,12 +13330,14 @@ export {
13049
13330
  renderVoiceTraceMarkdown,
13050
13331
  renderVoiceTraceHTML,
13051
13332
  renderVoiceSessionsHTML,
13333
+ renderVoiceScenarioEvalHTML,
13052
13334
  renderVoiceResilienceHTML,
13053
13335
  renderVoiceQualityHTML,
13054
13336
  renderVoiceProviderHealthHTML,
13055
13337
  renderVoiceOpsConsoleHTML,
13056
13338
  renderVoiceHandoffHealthHTML,
13057
13339
  renderVoiceEvalHTML,
13340
+ renderVoiceEvalBaselineHTML,
13058
13341
  renderVoiceCallReviewMarkdown,
13059
13342
  renderVoiceCallReviewHTML,
13060
13343
  renderVoiceAssistantHealthHTML,
@@ -13176,6 +13459,7 @@ export {
13176
13459
  createVoiceFileReviewStore,
13177
13460
  createVoiceFileIntegrationEventStore,
13178
13461
  createVoiceFileExternalObjectMapStore,
13462
+ createVoiceFileEvalBaselineStore,
13179
13463
  createVoiceFileAssistantMemoryStore,
13180
13464
  createVoiceExternalObjectMapId,
13181
13465
  createVoiceExternalObjectMap,
@@ -13213,6 +13497,7 @@ export {
13213
13497
  createAnthropicVoiceAssistantModel,
13214
13498
  conditionAudioChunk,
13215
13499
  completeVoiceOpsTask,
13500
+ compareVoiceEvalBaseline,
13216
13501
  claimVoiceOpsTask,
13217
13502
  buildVoiceTraceReplay,
13218
13503
  buildVoiceOpsTaskFromSLABreach,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.42",
3
+ "version": "0.0.22-beta.44",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",