@ouro.bot/cli 0.1.0-alpha.584 → 0.1.0-alpha.586
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +16 -0
- package/dist/mailroom/search-cache.js +12 -0
- package/dist/repertoire/tools-mail.js +62 -8
- package/dist/senses/voice/realtime-eval.js +12 -3
- package/dist/senses/voice/realtime-trace.js +402 -0
- package/dist/senses/voice-realtime-eval-command.js +99 -0
- package/dist/senses/voice-realtime-eval-entry.js +6 -10
- package/package.json +1 -1
package/changelog.json
CHANGED
|
@@ -1,6 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.586",
|
|
6
|
+
"changes": [
|
|
7
|
+
"`mail_status`, `mail_recent`, `mail_search`, and `mail_index_refresh` now flag a 'mail substrate divergence' when the encrypted mailroom store reports zero visible messages but the on-disk search cache still holds documents from prior imports — the post-rotation / hosted→local-fallback / wiped-store state that previously rendered as a silent 'no mail' answer indistinguishable from a clean onboarding.",
|
|
8
|
+
"Mail absence answers from a divergent runtime now point at vault inspection (`mailroom.mode`, `mailroom.azureAccountUrl`, `mailroom.storePath`) and re-import recovery, so agents stop treating a broken substrate as evidence that the human inbox is empty.",
|
|
9
|
+
"The substrate-divergence snapshot counts cache `.json` entries via `readdir` and ignores subdirectories and non-json files, so the diagnostic stays cheap on bundles holding tens of thousands of cached documents."
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"version": "0.1.0-alpha.585",
|
|
14
|
+
"changes": [
|
|
15
|
+
"Voice realtime evals now support replayable trace JSON artifacts that preserve transport/source metadata, normalize SIP/Twilio/Realtime event names into the shared Voice eval timeline, validate causal ordering, and keep ignored provider-noise events visible without grading them.",
|
|
16
|
+
"`npm run voice:eval -- --trace <file>` grades one or more trace artifacts alongside the built-in happy/known-bad suite, treating expected-fail traces as canaries while failing on unexpected trace failures or unexpected passes.",
|
|
17
|
+
"Voice trace replay has golden fixtures for clean calls, barge-in, tool holding phrases, mid-turn hangup, delayed audio/transcript behavior, duplicate/late provider events, and redaction-safe summaries so phone regressions can be debugged without a human live call."
|
|
18
|
+
]
|
|
19
|
+
},
|
|
4
20
|
{
|
|
5
21
|
"version": "0.1.0-alpha.584",
|
|
6
22
|
"changes": [
|
|
@@ -38,6 +38,7 @@ exports.buildMailSearchCacheDocument = buildMailSearchCacheDocument;
|
|
|
38
38
|
exports.upsertMailSearchCacheDocument = upsertMailSearchCacheDocument;
|
|
39
39
|
exports.syncMailSearchCacheMetadata = syncMailSearchCacheMetadata;
|
|
40
40
|
exports.searchMailSearchCache = searchMailSearchCache;
|
|
41
|
+
exports.snapshotMailSearchCache = snapshotMailSearchCache;
|
|
41
42
|
exports.readMailSearchCoverageRecord = readMailSearchCoverageRecord;
|
|
42
43
|
exports.writeMailSearchCoverageRecord = writeMailSearchCoverageRecord;
|
|
43
44
|
exports.resetMailSearchCacheForTests = resetMailSearchCacheForTests;
|
|
@@ -208,6 +209,17 @@ function searchMailSearchCache(filters, options) {
|
|
|
208
209
|
}
|
|
209
210
|
return typeof filters.limit === "number" ? ordered.slice(0, filters.limit) : ordered;
|
|
210
211
|
}
|
|
212
|
+
/**
 * Count the search-cache documents currently stored on disk for an agent.
 *
 * Only regular files whose name ends in ".json" directly inside the cache
 * directory are counted; subdirectories and other files are ignored. The
 * files are never opened, only listed.
 *
 * @param agentId - Agent identifier, forwarded to `cacheDir`.
 * @param options - Forwarded unchanged to `cacheDir`.
 * @returns `{ totalDocuments }`; 0 when the cache directory does not exist.
 * @throws Re-throws any listing failure other than ENOENT / ENOTDIR.
 */
function snapshotMailSearchCache(agentId, options) {
    const dir = cacheDir(agentId, options);
    let entries;
    try {
        entries = fs.readdirSync(dir, { withFileTypes: true });
    }
    catch (error) {
        // Listing directly (instead of existsSync + readdirSync) avoids the
        // check-then-use race: a directory that is missing, removed mid-call,
        // or shadowed by a plain file simply means "no cached documents".
        if (error && (error.code === "ENOENT" || error.code === "ENOTDIR"))
            return { totalDocuments: 0 };
        throw error;
    }
    const totalDocuments = entries.filter((entry) => entry.isFile() && entry.name.endsWith(".json")).length;
    return { totalDocuments };
}
|
|
211
223
|
function readMailSearchCoverageRecord(key, options) {
|
|
212
224
|
const document = readJsonDocument(coveragePath(key, options));
|
|
213
225
|
if (!document || document.schemaVersion !== 1 || document.agentId !== key.agentId)
|
|
@@ -519,13 +519,42 @@ async function renderSourceGrantStatus(config, agentId) {
|
|
|
519
519
|
];
|
|
520
520
|
}
|
|
521
521
|
}
|
|
522
|
+
/**
 * When the encrypted mail store is empty but the on-disk search cache holds
 * decrypted documents from prior imports, the agent has lost access to mail it
 * previously had — typically because mail keys were rotated, the hosted blob
 * pointer was dropped from the vault, or the encrypted store was wiped. The
 * tools used to silently report "0 messages" in that state, which is the same
 * answer they give for a fresh onboarding — so the agent could not tell a
 * substrate failure from a clean slate. Surface the divergence loudly so the
 * agent treats absence answers as suspect until the substrate is repaired.
 *
 * @param input - `{ agentId, storeKind, storeLabel, visibleMessageCount, snapshot? }`.
 *   `snapshot` (shape `{ totalDocuments }`) overrides the on-disk cache lookup.
 * @returns A three-line, newline-joined warning, or `null` when there is no
 *   divergence to report.
 */
function describeMailSubstrateDivergence(input) {
    // Only a confirmed zero count supports the "0 visible messages" claim made
    // below; a missing or non-numeric count is unknown, not empty.
    if (input.visibleMessageCount !== 0)
        return null;
    const snapshot = input.snapshot ?? (0, search_cache_1.snapshotMailSearchCache)(input.agentId);
    if (snapshot.totalDocuments === 0)
        return null;
    return [
        `mail substrate divergence: encrypted ${input.storeKind} store at ${input.storeLabel} has 0 visible messages, but the on-disk search cache for ${input.agentId} holds ${snapshot.totalDocuments} document(s).`,
        "interpretation: this is not a fresh onboarding — prior imports populated the cache, then the encrypted store became unreachable (common causes: mail key rotation, hosted-store pointer dropped from vault during repair, encrypted store wiped). Mail absence answers from this runtime are not authoritative until the substrate is repaired.",
        `agent next move: inspect runtime credentials (mailroom.mode, mailroom.azureAccountUrl, mailroom.storePath) — if the agent was previously hosted, the vault is missing the hosted pointer; coordinate with the human to restore it. If local mode is correct, re-import via 'ouro mail import-mbox --agent ${input.agentId} --owner-email <human-email> --source <source> --discover' so the encrypted store catches up to the cache.`,
    ].join("\n");
}
|
|
522
544
|
async function renderEmptyMailResult(input) {
|
|
523
545
|
const anyVisible = await input.store.listMessages({ agentId: input.agentId, limit: 1 });
|
|
524
546
|
if (anyVisible.length === 0) {
|
|
525
547
|
const sourceGrantStatus = await renderSourceGrantStatus(input.config, input.agentId);
|
|
548
|
+
const divergence = describeMailSubstrateDivergence({
|
|
549
|
+
agentId: input.agentId,
|
|
550
|
+
storeKind: input.storeKind,
|
|
551
|
+
storeLabel: input.storeLabel,
|
|
552
|
+
visibleMessageCount: 0,
|
|
553
|
+
});
|
|
526
554
|
return [
|
|
527
555
|
"No visible mail yet.",
|
|
528
556
|
`mail onboarding status: Mailroom is provisioned for ${input.config.mailboxAddress}, but this agent's encrypted store has 0 messages.`,
|
|
557
|
+
...(divergence ? [divergence] : []),
|
|
529
558
|
...sourceGrantStatus,
|
|
530
559
|
"interpretation: this is not evidence that the human's HEY inbox is empty; Agent Mail has not yet received or imported mail visible to this agent.",
|
|
531
560
|
`agent next move: guide setup from docs/agent-mail-setup.md. If HEY mail is needed, ensure the delegated hey alias exists, first try ouro mail import-mbox --agent ${input.agentId} --owner-email <human-email> --source hey --discover so Ouro can find a browser-downloaded export in .playwright-mcp or Downloads. Only ask the human for a file path if discovery cannot find a unique MBOX, then run ouro mail import-mbox --agent ${input.agentId} --owner-email <human-email> --source hey --file <mbox-path>. Verify with mail_recent/mail_search/Ouro Mailbox.`,
|
|
@@ -932,8 +961,8 @@ async function searchSuccessfulImportArchives(input) {
|
|
|
932
961
|
}
|
|
933
962
|
return matches.sort((left, right) => right.receivedAt.localeCompare(left.receivedAt));
|
|
934
963
|
}
|
|
935
|
-
async function renderMailStatus(
|
|
936
|
-
const sourceGrantStatus = await renderSourceGrantStatus(config, agentId);
|
|
964
|
+
async function renderMailStatus(input) {
|
|
965
|
+
const sourceGrantStatus = await renderSourceGrantStatus(input.config, input.agentId);
|
|
937
966
|
const delegatedLines = sourceGrantStatus
|
|
938
967
|
.flatMap((line) => line.startsWith("delegated source aliases: ")
|
|
939
968
|
? line
|
|
@@ -949,16 +978,24 @@ async function renderMailStatus(agentId, config, storeLabel) {
|
|
|
949
978
|
: `- delegated: ${grant}`;
|
|
950
979
|
})
|
|
951
980
|
: [`- ${line}`]);
|
|
981
|
+
const visible = await input.store.listMessages({ agentId: input.agentId, limit: 1 });
|
|
982
|
+
const divergence = describeMailSubstrateDivergence({
|
|
983
|
+
agentId: input.agentId,
|
|
984
|
+
storeKind: input.storeKind,
|
|
985
|
+
storeLabel: input.storeLabel,
|
|
986
|
+
visibleMessageCount: visible.length,
|
|
987
|
+
});
|
|
952
988
|
return [
|
|
953
|
-
`mailbox: ${config.mailboxAddress}`,
|
|
954
|
-
`store: ${storeLabel}`,
|
|
989
|
+
`mailbox: ${input.config.mailboxAddress}`,
|
|
990
|
+
`store: ${input.storeLabel}`,
|
|
991
|
+
...(divergence ? [divergence] : []),
|
|
955
992
|
"lane map:",
|
|
956
|
-
`- native: ${config.mailboxAddress}`,
|
|
993
|
+
`- native: ${input.config.mailboxAddress}`,
|
|
957
994
|
...delegatedLines,
|
|
958
995
|
"recent archives:",
|
|
959
|
-
...renderRecentArchiveStatus(agentId),
|
|
996
|
+
...renderRecentArchiveStatus(input.agentId),
|
|
960
997
|
"recent imports:",
|
|
961
|
-
...renderRecentImportOperations(agentId),
|
|
998
|
+
...renderRecentImportOperations(input.agentId),
|
|
962
999
|
].join("\n");
|
|
963
1000
|
}
|
|
964
1001
|
exports.mailToolDefinitions = [
|
|
@@ -985,7 +1022,13 @@ exports.mailToolDefinitions = [
|
|
|
985
1022
|
tool: "mail_status",
|
|
986
1023
|
reason: "mail operating model overview",
|
|
987
1024
|
});
|
|
988
|
-
return renderMailStatus(
|
|
1025
|
+
return renderMailStatus({
|
|
1026
|
+
agentId: resolved.agentName,
|
|
1027
|
+
config: resolved.config,
|
|
1028
|
+
store: resolved.store,
|
|
1029
|
+
storeKind: resolved.storeKind,
|
|
1030
|
+
storeLabel: resolved.storeLabel,
|
|
1031
|
+
});
|
|
989
1032
|
},
|
|
990
1033
|
summaryKeys: [],
|
|
991
1034
|
},
|
|
@@ -1039,6 +1082,8 @@ exports.mailToolDefinitions = [
|
|
|
1039
1082
|
agentId: resolved.agentName,
|
|
1040
1083
|
config: resolved.config,
|
|
1041
1084
|
store: resolved.store,
|
|
1085
|
+
storeKind: resolved.storeKind,
|
|
1086
|
+
storeLabel: resolved.storeLabel,
|
|
1042
1087
|
...(scope ? { scope } : {}),
|
|
1043
1088
|
...(args.source ? { source: args.source } : {}),
|
|
1044
1089
|
});
|
|
@@ -1394,6 +1439,8 @@ exports.mailToolDefinitions = [
|
|
|
1394
1439
|
agentId: resolved.agentName,
|
|
1395
1440
|
config: resolved.config,
|
|
1396
1441
|
store: resolved.store,
|
|
1442
|
+
storeKind: resolved.storeKind,
|
|
1443
|
+
storeLabel: resolved.storeLabel,
|
|
1397
1444
|
...(scope ? { scope } : {}),
|
|
1398
1445
|
...(args.source ? { source: args.source } : {}),
|
|
1399
1446
|
}), {
|
|
@@ -1485,6 +1532,12 @@ exports.mailToolDefinitions = [
|
|
|
1485
1532
|
reason: args.reason || "refresh mail search index",
|
|
1486
1533
|
});
|
|
1487
1534
|
const { coverage } = refreshed;
|
|
1535
|
+
const divergence = describeMailSubstrateDivergence({
|
|
1536
|
+
agentId: resolved.agentName,
|
|
1537
|
+
storeKind: resolved.storeKind,
|
|
1538
|
+
storeLabel: resolved.storeLabel,
|
|
1539
|
+
visibleMessageCount: coverage.visibleMessageCount,
|
|
1540
|
+
});
|
|
1488
1541
|
return [
|
|
1489
1542
|
"mail search index refreshed.",
|
|
1490
1543
|
`scope: ${scope ?? "all"}${args.source ? `; source: ${args.source}` : ""}${placement ? `; placement: ${placement}` : ""}`,
|
|
@@ -1496,6 +1549,7 @@ exports.mailToolDefinitions = [
|
|
|
1496
1549
|
`indexed at: ${coverage.indexedAt}`,
|
|
1497
1550
|
...(coverage.oldestReceivedAt ? [`oldest: ${coverage.oldestReceivedAt}`] : []),
|
|
1498
1551
|
...(coverage.newestReceivedAt ? [`newest: ${coverage.newestReceivedAt}`] : []),
|
|
1552
|
+
...(divergence ? [divergence] : []),
|
|
1499
1553
|
].join("\n");
|
|
1500
1554
|
}
|
|
1501
1555
|
catch (error) {
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.gradeVoiceRealtimeEvalTimeline = gradeVoiceRealtimeEvalTimeline;
|
|
4
4
|
exports.buildVoiceRealtimeEvalHappyPath = buildVoiceRealtimeEvalHappyPath;
|
|
5
|
+
exports.buildVoiceRealtimeEvalDefaultExpectation = buildVoiceRealtimeEvalDefaultExpectation;
|
|
5
6
|
exports.runBuiltInVoiceRealtimeEvalSuite = runBuiltInVoiceRealtimeEvalSuite;
|
|
6
7
|
exports.summarizeVoiceRealtimeEvalSuite = summarizeVoiceRealtimeEvalSuite;
|
|
7
8
|
const runtime_1 = require("../../nerves/runtime");
|
|
@@ -232,7 +233,15 @@ function gradeHangup(events, findings) {
|
|
|
232
233
|
function gradeOverlappingResponses(events, findings) {
|
|
233
234
|
for (const response of allEvents(events, "response.requested")) {
|
|
234
235
|
const activeAudio = allEvents(events, "assistant.audio.started").find((started) => {
|
|
235
|
-
const done = events.find((event) =>
|
|
236
|
+
const done = events.find((event) => {
|
|
237
|
+
if (event.atMs < started.atMs)
|
|
238
|
+
return false;
|
|
239
|
+
if (event.type === "response.truncated" || event.type === "call.hangup.requested")
|
|
240
|
+
return true;
|
|
241
|
+
if (event.type !== "assistant.audio.done" && event.type !== "assistant.transcript.done")
|
|
242
|
+
return false;
|
|
243
|
+
return !started.correlationId || !event.correlationId || event.correlationId === started.correlationId;
|
|
244
|
+
});
|
|
236
245
|
return response.atMs > started.atMs && (!done || response.atMs < done.atMs);
|
|
237
246
|
});
|
|
238
247
|
if (activeAudio) {
|
|
@@ -337,7 +346,7 @@ function buildVoiceRealtimeEvalHappyPath() {
|
|
|
337
346
|
{ type: "call.ended", atMs: 5_100, source: { transport: "openai-sip", id: "sip-call-1" } },
|
|
338
347
|
];
|
|
339
348
|
}
|
|
340
|
-
function
|
|
349
|
+
function buildVoiceRealtimeEvalDefaultExpectation() {
|
|
341
350
|
return {
|
|
342
351
|
maxFirstAssistantAudioMs: 1_200,
|
|
343
352
|
maxUserTurnResponseMs: 900,
|
|
@@ -367,7 +376,7 @@ function buildKnownBadLatencyPath() {
|
|
|
367
376
|
});
|
|
368
377
|
}
|
|
369
378
|
function runBuiltInVoiceRealtimeEvalSuite() {
|
|
370
|
-
const expectation =
|
|
379
|
+
const expectation = buildVoiceRealtimeEvalDefaultExpectation();
|
|
371
380
|
return [
|
|
372
381
|
gradeVoiceRealtimeEvalTimeline("voice-happy-path", buildVoiceRealtimeEvalHappyPath(), expectation),
|
|
373
382
|
gradeVoiceRealtimeEvalTimeline("voice-known-bad-latency", buildKnownBadLatencyPath(), expectation),
|
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.parseVoiceRealtimeEvalTraceArtifact = parseVoiceRealtimeEvalTraceArtifact;
|
|
37
|
+
exports.loadVoiceRealtimeEvalTraceArtifact = loadVoiceRealtimeEvalTraceArtifact;
|
|
38
|
+
exports.resolveVoiceRealtimeEvalTraceExpectation = resolveVoiceRealtimeEvalTraceExpectation;
|
|
39
|
+
exports.traceArtifactToVoiceRealtimeEvalTimeline = traceArtifactToVoiceRealtimeEvalTimeline;
|
|
40
|
+
exports.gradeVoiceRealtimeEvalTrace = gradeVoiceRealtimeEvalTrace;
|
|
41
|
+
exports.formatVoiceRealtimeEvalTraceReport = formatVoiceRealtimeEvalTraceReport;
|
|
42
|
+
const fs = __importStar(require("fs"));
|
|
43
|
+
const runtime_1 = require("../../nerves/runtime");
|
|
44
|
+
const realtime_eval_1 = require("./realtime-eval");
|
|
45
|
+
const transports = new Set([
|
|
46
|
+
"browser-meeting",
|
|
47
|
+
"openai-realtime-control",
|
|
48
|
+
"openai-sip",
|
|
49
|
+
"twilio-media-stream",
|
|
50
|
+
"voice-eval",
|
|
51
|
+
]);
|
|
52
|
+
const normalizedEvents = new Set([
|
|
53
|
+
"assistant.audio.done",
|
|
54
|
+
"assistant.audio.started",
|
|
55
|
+
"assistant.transcript.done",
|
|
56
|
+
"barge_in.detected",
|
|
57
|
+
"call.connected",
|
|
58
|
+
"call.ended",
|
|
59
|
+
"call.hangup.requested",
|
|
60
|
+
"response.requested",
|
|
61
|
+
"response.truncated",
|
|
62
|
+
"session.updated",
|
|
63
|
+
"tool.call.completed",
|
|
64
|
+
"tool.call.started",
|
|
65
|
+
"tool.holding.started",
|
|
66
|
+
"transport.playback_cleared",
|
|
67
|
+
"user.transcript.done",
|
|
68
|
+
"voice.context.injected",
|
|
69
|
+
]);
|
|
70
|
+
const rawEventMap = new Map([
|
|
71
|
+
["openai.realtime.call.hangup.sent", "call.hangup.requested"],
|
|
72
|
+
["openai.realtime.conversation.item.truncate.sent", "response.truncated"],
|
|
73
|
+
["openai.realtime.input_audio_buffer.speech_started", "barge_in.detected"],
|
|
74
|
+
["openai.realtime.input_audio_transcription.completed", "user.transcript.done"],
|
|
75
|
+
["openai.realtime.output_audio.delta", "assistant.audio.started"],
|
|
76
|
+
["openai.realtime.output_audio.done", "assistant.audio.done"],
|
|
77
|
+
["openai.realtime.output_audio_transcript.done", "assistant.transcript.done"],
|
|
78
|
+
["openai.realtime.response.create.sent", "response.requested"],
|
|
79
|
+
["openai.realtime.response.function_call_arguments.done", "tool.call.started"],
|
|
80
|
+
["openai.realtime.session.updated", "session.updated"],
|
|
81
|
+
["openai.realtime.tool.completed", "tool.call.completed"],
|
|
82
|
+
["openai.sip.call.connected", "call.connected"],
|
|
83
|
+
["openai.sip.call.ended", "call.ended"],
|
|
84
|
+
["twilio.call.ended", "call.ended"],
|
|
85
|
+
["twilio.media.clear.sent", "transport.playback_cleared"],
|
|
86
|
+
["twilio.media.start", "call.connected"],
|
|
87
|
+
["voice.hangup.requested", "call.hangup.requested"],
|
|
88
|
+
["voice.tool_holding.started", "tool.holding.started"],
|
|
89
|
+
]);
|
|
90
|
+
const expectedOutcomes = new Set(["expected-fail", "fail", "pass"]);
|
|
91
|
+
const expectationProfiles = new Set(["voice-phone-default"]);
|
|
92
|
+
/**
 * Narrow a value to a plain record.
 *
 * @param value - Anything.
 * @returns The same value when it is a non-null, non-array object; otherwise `undefined`.
 */
function objectRecord(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value))
        return undefined;
    return value;
}
|
|
95
|
+
/**
 * Prefix a message with its source label.
 *
 * @param prefix - Source label; when falsy the detail is returned untouched.
 * @param detail - Message text.
 * @returns `"<prefix>: <detail>"`, or just `detail` without a prefix.
 */
function label(prefix, detail) {
    if (!prefix)
        return detail;
    return `${prefix}: ${detail}`;
}
|
|
98
|
+
/**
 * Validate a mandatory string field.
 *
 * @param value - Candidate value.
 * @param name - Field name used in the error message.
 * @param sourceLabel - Label prefixed to the error message.
 * @returns The value with surrounding whitespace trimmed.
 * @throws Error when the value is not a string or is blank after trimming.
 */
function requiredString(value, name, sourceLabel) {
    const trimmed = typeof value === "string" ? value.trim() : "";
    if (trimmed === "")
        throw new Error(label(sourceLabel, `${name} must be a non-empty string`));
    return trimmed;
}
|
|
103
|
+
/**
 * Validate an optional string field.
 *
 * @param value - Candidate value.
 * @param name - Field name used in the error message.
 * @param sourceLabel - Label prefixed to the error message.
 * @returns `undefined` when absent, otherwise the string unchanged (not trimmed).
 * @throws Error when the value is present but not a string.
 */
function optionalString(value, name, sourceLabel) {
    if (value === undefined || typeof value === "string")
        return value;
    throw new Error(label(sourceLabel, `${name} must be a string`));
}
|
|
110
|
+
/**
 * Parse the optional `source` descriptor of a trace event.
 *
 * @param value - Raw `source` value from the artifact.
 * @param sourceLabel - Label prefixed to error messages.
 * @returns `undefined` when absent; otherwise `{ transport }` plus `id` when one was given.
 * @throws Error when the value is not an object, the transport is not in the
 *   supported set, or `id` is present but not a string.
 */
function parseSource(value, sourceLabel) {
    if (value === undefined)
        return undefined;
    const record = objectRecord(value);
    if (!record)
        throw new Error(label(sourceLabel, "source must be an object"));
    const { transport } = record;
    const supported = typeof transport === "string" && transports.has(transport);
    if (!supported)
        throw new Error(label(sourceLabel, "source.transport is unsupported"));
    const id = optionalString(record.id, "source.id", sourceLabel);
    const source = { transport };
    // Omit the key entirely rather than emitting `id: undefined`.
    if (id !== undefined)
        source.id = id;
    return source;
}
|
|
123
|
+
/**
 * Parse the optional `session` payload of a trace event, keeping only its
 * turn-detection flags.
 *
 * @param value - Raw `session` value from the artifact.
 * @param sourceLabel - Label prefixed to error messages.
 * @returns `undefined` when absent; `{}` when `session.turnDetection` is not an
 *   object; otherwise `{ turnDetection: { createResponse, interruptResponse } }`.
 * @throws Error when `session` is not an object or a flag is present but not boolean.
 */
function parseTurnDetection(value, sourceLabel) {
    if (value === undefined)
        return undefined;
    const session = objectRecord(value);
    if (!session)
        throw new Error(label(sourceLabel, "session must be an object"));
    const turnDetection = objectRecord(session.turnDetection);
    if (!turnDetection)
        return {};
    const { createResponse, interruptResponse } = turnDetection;
    const requireOptionalBoolean = (flag, message) => {
        if (flag !== undefined && typeof flag !== "boolean")
            throw new Error(label(sourceLabel, message));
    };
    requireOptionalBoolean(createResponse, "session.turnDetection.createResponse must be boolean");
    requireOptionalBoolean(interruptResponse, "session.turnDetection.interruptResponse must be boolean");
    return { turnDetection: { createResponse, interruptResponse } };
}
|
|
142
|
+
/**
 * Check that the five latency budgets of an expectation are usable.
 *
 * @param expectation - Expectation object to validate.
 * @param sourceLabel - Label prefixed to the error message.
 * @returns The same expectation object, unchanged.
 * @throws Error when any budget is not a positive finite number.
 */
function validateExpectation(expectation, sourceLabel) {
    const isPositiveFinite = (budget) => typeof budget === "number" && Number.isFinite(budget) && budget > 0;
    const { maxFirstAssistantAudioMs, maxUserTurnResponseMs, maxToolPresenceMs, maxBargeInClearMs, maxBargeInTruncateMs, } = expectation;
    const budgets = [
        maxFirstAssistantAudioMs,
        maxUserTurnResponseMs,
        maxToolPresenceMs,
        maxBargeInClearMs,
        maxBargeInTruncateMs,
    ];
    if (!budgets.every(isPositiveFinite))
        throw new Error(label(sourceLabel, "expectation latency budgets must be positive finite numbers"));
    return expectation;
}
|
|
155
|
+
/**
 * Parse an inline expectation from a trace artifact.
 *
 * @param value - Raw `expectation` value.
 * @param sourceLabel - Label prefixed to error messages.
 * @returns The value itself once `validateExpectation` accepts it.
 * @throws Error when the value is not an object, or when validation fails.
 */
function parseExpectation(value, sourceLabel) {
    const record = objectRecord(value);
    if (record)
        return validateExpectation(record, sourceLabel);
    throw new Error(label(sourceLabel, "expectation must be an object"));
}
|
|
161
|
+
/**
 * Validate one raw trace event and return it in parsed form.
 *
 * Checks run in a fixed order (object shape, event name, ignored flag, known
 * event, atMs, role, then the optional fields), so the first problem found is
 * the one reported.
 *
 * @param value - Raw event from the artifact's `events` array.
 * @param index - Position of the event, used in error labels.
 * @param sourceLabel - Label of the artifact the event came from.
 * @returns The parsed event; `ignored` is `true` or `undefined`.
 * @throws Error for a malformed event, an unknown non-ignored event name, or
 *   an ignored event without `ignoreReason`.
 */
function parseTraceEvent(value, index, sourceLabel) {
    const eventLabel = `${sourceLabel} event[${index}]`;
    const fail = (detail) => {
        throw new Error(label(eventLabel, detail));
    };
    const raw = objectRecord(value);
    if (!raw)
        fail("must be an object");
    const event = requiredString(raw.event, "event", eventLabel);
    const { ignored } = raw;
    if (ignored !== undefined && typeof ignored !== "boolean")
        fail("ignored must be boolean");
    // Ignored events may carry provider-specific names that are not in either table.
    const recognized = normalizedEvents.has(event) || rawEventMap.has(event);
    if (!ignored && !recognized)
        fail(`unknown trace event ${event}`);
    const { atMs } = raw;
    if (typeof atMs !== "number" || !Number.isFinite(atMs))
        fail("atMs must be a finite number");
    const { role } = raw;
    if (role !== undefined && role !== "assistant" && role !== "user")
        fail("role must be assistant or user");
    // Every optional text field is reported under its own key name.
    const textField = (key) => optionalString(raw[key], key, eventLabel);
    const parsed = {
        atMs,
        event,
        source: parseSource(raw.source, eventLabel),
        correlationId: textField("correlationId"),
        text: textField("text"),
        role,
        toolName: textField("toolName"),
        friendId: textField("friendId"),
        sessionKey: textField("sessionKey"),
        session: parseTurnDetection(raw.session, eventLabel),
        ignored: ignored ? true : undefined,
        ignoreReason: textField("ignoreReason"),
    };
    if (parsed.ignored && !parsed.ignoreReason)
        fail("ignored events require ignoreReason");
    return parsed;
}
|
|
201
|
+
/**
 * Validate a decoded trace artifact and return it in parsed form.
 *
 * An artifact must declare `schemaVersion: 1`, a known `expectedOutcome`,
 * exactly one of `expectation` / `expectationProfile`, and a non-empty
 * `events` array.
 *
 * @param value - Decoded JSON value.
 * @param sourceLabel - Label prefixed to error messages.
 * @returns The parsed artifact; `redacted` is `true` or `undefined`.
 * @throws Error describing the first validation failure.
 */
function parseVoiceRealtimeEvalTraceArtifact(value, sourceLabel = "voice trace artifact") {
    const fail = (detail) => {
        throw new Error(label(sourceLabel, detail));
    };
    const raw = objectRecord(value);
    if (raw?.schemaVersion !== 1)
        fail("schemaVersion must be 1");
    const { expectedOutcome } = raw;
    const outcomeKnown = typeof expectedOutcome === "string" && expectedOutcomes.has(expectedOutcome);
    if (!outcomeKnown)
        fail("expectedOutcome must be pass, fail, or expected-fail");
    const { expectationProfile } = raw;
    const hasProfile = expectationProfile !== undefined;
    if (hasProfile && !(typeof expectationProfile === "string" && expectationProfiles.has(expectationProfile)))
        fail("expectationProfile is unsupported");
    const hasInlineExpectation = raw.expectation !== undefined;
    // Both present or both absent is an error: the contract must be unambiguous.
    if (hasInlineExpectation === hasProfile)
        fail("provide exactly one of expectation or expectationProfile");
    const rawEvents = raw.events;
    if (!Array.isArray(rawEvents) || rawEvents.length === 0)
        fail("events must contain at least one event");
    const { redacted } = raw;
    if (redacted !== undefined && typeof redacted !== "boolean")
        fail("redacted must be boolean");
    return {
        schemaVersion: 1,
        traceId: requiredString(raw.traceId, "traceId", sourceLabel),
        scenarioId: requiredString(raw.scenarioId, "scenarioId", sourceLabel),
        expectedOutcome,
        expectation: hasInlineExpectation ? parseExpectation(raw.expectation, sourceLabel) : undefined,
        expectationProfile,
        redacted: redacted ? true : undefined,
        events: rawEvents.map((event, index) => parseTraceEvent(event, index, sourceLabel)),
    };
}
|
|
234
|
+
/**
 * Read a trace artifact from disk, decode it as JSON and validate it.
 *
 * @param filePath - Path of the trace JSON file; also used as the error label.
 * @returns The artifact produced by `parseVoiceRealtimeEvalTraceArtifact`.
 * @throws Error `"<path>: failed to read trace artifact"` when the file cannot
 *   be read and `"<path>: invalid JSON: ..."` when it cannot be decoded; both
 *   carry the underlying error as `cause`. Validation errors propagate as-is.
 */
function loadVoiceRealtimeEvalTraceArtifact(filePath) {
    let raw;
    try {
        raw = fs.readFileSync(filePath, "utf8");
    }
    catch (error) {
        // Keep the original failure (ENOENT, EACCES, ...) reachable for debugging.
        throw new Error(`${filePath}: failed to read trace artifact`, { cause: error });
    }
    let decoded;
    try {
        // Only the decode step is guarded, so a SyntaxError raised elsewhere is
        // never mislabelled as invalid JSON.
        decoded = JSON.parse(raw);
    }
    catch (error) {
        if (error instanceof SyntaxError)
            throw new Error(`${filePath}: invalid JSON: ${error.message}`, { cause: error });
        throw error;
    }
    return parseVoiceRealtimeEvalTraceArtifact(decoded, filePath);
}
|
|
251
|
+
/**
 * Pick the expectation a trace artifact is graded against.
 *
 * The "voice-phone-default" profile takes precedence over an inline expectation.
 *
 * @param artifact - Parsed trace artifact.
 * @returns The default expectation for the profile, or the inline expectation.
 * @throws Error when the artifact carries neither.
 */
function resolveVoiceRealtimeEvalTraceExpectation(artifact) {
    switch (artifact.expectationProfile) {
        case "voice-phone-default":
            return (0, realtime_eval_1.buildVoiceRealtimeEvalDefaultExpectation)();
        default:
            break;
    }
    const inline = artifact.expectation;
    if (!inline)
        throw new Error(`${artifact.traceId}: trace artifact has no expectation contract`);
    return inline;
}
|
|
258
|
+
/**
 * Map a trace event name to its normalized timeline type.
 *
 * @param event - Parsed trace event; only `event.event` is read.
 * @returns The name itself when already normalized, else its `rawEventMap` entry.
 * @throws Error when the name is in neither table.
 */
function eventTypeFor(event) {
    const name = event.event;
    const type = normalizedEvents.has(name) ? name : rawEventMap.get(name);
    if (!type)
        throw new Error(`unknown trace event ${name}`);
    return type;
}
|
|
266
|
+
/**
 * Convert a parsed trace event into a timeline event.
 *
 * @param event - Parsed trace event.
 * @param redacted - When truthy, `text` is dropped from the result.
 * @returns Timeline event with the normalized `type` and the copied fields.
 * @throws Whatever `eventTypeFor` throws for an unknown event name.
 */
function toTimelineEvent(event, redacted) {
    // Resolve the type first so an unknown event fails before anything is copied.
    const type = eventTypeFor(event);
    const { atMs, source, correlationId, role, toolName, friendId, sessionKey, session } = event;
    const text = redacted ? undefined : event.text;
    return { type, atMs, source, correlationId, text, role, toolName, friendId, sessionKey, session };
}
|
|
281
|
+
/**
 * Return the events ordered by `atMs` ascending; events with equal timestamps
 * keep their input order. The input array is not modified.
 *
 * @param events - Timeline events.
 * @returns A new array holding the same event objects in sorted order.
 */
function sortedTimeline(events) {
    const indexed = events.map((event, index) => ({ event, index }));
    indexed.sort((left, right) => {
        const byTime = left.event.atMs - right.event.atMs;
        if (byTime)
            return byTime;
        // Equal timestamps: fall back to original position.
        return left.index - right.index;
    });
    return indexed.map((entry) => entry.event);
}
|
|
287
|
+
/**
 * Find the earliest-listed event of a given type.
 *
 * @param events - Timeline events, searched front to back.
 * @param type - Normalized event type to look for.
 * @returns The first matching event, or `undefined` when none matches.
 */
function findFirst(events, type) {
    for (const event of events) {
        if (event.type === type)
            return event;
    }
    return undefined;
}
|
|
290
|
+
/**
 * Reject traces whose graded events are in an impossible order.
 *
 * Checks, in order: no non-ignored event has a negative `atMs`; assistant audio
 * does not start before `call.connected`; a correlated `response.requested`
 * does not precede its `user.transcript.done`; a correlated
 * `tool.call.completed` does not precede its `tool.call.started`; and, when the
 * resolved expectation sets `requireHangup`, `call.ended` does not precede
 * `call.hangup.requested`.
 *
 * @param artifact - Parsed trace artifact (source of raw events and the expectation).
 * @param timeline - Timeline events built from the artifact.
 * @returns Nothing.
 * @throws Error naming the first violated ordering rule.
 */
function validateCausalTimeline(artifact, timeline) {
    artifact.events.forEach((event, index) => {
        if (!event.ignored && event.atMs < 0)
            throw new Error(`${artifact.traceId} event[${index}] atMs must be a nonnegative finite number`);
    });
    const connected = findFirst(timeline, "call.connected");
    const audio = findFirst(timeline, "assistant.audio.started");
    if (connected && audio && audio.atMs < connected.atMs)
        throw new Error(`${artifact.traceId}: assistant audio started before call.connected`);
    // Events of one type that carry a correlation id, and the first partner sharing it.
    const correlated = (type) => timeline.filter((event) => event.type === type && event.correlationId);
    const partnerOf = (type, correlationId) => timeline.find((event) => event.type === type && event.correlationId === correlationId);
    for (const response of correlated("response.requested")) {
        const transcript = partnerOf("user.transcript.done", response.correlationId);
        if (transcript && response.atMs < transcript.atMs)
            throw new Error(`${artifact.traceId}: response.requested for ${response.correlationId} occurred before user.transcript.done`);
    }
    for (const completed of correlated("tool.call.completed")) {
        const started = partnerOf("tool.call.started", completed.correlationId);
        if (started && completed.atMs < started.atMs)
            throw new Error(`${artifact.traceId}: tool.call.completed for ${completed.correlationId} occurred before tool.call.started`);
    }
    const expectation = resolveVoiceRealtimeEvalTraceExpectation(artifact);
    if (!expectation.requireHangup)
        return;
    const ended = findFirst(timeline, "call.ended");
    const hangup = findFirst(timeline, "call.hangup.requested");
    if (ended && hangup && ended.atMs < hangup.atMs)
        throw new Error(`${artifact.traceId}: call.ended occurred before call.hangup.requested`);
}
|
|
325
|
+
/**
 * Builds the timeline for a trace artifact: ignored events are dropped, the
 * rest are converted with `toTimelineEvent`, passed through `sortedTimeline`,
 * and the result is checked by `validateCausalTimeline` before being returned.
 *
 * @param {object} artifact - Trace artifact; `events` and `redacted` are read here.
 * @returns {Array<object>} The timeline returned by `sortedTimeline`.
 * @throws {Error} Whatever `validateCausalTimeline` throws for an implausible trace.
 */
function traceArtifactToVoiceRealtimeEvalTimeline(artifact) {
    const activeEvents = artifact.events.filter((event) => !event.ignored);
    const converted = activeEvents.map((event) => toTimelineEvent(event, Boolean(artifact.redacted)));
    const timeline = sortedTimeline(converted);
    validateCausalTimeline(artifact, timeline);
    return timeline;
}
|
|
332
|
+
/**
 * Tells whether a report's pass/fail state agrees with the expected outcome.
 *
 * @param {string} expectedOutcome - `"pass"` expects a passing report; any other
 *   value expects a failing one.
 * @param {boolean} passed - Whether the report passed.
 * @returns {boolean} `passed` when a pass is expected, otherwise its negation.
 */
function expectedOutcomeMatched(expectedOutcome, passed) {
    const expectsPass = expectedOutcome === "pass";
    return expectsPass ? passed : !passed;
}
|
|
337
|
+
/**
 * Replays a trace artifact through the realtime eval grader.
 *
 * Emits a replay-start nerves event, builds and validates the timeline, grades
 * it against the expectation resolved for the artifact, emits a replay-end
 * nerves event, and returns the combined result.
 *
 * @param {object} artifact - Trace artifact (`traceId`, `scenarioId`,
 *   `expectedOutcome`, `events` are read here).
 * @returns {object} `{ artifact, traceId, scenarioId, expectedOutcome,
 *   outcomeMatched, report, timeline, ignoredEvents }`.
 * @throws {Error} Propagates errors from timeline construction/validation.
 */
function gradeVoiceRealtimeEvalTrace(artifact) {
    const startEvent = {
        component: "senses",
        event: "senses.voice_realtime_trace_replay_start",
        message: "starting Voice realtime trace replay",
        meta: { scenarioId: artifact.scenarioId, events: artifact.events.length },
    };
    (0, runtime_1.emitNervesEvent)(startEvent);

    const timeline = traceArtifactToVoiceRealtimeEvalTimeline(artifact);
    const expectation = resolveVoiceRealtimeEvalTraceExpectation(artifact);
    const report = (0, realtime_eval_1.gradeVoiceRealtimeEvalTimeline)(artifact.scenarioId, timeline, expectation);
    const outcomeMatched = expectedOutcomeMatched(artifact.expectedOutcome, report.passed);
    const ignoredEvents = artifact.events.filter((event) => event.ignored);

    const endEvent = {
        component: "senses",
        event: "senses.voice_realtime_trace_replay_end",
        message: "finished Voice realtime trace replay",
        meta: { scenarioId: artifact.scenarioId, passed: report.passed, findings: report.findings.length },
    };
    (0, runtime_1.emitNervesEvent)(endEvent);

    return {
        artifact,
        traceId: artifact.traceId,
        scenarioId: artifact.scenarioId,
        expectedOutcome: artifact.expectedOutcome,
        outcomeMatched,
        report,
        timeline,
        ignoredEvents,
    };
}
|
|
364
|
+
/**
 * Produces the text suffix appended to an event line in the trace report.
 *
 * @param {object} result - Graded trace result; `artifact.redacted` is read.
 * @param {object} event - Timeline event (`type`, optional `text`).
 * @returns {string} `" [redacted]"` for transcript/context events of a redacted
 *   artifact, `""` when the event has no text, otherwise the text in double
 *   quotes with a leading space.
 */
function textForSummary(result, event) {
    const textBearingTypes = [
        "assistant.transcript.done",
        "user.transcript.done",
        "voice.context.injected",
    ];
    if (result.artifact.redacted && textBearingTypes.includes(event.type)) {
        return " [redacted]";
    }
    return event.text === undefined ? "" : ` "${event.text}"`;
}
|
|
374
|
+
/**
 * Produces the transport suffix appended to an event line in the trace report.
 *
 * @param {{transport: string, id?: string}|undefined} source - Event source, if any.
 * @returns {string} `""` when there is no source; otherwise a leading space
 *   followed by `transport`, plus `/id` when `id` is truthy.
 */
function sourceForSummary(source) {
    if (!source) {
        return "";
    }
    const idSuffix = source.id ? `/${source.id}` : "";
    return ` ${source.transport}${idSuffix}`;
}
|
|
379
|
+
/**
 * Renders a graded trace result as a multi-line, human-readable report.
 *
 * Sections, in order: a header (trace/scenario, outcome, transports, metrics),
 * an optional `findings:` list, an `events:` list of timeline events, and an
 * `ignored provider events:` count followed by one line per ignored event.
 * Finding messages are replaced with `[redacted]` when the artifact is redacted.
 *
 * @param {object} result - Result object as produced by the trace grader
 *   (`traceId`, `scenarioId`, `expectedOutcome`, `outcomeMatched`, `artifact`,
 *   `report`, `timeline`, `ignoredEvents`).
 * @returns {string} Report lines joined with `"\n"`.
 */
function formatVoiceRealtimeEvalTraceReport(result) {
    const { report } = result;
    const headerLines = [
        `trace ${result.traceId} scenario ${result.scenarioId}`,
        `expected: ${result.expectedOutcome}; report passed: ${report.passed}; outcome matched: ${result.outcomeMatched}`,
        `transports: ${report.transportSources.join(", ") || "none"}`,
        `metrics: ${JSON.stringify(report.metrics)}`,
    ];

    const findingLines = [];
    if (report.findings.length > 0) {
        findingLines.push("findings:");
        findingLines.push(...report.findings.map((finding) => {
            const message = result.artifact.redacted ? "[redacted]" : finding.message;
            const when = finding.atMs === undefined ? "" : ` at ${finding.atMs}ms`;
            return `- ${finding.code}${when}: ${message}`;
        }));
    }

    const eventLines = result.timeline.map((event) => `- ${event.atMs}ms ${event.type}${sourceForSummary(event.source)}${textForSummary(result, event)}`);
    const ignoredLines = result.ignoredEvents.map((event) => `- ${event.atMs}ms ${event.event}${sourceForSummary(event.source)}: ${event.ignoreReason}`);

    return [
        ...headerLines,
        ...findingLines,
        "events:",
        ...eventLines,
        `ignored provider events: ${result.ignoredEvents.length}`,
        ...ignoredLines,
    ].join("\n");
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runVoiceRealtimeEvalCommand = runVoiceRealtimeEvalCommand;
|
|
4
|
+
const runtime_1 = require("../nerves/runtime");
|
|
5
|
+
const realtime_eval_1 = require("./voice/realtime-eval");
|
|
6
|
+
const realtime_trace_1 = require("./voice/realtime-trace");
|
|
7
|
+
/**
 * Parses CLI arguments of the form `--trace <path> [--trace <path> ...]`.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {string[]} Trace file paths in the order given (empty when `argv` is empty).
 * @throws {Error} On any token other than `--trace` in flag position, or when
 *   `--trace` is not followed by a non-empty value.
 */
function parseTraceArgs(argv) {
    const tracePaths = [];
    let cursor = 0;
    // Each iteration consumes one flag and its value.
    while (cursor < argv.length) {
        const flag = argv[cursor];
        if (flag !== "--trace") {
            throw new Error(`unknown argument: ${flag}`);
        }
        const value = argv[cursor + 1];
        if (!value) {
            throw new Error("--trace requires a file path");
        }
        tracePaths.push(value);
        cursor += 2;
    }
    return tracePaths;
}
|
|
21
|
+
/**
 * Runs the built-in realtime eval suite and packages its outcome.
 *
 * @returns {{summary: object, expectedKnownBadFailed: boolean, happyPathPassed: boolean}}
 *   `expectedKnownBadFailed` is true only when exactly one scenario failed and
 *   the first failed id is `voice-known-bad-latency`; `happyPathPassed` is true
 *   when a `voice-happy-path` report passed.
 */
function builtInPayload() {
    const reports = (0, realtime_eval_1.runBuiltInVoiceRealtimeEvalSuite)();
    const summary = (0, realtime_eval_1.summarizeVoiceRealtimeEvalSuite)(reports);
    const onlyKnownBadFailed = summary.failed === 1
        && summary.failedScenarioIds[0] === "voice-known-bad-latency";
    const happyPathOk = reports.some(({ scenarioId, passed }) => scenarioId === "voice-happy-path" && passed);
    return {
        summary,
        expectedKnownBadFailed: onlyKnownBadFailed,
        happyPathPassed: happyPathOk,
    };
}
|
|
30
|
+
/**
 * Projects a graded trace result onto the fields emitted in the command's JSON
 * payload. The artifact and timeline are left out, and each ignored event is
 * narrowed to `atMs`, `event`, `source` and `ignoreReason`.
 *
 * @param {object} result - Graded trace result.
 * @returns {object} `{ traceId, scenarioId, expectedOutcome, outcomeMatched,
 *   report, ignoredEvents }`.
 */
function traceResultPayload(result) {
    const { traceId, scenarioId, expectedOutcome, outcomeMatched, report } = result;
    const ignoredEvents = result.ignoredEvents.map(({ atMs, event, source, ignoreReason }) => ({
        atMs,
        event,
        source,
        ignoreReason,
    }));
    return { traceId, scenarioId, expectedOutcome, outcomeMatched, report, ignoredEvents };
}
|
|
45
|
+
/**
 * Tallies how many trace results matched their expected outcome.
 *
 * @param {Array<{outcomeMatched: boolean, scenarioId: string}>} traces - Trace payloads.
 * @returns {{matched: number, mismatched: number, total: number, mismatchedScenarioIds: string[]}}
 *   Counts plus the scenario ids of the mismatching traces, in input order.
 */
function summarizeTraceResults(traces) {
    const mismatchedScenarioIds = [];
    for (const trace of traces) {
        if (!trace.outcomeMatched) {
            mismatchedScenarioIds.push(trace.scenarioId);
        }
    }
    const total = traces.length;
    const mismatched = mismatchedScenarioIds.length;
    return {
        matched: total - mismatched,
        mismatched,
        total,
        mismatchedScenarioIds,
    };
}
|
|
54
|
+
/**
 * Converts a thrown value into a failing command result.
 *
 * @param {unknown} error - The caught value.
 * @returns {{exitCode: number, payload: {error: string}}} Exit code 1 and the
 *   stringified error with a leading `"Error: "` prefix removed.
 */
function errorResult(error) {
    const prefix = "Error: ";
    const rendered = String(error);
    const message = rendered.startsWith(prefix) ? rendered.slice(prefix.length) : rendered;
    return { exitCode: 1, payload: { error: message } };
}
|
|
60
|
+
/**
 * Runs the Voice realtime eval command: always executes the built-in suite and,
 * for every `--trace <path>` argument, loads and grades that trace artifact.
 *
 * The exit code is 0 only when the built-in suite behaved as expected (the
 * known-bad scenario failed and the happy path passed) and no graded trace
 * mismatched its expected outcome. Any thrown error is caught and converted
 * into an `exitCode: 1` result rather than propagated.
 *
 * @param {string[]} argv - Command arguments (without node/script entries).
 * @returns {{exitCode: number, payload: object}} Result for the caller to print
 *   and exit with.
 */
function runVoiceRealtimeEvalCommand(argv) {
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_realtime_eval_command_start",
        message: "starting Voice realtime eval command runner",
        meta: { scenarioId: "voice-eval-command", events: argv.length },
    });
    try {
        const tracePaths = parseTraceArgs(argv);
        const payload = builtInPayload();
        if (tracePaths.length > 0) {
            // Grade every trace before projecting any of them into payload form.
            const graded = tracePaths.map((tracePath) => {
                const artifact = (0, realtime_trace_1.loadVoiceRealtimeEvalTraceArtifact)(tracePath);
                return (0, realtime_trace_1.gradeVoiceRealtimeEvalTrace)(artifact);
            });
            const traces = graded.map(traceResultPayload);
            payload.traces = traces;
            payload.traceSummary = summarizeTraceResults(traces);
        }
        const builtInsPassed = Boolean(payload.expectedKnownBadFailed && payload.happyPathPassed);
        const tracesPassed = !payload.traceSummary || payload.traceSummary.mismatched === 0;
        const exitCode = builtInsPassed && tracesPassed ? 0 : 1;
        const result = { exitCode, payload };
        (0, runtime_1.emitNervesEvent)({
            component: "senses",
            event: "senses.voice_realtime_eval_command_end",
            message: "finished Voice realtime eval command runner",
            meta: {
                scenarioId: "voice-eval-command",
                passed: result.exitCode === 0,
                findings: payload.traceSummary?.mismatched ?? 0,
            },
        });
        return result;
    }
    catch (error) {
        const failure = errorResult(error);
        (0, runtime_1.emitNervesEvent)({
            component: "senses",
            event: "senses.voice_realtime_eval_command_error",
            message: "Voice realtime eval command runner failed",
            meta: { scenarioId: "voice-eval-command", error: failure.payload.error },
            level: "error",
        });
        return failure;
    }
}
|
|
@@ -1,25 +1,21 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const realtime_eval_1 = require("./voice/realtime-eval");
|
|
4
3
|
const runtime_1 = require("../nerves/runtime");
|
|
4
|
+
const voice_realtime_eval_command_1 = require("./voice-realtime-eval-command");
|
|
5
5
|
(0, runtime_1.emitNervesEvent)({
|
|
6
6
|
component: "senses",
|
|
7
7
|
event: "senses.voice_realtime_eval_start",
|
|
8
8
|
message: "starting Voice realtime eval command",
|
|
9
9
|
meta: { scenarioId: "built-in-suite", events: 0 },
|
|
10
10
|
});
|
|
11
|
-
const
|
|
12
|
-
const summary = (0, realtime_eval_1.summarizeVoiceRealtimeEvalSuite)(reports);
|
|
13
|
-
const expectedKnownBadFailed = summary.failed === 1 && summary.failedScenarioIds[0] === "voice-known-bad-latency";
|
|
14
|
-
const happyPathPassed = reports.some((report) => report.scenarioId === "voice-happy-path" && report.passed);
|
|
11
|
+
const result = (0, voice_realtime_eval_command_1.runVoiceRealtimeEvalCommand)(process.argv.slice(2));
|
|
15
12
|
(0, runtime_1.emitNervesEvent)({
|
|
16
13
|
component: "senses",
|
|
17
14
|
event: "senses.voice_realtime_eval_end",
|
|
18
15
|
message: "finished Voice realtime eval command",
|
|
19
|
-
meta: { scenarioId: "built-in-suite", passed:
|
|
16
|
+
meta: { scenarioId: "built-in-suite", passed: result.exitCode === 0, findings: result.payload.traceSummary?.mismatched ?? result.payload.summary?.failed ?? 0 },
|
|
20
17
|
});
|
|
21
18
|
// eslint-disable-next-line no-console -- terminal UX: eval command summary
|
|
22
|
-
console.log(JSON.stringify(
|
|
23
|
-
if (
|
|
24
|
-
process.exit(
|
|
25
|
-
}
|
|
19
|
+
console.log(JSON.stringify(result.payload, null, 2));
|
|
20
|
+
if (result.exitCode !== 0)
|
|
21
|
+
process.exit(result.exitCode);
|