@ouro.bot/cli 0.1.0-alpha.585 → 0.1.0-alpha.587
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +16 -0
- package/dist/mailroom/search-cache.js +12 -0
- package/dist/repertoire/tools-mail.js +62 -8
- package/dist/senses/voice/floor-control.js +398 -0
- package/dist/senses/voice/realtime-eval.js +103 -0
- package/dist/senses/voice/realtime-trace.js +130 -1
- package/package.json +1 -1
package/changelog.json
CHANGED
|
@@ -1,6 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.587",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Voice now has a pure duplex floor-control model with typed transitions, state summaries, and decision reasons for caller floor ownership, assistant speech, tool-running/tool-result states, stale suppression, and hangup terminal behavior.",
|
|
8
|
+
"Realtime Voice evals and trace replay now understand floor-state, speech-policy, and tool-result events, failing deterministic traces when assistant speech is allowed while the caller owns the floor, stale tool results are spoken, or responses are requested after hangup.",
|
|
9
|
+
"Voice trace reports now include floor diagnostics such as phase, floor owner, pending speech, pending/stale tool ids, interruption turn, and decision reason, with new golden fixtures for interruption, tool-duplex, stale suppression, progress acknowledgement, and hangup-with-pending-tool scenarios."
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"version": "0.1.0-alpha.586",
|
|
14
|
+
"changes": [
|
|
15
|
+
"`mail_status`, `mail_recent`, `mail_search`, and `mail_index_refresh` now flag a 'mail substrate divergence' when the encrypted mailroom store reports zero visible messages but the on-disk search cache still holds documents from prior imports — the post-rotation / hosted→local-fallback / wiped-store state that previously rendered as a silent 'no mail' answer indistinguishable from a clean onboarding.",
|
|
16
|
+
"Mail absence answers from a divergent runtime now point at vault inspection (`mailroom.mode`, `mailroom.azureAccountUrl`, `mailroom.storePath`) and re-import recovery, so agents stop treating a broken substrate as evidence that the human inbox is empty.",
|
|
17
|
+
"The substrate-divergence snapshot counts cache `.json` entries via `readdir` and ignores subdirectories and non-json files, so the diagnostic stays cheap on bundles holding tens of thousands of cached documents."
|
|
18
|
+
]
|
|
19
|
+
},
|
|
4
20
|
{
|
|
5
21
|
"version": "0.1.0-alpha.585",
|
|
6
22
|
"changes": [
|
|
@@ -38,6 +38,7 @@ exports.buildMailSearchCacheDocument = buildMailSearchCacheDocument;
|
|
|
38
38
|
exports.upsertMailSearchCacheDocument = upsertMailSearchCacheDocument;
|
|
39
39
|
exports.syncMailSearchCacheMetadata = syncMailSearchCacheMetadata;
|
|
40
40
|
exports.searchMailSearchCache = searchMailSearchCache;
|
|
41
|
+
exports.snapshotMailSearchCache = snapshotMailSearchCache;
|
|
41
42
|
exports.readMailSearchCoverageRecord = readMailSearchCoverageRecord;
|
|
42
43
|
exports.writeMailSearchCoverageRecord = writeMailSearchCoverageRecord;
|
|
43
44
|
exports.resetMailSearchCacheForTests = resetMailSearchCacheForTests;
|
|
@@ -208,6 +209,17 @@ function searchMailSearchCache(filters, options) {
|
|
|
208
209
|
}
|
|
209
210
|
return typeof filters.limit === "number" ? ordered.slice(0, filters.limit) : ordered;
|
|
210
211
|
}
|
|
212
|
+
/**
 * Count the cached search documents for an agent without opening them.
 *
 * Only regular files whose name ends in ".json" and that sit directly inside
 * the cache directory are counted; subdirectories and other files are ignored.
 *
 * @param {string} agentId - agent whose cache directory is inspected.
 * @param {object} [options] - forwarded unchanged to `cacheDir`.
 * @returns {{ totalDocuments: number }} number of cached documents; 0 when the
 *   cache directory does not exist.
 * @throws any `readdirSync` error other than ENOENT.
 */
function snapshotMailSearchCache(agentId, options) {
    const dir = cacheDir(agentId, options);
    if (!fs.existsSync(dir))
        return { totalDocuments: 0 };
    let entries;
    try {
        entries = fs.readdirSync(dir, { withFileTypes: true });
    }
    catch (error) {
        // The directory can disappear between the existence check and the read
        // (e.g. a concurrent cache reset). That is still "no cached documents",
        // not a failure of the diagnostic.
        if (error?.code === "ENOENT")
            return { totalDocuments: 0 };
        throw error;
    }
    let total = 0;
    for (const entry of entries) {
        if (entry.isFile() && entry.name.endsWith(".json"))
            total += 1;
    }
    return { totalDocuments: total };
}
|
|
211
223
|
function readMailSearchCoverageRecord(key, options) {
|
|
212
224
|
const document = readJsonDocument(coveragePath(key, options));
|
|
213
225
|
if (!document || document.schemaVersion !== 1 || document.agentId !== key.agentId)
|
|
@@ -519,13 +519,42 @@ async function renderSourceGrantStatus(config, agentId) {
|
|
|
519
519
|
];
|
|
520
520
|
}
|
|
521
521
|
}
|
|
522
|
+
/**
 * Render the "mail substrate divergence" warning, or null when there is
 * nothing to warn about.
 *
 * A divergence is reported when the encrypted store shows no visible messages
 * while the on-disk search cache still holds documents. That state looks like
 * a fresh onboarding ("0 messages") but actually means mail that used to be
 * reachable no longer is, so absence answers should be treated as suspect.
 *
 * @param {object} input
 * @param {string} input.agentId - agent whose cache is inspected.
 * @param {string} input.storeKind - interpolated into the warning text.
 * @param {string} input.storeLabel - interpolated into the warning text.
 * @param {number} input.visibleMessageCount - messages visible in the store.
 * @param {{ totalDocuments: number }} [input.snapshot] - precomputed cache
 *   snapshot; when absent the search cache is queried for `agentId`.
 * @returns {string|null} three newline-separated lines, or null when the
 *   store has visible messages or the cache is empty.
 */
function describeMailSubstrateDivergence(input) {
    if (input.visibleMessageCount > 0) {
        return null;
    }
    const { totalDocuments } = input.snapshot ?? (0, search_cache_1.snapshotMailSearchCache)(input.agentId);
    if (totalDocuments === 0) {
        return null;
    }
    const { agentId, storeKind, storeLabel } = input;
    const headline = `mail substrate divergence: encrypted ${storeKind} store at ${storeLabel} has 0 visible messages, but the on-disk search cache for ${agentId} holds ${totalDocuments} document(s).`;
    const interpretation = "interpretation: this is not a fresh onboarding — prior imports populated the cache, then the encrypted store became unreachable (common causes: mail key rotation, hosted-store pointer dropped from vault during repair, encrypted store wiped). Mail absence answers from this runtime are not authoritative until the substrate is repaired.";
    const nextMove = `agent next move: inspect runtime credentials (mailroom.mode, mailroom.azureAccountUrl, mailroom.storePath) — if the agent was previously hosted, the vault is missing the hosted pointer; coordinate with the human to restore it. If local mode is correct, re-import via 'ouro mail import-mbox --agent ${agentId} --owner-email <human-email> --source <source> --discover' so the encrypted store catches up to the cache.`;
    return `${headline}\n${interpretation}\n${nextMove}`;
}
|
|
522
544
|
async function renderEmptyMailResult(input) {
|
|
523
545
|
const anyVisible = await input.store.listMessages({ agentId: input.agentId, limit: 1 });
|
|
524
546
|
if (anyVisible.length === 0) {
|
|
525
547
|
const sourceGrantStatus = await renderSourceGrantStatus(input.config, input.agentId);
|
|
548
|
+
const divergence = describeMailSubstrateDivergence({
|
|
549
|
+
agentId: input.agentId,
|
|
550
|
+
storeKind: input.storeKind,
|
|
551
|
+
storeLabel: input.storeLabel,
|
|
552
|
+
visibleMessageCount: 0,
|
|
553
|
+
});
|
|
526
554
|
return [
|
|
527
555
|
"No visible mail yet.",
|
|
528
556
|
`mail onboarding status: Mailroom is provisioned for ${input.config.mailboxAddress}, but this agent's encrypted store has 0 messages.`,
|
|
557
|
+
...(divergence ? [divergence] : []),
|
|
529
558
|
...sourceGrantStatus,
|
|
530
559
|
"interpretation: this is not evidence that the human's HEY inbox is empty; Agent Mail has not yet received or imported mail visible to this agent.",
|
|
531
560
|
`agent next move: guide setup from docs/agent-mail-setup.md. If HEY mail is needed, ensure the delegated hey alias exists, first try ouro mail import-mbox --agent ${input.agentId} --owner-email <human-email> --source hey --discover so Ouro can find a browser-downloaded export in .playwright-mcp or Downloads. Only ask the human for a file path if discovery cannot find a unique MBOX, then run ouro mail import-mbox --agent ${input.agentId} --owner-email <human-email> --source hey --file <mbox-path>. Verify with mail_recent/mail_search/Ouro Mailbox.`,
|
|
@@ -932,8 +961,8 @@ async function searchSuccessfulImportArchives(input) {
|
|
|
932
961
|
}
|
|
933
962
|
return matches.sort((left, right) => right.receivedAt.localeCompare(left.receivedAt));
|
|
934
963
|
}
|
|
935
|
-
async function renderMailStatus(
|
|
936
|
-
const sourceGrantStatus = await renderSourceGrantStatus(config, agentId);
|
|
964
|
+
async function renderMailStatus(input) {
|
|
965
|
+
const sourceGrantStatus = await renderSourceGrantStatus(input.config, input.agentId);
|
|
937
966
|
const delegatedLines = sourceGrantStatus
|
|
938
967
|
.flatMap((line) => line.startsWith("delegated source aliases: ")
|
|
939
968
|
? line
|
|
@@ -949,16 +978,24 @@ async function renderMailStatus(agentId, config, storeLabel) {
|
|
|
949
978
|
: `- delegated: ${grant}`;
|
|
950
979
|
})
|
|
951
980
|
: [`- ${line}`]);
|
|
981
|
+
const visible = await input.store.listMessages({ agentId: input.agentId, limit: 1 });
|
|
982
|
+
const divergence = describeMailSubstrateDivergence({
|
|
983
|
+
agentId: input.agentId,
|
|
984
|
+
storeKind: input.storeKind,
|
|
985
|
+
storeLabel: input.storeLabel,
|
|
986
|
+
visibleMessageCount: visible.length,
|
|
987
|
+
});
|
|
952
988
|
return [
|
|
953
|
-
`mailbox: ${config.mailboxAddress}`,
|
|
954
|
-
`store: ${storeLabel}`,
|
|
989
|
+
`mailbox: ${input.config.mailboxAddress}`,
|
|
990
|
+
`store: ${input.storeLabel}`,
|
|
991
|
+
...(divergence ? [divergence] : []),
|
|
955
992
|
"lane map:",
|
|
956
|
-
`- native: ${config.mailboxAddress}`,
|
|
993
|
+
`- native: ${input.config.mailboxAddress}`,
|
|
957
994
|
...delegatedLines,
|
|
958
995
|
"recent archives:",
|
|
959
|
-
...renderRecentArchiveStatus(agentId),
|
|
996
|
+
...renderRecentArchiveStatus(input.agentId),
|
|
960
997
|
"recent imports:",
|
|
961
|
-
...renderRecentImportOperations(agentId),
|
|
998
|
+
...renderRecentImportOperations(input.agentId),
|
|
962
999
|
].join("\n");
|
|
963
1000
|
}
|
|
964
1001
|
exports.mailToolDefinitions = [
|
|
@@ -985,7 +1022,13 @@ exports.mailToolDefinitions = [
|
|
|
985
1022
|
tool: "mail_status",
|
|
986
1023
|
reason: "mail operating model overview",
|
|
987
1024
|
});
|
|
988
|
-
return renderMailStatus(
|
|
1025
|
+
return renderMailStatus({
|
|
1026
|
+
agentId: resolved.agentName,
|
|
1027
|
+
config: resolved.config,
|
|
1028
|
+
store: resolved.store,
|
|
1029
|
+
storeKind: resolved.storeKind,
|
|
1030
|
+
storeLabel: resolved.storeLabel,
|
|
1031
|
+
});
|
|
989
1032
|
},
|
|
990
1033
|
summaryKeys: [],
|
|
991
1034
|
},
|
|
@@ -1039,6 +1082,8 @@ exports.mailToolDefinitions = [
|
|
|
1039
1082
|
agentId: resolved.agentName,
|
|
1040
1083
|
config: resolved.config,
|
|
1041
1084
|
store: resolved.store,
|
|
1085
|
+
storeKind: resolved.storeKind,
|
|
1086
|
+
storeLabel: resolved.storeLabel,
|
|
1042
1087
|
...(scope ? { scope } : {}),
|
|
1043
1088
|
...(args.source ? { source: args.source } : {}),
|
|
1044
1089
|
});
|
|
@@ -1394,6 +1439,8 @@ exports.mailToolDefinitions = [
|
|
|
1394
1439
|
agentId: resolved.agentName,
|
|
1395
1440
|
config: resolved.config,
|
|
1396
1441
|
store: resolved.store,
|
|
1442
|
+
storeKind: resolved.storeKind,
|
|
1443
|
+
storeLabel: resolved.storeLabel,
|
|
1397
1444
|
...(scope ? { scope } : {}),
|
|
1398
1445
|
...(args.source ? { source: args.source } : {}),
|
|
1399
1446
|
}), {
|
|
@@ -1485,6 +1532,12 @@ exports.mailToolDefinitions = [
|
|
|
1485
1532
|
reason: args.reason || "refresh mail search index",
|
|
1486
1533
|
});
|
|
1487
1534
|
const { coverage } = refreshed;
|
|
1535
|
+
const divergence = describeMailSubstrateDivergence({
|
|
1536
|
+
agentId: resolved.agentName,
|
|
1537
|
+
storeKind: resolved.storeKind,
|
|
1538
|
+
storeLabel: resolved.storeLabel,
|
|
1539
|
+
visibleMessageCount: coverage.visibleMessageCount,
|
|
1540
|
+
});
|
|
1488
1541
|
return [
|
|
1489
1542
|
"mail search index refreshed.",
|
|
1490
1543
|
`scope: ${scope ?? "all"}${args.source ? `; source: ${args.source}` : ""}${placement ? `; placement: ${placement}` : ""}`,
|
|
@@ -1496,6 +1549,7 @@ exports.mailToolDefinitions = [
|
|
|
1496
1549
|
`indexed at: ${coverage.indexedAt}`,
|
|
1497
1550
|
...(coverage.oldestReceivedAt ? [`oldest: ${coverage.oldestReceivedAt}`] : []),
|
|
1498
1551
|
...(coverage.newestReceivedAt ? [`newest: ${coverage.newestReceivedAt}`] : []),
|
|
1552
|
+
...(divergence ? [divergence] : []),
|
|
1499
1553
|
].join("\n");
|
|
1500
1554
|
}
|
|
1501
1555
|
catch (error) {
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createInitialVoiceFloorState = createInitialVoiceFloorState;
|
|
4
|
+
exports.canRequestVoiceResponse = canRequestVoiceResponse;
|
|
5
|
+
exports.canSpeakToolHolding = canSpeakToolHolding;
|
|
6
|
+
exports.canSpeakToolResult = canSpeakToolResult;
|
|
7
|
+
exports.applyVoiceFloorEvent = applyVoiceFloorEvent;
|
|
8
|
+
exports.replayVoiceFloorEvents = replayVoiceFloorEvents;
|
|
9
|
+
exports.summarizeVoiceFloorState = summarizeVoiceFloorState;
|
|
10
|
+
const runtime_1 = require("../../nerves/runtime");
|
|
11
|
+
const MAX_TOOL_HOLDING_WORDS = 6;
|
|
12
|
+
/**
 * Build the floor state used before any event has been applied.
 *
 * @returns {object} a fresh state in phase "idle" with nobody owning the
 *   floor, no hangup flags set, and empty tool / caller-turn bookkeeping.
 *   Every call returns new array and object instances.
 */
function createInitialVoiceFloorState() {
    const initial = {
        phase: "idle",
        floorOwner: "none",
        terminal: false,
        hangupRequested: false,
    };
    initial.pendingToolCallIds = [];
    initial.staleToolCallIds = [];
    initial.spokenToolCallIds = [];
    initial.callerTurnIds = [];
    initial.toolCalls = {};
    return initial;
}
|
|
25
|
+
/**
 * Assemble a floor-control decision record.
 *
 * @param {boolean} allowed - whether the evaluated action may proceed.
 * @param {string} action - action label (this module uses "allow", "delay",
 *   "suppress" and "cancel").
 * @param {string} reason - machine-readable reason code.
 * @param {object} [details] - extra fields merged on top of the three core
 *   fields; a key in `details` overrides a core field of the same name.
 * @returns {object} the merged decision object.
 */
function decision(allowed, action, reason, details = {}) {
    const core = { allowed, action, reason };
    return { ...core, ...details };
}
|
|
28
|
+
/**
 * Clone a floor state so a transition can modify it without touching the
 * caller's object.
 *
 * The four id lists and the `toolCalls` map are duplicated one level deep;
 * every other field (and each individual tool record) is carried over by
 * reference.
 *
 * @param {object} state - floor state to clone.
 * @returns {object} the cloned state.
 */
function copyState(state) {
    const clone = { ...state };
    clone.pendingToolCallIds = Array.from(state.pendingToolCallIds);
    clone.staleToolCallIds = Array.from(state.staleToolCallIds);
    clone.spokenToolCallIds = Array.from(state.spokenToolCallIds);
    clone.callerTurnIds = Array.from(state.callerTurnIds);
    clone.toolCalls = { ...state.toolCalls };
    return clone;
}
|
|
38
|
+
/**
 * Append `value` to `values` unless it is already present.
 *
 * @param {Array} values - existing list; never modified.
 * @param {*} value - entry to add.
 * @returns {Array} the same array instance when `value` is already in it,
 *   otherwise a new array with `value` appended.
 */
function withUnique(values, value) {
    if (values.includes(value)) {
        return values;
    }
    return values.concat([value]);
}
|
|
41
|
+
/**
 * Return a copy of `values` with every entry strictly equal to `value`
 * removed.
 *
 * @param {Array} values - existing list; never modified.
 * @param {*} value - entry to drop.
 * @returns {Array} a new array, even when nothing was removed.
 */
function withoutValue(values, value) {
    return values.filter((entry) => !(entry === value));
}
|
|
44
|
+
/**
 * Record `turnId` as the most recent caller turn.
 *
 * Mutates `state` in place, so callers pass a state they already copied.
 * The turn id is added to `callerTurnIds` only if it is not there yet.
 *
 * @param {object} state - mutable floor state.
 * @param {string} turnId - caller turn identifier from the event.
 */
function rememberCallerTurn(state, turnId) {
    const knownTurns = state.callerTurnIds;
    state.callerTurnIds = withUnique(knownTurns, turnId);
    state.latestCallerTurnId = turnId;
}
|
|
48
|
+
/**
 * Tell whether the caller has started a turn after `turnId`.
 *
 * @param {object} state - floor state holding `latestCallerTurnId` and the
 *   ordered `callerTurnIds` list.
 * @param {string} [turnId] - the turn a piece of work belongs to.
 * @returns {boolean} true only when `turnId` is a known turn and the latest
 *   caller turn sits later in `callerTurnIds`. False when either id is
 *   missing, when they are the same, or when `turnId` was never recorded.
 */
function hasNewerCallerTurn(state, turnId) {
    const latest = state.latestCallerTurnId;
    if (!turnId || !latest) {
        return false;
    }
    if (latest === turnId) {
        return false;
    }
    const turns = state.callerTurnIds;
    const ownPosition = turns.indexOf(turnId);
    if (ownPosition < 0) {
        return false;
    }
    return turns.indexOf(latest) > ownPosition;
}
|
|
55
|
+
/**
 * Look up the tracked record for a tool call.
 *
 * @param {object} state - floor state.
 * @param {string} toolCallId - tool call identifier.
 * @returns {object|undefined} the record stored under `toolCallId` in
 *   `state.toolCalls`, or undefined when there is none.
 */
function toolState(state, toolCallId) {
    const { toolCalls } = state;
    return toolCalls[toolCallId];
}
|
|
58
|
+
/**
 * Store `nextTool` under its own `toolCallId`.
 *
 * `state.toolCalls` is replaced with a new object rather than edited, so a
 * map shared with an earlier state is left untouched.
 *
 * @param {object} state - mutable floor state.
 * @param {object} nextTool - tool record; must carry `toolCallId`.
 */
function setToolState(state, nextTool) {
    const { toolCallId } = nextTool;
    const updated = { ...state.toolCalls };
    updated[toolCallId] = nextTool;
    state.toolCalls = updated;
}
|
|
61
|
+
/**
 * Pick the phase to fall back to once the assistant is no longer speaking.
 *
 * Looks only at tool calls listed in `pendingToolCallIds` that also have a
 * record in `toolCalls`.
 *
 * @param {object} state - floor state.
 * @returns {string} "tool-result-ready" if any pending tool is "ready",
 *   otherwise "tool-running" if any is "running", otherwise "listening".
 */
function phaseAfterAssistantSpeech(state) {
    let anyRunning = false;
    for (const toolCallId of state.pendingToolCallIds) {
        const tool = state.toolCalls[toolCallId];
        if (!tool) {
            continue;
        }
        // A ready result wins over everything else, wherever it sits in the list.
        if (tool.status === "ready") {
            return "tool-result-ready";
        }
        if (tool.status === "running") {
            anyRunning = true;
        }
    }
    return anyRunning ? "tool-running" : "listening";
}
|
|
69
|
+
/**
 * Produce the transition for an event that arrives after hangup.
 *
 * @param {object} state - current floor state; returned unchanged.
 * @param {object} event - the event being rejected.
 * @returns {{event: object, state: object, decision: object}} a transition
 *   whose decision is a "suppress" with reason "hangup_terminal".
 */
function suppressForHangup(state, event) {
    const verdict = decision(false, "suppress", "hangup_terminal", { atMs: event.atMs });
    return { event, state, decision: verdict };
}
|
|
76
|
+
/**
 * Decide whether a new assistant response may be requested right now.
 *
 * Checks run in order and the first match wins: hangup/terminal (suppress),
 * caller holds the floor (delay), assistant holds the floor or is speaking
 * (delay), another response is already pending (delay), otherwise allow.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{responseId: string}} input - the response being considered.
 * @returns {object} decision carrying `responseId`.
 */
function canRequestVoiceResponse(state, input) {
    const verdict = (allowed, action, reason) => decision(allowed, action, reason, { responseId: input.responseId });
    const { floorOwner, phase } = state;
    const callIsOver = state.terminal || state.hangupRequested || floorOwner === "terminal";
    if (callIsOver) {
        return verdict(false, "suppress", "hangup_terminal");
    }
    const callerHoldsFloor = floorOwner === "caller" || phase === "caller-speaking" || phase === "interrupted";
    if (callerHoldsFloor) {
        return verdict(false, "delay", "caller_has_floor");
    }
    const assistantHoldsFloor = floorOwner === "assistant" || state.activeAssistantSpeechId;
    if (assistantHoldsFloor) {
        return verdict(false, "delay", "assistant_has_floor");
    }
    if (state.pendingSpeech) {
        return verdict(false, "delay", "response_pending");
    }
    return verdict(true, "allow", "ready_for_response");
}
|
|
91
|
+
/**
 * Decide whether a short "holding" phrase may be spoken for a tool call.
 *
 * Checks run in order and the first match wins: hangup/terminal (suppress),
 * caller holds the floor (delay), tool unknown or already spoken (suppress),
 * tool stale (suppress), phrase longer than MAX_TOOL_HOLDING_WORDS
 * whitespace-separated words (suppress), otherwise allow.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{toolCallId: string, text?: string}} input - tool call and the
 *   phrase to speak; a missing `text` counts as zero words.
 * @returns {object} decision carrying `toolCallId`.
 */
function canSpeakToolHolding(state, input) {
    const { toolCallId } = input;
    const verdict = (allowed, action, reason) => decision(allowed, action, reason, { toolCallId });
    const { floorOwner, phase } = state;
    if (state.terminal || state.hangupRequested || floorOwner === "terminal") {
        return verdict(false, "suppress", "hangup_terminal");
    }
    if (floorOwner === "caller" || phase === "caller-speaking" || phase === "interrupted") {
        return verdict(false, "delay", "caller_has_floor");
    }
    const tool = toolState(state, toolCallId);
    if (!tool || tool.status === "spoken") {
        return verdict(false, "suppress", "missing_tool_call");
    }
    const isStale = tool.status === "stale" || state.staleToolCallIds.includes(toolCallId);
    if (isStale) {
        return verdict(false, "suppress", "stale_tool_result");
    }
    const phrase = input.text;
    const wordCount = phrase == null ? 0 : phrase.trim().split(/\s+/).filter(Boolean).length;
    if (wordCount > MAX_TOOL_HOLDING_WORDS) {
        return verdict(false, "suppress", "tool_holding_too_long");
    }
    return verdict(true, "allow", "tool_presence_allowed");
}
|
|
111
|
+
/**
 * Decide whether the result of a tool call may be spoken now.
 *
 * Checks run in order and the first match wins: hangup/terminal (suppress),
 * caller holds the floor (delay), tool unknown or already spoken (suppress),
 * tool stale (suppress), tool not yet "ready" (delay), otherwise allow.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{toolCallId: string}} input - the tool call whose result is to be
 *   spoken.
 * @returns {object} decision carrying `toolCallId`.
 */
function canSpeakToolResult(state, input) {
    const { toolCallId } = input;
    const verdict = (allowed, action, reason) => decision(allowed, action, reason, { toolCallId });
    const { floorOwner, phase } = state;
    if (state.terminal || state.hangupRequested || floorOwner === "terminal") {
        return verdict(false, "suppress", "hangup_terminal");
    }
    if (floorOwner === "caller" || phase === "caller-speaking" || phase === "interrupted") {
        return verdict(false, "delay", "caller_has_floor");
    }
    const tool = toolState(state, toolCallId);
    if (!tool || tool.status === "spoken") {
        return verdict(false, "suppress", "missing_tool_result");
    }
    const isStale = tool.status === "stale" || state.staleToolCallIds.includes(toolCallId);
    if (isStale) {
        return verdict(false, "suppress", "stale_tool_result");
    }
    if (tool.status === "ready") {
        return verdict(true, "allow", "tool_result_ready");
    }
    return verdict(false, "delay", "tool_still_running");
}
|
|
130
|
+
/**
 * Transition for "call.connected": start listening with a free floor.
 *
 * Resets `terminal` and `hangupRequested` to false and records the call id.
 * Tool and caller-turn bookkeeping is carried over from `state` as is.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{callId: string, atMs: number}} event - the connect event.
 * @returns {{event: object, state: object, decision: object}} transition with
 *   an "allow" decision and reason "call_connected".
 */
function applyConnected(state, event) {
    const next = Object.assign(copyState(state), {
        phase: "listening",
        floorOwner: "none",
        terminal: false,
        hangupRequested: false,
        callId: event.callId,
    });
    const verdict = decision(true, "allow", "call_connected", { atMs: event.atMs });
    return { event, state: next, decision: verdict };
}
|
|
139
|
+
/**
 * Transition for "caller.speech.started": the caller takes the floor.
 *
 * If the assistant was speaking, this is a barge-in: the phase becomes
 * "interrupted", the interruption is recorded, and the decision asks for the
 * active speech to be cancelled. Otherwise the phase becomes
 * "caller-speaking".
 *
 * @param {object} state - current floor state; not modified.
 * @param {{turnId: string, atMs: number}} event - the speech-start event.
 * @returns {{event: object, state: object, decision: object}} transition.
 */
function applyCallerSpeechStarted(state, event) {
    const { turnId, atMs } = event;
    const interruptedSpeechId = state.activeAssistantSpeechId;
    const next = copyState(state);
    rememberCallerTurn(next, turnId);
    next.floorOwner = "caller";
    if (!interruptedSpeechId) {
        next.phase = "caller-speaking";
        const started = decision(true, "allow", "caller_floor_started", { atMs });
        return { event, state: next, decision: started };
    }
    next.phase = "interrupted";
    next.interruption = { turnId, interruptedSpeechId, atMs };
    const bargeIn = decision(false, "cancel", "caller_barge_in", {
        atMs,
        responseId: interruptedSpeechId,
        interruptionTurnId: turnId,
    });
    return { event, state: next, decision: bargeIn };
}
|
|
163
|
+
/**
 * Transition for "caller.speech.ended": the caller stops talking.
 *
 * The floor is released and the phase returns to "listening" only when no
 * assistant speech is active; otherwise floor owner and phase are left as
 * they were.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{turnId: string, atMs: number}} event - the speech-end event.
 * @returns {{event: object, state: object, decision: object}} transition with
 *   an "allow" decision and reason "caller_floor_released".
 */
function applyCallerSpeechEnded(state, event) {
    const next = copyState(state);
    rememberCallerTurn(next, event.turnId);
    const assistantSpeaking = Boolean(next.activeAssistantSpeechId);
    if (!assistantSpeaking) {
        Object.assign(next, { floorOwner: "none", phase: "listening" });
    }
    const verdict = decision(true, "allow", "caller_floor_released", { atMs: event.atMs });
    return { event, state: next, decision: verdict };
}
|
|
172
|
+
/**
 * Transition for "caller.transcript.final": the caller's turn is complete.
 *
 * The floor is released and the phase moves to "thinking" only when no
 * assistant speech is active; otherwise floor owner and phase are left as
 * they were.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{turnId: string, atMs: number}} event - the final-transcript event.
 * @returns {{event: object, state: object, decision: object}} transition with
 *   an "allow" decision and reason "caller_turn_ready".
 */
function applyCallerTranscriptFinal(state, event) {
    const next = copyState(state);
    rememberCallerTurn(next, event.turnId);
    const assistantSpeaking = Boolean(next.activeAssistantSpeechId);
    if (!assistantSpeaking) {
        Object.assign(next, { floorOwner: "none", phase: "thinking" });
    }
    const verdict = decision(true, "allow", "caller_turn_ready", { atMs: event.atMs });
    return { event, state: next, decision: verdict };
}
|
|
181
|
+
/**
 * Transition for "assistant.response.requested".
 *
 * Defers to `canRequestVoiceResponse`. A refused request leaves the state
 * untouched; an accepted one moves to "thinking" with a free floor and
 * records the response as pending speech.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{responseId: string, reason: string, atMs: number}} event
 * @returns {{event: object, state: object, decision: object}} transition whose
 *   decision is the policy decision stamped with `atMs`.
 */
function applyAssistantResponseRequested(state, event) {
    const { responseId, reason, atMs } = event;
    const verdict = { ...canRequestVoiceResponse(state, { responseId, reason }), atMs };
    if (!verdict.allowed) {
        return { event, state, decision: verdict };
    }
    const next = Object.assign(copyState(state), {
        phase: "thinking",
        floorOwner: "none",
        pendingSpeech: { responseId, reason },
    });
    return { event, state: next, decision: verdict };
}
|
|
191
|
+
/**
 * Transition for "assistant.speech.started".
 *
 * Speech is refused (state untouched, "delay" decision) while the caller owns
 * the floor. Otherwise the assistant takes the floor, the response becomes
 * the active speech, and any pending speech is cleared.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{responseId: string, atMs: number}} event - the speech-start event.
 * @returns {{event: object, state: object, decision: object}} transition.
 */
function applyAssistantSpeechStarted(state, event) {
    const details = { atMs: event.atMs, responseId: event.responseId };
    if (state.floorOwner === "caller") {
        const refused = decision(false, "delay", "caller_has_floor", details);
        return { event, state, decision: refused };
    }
    const next = Object.assign(copyState(state), {
        phase: "speaking",
        floorOwner: "assistant",
        activeAssistantSpeechId: event.responseId,
        pendingSpeech: undefined,
    });
    const granted = decision(true, "allow", "assistant_speech_allowed", details);
    return { event, state: next, decision: granted };
}
|
|
204
|
+
/**
 * Transition for "assistant.speech.done".
 *
 * Clears the active speech if it matches the finished response, releases the
 * floor if the assistant held it, and recomputes the phase from the pending
 * tool calls.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{responseId: string, atMs: number}} event - the speech-done event.
 * @returns {{event: object, state: object, decision: object}} transition with
 *   an "allow" decision and reason "assistant_speech_done".
 */
function applyAssistantSpeechDone(state, event) {
    const { responseId, atMs } = event;
    const next = copyState(state);
    const finishedActiveSpeech = next.activeAssistantSpeechId === responseId;
    if (finishedActiveSpeech) {
        next.activeAssistantSpeechId = undefined;
    }
    if (next.floorOwner === "assistant") {
        next.floorOwner = "none";
    }
    next.phase = phaseAfterAssistantSpeech(next);
    const verdict = decision(true, "allow", "assistant_speech_done", { atMs, responseId });
    return { event, state: next, decision: verdict };
}
|
|
213
|
+
/**
 * Transition for "assistant.speech.cancelled".
 *
 * Clears the active speech if it matches the cancelled response. When the
 * state carries an interruption record, the floor goes to the caller in phase
 * "caller-speaking"; otherwise the floor is freed and the phase is recomputed
 * from the pending tool calls.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{responseId: string, atMs: number}} event - the cancellation event.
 * @returns {{event: object, state: object, decision: object}} transition with
 *   an "allow" decision and reason "assistant_speech_cancelled".
 */
function applyAssistantSpeechCancelled(state, event) {
    const { responseId, atMs } = event;
    const next = copyState(state);
    if (next.activeAssistantSpeechId === responseId) {
        next.activeAssistantSpeechId = undefined;
    }
    // NOTE(review): no function in this module clears `interruption` once a
    // barge-in has set it, so a later cancellation unrelated to a barge-in also
    // takes this branch. Confirm that is intended.
    const callerInterrupted = Boolean(next.interruption);
    next.floorOwner = callerInterrupted ? "caller" : "none";
    next.phase = callerInterrupted ? "caller-speaking" : phaseAfterAssistantSpeech(next);
    const verdict = decision(true, "allow", "assistant_speech_cancelled", { atMs, responseId });
    return { event, state: next, decision: verdict };
}
|
|
231
|
+
/**
 * Transition for "tool.call.started".
 *
 * A tool call id that is already tracked is ignored (state untouched).
 * Otherwise the call is added to the pending list with status "running",
 * attributed to the event's turn or, failing that, the latest caller turn.
 * The phase becomes "tool-running" only when nobody owns the floor.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{toolCallId: string, toolName: string, turnId?: string, atMs: number}} event
 * @returns {{event: object, state: object, decision: object}} transition.
 */
function applyToolStarted(state, event) {
    const { toolCallId, toolName, atMs } = event;
    const details = { atMs, toolCallId };
    const alreadyTracked = state.toolCalls[toolCallId];
    if (alreadyTracked) {
        const ignored = decision(true, "allow", "duplicate_tool_start_ignored", details);
        return { event, state, decision: ignored };
    }
    const next = copyState(state);
    next.pendingToolCallIds = withUnique(next.pendingToolCallIds, toolCallId);
    const record = {
        toolCallId,
        toolName,
        turnId: event.turnId ?? next.latestCallerTurnId,
        status: "running",
        startedAtMs: atMs,
    };
    setToolState(next, record);
    if (next.floorOwner === "none") {
        next.phase = "tool-running";
    }
    const started = decision(true, "allow", "tool_started", details);
    return { event, state: next, decision: started };
}
|
|
252
|
+
/**
 * Transition for "tool.holding.spoken".
 *
 * Evaluates the holding-phrase policy and reports its decision. The floor
 * state is never changed by this event, whether the phrase was allowed or
 * not, so the incoming `state` object is returned as is.
 *
 * @param {object} state - current floor state; returned unchanged.
 * @param {{toolCallId: string, text?: string, atMs: number}} event
 * @returns {{event: object, state: object, decision: object}} transition whose
 *   decision is the policy decision stamped with `atMs`.
 */
function applyToolHoldingSpoken(state, event) {
    const holdingDecision = canSpeakToolHolding(state, { toolCallId: event.toolCallId, text: event.text });
    // Allowed and refused phrases previously went through two identical
    // return statements; a single return expresses the same result.
    return { event, state, decision: { ...holdingDecision, atMs: event.atMs } };
}
|
|
258
|
+
/**
 * Transition for "tool.call.completed".
 *
 * - Unknown tool call: state untouched, "suppress" / "missing_tool_call".
 * - Tool not in status "running": state untouched, treated as a duplicate
 *   completion.
 * - Caller started a newer turn since the tool's turn: the tool is marked
 *   "stale", moved from the pending list to the stale list, and the result
 *   is suppressed.
 * - Otherwise the tool is marked "ready" and stays in the pending list.
 *
 * The phase is only updated when nobody owns the floor.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{toolCallId: string, turnId?: string, atMs: number}} event
 * @returns {{event: object, state: object, decision: object}} transition.
 */
function applyToolCompleted(state, event) {
    const { toolCallId, atMs } = event;
    const details = { atMs, toolCallId };
    const existing = toolState(state, toolCallId);
    if (!existing) {
        return { event, state, decision: decision(false, "suppress", "missing_tool_call", details) };
    }
    if (existing.status !== "running") {
        return { event, state, decision: decision(true, "allow", "duplicate_tool_completion_ignored", details) };
    }
    const turnId = event.turnId ?? existing.turnId;
    const next = copyState(state);
    const superseded = hasNewerCallerTurn(next, turnId);
    const floorIsFree = next.floorOwner === "none";
    setToolState(next, {
        ...existing,
        turnId,
        status: superseded ? "stale" : "ready",
        completedAtMs: atMs,
    });
    if (superseded) {
        next.pendingToolCallIds = withoutValue(next.pendingToolCallIds, toolCallId);
        next.staleToolCallIds = withUnique(next.staleToolCallIds, toolCallId);
        if (floorIsFree) {
            next.phase = "suppressing";
        }
        return { event, state: next, decision: decision(false, "suppress", "newer_user_turn_started", details) };
    }
    next.pendingToolCallIds = withUnique(next.pendingToolCallIds, toolCallId);
    if (floorIsFree) {
        next.phase = "tool-result-ready";
    }
    return { event, state: next, decision: decision(true, "allow", "tool_result_ready", details) };
}
|
|
294
|
+
/**
 * Transition for "tool.result.spoken".
 *
 * Defers to `canSpeakToolResult`. A refused result leaves the state
 * untouched. An accepted one marks the tool "spoken", moves it from the
 * pending list to the spoken list, and recomputes the phase from the
 * remaining pending tool calls.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{toolCallId: string, text?: string, atMs: number}} event
 * @returns {{event: object, state: object, decision: object}} transition whose
 *   decision is the policy decision stamped with `atMs`.
 */
function applyToolResultSpoken(state, event) {
    const { toolCallId, atMs } = event;
    const verdict = { ...canSpeakToolResult(state, { toolCallId, text: event.text }), atMs };
    if (!verdict.allowed) {
        return { event, state, decision: verdict };
    }
    const spokenRecord = { ...toolState(state, toolCallId), status: "spoken" };
    const next = copyState(state);
    setToolState(next, spokenRecord);
    next.pendingToolCallIds = withoutValue(next.pendingToolCallIds, toolCallId);
    next.spokenToolCallIds = withUnique(next.spokenToolCallIds, toolCallId);
    next.phase = phaseAfterAssistantSpeech(next);
    return { event, state: next, decision: verdict };
}
|
|
306
|
+
/**
 * Transition for "hangup.requested": enter the "hangup" phase.
 *
 * Sets the floor owner to "terminal" and raises `hangupRequested`; the
 * `terminal` flag itself is left as it was.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{atMs: number}} event - the hangup request.
 * @returns {{event: object, state: object, decision: object}} transition with
 *   an "allow" decision and reason "hangup_requested".
 */
function applyHangupRequested(state, event) {
    const next = Object.assign(copyState(state), {
        phase: "hangup",
        floorOwner: "terminal",
        hangupRequested: true,
    });
    const verdict = decision(true, "allow", "hangup_requested", { atMs: event.atMs });
    return { event, state: next, decision: verdict };
}
|
|
313
|
+
/**
 * Transition for "call.ended": enter the "ended" phase.
 *
 * Sets the floor owner to "terminal" and raises both `terminal` and
 * `hangupRequested`.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{atMs: number}} event - the call-ended event.
 * @returns {{event: object, state: object, decision: object}} transition with
 *   an "allow" decision and reason "call_ended".
 */
function applyCallEnded(state, event) {
    const next = Object.assign(copyState(state), {
        phase: "ended",
        floorOwner: "terminal",
        terminal: true,
        hangupRequested: true,
    });
    const verdict = decision(true, "allow", "call_ended", { atMs: event.atMs });
    return { event, state: next, decision: verdict };
}
|
|
321
|
+
/**
 * Apply one event to a floor state and return the resulting transition.
 *
 * Once a hangup has been requested, every event other than "call.ended" and
 * "hangup.requested" is suppressed without changing the state. Otherwise the
 * event is routed to the handler for its `type`.
 *
 * @param {object} state - current floor state; not modified.
 * @param {{type: string}} event - the event to apply.
 * @returns {{event: object, state: object, decision: object}} transition.
 * @throws {Error} when `event.type` is not a known voice floor event.
 */
function applyVoiceFloorEvent(state, event) {
    const { type } = event;
    const closesCall = type === "call.ended" || type === "hangup.requested";
    if (state.hangupRequested && !closesCall) {
        return suppressForHangup(state, event);
    }
    const handlers = new Map([
        ["call.connected", applyConnected],
        ["caller.speech.started", applyCallerSpeechStarted],
        ["caller.speech.ended", applyCallerSpeechEnded],
        ["caller.transcript.final", applyCallerTranscriptFinal],
        ["assistant.response.requested", applyAssistantResponseRequested],
        ["assistant.speech.started", applyAssistantSpeechStarted],
        ["assistant.speech.done", applyAssistantSpeechDone],
        ["assistant.speech.cancelled", applyAssistantSpeechCancelled],
        ["tool.call.started", applyToolStarted],
        ["tool.holding.spoken", applyToolHoldingSpoken],
        ["tool.call.completed", applyToolCompleted],
        ["tool.result.spoken", applyToolResultSpoken],
        ["hangup.requested", applyHangupRequested],
        ["call.ended", applyCallEnded],
    ]);
    const handler = handlers.get(type);
    if (!handler) {
        throw new Error(`unknown voice floor event: ${String(event.type)}`);
    }
    return handler(state, event);
}
|
|
357
|
+
/**
 * Replay a sequence of events from the initial floor state.
 *
 * Emits a nerves event before and after the replay; the closing one reports
 * the final phase and floor owner.
 *
 * @param {Array<object>} events - events to apply, in order.
 * @returns {{state: object, steps: Array<object>}} the final state and one
 *   transition per event.
 * @throws {Error} if any event has an unknown type (from
 *   `applyVoiceFloorEvent`).
 */
function replayVoiceFloorEvents(events) {
    const eventCount = events.length;
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_floor_replay_start",
        message: "starting Voice floor-control replay",
        meta: { events: eventCount },
    });
    const steps = [];
    let current = createInitialVoiceFloorState();
    for (const event of events) {
        const step = applyVoiceFloorEvent(current, event);
        steps.push(step);
        current = step.state;
    }
    const { phase, floorOwner } = current;
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_floor_replay_end",
        message: "finished Voice floor-control replay",
        meta: { events: events.length, phase, floorOwner },
    });
    return { state: current, steps };
}
|
|
379
|
+
/**
 * Renders a list as a comma-joined string for state summaries.
 *
 * @param {Array<string>} values - Items to join.
 * @returns {string} The items joined with "," or the literal "none" when the list is empty.
 */
function listSummary(values) {
    if (values.length > 0) {
        return values.join(",");
    }
    return "none";
}
|
|
382
|
+
/**
 * Builds a single-line, space-separated summary of a Voice floor state.
 *
 * Phase, floor owner and the pending/stale tool lists are always present;
 * active speech, interruption, hangup and terminal markers appear only when
 * the corresponding state field is truthy.
 *
 * @param {object} state - Floor state to describe.
 * @returns {string} Summary such as "phase=idle floor=none pendingTools=none staleTools=none".
 */
function summarizeVoiceFloorState(state) {
    const { interruption } = state;
    const segments = [
        `phase=${state.phase}`,
        `floor=${state.floorOwner}`,
        ...(state.activeAssistantSpeechId ? [`activeSpeech=${state.activeAssistantSpeechId}`] : []),
        `pendingTools=${listSummary(state.pendingToolCallIds)}`,
        `staleTools=${listSummary(state.staleToolCallIds)}`,
        ...(interruption ? [`interruption=${interruption.turnId}@${interruption.interruptedSpeechId}`] : []),
        ...(state.hangupRequested ? ["hangup=requested"] : []),
        ...(state.terminal ? ["terminal=true"] : []),
    ];
    return segments.join(" ");
}
|
|
@@ -39,6 +39,25 @@ function lowerText(value) {
|
|
|
39
39
|
/**
 * Appends one grading finding to the caller-owned findings list (in place).
 *
 * @param {Array<object>} findings - List that collects findings.
 * @param {object} finding - Finding to append.
 * @returns {void}
 */
function pushFinding(findings, finding) {
    findings.push(finding);
}
|
|
42
|
+
/**
 * Builds the floor diagnostic attached to a grading finding.
 *
 * Fields set on the event itself win; otherwise the value from the most recent
 * floor snapshot (if any) is used. `speechDecision` is read from the event
 * only. The event's `correlationId` is reported as `responseId` for
 * speech-policy/response events and as `toolCallId` for tool-result events.
 *
 * @param {object} event - Timeline event the finding is about.
 * @param {object} [floor] - Latest floor snapshot; may be undefined.
 * @returns {object} Diagnostic record; absent fields are present with value `undefined`.
 */
function floorDiagnostic(event, floor) {
    const kind = event.type;
    const carriesResponseId = kind === "speech.policy.decision" || kind === "response.requested";
    const carriesToolCallId = kind === "tool.result.ready" || kind === "tool.result.spoken";
    // Prefer the event's own value, then fall back to the floor snapshot.
    const inherited = (key) => event[key] ?? floor?.[key];
    return {
        phase: inherited("floorPhase"),
        floorOwner: inherited("floorOwner"),
        speechDecision: event.speechDecision,
        decisionReason: inherited("decisionReason"),
        responseId: carriesResponseId ? event.correlationId : undefined,
        toolCallId: carriesToolCallId ? event.correlationId : undefined,
        pendingSpeechId: inherited("pendingSpeechId"),
        activeAssistantSpeechId: inherited("activeAssistantSpeechId"),
        pendingToolCallIds: inherited("pendingToolCallIds"),
        staleToolCallIds: inherited("staleToolCallIds"),
        interruptionTurnId: inherited("interruptionTurnId"),
    };
}
|
|
42
61
|
function gradeFirstAudio(events, expectation, findings) {
|
|
43
62
|
const connected = firstEvent(events, "call.connected");
|
|
44
63
|
const firstAudio = firstEvent(events, "assistant.audio.started");
|
|
@@ -256,6 +275,89 @@ function gradeOverlappingResponses(events, findings) {
|
|
|
256
275
|
}
|
|
257
276
|
}
|
|
258
277
|
}
|
|
278
|
+
/**
 * Reports whether a floor snapshot says the caller currently holds the floor.
 *
 * @param {object} [floor] - Latest floor snapshot; may be undefined.
 * @returns {boolean} True when the owner is "caller" or the phase is
 *   "caller-speaking" or "interrupted".
 */
function floorIsCallerOwned(floor) {
    if (floor?.floorOwner === "caller") {
        return true;
    }
    const phase = floor?.floorPhase;
    return phase === "caller-speaking" || phase === "interrupted";
}
|
|
281
|
+
/**
 * Reports whether a floor snapshot describes a call that is hanging up or over.
 *
 * @param {object} [floor] - Latest floor snapshot; may be undefined.
 * @returns {boolean} True when the owner is "terminal" or the phase is
 *   "hangup" or "ended".
 */
function floorIsTerminal(floor) {
    if (floor?.floorOwner === "terminal") {
        return true;
    }
    const phase = floor?.floorPhase;
    return phase === "hangup" || phase === "ended";
}
|
|
284
|
+
/**
 * Grades a timeline against the duplex floor-control policy and appends a
 * "fail" finding for every violation.
 *
 * While walking the events in order it tracks the latest `floor.state.changed`
 * snapshot and the time hangup was first requested, then flags:
 * - `speech_allowed_while_caller_has_floor` / `speech_allowed_after_hangup`
 *   for `speech.policy.decision` events whose decision is "allow";
 * - `stale_tool_result_spoken`, `tool_result_spoken_while_caller_has_floor`
 *   and `tool_result_spoken_after_hangup` for `tool.result.spoken` events
 *   (first matching rule wins);
 * - `response_after_hangup` for `response.requested` events timestamped
 *   strictly after the hangup request.
 *
 * @param {Array<object>} events - Timeline events, in the order they should be graded.
 * @param {Array<object>} findings - Findings list; violations are appended in place.
 * @returns {void}
 */
function gradeDuplexFloorPolicy(events, findings) {
    let floor;
    let hangupAtMs;
    // Every violation shares the same finding shape; only code and message vary.
    const fail = (code, message, event) => {
        pushFinding(findings, {
            code,
            severity: "fail",
            message,
            source: event.source,
            atMs: event.atMs,
            floor: floorDiagnostic(event, floor),
        });
    };
    for (const event of events) {
        switch (event.type) {
            case "floor.state.changed":
                floor = event;
                break;
            case "call.hangup.requested":
                // Keep the EARLIEST hangup time. A trace can contain more than one
                // hangup request; letting a later one overwrite the timestamp would
                // let a response that shares the later timestamp escape the
                // `response_after_hangup` check even though hangup began earlier.
                if (hangupAtMs === undefined) {
                    hangupAtMs = event.atMs;
                }
                break;
            case "speech.policy.decision":
                if (event.speechDecision !== "allow") {
                    break;
                }
                if (floorIsCallerOwned(floor)) {
                    fail("speech_allowed_while_caller_has_floor", "Voice speech policy allowed assistant speech while the caller owned the floor.", event);
                }
                else if (floorIsTerminal(floor) || hangupAtMs !== undefined) {
                    fail("speech_allowed_after_hangup", "Voice speech policy allowed assistant speech after hangup began.", event);
                }
                break;
            case "tool.result.spoken":
                if (event.correlationId && floor?.staleToolCallIds?.includes(event.correlationId)) {
                    fail("stale_tool_result_spoken", "Voice spoke a tool result that the floor model had already marked stale.", event);
                }
                else if (floorIsCallerOwned(floor)) {
                    fail("tool_result_spoken_while_caller_has_floor", "Voice spoke a tool result while the caller owned the floor.", event);
                }
                else if (floorIsTerminal(floor) || hangupAtMs !== undefined) {
                    fail("tool_result_spoken_after_hangup", "Voice spoke a tool result after hangup began.", event);
                }
                break;
            case "response.requested":
                if (hangupAtMs !== undefined && event.atMs > hangupAtMs) {
                    fail("response_after_hangup", "Voice requested a new response after hangup had already been requested.", event);
                }
                break;
            default:
                break;
        }
    }
}
|
|
259
361
|
/**
 * Lists the distinct transports named by the events' `source` records.
 *
 * @param {Array<object>} events - Timeline events; events without a `source` are ignored.
 * @returns {Array<string>} Unique `source.transport` values in default sort order.
 */
function collectTransportSources(events) {
    const transports = new Set();
    for (const event of events) {
        if (event.source) {
            transports.add(event.source.transport);
        }
    }
    return Array.from(transports).sort();
}
|
|
@@ -284,6 +386,7 @@ function gradeVoiceRealtimeEvalTimeline(scenarioId, timeline, expectation) {
|
|
|
284
386
|
if (expectation.requireHangup)
|
|
285
387
|
gradeHangup(events, findings);
|
|
286
388
|
gradeOverlappingResponses(events, findings);
|
|
389
|
+
gradeDuplexFloorPolicy(events, findings);
|
|
287
390
|
const report = {
|
|
288
391
|
scenarioId: normalizedScenarioId,
|
|
289
392
|
passed: findings.every((finding) => finding.severity !== "fail"),
|
|
@@ -57,17 +57,22 @@ const normalizedEvents = new Set([
|
|
|
57
57
|
"call.connected",
|
|
58
58
|
"call.ended",
|
|
59
59
|
"call.hangup.requested",
|
|
60
|
+
"floor.state.changed",
|
|
60
61
|
"response.requested",
|
|
61
62
|
"response.truncated",
|
|
62
63
|
"session.updated",
|
|
64
|
+
"speech.policy.decision",
|
|
63
65
|
"tool.call.completed",
|
|
64
66
|
"tool.call.started",
|
|
65
67
|
"tool.holding.started",
|
|
68
|
+
"tool.result.ready",
|
|
69
|
+
"tool.result.spoken",
|
|
66
70
|
"transport.playback_cleared",
|
|
67
71
|
"user.transcript.done",
|
|
68
72
|
"voice.context.injected",
|
|
69
73
|
]);
|
|
70
74
|
const rawEventMap = new Map([
|
|
75
|
+
["voice.floor.state.changed", "floor.state.changed"],
|
|
71
76
|
["openai.realtime.call.hangup.sent", "call.hangup.requested"],
|
|
72
77
|
["openai.realtime.conversation.item.truncate.sent", "response.truncated"],
|
|
73
78
|
["openai.realtime.input_audio_buffer.speech_started", "barge_in.detected"],
|
|
@@ -85,10 +90,28 @@ const rawEventMap = new Map([
|
|
|
85
90
|
["twilio.media.clear.sent", "transport.playback_cleared"],
|
|
86
91
|
["twilio.media.start", "call.connected"],
|
|
87
92
|
["voice.hangup.requested", "call.hangup.requested"],
|
|
93
|
+
["voice.speech.policy.decision", "speech.policy.decision"],
|
|
88
94
|
["voice.tool_holding.started", "tool.holding.started"],
|
|
95
|
+
["voice.tool.result.ready", "tool.result.ready"],
|
|
96
|
+
["voice.tool.result.spoken", "tool.result.spoken"],
|
|
89
97
|
]);
|
|
90
98
|
// Closed vocabularies used when validating trace input.
const expectedOutcomes = new Set([
    "expected-fail",
    "fail",
    "pass",
]);
const expectationProfiles = new Set([
    "voice-phone-default",
]);
// Values accepted for an event's `floorOwner` field.
const floorOwners = new Set([
    "assistant",
    "caller",
    "none",
    "terminal",
]);
// Values accepted for an event's `floorPhase` field.
const floorPhases = new Set([
    "caller-speaking",
    "ended",
    "hangup",
    "idle",
    "interrupted",
    "listening",
    "speaking",
    "suppressing",
    "thinking",
    "tool-result-ready",
    "tool-running",
]);
// Values accepted for an event's `speechDecision` field.
const speechDecisions = new Set([
    "allow",
    "cancel",
    "delay",
    "suppress",
]);
|
|
92
115
|
/**
 * Narrows a value to a non-null, non-array object.
 *
 * @param {*} value - Any value.
 * @returns {object|undefined} The same value when it is an object that is
 *   neither null nor an array; otherwise undefined.
 */
function objectRecord(value) {
    const isRecord = value !== null && typeof value === "object" && !Array.isArray(value);
    return isRecord ? value : undefined;
}
|
|
@@ -107,6 +130,35 @@ function optionalString(value, name, sourceLabel) {
|
|
|
107
130
|
throw new Error(label(sourceLabel, `${name} must be a string`));
|
|
108
131
|
return value;
|
|
109
132
|
}
|
|
133
|
+
/**
 * Validates an optional array-of-strings field.
 *
 * @param {*} value - Raw field value.
 * @param {string} name - Field name used in the error message.
 * @param {string} sourceLabel - Passed to `label` to build the error message.
 * @returns {Array<string>|undefined} A fresh copy of the array, or undefined when the field is absent.
 * @throws {Error} When the value is defined but is not an array containing only strings.
 */
function optionalStringArray(value, name, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const isStringArray = Array.isArray(value) && value.every((item) => typeof item === "string");
    if (!isStringArray) {
        throw new Error(label(sourceLabel, `${name} must be an array of strings`));
    }
    // Copy so the parsed result does not alias the raw input array.
    return Array.from(value);
}
|
|
141
|
+
/**
 * Validates an optional `floorOwner` field against the `floorOwners` set.
 *
 * @param {*} value - Raw field value.
 * @param {string} sourceLabel - Passed to `label` to build the error message.
 * @returns {string|undefined} The value when supported, or undefined when absent.
 * @throws {Error} When the value is defined but is not a supported floor owner.
 */
function optionalFloorOwner(value, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const supported = typeof value === "string" && floorOwners.has(value);
    if (supported) {
        return value;
    }
    throw new Error(label(sourceLabel, "floorOwner is unsupported"));
}
|
|
148
|
+
/**
 * Validates an optional `floorPhase` field against the `floorPhases` set.
 *
 * @param {*} value - Raw field value.
 * @param {string} sourceLabel - Passed to `label` to build the error message.
 * @returns {string|undefined} The value when supported, or undefined when absent.
 * @throws {Error} When the value is defined but is not a supported floor phase.
 */
function optionalFloorPhase(value, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const supported = typeof value === "string" && floorPhases.has(value);
    if (supported) {
        return value;
    }
    throw new Error(label(sourceLabel, "floorPhase is unsupported"));
}
|
|
155
|
+
/**
 * Validates an optional `speechDecision` field against the `speechDecisions` set.
 *
 * @param {*} value - Raw field value.
 * @param {string} sourceLabel - Passed to `label` to build the error message.
 * @returns {string|undefined} The value when supported, or undefined when absent.
 * @throws {Error} When the value is defined but is not a supported speech decision.
 */
function optionalSpeechDecision(value, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const supported = typeof value === "string" && speechDecisions.has(value);
    if (supported) {
        return value;
    }
    throw new Error(label(sourceLabel, "speechDecision is unsupported"));
}
|
|
110
162
|
function parseSource(value, sourceLabel) {
|
|
111
163
|
if (value === undefined)
|
|
112
164
|
return undefined;
|
|
@@ -191,6 +243,15 @@ function parseTraceEvent(value, index, sourceLabel) {
|
|
|
191
243
|
friendId: optionalString(raw.friendId, "friendId", eventLabel),
|
|
192
244
|
sessionKey: optionalString(raw.sessionKey, "sessionKey", eventLabel),
|
|
193
245
|
session: parseTurnDetection(raw.session, eventLabel),
|
|
246
|
+
floorOwner: optionalFloorOwner(raw.floorOwner, eventLabel),
|
|
247
|
+
floorPhase: optionalFloorPhase(raw.floorPhase, eventLabel),
|
|
248
|
+
speechDecision: optionalSpeechDecision(raw.speechDecision, eventLabel),
|
|
249
|
+
decisionReason: optionalString(raw.decisionReason, "decisionReason", eventLabel),
|
|
250
|
+
pendingSpeechId: optionalString(raw.pendingSpeechId, "pendingSpeechId", eventLabel),
|
|
251
|
+
activeAssistantSpeechId: optionalString(raw.activeAssistantSpeechId, "activeAssistantSpeechId", eventLabel),
|
|
252
|
+
pendingToolCallIds: optionalStringArray(raw.pendingToolCallIds, "pendingToolCallIds", eventLabel),
|
|
253
|
+
staleToolCallIds: optionalStringArray(raw.staleToolCallIds, "staleToolCallIds", eventLabel),
|
|
254
|
+
interruptionTurnId: optionalString(raw.interruptionTurnId, "interruptionTurnId", eventLabel),
|
|
194
255
|
ignored: ignored || undefined,
|
|
195
256
|
ignoreReason: optionalString(raw.ignoreReason, "ignoreReason", eventLabel),
|
|
196
257
|
};
|
|
@@ -276,6 +337,15 @@ function toTimelineEvent(event, redacted) {
|
|
|
276
337
|
friendId: event.friendId,
|
|
277
338
|
sessionKey: event.sessionKey,
|
|
278
339
|
session: event.session,
|
|
340
|
+
floorOwner: event.floorOwner,
|
|
341
|
+
floorPhase: event.floorPhase,
|
|
342
|
+
speechDecision: event.speechDecision,
|
|
343
|
+
decisionReason: event.decisionReason,
|
|
344
|
+
pendingSpeechId: event.pendingSpeechId,
|
|
345
|
+
activeAssistantSpeechId: event.activeAssistantSpeechId,
|
|
346
|
+
pendingToolCallIds: event.pendingToolCallIds,
|
|
347
|
+
staleToolCallIds: event.staleToolCallIds,
|
|
348
|
+
interruptionTurnId: event.interruptionTurnId,
|
|
279
349
|
};
|
|
280
350
|
}
|
|
281
351
|
function sortedTimeline(events) {
|
|
@@ -376,6 +446,63 @@ function sourceForSummary(source) {
|
|
|
376
446
|
return "";
|
|
377
447
|
return source.id ? ` ${source.transport}/${source.id}` : ` ${source.transport}`;
|
|
378
448
|
}
|
|
449
|
+
/**
 * Renders an optional list for a report line.
 *
 * @param {Array<string>} [values] - Items to join; may be missing.
 * @returns {string|undefined} The items joined with ",", or undefined when the
 *   list is missing or empty so the caller can omit the field.
 */
function listForSummary(values) {
    if (!values || !(values.length > 0)) {
        return undefined;
    }
    return values.join(",");
}
|
|
452
|
+
/**
 * Formats a floor diagnostic as space-separated `key=value` pairs.
 *
 * Only truthy fields are rendered, always in the fixed order listed below.
 *
 * @param {object} floor - Floor diagnostic record.
 * @returns {string} The rendered pairs, or "none" when no field is set.
 */
function floorForSummary(floor) {
    // [label, value] pairs in output order; tool lists are pre-joined.
    const fields = [
        ["phase", floor.phase],
        ["floor", floor.floorOwner],
        ["pendingSpeech", floor.pendingSpeechId],
        ["pendingTools", listForSummary(floor.pendingToolCallIds)],
        ["staleTools", listForSummary(floor.staleToolCallIds)],
        ["interruption", floor.interruptionTurnId],
        ["reason", floor.decisionReason],
        ["activeSpeech", floor.activeAssistantSpeechId],
        ["decision", floor.speechDecision],
        ["response", floor.responseId],
        ["tool", floor.toolCallId],
    ];
    const rendered = fields
        .filter(([, value]) => value)
        .map(([key, value]) => `${key}=${value}`)
        .join(" ");
    return rendered || "none";
}
|
|
480
|
+
/**
 * Builds the ` floor(...)` suffix appended to an event line in the trace report.
 *
 * @param {object} event - Timeline event.
 * @returns {string} An empty string when the event carries none of the floor
 *   fields; otherwise " floor(<summary>)" where the summary comes from
 *   `floorForSummary`. The event's `correlationId` is passed as `responseId`
 *   for response/speech-policy events and as `toolCallId` for tool-result events.
 */
function floorEventForSummary(event) {
    const floorFields = [
        event.floorPhase,
        event.floorOwner,
        event.speechDecision,
        event.decisionReason,
        event.pendingSpeechId,
        event.activeAssistantSpeechId,
        event.pendingToolCallIds,
        event.staleToolCallIds,
        event.interruptionTurnId,
    ];
    // An event with no floor data gets no suffix, even if it has a correlationId.
    if (floorFields.every((field) => field === undefined)) {
        return "";
    }
    const kind = event.type;
    const carriesResponseId = kind === "response.requested" || kind === "speech.policy.decision";
    const carriesToolCallId = kind === "tool.result.ready" || kind === "tool.result.spoken";
    const summary = floorForSummary({
        phase: event.floorPhase,
        floorOwner: event.floorOwner,
        speechDecision: event.speechDecision,
        decisionReason: event.decisionReason,
        responseId: carriesResponseId ? event.correlationId : undefined,
        toolCallId: carriesToolCallId ? event.correlationId : undefined,
        pendingSpeechId: event.pendingSpeechId,
        activeAssistantSpeechId: event.activeAssistantSpeechId,
        pendingToolCallIds: event.pendingToolCallIds,
        staleToolCallIds: event.staleToolCallIds,
        interruptionTurnId: event.interruptionTurnId,
    });
    return ` floor(${summary})`;
}
|
|
379
506
|
function formatVoiceRealtimeEvalTraceReport(result) {
|
|
380
507
|
const lines = [
|
|
381
508
|
`trace ${result.traceId} scenario ${result.scenarioId}`,
|
|
@@ -388,11 +515,13 @@ function formatVoiceRealtimeEvalTraceReport(result) {
|
|
|
388
515
|
for (const finding of result.report.findings) {
|
|
389
516
|
const message = result.artifact.redacted ? "[redacted]" : finding.message;
|
|
390
517
|
lines.push(`- ${finding.code}${finding.atMs === undefined ? "" : ` at ${finding.atMs}ms`}: ${message}`);
|
|
518
|
+
if (finding.floor)
|
|
519
|
+
lines.push(` floor: ${floorForSummary(finding.floor)}`);
|
|
391
520
|
}
|
|
392
521
|
}
|
|
393
522
|
lines.push("events:");
|
|
394
523
|
for (const event of result.timeline) {
|
|
395
|
-
lines.push(`- ${event.atMs}ms ${event.type}${sourceForSummary(event.source)}${textForSummary(result, event)}`);
|
|
524
|
+
lines.push(`- ${event.atMs}ms ${event.type}${sourceForSummary(event.source)}${textForSummary(result, event)}${floorEventForSummary(event)}`);
|
|
396
525
|
}
|
|
397
526
|
lines.push(`ignored provider events: ${result.ignoredEvents.length}`);
|
|
398
527
|
for (const event of result.ignoredEvents) {
|