@ouro.bot/cli 0.1.0-alpha.586 → 0.1.0-alpha.587

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,14 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.587",
6
+ "changes": [
7
+ "Voice now has a pure duplex floor-control model with typed transitions, state summaries, and decision reasons for caller floor ownership, assistant speech, tool-running/tool-result states, stale suppression, and hangup terminal behavior.",
8
+ "Realtime Voice evals and trace replay now understand floor-state, speech-policy, and tool-result events, failing deterministic traces when assistant speech is allowed while the caller owns the floor, stale tool results are spoken, or responses are requested after hangup.",
9
+ "Voice trace reports now include floor diagnostics such as phase, floor owner, pending speech, pending/stale tool ids, interruption turn, and decision reason, with new golden fixtures for interruption, tool-duplex, stale suppression, progress acknowledgement, and hangup-with-pending-tool scenarios."
10
+ ]
11
+ },
4
12
  {
5
13
  "version": "0.1.0-alpha.586",
6
14
  "changes": [
@@ -0,0 +1,398 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createInitialVoiceFloorState = createInitialVoiceFloorState;
4
+ exports.canRequestVoiceResponse = canRequestVoiceResponse;
5
+ exports.canSpeakToolHolding = canSpeakToolHolding;
6
+ exports.canSpeakToolResult = canSpeakToolResult;
7
+ exports.applyVoiceFloorEvent = applyVoiceFloorEvent;
8
+ exports.replayVoiceFloorEvents = replayVoiceFloorEvents;
9
+ exports.summarizeVoiceFloorState = summarizeVoiceFloorState;
10
+ const runtime_1 = require("../../nerves/runtime");
11
+ const MAX_TOOL_HOLDING_WORDS = 6;
12
/**
 * Builds the starting voice floor state.
 *
 * The state begins in the "idle" phase with no floor owner, both hangup
 * flags cleared, and empty tool / caller-turn bookkeeping. Each call returns
 * freshly allocated arrays and a fresh `toolCalls` map.
 *
 * @returns {object} a new initial floor state
 */
function createInitialVoiceFloorState() {
    const initialState = {
        phase: "idle",
        floorOwner: "none",
        terminal: false,
        hangupRequested: false,
        pendingToolCallIds: [],
        staleToolCallIds: [],
        spokenToolCallIds: [],
        callerTurnIds: [],
        toolCalls: {},
    };
    return initialState;
}
25
/**
 * Assembles a floor-control decision record.
 *
 * @param {boolean} allowed - whether the requested action may proceed
 * @param {string} action - decision verb (callers in this file pass "allow", "delay", "suppress" or "cancel")
 * @param {string} reason - reason code for the decision
 * @param {object} [details] - extra fields copied onto the record after the three core fields
 * @returns {object} `{ allowed, action, reason, ...details }`
 */
function decision(allowed, action, reason, details = {}) {
    const record = { allowed, action, reason };
    return Object.assign(record, details);
}
28
/**
 * Clones a floor state so a transition can modify the copy freely.
 *
 * The four id arrays and the `toolCalls` map are duplicated one level deep;
 * every other field (including the individual tool records) is carried over
 * by reference.
 *
 * @param {object} state - floor state to clone
 * @returns {object} the cloned state
 */
function copyState(state) {
    const clone = Object.assign({}, state);
    clone.pendingToolCallIds = Array.from(state.pendingToolCallIds);
    clone.staleToolCallIds = Array.from(state.staleToolCallIds);
    clone.spokenToolCallIds = Array.from(state.spokenToolCallIds);
    clone.callerTurnIds = Array.from(state.callerTurnIds);
    clone.toolCalls = { ...state.toolCalls };
    return clone;
}
38
/**
 * Returns `values` with `value` appended unless it is already present.
 *
 * When the value is already present the input array itself is returned;
 * otherwise a new array is returned and the input is left unmodified.
 *
 * @param {Array} values - existing list
 * @param {*} value - entry to add
 * @returns {Array} list containing `value`
 */
function withUnique(values, value) {
    if (values.includes(value)) {
        return values;
    }
    const extended = Array.from(values);
    extended.push(value);
    return extended;
}
41
/**
 * Returns a new array holding every entry of `values` that is not strictly
 * equal to `value`. The input array is not modified.
 *
 * @param {Array} values - existing list
 * @param {*} value - entry to drop
 * @returns {Array} filtered copy
 */
function withoutValue(values, value) {
    const kept = [];
    for (const candidate of values) {
        if (candidate !== value) {
            kept.push(candidate);
        }
    }
    return kept;
}
44
/**
 * Records `turnId` as the most recent caller turn on `state` (mutating it)
 * and appends it to `callerTurnIds` if it has not been seen before.
 *
 * @param {object} state - floor state to update in place
 * @param {string} turnId - caller turn identifier
 */
function rememberCallerTurn(state, turnId) {
    state.latestCallerTurnId = turnId;
    const knownTurns = state.callerTurnIds;
    if (!knownTurns.includes(turnId)) {
        state.callerTurnIds = [...knownTurns, turnId];
    }
}
48
/**
 * Reports whether the latest caller turn was recorded after `turnId`.
 *
 * Returns false when either id is missing, when they are the same turn, or
 * when `turnId` was never recorded in `callerTurnIds`.
 *
 * @param {object} state - floor state holding `latestCallerTurnId` and `callerTurnIds`
 * @param {string|undefined} turnId - turn to compare against the latest one
 * @returns {boolean} true only when the latest turn sits later in `callerTurnIds`
 */
function hasNewerCallerTurn(state, turnId) {
    const latestTurnId = state.latestCallerTurnId;
    const comparable = Boolean(turnId) && Boolean(latestTurnId) && latestTurnId !== turnId;
    if (!comparable) {
        return false;
    }
    const recordedTurns = state.callerTurnIds;
    const originalIndex = recordedTurns.indexOf(turnId);
    if (originalIndex < 0) {
        return false;
    }
    return recordedTurns.indexOf(latestTurnId) > originalIndex;
}
55
/**
 * Looks up the tracked record for a tool call.
 *
 * @param {object} state - floor state holding the `toolCalls` map
 * @param {string} toolCallId - tool call identifier
 * @returns {object|undefined} the tool record, or undefined when none is tracked
 */
function toolState(state, toolCallId) {
    const { toolCalls } = state;
    return toolCalls[toolCallId];
}
58
/**
 * Stores `nextTool` under its own `toolCallId`, replacing `state.toolCalls`
 * with a new map rather than editing the existing map object.
 *
 * @param {object} state - floor state to update in place
 * @param {object} nextTool - tool record carrying a `toolCallId`
 */
function setToolState(state, nextTool) {
    const { toolCallId } = nextTool;
    const previous = state.toolCalls;
    state.toolCalls = { ...previous, [toolCallId]: nextTool };
}
61
/**
 * Chooses the phase to fall back to based on the pending tool calls.
 *
 * A pending tool with status "ready" wins over one that is "running";
 * pending ids with no tracked record are ignored.
 *
 * @param {object} state - floor state with `pendingToolCallIds` and `toolCalls`
 * @returns {string} "tool-result-ready", "tool-running", or "listening"
 */
function phaseAfterAssistantSpeech(state) {
    let sawRunningTool = false;
    for (const toolCallId of state.pendingToolCallIds) {
        const tool = state.toolCalls[toolCallId];
        if (!tool) {
            continue;
        }
        if (tool.status === "ready") {
            return "tool-result-ready";
        }
        if (tool.status === "running") {
            sawRunningTool = true;
        }
    }
    return sawRunningTool ? "tool-running" : "listening";
}
69
/**
 * Builds the transition returned for an event that is dropped because hangup
 * is in progress: the state is passed through unchanged and the decision is
 * a "suppress" with reason "hangup_terminal".
 *
 * @param {object} state - current floor state (returned as-is)
 * @param {object} event - the suppressed event
 * @returns {{event: object, state: object, decision: object}}
 */
function suppressForHangup(state, event) {
    const { atMs } = event;
    const verdict = decision(false, "suppress", "hangup_terminal", { atMs });
    return { event, state, decision: verdict };
}
76
/**
 * Decides whether a new assistant response may be requested right now.
 *
 * Checks run in priority order: hangup/terminal (suppress), caller owns the
 * floor (delay), assistant already speaking (delay), a response already
 * pending (delay); otherwise the request is allowed. Every decision carries
 * `input.responseId`.
 *
 * @param {object} state - current floor state
 * @param {{responseId: string}} input - the response being considered
 * @returns {object} decision record
 */
function canRequestVoiceResponse(state, input) {
    const details = { responseId: input.responseId };
    const { floorOwner, phase } = state;

    const callIsEnding = state.terminal || state.hangupRequested || floorOwner === "terminal";
    if (callIsEnding) {
        return decision(false, "suppress", "hangup_terminal", details);
    }

    const callerHoldsFloor = floorOwner === "caller" || phase === "caller-speaking" || phase === "interrupted";
    if (callerHoldsFloor) {
        return decision(false, "delay", "caller_has_floor", details);
    }

    const assistantHoldsFloor = floorOwner === "assistant" || state.activeAssistantSpeechId;
    if (assistantHoldsFloor) {
        return decision(false, "delay", "assistant_has_floor", details);
    }

    if (state.pendingSpeech) {
        return decision(false, "delay", "response_pending", details);
    }

    return decision(true, "allow", "ready_for_response", details);
}
91
/**
 * Decides whether a short "holding" phrase may be spoken for a tool call.
 *
 * Checks run in priority order: hangup/terminal (suppress), caller owns the
 * floor (delay), tool unknown or already spoken (suppress), tool stale
 * (suppress), phrase longer than MAX_TOOL_HOLDING_WORDS whitespace-separated
 * words (suppress); otherwise allowed. Missing text counts as zero words.
 *
 * @param {object} state - current floor state
 * @param {{toolCallId: string, text?: string}} input - holding phrase under consideration
 * @returns {object} decision record carrying `toolCallId`
 */
function canSpeakToolHolding(state, input) {
    const { toolCallId } = input;
    const details = { toolCallId };
    const { floorOwner, phase } = state;

    const callIsEnding = state.terminal || state.hangupRequested || floorOwner === "terminal";
    if (callIsEnding) {
        return decision(false, "suppress", "hangup_terminal", details);
    }

    const callerHoldsFloor = floorOwner === "caller" || phase === "caller-speaking" || phase === "interrupted";
    if (callerHoldsFloor) {
        return decision(false, "delay", "caller_has_floor", details);
    }

    const tool = toolState(state, toolCallId);
    if (!tool || tool.status === "spoken") {
        return decision(false, "suppress", "missing_tool_call", details);
    }

    const isStale = tool.status === "stale" || state.staleToolCallIds.includes(toolCallId);
    if (isStale) {
        return decision(false, "suppress", "stale_tool_result", details);
    }

    const { text } = input;
    const wordCount = text == null ? 0 : text.trim().split(/\s+/).filter(Boolean).length;
    if (wordCount > MAX_TOOL_HOLDING_WORDS) {
        return decision(false, "suppress", "tool_holding_too_long", details);
    }

    return decision(true, "allow", "tool_presence_allowed", details);
}
111
/**
 * Decides whether a tool result may be spoken.
 *
 * Checks run in priority order: hangup/terminal (suppress), caller owns the
 * floor (delay), tool unknown or already spoken (suppress), tool stale
 * (suppress), tool not yet "ready" (delay); otherwise allowed.
 *
 * @param {object} state - current floor state
 * @param {{toolCallId: string}} input - tool result under consideration
 * @returns {object} decision record carrying `toolCallId`
 */
function canSpeakToolResult(state, input) {
    const { toolCallId } = input;
    const details = { toolCallId };
    const { floorOwner, phase } = state;

    const callIsEnding = state.terminal || state.hangupRequested || floorOwner === "terminal";
    if (callIsEnding) {
        return decision(false, "suppress", "hangup_terminal", details);
    }

    const callerHoldsFloor = floorOwner === "caller" || phase === "caller-speaking" || phase === "interrupted";
    if (callerHoldsFloor) {
        return decision(false, "delay", "caller_has_floor", details);
    }

    const tool = toolState(state, toolCallId);
    if (!tool || tool.status === "spoken") {
        return decision(false, "suppress", "missing_tool_result", details);
    }

    const isStale = tool.status === "stale" || state.staleToolCallIds.includes(toolCallId);
    if (isStale) {
        return decision(false, "suppress", "stale_tool_result", details);
    }

    if (tool.status !== "ready") {
        return decision(false, "delay", "tool_still_running", details);
    }

    return decision(true, "allow", "tool_result_ready", details);
}
130
/**
 * Handles "call.connected": moves a copy of the state to "listening" with no
 * floor owner, clears both hangup flags, and records the call id.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{callId: string, atMs: number}} event - connection event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyConnected(state, event) {
    const next = Object.assign(copyState(state), {
        phase: "listening",
        floorOwner: "none",
        terminal: false,
        hangupRequested: false,
        callId: event.callId,
    });
    const verdict = decision(true, "allow", "call_connected", { atMs: event.atMs });
    return { event, state: next, decision: verdict };
}
139
/**
 * Handles "caller.speech.started": the caller takes the floor.
 *
 * If assistant speech was active in the incoming state, this is a barge-in:
 * the phase becomes "interrupted", the interruption is recorded, and the
 * decision is a "cancel" naming the interrupted speech. Otherwise the phase
 * becomes "caller-speaking" and the decision is a plain "allow".
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{turnId: string, atMs: number}} event - speech-start event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyCallerSpeechStarted(state, event) {
    const { turnId, atMs } = event;
    const interruptedSpeechId = state.activeAssistantSpeechId;
    const next = copyState(state);
    rememberCallerTurn(next, turnId);
    next.floorOwner = "caller";

    if (!interruptedSpeechId) {
        next.phase = "caller-speaking";
        const started = decision(true, "allow", "caller_floor_started", { atMs });
        return { event, state: next, decision: started };
    }

    next.phase = "interrupted";
    next.interruption = { turnId, interruptedSpeechId, atMs };
    const bargeIn = decision(false, "cancel", "caller_barge_in", {
        atMs,
        responseId: interruptedSpeechId,
        interruptionTurnId: turnId,
    });
    return { event, state: next, decision: bargeIn };
}
163
/**
 * Handles "caller.speech.ended": records the turn and, unless assistant
 * speech is still marked active, releases the floor and returns to
 * "listening".
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{turnId: string, atMs: number}} event - speech-end event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyCallerSpeechEnded(state, event) {
    const next = copyState(state);
    rememberCallerTurn(next, event.turnId);
    const assistantStillActive = Boolean(next.activeAssistantSpeechId);
    if (!assistantStillActive) {
        next.floorOwner = "none";
        next.phase = "listening";
    }
    const released = decision(true, "allow", "caller_floor_released", { atMs: event.atMs });
    return { event, state: next, decision: released };
}
172
/**
 * Handles "caller.transcript.final": records the turn and, unless assistant
 * speech is still marked active, releases the floor and moves to "thinking".
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{turnId: string, atMs: number}} event - final-transcript event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyCallerTranscriptFinal(state, event) {
    const next = copyState(state);
    rememberCallerTurn(next, event.turnId);
    const assistantStillActive = Boolean(next.activeAssistantSpeechId);
    if (!assistantStillActive) {
        next.floorOwner = "none";
        next.phase = "thinking";
    }
    const ready = decision(true, "allow", "caller_turn_ready", { atMs: event.atMs });
    return { event, state: next, decision: ready };
}
181
/**
 * Handles "assistant.response.requested".
 *
 * The request is checked with canRequestVoiceResponse. When it is refused
 * the original state object is returned untouched; when allowed, a copy
 * moves to "thinking" with no floor owner and remembers the pending speech.
 * The returned decision is the check's decision plus the event timestamp.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{responseId: string, reason: string, atMs: number}} event - request event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyAssistantResponseRequested(state, event) {
    const { responseId, reason, atMs } = event;
    const verdict = { ...canRequestVoiceResponse(state, { responseId, reason }), atMs };
    if (!verdict.allowed) {
        return { event, state, decision: verdict };
    }
    const next = copyState(state);
    next.phase = "thinking";
    next.floorOwner = "none";
    next.pendingSpeech = { responseId, reason };
    return { event, state: next, decision: verdict };
}
191
/**
 * Handles "assistant.speech.started".
 *
 * When the caller owns the floor the state is returned untouched with a
 * "delay" decision. Otherwise a copy moves to "speaking", the assistant
 * takes the floor, the speech id becomes active, and any pending speech is
 * cleared.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{responseId: string, atMs: number}} event - speech-start event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyAssistantSpeechStarted(state, event) {
    const { responseId, atMs } = event;
    const details = { atMs, responseId };
    if (state.floorOwner === "caller") {
        const delayed = decision(false, "delay", "caller_has_floor", details);
        return { event, state, decision: delayed };
    }
    const allowed = decision(true, "allow", "assistant_speech_allowed", details);
    const next = copyState(state);
    next.phase = "speaking";
    next.floorOwner = "assistant";
    next.activeAssistantSpeechId = responseId;
    next.pendingSpeech = undefined;
    return { event, state: next, decision: allowed };
}
204
/**
 * Handles "assistant.speech.done".
 *
 * On a copy of the state: clears the active speech id if it matches the
 * finished response, hands the floor back if the assistant owned it, and
 * recomputes the phase from the pending tool calls.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{responseId: string, atMs: number}} event - speech-done event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyAssistantSpeechDone(state, event) {
    const { responseId, atMs } = event;
    const next = copyState(state);
    const finishedActiveSpeech = next.activeAssistantSpeechId === responseId;
    if (finishedActiveSpeech) {
        next.activeAssistantSpeechId = undefined;
    }
    if (next.floorOwner === "assistant") {
        next.floorOwner = "none";
    }
    next.phase = phaseAfterAssistantSpeech(next);
    const done = decision(true, "allow", "assistant_speech_done", { atMs, responseId });
    return { event, state: next, decision: done };
}
213
/**
 * Handles "assistant.speech.cancelled".
 *
 * On a copy of the state, clears the active speech id if it matches the
 * cancelled response. The floor then goes to whoever legitimately holds it:
 *   - if the caller currently owns the floor (for example after a barge-in),
 *     the caller keeps it and the phase becomes "caller-speaking";
 *   - otherwise the floor is released and the phase is recomputed from the
 *     pending tool calls.
 *
 * The branch is keyed on the current floor owner rather than on the mere
 * presence of `state.interruption`. The interruption record is never cleared
 * by any transition in this module, so testing it alone would hand the floor
 * to the caller on every later cancellation once a single barge-in had
 * occurred, leaving response requests delayed with "caller_has_floor" even
 * though the caller is silent.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{responseId: string, atMs: number}} event - cancellation event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyAssistantSpeechCancelled(state, event) {
    const { responseId, atMs } = event;
    const next = copyState(state);
    if (next.activeAssistantSpeechId === responseId) {
        next.activeAssistantSpeechId = undefined;
    }
    if (next.floorOwner === "caller") {
        // Caller barged in (or is simply mid-utterance): never take the floor away here.
        next.phase = "caller-speaking";
    }
    else {
        next.floorOwner = "none";
        next.phase = phaseAfterAssistantSpeech(next);
    }
    return {
        event,
        state: next,
        decision: decision(true, "allow", "assistant_speech_cancelled", { atMs, responseId }),
    };
}
231
/**
 * Handles "tool.call.started".
 *
 * A tool id that is already tracked is ignored (state returned untouched).
 * Otherwise a copy of the state gains a "running" tool record, the id is
 * added to the pending list, and, if nobody owns the floor, the phase
 * becomes "tool-running". The record's turn id falls back to the latest
 * caller turn when the event does not carry one.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{toolCallId: string, toolName: string, turnId?: string, atMs: number}} event - tool-start event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyToolStarted(state, event) {
    const { toolCallId, atMs } = event;
    const details = { atMs, toolCallId };
    const alreadyTracked = Boolean(state.toolCalls[toolCallId]);
    if (alreadyTracked) {
        const ignored = decision(true, "allow", "duplicate_tool_start_ignored", details);
        return { event, state, decision: ignored };
    }
    const next = copyState(state);
    next.pendingToolCallIds = withUnique(next.pendingToolCallIds, toolCallId);
    const record = {
        toolCallId,
        toolName: event.toolName,
        turnId: event.turnId ?? next.latestCallerTurnId,
        status: "running",
        startedAtMs: atMs,
    };
    setToolState(next, record);
    if (next.floorOwner === "none") {
        next.phase = "tool-running";
    }
    const started = decision(true, "allow", "tool_started", details);
    return { event, state: next, decision: started };
}
252
/**
 * Handles "tool.holding.spoken".
 *
 * A holding phrase never changes the floor state: the incoming state object
 * is returned as-is whether or not the phrase was permitted. Only the
 * decision differs, and it is whatever canSpeakToolHolding reports, stamped
 * with the event timestamp. (The earlier form branched on `allowed` but
 * returned the same value from both arms.)
 *
 * @param {object} state - current floor state (returned unchanged)
 * @param {{toolCallId: string, text?: string, atMs: number}} event - holding-phrase event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyToolHoldingSpoken(state, event) {
    const holdingDecision = canSpeakToolHolding(state, { toolCallId: event.toolCallId, text: event.text });
    return { event, state, decision: { ...holdingDecision, atMs: event.atMs } };
}
258
/**
 * Handles "tool.call.completed".
 *
 * - Unknown tool id: suppressed, state untouched.
 * - Tool not in "running" status: treated as a duplicate completion and
 *   ignored, state untouched.
 * - A newer caller turn exists than the one the tool belongs to: the tool is
 *   marked "stale", moved from the pending list to the stale list, and the
 *   completion is suppressed.
 * - Otherwise the tool is marked "ready" and kept in the pending list.
 *
 * The phase is only changed when nobody owns the floor. The turn id on the
 * event takes precedence over the one stored on the tool record.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{toolCallId: string, turnId?: string, atMs: number}} event - completion event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyToolCompleted(state, event) {
    const { toolCallId, atMs } = event;
    const details = { atMs, toolCallId };
    const existing = toolState(state, toolCallId);

    if (!existing) {
        const missing = decision(false, "suppress", "missing_tool_call", details);
        return { event, state, decision: missing };
    }
    if (existing.status !== "running") {
        const duplicate = decision(true, "allow", "duplicate_tool_completion_ignored", details);
        return { event, state, decision: duplicate };
    }

    const turnId = event.turnId ?? existing.turnId;
    const next = copyState(state);
    const floorIsFree = next.floorOwner === "none";

    if (hasNewerCallerTurn(next, turnId)) {
        next.pendingToolCallIds = withoutValue(next.pendingToolCallIds, toolCallId);
        next.staleToolCallIds = withUnique(next.staleToolCallIds, toolCallId);
        setToolState(next, { ...existing, turnId, status: "stale", completedAtMs: atMs });
        if (floorIsFree) {
            next.phase = "suppressing";
        }
        const superseded = decision(false, "suppress", "newer_user_turn_started", details);
        return { event, state: next, decision: superseded };
    }

    setToolState(next, { ...existing, turnId, status: "ready", completedAtMs: atMs });
    next.pendingToolCallIds = withUnique(next.pendingToolCallIds, toolCallId);
    if (floorIsFree) {
        next.phase = "tool-result-ready";
    }
    const ready = decision(true, "allow", "tool_result_ready", details);
    return { event, state: next, decision: ready };
}
294
/**
 * Handles "tool.result.spoken".
 *
 * The speech is checked with canSpeakToolResult. When refused, the state is
 * returned untouched. When allowed, a copy marks the tool "spoken", moves
 * its id from the pending list to the spoken list, and recomputes the phase
 * from the remaining pending tools. The returned decision is the check's
 * decision plus the event timestamp.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{toolCallId: string, text?: string, atMs: number}} event - result-spoken event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyToolResultSpoken(state, event) {
    const { toolCallId, atMs } = event;
    const verdict = { ...canSpeakToolResult(state, { toolCallId, text: event.text }), atMs };
    if (!verdict.allowed) {
        return { event, state, decision: verdict };
    }
    const spokenTool = { ...toolState(state, toolCallId), status: "spoken" };
    const next = copyState(state);
    setToolState(next, spokenTool);
    next.pendingToolCallIds = withoutValue(next.pendingToolCallIds, toolCallId);
    next.spokenToolCallIds = withUnique(next.spokenToolCallIds, toolCallId);
    next.phase = phaseAfterAssistantSpeech(next);
    return { event, state: next, decision: verdict };
}
306
/**
 * Handles "hangup.requested": a copy of the state enters the "hangup" phase,
 * the floor becomes "terminal", and `hangupRequested` is set.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{atMs: number}} event - hangup request event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyHangupRequested(state, event) {
    const next = Object.assign(copyState(state), {
        phase: "hangup",
        floorOwner: "terminal",
        hangupRequested: true,
    });
    const requested = decision(true, "allow", "hangup_requested", { atMs: event.atMs });
    return { event, state: next, decision: requested };
}
313
/**
 * Handles "call.ended": a copy of the state enters the "ended" phase, the
 * floor becomes "terminal", and both `terminal` and `hangupRequested` are set.
 *
 * @param {object} state - current floor state (not mutated)
 * @param {{atMs: number}} event - call-ended event
 * @returns {{event: object, state: object, decision: object}}
 */
function applyCallEnded(state, event) {
    const next = Object.assign(copyState(state), {
        phase: "ended",
        floorOwner: "terminal",
        terminal: true,
        hangupRequested: true,
    });
    const ended = decision(true, "allow", "call_ended", { atMs: event.atMs });
    return { event, state: next, decision: ended };
}
321
/** Maps each supported floor event type to the transition that handles it. */
const VOICE_FLOOR_EVENT_HANDLERS = new Map([
    ["call.connected", applyConnected],
    ["caller.speech.started", applyCallerSpeechStarted],
    ["caller.speech.ended", applyCallerSpeechEnded],
    ["caller.transcript.final", applyCallerTranscriptFinal],
    ["assistant.response.requested", applyAssistantResponseRequested],
    ["assistant.speech.started", applyAssistantSpeechStarted],
    ["assistant.speech.done", applyAssistantSpeechDone],
    ["assistant.speech.cancelled", applyAssistantSpeechCancelled],
    ["tool.call.started", applyToolStarted],
    ["tool.holding.spoken", applyToolHoldingSpoken],
    ["tool.call.completed", applyToolCompleted],
    ["tool.result.spoken", applyToolResultSpoken],
    ["hangup.requested", applyHangupRequested],
    ["call.ended", applyCallEnded],
]);
/**
 * Applies one event to the floor state and returns the resulting transition.
 *
 * Once hangup has been requested, every event other than "call.ended" and
 * "hangup.requested" is suppressed without touching the state. All other
 * events are routed to their per-type handler.
 *
 * @param {object} state - current floor state
 * @param {{type: string}} event - floor event
 * @returns {{event: object, state: object, decision: object}}
 * @throws {Error} when the event type is not one of the supported types
 */
function applyVoiceFloorEvent(state, event) {
    const { type } = event;
    const survivesHangup = type === "call.ended" || type === "hangup.requested";
    if (state.hangupRequested && !survivesHangup) {
        return suppressForHangup(state, event);
    }
    const handler = VOICE_FLOOR_EVENT_HANDLERS.get(type);
    if (handler === undefined) {
        throw new Error(`unknown voice floor event: ${String(event.type)}`);
    }
    return handler(state, event);
}
357
/**
 * Replays a list of floor events from a fresh initial state.
 *
 * Emits a nerves event before and after the replay (the closing one reports
 * the final phase and floor owner) and collects the transition produced for
 * each input event, in order.
 *
 * @param {object[]} events - floor events to apply in sequence
 * @returns {{state: object, steps: object[]}} final state and per-event transitions
 */
function replayVoiceFloorEvents(events) {
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_floor_replay_start",
        message: "starting Voice floor-control replay",
        meta: { events: events.length },
    });
    const steps = [];
    let current = createInitialVoiceFloorState();
    for (const floorEvent of events) {
        const step = applyVoiceFloorEvent(current, floorEvent);
        steps.push(step);
        current = step.state;
    }
    const { phase, floorOwner } = current;
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_floor_replay_end",
        message: "finished Voice floor-control replay",
        meta: { events: events.length, phase, floorOwner },
    });
    return { state: current, steps };
}
379
/**
 * Renders a list as a comma-joined string, or "none" when it is empty.
 *
 * @param {Array} values - entries to render
 * @returns {string}
 */
function listSummary(values) {
    if (values.length > 0) {
        return values.join(",");
    }
    return "none";
}
382
/**
 * Renders a floor state as a single space-separated `key=value` line.
 *
 * Phase, floor owner, pending tools and stale tools are always present
 * (empty tool lists render as "none"); active speech, interruption, hangup
 * and terminal markers appear only when set.
 *
 * @param {object} state - floor state to describe
 * @returns {string} one-line summary
 */
function summarizeVoiceFloorState(state) {
    const { interruption } = state;
    const segments = [
        `phase=${state.phase}`,
        `floor=${state.floorOwner}`,
        state.activeAssistantSpeechId ? `activeSpeech=${state.activeAssistantSpeechId}` : undefined,
        `pendingTools=${listSummary(state.pendingToolCallIds)}`,
        `staleTools=${listSummary(state.staleToolCallIds)}`,
        interruption ? `interruption=${interruption.turnId}@${interruption.interruptedSpeechId}` : undefined,
        state.hangupRequested ? "hangup=requested" : undefined,
        state.terminal ? "terminal=true" : undefined,
    ];
    return segments.filter((segment) => segment !== undefined).join(" ");
}
@@ -39,6 +39,25 @@ function lowerText(value) {
39
39
  function pushFinding(findings, finding) {
40
40
  findings.push(finding);
41
41
  }
42
/**
 * Builds the floor diagnostic attached to a grading finding.
 *
 * Most fields prefer the value on `event` and fall back to the last
 * floor-state event when the event's value is null or undefined.
 * `speechDecision` is taken from the event only. The event's correlation id
 * is reported as `responseId` for speech-policy / response-request events
 * and as `toolCallId` for tool-result events.
 *
 * @param {object} event - the timeline event being graded
 * @param {object|undefined} floor - most recent floor-state event, if any
 * @returns {object} diagnostic record
 */
function floorDiagnostic(event, floor) {
    const { type, correlationId } = event;
    const carriesResponseId = type === "speech.policy.decision" || type === "response.requested";
    const carriesToolCallId = type === "tool.result.ready" || type === "tool.result.spoken";
    const fromEventOrFloor = (field) => event[field] ?? floor?.[field];
    return {
        phase: fromEventOrFloor("floorPhase"),
        floorOwner: fromEventOrFloor("floorOwner"),
        speechDecision: event.speechDecision,
        decisionReason: fromEventOrFloor("decisionReason"),
        responseId: carriesResponseId ? correlationId : undefined,
        toolCallId: carriesToolCallId ? correlationId : undefined,
        pendingSpeechId: fromEventOrFloor("pendingSpeechId"),
        activeAssistantSpeechId: fromEventOrFloor("activeAssistantSpeechId"),
        pendingToolCallIds: fromEventOrFloor("pendingToolCallIds"),
        staleToolCallIds: fromEventOrFloor("staleToolCallIds"),
        interruptionTurnId: fromEventOrFloor("interruptionTurnId"),
    };
}
42
61
  function gradeFirstAudio(events, expectation, findings) {
43
62
  const connected = firstEvent(events, "call.connected");
44
63
  const firstAudio = firstEvent(events, "assistant.audio.started");
@@ -256,6 +275,89 @@ function gradeOverlappingResponses(events, findings) {
256
275
  }
257
276
  }
258
277
  }
278
/**
 * Reports whether a floor-state event shows the caller holding the floor:
 * either the owner is "caller" or the phase is "caller-speaking" or
 * "interrupted". A missing floor event yields false.
 *
 * @param {object|undefined} floor - most recent floor-state event, if any
 * @returns {boolean}
 */
function floorIsCallerOwned(floor) {
    if (floor == null) {
        return false;
    }
    if (floor.floorOwner === "caller") {
        return true;
    }
    return floor.floorPhase === "caller-speaking" || floor.floorPhase === "interrupted";
}
281
/**
 * Reports whether a floor-state event shows the call winding down: either
 * the owner is "terminal" or the phase is "hangup" or "ended". A missing
 * floor event yields false.
 *
 * @param {object|undefined} floor - most recent floor-state event, if any
 * @returns {boolean}
 */
function floorIsTerminal(floor) {
    if (floor == null) {
        return false;
    }
    if (floor.floorOwner === "terminal") {
        return true;
    }
    return floor.floorPhase === "hangup" || floor.floorPhase === "ended";
}
284
/**
 * Grades a timeline against the duplex floor policy, appending a "fail"
 * finding to `findings` for each violation:
 *
 *   - an "allow" speech-policy decision while the caller owns the floor, or
 *     after hangup began;
 *   - a spoken tool result that the latest floor state lists as stale, that
 *     was spoken while the caller owns the floor, or after hangup began;
 *   - a response requested strictly later than the hangup request.
 *
 * "floor.state.changed" events are not graded themselves; each one becomes
 * the floor context for the events that follow it.
 *
 * The hangup timestamp is taken from the FIRST "call.hangup.requested" event
 * only. Later hangup events must not push the timestamp forward, otherwise a
 * response requested at the same millisecond as a repeated hangup would slip
 * past the "later than hangup" comparison.
 *
 * @param {object[]} events - timeline events, in order
 * @param {object[]} findings - findings list to append to
 */
function gradeDuplexFloorPolicy(events, findings) {
    let floor;
    let hangupAtMs;
    // Every finding produced here has the same shape; only code, message and diagnostic vary.
    const fail = (code, message, event, diagnostic) => {
        pushFinding(findings, {
            code,
            severity: "fail",
            message,
            source: event.source,
            atMs: event.atMs,
            floor: diagnostic,
        });
    };
    for (const event of events) {
        if (event.type === "floor.state.changed") {
            floor = event;
            continue;
        }
        if (event.type === "call.hangup.requested" && hangupAtMs === undefined) {
            hangupAtMs = event.atMs;
        }
        if (event.type === "speech.policy.decision" && event.speechDecision === "allow") {
            if (floorIsCallerOwned(floor)) {
                fail("speech_allowed_while_caller_has_floor", "Voice speech policy allowed assistant speech while the caller owned the floor.", event, floorDiagnostic(event, floor));
            }
            else if (floorIsTerminal(floor) || hangupAtMs !== undefined) {
                fail("speech_allowed_after_hangup", "Voice speech policy allowed assistant speech after hangup began.", event, floorDiagnostic(event, floor));
            }
        }
        if (event.type === "tool.result.spoken") {
            const diagnostic = floorDiagnostic(event, floor);
            if (event.correlationId && floor?.staleToolCallIds?.includes(event.correlationId)) {
                fail("stale_tool_result_spoken", "Voice spoke a tool result that the floor model had already marked stale.", event, diagnostic);
            }
            else if (floorIsCallerOwned(floor)) {
                fail("tool_result_spoken_while_caller_has_floor", "Voice spoke a tool result while the caller owned the floor.", event, diagnostic);
            }
            else if (floorIsTerminal(floor) || hangupAtMs !== undefined) {
                fail("tool_result_spoken_after_hangup", "Voice spoke a tool result after hangup began.", event, diagnostic);
            }
        }
        if (event.type === "response.requested" && hangupAtMs !== undefined && event.atMs > hangupAtMs) {
            fail("response_after_hangup", "Voice requested a new response after hangup had already been requested.", event, floorDiagnostic(event, floor));
        }
    }
}
259
361
  function collectTransportSources(events) {
260
362
  return [...new Set(events.flatMap((event) => event.source ? [event.source.transport] : []))].sort();
261
363
  }
@@ -284,6 +386,7 @@ function gradeVoiceRealtimeEvalTimeline(scenarioId, timeline, expectation) {
284
386
  if (expectation.requireHangup)
285
387
  gradeHangup(events, findings);
286
388
  gradeOverlappingResponses(events, findings);
389
+ gradeDuplexFloorPolicy(events, findings);
287
390
  const report = {
288
391
  scenarioId: normalizedScenarioId,
289
392
  passed: findings.every((finding) => finding.severity !== "fail"),
@@ -57,17 +57,22 @@ const normalizedEvents = new Set([
57
57
  "call.connected",
58
58
  "call.ended",
59
59
  "call.hangup.requested",
60
+ "floor.state.changed",
60
61
  "response.requested",
61
62
  "response.truncated",
62
63
  "session.updated",
64
+ "speech.policy.decision",
63
65
  "tool.call.completed",
64
66
  "tool.call.started",
65
67
  "tool.holding.started",
68
+ "tool.result.ready",
69
+ "tool.result.spoken",
66
70
  "transport.playback_cleared",
67
71
  "user.transcript.done",
68
72
  "voice.context.injected",
69
73
  ]);
70
74
  const rawEventMap = new Map([
75
+ ["voice.floor.state.changed", "floor.state.changed"],
71
76
  ["openai.realtime.call.hangup.sent", "call.hangup.requested"],
72
77
  ["openai.realtime.conversation.item.truncate.sent", "response.truncated"],
73
78
  ["openai.realtime.input_audio_buffer.speech_started", "barge_in.detected"],
@@ -85,10 +90,28 @@ const rawEventMap = new Map([
85
90
  ["twilio.media.clear.sent", "transport.playback_cleared"],
86
91
  ["twilio.media.start", "call.connected"],
87
92
  ["voice.hangup.requested", "call.hangup.requested"],
93
+ ["voice.speech.policy.decision", "speech.policy.decision"],
88
94
  ["voice.tool_holding.started", "tool.holding.started"],
95
+ ["voice.tool.result.ready", "tool.result.ready"],
96
+ ["voice.tool.result.spoken", "tool.result.spoken"],
89
97
  ]);
90
98
  const expectedOutcomes = new Set(["expected-fail", "fail", "pass"]);
91
99
  const expectationProfiles = new Set(["voice-phone-default"]);
100
+ const floorOwners = new Set(["assistant", "caller", "none", "terminal"]);
101
+ const floorPhases = new Set([
102
+ "caller-speaking",
103
+ "ended",
104
+ "hangup",
105
+ "idle",
106
+ "interrupted",
107
+ "listening",
108
+ "speaking",
109
+ "suppressing",
110
+ "thinking",
111
+ "tool-result-ready",
112
+ "tool-running",
113
+ ]);
114
+ const speechDecisions = new Set(["allow", "cancel", "delay", "suppress"]);
92
115
  function objectRecord(value) {
93
116
  return typeof value === "object" && value !== null && !Array.isArray(value) ? value : undefined;
94
117
  }
@@ -107,6 +130,35 @@ function optionalString(value, name, sourceLabel) {
107
130
  throw new Error(label(sourceLabel, `${name} must be a string`));
108
131
  return value;
109
132
  }
133
/**
 * Validates an optional array-of-strings field.
 *
 * @param {*} value - raw field value
 * @param {string} name - field name used in the error message
 * @param {string} sourceLabel - label passed to `label` when building the error message
 * @returns {string[]|undefined} a copy of the array, or undefined when the field is absent
 * @throws {Error} when the value is present but is not an array of strings
 */
function optionalStringArray(value, name, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const isStringArray = Array.isArray(value) && value.every((item) => typeof item === "string");
    if (!isStringArray) {
        throw new Error(label(sourceLabel, `${name} must be an array of strings`));
    }
    return Array.from(value);
}
141
/**
 * Validates an optional `floorOwner` field against the `floorOwners` set.
 *
 * @param {*} value - raw field value
 * @param {string} sourceLabel - label passed to `label` when building the error message
 * @returns {string|undefined} the value unchanged, or undefined when absent
 * @throws {Error} when the value is present but is not a supported owner
 */
function optionalFloorOwner(value, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const supported = typeof value === "string" && floorOwners.has(value);
    if (supported) {
        return value;
    }
    throw new Error(label(sourceLabel, "floorOwner is unsupported"));
}
148
/**
 * Validates an optional `floorPhase` field against the `floorPhases` set.
 *
 * @param {*} value - raw field value
 * @param {string} sourceLabel - label passed to `label` when building the error message
 * @returns {string|undefined} the value unchanged, or undefined when absent
 * @throws {Error} when the value is present but is not a supported phase
 */
function optionalFloorPhase(value, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const supported = typeof value === "string" && floorPhases.has(value);
    if (supported) {
        return value;
    }
    throw new Error(label(sourceLabel, "floorPhase is unsupported"));
}
155
/**
 * Validates an optional `speechDecision` field against the `speechDecisions` set.
 *
 * @param {*} value - raw field value
 * @param {string} sourceLabel - label passed to `label` when building the error message
 * @returns {string|undefined} the value unchanged, or undefined when absent
 * @throws {Error} when the value is present but is not a supported decision
 */
function optionalSpeechDecision(value, sourceLabel) {
    if (value === undefined) {
        return undefined;
    }
    const supported = typeof value === "string" && speechDecisions.has(value);
    if (supported) {
        return value;
    }
    throw new Error(label(sourceLabel, "speechDecision is unsupported"));
}
110
162
  function parseSource(value, sourceLabel) {
111
163
  if (value === undefined)
112
164
  return undefined;
@@ -191,6 +243,15 @@ function parseTraceEvent(value, index, sourceLabel) {
191
243
  friendId: optionalString(raw.friendId, "friendId", eventLabel),
192
244
  sessionKey: optionalString(raw.sessionKey, "sessionKey", eventLabel),
193
245
  session: parseTurnDetection(raw.session, eventLabel),
246
+ floorOwner: optionalFloorOwner(raw.floorOwner, eventLabel),
247
+ floorPhase: optionalFloorPhase(raw.floorPhase, eventLabel),
248
+ speechDecision: optionalSpeechDecision(raw.speechDecision, eventLabel),
249
+ decisionReason: optionalString(raw.decisionReason, "decisionReason", eventLabel),
250
+ pendingSpeechId: optionalString(raw.pendingSpeechId, "pendingSpeechId", eventLabel),
251
+ activeAssistantSpeechId: optionalString(raw.activeAssistantSpeechId, "activeAssistantSpeechId", eventLabel),
252
+ pendingToolCallIds: optionalStringArray(raw.pendingToolCallIds, "pendingToolCallIds", eventLabel),
253
+ staleToolCallIds: optionalStringArray(raw.staleToolCallIds, "staleToolCallIds", eventLabel),
254
+ interruptionTurnId: optionalString(raw.interruptionTurnId, "interruptionTurnId", eventLabel),
194
255
  ignored: ignored || undefined,
195
256
  ignoreReason: optionalString(raw.ignoreReason, "ignoreReason", eventLabel),
196
257
  };
@@ -276,6 +337,15 @@ function toTimelineEvent(event, redacted) {
276
337
  friendId: event.friendId,
277
338
  sessionKey: event.sessionKey,
278
339
  session: event.session,
340
+ floorOwner: event.floorOwner,
341
+ floorPhase: event.floorPhase,
342
+ speechDecision: event.speechDecision,
343
+ decisionReason: event.decisionReason,
344
+ pendingSpeechId: event.pendingSpeechId,
345
+ activeAssistantSpeechId: event.activeAssistantSpeechId,
346
+ pendingToolCallIds: event.pendingToolCallIds,
347
+ staleToolCallIds: event.staleToolCallIds,
348
+ interruptionTurnId: event.interruptionTurnId,
279
349
  };
280
350
  }
281
351
  function sortedTimeline(events) {
@@ -376,6 +446,63 @@ function sourceForSummary(source) {
376
446
  return "";
377
447
  return source.id ? ` ${source.transport}/${source.id}` : ` ${source.transport}`;
378
448
  }
449
/**
 * Renders a list as a comma-joined string for report output.
 *
 * @param {Array|undefined} values - entries to render
 * @returns {string|undefined} the joined list, or undefined when the list is missing or empty
 */
function listForSummary(values) {
    if (!values || !(values.length > 0)) {
        return undefined;
    }
    return values.join(",");
}
452
/**
 * Renders a floor diagnostic as a space-separated `key=value` line for the
 * trace report. Fields that are missing, empty, or otherwise falsy are left
 * out; when nothing remains the result is "none".
 *
 * @param {object} floor - floor diagnostic record
 * @returns {string} one-line summary
 */
function floorForSummary(floor) {
    // Pairs are listed in output order; a falsy value drops the pair entirely.
    const labelled = [
        ["phase", floor.phase],
        ["floor", floor.floorOwner],
        ["pendingSpeech", floor.pendingSpeechId],
        ["pendingTools", listForSummary(floor.pendingToolCallIds)],
        ["staleTools", listForSummary(floor.staleToolCallIds)],
        ["interruption", floor.interruptionTurnId],
        ["reason", floor.decisionReason],
        ["activeSpeech", floor.activeAssistantSpeechId],
        ["decision", floor.speechDecision],
        ["response", floor.responseId],
        ["tool", floor.toolCallId],
    ];
    const parts = labelled
        .filter(([, value]) => Boolean(value))
        .map(([key, value]) => `${key}=${value}`);
    return parts.join(" ") || "none";
}
480
/**
 * Renders the ` floor(...)` suffix appended to a timeline line in the trace
 * report. Returns an empty string when the event carries none of the floor
 * fields. The event's correlation id is shown as `response` for
 * response-request / speech-policy events and as `tool` for tool-result
 * events.
 *
 * @param {object} event - timeline event
 * @returns {string} the suffix (with a leading space) or ""
 */
function floorEventForSummary(event) {
    const floorFields = [
        event.floorPhase,
        event.floorOwner,
        event.speechDecision,
        event.decisionReason,
        event.pendingSpeechId,
        event.activeAssistantSpeechId,
        event.pendingToolCallIds,
        event.staleToolCallIds,
        event.interruptionTurnId,
    ];
    if (floorFields.every((field) => field === undefined)) {
        return "";
    }
    const { type, correlationId } = event;
    const carriesResponseId = type === "response.requested" || type === "speech.policy.decision";
    const carriesToolCallId = type === "tool.result.ready" || type === "tool.result.spoken";
    const summary = floorForSummary({
        phase: event.floorPhase,
        floorOwner: event.floorOwner,
        speechDecision: event.speechDecision,
        decisionReason: event.decisionReason,
        responseId: carriesResponseId ? correlationId : undefined,
        toolCallId: carriesToolCallId ? correlationId : undefined,
        pendingSpeechId: event.pendingSpeechId,
        activeAssistantSpeechId: event.activeAssistantSpeechId,
        pendingToolCallIds: event.pendingToolCallIds,
        staleToolCallIds: event.staleToolCallIds,
        interruptionTurnId: event.interruptionTurnId,
    });
    return ` floor(${summary})`;
}
379
506
  function formatVoiceRealtimeEvalTraceReport(result) {
380
507
  const lines = [
381
508
  `trace ${result.traceId} scenario ${result.scenarioId}`,
@@ -388,11 +515,13 @@ function formatVoiceRealtimeEvalTraceReport(result) {
388
515
  for (const finding of result.report.findings) {
389
516
  const message = result.artifact.redacted ? "[redacted]" : finding.message;
390
517
  lines.push(`- ${finding.code}${finding.atMs === undefined ? "" : ` at ${finding.atMs}ms`}: ${message}`);
518
+ if (finding.floor)
519
+ lines.push(` floor: ${floorForSummary(finding.floor)}`);
391
520
  }
392
521
  }
393
522
  lines.push("events:");
394
523
  for (const event of result.timeline) {
395
- lines.push(`- ${event.atMs}ms ${event.type}${sourceForSummary(event.source)}${textForSummary(result, event)}`);
524
+ lines.push(`- ${event.atMs}ms ${event.type}${sourceForSummary(event.source)}${textForSummary(result, event)}${floorEventForSummary(event)}`);
396
525
  }
397
526
  lines.push(`ignored provider events: ${result.ignoredEvents.length}`);
398
527
  for (const event of result.ignoredEvents) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.586",
3
+ "version": "0.1.0-alpha.587",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",