@ouro.bot/cli 0.1.0-alpha.590 → 0.1.0-alpha.591
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.591",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Voice floor-control gains a principled caller.turn.dismissed event. The realtime runtime now emits this event when OpenAI starts a coordinated tool call inside an active response cycle (proof the realtime server has parsed the caller's most recent turn), replacing the tactical synthetic caller.transcript.final hack that previously did the same job from outside the reducer. No live human calls."
|
|
8
|
+
]
|
|
9
|
+
},
|
|
4
10
|
{
|
|
5
11
|
"version": "0.1.0-alpha.590",
|
|
6
12
|
"changes": [
|
|
@@ -178,6 +178,37 @@ function applyCallerTranscriptFinal(state, event) {
|
|
|
178
178
|
}
|
|
179
179
|
return { event, state: next, decision: decision(true, "allow", "caller_turn_ready", { atMs: event.atMs }) };
|
|
180
180
|
}
|
|
181
|
+
/**
 * Floor-state transition for a `caller.turn.dismissed` event.
 *
 * Outcomes, in the order they are checked:
 *  - `event.turnId` differs from `state.latestCallerTurnId`: the state is
 *    returned untouched with a "suppress" / "stale_caller_turn" decision.
 *  - `state.floorOwner` is not "caller": the state is returned untouched with
 *    an "allow" / "caller_turn_already_released" decision.
 *  - otherwise: a copy of the state is returned in which the floor has been
 *    taken from the caller, with an "allow" / "caller_turn_dismissed" decision.
 *
 * @param {object} state - Current floor state; reads `latestCallerTurnId`,
 *   `floorOwner` and (via the copy) `activeAssistantSpeechId`.
 * @param {object} event - The dismissal event; reads `turnId` and `atMs`.
 * @returns {{event: object, state: object, decision: *}} The event, the
 *   resulting state, and whatever `decision(...)` builds for the outcome.
 */
function applyCallerTurnDismissed(state, event) {
    // Built lazily so `event.atMs` is only read at the point a decision is made.
    const verdict = (allowed, action, reason) => decision(allowed, action, reason, { atMs: event.atMs });

    const isLatestTurn = state.latestCallerTurnId === event.turnId;
    if (!isLatestTurn) {
        return { event, state, decision: verdict(false, "suppress", "stale_caller_turn") };
    }

    const callerHoldsFloor = state.floorOwner === "caller";
    if (!callerHoldsFloor) {
        return { event, state, decision: verdict(true, "allow", "caller_turn_already_released") };
    }

    // Work on a copy; the incoming state object is never mutated here.
    const released = copyState(state);
    // The floor goes to the assistant when a speech id is active, otherwise to nobody.
    const assistantSpeaking = Boolean(released.activeAssistantSpeechId);
    released.floorOwner = assistantSpeaking ? "assistant" : "none";
    released.phase = assistantSpeaking ? "speaking" : "thinking";
    released.interruption = undefined;

    return { event, state: released, decision: verdict(true, "allow", "caller_turn_dismissed") };
}
|
|
181
212
|
function applyAssistantResponseRequested(state, event) {
|
|
182
213
|
const requestDecision = canRequestVoiceResponse(state, { responseId: event.responseId, reason: event.reason });
|
|
183
214
|
if (!requestDecision.allowed)
|
|
@@ -330,6 +361,8 @@ function applyVoiceFloorEvent(state, event) {
|
|
|
330
361
|
return applyCallerSpeechEnded(state, event);
|
|
331
362
|
case "caller.transcript.final":
|
|
332
363
|
return applyCallerTranscriptFinal(state, event);
|
|
364
|
+
case "caller.turn.dismissed":
|
|
365
|
+
return applyCallerTurnDismissed(state, event);
|
|
333
366
|
case "assistant.response.requested":
|
|
334
367
|
return applyAssistantResponseRequested(state, event);
|
|
335
368
|
case "assistant.speech.started":
|
|
@@ -2140,24 +2140,9 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2140
2140
|
this.clearRealtimeToolPresenceTimer(state);
|
|
2141
2141
|
if (state.suppressFollowup)
|
|
2142
2142
|
return true;
|
|
2143
|
-
this.releaseCallerFloorForToolFollowup();
|
|
2144
2143
|
this.requestRealtimeResponse();
|
|
2145
2144
|
return true;
|
|
2146
2145
|
}
|
|
2147
|
-
releaseCallerFloorForToolFollowup() {
|
|
2148
|
-
// OpenAI emitting a function-call result inside a coordinated response
|
|
2149
|
-
// means the caller's most recent turn has already been parsed by the
|
|
2150
|
-
// realtime server. If we still hold a synthetic caller turn (because the
|
|
2151
|
-
// matching transcript event has not been delivered yet — common in unit
|
|
2152
|
-
// fixtures and during fast-turn races), release it before asking the gate
|
|
2153
|
-
// to flush a follow-up response.create so the gate is not stuck thinking
|
|
2154
|
-
// the caller still owns the floor.
|
|
2155
|
-
if (!this.activeCallerTurnId)
|
|
2156
|
-
return;
|
|
2157
|
-
const turnId = this.activeCallerTurnId;
|
|
2158
|
-
this.activeCallerTurnId = undefined;
|
|
2159
|
-
this.floor.apply({ type: "caller.transcript.final", atMs: Date.now(), turnId });
|
|
2160
|
-
}
|
|
2161
2146
|
scheduleRealtimeToolPresence(responseId, state) {
|
|
2162
2147
|
if (!responseId || state.presenceRequested || state.presenceTimer)
|
|
2163
2148
|
return;
|
|
@@ -2195,6 +2180,24 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2195
2180
|
toolState.suppressFollowup = true;
|
|
2196
2181
|
if (toolState && !toolState.suppressFollowup)
|
|
2197
2182
|
this.scheduleRealtimeToolPresence(responseId, toolState);
|
|
2183
|
+
// A coordinated tool call (one with a responseId from OpenAI's active
|
|
2184
|
+
// response cycle) is proof that the realtime server has already parsed the
|
|
2185
|
+
// caller's most recent turn into a tool intent. If we still hold a
|
|
2186
|
+
// synthetic caller floor for that turn — because the matching
|
|
2187
|
+
// input_audio_transcription.completed event has not arrived yet, which is
|
|
2188
|
+
// common in unit fixtures and during fast-turn races — dismiss it so the
|
|
2189
|
+
// floor gate is not stuck thinking the caller still owns the floor when
|
|
2190
|
+
// the assistant is mid-response.
|
|
2191
|
+
if (coordinated && this.activeCallerTurnId) {
|
|
2192
|
+
const turnId = this.activeCallerTurnId;
|
|
2193
|
+
this.activeCallerTurnId = undefined;
|
|
2194
|
+
this.floor.apply({
|
|
2195
|
+
type: "caller.turn.dismissed",
|
|
2196
|
+
atMs: Date.now(),
|
|
2197
|
+
turnId,
|
|
2198
|
+
reason: "coordinated_tool_call",
|
|
2199
|
+
});
|
|
2200
|
+
}
|
|
2198
2201
|
this.floor.apply({
|
|
2199
2202
|
type: "tool.call.started",
|
|
2200
2203
|
atMs: Date.now(),
|
|
@@ -3280,6 +3283,24 @@ class OpenAISipPhoneSession {
|
|
|
3280
3283
|
toolState.suppressFollowup = true;
|
|
3281
3284
|
if (toolState && !toolState.suppressFollowup)
|
|
3282
3285
|
this.scheduleRealtimeToolPresence(responseId, toolState);
|
|
3286
|
+
// A coordinated tool call (one with a responseId from OpenAI's active
|
|
3287
|
+
// response cycle) is proof that the realtime server has already parsed the
|
|
3288
|
+
// caller's most recent turn into a tool intent. If we still hold a
|
|
3289
|
+
// synthetic caller floor for that turn — because the matching
|
|
3290
|
+
// input_audio_transcription.completed event has not arrived yet, which is
|
|
3291
|
+
// common in unit fixtures and during fast-turn races — dismiss it so the
|
|
3292
|
+
// floor gate is not stuck thinking the caller still owns the floor when
|
|
3293
|
+
// the assistant is mid-response.
|
|
3294
|
+
if (coordinated && this.activeCallerTurnId) {
|
|
3295
|
+
const turnId = this.activeCallerTurnId;
|
|
3296
|
+
this.activeCallerTurnId = undefined;
|
|
3297
|
+
this.floor.apply({
|
|
3298
|
+
type: "caller.turn.dismissed",
|
|
3299
|
+
atMs: Date.now(),
|
|
3300
|
+
turnId,
|
|
3301
|
+
reason: "coordinated_tool_call",
|
|
3302
|
+
});
|
|
3303
|
+
}
|
|
3283
3304
|
this.floor.apply({
|
|
3284
3305
|
type: "tool.call.started",
|
|
3285
3306
|
atMs: Date.now(),
|