@ouro.bot/cli 0.1.0-alpha.590 → 0.1.0-alpha.591

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,12 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.591",
6
+ "changes": [
7
+ "Voice floor-control gains a principled caller.turn.dismissed event. The realtime runtime now emits this event when OpenAI starts a coordinated tool call inside an active response cycle (proof the realtime server has parsed the caller's most recent turn), replacing the tactical synthetic caller.transcript.final hack that previously did the same job from outside the reducer. No live human calls."
8
+ ]
9
+ },
4
10
  {
5
11
  "version": "0.1.0-alpha.590",
6
12
  "changes": [
@@ -178,6 +178,37 @@ function applyCallerTranscriptFinal(state, event) {
178
178
  }
179
179
  return { event, state: next, decision: decision(true, "allow", "caller_turn_ready", { atMs: event.atMs }) };
180
180
  }
181
// Reducer step for the "caller.turn.dismissed" voice-floor event.
//
// Parameters:
//   state - current floor state; this function reads latestCallerTurnId and
//           floorOwner from it, and activeAssistantSpeechId from its copy.
//   event - the dismissal event; turnId and atMs are read here.
//
// Returns { event, state, decision }. In the two early-return branches the
// input state object is passed back as-is; only the final branch returns a
// modified copy.
+ function applyCallerTurnDismissed(state, event) {
182
// A dismissal whose turnId is not the latest caller turn is suppressed.
+ if (state.latestCallerTurnId !== event.turnId) {
183
+ return {
184
+ event,
185
+ state,
186
+ decision: decision(false, "suppress", "stale_caller_turn", { atMs: event.atMs }),
187
+ };
188
+ }
189
// The caller does not own the floor, so there is nothing to release: allow
// the event but leave the state untouched.
+ if (state.floorOwner !== "caller") {
190
+ return {
191
+ event,
192
+ state,
193
+ decision: decision(true, "allow", "caller_turn_already_released", { atMs: event.atMs }),
194
+ };
195
+ }
196
// NOTE(review): copyState is defined outside this hunk; presumably it returns
// a fresh copy so the assignments below do not mutate `state` — confirm.
+ const next = copyState(state);
197
// Hand the floor to the assistant when an assistant speech id is set;
// otherwise nobody owns the floor and the phase becomes "thinking".
+ if (next.activeAssistantSpeechId) {
198
+ next.floorOwner = "assistant";
199
+ next.phase = "speaking";
200
+ }
201
+ else {
202
+ next.floorOwner = "none";
203
+ next.phase = "thinking";
204
+ }
205
// Clear any recorded interruption once the caller turn is dismissed.
+ next.interruption = undefined;
206
+ return {
207
+ event,
208
+ state: next,
209
+ decision: decision(true, "allow", "caller_turn_dismissed", { atMs: event.atMs }),
210
+ };
211
+ }
181
212
  function applyAssistantResponseRequested(state, event) {
182
213
  const requestDecision = canRequestVoiceResponse(state, { responseId: event.responseId, reason: event.reason });
183
214
  if (!requestDecision.allowed)
@@ -330,6 +361,8 @@ function applyVoiceFloorEvent(state, event) {
330
361
  return applyCallerSpeechEnded(state, event);
331
362
  case "caller.transcript.final":
332
363
  return applyCallerTranscriptFinal(state, event);
364
+ case "caller.turn.dismissed":
365
+ return applyCallerTurnDismissed(state, event);
333
366
  case "assistant.response.requested":
334
367
  return applyAssistantResponseRequested(state, event);
335
368
  case "assistant.speech.started":
@@ -2140,24 +2140,9 @@ class TwilioOpenAIRealtimeMediaStreamSession {
2140
2140
  this.clearRealtimeToolPresenceTimer(state);
2141
2141
  if (state.suppressFollowup)
2142
2142
  return true;
2143
- this.releaseCallerFloorForToolFollowup();
2144
2143
  this.requestRealtimeResponse();
2145
2144
  return true;
2146
2145
  }
2147
- releaseCallerFloorForToolFollowup() {
2148
- // OpenAI emitting a function-call result inside a coordinated response
2149
- // means the caller's most recent turn has already been parsed by the
2150
- // realtime server. If we still hold a synthetic caller turn (because the
2151
- // matching transcript event has not been delivered yet — common in unit
2152
- // fixtures and during fast-turn races), release it before asking the gate
2153
- // to flush a follow-up response.create so the gate is not stuck thinking
2154
- // the caller still owns the floor.
2155
- if (!this.activeCallerTurnId)
2156
- return;
2157
- const turnId = this.activeCallerTurnId;
2158
- this.activeCallerTurnId = undefined;
2159
- this.floor.apply({ type: "caller.transcript.final", atMs: Date.now(), turnId });
2160
- }
2161
2146
  scheduleRealtimeToolPresence(responseId, state) {
2162
2147
  if (!responseId || state.presenceRequested || state.presenceTimer)
2163
2148
  return;
@@ -2195,6 +2180,24 @@ class TwilioOpenAIRealtimeMediaStreamSession {
2195
2180
  toolState.suppressFollowup = true;
2196
2181
  if (toolState && !toolState.suppressFollowup)
2197
2182
  this.scheduleRealtimeToolPresence(responseId, toolState);
2183
+ // A coordinated tool call (one with a responseId from OpenAI's active
2184
+ // response cycle) is proof that the realtime server has already parsed the
2185
+ // caller's most recent turn into a tool intent. If we still hold a
2186
+ // synthetic caller floor for that turn — because the matching
2187
+ // input_audio_transcription.completed event has not arrived yet, which is
2188
+ // common in unit fixtures and during fast-turn races — dismiss it so the
2189
+ // floor gate is not stuck thinking the caller still owns the floor when
2190
+ // the assistant is mid-response.
2191
+ if (coordinated && this.activeCallerTurnId) {
2192
+ const turnId = this.activeCallerTurnId;
2193
+ this.activeCallerTurnId = undefined;
2194
+ this.floor.apply({
2195
+ type: "caller.turn.dismissed",
2196
+ atMs: Date.now(),
2197
+ turnId,
2198
+ reason: "coordinated_tool_call",
2199
+ });
2200
+ }
2198
2201
  this.floor.apply({
2199
2202
  type: "tool.call.started",
2200
2203
  atMs: Date.now(),
@@ -3280,6 +3283,24 @@ class OpenAISipPhoneSession {
3280
3283
  toolState.suppressFollowup = true;
3281
3284
  if (toolState && !toolState.suppressFollowup)
3282
3285
  this.scheduleRealtimeToolPresence(responseId, toolState);
3286
+ // A coordinated tool call (one with a responseId from OpenAI's active
3287
+ // response cycle) is proof that the realtime server has already parsed the
3288
+ // caller's most recent turn into a tool intent. If we still hold a
3289
+ // synthetic caller floor for that turn — because the matching
3290
+ // input_audio_transcription.completed event has not arrived yet, which is
3291
+ // common in unit fixtures and during fast-turn races — dismiss it so the
3292
+ // floor gate is not stuck thinking the caller still owns the floor when
3293
+ // the assistant is mid-response.
3294
+ if (coordinated && this.activeCallerTurnId) {
3295
+ const turnId = this.activeCallerTurnId;
3296
+ this.activeCallerTurnId = undefined;
3297
+ this.floor.apply({
3298
+ type: "caller.turn.dismissed",
3299
+ atMs: Date.now(),
3300
+ turnId,
3301
+ reason: "coordinated_tool_call",
3302
+ });
3303
+ }
3283
3304
  this.floor.apply({
3284
3305
  type: "tool.call.started",
3285
3306
  atMs: Date.now(),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.590",
3
+ "version": "0.1.0-alpha.591",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",