@ouro.bot/cli 0.1.0-alpha.579 → 0.1.0-alpha.580

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,14 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.580",
6
+ "changes": [
7
+ "OpenAI Realtime phone voice now queues manual `response.create` requests while another response is active, preventing SIP calls from hitting `conversation_already_has_active_response` and losing conversational state during tool followups.",
8
+ "Voice Realtime bootstrap now exposes the native voice/base tool set immediately instead of only `voice_end_call`, so early phone turns can still use tools while the full friend/MCP tool refresh finishes.",
9
+ "BlueBubbles webhook handling now claims message GUIDs before repair/hydrate work starts and releases those claims if repair discovers an already-processed message, preventing duplicate webhook deliveries from racing into overlapping agent turns for the same message."
10
+ ]
11
+ },
4
12
  {
5
13
  "version": "0.1.0-alpha.579",
6
14
  "changes": [
@@ -919,9 +919,18 @@ async function handleBlueBubblesNormalizedEvent(event, resolvedDeps, source, opt
919
919
  let ownsInFlightMessage = false;
920
920
  let releaseInFlightAfterTurnSettles = false;
921
921
  let activeTurnId = null;
922
+ const inFlightKey = event.kind === "message"
923
+ ? {
924
+ sessionKey: options.preclaimedInFlight?.sessionKey ?? event.chat.sessionKey,
925
+ messageGuid: options.preclaimedInFlight?.messageGuid ?? event.messageGuid,
926
+ }
927
+ : null;
922
928
  if (event.kind === "message") {
923
929
  if ((0, processed_log_1.hasProcessedBlueBubblesMessage)(agentName, event.chat.sessionKey, event.messageGuid)
924
930
  || (0, processed_log_1.hasProcessedBlueBubblesMessageGuid)(agentName, event.messageGuid)) {
931
+ if (inFlightKey && options.preclaimedInFlight) {
932
+ endBlueBubblesMessageInFlight(inFlightKey.sessionKey, inFlightKey.messageGuid);
933
+ }
925
934
  (0, runtime_1.emitNervesEvent)({
926
935
  component: "senses",
927
936
  event: "senses.bluebubbles_recovery_skip",
@@ -935,7 +944,10 @@ async function handleBlueBubblesNormalizedEvent(event, resolvedDeps, source, opt
935
944
  });
936
945
  return { handled: true, notifiedAgent: false, kind: event.kind, reason: "already_processed" };
937
946
  }
938
- if (!beginBlueBubblesMessageInFlight(event.chat.sessionKey, event.messageGuid)) {
947
+ if (options.preclaimedInFlight) {
948
+ ownsInFlightMessage = true;
949
+ }
950
+ else if (!beginBlueBubblesMessageInFlight(inFlightKey.sessionKey, inFlightKey.messageGuid)) {
939
951
  (0, runtime_1.emitNervesEvent)({
940
952
  component: "senses",
941
953
  event: "senses.bluebubbles_recovery_skip",
@@ -1084,8 +1096,8 @@ async function handleBlueBubblesNormalizedEvent(event, resolvedDeps, source, opt
1084
1096
  timeoutTimer = setTimeout(() => {
1085
1097
  const reason = new BlueBubblesRecoveryTurnTimeoutError(timeoutMs);
1086
1098
  recoveryTimedOut = true;
1087
- if (liveWebhookTimeout && ownsInFlightMessage && event.kind === "message") {
1088
- endBlueBubblesMessageInFlight(event.chat.sessionKey, event.messageGuid);
1099
+ if (liveWebhookTimeout && ownsInFlightMessage && inFlightKey) {
1100
+ endBlueBubblesMessageInFlight(inFlightKey.sessionKey, inFlightKey.messageGuid);
1089
1101
  ownsInFlightMessage = false;
1090
1102
  }
1091
1103
  else {
@@ -1190,8 +1202,8 @@ async function handleBlueBubblesNormalizedEvent(event, resolvedDeps, source, opt
1190
1202
  });
1191
1203
  })
1192
1204
  .finally(() => {
1193
- if (releaseInFlightAfterTurnSettles && ownsInFlightMessage && event.kind === "message") {
1194
- endBlueBubblesMessageInFlight(event.chat.sessionKey, event.messageGuid);
1205
+ if (releaseInFlightAfterTurnSettles && ownsInFlightMessage && inFlightKey) {
1206
+ endBlueBubblesMessageInFlight(inFlightKey.sessionKey, inFlightKey.messageGuid);
1195
1207
  }
1196
1208
  });
1197
1209
  /* v8 ignore stop */
@@ -1272,8 +1284,8 @@ async function handleBlueBubblesNormalizedEvent(event, resolvedDeps, source, opt
1272
1284
  finally {
1273
1285
  if (activeTurnId)
1274
1286
  (0, active_turns_1.finishBlueBubblesActiveTurn)(agentName, activeTurnId);
1275
- if (ownsInFlightMessage && event.kind === "message" && !releaseInFlightAfterTurnSettles) {
1276
- endBlueBubblesMessageInFlight(event.chat.sessionKey, event.messageGuid);
1287
+ if (ownsInFlightMessage && inFlightKey && !releaseInFlightAfterTurnSettles) {
1288
+ endBlueBubblesMessageInFlight(inFlightKey.sessionKey, inFlightKey.messageGuid);
1277
1289
  }
1278
1290
  }
1279
1291
  }
@@ -1346,8 +1358,24 @@ async function handleBlueBubblesEvent(payload, deps = {}) {
1346
1358
  });
1347
1359
  return handleBlueBubblesNormalizedEvent(normalized, resolvedDeps, "webhook");
1348
1360
  }
1349
- const event = await client.repairEvent(normalized);
1350
- return handleBlueBubblesNormalizedEvent(event, resolvedDeps, "webhook");
1361
+ let preclaimedInFlight = false;
1362
+ let handedPreclaimToTurn = false;
1363
+ if (normalized.kind === "message") {
1364
+ beginBlueBubblesMessageInFlight(normalized.chat.sessionKey, normalized.messageGuid);
1365
+ preclaimedInFlight = true;
1366
+ }
1367
+ try {
1368
+ const event = await client.repairEvent(normalized);
1369
+ handedPreclaimToTurn = preclaimedInFlight && event.kind === "message";
1370
+ return handleBlueBubblesNormalizedEvent(event, resolvedDeps, "webhook", handedPreclaimToTurn
1371
+ ? { preclaimedInFlight: { sessionKey: normalized.chat.sessionKey, messageGuid: normalized.messageGuid } }
1372
+ : {});
1373
+ }
1374
+ finally {
1375
+ if (preclaimedInFlight && !handedPreclaimToTurn) {
1376
+ endBlueBubblesMessageInFlight(normalized.chat.sessionKey, normalized.messageGuid);
1377
+ }
1378
+ }
1351
1379
  }
1352
1380
  function listPendingCapturedInboundMessages(agentName) {
1353
1381
  const seenMessageGuids = new Set();
@@ -1128,6 +1128,7 @@ const OPENAI_REALTIME_BARGE_IN_MIN_SPEECH_MS = 160;
1128
1128
  const OPENAI_REALTIME_BARGE_IN_RMS_THRESHOLD = 900;
1129
1129
  const OPENAI_REALTIME_MIN_VOICE_SPEED = 0.25;
1130
1130
  const OPENAI_REALTIME_MAX_VOICE_SPEED = 1.5;
1131
+ const OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS = 50;
1131
1132
  const OPENAI_SIP_OUTBOUND_AMD_GREETING_TIMEOUT_MS = 10_000;
1132
1133
  const OPENAI_SIP_UNSUPPORTED_TOOL_NAMES = new Set();
1133
1134
  const OPENAI_SIP_DEFAULT_API_BASE_URL = "https://api.openai.com/v1";
@@ -1435,18 +1436,7 @@ function realtimeBootstrapInstructions(agentName, voiceStyle) {
1435
1436
  ].filter(Boolean).join(" ");
1436
1437
  }
1437
1438
  function realtimeBootstrapTools() {
1438
- return [{
1439
- type: "function",
1440
- name: "voice_end_call",
1441
- description: "End the active live voice phone call after a natural goodbye.",
1442
- parameters: {
1443
- type: "object",
1444
- properties: {
1445
- reason: { type: "string", description: "Short reason for ending the call." },
1446
- },
1447
- additionalProperties: false,
1448
- },
1449
- }];
1439
+ return realtimeToolsFromChatTools((0, tools_1.getToolsForChannel)((0, channel_1.getChannelCapabilities)("voice")));
1450
1440
  }
1451
1441
  function timeoutAfter(ms) {
1452
1442
  return new Promise((resolve) => {
@@ -1507,6 +1497,10 @@ class TwilioOpenAIRealtimeMediaStreamSession {
1507
1497
  playbackMarks = new Map();
1508
1498
  toolResponses = new Map();
1509
1499
  completedRealtimeResponseIds = new Set();
1500
+ activeRealtimeResponseId = null;
1501
+ pendingRealtimeResponse = null;
1502
+ pendingRealtimeResponseTimer = null;
1503
+ responseCreateHoldUntilMs = 0;
1510
1504
  initialAudio;
1511
1505
  initialAudioPlayed = false;
1512
1506
  callerBargeInSpeechMs = 0;
@@ -1813,12 +1807,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
1813
1807
  createdAt: new Date().toISOString(),
1814
1808
  }, { From: this.to, To: this.from })
1815
1809
  : callConnectedPrompt({ From: this.from, To: this.to });
1816
- this.sendOpenAI({
1817
- type: "response.create",
1818
- response: {
1819
- instructions: promptText,
1820
- },
1821
- });
1810
+ this.requestRealtimeResponse({ instructions: promptText });
1822
1811
  }
1823
1812
  handleMedia(media) {
1824
1813
  const payload = stringField(media?.payload);
@@ -1873,6 +1862,10 @@ class TwilioOpenAIRealtimeMediaStreamSession {
1873
1862
  return;
1874
1863
  }
1875
1864
  const type = typeof event.type === "string" ? event.type : "";
1865
+ if (type === "response.created") {
1866
+ this.noteRealtimeResponseCreated(event);
1867
+ return;
1868
+ }
1876
1869
  if (type === "response.output_audio.delta" && typeof event.delta === "string") {
1877
1870
  this.handleOpenAIAudioDelta(event);
1878
1871
  return;
@@ -1894,7 +1887,9 @@ class TwilioOpenAIRealtimeMediaStreamSession {
1894
1887
  return;
1895
1888
  }
1896
1889
  if (type === "response.done") {
1897
- if (this.completeRealtimeToolResponse(realtimeResponseId(event)))
1890
+ const responseId = realtimeResponseId(event);
1891
+ this.noteRealtimeResponseDone(responseId);
1892
+ if (this.completeRealtimeToolResponse(responseId))
1898
1893
  return;
1899
1894
  void this.playInitialAudioAfterGreeting();
1900
1895
  this.completeHangupIfReady("response_done");
@@ -2009,7 +2004,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
2009
2004
  this.toolResponses.delete(responseId);
2010
2005
  if (state.suppressFollowup)
2011
2006
  return true;
2012
- this.sendOpenAI({ type: "response.create" });
2007
+ this.requestRealtimeResponse();
2013
2008
  return true;
2014
2009
  }
2015
2010
  async runRealtimeTool(event) {
@@ -2058,8 +2053,62 @@ class TwilioOpenAIRealtimeMediaStreamSession {
2058
2053
  },
2059
2054
  });
2060
2055
  if (!this.completeRealtimeToolCall(responseId, callId) && !coordinated) {
2061
- this.sendOpenAI({ type: "response.create" });
2056
+ this.requestRealtimeResponse();
2057
+ }
2058
+ }
2059
+ noteRealtimeResponseCreated(event) {
2060
+ const responseId = realtimeResponseId(event);
2061
+ if (responseId)
2062
+ this.activeRealtimeResponseId = responseId;
2063
+ }
2064
+ noteRealtimeResponseDone(responseId) {
2065
+ if (!responseId || this.activeRealtimeResponseId === responseId) {
2066
+ this.activeRealtimeResponseId = null;
2067
+ }
2068
+ this.responseCreateHoldUntilMs = Math.max(this.responseCreateHoldUntilMs, Date.now() + OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
2069
+ this.schedulePendingRealtimeResponse(OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
2070
+ }
2071
+ requestRealtimeResponse(response) {
2072
+ if (this.closed)
2073
+ return;
2074
+ const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
2075
+ if (this.activeRealtimeResponseId || waitMs > 0) {
2076
+ const pendingResponse = response ?? this.pendingRealtimeResponse?.response;
2077
+ this.pendingRealtimeResponse = pendingResponse ? { response: pendingResponse } : {};
2078
+ if (!this.activeRealtimeResponseId)
2079
+ this.schedulePendingRealtimeResponse(waitMs);
2080
+ return;
2062
2081
  }
2082
+ this.sendRealtimeResponseCreate(response ? { response } : {});
2083
+ }
2084
+ schedulePendingRealtimeResponse(delayMs) {
2085
+ if (!this.pendingRealtimeResponse)
2086
+ return;
2087
+ if (this.pendingRealtimeResponseTimer)
2088
+ clearTimeout(this.pendingRealtimeResponseTimer);
2089
+ this.pendingRealtimeResponseTimer = setTimeout(() => {
2090
+ this.pendingRealtimeResponseTimer = null;
2091
+ this.flushPendingRealtimeResponse();
2092
+ }, Math.max(0, delayMs));
2093
+ this.pendingRealtimeResponseTimer.unref?.();
2094
+ }
2095
+ flushPendingRealtimeResponse() {
2096
+ if (!this.pendingRealtimeResponse || this.closed || this.activeRealtimeResponseId)
2097
+ return;
2098
+ const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
2099
+ if (waitMs > 0) {
2100
+ this.schedulePendingRealtimeResponse(waitMs);
2101
+ return;
2102
+ }
2103
+ const pending = this.pendingRealtimeResponse;
2104
+ this.pendingRealtimeResponse = null;
2105
+ this.sendRealtimeResponseCreate(pending);
2106
+ }
2107
+ sendRealtimeResponseCreate(request) {
2108
+ this.sendOpenAI({
2109
+ type: "response.create",
2110
+ ...(request.response ? { response: request.response } : {}),
2111
+ });
2063
2112
  }
2064
2113
  flushPendingAudio() {
2065
2114
  const pending = this.pendingAudioPayloads.splice(0);
@@ -2176,6 +2225,10 @@ class TwilioOpenAIRealtimeMediaStreamSession {
2176
2225
  if (this.openaiWs && (this.openaiWs.readyState === ws_1.WebSocket.OPEN || this.openaiWs.readyState === ws_1.WebSocket.CONNECTING)) {
2177
2226
  this.openaiWs.close();
2178
2227
  }
2228
+ if (this.pendingRealtimeResponseTimer) {
2229
+ clearTimeout(this.pendingRealtimeResponseTimer);
2230
+ this.pendingRealtimeResponseTimer = null;
2231
+ }
2179
2232
  this.lifecycle?.onClose?.(this, { callSid: this.callSid, outboundId: this.outboundId });
2180
2233
  (0, runtime_1.emitNervesEvent)({
2181
2234
  component: "senses",
@@ -2207,6 +2260,10 @@ class OpenAISipPhoneSession {
2207
2260
  sessionMessages = [];
2208
2261
  toolResponses = new Map();
2209
2262
  completedRealtimeResponseIds = new Set();
2263
+ activeRealtimeResponseId = null;
2264
+ pendingRealtimeResponse = null;
2265
+ pendingRealtimeResponseTimer = null;
2266
+ responseCreateHoldUntilMs = 0;
2210
2267
  constructor(options, metadata, registry) {
2211
2268
  this.options = options;
2212
2269
  this.metadata = metadata;
@@ -2722,11 +2779,8 @@ class OpenAISipPhoneSession {
2722
2779
  if (!this.openaiWs || this.openaiWs.readyState !== ws_1.WebSocket.OPEN)
2723
2780
  return;
2724
2781
  this.initialGreetingSent = true;
2725
- this.sendOpenAI({
2726
- type: "response.create",
2727
- response: {
2728
- instructions: openAISipCallConnectedPrompt(this.metadata, this.options.openaiRealtime?.voiceStyle),
2729
- },
2782
+ this.requestRealtimeResponse({
2783
+ instructions: openAISipCallConnectedPrompt(this.metadata, this.options.openaiRealtime?.voiceStyle),
2730
2784
  });
2731
2785
  }
2732
2786
  handleOpenAIMessage(raw) {
@@ -2738,6 +2792,10 @@ class OpenAISipPhoneSession {
2738
2792
  return;
2739
2793
  }
2740
2794
  const type = typeof event.type === "string" ? event.type : "";
2795
+ if (type === "response.created") {
2796
+ this.noteRealtimeResponseCreated(event);
2797
+ return;
2798
+ }
2741
2799
  if (type === "conversation.item.input_audio_transcription.completed" && typeof event.transcript === "string") {
2742
2800
  this.recordOutboundAmdTranscriptCandidate(event.transcript);
2743
2801
  this.appendTranscript("user", event.transcript);
@@ -2752,7 +2810,9 @@ class OpenAISipPhoneSession {
2752
2810
  return;
2753
2811
  }
2754
2812
  if (type === "response.done") {
2755
- if (this.completeRealtimeToolResponse(realtimeResponseId(event)))
2813
+ const responseId = realtimeResponseId(event);
2814
+ this.noteRealtimeResponseDone(responseId);
2815
+ if (this.completeRealtimeToolResponse(responseId))
2756
2816
  return;
2757
2817
  this.completeHangupIfReady("response_done");
2758
2818
  return;
@@ -2811,7 +2871,7 @@ class OpenAISipPhoneSession {
2811
2871
  this.completeHangupIfReady("tool_response_done");
2812
2872
  return true;
2813
2873
  }
2814
- this.sendOpenAI({ type: "response.create" });
2874
+ this.requestRealtimeResponse();
2815
2875
  return true;
2816
2876
  }
2817
2877
  async runRealtimeTool(event) {
@@ -2860,9 +2920,63 @@ class OpenAISipPhoneSession {
2860
2920
  },
2861
2921
  });
2862
2922
  if (!this.completeRealtimeToolCall(responseId, callId) && !coordinated) {
2863
- this.sendOpenAI({ type: "response.create" });
2923
+ this.requestRealtimeResponse();
2864
2924
  }
2865
2925
  }
2926
+ noteRealtimeResponseCreated(event) {
2927
+ const responseId = realtimeResponseId(event);
2928
+ if (responseId)
2929
+ this.activeRealtimeResponseId = responseId;
2930
+ }
2931
+ noteRealtimeResponseDone(responseId) {
2932
+ if (!responseId || this.activeRealtimeResponseId === responseId) {
2933
+ this.activeRealtimeResponseId = null;
2934
+ }
2935
+ this.responseCreateHoldUntilMs = Math.max(this.responseCreateHoldUntilMs, Date.now() + OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
2936
+ this.schedulePendingRealtimeResponse(OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
2937
+ }
2938
+ requestRealtimeResponse(response) {
2939
+ if (this.closed)
2940
+ return;
2941
+ const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
2942
+ if (this.activeRealtimeResponseId || waitMs > 0) {
2943
+ const pendingResponse = response ?? this.pendingRealtimeResponse?.response;
2944
+ this.pendingRealtimeResponse = pendingResponse ? { response: pendingResponse } : {};
2945
+ if (!this.activeRealtimeResponseId)
2946
+ this.schedulePendingRealtimeResponse(waitMs);
2947
+ return;
2948
+ }
2949
+ this.sendRealtimeResponseCreate(response ? { response } : {});
2950
+ }
2951
+ schedulePendingRealtimeResponse(delayMs) {
2952
+ if (!this.pendingRealtimeResponse)
2953
+ return;
2954
+ if (this.pendingRealtimeResponseTimer)
2955
+ clearTimeout(this.pendingRealtimeResponseTimer);
2956
+ this.pendingRealtimeResponseTimer = setTimeout(() => {
2957
+ this.pendingRealtimeResponseTimer = null;
2958
+ this.flushPendingRealtimeResponse();
2959
+ }, Math.max(0, delayMs));
2960
+ this.pendingRealtimeResponseTimer.unref?.();
2961
+ }
2962
+ flushPendingRealtimeResponse() {
2963
+ if (!this.pendingRealtimeResponse || this.closed || this.activeRealtimeResponseId)
2964
+ return;
2965
+ const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
2966
+ if (waitMs > 0) {
2967
+ this.schedulePendingRealtimeResponse(waitMs);
2968
+ return;
2969
+ }
2970
+ const pending = this.pendingRealtimeResponse;
2971
+ this.pendingRealtimeResponse = null;
2972
+ this.sendRealtimeResponseCreate(pending);
2973
+ }
2974
+ sendRealtimeResponseCreate(request) {
2975
+ this.sendOpenAI({
2976
+ type: "response.create",
2977
+ ...(request.response ? { response: request.response } : {}),
2978
+ });
2979
+ }
2866
2980
  requestHangupFromTool() {
2867
2981
  if (this.closed)
2868
2982
  return;
@@ -2932,6 +3046,10 @@ class OpenAISipPhoneSession {
2932
3046
  if (this.openaiWs && (this.openaiWs.readyState === ws_1.WebSocket.OPEN || this.openaiWs.readyState === ws_1.WebSocket.CONNECTING)) {
2933
3047
  this.openaiWs.close();
2934
3048
  }
3049
+ if (this.pendingRealtimeResponseTimer) {
3050
+ clearTimeout(this.pendingRealtimeResponseTimer);
3051
+ this.pendingRealtimeResponseTimer = null;
3052
+ }
2935
3053
  (0, runtime_1.emitNervesEvent)({
2936
3054
  component: "senses",
2937
3055
  event: "senses.voice_openai_sip_call_stop",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.579",
3
+ "version": "0.1.0-alpha.580",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",