@ouro.bot/cli 0.1.0-alpha.582 → 0.1.0-alpha.583
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +11 -0
- package/dist/senses/voice/twilio-phone-runtime.js +56 -17
- package/dist/senses/voice/twilio-phone.js +375 -68
- package/package.json +1 -1
package/changelog.json
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.583",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Outbound SIP phone calls now start the Realtime greeting immediately after answer unless Twilio has already positively identified voicemail or fax, preventing humans from hearing post-pickup silence when async AMD returns unknown.",
|
|
8
|
+
"Twilio phone voice now defaults outbound calls to OpenAI Realtime Media Streams when inbound calls use OpenAI SIP on a Media Stream machine, while still allowing `voice.twilioOutboundConversationEngine` overrides, so humans avoid post-pickup SIP ringback.",
|
|
9
|
+
"Realtime voice now resolves phone callers through the canonical friend graph, preferring existing friend ids and otherwise matching normalized phone numbers via `imessage-handle`, so trust-aware tools see the same friend context as text and mail.",
|
|
10
|
+
"Realtime media-stream voice now treats empty caller metadata as absent and preserves local voice friend identities, keeping outbound and provider-simulated calls attached to the intended friend instead of inventing a blank phone identity.",
|
|
11
|
+
"Realtime voice response creation now backs off and retries after provider active-response conflicts, holds user turns under Ouro floor-control instead of provider auto-response, and long-running voice tools can emit one tiny holding phrase instead of leaving seconds of unexplained silence.",
|
|
12
|
+
"Realtime voice VAD and local barge-in thresholds are less twitchy by default, reducing accidental interruption from tiny room sounds while preserving deliberate caller interruption."
|
|
13
|
+
]
|
|
14
|
+
},
|
|
4
15
|
{
|
|
5
16
|
"version": "0.1.0-alpha.582",
|
|
6
17
|
"changes": [
|
|
@@ -154,13 +154,37 @@ function resolveOpenAIRealtimeApiKey(options) {
|
|
|
154
154
|
return { apiKey: compatKey, source: "integrations.openaiEmbeddingsApiKey" };
|
|
155
155
|
return undefined;
|
|
156
156
|
}
|
|
157
|
-
function configuredConversationEngine(options, overrides) {
|
|
158
|
-
|
|
159
|
-
??
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
157
|
+
function configuredConversationEngine(options, overrides, transportMode) {
|
|
158
|
+
const explicit = overrides.conversationEngine
|
|
159
|
+
?? configString(options.machineConfig, "voice.twilioConversationEngine")
|
|
160
|
+
?? configString(options.machineConfig, "voice.conversationEngine")
|
|
161
|
+
?? configString(options.runtimeConfig, "voice.twilioConversationEngine")
|
|
162
|
+
?? configString(options.runtimeConfig, "voice.conversationEngine");
|
|
163
|
+
const hasSipConfig = !!(configString(options.runtimeConfig, "voice.openaiSipProjectId")
|
|
164
|
+
|| configString(options.machineConfig, "voice.openaiSipProjectId"));
|
|
165
|
+
const explicitEngine = explicit ? (0, twilio_phone_1.normalizeTwilioPhoneConversationEngine)(explicit) : undefined;
|
|
166
|
+
if (hasSipConfig && (!explicitEngine || explicitEngine === "cascade"))
|
|
167
|
+
return "openai-sip";
|
|
168
|
+
if (explicitEngine)
|
|
169
|
+
return explicitEngine;
|
|
170
|
+
const hasRealtimeConfig = !!resolveOpenAIRealtimeApiKey({ runtimeConfig: options.runtimeConfig, overrides });
|
|
171
|
+
if (hasRealtimeConfig && transportMode === "media-stream")
|
|
172
|
+
return "openai-realtime";
|
|
173
|
+
return "cascade";
|
|
174
|
+
}
|
|
175
|
+
function configuredOutboundConversationEngine(options, overrides, conversationEngine, transportMode) {
|
|
176
|
+
const defaultOutboundEngine = conversationEngine === "openai-sip" && transportMode === "media-stream"
|
|
177
|
+
? "openai-realtime"
|
|
178
|
+
: conversationEngine;
|
|
179
|
+
const configured = overrides.outboundConversationEngine
|
|
180
|
+
?? (0, twilio_phone_1.normalizeTwilioPhoneConversationEngine)(configString(options.machineConfig, "voice.twilioOutboundConversationEngine")
|
|
181
|
+
?? configString(options.machineConfig, "voice.outboundConversationEngine")
|
|
182
|
+
?? configString(options.runtimeConfig, "voice.twilioOutboundConversationEngine")
|
|
183
|
+
?? configString(options.runtimeConfig, "voice.outboundConversationEngine")
|
|
184
|
+
?? defaultOutboundEngine);
|
|
185
|
+
if (defaultOutboundEngine === "openai-realtime" && configured === "cascade")
|
|
186
|
+
return defaultOutboundEngine;
|
|
187
|
+
return configured;
|
|
164
188
|
}
|
|
165
189
|
function normalizeOpenAIRealtimeReasoningEffort(value) {
|
|
166
190
|
const normalized = value?.trim().toLowerCase();
|
|
@@ -226,7 +250,14 @@ function resolveTwilioPhoneTransportRuntime(options) {
|
|
|
226
250
|
?? twilio_phone_1.TWILIO_PHONE_WEBHOOK_BASE_PATH);
|
|
227
251
|
const transportMode = overrides.transportMode
|
|
228
252
|
?? (0, twilio_phone_1.normalizeTwilioPhoneTransportMode)(configString(options.machineConfig, "voice.twilioTransportMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_TRANSPORT_MODE);
|
|
229
|
-
const conversationEngine = configuredConversationEngine(options, overrides);
|
|
253
|
+
const conversationEngine = configuredConversationEngine(options, overrides, transportMode);
|
|
254
|
+
const outboundConversationEngine = configuredOutboundConversationEngine(options, overrides, conversationEngine, transportMode);
|
|
255
|
+
const needsOpenAIRealtime = conversationEngine === "openai-realtime"
|
|
256
|
+
|| conversationEngine === "openai-sip"
|
|
257
|
+
|| outboundConversationEngine === "openai-realtime"
|
|
258
|
+
|| outboundConversationEngine === "openai-sip";
|
|
259
|
+
const needsOpenAISip = conversationEngine === "openai-sip" || outboundConversationEngine === "openai-sip";
|
|
260
|
+
const needsCascade = conversationEngine === "cascade" || outboundConversationEngine === "cascade";
|
|
230
261
|
let elevenLabsApiKey = configString(options.runtimeConfig, "integrations.elevenLabsApiKey") ?? "";
|
|
231
262
|
let elevenLabsVoiceId = trimOptional(overrides.elevenLabsVoiceId)
|
|
232
263
|
?? configString(options.runtimeConfig, "integrations.elevenLabsVoiceId")
|
|
@@ -240,9 +271,9 @@ function resolveTwilioPhoneTransportRuntime(options) {
|
|
|
240
271
|
?? "";
|
|
241
272
|
let openaiRealtime;
|
|
242
273
|
let openaiSip;
|
|
243
|
-
if (
|
|
244
|
-
if (conversationEngine === "openai-realtime" && transportMode !== "media-stream") {
|
|
245
|
-
throw new Error("voice.twilioConversationEngine
|
|
274
|
+
if (needsOpenAIRealtime) {
|
|
275
|
+
if ((conversationEngine === "openai-realtime" || outboundConversationEngine === "openai-realtime") && transportMode !== "media-stream") {
|
|
276
|
+
throw new Error("voice.twilioConversationEngine/openai-realtime requires voice.twilioTransportMode=media-stream");
|
|
246
277
|
}
|
|
247
278
|
const key = resolveOpenAIRealtimeApiKey({ runtimeConfig: options.runtimeConfig, overrides });
|
|
248
279
|
if (!key) {
|
|
@@ -300,7 +331,7 @@ function resolveTwilioPhoneTransportRuntime(options) {
|
|
|
300
331
|
?? normalizeOpenAIRealtimeNoiseReduction(configString(options.runtimeConfig, "voice.openaiRealtimeNoiseReduction")),
|
|
301
332
|
turnDetection,
|
|
302
333
|
};
|
|
303
|
-
if (
|
|
334
|
+
if (needsOpenAISip) {
|
|
304
335
|
const projectId = trimOptional(overrides.openaiSipProjectId)
|
|
305
336
|
?? configString(options.runtimeConfig, "voice.openaiSipProjectId")
|
|
306
337
|
?? configString(options.machineConfig, "voice.openaiSipProjectId");
|
|
@@ -334,7 +365,7 @@ function resolveTwilioPhoneTransportRuntime(options) {
|
|
|
334
365
|
};
|
|
335
366
|
}
|
|
336
367
|
}
|
|
337
|
-
|
|
368
|
+
if (needsCascade) {
|
|
338
369
|
elevenLabsApiKey = required(elevenLabsApiKey || undefined, "missing integrations.elevenLabsApiKey; run 'ouro connect voice --agent <agent>' for setup guidance");
|
|
339
370
|
elevenLabsVoiceId = required(elevenLabsVoiceId || undefined, "missing integrations.elevenLabsVoiceId; save the ElevenLabs voice ID before starting phone voice");
|
|
340
371
|
whisperCliPath = required(whisperCliPath || undefined, "missing voice.whisperCliPath in this machine's runtime config");
|
|
@@ -379,6 +410,7 @@ function resolveTwilioPhoneTransportRuntime(options) {
|
|
|
379
410
|
?? (0, twilio_phone_1.normalizeTwilioPhonePlaybackMode)(configString(options.machineConfig, "voice.twilioPlaybackMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE),
|
|
380
411
|
transportMode,
|
|
381
412
|
conversationEngine,
|
|
413
|
+
outboundConversationEngine,
|
|
382
414
|
openaiRealtime,
|
|
383
415
|
openaiSip,
|
|
384
416
|
openaiSipWebhookUrl: openaiSip?.webhookPath ? (0, twilio_phone_1.openAISipWebhookUrl)(publicBaseUrl, openaiSip.webhookPath) : undefined,
|
|
@@ -482,7 +514,12 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
|
|
|
482
514
|
meta: { agentName: settings.agentName, source: settings.openaiRealtime.apiKeySource },
|
|
483
515
|
});
|
|
484
516
|
}
|
|
485
|
-
const
|
|
517
|
+
const settingsNeedsOpenAIRealtime = settings.conversationEngine === "openai-realtime"
|
|
518
|
+
|| settings.conversationEngine === "openai-sip"
|
|
519
|
+
|| settings.outboundConversationEngine === "openai-realtime"
|
|
520
|
+
|| settings.outboundConversationEngine === "openai-sip";
|
|
521
|
+
const settingsNeedsCascade = settings.conversationEngine === "cascade" || settings.outboundConversationEngine === "cascade";
|
|
522
|
+
const transcriber = settingsNeedsOpenAIRealtime && !settingsNeedsCascade
|
|
486
523
|
? {
|
|
487
524
|
transcribe: async () => {
|
|
488
525
|
throw new Error("OpenAI Realtime voice sessions do not use the cascade transcriber");
|
|
@@ -492,7 +529,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
|
|
|
492
529
|
whisperCliPath: settings.whisperCliPath,
|
|
493
530
|
modelPath: settings.whisperModelPath,
|
|
494
531
|
});
|
|
495
|
-
const tts =
|
|
532
|
+
const tts = settingsNeedsOpenAIRealtime && !settingsNeedsCascade
|
|
496
533
|
? {
|
|
497
534
|
synthesize: async () => {
|
|
498
535
|
throw new Error("OpenAI Realtime voice sessions do not use the cascade TTS service");
|
|
@@ -522,6 +559,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
|
|
|
522
559
|
transportMode: settings.transportMode,
|
|
523
560
|
playbackMode: settings.playbackMode,
|
|
524
561
|
conversationEngine: settings.conversationEngine,
|
|
562
|
+
outboundConversationEngine: settings.outboundConversationEngine,
|
|
525
563
|
openaiRealtime: settings.openaiRealtime,
|
|
526
564
|
openaiSip: settings.openaiSip,
|
|
527
565
|
});
|
|
@@ -538,6 +576,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
|
|
|
538
576
|
openaiSipWebhookUrl: settings.openaiSipWebhookUrl ?? "",
|
|
539
577
|
transportMode: settings.transportMode,
|
|
540
578
|
conversationEngine: settings.conversationEngine,
|
|
579
|
+
outboundConversationEngine: settings.outboundConversationEngine,
|
|
541
580
|
openaiRealtimeModel: settings.openaiRealtime?.model ?? "",
|
|
542
581
|
},
|
|
543
582
|
});
|
|
@@ -565,7 +604,7 @@ async function prewarmOutboundGreeting(options, deps) {
|
|
|
565
604
|
if (options.settings.transportMode !== "media-stream")
|
|
566
605
|
return undefined;
|
|
567
606
|
/* v8 ignore next -- Realtime/SIP outbound tests assert no cascade prewarm is attempted @preserve */
|
|
568
|
-
if (options.settings.
|
|
607
|
+
if (options.settings.outboundConversationEngine === "openai-realtime" || options.settings.outboundConversationEngine === "openai-sip")
|
|
569
608
|
return undefined;
|
|
570
609
|
const friendId = options.friendId?.trim() || `twilio-${safeRuntimeSegment(options.to)}`;
|
|
571
610
|
const sessionKey = (0, twilio_phone_1.twilioPhoneVoiceSessionKey)({
|
|
@@ -677,7 +716,7 @@ async function placeConfiguredTwilioPhoneCall(options, deps = defaultTwilioPhone
|
|
|
677
716
|
reason: options.reason.trim(),
|
|
678
717
|
...(options.initialAudio ? { initialAudio: options.initialAudio } : {}),
|
|
679
718
|
createdAt,
|
|
680
|
-
status: settings.transportMode === "media-stream" && settings.
|
|
719
|
+
status: settings.transportMode === "media-stream" && settings.outboundConversationEngine === "cascade"
|
|
681
720
|
? "prewarming"
|
|
682
721
|
: "requested",
|
|
683
722
|
});
|
|
@@ -205,6 +205,15 @@ function usesOpenAIRealtimeConversationEngine(options) {
|
|
|
205
205
|
function usesOpenAISipConversationEngine(options) {
|
|
206
206
|
return normalizeTwilioPhoneConversationEngine(options.conversationEngine) === "openai-sip";
|
|
207
207
|
}
|
|
208
|
+
function outboundConversationEngine(options) {
|
|
209
|
+
return normalizeTwilioPhoneConversationEngine(options.outboundConversationEngine ?? options.conversationEngine);
|
|
210
|
+
}
|
|
211
|
+
function usesOpenAIRealtimeOutboundConversationEngine(options) {
|
|
212
|
+
return outboundConversationEngine(options) === "openai-realtime";
|
|
213
|
+
}
|
|
214
|
+
function usesOpenAISipOutboundConversationEngine(options) {
|
|
215
|
+
return outboundConversationEngine(options) === "openai-sip";
|
|
216
|
+
}
|
|
208
217
|
function twilioPhoneWebhookUrl(publicBaseUrl, basePath = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
|
|
209
218
|
return routeUrl(publicBaseUrl, `${normalizeTwilioPhoneBasePath(basePath)}/incoming`);
|
|
210
219
|
}
|
|
@@ -253,8 +262,9 @@ function websocketRouteUrl(publicBaseUrl, route) {
|
|
|
253
262
|
url.protocol = "wss:";
|
|
254
263
|
return url.toString();
|
|
255
264
|
}
|
|
256
|
-
function mediaStreamTwiml(options, basePath, params, greetingJobId, customParams = {}) {
|
|
257
|
-
const
|
|
265
|
+
function mediaStreamTwiml(options, basePath, params, greetingJobId, customParams = {}, streamEngine) {
|
|
266
|
+
const streamRoute = streamEngine ? `${basePath}/media-stream?engine=${encodeURIComponent(streamEngine)}` : `${basePath}/media-stream`;
|
|
267
|
+
const streamUrl = websocketRouteUrl(options.publicBaseUrl, streamRoute);
|
|
258
268
|
const twimlParams = {
|
|
259
269
|
From: params.From,
|
|
260
270
|
To: params.To,
|
|
@@ -286,7 +296,7 @@ function openAISipUri(options, customHeaders = {}) {
|
|
|
286
296
|
return `sip:${projectId}@sip.api.openai.com;transport=tls${query ? `?${query}` : ""}`;
|
|
287
297
|
}
|
|
288
298
|
function openAISipDialTwiml(options, customHeaders = {}) {
|
|
289
|
-
return `<Dial
|
|
299
|
+
return `<Dial><Sip>${escapeXml(openAISipUri(options, customHeaders))}</Sip></Dial>`;
|
|
290
300
|
}
|
|
291
301
|
/* v8 ignore stop */
|
|
292
302
|
function safeSegment(input) {
|
|
@@ -346,6 +356,32 @@ function friendIdFromCaller(from, callSid) {
|
|
|
346
356
|
function voiceFriendId(options, from, callSid) {
|
|
347
357
|
return options.defaultFriendId?.trim() || friendIdFromCaller(from, callSid);
|
|
348
358
|
}
|
|
359
|
+
async function resolveVoiceFriendContext(options, input) {
|
|
360
|
+
const agentRoot = options.agentRoot ?? (0, identity_1.getAgentRoot)(options.agentName);
|
|
361
|
+
const friendStore = new store_file_1.FileFriendStore(path.join(agentRoot, "friends"));
|
|
362
|
+
const explicitFriendId = input.friendId?.trim();
|
|
363
|
+
if (explicitFriendId) {
|
|
364
|
+
const existing = await friendStore.get(explicitFriendId);
|
|
365
|
+
if (existing) {
|
|
366
|
+
return {
|
|
367
|
+
friendId: existing.id,
|
|
368
|
+
friendStore,
|
|
369
|
+
resolved: { friend: existing, channel: (0, channel_1.getChannelCapabilities)("voice") },
|
|
370
|
+
};
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
const remotePhone = (0, phone_1.normalizeTwilioE164PhoneNumber)(input.remotePhone);
|
|
374
|
+
const provider = remotePhone ? "imessage-handle" : "local";
|
|
375
|
+
const externalId = remotePhone || explicitFriendId || voiceFriendId(options, input.remotePhone ?? "", input.callSid);
|
|
376
|
+
const resolver = new resolver_1.FriendResolver(friendStore, {
|
|
377
|
+
provider,
|
|
378
|
+
externalId,
|
|
379
|
+
displayName: remotePhone || externalId,
|
|
380
|
+
channel: "voice",
|
|
381
|
+
});
|
|
382
|
+
const resolved = await resolver.resolve();
|
|
383
|
+
return { friendId: resolved.friend.id, friendStore, resolved };
|
|
384
|
+
}
|
|
349
385
|
function phoneIdentitySegment(input) {
|
|
350
386
|
const phoneish = input.replace(/[^0-9A-Za-z]+/g, "");
|
|
351
387
|
return phoneish || safeSegment(input);
|
|
@@ -1119,16 +1155,19 @@ const OPENAI_REALTIME_DEFAULT_TRANSCRIPTION_MODEL = "gpt-realtime-whisper";
|
|
|
1119
1155
|
const OPENAI_REALTIME_BOOTSTRAP_TIMEOUT_MS = 250;
|
|
1120
1156
|
const OPENAI_REALTIME_PCMS_BYTES_PER_MS = 8;
|
|
1121
1157
|
const OPENAI_REALTIME_DEFAULT_NOISE_REDUCTION = "near_field";
|
|
1122
|
-
const OPENAI_REALTIME_DEFAULT_VAD_THRESHOLD = 0.
|
|
1123
|
-
const OPENAI_REALTIME_DEFAULT_VAD_PREFIX_PADDING_MS =
|
|
1124
|
-
const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS =
|
|
1125
|
-
const OPENAI_REALTIME_DEFAULT_VAD_IDLE_TIMEOUT_MS =
|
|
1158
|
+
const OPENAI_REALTIME_DEFAULT_VAD_THRESHOLD = 0.78;
|
|
1159
|
+
const OPENAI_REALTIME_DEFAULT_VAD_PREFIX_PADDING_MS = 300;
|
|
1160
|
+
const OPENAI_REALTIME_DEFAULT_VAD_SILENCE_DURATION_MS = 650;
|
|
1161
|
+
const OPENAI_REALTIME_DEFAULT_VAD_IDLE_TIMEOUT_MS = 7_000;
|
|
1126
1162
|
const OPENAI_REALTIME_MAX_OUTPUT_TOKENS = 220;
|
|
1127
|
-
const OPENAI_REALTIME_BARGE_IN_MIN_SPEECH_MS =
|
|
1128
|
-
const OPENAI_REALTIME_BARGE_IN_RMS_THRESHOLD =
|
|
1163
|
+
const OPENAI_REALTIME_BARGE_IN_MIN_SPEECH_MS = 260;
|
|
1164
|
+
const OPENAI_REALTIME_BARGE_IN_RMS_THRESHOLD = 1_300;
|
|
1129
1165
|
const OPENAI_REALTIME_MIN_VOICE_SPEED = 0.25;
|
|
1130
1166
|
const OPENAI_REALTIME_MAX_VOICE_SPEED = 1.5;
|
|
1131
1167
|
const OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS = 50;
|
|
1168
|
+
const OPENAI_REALTIME_RESPONSE_CREATE_CONFLICT_BACKOFF_MS = 1_000;
|
|
1169
|
+
const OPENAI_REALTIME_TOOL_PRESENCE_DELAY_MS = 900;
|
|
1170
|
+
const OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS = 700;
|
|
1132
1171
|
const OPENAI_SIP_OUTBOUND_AMD_GREETING_TIMEOUT_MS = 10_000;
|
|
1133
1172
|
const OPENAI_SIP_UNSUPPORTED_TOOL_NAMES = new Set();
|
|
1134
1173
|
const OPENAI_SIP_DEFAULT_API_BASE_URL = "https://api.openai.com/v1";
|
|
@@ -1337,6 +1376,18 @@ function realtimeToolsFromChatTools(tools, excludedToolNames = new Set()) {
|
|
|
1337
1376
|
parameters: tool.function.parameters ?? { type: "object", properties: {} },
|
|
1338
1377
|
}));
|
|
1339
1378
|
}
|
|
1379
|
+
function mediaStreamRequestedConversationEngine(url) {
|
|
1380
|
+
if (!url)
|
|
1381
|
+
return undefined;
|
|
1382
|
+
try {
|
|
1383
|
+
const parsed = new URL(url, "wss://localhost");
|
|
1384
|
+
const engine = parsed.searchParams.get("engine") ?? undefined;
|
|
1385
|
+
return engine ? normalizeTwilioPhoneConversationEngine(engine) : undefined;
|
|
1386
|
+
}
|
|
1387
|
+
catch {
|
|
1388
|
+
return undefined;
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1340
1391
|
function parseToolArguments(raw) {
|
|
1341
1392
|
if (!raw.trim())
|
|
1342
1393
|
return {};
|
|
@@ -1406,12 +1457,14 @@ async function buildRealtimeVoiceInstructions(options) {
|
|
|
1406
1457
|
return [
|
|
1407
1458
|
`You are ${options.agentName} in the live Voice sense.`,
|
|
1408
1459
|
"This is the same agent identity as every other Ouro surface. Voice is not a reduced or alternate self.",
|
|
1460
|
+
options.friend ? `Resolved voice friend: ${options.friend.name || options.friend.id} (friendId=${options.friend.id}, trust=${options.friend.trustLevel ?? "friend"}, role=${options.friend.role ?? "friend"}). Use this same friend record and trust context for relationship awareness and tool permissions across voice, text, mail, and every other sense.` : "",
|
|
1409
1461
|
`Current native Realtime provider config for this call: model=${options.realtimeModel?.trim() || OPENAI_REALTIME_DEFAULT_MODEL}, voice=${options.realtimeVoice?.trim() || OPENAI_REALTIME_DEFAULT_VOICE}${options.realtimeVoiceSpeed === undefined ? "" : `, speed=${options.realtimeVoiceSpeed}`}.`,
|
|
1410
1462
|
options.realtimeVoiceStyle?.trim()
|
|
1411
1463
|
? `Phone voice target: ${options.realtimeVoiceStyle.trim()}`
|
|
1412
1464
|
: "",
|
|
1413
1465
|
"Speak as yourself through live audio. Follow voice/style preferences from identity notes; do not say you lack identity, preferences, or agency because the provider voice is configured by the transport.",
|
|
1414
1466
|
"Audio is synchronous. Default to one short sentence. Use two short sentences only when needed. Do not use markdown, lists, or long explanations unless the caller explicitly asks.",
|
|
1467
|
+
"Do not treat every tiny silence as your turn. Let the caller finish the thought, especially if they pause mid-sentence.",
|
|
1415
1468
|
"If the caller interrupts, stop the older path and answer the newest thing first.",
|
|
1416
1469
|
"If the caller says they are counting, measuring latency, testing lag, waiting, or wants you quiet, say at most 'got it' and then stay silent until they ask or say something that needs an answer.",
|
|
1417
1470
|
"Use tools for outside facts or side effects. While a tool is running, give at most one tiny preamble, then summarize the result compactly when it returns.",
|
|
@@ -1491,6 +1544,8 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1491
1544
|
pendingAudioPayloads = [];
|
|
1492
1545
|
openaiWs = null;
|
|
1493
1546
|
toolContext;
|
|
1547
|
+
friendStore;
|
|
1548
|
+
resolvedContext;
|
|
1494
1549
|
sessionMessages = [];
|
|
1495
1550
|
playbackState;
|
|
1496
1551
|
playbackMarkIndex = 0;
|
|
@@ -1498,8 +1553,12 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1498
1553
|
toolResponses = new Map();
|
|
1499
1554
|
completedRealtimeResponseIds = new Set();
|
|
1500
1555
|
activeRealtimeResponseId = null;
|
|
1556
|
+
realtimeResponseCreateInFlight = null;
|
|
1557
|
+
untrackedActiveRealtimeResponse = false;
|
|
1558
|
+
untrackedActiveRealtimeResponseTimer = null;
|
|
1501
1559
|
pendingRealtimeResponse = null;
|
|
1502
1560
|
pendingRealtimeResponseTimer = null;
|
|
1561
|
+
pendingUserTurnResponseTimer = null;
|
|
1503
1562
|
responseCreateHoldUntilMs = 0;
|
|
1504
1563
|
initialAudio;
|
|
1505
1564
|
initialAudioPlayed = false;
|
|
@@ -1574,9 +1633,16 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1574
1633
|
this.from = customParameter(start, "From");
|
|
1575
1634
|
this.to = customParameter(start, "To");
|
|
1576
1635
|
}
|
|
1577
|
-
|
|
1636
|
+
const voiceContext = await resolveVoiceFriendContext(this.options, {
|
|
1637
|
+
friendId: explicitFriendId,
|
|
1638
|
+
remotePhone: this.from || undefined,
|
|
1639
|
+
callSid: this.callSid,
|
|
1640
|
+
});
|
|
1641
|
+
this.friendId = voiceContext.friendId;
|
|
1642
|
+
this.friendStore = voiceContext.friendStore;
|
|
1643
|
+
this.resolvedContext = voiceContext.resolved;
|
|
1578
1644
|
this.sessionKey = twilioPhoneVoiceSessionKey({
|
|
1579
|
-
defaultFriendId:
|
|
1645
|
+
defaultFriendId: this.friendId,
|
|
1580
1646
|
from: this.from,
|
|
1581
1647
|
to: this.to,
|
|
1582
1648
|
callSid: this.callSid,
|
|
@@ -1699,7 +1765,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1699
1765
|
format: { type: "audio/pcmu" },
|
|
1700
1766
|
noise_reduction: realtimeNoiseReductionConfig(realtime),
|
|
1701
1767
|
transcription: { model: OPENAI_REALTIME_DEFAULT_TRANSCRIPTION_MODEL },
|
|
1702
|
-
turn_detection: realtimeTurnDetectionConfig(realtime),
|
|
1768
|
+
turn_detection: realtimeTurnDetectionConfig(realtime, { createResponse: false, interruptResponse: false }),
|
|
1703
1769
|
},
|
|
1704
1770
|
output: realtimeOutputAudioConfig(realtime, { type: "audio/pcmu" }),
|
|
1705
1771
|
},
|
|
@@ -1721,6 +1787,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1721
1787
|
const realtimeSystem = await buildRealtimeVoiceInstructions({
|
|
1722
1788
|
agentName: this.options.agentName,
|
|
1723
1789
|
agentRoot,
|
|
1790
|
+
friend: this.resolvedContext?.friend,
|
|
1724
1791
|
priorTranscript: prior,
|
|
1725
1792
|
realtimeVoice: this.options.openaiRealtime?.voice,
|
|
1726
1793
|
realtimeVoiceStyle: this.options.openaiRealtime?.voiceStyle,
|
|
@@ -1743,6 +1810,8 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1743
1810
|
return;
|
|
1744
1811
|
this.toolContext = {
|
|
1745
1812
|
signin: async () => undefined,
|
|
1813
|
+
...(this.resolvedContext ? { context: this.resolvedContext } : {}),
|
|
1814
|
+
...(this.friendStore ? { friendStore: this.friendStore } : {}),
|
|
1746
1815
|
voiceCall: {
|
|
1747
1816
|
requestEnd: () => this.requestHangupFromTool(),
|
|
1748
1817
|
playAudio: (request) => this.playPreparedAudio(request),
|
|
@@ -1750,16 +1819,18 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1750
1819
|
};
|
|
1751
1820
|
}
|
|
1752
1821
|
async buildRealtimeTools() {
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1822
|
+
if (!this.resolvedContext || !this.friendStore) {
|
|
1823
|
+
const voiceContext = await resolveVoiceFriendContext(this.options, {
|
|
1824
|
+
friendId: this.friendId,
|
|
1825
|
+
remotePhone: this.from || undefined,
|
|
1826
|
+
callSid: this.callSid,
|
|
1827
|
+
});
|
|
1828
|
+
this.friendId = voiceContext.friendId;
|
|
1829
|
+
this.friendStore = voiceContext.friendStore;
|
|
1830
|
+
this.resolvedContext = voiceContext.resolved;
|
|
1831
|
+
}
|
|
1832
|
+
const resolved = this.resolvedContext;
|
|
1833
|
+
const friendStore = this.friendStore;
|
|
1763
1834
|
this.toolContext = {
|
|
1764
1835
|
signin: async () => undefined,
|
|
1765
1836
|
context: resolved,
|
|
@@ -1875,7 +1946,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1875
1946
|
return;
|
|
1876
1947
|
}
|
|
1877
1948
|
if (type === "conversation.item.input_audio_transcription.completed" && typeof event.transcript === "string") {
|
|
1878
|
-
this.
|
|
1949
|
+
this.handleUserTranscript(event.transcript);
|
|
1879
1950
|
return;
|
|
1880
1951
|
}
|
|
1881
1952
|
if (type === "response.output_audio_transcript.done" && typeof event.transcript === "string") {
|
|
@@ -1896,6 +1967,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1896
1967
|
return;
|
|
1897
1968
|
}
|
|
1898
1969
|
if (type === "error") {
|
|
1970
|
+
this.handleRealtimeError(event);
|
|
1899
1971
|
(0, runtime_1.emitNervesEvent)({
|
|
1900
1972
|
level: "error",
|
|
1901
1973
|
component: "senses",
|
|
@@ -1905,6 +1977,40 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1905
1977
|
});
|
|
1906
1978
|
}
|
|
1907
1979
|
}
|
|
1980
|
+
handleRealtimeError(event) {
|
|
1981
|
+
const error = event.error;
|
|
1982
|
+
if (!error || typeof error !== "object" || Array.isArray(error))
|
|
1983
|
+
return;
|
|
1984
|
+
const code = stringField(error.code);
|
|
1985
|
+
if (code !== "conversation_already_has_active_response")
|
|
1986
|
+
return;
|
|
1987
|
+
this.noteRealtimeResponseConflict();
|
|
1988
|
+
}
|
|
1989
|
+
handleUserTranscript(transcript) {
|
|
1990
|
+
const content = transcript.trim();
|
|
1991
|
+
if (!content)
|
|
1992
|
+
return;
|
|
1993
|
+
this.appendTranscript("user", content);
|
|
1994
|
+
this.scheduleUserTurnResponse();
|
|
1995
|
+
}
|
|
1996
|
+
scheduleUserTurnResponse() {
|
|
1997
|
+
if (this.closed)
|
|
1998
|
+
return;
|
|
1999
|
+
this.clearPendingUserTurnResponse();
|
|
2000
|
+
this.pendingUserTurnResponseTimer = setTimeout(() => {
|
|
2001
|
+
this.pendingUserTurnResponseTimer = null;
|
|
2002
|
+
if (this.closed)
|
|
2003
|
+
return;
|
|
2004
|
+
this.requestRealtimeResponse();
|
|
2005
|
+
}, OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS);
|
|
2006
|
+
this.pendingUserTurnResponseTimer.unref?.();
|
|
2007
|
+
}
|
|
2008
|
+
clearPendingUserTurnResponse() {
|
|
2009
|
+
if (!this.pendingUserTurnResponseTimer)
|
|
2010
|
+
return;
|
|
2011
|
+
clearTimeout(this.pendingUserTurnResponseTimer);
|
|
2012
|
+
this.pendingUserTurnResponseTimer = null;
|
|
2013
|
+
}
|
|
1908
2014
|
handleOpenAIAudioDelta(event) {
|
|
1909
2015
|
const payload = stringField(event.delta);
|
|
1910
2016
|
if (!payload)
|
|
@@ -1926,6 +2032,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1926
2032
|
this.sendTwilioMark({ itemId, contentIndex, audioEndMs });
|
|
1927
2033
|
}
|
|
1928
2034
|
handleCallerSpeechStarted() {
|
|
2035
|
+
this.clearPendingUserTurnResponse();
|
|
1929
2036
|
const playback = this.playbackState;
|
|
1930
2037
|
if (!this.hasReliableCallerBargeInSpeech()) {
|
|
1931
2038
|
(0, runtime_1.emitNervesEvent)({
|
|
@@ -1971,6 +2078,8 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1971
2078
|
responseDone: this.completedRealtimeResponseIds.has(responseId),
|
|
1972
2079
|
followupRequested: false,
|
|
1973
2080
|
suppressFollowup: false,
|
|
2081
|
+
presenceRequested: false,
|
|
2082
|
+
presenceTimer: null,
|
|
1974
2083
|
};
|
|
1975
2084
|
state.pendingCallIds.add(callId);
|
|
1976
2085
|
if (!existing)
|
|
@@ -1984,6 +2093,8 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
1984
2093
|
if (!state)
|
|
1985
2094
|
return false;
|
|
1986
2095
|
state.pendingCallIds.delete(callId);
|
|
2096
|
+
if (state.pendingCallIds.size === 0)
|
|
2097
|
+
this.clearRealtimeToolPresenceTimer(state);
|
|
1987
2098
|
return this.maybeCreateRealtimeToolFollowup(responseId, state);
|
|
1988
2099
|
}
|
|
1989
2100
|
completeRealtimeToolResponse(responseId) {
|
|
@@ -2002,11 +2113,37 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2002
2113
|
return false;
|
|
2003
2114
|
state.followupRequested = true;
|
|
2004
2115
|
this.toolResponses.delete(responseId);
|
|
2116
|
+
this.clearRealtimeToolPresenceTimer(state);
|
|
2005
2117
|
if (state.suppressFollowup)
|
|
2006
2118
|
return true;
|
|
2007
2119
|
this.requestRealtimeResponse();
|
|
2008
2120
|
return true;
|
|
2009
2121
|
}
|
|
2122
|
+
scheduleRealtimeToolPresence(responseId, state) {
|
|
2123
|
+
if (!responseId || state.presenceRequested || state.presenceTimer)
|
|
2124
|
+
return;
|
|
2125
|
+
state.presenceTimer = setTimeout(() => {
|
|
2126
|
+
state.presenceTimer = null;
|
|
2127
|
+
const current = this.toolResponses.get(responseId);
|
|
2128
|
+
if (this.closed || current !== state || state.pendingCallIds.size === 0 || state.suppressFollowup)
|
|
2129
|
+
return;
|
|
2130
|
+
state.presenceRequested = true;
|
|
2131
|
+
this.requestRealtimeResponse({
|
|
2132
|
+
instructions: "A tool is taking a moment. Say one very short natural holding phrase under six words, then stop speaking.",
|
|
2133
|
+
});
|
|
2134
|
+
}, OPENAI_REALTIME_TOOL_PRESENCE_DELAY_MS);
|
|
2135
|
+
state.presenceTimer.unref?.();
|
|
2136
|
+
}
|
|
2137
|
+
clearRealtimeToolPresenceTimer(state) {
|
|
2138
|
+
if (!state.presenceTimer)
|
|
2139
|
+
return;
|
|
2140
|
+
clearTimeout(state.presenceTimer);
|
|
2141
|
+
state.presenceTimer = null;
|
|
2142
|
+
}
|
|
2143
|
+
clearRealtimeToolPresenceTimers() {
|
|
2144
|
+
for (const state of this.toolResponses.values())
|
|
2145
|
+
this.clearRealtimeToolPresenceTimer(state);
|
|
2146
|
+
}
|
|
2010
2147
|
async runRealtimeTool(event) {
|
|
2011
2148
|
const name = typeof event.name === "string" ? event.name : "";
|
|
2012
2149
|
const callId = typeof event.call_id === "string" ? event.call_id : "";
|
|
@@ -2017,6 +2154,8 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2017
2154
|
const coordinated = !!toolState;
|
|
2018
2155
|
if (name === "voice_end_call" && toolState)
|
|
2019
2156
|
toolState.suppressFollowup = true;
|
|
2157
|
+
if (toolState && !toolState.suppressFollowup)
|
|
2158
|
+
this.scheduleRealtimeToolPresence(responseId, toolState);
|
|
2020
2159
|
let output;
|
|
2021
2160
|
try {
|
|
2022
2161
|
const args = parseToolArguments(typeof event.arguments === "string" ? event.arguments : "");
|
|
@@ -2057,14 +2196,18 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2057
2196
|
}
|
|
2058
2197
|
}
|
|
2059
2198
|
noteRealtimeResponseCreated(event) {
|
|
2199
|
+
this.realtimeResponseCreateInFlight = null;
|
|
2200
|
+
this.untrackedActiveRealtimeResponse = false;
|
|
2201
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
2060
2202
|
const responseId = realtimeResponseId(event);
|
|
2061
2203
|
if (responseId)
|
|
2062
2204
|
this.activeRealtimeResponseId = responseId;
|
|
2063
2205
|
}
|
|
2064
|
-
noteRealtimeResponseDone(
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2206
|
+
noteRealtimeResponseDone(_responseId) {
|
|
2207
|
+
this.realtimeResponseCreateInFlight = null;
|
|
2208
|
+
this.untrackedActiveRealtimeResponse = false;
|
|
2209
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
2210
|
+
this.activeRealtimeResponseId = null;
|
|
2068
2211
|
this.responseCreateHoldUntilMs = Math.max(this.responseCreateHoldUntilMs, Date.now() + OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
2069
2212
|
this.schedulePendingRealtimeResponse(OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
2070
2213
|
}
|
|
@@ -2072,15 +2215,21 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2072
2215
|
if (this.closed)
|
|
2073
2216
|
return;
|
|
2074
2217
|
const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
|
|
2075
|
-
if (this.
|
|
2076
|
-
|
|
2077
|
-
this.
|
|
2078
|
-
if (!this.activeRealtimeResponseId)
|
|
2218
|
+
if (this.realtimeResponseIsBusy() || waitMs > 0) {
|
|
2219
|
+
this.holdRealtimeResponse(response ? { response } : {});
|
|
2220
|
+
if (!this.realtimeResponseIsBusy())
|
|
2079
2221
|
this.schedulePendingRealtimeResponse(waitMs);
|
|
2080
2222
|
return;
|
|
2081
2223
|
}
|
|
2082
2224
|
this.sendRealtimeResponseCreate(response ? { response } : {});
|
|
2083
2225
|
}
|
|
2226
|
+
realtimeResponseIsBusy() {
|
|
2227
|
+
return !!this.activeRealtimeResponseId || !!this.realtimeResponseCreateInFlight || this.untrackedActiveRealtimeResponse;
|
|
2228
|
+
}
|
|
2229
|
+
holdRealtimeResponse(request) {
|
|
2230
|
+
const pendingResponse = request.response ?? this.pendingRealtimeResponse?.response;
|
|
2231
|
+
this.pendingRealtimeResponse = pendingResponse ? { response: pendingResponse } : {};
|
|
2232
|
+
}
|
|
2084
2233
|
schedulePendingRealtimeResponse(delayMs) {
|
|
2085
2234
|
if (!this.pendingRealtimeResponse)
|
|
2086
2235
|
return;
|
|
@@ -2093,7 +2242,7 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2093
2242
|
this.pendingRealtimeResponseTimer.unref?.();
|
|
2094
2243
|
}
|
|
2095
2244
|
flushPendingRealtimeResponse() {
|
|
2096
|
-
if (!this.pendingRealtimeResponse || this.closed || this.
|
|
2245
|
+
if (!this.pendingRealtimeResponse || this.closed || this.realtimeResponseIsBusy())
|
|
2097
2246
|
return;
|
|
2098
2247
|
const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
|
|
2099
2248
|
if (waitMs > 0) {
|
|
@@ -2105,11 +2254,38 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2105
2254
|
this.sendRealtimeResponseCreate(pending);
|
|
2106
2255
|
}
|
|
2107
2256
|
sendRealtimeResponseCreate(request) {
|
|
2257
|
+
this.realtimeResponseCreateInFlight = request;
|
|
2108
2258
|
this.sendOpenAI({
|
|
2109
2259
|
type: "response.create",
|
|
2110
2260
|
...(request.response ? { response: request.response } : {}),
|
|
2111
2261
|
});
|
|
2112
2262
|
}
|
|
2263
|
+
noteRealtimeResponseConflict() {
|
|
2264
|
+
const inFlight = this.realtimeResponseCreateInFlight;
|
|
2265
|
+
this.realtimeResponseCreateInFlight = null;
|
|
2266
|
+
this.untrackedActiveRealtimeResponse = true;
|
|
2267
|
+
if (inFlight)
|
|
2268
|
+
this.holdRealtimeResponse(inFlight);
|
|
2269
|
+
this.scheduleUntrackedActiveRealtimeResponseFallback();
|
|
2270
|
+
}
|
|
2271
|
+
scheduleUntrackedActiveRealtimeResponseFallback() {
|
|
2272
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
2273
|
+
this.untrackedActiveRealtimeResponseTimer = setTimeout(() => {
|
|
2274
|
+
this.untrackedActiveRealtimeResponseTimer = null;
|
|
2275
|
+
if (this.closed || !this.untrackedActiveRealtimeResponse)
|
|
2276
|
+
return;
|
|
2277
|
+
this.untrackedActiveRealtimeResponse = false;
|
|
2278
|
+
this.responseCreateHoldUntilMs = Math.max(this.responseCreateHoldUntilMs, Date.now() + OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
2279
|
+
this.schedulePendingRealtimeResponse(OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
2280
|
+
}, OPENAI_REALTIME_RESPONSE_CREATE_CONFLICT_BACKOFF_MS);
|
|
2281
|
+
this.untrackedActiveRealtimeResponseTimer.unref?.();
|
|
2282
|
+
}
|
|
2283
|
+
clearUntrackedActiveRealtimeResponseTimer() {
|
|
2284
|
+
if (!this.untrackedActiveRealtimeResponseTimer)
|
|
2285
|
+
return;
|
|
2286
|
+
clearTimeout(this.untrackedActiveRealtimeResponseTimer);
|
|
2287
|
+
this.untrackedActiveRealtimeResponseTimer = null;
|
|
2288
|
+
}
|
|
2113
2289
|
flushPendingAudio() {
|
|
2114
2290
|
const pending = this.pendingAudioPayloads.splice(0);
|
|
2115
2291
|
for (const payload of pending) {
|
|
@@ -2229,6 +2405,9 @@ class TwilioOpenAIRealtimeMediaStreamSession {
|
|
|
2229
2405
|
clearTimeout(this.pendingRealtimeResponseTimer);
|
|
2230
2406
|
this.pendingRealtimeResponseTimer = null;
|
|
2231
2407
|
}
|
|
2408
|
+
this.clearPendingUserTurnResponse();
|
|
2409
|
+
this.clearRealtimeToolPresenceTimers();
|
|
2410
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
2232
2411
|
this.lifecycle?.onClose?.(this, { callSid: this.callSid, outboundId: this.outboundId });
|
|
2233
2412
|
(0, runtime_1.emitNervesEvent)({
|
|
2234
2413
|
component: "senses",
|
|
@@ -2257,12 +2436,18 @@ class OpenAISipPhoneSession {
|
|
|
2257
2436
|
autoResponsesSuppressedForAmd = false;
|
|
2258
2437
|
openaiWs = null;
|
|
2259
2438
|
toolContext;
|
|
2439
|
+
friendStore;
|
|
2440
|
+
resolvedContext;
|
|
2260
2441
|
sessionMessages = [];
|
|
2261
2442
|
toolResponses = new Map();
|
|
2262
2443
|
completedRealtimeResponseIds = new Set();
|
|
2263
2444
|
activeRealtimeResponseId = null;
|
|
2445
|
+
realtimeResponseCreateInFlight = null;
|
|
2446
|
+
untrackedActiveRealtimeResponse = false;
|
|
2447
|
+
untrackedActiveRealtimeResponseTimer = null;
|
|
2264
2448
|
pendingRealtimeResponse = null;
|
|
2265
2449
|
pendingRealtimeResponseTimer = null;
|
|
2450
|
+
pendingUserTurnResponseTimer = null;
|
|
2266
2451
|
responseCreateHoldUntilMs = 0;
|
|
2267
2452
|
constructor(options, metadata, registry) {
|
|
2268
2453
|
this.options = options;
|
|
@@ -2283,11 +2468,16 @@ class OpenAISipPhoneSession {
|
|
|
2283
2468
|
throw new Error("OpenAI Realtime API key is not configured");
|
|
2284
2469
|
if (!sip)
|
|
2285
2470
|
throw new Error("OpenAI SIP options are not configured");
|
|
2286
|
-
|
|
2287
|
-
|| this.options.defaultFriendId?.trim()
|
|
2288
|
-
|
|
2471
|
+
const voiceContext = await resolveVoiceFriendContext(this.options, {
|
|
2472
|
+
friendId: this.metadata.friendId || this.options.defaultFriendId?.trim(),
|
|
2473
|
+
remotePhone: this.metadata.from || undefined,
|
|
2474
|
+
callSid: this.metadata.callId,
|
|
2475
|
+
});
|
|
2476
|
+
this.friendId = voiceContext.friendId;
|
|
2477
|
+
this.friendStore = voiceContext.friendStore;
|
|
2478
|
+
this.resolvedContext = voiceContext.resolved;
|
|
2289
2479
|
this.sessionKey = twilioPhoneVoiceSessionKey({
|
|
2290
|
-
defaultFriendId: this.friendId
|
|
2480
|
+
defaultFriendId: this.friendId,
|
|
2291
2481
|
from: this.metadata.from,
|
|
2292
2482
|
to: this.metadata.to,
|
|
2293
2483
|
callSid: this.metadata.callId,
|
|
@@ -2329,7 +2519,7 @@ class OpenAISipPhoneSession {
|
|
|
2329
2519
|
];
|
|
2330
2520
|
if (this.closed || this.outboundAmdStopped())
|
|
2331
2521
|
return;
|
|
2332
|
-
await this.acceptOpenAISipCall(realtime, sip, instructions, tools
|
|
2522
|
+
await this.acceptOpenAISipCall(realtime, sip, instructions, tools);
|
|
2333
2523
|
if (this.closed || this.outboundAmdStopped())
|
|
2334
2524
|
return;
|
|
2335
2525
|
this.openControlWebSocket(realtime, sip, fullConfigPromise, usedBootstrap);
|
|
@@ -2370,14 +2560,14 @@ class OpenAISipPhoneSession {
|
|
|
2370
2560
|
const answeredBy = job.answeredBy?.trim();
|
|
2371
2561
|
if (nonHumanAnsweredStatus(answeredBy) || job.status === "voicemail" || job.status === "fax")
|
|
2372
2562
|
return "reject";
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
return "
|
|
2563
|
+
// Silence after pickup feels broken. Start the greeting immediately unless
|
|
2564
|
+
// Twilio has already positively identified a machine/fax answer.
|
|
2565
|
+
return "send";
|
|
2376
2566
|
}
|
|
2377
2567
|
outboundAmdStopped() {
|
|
2378
2568
|
return this.outboundAmdState === "nonhuman" || this.outboundAmdState === "timeout";
|
|
2379
2569
|
}
|
|
2380
|
-
async acceptOpenAISipCall(realtime, sip, instructions, tools
|
|
2570
|
+
async acceptOpenAISipCall(realtime, sip, instructions, tools) {
|
|
2381
2571
|
const fetchImpl = sip.fetch ?? fetch;
|
|
2382
2572
|
const response = await fetchImpl(openAISipCallActionUrl(sip, this.metadata.callId, "accept"), {
|
|
2383
2573
|
method: "POST",
|
|
@@ -2393,7 +2583,7 @@ class OpenAISipPhoneSession {
|
|
|
2393
2583
|
input: {
|
|
2394
2584
|
noise_reduction: realtimeNoiseReductionConfig(realtime),
|
|
2395
2585
|
transcription: { model: OPENAI_REALTIME_DEFAULT_TRANSCRIPTION_MODEL },
|
|
2396
|
-
turn_detection: realtimeTurnDetectionConfig(realtime,
|
|
2586
|
+
turn_detection: realtimeTurnDetectionConfig(realtime, { createResponse: false, interruptResponse: false }),
|
|
2397
2587
|
},
|
|
2398
2588
|
output: realtimeOutputAudioConfig(realtime),
|
|
2399
2589
|
},
|
|
@@ -2509,6 +2699,7 @@ class OpenAISipPhoneSession {
|
|
|
2509
2699
|
const realtimeSystem = await buildRealtimeVoiceInstructions({
|
|
2510
2700
|
agentName: this.options.agentName,
|
|
2511
2701
|
agentRoot,
|
|
2702
|
+
friend: this.resolvedContext?.friend,
|
|
2512
2703
|
priorTranscript: prior,
|
|
2513
2704
|
realtimeVoice: this.options.openaiRealtime?.voice,
|
|
2514
2705
|
realtimeVoiceStyle: this.options.openaiRealtime?.voiceStyle,
|
|
@@ -2528,6 +2719,8 @@ class OpenAISipPhoneSession {
|
|
|
2528
2719
|
return;
|
|
2529
2720
|
this.toolContext = {
|
|
2530
2721
|
signin: async () => undefined,
|
|
2722
|
+
...(this.resolvedContext ? { context: this.resolvedContext } : {}),
|
|
2723
|
+
...(this.friendStore ? { friendStore: this.friendStore } : {}),
|
|
2531
2724
|
voiceCall: {
|
|
2532
2725
|
requestEnd: () => this.requestHangupFromTool(),
|
|
2533
2726
|
playAudio: (request) => this.playRealtimeAudioCue(request),
|
|
@@ -2535,16 +2728,18 @@ class OpenAISipPhoneSession {
|
|
|
2535
2728
|
};
|
|
2536
2729
|
}
|
|
2537
2730
|
async buildRealtimeTools() {
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2541
|
-
|
|
2542
|
-
|
|
2543
|
-
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2731
|
+
if (!this.resolvedContext || !this.friendStore) {
|
|
2732
|
+
const voiceContext = await resolveVoiceFriendContext(this.options, {
|
|
2733
|
+
friendId: this.friendId,
|
|
2734
|
+
remotePhone: this.metadata.from || undefined,
|
|
2735
|
+
callSid: this.metadata.callId,
|
|
2736
|
+
});
|
|
2737
|
+
this.friendId = voiceContext.friendId;
|
|
2738
|
+
this.friendStore = voiceContext.friendStore;
|
|
2739
|
+
this.resolvedContext = voiceContext.resolved;
|
|
2740
|
+
}
|
|
2741
|
+
const resolved = this.resolvedContext;
|
|
2742
|
+
const friendStore = this.friendStore;
|
|
2548
2743
|
this.toolContext = {
|
|
2549
2744
|
signin: async () => undefined,
|
|
2550
2745
|
context: resolved,
|
|
@@ -2697,7 +2892,7 @@ class OpenAISipPhoneSession {
|
|
|
2697
2892
|
type: "realtime",
|
|
2698
2893
|
audio: {
|
|
2699
2894
|
input: {
|
|
2700
|
-
turn_detection: realtimeTurnDetectionConfig(realtime),
|
|
2895
|
+
turn_detection: realtimeTurnDetectionConfig(realtime, { createResponse: false, interruptResponse: false }),
|
|
2701
2896
|
},
|
|
2702
2897
|
},
|
|
2703
2898
|
},
|
|
@@ -2796,9 +2991,13 @@ class OpenAISipPhoneSession {
|
|
|
2796
2991
|
this.noteRealtimeResponseCreated(event);
|
|
2797
2992
|
return;
|
|
2798
2993
|
}
|
|
2994
|
+
if (type === "input_audio_buffer.speech_started") {
|
|
2995
|
+
this.clearPendingUserTurnResponse();
|
|
2996
|
+
return;
|
|
2997
|
+
}
|
|
2799
2998
|
if (type === "conversation.item.input_audio_transcription.completed" && typeof event.transcript === "string") {
|
|
2800
2999
|
this.recordOutboundAmdTranscriptCandidate(event.transcript);
|
|
2801
|
-
this.
|
|
3000
|
+
this.handleUserTranscript(event.transcript);
|
|
2802
3001
|
return;
|
|
2803
3002
|
}
|
|
2804
3003
|
if (type === "response.output_audio_transcript.done" && typeof event.transcript === "string") {
|
|
@@ -2818,6 +3017,7 @@ class OpenAISipPhoneSession {
|
|
|
2818
3017
|
return;
|
|
2819
3018
|
}
|
|
2820
3019
|
if (type === "error") {
|
|
3020
|
+
this.handleRealtimeError(event);
|
|
2821
3021
|
(0, runtime_1.emitNervesEvent)({
|
|
2822
3022
|
level: "error",
|
|
2823
3023
|
component: "senses",
|
|
@@ -2827,6 +3027,40 @@ class OpenAISipPhoneSession {
|
|
|
2827
3027
|
});
|
|
2828
3028
|
}
|
|
2829
3029
|
}
|
|
3030
|
+
handleRealtimeError(event) {
|
|
3031
|
+
const error = event.error;
|
|
3032
|
+
if (!error || typeof error !== "object" || Array.isArray(error))
|
|
3033
|
+
return;
|
|
3034
|
+
const code = stringField(error.code);
|
|
3035
|
+
if (code !== "conversation_already_has_active_response")
|
|
3036
|
+
return;
|
|
3037
|
+
this.noteRealtimeResponseConflict();
|
|
3038
|
+
}
|
|
3039
|
+
handleUserTranscript(transcript) {
|
|
3040
|
+
const content = transcript.trim();
|
|
3041
|
+
if (!content)
|
|
3042
|
+
return;
|
|
3043
|
+
this.appendTranscript("user", content);
|
|
3044
|
+
this.scheduleUserTurnResponse();
|
|
3045
|
+
}
|
|
3046
|
+
scheduleUserTurnResponse() {
|
|
3047
|
+
if (this.closed)
|
|
3048
|
+
return;
|
|
3049
|
+
this.clearPendingUserTurnResponse();
|
|
3050
|
+
this.pendingUserTurnResponseTimer = setTimeout(() => {
|
|
3051
|
+
this.pendingUserTurnResponseTimer = null;
|
|
3052
|
+
if (this.closed)
|
|
3053
|
+
return;
|
|
3054
|
+
this.requestRealtimeResponse();
|
|
3055
|
+
}, OPENAI_REALTIME_USER_TURN_RESPONSE_DELAY_MS);
|
|
3056
|
+
this.pendingUserTurnResponseTimer.unref?.();
|
|
3057
|
+
}
|
|
3058
|
+
clearPendingUserTurnResponse() {
|
|
3059
|
+
if (!this.pendingUserTurnResponseTimer)
|
|
3060
|
+
return;
|
|
3061
|
+
clearTimeout(this.pendingUserTurnResponseTimer);
|
|
3062
|
+
this.pendingUserTurnResponseTimer = null;
|
|
3063
|
+
}
|
|
2830
3064
|
registerRealtimeToolResponse(responseId, callId) {
|
|
2831
3065
|
if (!responseId)
|
|
2832
3066
|
return undefined;
|
|
@@ -2836,6 +3070,8 @@ class OpenAISipPhoneSession {
|
|
|
2836
3070
|
responseDone: this.completedRealtimeResponseIds.has(responseId),
|
|
2837
3071
|
followupRequested: false,
|
|
2838
3072
|
suppressFollowup: false,
|
|
3073
|
+
presenceRequested: false,
|
|
3074
|
+
presenceTimer: null,
|
|
2839
3075
|
};
|
|
2840
3076
|
state.pendingCallIds.add(callId);
|
|
2841
3077
|
if (!existing)
|
|
@@ -2849,6 +3085,8 @@ class OpenAISipPhoneSession {
|
|
|
2849
3085
|
if (!state)
|
|
2850
3086
|
return false;
|
|
2851
3087
|
state.pendingCallIds.delete(callId);
|
|
3088
|
+
if (state.pendingCallIds.size === 0)
|
|
3089
|
+
this.clearRealtimeToolPresenceTimer(state);
|
|
2852
3090
|
return this.maybeCreateRealtimeToolFollowup(responseId, state);
|
|
2853
3091
|
}
|
|
2854
3092
|
completeRealtimeToolResponse(responseId) {
|
|
@@ -2867,6 +3105,7 @@ class OpenAISipPhoneSession {
|
|
|
2867
3105
|
return false;
|
|
2868
3106
|
state.followupRequested = true;
|
|
2869
3107
|
this.toolResponses.delete(responseId);
|
|
3108
|
+
this.clearRealtimeToolPresenceTimer(state);
|
|
2870
3109
|
if (state.suppressFollowup) {
|
|
2871
3110
|
this.completeHangupIfReady("tool_response_done");
|
|
2872
3111
|
return true;
|
|
@@ -2874,6 +3113,31 @@ class OpenAISipPhoneSession {
|
|
|
2874
3113
|
this.requestRealtimeResponse();
|
|
2875
3114
|
return true;
|
|
2876
3115
|
}
|
|
3116
|
+
scheduleRealtimeToolPresence(responseId, state) {
|
|
3117
|
+
if (!responseId || state.presenceRequested || state.presenceTimer)
|
|
3118
|
+
return;
|
|
3119
|
+
state.presenceTimer = setTimeout(() => {
|
|
3120
|
+
state.presenceTimer = null;
|
|
3121
|
+
const current = this.toolResponses.get(responseId);
|
|
3122
|
+
if (this.closed || current !== state || state.pendingCallIds.size === 0 || state.suppressFollowup)
|
|
3123
|
+
return;
|
|
3124
|
+
state.presenceRequested = true;
|
|
3125
|
+
this.requestRealtimeResponse({
|
|
3126
|
+
instructions: "A tool is taking a moment. Say one very short natural holding phrase under six words, then stop speaking.",
|
|
3127
|
+
});
|
|
3128
|
+
}, OPENAI_REALTIME_TOOL_PRESENCE_DELAY_MS);
|
|
3129
|
+
state.presenceTimer.unref?.();
|
|
3130
|
+
}
|
|
3131
|
+
clearRealtimeToolPresenceTimer(state) {
|
|
3132
|
+
if (!state.presenceTimer)
|
|
3133
|
+
return;
|
|
3134
|
+
clearTimeout(state.presenceTimer);
|
|
3135
|
+
state.presenceTimer = null;
|
|
3136
|
+
}
|
|
3137
|
+
clearRealtimeToolPresenceTimers() {
|
|
3138
|
+
for (const state of this.toolResponses.values())
|
|
3139
|
+
this.clearRealtimeToolPresenceTimer(state);
|
|
3140
|
+
}
|
|
2877
3141
|
async runRealtimeTool(event) {
|
|
2878
3142
|
const name = typeof event.name === "string" ? event.name : "";
|
|
2879
3143
|
const callId = typeof event.call_id === "string" ? event.call_id : "";
|
|
@@ -2884,6 +3148,8 @@ class OpenAISipPhoneSession {
|
|
|
2884
3148
|
const coordinated = !!toolState;
|
|
2885
3149
|
if (name === "voice_end_call" && toolState)
|
|
2886
3150
|
toolState.suppressFollowup = true;
|
|
3151
|
+
if (toolState && !toolState.suppressFollowup)
|
|
3152
|
+
this.scheduleRealtimeToolPresence(responseId, toolState);
|
|
2887
3153
|
let output;
|
|
2888
3154
|
try {
|
|
2889
3155
|
const args = parseToolArguments(typeof event.arguments === "string" ? event.arguments : "");
|
|
@@ -2924,14 +3190,18 @@ class OpenAISipPhoneSession {
|
|
|
2924
3190
|
}
|
|
2925
3191
|
}
|
|
2926
3192
|
noteRealtimeResponseCreated(event) {
|
|
3193
|
+
this.realtimeResponseCreateInFlight = null;
|
|
3194
|
+
this.untrackedActiveRealtimeResponse = false;
|
|
3195
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
2927
3196
|
const responseId = realtimeResponseId(event);
|
|
2928
3197
|
if (responseId)
|
|
2929
3198
|
this.activeRealtimeResponseId = responseId;
|
|
2930
3199
|
}
|
|
2931
|
-
noteRealtimeResponseDone(
|
|
2932
|
-
|
|
2933
|
-
|
|
2934
|
-
|
|
3200
|
+
noteRealtimeResponseDone(_responseId) {
|
|
3201
|
+
this.realtimeResponseCreateInFlight = null;
|
|
3202
|
+
this.untrackedActiveRealtimeResponse = false;
|
|
3203
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
3204
|
+
this.activeRealtimeResponseId = null;
|
|
2935
3205
|
this.responseCreateHoldUntilMs = Math.max(this.responseCreateHoldUntilMs, Date.now() + OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
2936
3206
|
this.schedulePendingRealtimeResponse(OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
2937
3207
|
}
|
|
@@ -2939,15 +3209,21 @@ class OpenAISipPhoneSession {
|
|
|
2939
3209
|
if (this.closed)
|
|
2940
3210
|
return;
|
|
2941
3211
|
const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
|
|
2942
|
-
if (this.
|
|
2943
|
-
|
|
2944
|
-
this.
|
|
2945
|
-
if (!this.activeRealtimeResponseId)
|
|
3212
|
+
if (this.realtimeResponseIsBusy() || waitMs > 0) {
|
|
3213
|
+
this.holdRealtimeResponse(response ? { response } : {});
|
|
3214
|
+
if (!this.realtimeResponseIsBusy())
|
|
2946
3215
|
this.schedulePendingRealtimeResponse(waitMs);
|
|
2947
3216
|
return;
|
|
2948
3217
|
}
|
|
2949
3218
|
this.sendRealtimeResponseCreate(response ? { response } : {});
|
|
2950
3219
|
}
|
|
3220
|
+
realtimeResponseIsBusy() {
|
|
3221
|
+
return !!this.activeRealtimeResponseId || !!this.realtimeResponseCreateInFlight || this.untrackedActiveRealtimeResponse;
|
|
3222
|
+
}
|
|
3223
|
+
holdRealtimeResponse(request) {
|
|
3224
|
+
const pendingResponse = request.response ?? this.pendingRealtimeResponse?.response;
|
|
3225
|
+
this.pendingRealtimeResponse = pendingResponse ? { response: pendingResponse } : {};
|
|
3226
|
+
}
|
|
2951
3227
|
schedulePendingRealtimeResponse(delayMs) {
|
|
2952
3228
|
if (!this.pendingRealtimeResponse)
|
|
2953
3229
|
return;
|
|
@@ -2960,7 +3236,7 @@ class OpenAISipPhoneSession {
|
|
|
2960
3236
|
this.pendingRealtimeResponseTimer.unref?.();
|
|
2961
3237
|
}
|
|
2962
3238
|
flushPendingRealtimeResponse() {
|
|
2963
|
-
if (!this.pendingRealtimeResponse || this.closed || this.
|
|
3239
|
+
if (!this.pendingRealtimeResponse || this.closed || this.realtimeResponseIsBusy())
|
|
2964
3240
|
return;
|
|
2965
3241
|
const waitMs = Math.max(0, this.responseCreateHoldUntilMs - Date.now());
|
|
2966
3242
|
if (waitMs > 0) {
|
|
@@ -2972,11 +3248,38 @@ class OpenAISipPhoneSession {
|
|
|
2972
3248
|
this.sendRealtimeResponseCreate(pending);
|
|
2973
3249
|
}
|
|
2974
3250
|
sendRealtimeResponseCreate(request) {
|
|
3251
|
+
this.realtimeResponseCreateInFlight = request;
|
|
2975
3252
|
this.sendOpenAI({
|
|
2976
3253
|
type: "response.create",
|
|
2977
3254
|
...(request.response ? { response: request.response } : {}),
|
|
2978
3255
|
});
|
|
2979
3256
|
}
|
|
3257
|
+
noteRealtimeResponseConflict() {
|
|
3258
|
+
const inFlight = this.realtimeResponseCreateInFlight;
|
|
3259
|
+
this.realtimeResponseCreateInFlight = null;
|
|
3260
|
+
this.untrackedActiveRealtimeResponse = true;
|
|
3261
|
+
if (inFlight)
|
|
3262
|
+
this.holdRealtimeResponse(inFlight);
|
|
3263
|
+
this.scheduleUntrackedActiveRealtimeResponseFallback();
|
|
3264
|
+
}
|
|
3265
|
+
scheduleUntrackedActiveRealtimeResponseFallback() {
|
|
3266
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
3267
|
+
this.untrackedActiveRealtimeResponseTimer = setTimeout(() => {
|
|
3268
|
+
this.untrackedActiveRealtimeResponseTimer = null;
|
|
3269
|
+
if (this.closed || !this.untrackedActiveRealtimeResponse)
|
|
3270
|
+
return;
|
|
3271
|
+
this.untrackedActiveRealtimeResponse = false;
|
|
3272
|
+
this.responseCreateHoldUntilMs = Math.max(this.responseCreateHoldUntilMs, Date.now() + OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
3273
|
+
this.schedulePendingRealtimeResponse(OPENAI_REALTIME_RESPONSE_CREATE_GRACE_MS);
|
|
3274
|
+
}, OPENAI_REALTIME_RESPONSE_CREATE_CONFLICT_BACKOFF_MS);
|
|
3275
|
+
this.untrackedActiveRealtimeResponseTimer.unref?.();
|
|
3276
|
+
}
|
|
3277
|
+
clearUntrackedActiveRealtimeResponseTimer() {
|
|
3278
|
+
if (!this.untrackedActiveRealtimeResponseTimer)
|
|
3279
|
+
return;
|
|
3280
|
+
clearTimeout(this.untrackedActiveRealtimeResponseTimer);
|
|
3281
|
+
this.untrackedActiveRealtimeResponseTimer = null;
|
|
3282
|
+
}
|
|
2980
3283
|
requestHangupFromTool() {
|
|
2981
3284
|
if (this.closed)
|
|
2982
3285
|
return;
|
|
@@ -3050,6 +3353,9 @@ class OpenAISipPhoneSession {
|
|
|
3050
3353
|
clearTimeout(this.pendingRealtimeResponseTimer);
|
|
3051
3354
|
this.pendingRealtimeResponseTimer = null;
|
|
3052
3355
|
}
|
|
3356
|
+
this.clearPendingUserTurnResponse();
|
|
3357
|
+
this.clearRealtimeToolPresenceTimers();
|
|
3358
|
+
this.clearUntrackedActiveRealtimeResponseTimer();
|
|
3053
3359
|
(0, runtime_1.emitNervesEvent)({
|
|
3054
3360
|
component: "senses",
|
|
3055
3361
|
event: "senses.voice_openai_sip_call_stop",
|
|
@@ -3805,7 +4111,7 @@ async function handleOutgoing(options, basePath, outboundId, params, jobs) {
|
|
|
3805
4111
|
Reason: job.reason,
|
|
3806
4112
|
InitialAudio: encodeVoiceCallAudioCustomParameter(job.initialAudio),
|
|
3807
4113
|
};
|
|
3808
|
-
if (
|
|
4114
|
+
if (usesOpenAISipOutboundConversationEngine(options)) {
|
|
3809
4115
|
(0, runtime_1.emitNervesEvent)({
|
|
3810
4116
|
component: "senses",
|
|
3811
4117
|
event: "senses.voice_twilio_sip_connect",
|
|
@@ -3824,8 +4130,8 @@ async function handleOutgoing(options, basePath, outboundId, params, jobs) {
|
|
|
3824
4130
|
})));
|
|
3825
4131
|
}
|
|
3826
4132
|
if (normalizeTwilioPhoneTransportMode(options.transportMode) === "media-stream") {
|
|
3827
|
-
if (
|
|
3828
|
-
return xmlResponse(mediaStreamTwiml(options, basePath, { From: from, To: to }, undefined, streamParams));
|
|
4133
|
+
if (usesOpenAIRealtimeOutboundConversationEngine(options)) {
|
|
4134
|
+
return xmlResponse(mediaStreamTwiml(options, basePath, { From: from, To: to }, undefined, streamParams, "openai-realtime"));
|
|
3829
4135
|
}
|
|
3830
4136
|
try {
|
|
3831
4137
|
await fs.mkdir(callDir, { recursive: true });
|
|
@@ -4228,7 +4534,7 @@ function createTwilioPhoneBridge(options) {
|
|
|
4228
4534
|
byOutboundId: new Map(),
|
|
4229
4535
|
};
|
|
4230
4536
|
const activeSipSessions = new ActiveOpenAISipSessions();
|
|
4231
|
-
mediaStreams.on("connection", (ws) => {
|
|
4537
|
+
mediaStreams.on("connection", (ws, request) => {
|
|
4232
4538
|
const lifecycle = {
|
|
4233
4539
|
onIdentityChange: (activeSession, identity) => {
|
|
4234
4540
|
if (identity.callSid)
|
|
@@ -4245,7 +4551,8 @@ function createTwilioPhoneBridge(options) {
|
|
|
4245
4551
|
}
|
|
4246
4552
|
},
|
|
4247
4553
|
};
|
|
4248
|
-
const
|
|
4554
|
+
const streamEngine = mediaStreamRequestedConversationEngine(request.url);
|
|
4555
|
+
const session = streamEngine === "openai-realtime" || (!streamEngine && usesOpenAIRealtimeConversationEngine(options))
|
|
4249
4556
|
? new TwilioOpenAIRealtimeMediaStreamSession(ws, options, lifecycle)
|
|
4250
4557
|
: new TwilioMediaStreamSession(ws, options, jobs, lifecycle);
|
|
4251
4558
|
session.attach();
|