npm - @ouro.bot/cli - Versions diffs - 0.1.0-alpha.582 → 0.1.0-alpha.584 - Mend

@ouro.bot/cli 0.1.0-alpha.582 → 0.1.0-alpha.584

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/changelog.json +18 -0
package/dist/senses/voice/index.js +1 -0
package/dist/senses/voice/realtime-eval.js +384 -0
package/dist/senses/voice/twilio-phone-runtime.js +56 -17
package/dist/senses/voice/twilio-phone.js +375 -68
package/dist/senses/voice-realtime-eval-entry.js +25 -0
package/package.json +2 -1

package/changelog.json CHANGED

@@ -1,6 +1,24 @@
 {
   "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
   "versions": [
+    {
+      "version": "0.1.0-alpha.584",
+      "changes": [
+        "Voice now has a transport-aware Realtime eval kernel that grades deterministic call timelines for first-audio latency, user-turn response latency, tool holding phrases, barge-in clearing/truncation, friend context, transcript continuity, and hangup control before live phone testing.",
+        "`npm run voice:eval` runs built-in no-human voice scenarios, including a healthy path and an expected known-bad latency canary, and emits the normal nerves events expected of executable sense entrypoints so future Voice transport work can prove synchronous behavior without requiring a human to answer calls."
+      ]
+    },
+    {
+      "version": "0.1.0-alpha.583",
+      "changes": [
+        "Outbound SIP phone calls now start the Realtime greeting immediately after answer unless Twilio has already positively identified voicemail or fax, preventing humans from hearing post-pickup silence when async AMD returns unknown.",
+        "Twilio phone voice now defaults outbound calls to OpenAI Realtime Media Streams when inbound calls use OpenAI SIP on a Media Stream machine, while still allowing `voice.twilioOutboundConversationEngine` overrides, so humans avoid post-pickup SIP ringback.",
+        "Realtime voice now resolves phone callers through the canonical friend graph, preferring existing friend ids and otherwise matching normalized phone numbers via `imessage-handle`, so trust-aware tools see the same friend context as text and mail.",
+        "Realtime media-stream voice now treats empty caller metadata as absent and preserves local voice friend identities, keeping outbound and provider-simulated calls attached to the intended friend instead of inventing a blank phone identity.",
+        "Realtime voice response creation now backs off and retries after provider active-response conflicts, holds user turns under Ouro floor-control instead of provider auto-response, and long-running voice tools can emit one tiny holding phrase instead of leaving seconds of unexplained silence.",
+        "Realtime voice VAD and local barge-in thresholds are less twitchy by default, reducing accidental interruption from tiny room sounds while preserving deliberate caller interruption."
+      ]
+    },
     {
       "version": "0.1.0-alpha.582",
       "changes": [

package/dist/senses/voice/index.js CHANGED

@@ -26,3 +26,4 @@ __exportStar(require("./playback"), exports);
 __exportStar(require("./golden-path"), exports);
 __exportStar(require("./twilio-phone"), exports);
 __exportStar(require("./twilio-phone-runtime"), exports);
+__exportStar(require("./realtime-eval"), exports);

package/dist/senses/voice/realtime-eval.js ADDED

@@ -0,0 +1,384 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.gradeVoiceRealtimeEvalTimeline = gradeVoiceRealtimeEvalTimeline;
+exports.buildVoiceRealtimeEvalHappyPath = buildVoiceRealtimeEvalHappyPath;
+exports.runBuiltInVoiceRealtimeEvalSuite = runBuiltInVoiceRealtimeEvalSuite;
+exports.summarizeVoiceRealtimeEvalSuite = summarizeVoiceRealtimeEvalSuite;
+const runtime_1 = require("../../nerves/runtime");
+function validateTimeline(scenarioId, events, expectation) {
+    const normalizedScenarioId = scenarioId.trim();
+    if (!normalizedScenarioId)
+        throw new Error("voice eval scenario id is empty");
+    if (events.length === 0)
+        throw new Error("voice eval timeline is empty");
+    const budgets = [
+        expectation.maxFirstAssistantAudioMs,
+        expectation.maxUserTurnResponseMs,
+        expectation.maxToolPresenceMs,
+        expectation.maxBargeInClearMs,
+        expectation.maxBargeInTruncateMs,
+    ];
+    if (budgets.some((budget) => !Number.isFinite(budget) || budget <= 0)) {
+        throw new Error("voice eval latency budgets must be positive");
+    }
+    return normalizedScenarioId;
+}
+function sortedEvents(events) {
+    return [...events].sort((left, right) => left.atMs - right.atMs);
+}
+function firstEvent(events, type) {
+    return events.find((event) => event.type === type);
+}
+function allEvents(events, type) {
+    return events.filter((event) => event.type === type);
+}
+function lowerText(value) {
+    return value?.toLowerCase() ?? "";
+}
+function pushFinding(findings, finding) {
+    findings.push(finding);
+}
+function gradeFirstAudio(events, expectation, findings) {
+    const connected = firstEvent(events, "call.connected");
+    const firstAudio = firstEvent(events, "assistant.audio.started");
+    if (!connected || !firstAudio) {
+        pushFinding(findings, {
+            code: "first_audio_missing",
+            severity: "fail",
+            message: "Voice call did not produce assistant audio after connect.",
+            source: connected?.source ?? firstAudio?.source,
+            atMs: connected?.atMs ?? firstAudio?.atMs,
+        });
+        return undefined;
+    }
+    const ttfaMs = firstAudio.atMs - connected.atMs;
+    if (ttfaMs > expectation.maxFirstAssistantAudioMs) {
+        pushFinding(findings, {
+            code: "first_audio_late",
+            severity: "fail",
+            message: `First assistant audio started after ${ttfaMs}ms, over the ${expectation.maxFirstAssistantAudioMs}ms budget.`,
+            source: firstAudio.source,
+            atMs: firstAudio.atMs,
+        });
+    }
+    return ttfaMs;
+}
+function gradeFirstUserResponse(events, expectation, findings) {
+    const userTranscript = firstEvent(events, "user.transcript.done");
+    if (!userTranscript)
+        return undefined;
+    const response = events.find((event) => event.type === "response.requested"
+        && event.atMs >= userTranscript.atMs
+        && (!userTranscript.correlationId || event.correlationId === userTranscript.correlationId));
+    if (!response) {
+        pushFinding(findings, {
+            code: "user_response_missing",
+            severity: "fail",
+            message: "No voice response was requested after the caller transcript completed.",
+            source: userTranscript.source,
+            atMs: userTranscript.atMs,
+        });
+        return undefined;
+    }
+    const latencyMs = response.atMs - userTranscript.atMs;
+    if (latencyMs > expectation.maxUserTurnResponseMs) {
+        pushFinding(findings, {
+            code: "user_response_late",
+            severity: "fail",
+            message: `Voice response was requested after ${latencyMs}ms, over the ${expectation.maxUserTurnResponseMs}ms budget.`,
+            source: response.source,
+            atMs: response.atMs,
+        });
+    }
+    return latencyMs;
+}
+function gradeToolPresence(events, expectation, findings) {
+    const toolCall = firstEvent(events, "tool.call.started");
+    if (!toolCall)
+        return undefined;
+    const holding = events.find((event) => event.type === "tool.holding.started"
+        && event.atMs >= toolCall.atMs
+        && (!toolCall.correlationId || event.correlationId === toolCall.correlationId));
+    if (!holding) {
+        pushFinding(findings, {
+            code: "tool_presence_missing",
+            severity: "fail",
+            message: "Tool call did not produce a short voice holding phrase.",
+            source: toolCall.source,
+            atMs: toolCall.atMs,
+        });
+        return undefined;
+    }
+    const latencyMs = holding.atMs - toolCall.atMs;
+    if (latencyMs > expectation.maxToolPresenceMs) {
+        pushFinding(findings, {
+            code: "tool_presence_late",
+            severity: "fail",
+            message: `Tool holding phrase started after ${latencyMs}ms, over the ${expectation.maxToolPresenceMs}ms budget.`,
+            source: holding.source,
+            atMs: holding.atMs,
+        });
+    }
+    return latencyMs;
+}
+function gradeBargeIn(events, expectation, findings) {
+    const bargeIn = firstEvent(events, "barge_in.detected");
+    if (!bargeIn)
+        return {};
+    const clear = events.find((event) => event.type === "transport.playback_cleared" && event.atMs >= bargeIn.atMs);
+    const truncate = events.find((event) => event.type === "response.truncated" && event.atMs >= bargeIn.atMs);
+    const metrics = {};
+    if (!clear) {
+        pushFinding(findings, {
+            code: "barge_in_clear_missing",
+            severity: "fail",
+            message: "Caller barge-in did not clear transport playback.",
+            source: bargeIn.source,
+            atMs: bargeIn.atMs,
+        });
+    }
+    else {
+        metrics.firstBargeInClearMs = clear.atMs - bargeIn.atMs;
+        if (metrics.firstBargeInClearMs > expectation.maxBargeInClearMs) {
+            pushFinding(findings, {
+                code: "barge_in_clear_late",
+                severity: "fail",
+                message: `Barge-in playback clear took ${metrics.firstBargeInClearMs}ms, over the ${expectation.maxBargeInClearMs}ms budget.`,
+                source: clear.source,
+                atMs: clear.atMs,
+            });
+        }
+    }
+    if (!truncate) {
+        pushFinding(findings, {
+            code: "barge_in_truncate_missing",
+            severity: "fail",
+            message: "Caller barge-in did not truncate the active Realtime response.",
+            source: bargeIn.source,
+            atMs: bargeIn.atMs,
+        });
+    }
+    else {
+        metrics.firstBargeInTruncateMs = truncate.atMs - bargeIn.atMs;
+        if (metrics.firstBargeInTruncateMs > expectation.maxBargeInTruncateMs) {
+            pushFinding(findings, {
+                code: "barge_in_truncate_late",
+                severity: "fail",
+                message: `Barge-in response truncation took ${metrics.firstBargeInTruncateMs}ms, over the ${expectation.maxBargeInTruncateMs}ms budget.`,
+                source: truncate.source,
+                atMs: truncate.atMs,
+            });
+        }
+    }
+    return metrics;
+}
+function gradeManualFloorControl(events, findings) {
+    const session = allEvents(events, "session.updated").find((event) => event.session?.turnDetection);
+    if (session?.session?.turnDetection?.createResponse === false
+        && session.session.turnDetection.interruptResponse === false) {
+        return;
+    }
+    pushFinding(findings, {
+        code: "manual_floor_control_missing",
+        severity: "fail",
+        message: "Realtime session did not disable provider auto-response and provider interruption.",
+        source: session?.source,
+        atMs: session?.atMs,
+    });
+}
+function gradeFriendContext(events, requirement, findings) {
+    const context = firstEvent(events, "voice.context.injected");
+    if (context?.friendId === requirement.friendId
+        && context.sessionKey === requirement.sessionKey
+        && lowerText(context.text).includes(requirement.marker.toLowerCase())) {
+        return;
+    }
+    pushFinding(findings, {
+        code: "friend_context_mismatch",
+        severity: "fail",
+        message: "Voice context did not preserve the expected friend identity, trust marker, and stable session key.",
+        source: context?.source,
+        atMs: context?.atMs,
+    });
+}
+function gradeTranscripts(events, requirements, findings) {
+    for (const requirement of requirements) {
+        const type = requirement.role === "assistant"
+            ? "assistant.transcript.done"
+            : "user.transcript.done";
+        const found = allEvents(events, type).some((event) => lowerText(event.text).includes(requirement.contains.toLowerCase()));
+        if (!found) {
+            pushFinding(findings, {
+                code: "transcript_missing",
+                severity: "fail",
+                message: `Missing ${requirement.role} transcript containing "${requirement.contains}".`,
+            });
+        }
+    }
+}
+function gradeHangup(events, findings) {
+    const hangup = firstEvent(events, "call.hangup.requested");
+    if (hangup)
+        return;
+    const ended = firstEvent(events, "call.ended");
+    pushFinding(findings, {
+        code: "hangup_missing",
+        severity: "fail",
+        message: "Voice eval expected an agent-controlled hangup request before call end.",
+        source: ended?.source,
+        atMs: ended?.atMs,
+    });
+}
+function gradeOverlappingResponses(events, findings) {
+    for (const response of allEvents(events, "response.requested")) {
+        const activeAudio = allEvents(events, "assistant.audio.started").find((started) => {
+            const done = events.find((event) => event.type === "assistant.audio.done" && event.atMs >= started.atMs);
+            return response.atMs > started.atMs && (!done || response.atMs < done.atMs);
+        });
+        if (activeAudio) {
+            pushFinding(findings, {
+                code: "response_overlap",
+                severity: "fail",
+                message: "Voice response was requested while assistant audio was still active.",
+                source: response.source,
+                atMs: response.atMs,
+            });
+            return;
+        }
+    }
+}
+function collectTransportSources(events) {
+    return [...new Set(events.flatMap((event) => event.source ? [event.source.transport] : []))].sort();
+}
+function gradeVoiceRealtimeEvalTimeline(scenarioId, timeline, expectation) {
+    const normalizedScenarioId = validateTimeline(scenarioId, timeline, expectation);
+    const events = sortedEvents(timeline);
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_realtime_eval_start",
+        message: "starting Voice realtime eval timeline grading",
+        meta: { scenarioId: normalizedScenarioId, events: events.length },
+    });
+    const findings = [];
+    const metrics = {
+        ttfaMs: gradeFirstAudio(events, expectation, findings),
+        firstUserResponseMs: gradeFirstUserResponse(events, expectation, findings),
+        firstToolPresenceMs: gradeToolPresence(events, expectation, findings),
+        ...gradeBargeIn(events, expectation, findings),
+    };
+    if (expectation.requireManualFloorControl)
+        gradeManualFloorControl(events, findings);
+    if (expectation.requireFriendContext)
+        gradeFriendContext(events, expectation.requireFriendContext, findings);
+    if (expectation.requiredTranscripts)
+        gradeTranscripts(events, expectation.requiredTranscripts, findings);
+    if (expectation.requireHangup)
+        gradeHangup(events, findings);
+    gradeOverlappingResponses(events, findings);
+    const report = {
+        scenarioId: normalizedScenarioId,
+        passed: findings.every((finding) => finding.severity !== "fail"),
+        findings,
+        metrics,
+        transportSources: collectTransportSources(events),
+    };
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_realtime_eval_end",
+        message: "finished Voice realtime eval timeline grading",
+        meta: { scenarioId: normalizedScenarioId, passed: report.passed, findings: findings.length },
+    });
+    return report;
+}
+function buildVoiceRealtimeEvalHappyPath() {
+    return [
+        { type: "call.connected", atMs: 0, source: { transport: "openai-sip", id: "sip-call-1" } },
+        {
+            type: "voice.context.injected",
+            atMs: 80,
+            friendId: "friend-ari",
+            sessionKey: "twilio-phone-friend-ari-via-ouro",
+            text: "Resolved voice friend: Ari (friendId=friend-ari, trust=family).",
+            source: { transport: "voice-eval" },
+        },
+        {
+            type: "session.updated",
+            atMs: 100,
+            session: { turnDetection: { createResponse: false, interruptResponse: false } },
+            source: { transport: "openai-realtime-control", id: "ws-1" },
+        },
+        { type: "response.requested", atMs: 120, correlationId: "greeting", source: { transport: "openai-realtime-control", id: "ws-1" } },
+        { type: "assistant.audio.started", atMs: 720, correlationId: "greeting", source: { transport: "openai-sip", id: "sip-call-1" } },
+        { type: "assistant.audio.done", atMs: 1_820, correlationId: "greeting", source: { transport: "openai-sip", id: "sip-call-1" } },
+        {
+            type: "assistant.transcript.done",
+            atMs: 1_840,
+            correlationId: "greeting",
+            text: "Hey Ari, I am checking the weather now.",
+            source: { transport: "openai-realtime-control", id: "ws-1" },
+        },
+        {
+            type: "user.transcript.done",
+            atMs: 2_200,
+            correlationId: "user-1",
+            text: "Can you check the weather and then hang up?",
+            source: { transport: "twilio-media-stream", id: "stream-1" },
+        },
+        { type: "response.requested", atMs: 2_480, correlationId: "user-1", source: { transport: "openai-realtime-control", id: "ws-1" } },
+        { type: "assistant.audio.started", atMs: 2_540, correlationId: "user-1", source: { transport: "openai-sip", id: "sip-call-1" } },
+        { type: "assistant.audio.done", atMs: 2_820, correlationId: "user-1", source: { transport: "openai-sip", id: "sip-call-1" } },
+        { type: "tool.call.started", atMs: 3_000, correlationId: "tool-1", toolName: "weather_lookup", source: { transport: "openai-realtime-control", id: "ws-1" } },
+        { type: "tool.holding.started", atMs: 3_260, correlationId: "tool-1", text: "One sec, checking.", source: { transport: "openai-sip", id: "sip-call-1" } },
+        { type: "tool.call.completed", atMs: 3_800, correlationId: "tool-1", toolName: "weather_lookup", source: { transport: "openai-realtime-control", id: "ws-1" } },
+        { type: "barge_in.detected", atMs: 4_100, source: { transport: "twilio-media-stream", id: "stream-1" } },
+        { type: "transport.playback_cleared", atMs: 4_140, source: { transport: "twilio-media-stream", id: "stream-1" } },
+        { type: "response.truncated", atMs: 4_170, source: { transport: "openai-realtime-control", id: "ws-1" } },
+        { type: "call.hangup.requested", atMs: 5_000, source: { transport: "openai-realtime-control", id: "ws-1" } },
+        { type: "call.ended", atMs: 5_100, source: { transport: "openai-sip", id: "sip-call-1" } },
+    ];
+}
+function builtInExpectation() {
+    return {
+        maxFirstAssistantAudioMs: 1_200,
+        maxUserTurnResponseMs: 900,
+        maxToolPresenceMs: 600,
+        maxBargeInClearMs: 120,
+        maxBargeInTruncateMs: 180,
+        requireManualFloorControl: true,
+        requireFriendContext: {
+            friendId: "friend-ari",
+            sessionKey: "twilio-phone-friend-ari-via-ouro",
+            marker: "trust=family",
+        },
+        requireHangup: true,
+        requiredTranscripts: [
+            { role: "user", contains: "weather" },
+            { role: "assistant", contains: "checking the weather" },
+        ],
+    };
+}
+function buildKnownBadLatencyPath() {
+    return buildVoiceRealtimeEvalHappyPath().map((event) => {
+        if (event.type === "assistant.audio.started" && event.correlationId === "greeting")
+            return { ...event, atMs: 1_900 };
+        if (event.type === "response.requested" && event.correlationId === "user-1")
+            return { ...event, atMs: 3_500 };
+        return event;
+    });
+}
+function runBuiltInVoiceRealtimeEvalSuite() {
+    const expectation = builtInExpectation();
+    return [
+        gradeVoiceRealtimeEvalTimeline("voice-happy-path", buildVoiceRealtimeEvalHappyPath(), expectation),
+        gradeVoiceRealtimeEvalTimeline("voice-known-bad-latency", buildKnownBadLatencyPath(), expectation),
+    ];
+}
+function summarizeVoiceRealtimeEvalSuite(reports) {
+    const failedScenarioIds = reports.filter((report) => !report.passed).map((report) => report.scenarioId);
+    return {
+        passed: reports.length - failedScenarioIds.length,
+        failed: failedScenarioIds.length,
+        total: reports.length,
+        failedScenarioIds,
+    };
+}

package/dist/senses/voice/twilio-phone-runtime.js CHANGED

@@ -154,13 +154,37 @@ function resolveOpenAIRealtimeApiKey(options) {
         return { apiKey: compatKey, source: "integrations.openaiEmbeddingsApiKey" };
     return undefined;
 }
-function configuredConversationEngine(options, overrides) {
-    return overrides.conversationEngine
-        ?? (0, twilio_phone_1.normalizeTwilioPhoneConversationEngine)(configString(options.machineConfig, "voice.twilioConversationEngine")
-            ?? configString(options.machineConfig, "voice.conversationEngine")
-            ?? configString(options.runtimeConfig, "voice.twilioConversationEngine")
-            ?? configString(options.runtimeConfig, "voice.conversationEngine")
-            ?? "cascade");
+function configuredConversationEngine(options, overrides, transportMode) {
+    const explicit = overrides.conversationEngine
+        ?? configString(options.machineConfig, "voice.twilioConversationEngine")
+        ?? configString(options.machineConfig, "voice.conversationEngine")
+        ?? configString(options.runtimeConfig, "voice.twilioConversationEngine")
+        ?? configString(options.runtimeConfig, "voice.conversationEngine");
+    const hasSipConfig = !!(configString(options.runtimeConfig, "voice.openaiSipProjectId")
+        || configString(options.machineConfig, "voice.openaiSipProjectId"));
+    const explicitEngine = explicit ? (0, twilio_phone_1.normalizeTwilioPhoneConversationEngine)(explicit) : undefined;
+    if (hasSipConfig && (!explicitEngine || explicitEngine === "cascade"))
+        return "openai-sip";
+    if (explicitEngine)
+        return explicitEngine;
+    const hasRealtimeConfig = !!resolveOpenAIRealtimeApiKey({ runtimeConfig: options.runtimeConfig, overrides });
+    if (hasRealtimeConfig && transportMode === "media-stream")
+        return "openai-realtime";
+    return "cascade";
+}
+function configuredOutboundConversationEngine(options, overrides, conversationEngine, transportMode) {
+    const defaultOutboundEngine = conversationEngine === "openai-sip" && transportMode === "media-stream"
+        ? "openai-realtime"
+        : conversationEngine;
+    const configured = overrides.outboundConversationEngine
+        ?? (0, twilio_phone_1.normalizeTwilioPhoneConversationEngine)(configString(options.machineConfig, "voice.twilioOutboundConversationEngine")
+            ?? configString(options.machineConfig, "voice.outboundConversationEngine")
+            ?? configString(options.runtimeConfig, "voice.twilioOutboundConversationEngine")
+            ?? configString(options.runtimeConfig, "voice.outboundConversationEngine")
+            ?? defaultOutboundEngine);
+    if (defaultOutboundEngine === "openai-realtime" && configured === "cascade")
+        return defaultOutboundEngine;
+    return configured;
 }
 function normalizeOpenAIRealtimeReasoningEffort(value) {
     const normalized = value?.trim().toLowerCase();
@@ -226,7 +250,14 @@ function resolveTwilioPhoneTransportRuntime(options) {
         ?? twilio_phone_1.TWILIO_PHONE_WEBHOOK_BASE_PATH);
     const transportMode = overrides.transportMode
         ?? (0, twilio_phone_1.normalizeTwilioPhoneTransportMode)(configString(options.machineConfig, "voice.twilioTransportMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_TRANSPORT_MODE);
-    const conversationEngine = configuredConversationEngine(options, overrides);
+    const conversationEngine = configuredConversationEngine(options, overrides, transportMode);
+    const outboundConversationEngine = configuredOutboundConversationEngine(options, overrides, conversationEngine, transportMode);
+    const needsOpenAIRealtime = conversationEngine === "openai-realtime"
+        || conversationEngine === "openai-sip"
+        || outboundConversationEngine === "openai-realtime"
+        || outboundConversationEngine === "openai-sip";
+    const needsOpenAISip = conversationEngine === "openai-sip" || outboundConversationEngine === "openai-sip";
+    const needsCascade = conversationEngine === "cascade" || outboundConversationEngine === "cascade";
     let elevenLabsApiKey = configString(options.runtimeConfig, "integrations.elevenLabsApiKey") ?? "";
     let elevenLabsVoiceId = trimOptional(overrides.elevenLabsVoiceId)
         ?? configString(options.runtimeConfig, "integrations.elevenLabsVoiceId")
@@ -240,9 +271,9 @@ function resolveTwilioPhoneTransportRuntime(options) {
         ?? "";
     let openaiRealtime;
     let openaiSip;
-    if (conversationEngine === "openai-realtime" || conversationEngine === "openai-sip") {
-        if (conversationEngine === "openai-realtime" && transportMode !== "media-stream") {
-            throw new Error("voice.twilioConversationEngine=openai-realtime requires voice.twilioTransportMode=media-stream");
+    if (needsOpenAIRealtime) {
+        if ((conversationEngine === "openai-realtime" || outboundConversationEngine === "openai-realtime") && transportMode !== "media-stream") {
+            throw new Error("voice.twilioConversationEngine/openai-realtime requires voice.twilioTransportMode=media-stream");
         }
         const key = resolveOpenAIRealtimeApiKey({ runtimeConfig: options.runtimeConfig, overrides });
         if (!key) {
@@ -300,7 +331,7 @@ function resolveTwilioPhoneTransportRuntime(options) {
                 ?? normalizeOpenAIRealtimeNoiseReduction(configString(options.runtimeConfig, "voice.openaiRealtimeNoiseReduction")),
             turnDetection,
         };
-        if (conversationEngine === "openai-sip") {
+        if (needsOpenAISip) {
             const projectId = trimOptional(overrides.openaiSipProjectId)
                 ?? configString(options.runtimeConfig, "voice.openaiSipProjectId")
                 ?? configString(options.machineConfig, "voice.openaiSipProjectId");
@@ -334,7 +365,7 @@ function resolveTwilioPhoneTransportRuntime(options) {
             };
         }
     }
-    else {
+    if (needsCascade) {
         elevenLabsApiKey = required(elevenLabsApiKey || undefined, "missing integrations.elevenLabsApiKey; run 'ouro connect voice --agent <agent>' for setup guidance");
         elevenLabsVoiceId = required(elevenLabsVoiceId || undefined, "missing integrations.elevenLabsVoiceId; save the ElevenLabs voice ID before starting phone voice");
         whisperCliPath = required(whisperCliPath || undefined, "missing voice.whisperCliPath in this machine's runtime config");
@@ -379,6 +410,7 @@ function resolveTwilioPhoneTransportRuntime(options) {
             ?? (0, twilio_phone_1.normalizeTwilioPhonePlaybackMode)(configString(options.machineConfig, "voice.twilioPlaybackMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE),
         transportMode,
         conversationEngine,
+        outboundConversationEngine,
         openaiRealtime,
         openaiSip,
         openaiSipWebhookUrl: openaiSip?.webhookPath ? (0, twilio_phone_1.openAISipWebhookUrl)(publicBaseUrl, openaiSip.webhookPath) : undefined,
@@ -482,7 +514,12 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
             meta: { agentName: settings.agentName, source: settings.openaiRealtime.apiKeySource },
         });
     }
-    const transcriber = settings.conversationEngine === "openai-realtime" || settings.conversationEngine === "openai-sip"
+    const settingsNeedsOpenAIRealtime = settings.conversationEngine === "openai-realtime"
+        || settings.conversationEngine === "openai-sip"
+        || settings.outboundConversationEngine === "openai-realtime"
+        || settings.outboundConversationEngine === "openai-sip";
+    const settingsNeedsCascade = settings.conversationEngine === "cascade" || settings.outboundConversationEngine === "cascade";
+    const transcriber = settingsNeedsOpenAIRealtime && !settingsNeedsCascade
         ? {
             transcribe: async () => {
                 throw new Error("OpenAI Realtime voice sessions do not use the cascade transcriber");
@@ -492,7 +529,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
             whisperCliPath: settings.whisperCliPath,
             modelPath: settings.whisperModelPath,
         });
-    const tts = settings.conversationEngine === "openai-realtime" || settings.conversationEngine === "openai-sip"
+    const tts = settingsNeedsOpenAIRealtime && !settingsNeedsCascade
         ? {
             synthesize: async () => {
                 throw new Error("OpenAI Realtime voice sessions do not use the cascade TTS service");
@@ -522,6 +559,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
         transportMode: settings.transportMode,
         playbackMode: settings.playbackMode,
         conversationEngine: settings.conversationEngine,
+        outboundConversationEngine: settings.outboundConversationEngine,
         openaiRealtime: settings.openaiRealtime,
         openaiSip: settings.openaiSip,
     });
@@ -538,6 +576,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
             openaiSipWebhookUrl: settings.openaiSipWebhookUrl ?? "",
             transportMode: settings.transportMode,
             conversationEngine: settings.conversationEngine,
+            outboundConversationEngine: settings.outboundConversationEngine,
             openaiRealtimeModel: settings.openaiRealtime?.model ?? "",
         },
     });
@@ -565,7 +604,7 @@ async function prewarmOutboundGreeting(options, deps) {
     if (options.settings.transportMode !== "media-stream")
         return undefined;
     /* v8 ignore next -- Realtime/SIP outbound tests assert no cascade prewarm is attempted @preserve */
-    if (options.settings.conversationEngine === "openai-realtime" || options.settings.conversationEngine === "openai-sip")
+    if (options.settings.outboundConversationEngine === "openai-realtime" || options.settings.outboundConversationEngine === "openai-sip")
         return undefined;
     const friendId = options.friendId?.trim() || `twilio-${safeRuntimeSegment(options.to)}`;
     const sessionKey = (0, twilio_phone_1.twilioPhoneVoiceSessionKey)({
@@ -677,7 +716,7 @@ async function placeConfiguredTwilioPhoneCall(options, deps = defaultTwilioPhone
         reason: options.reason.trim(),
         ...(options.initialAudio ? { initialAudio: options.initialAudio } : {}),
         createdAt,
-        status: settings.transportMode === "media-stream" && settings.conversationEngine !== "openai-realtime" && settings.conversationEngine !== "openai-sip"
+        status: settings.transportMode === "media-stream" && settings.outboundConversationEngine === "cascade"
             ? "prewarming"
             : "requested",
     });