npm - @ouro.bot/cli - Versions diffs - 0.1.0-alpha.566 → 0.1.0-alpha.568 - Mend

@ouro.bot/cli 0.1.0-alpha.566 → 0.1.0-alpha.568

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +1 -1
package/changelog.json +15 -0
package/dist/heart/daemon/cli-exec.js +2 -1
package/dist/mind/prompt.js +1 -1
package/dist/senses/shared-turn.js +55 -7
package/dist/senses/voice/elevenlabs.js +13 -1
package/dist/senses/voice/turn.js +113 -9
package/dist/senses/voice/twilio-phone-runtime.js +3 -0
package/dist/senses/voice/twilio-phone.js +427 -32
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -105,7 +105,7 @@ Task docs do not live in this repo anymore. Planning and doing docs live in the
 - Human TTY commands share one CLI surface family: bare `ouro` opens the home deck, `ouro up` uses the boot checklist, `ouro connect`/`ouro auth verify`/`ouro repair` agree on provider and vault truth, and `ouro help`/`ouro whoami`/`ouro versions`/`ouro hatch` render through the same Ouro-branded wizard/guide language instead of raw transcript walls. Orientation commands such as root `ouro connect` may use shorter live probes, while startup and verification commands own durable readiness updates.
 - Human-facing CLI commands that can wait on browser auth, vault IO, daemon startup, daemon restart, provider checks, or connector setup use a shared progress checklist. If a cursor may blink for more than a few seconds, the command should print or animate the current step instead of going quiet.
 - CLI commands that mutate bundle config, such as vault setup or `ouro connect bluebubbles`, run bundle sync after the change when `sync.enabled` is true and report a compact `bundle sync:` line.
-- Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.
+- Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered `speak`/`settle` text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks to Twilio by default.
 - The daemon discovers bundles dynamically from `~/AgentBundles`.
 - `ouro status` reports version, last-updated time, discovered agents, senses, and workers.
 - `bundle-meta.json` tracks the runtime version that last touched a bundle.

package/changelog.json CHANGED Viewed

@@ -1,6 +1,21 @@
 {
   "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
   "versions": [
+    {
+      "version": "0.1.0-alpha.568",
+      "changes": [
+        "Twilio phone recordings that Whisper.cpp reports as empty speech now route through an agent-authored voice reprompt instead of failing the Twilio audio stream.",
+        "Real STT infrastructure failures still surface as bridge errors, preserving a clear distinction between silence and broken transcription."
+      ]
+    },
+    {
+      "version": "0.1.0-alpha.567",
+      "changes": [
+        "Voice transports now receive outward `speak` and `settle` text through the shared sense delivery callback path, so voice audio is driven by the same tool-required delivery semantics as chat channels.",
+        "Twilio phone sessions are now keyed to the stable phone voice channel instead of CallSid, while CallSid remains the per-call artifact directory.",
+        "Managed Twilio playback now supports streaming Play URLs backed by ElevenLabs audio chunks, with buffered playback still available for compatibility testing."
+      ]
+    },
     {
       "version": "0.1.0-alpha.566",
       "changes": [

package/dist/heart/daemon/cli-exec.js CHANGED Viewed

@@ -4289,11 +4289,12 @@ async function executeConnectVoice(agent, deps) {
         `  ouro vault config set --agent ${agent} --scope machine --key voice.twilioPublicUrl`,
         `  ouro vault config set --agent ${agent} --scope machine --key voice.twilioBasePath --value /voice/agents/${agentPathSegment}/twilio`,
         `  ouro vault config set --agent ${agent} --scope machine --key voice.twilioPort --value 18910`,
+        `  ouro vault config set --agent ${agent} --scope machine --key voice.twilioPlaybackMode --value stream`,
         `  ouro vault config set --agent ${agent} --scope machine --key voice.twilioDefaultFriendId --value ari`,
         "Then enable agent.json: senses.voice.enabled = true and restart with `ouro up`.",
         `The managed Voice entrypoint will listen at POST <public-url>/voice/agents/${agentPathSegment}/twilio/incoming.`,
         `Standalone local smoke remains available with: node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>.`,
-        "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.",
+        "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default.",
     ].join("\n");
     deps.writeStdout(message);
     return message;

package/dist/mind/prompt.js CHANGED Viewed

@@ -506,7 +506,7 @@ function senseRuntimeGuidance(channel, preReadStatusLines) {
     lines.push("mail validation diagnostics: health checks, bounded mail tools, access logs, and UI inspection can support validation, but they are evidence inside those paths, not additional paths. If asked to name golden paths, do not include diagnostic commands, tool names, or status checks in the answer.");
     lines.push("mail diagnostic naming: `ouro doctor` is installation-wide; do not invent `ouro doctor --agent <agent>`.");
     lines.push("mail setup boundaries: do not invent `ouro auth verify --provider mail`, HEY OAuth, HEY IMAP, `ouro mcp call mail ...`, policy flags, autonomous sending, destructive mail actions, or production MX/DNS/forwarding changes. HEY export, HEY forwarding, DNS, MX cutover, sending, and destructive actions require explicit human confirmation.");
-    lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
+    lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
     if (channel === "cli") {
         lines.push("cli is interactive: it is available when the user opens it, not something `ouro up` daemonizes.");
     }

package/dist/senses/shared-turn.js CHANGED Viewed

@@ -235,17 +235,64 @@ async function runSenseTurn(options) {
         : [{ role: "system", content: (0, prompt_1.flattenSystemPrompt)(await (0, prompt_1.buildSystem)(channel, {}, undefined)) }];
     // Pending dir
     const pendingDir = (0, pending_1.getPendingDir)(agentName, friendId, channel, sessionKey);
-    // Accumulate response text via callbacks
-    let responseText = "";
+    // Accumulate outward text through the same callback boundary used by chat
+    // channels. `speak` flushes pending text immediately; `settle` is delivered
+    // once the turn completes.
+    let committedResponseText = "";
+    let pendingResponseText = "";
+    let terminalDeliveryKind = "text";
+    const deliveries = [];
+    const deliveryFailures = [];
+    const commitResponseText = (text) => {
+        const cleaned = stripThinkBlocks(text);
+        /* v8 ignore next -- deliverPending strips first; this is a defensive direct-call guard @preserve */
+        if (!cleaned)
+            return;
+        committedResponseText = committedResponseText
+            ? `${committedResponseText}\n${cleaned}`
+            : cleaned;
+    };
+    const deliveryErrorMessage = (error) => error instanceof Error ? error.message : String(error);
+    const deliverPending = async (kind, optionsForDelivery) => {
+        const text = stripThinkBlocks(pendingResponseText);
+        pendingResponseText = "";
+        if (!text)
+            return;
+        const delivery = { kind, text };
+        try {
+            await options.deliverySink?.onDelivery(delivery);
+            deliveries.push(delivery);
+            commitResponseText(text);
+        }
+        catch (error) {
+            const failure = { ...delivery, error: deliveryErrorMessage(error) };
+            deliveryFailures.push(failure);
+            (0, runtime_1.emitNervesEvent)({
+                level: "error",
+                component: "senses",
+                event: "senses.shared_turn_delivery_error",
+                message: "shared turn outward delivery failed",
+                meta: { agentName, channel, sessionKey, friendId, kind, error: failure.error, textLength: text.length },
+            });
+            if (optionsForDelivery.throwOnError)
+                throw error;
+            commitResponseText(text);
+        }
+    };
     /* v8 ignore start — no-op callback stubs; only onTextChunk does real work (covered via mock) */
     const callbacks = {
         onModelStart: () => { },
         onModelStreamStart: () => { },
-        onTextChunk: (chunk) => { responseText += chunk; },
+        onTextChunk: (chunk) => { pendingResponseText += chunk; },
         onReasoningChunk: () => { },
         onToolStart: () => { },
-        onToolEnd: () => { },
+        onToolEnd: (name, _summary, success) => {
+            if (name === "settle" && success)
+                terminalDeliveryKind = "settle";
+        },
         onError: () => { },
+        onClearText: () => { pendingResponseText = ""; },
+        flushNow: () => deliverPending("speak", { throwOnError: true }),
     };
     /* v8 ignore stop */
     // Run the pipeline
@@ -285,10 +332,11 @@ async function runSenseTurn(options) {
         /* v8 ignore stop */
         accumulateFriendTokens: tokens_1.accumulateFriendTokens,
     });
+    await deliverPending(terminalDeliveryKind, { throwOnError: false });
     const ponderDeferred = false;
     // Build response
     let finalResponse;
-    if (responseText.length === 0) {
+    if (committedResponseText.length === 0) {
         // Agent settled but no text came through callbacks — check session transcript for the settle answer
         // Await deferred persist so the session file is up-to-date before readback
         /* v8 ignore next -- persistPromise set inside v8-ignored postTurn callback; tested via pipeline integration @preserve */
@@ -304,7 +352,7 @@ async function runSenseTurn(options) {
         }
     }
     else {
-        finalResponse = responseText;
+        finalResponse = committedResponseText;
     }
     // Strip MiniMax-style <think>...</think> blocks from the final response.
     // When a reasoning-style model emits only a think block and no final answer
@@ -335,5 +383,5 @@ async function runSenseTurn(options) {
         message: "shared turn runner complete",
         meta: { agentName, channel, sessionKey, friendId, ponderDeferred, responseLength: finalResponse.length },
     });
-    return { response: finalResponse, ponderDeferred };
+    return { response: finalResponse, ponderDeferred, deliveries, deliveryFailures };
 }

package/dist/senses/voice/elevenlabs.js CHANGED Viewed

@@ -156,7 +156,19 @@ function createElevenLabsTtsClient(options) {
                     try {
                         const parsed = JSON.parse(payloadText(payload));
                         if (typeof parsed.audio === "string" && parsed.audio.length > 0) {
-                            chunks.push(Buffer.from(parsed.audio, "base64"));
+                            const chunk = Buffer.from(parsed.audio, "base64");
+                            chunks.push(chunk);
+                            if (request.onAudioChunk) {
+                                try {
+                                    const chunkResult = request.onAudioChunk(chunk);
+                                    if (chunkResult && typeof chunkResult.then === "function") {
+                                        void chunkResult.catch(fail);
+                                    }
+                                }
+                                catch (error) {
+                                    fail(error);
+                                }
+                            }
                         }
                         if (parsed.isFinal === true) {
                             finish();

package/dist/senses/voice/turn.js CHANGED Viewed

@@ -4,6 +4,33 @@ exports.runVoiceLoopbackTurn = runVoiceLoopbackTurn;
 const runtime_1 = require("../../nerves/runtime");
 const shared_turn_1 = require("../shared-turn");
 const transcript_1 = require("./transcript");
+function deliveredTts(spoken) {
+    return {
+        status: "delivered",
+        audio: spoken.audio,
+        byteLength: spoken.byteLength,
+        chunkCount: spoken.chunkCount,
+        mimeType: spoken.mimeType,
+        modelId: spoken.modelId,
+        voiceId: spoken.voiceId,
+    };
+}
+function aggregateSegments(segments) {
+    const first = segments[0].tts;
+    const audio = Buffer.concat(segments.map((segment) => Buffer.from(segment.tts.audio)));
+    return {
+        status: "delivered",
+        audio,
+        byteLength: audio.byteLength,
+        chunkCount: segments.reduce((sum, segment) => sum + segment.tts.chunkCount, 0),
+        mimeType: first.mimeType,
+        modelId: first.modelId,
+        voiceId: first.voiceId,
+    };
+}
+function deliveryErrorMessage(error) {
+    return error instanceof Error ? error.message : String(error);
+}
 async function runVoiceLoopbackTurn(options) {
     const runSenseTurn = options.runSenseTurn ?? shared_turn_1.runSenseTurn;
     let userMessage;
@@ -31,30 +58,105 @@ async function runVoiceLoopbackTurn(options) {
             utteranceId: options.transcript.utteranceId,
         },
     });
+    const speechSegments = [];
+    const speechDeliveryErrors = [];
+    let deliveryIndex = 0;
+    const synthesizeDelivery = async (delivery) => {
+        deliveryIndex += 1;
+        const segmentUtteranceId = `${options.transcript.utteranceId}-${deliveryIndex}-${delivery.kind}`;
+        try {
+            const spoken = await options.tts.synthesize({
+                utteranceId: segmentUtteranceId,
+                text: delivery.text,
+                onAudioChunk: options.onAudioChunk,
+            });
+            speechSegments.push({
+                kind: delivery.kind,
+                text: delivery.text,
+                utteranceId: segmentUtteranceId,
+                tts: deliveredTts(spoken),
+            });
+        }
+        catch (error) {
+            const failure = {
+                kind: delivery.kind,
+                text: delivery.text,
+                utteranceId: segmentUtteranceId,
+                error: deliveryErrorMessage(error),
+            };
+            speechDeliveryErrors.push(failure);
+            throw error;
+        }
+    };
     const turn = await runSenseTurn({
         agentName: options.agentName,
         channel: "voice",
         friendId: options.friendId,
         sessionKey: options.sessionKey,
         userMessage,
+        deliverySink: { onDelivery: synthesizeDelivery },
     });
+    if (speechSegments.length > 0) {
+        const tts = aggregateSegments(speechSegments);
+        const result = {
+            responseText: turn.response,
+            ponderDeferred: turn.ponderDeferred,
+            tts,
+            speechSegments,
+            speechDeliveryErrors,
+        };
+        (0, runtime_1.emitNervesEvent)({
+            component: "senses",
+            event: "senses.voice_turn_end",
+            message: "voice loopback turn delivered speech",
+            meta: {
+                utteranceId: options.transcript.utteranceId,
+                responseLength: turn.response.length,
+                segmentCount: speechSegments.length,
+                byteLength: tts.byteLength,
+            },
+        });
+        return result;
+    }
+    const turnDeliveryFailures = turn.deliveryFailures ?? [];
+    if (speechDeliveryErrors.length > 0 || turnDeliveryFailures.length > 0) {
+        const firstError = speechDeliveryErrors[0]?.error ?? turnDeliveryFailures[0].error;
+        (0, runtime_1.emitNervesEvent)({
+            level: "error",
+            component: "senses",
+            event: "senses.voice_turn_tts_error",
+            message: "voice loopback TTS failed after text response",
+            meta: { utteranceId: options.transcript.utteranceId, error: firstError, responseLength: turn.response.length },
+        });
+        return {
+            responseText: turn.response,
+            ponderDeferred: turn.ponderDeferred,
+            tts: {
+                status: "failed",
+                error: firstError,
+            },
+            speechSegments,
+            speechDeliveryErrors,
+        };
+    }
     try {
         const spoken = await options.tts.synthesize({
             utteranceId: options.transcript.utteranceId,
             text: turn.response,
+            onAudioChunk: options.onAudioChunk,
         });
+        const tts = deliveredTts(spoken);
         const result = {
             responseText: turn.response,
             ponderDeferred: turn.ponderDeferred,
-            tts: {
-                status: "delivered",
-                audio: spoken.audio,
-                byteLength: spoken.byteLength,
-                chunkCount: spoken.chunkCount,
-                mimeType: spoken.mimeType,
-                modelId: spoken.modelId,
-                voiceId: spoken.voiceId,
-            },
+            tts,
+            speechSegments: [{
+                    kind: "text",
+                    text: turn.response,
+                    utteranceId: options.transcript.utteranceId,
+                    tts,
+                }],
+            speechDeliveryErrors,
         };
         (0, runtime_1.emitNervesEvent)({
             component: "senses",
@@ -80,6 +182,8 @@ async function runVoiceLoopbackTurn(options) {
                 status: "failed",
                 error: message,
             },
+            speechSegments,
+            speechDeliveryErrors,
         };
     }
 }

package/dist/senses/voice/twilio-phone-runtime.js CHANGED Viewed

@@ -193,6 +193,8 @@ function resolveTwilioPhoneTransportRuntime(options) {
         recordMaxLengthSeconds: overrides.recordMaxLengthSeconds
             ?? configNumber(options.machineConfig, "voice.twilioRecordMaxLengthSeconds")
             ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
+        playbackMode: overrides.playbackMode
+            ?? (0, twilio_phone_1.normalizeTwilioPhonePlaybackMode)(configString(options.machineConfig, "voice.twilioPlaybackMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE),
     };
     return { status: "configured", settings };
 }
@@ -259,6 +261,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
         defaultFriendId: settings.defaultFriendId,
         recordTimeoutSeconds: settings.recordTimeoutSeconds,
         recordMaxLengthSeconds: settings.recordMaxLengthSeconds,
+        playbackMode: settings.playbackMode,
     });
     (0, runtime_1.emitNervesEvent)({
         component: "senses",

package/dist/senses/voice/twilio-phone.js CHANGED Viewed

@@ -33,9 +33,11 @@ var __importStar = (this && this.__importStar) || (function () {
     };
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
+exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
 exports.normalizeTwilioPhoneBasePath = normalizeTwilioPhoneBasePath;
+exports.normalizeTwilioPhonePlaybackMode = normalizeTwilioPhonePlaybackMode;
 exports.twilioPhoneWebhookUrl = twilioPhoneWebhookUrl;
+exports.twilioPhoneVoiceSessionKey = twilioPhoneVoiceSessionKey;
 exports.computeTwilioSignature = computeTwilioSignature;
 exports.validateTwilioSignature = validateTwilioSignature;
 exports.twilioRecordingMediaUrl = twilioRecordingMediaUrl;
@@ -54,6 +56,7 @@ exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
 exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
 exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = 30;
 exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = "/voice/twilio";
+exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = "stream";
 function bodyText(body) {
     if (body === undefined)
         return "";
@@ -104,6 +107,21 @@ function binaryResponse(body, contentType) {
         body,
     };
 }
+function streamResponse(body, contentType) {
+    return {
+        statusCode: 200,
+        headers: {
+            "content-type": contentType,
+            "cache-control": "no-store",
+        },
+        body,
+    };
+}
+function isAsyncIterableBody(body) {
+    return typeof body === "object"
+        && body !== null
+        && Symbol.asyncIterator in body;
+}
 function escapeXml(input) {
     return input
         .replace(/&/g, "&amp;")
@@ -127,6 +145,12 @@ function normalizeTwilioPhoneBasePath(value = exports.TWILIO_PHONE_WEBHOOK_BASE_
     }
     return withoutTrailingSlash;
 }
+function normalizeTwilioPhonePlaybackMode(value) {
+    const normalized = (value ?? exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE).trim().toLowerCase();
+    if (normalized === "stream" || normalized === "buffered")
+        return normalized;
+    throw new Error(`invalid Twilio phone playback mode: ${value}`);
+}
 function twilioPhoneWebhookUrl(publicBaseUrl, basePath = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
     return routeUrl(publicBaseUrl, `${normalizeTwilioPhoneBasePath(basePath)}/incoming`);
 }
@@ -179,6 +203,25 @@ function friendIdFromCaller(from, callSid) {
 function voiceFriendId(options, from, callSid) {
     return options.defaultFriendId?.trim() || friendIdFromCaller(from, callSid);
 }
+function phoneIdentitySegment(input) {
+    const phoneish = input.replace(/[^0-9A-Za-z]+/g, "");
+    return phoneish || safeSegment(input);
+}
+function twilioPhoneVoiceSessionKey(options) {
+    const friendSegment = options.defaultFriendId?.trim()
+        ? safeSegment(options.defaultFriendId)
+        : options.from?.trim()
+            ? phoneIdentitySegment(options.from)
+            : "";
+    const lineSegment = options.to?.trim() ? phoneIdentitySegment(options.to) : "";
+    if (friendSegment && lineSegment)
+        return `twilio-phone-${friendSegment}-via-${lineSegment}`;
+    if (friendSegment)
+        return `twilio-phone-${friendSegment}`;
+    if (lineSegment)
+        return `twilio-phone-line-${lineSegment}`;
+    return `twilio-phone-${safeSegment(options.callSid ?? "incoming")}`;
+}
 function callConnectedPrompt(params) {
     const from = params.From?.trim();
     const to = params.To?.trim();
@@ -204,6 +247,35 @@ function isNoSpeechTranscript(text) {
         || normalized === "[NO_SPEECH]"
         || normalized === "NO_SPEECH";
 }
+function isNoSpeechTranscriptionError(error) {
+    const normalized = errorMessage(error).toLowerCase();
+    return normalized.includes("empty whisper.cpp transcript")
+        || normalized.includes("voice transcript text is empty");
+}
+function buildNoSpeechTranscript(utteranceId) {
+    return (0, transcript_1.buildVoiceTranscript)({
+        utteranceId: `${utteranceId}-nospeech`,
+        text: noSpeechPrompt(),
+        source: "loopback",
+    });
+}
+async function transcribeRecordingOrNoSpeech(options) {
+    try {
+        const transcript = await options.transcriber.transcribe({
+            utteranceId: options.utteranceId,
+            audioPath: options.inputPath,
+        });
+        return isNoSpeechTranscript(transcript.text)
+            ? buildNoSpeechTranscript(options.utteranceId)
+            : transcript;
+    }
+    catch (error) {
+        if (isNoSpeechTranscriptionError(error)) {
+            return buildNoSpeechTranscript(options.utteranceId);
+        }
+        throw error;
+    }
+}
 function parseRecordingParams(params) {
     const callSid = params.CallSid?.trim();
     const recordingSid = params.RecordingSid?.trim();
@@ -215,6 +287,7 @@ function parseRecordingParams(params) {
         recordingSid,
         recordingUrl,
         from: params.From?.trim() ?? "",
+        to: params.To?.trim() ?? "",
     };
 }
 function recordAgainResponse(options, basePath, message) {
@@ -238,6 +311,180 @@ function nextInputTwiml(options, basePath, mode) {
         maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
     });
 }
+class TwilioAudioStreamJob {
+    callSid;
+    jobId;
+    mimeType;
+    chunks = [];
+    waiters = new Set();
+    status = "pending";
+    failure = null;
+    byteLength = 0;
+    constructor(callSid, jobId, mimeType) {
+        this.callSid = callSid;
+        this.jobId = jobId;
+        this.mimeType = mimeType;
+    }
+    append(chunk) {
+        /* v8 ignore next -- append is only called while pending with non-empty chunks in bridge flow @preserve */
+        if (this.status !== "pending" || chunk.byteLength === 0)
+            return;
+        const buffered = Buffer.from(chunk);
+        this.chunks.push(buffered);
+        this.byteLength += buffered.byteLength;
+        this.notify();
+    }
+    complete() {
+        /* v8 ignore next -- completion is single-shot inside startTwilioPlaybackStreamJob @preserve */
+        if (this.status !== "pending")
+            return;
+        this.status = "completed";
+        this.notify();
+    }
+    fail(error) {
+        /* v8 ignore next -- failure is single-shot inside startTwilioPlaybackStreamJob @preserve */
+        if (this.status !== "pending")
+            return;
+        this.status = "failed";
+        this.failure = errorMessage(error);
+        this.notify();
+    }
+    async *stream() {
+        let index = 0;
+        let yielded = false;
+        for (;;) {
+            while (index < this.chunks.length) {
+                yielded = true;
+                yield this.chunks[index++];
+            }
+            if (this.status === "completed")
+                return;
+            if (this.status === "failed") {
+                if (yielded)
+                    return;
+                throw new Error(this.failure);
+            }
+            await new Promise((resolve) => {
+                this.waiters.add(resolve);
+            });
+        }
+    }
+    notify() {
+        const waiters = [...this.waiters];
+        this.waiters.clear();
+        for (const waiter of waiters)
+            waiter();
+    }
+}
+class TwilioAudioStreamJobStore {
+    jobs = new Map();
+    create(callSid, jobId, mimeType = "audio/mpeg") {
+        const key = this.key(callSid, jobId);
+        const job = new TwilioAudioStreamJob(callSid, jobId, mimeType);
+        this.jobs.set(key, job);
+        return job;
+    }
+    get(callSid, jobId) {
+        return this.jobs.get(this.key(callSid, jobId)) ?? null;
+    }
+    /* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
+    delete(callSid, jobId) {
+        this.jobs.delete(this.key(callSid, jobId));
+    }
+    /* v8 ignore stop */
+    key(callSid, jobId) {
+        return `${callSid}/${jobId}`;
+    }
+}
+function deliveredSegments(turn) {
+    return turn.speechSegments.map((segment) => segment.tts);
+}
+async function writeVoiceTurnPlaybackArtifacts(options) {
+    const urls = [];
+    for (const segment of options.turn.speechSegments) {
+        const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
+            utteranceId: segment.utteranceId,
+            delivery: segment.tts,
+            outputDir: options.callDir,
+        });
+        urls.push(routeUrl(options.bridgeOptions.publicBaseUrl, `${options.basePath}/audio/${encodeURIComponent(options.safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`));
+    }
+    return urls;
+}
+function playManyTwiml(urls) {
+    return urls.map(playTwiml).join("");
+}
+function streamAudioUrl(options, basePath, safeCallSid, jobId) {
+    return routeUrl(options.publicBaseUrl, `${basePath}/audio-stream/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(`${jobId}.mp3`)}`);
+}
+function scheduleJobCleanup(jobs, safeCallSid, jobId) {
+    /* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
+    const cleanup = setTimeout(() => {
+        jobs.delete(safeCallSid, jobId);
+    }, 5 * 60_000);
+    cleanup.unref?.();
+    /* v8 ignore stop */
+}
+function startTwilioPlaybackStreamJob(options) {
+    const job = options.jobs.create(options.safeCallSid, options.jobId);
+    void (async () => {
+        try {
+            const turn = await options.runTurn((chunk) => job.append(chunk));
+            const deliveries = deliveredSegments(turn);
+            if (job.byteLength === 0 && deliveries.length > 0) {
+                for (const delivery of deliveries)
+                    job.append(delivery.audio);
+            }
+            if (deliveries.length === 0) {
+                /* v8 ignore next -- runVoiceLoopbackTurn cannot return delivered TTS with zero speech segments @preserve */
+                if (turn.tts.status === "failed")
+                    throw new Error(turn.tts.error);
+                /* v8 ignore next -- runVoiceLoopbackTurn emits a speech segment whenever TTS is delivered @preserve */
+                throw new Error("voice turn produced no audio");
+            }
+            try {
+                await writeVoiceTurnPlaybackArtifacts({
+                    bridgeOptions: options.bridgeOptions,
+                    basePath: options.basePath,
+                    callDir: options.callDir,
+                    safeCallSid: options.safeCallSid,
+                    baseUtteranceId: options.baseUtteranceId,
+                    turn,
+                });
+            }
+            catch (artifactError) {
+                (0, runtime_1.emitNervesEvent)({
+                    level: "warn",
+                    component: "senses",
+                    event: "senses.voice_twilio_stream_artifact_error",
+                    message: "Twilio stream audio was delivered but artifact persistence failed",
+                    meta: { ...options.meta, error: errorMessage(artifactError) },
+                });
+            }
+            job.complete();
+            (0, runtime_1.emitNervesEvent)({
+                component: "senses",
+                event: "senses.voice_twilio_stream_end",
+                message: "finished Twilio streaming voice playback job",
+                meta: { ...options.meta, byteLength: String(job.byteLength), segmentCount: String(deliveries.length) },
+            });
+        }
+        catch (error) {
+            job.fail(error);
+            (0, runtime_1.emitNervesEvent)({
+                level: "error",
+                component: "senses",
+                event: "senses.voice_twilio_stream_error",
+                message: "Twilio streaming voice playback job failed",
+                meta: { ...options.meta, error: errorMessage(error) },
+            });
+        }
+        finally {
+            scheduleJobCleanup(options.jobs, options.safeCallSid, options.jobId);
+        }
+    })();
+    return job;
+}
 async function runPhonePromptTurn(options) {
     const transcript = (0, transcript_1.buildVoiceTranscript)({
         utteranceId: options.utteranceId,
@@ -256,13 +503,15 @@ async function runPhonePromptTurn(options) {
     if (turn.tts.status !== "delivered") {
         return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${after}`);
     }
-    const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
-        utteranceId: options.utteranceId,
-        delivery: turn.tts,
-        outputDir: options.callDir,
+    const audioUrls = await writeVoiceTurnPlaybackArtifacts({
+        bridgeOptions: options.bridgeOptions,
+        basePath: options.basePath,
+        callDir: options.callDir,
+        safeCallSid: options.safeCallSid,
+        baseUtteranceId: options.utteranceId,
+        turn,
     });
-    const audioUrl = routeUrl(options.bridgeOptions.publicBaseUrl, `${options.basePath}/audio/${encodeURIComponent(options.safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
-    return xmlResponse(`${playTwiml(audioUrl)}${after}`);
+    return xmlResponse(`${playManyTwiml(audioUrls)}${after}`);
 }
 function computeTwilioSignature(input) {
     const payload = input.url + Object.keys(input.params)
@@ -309,27 +558,62 @@ function verifyRequest(options, request, params) {
         signature: headerValue(request.headers, "x-twilio-signature"),
     });
 }
-async function handleIncoming(options, basePath, params) {
+async function handleIncoming(options, basePath, params, jobs) {
     const callSid = params.CallSid?.trim() || "incoming";
     const safeCallSid = safeSegment(callSid);
     const callDir = path.join(options.outputDir, safeCallSid);
     const utteranceId = `twilio-${safeCallSid}-connected`;
+    const friendId = voiceFriendId(options, params.From?.trim() ?? "", callSid);
+    const sessionKey = twilioPhoneVoiceSessionKey({
+        defaultFriendId: options.defaultFriendId,
+        from: params.From?.trim() ?? "",
+        to: params.To?.trim() ?? "",
+        callSid,
+    });
     (0, runtime_1.emitNervesEvent)({
         component: "senses",
         event: "senses.voice_twilio_incoming",
         message: "Twilio voice call connected",
-        meta: { agentName: options.agentName, callSid: safeCallSid },
+        meta: { agentName: options.agentName, callSid: safeCallSid, sessionKey },
     });
     try {
         await fs.mkdir(callDir, { recursive: true });
+        if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
+            const transcript = (0, transcript_1.buildVoiceTranscript)({
+                utteranceId,
+                text: callConnectedPrompt(params),
+                source: "loopback",
+            });
+            const jobId = safeSegment(utteranceId);
+            startTwilioPlaybackStreamJob({
+                jobs,
+                bridgeOptions: options,
+                basePath,
+                callDir,
+                safeCallSid,
+                jobId,
+                baseUtteranceId: utteranceId,
+                runTurn: (onAudioChunk) => (0, turn_1.runVoiceLoopbackTurn)({
+                    agentName: options.agentName,
+                    friendId,
+                    sessionKey,
+                    transcript,
+                    tts: options.tts,
+                    runSenseTurn: options.runSenseTurn,
+                    onAudioChunk,
+                }),
+                meta: { agentName: options.agentName, callSid: safeCallSid, utteranceId },
+            });
+            return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${nextInputTwiml(options, basePath, "record")}`);
+        }
         return await runPhonePromptTurn({
             bridgeOptions: options,
             basePath,
             callDir,
             safeCallSid,
             utteranceId,
-            friendId: voiceFriendId(options, params.From?.trim() ?? "", callSid),
-            sessionKey: `twilio-${safeCallSid}`,
+            friendId,
+            sessionKey,
             promptText: callConnectedPrompt(params),
             afterPlayback: "record",
         });
@@ -358,7 +642,7 @@ async function handleListen(options, basePath) {
         maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
     }));
 }
-async function handleRecording(options, basePath, params) {
+async function handleRecording(options, basePath, params, jobs) {
     const recording = parseRecordingParams(params);
     if (!recording) {
         (0, runtime_1.emitNervesEvent)({
@@ -376,13 +660,58 @@ async function handleRecording(options, basePath, params) {
     const inputPath = path.join(callDir, `${safeRecordingSid}.wav`);
     const utteranceId = `twilio-${safeCallSid}-${safeRecordingSid}`;
     const downloadRecording = options.downloadRecording ?? defaultTwilioRecordingDownloader;
+    const friendId = voiceFriendId(options, recording.from, recording.callSid);
+    const sessionKey = twilioPhoneVoiceSessionKey({
+        defaultFriendId: options.defaultFriendId,
+        from: recording.from,
+        to: recording.to,
+        callSid: recording.callSid,
+    });
     (0, runtime_1.emitNervesEvent)({
         component: "senses",
         event: "senses.voice_twilio_turn_start",
         message: "starting Twilio voice turn",
-        meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid },
+        meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, sessionKey },
     });
     try {
+        if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
+            const jobId = safeSegment(utteranceId);
+            startTwilioPlaybackStreamJob({
+                jobs,
+                bridgeOptions: options,
+                basePath,
+                callDir,
+                safeCallSid,
+                jobId,
+                baseUtteranceId: utteranceId,
+                runTurn: async (onAudioChunk) => {
+                    await fs.mkdir(callDir, { recursive: true });
+                    const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
+                    const audio = await downloadRecording({
+                        recordingUrl: mediaUrl,
+                        accountSid: options.twilioAccountSid?.trim() || undefined,
+                        authToken: options.twilioAuthToken?.trim() || undefined,
+                    });
+                    await fs.writeFile(inputPath, audio);
+                    const turnTranscript = await transcribeRecordingOrNoSpeech({
+                        transcriber: options.transcriber,
+                        utteranceId,
+                        inputPath,
+                    });
+                    return (0, turn_1.runVoiceLoopbackTurn)({
+                        agentName: options.agentName,
+                        friendId,
+                        sessionKey,
+                        transcript: turnTranscript,
+                        tts: options.tts,
+                        runSenseTurn: options.runSenseTurn,
+                        onAudioChunk,
+                    });
+                },
+                meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, utteranceId },
+            });
+            return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${redirectTwiml(options.publicBaseUrl, basePath)}`);
+        }
         await fs.mkdir(callDir, { recursive: true });
         const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
         const audio = await downloadRecording({
@@ -391,27 +720,28 @@ async function handleRecording(options, basePath, params) {
             authToken: options.twilioAuthToken?.trim() || undefined,
         });
         await fs.writeFile(inputPath, audio);
-        const transcript = await options.transcriber.transcribe({
+        const transcript = await transcribeRecordingOrNoSpeech({
+            transcriber: options.transcriber,
             utteranceId,
-            audioPath: inputPath,
+            inputPath,
         });
-        if (isNoSpeechTranscript(transcript.text)) {
+        if (transcript.utteranceId === `${utteranceId}-nospeech`) {
             return await runPhonePromptTurn({
                 bridgeOptions: options,
                 basePath,
                 callDir,
                 safeCallSid,
                 utteranceId: `${utteranceId}-nospeech`,
-                friendId: voiceFriendId(options, recording.from, recording.callSid),
-                sessionKey: `twilio-${safeCallSid}`,
+                friendId,
+                sessionKey,
                 promptText: noSpeechPrompt(),
                 afterPlayback: "redirect",
             });
         }
         const turn = await (0, turn_1.runVoiceLoopbackTurn)({
             agentName: options.agentName,
-            friendId: voiceFriendId(options, recording.from, recording.callSid),
-            sessionKey: `twilio-${safeCallSid}`,
+            friendId,
+            sessionKey,
             transcript,
             tts: options.tts,
             runSenseTurn: options.runSenseTurn,
@@ -419,19 +749,21 @@ async function handleRecording(options, basePath, params) {
         if (turn.tts.status !== "delivered") {
             return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
         }
-        const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
-            utteranceId,
-            delivery: turn.tts,
-            outputDir: callDir,
+        const audioUrls = await writeVoiceTurnPlaybackArtifacts({
+            bridgeOptions: options,
+            basePath,
+            callDir,
+            safeCallSid,
+            baseUtteranceId: utteranceId,
+            turn,
         });
-        const audioUrl = routeUrl(options.publicBaseUrl, `${basePath}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
         (0, runtime_1.emitNervesEvent)({
             component: "senses",
             event: "senses.voice_twilio_turn_end",
             message: "finished Twilio voice turn",
-            meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, audioPath: playback.audioPath },
+            meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, playbackCount: audioUrls.length },
         });
-        return xmlResponse(`${playTwiml(audioUrl)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
+        return xmlResponse(`${playManyTwiml(audioUrls)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
     }
     catch (error) {
         (0, runtime_1.emitNervesEvent)({
@@ -477,9 +809,34 @@ async function handleAudio(options, basePath, requestPath) {
         return textResponse(404, "not found");
     }
 }
+/**
+ * Serve the audio of a Twilio streaming playback job.
+ *
+ * The part of the request path after `<basePath>/audio-stream/` must consist
+ * of exactly two segments, `<callSid>/<fileName>`. The job id is the file
+ * name with one trailing `.ext` suffix removed.
+ *
+ * @param options Bridge options; only `agentName` is read here, for event metadata.
+ * @param basePath Route prefix the bridge is mounted under.
+ * @param requestPath Request path, optionally followed by a query string.
+ * @param jobs Job store, queried with `jobs.get(callSid, jobId)`.
+ * @returns A 404 text response when the path is malformed or no job matches;
+ *   otherwise a stream response built from the job's stream and MIME type.
+ */
+async function handleAudioStream(options, basePath, requestPath, jobs) {
+    const notFound = () => textResponse(404, "not found");
+    // Ignore any query string, then keep only what follows the route prefix.
+    const withoutQuery = requestPath.split("?")[0];
+    const segments = withoutQuery.slice(`${basePath}/audio-stream/`.length).split("/");
+    if (segments.length !== 2) {
+        return notFound();
+    }
+    const callSid = decodeSafeSegment(segments[0]);
+    const fileName = decodeSafeSegment(segments[1]);
+    if (!(callSid && fileName)) {
+        return notFound();
+    }
+    // Strip a trailing alphanumeric extension so the file name maps back to the job id.
+    const jobId = fileName.replace(/\.[A-Za-z0-9]+$/, "");
+    const job = jobs.get(callSid, jobId);
+    if (!job) {
+        return notFound();
+    }
+    const meta = { agentName: options.agentName, callSid, jobId };
+    (0, runtime_1.emitNervesEvent)({
+        component: "senses",
+        event: "senses.voice_twilio_stream_served",
+        message: "served Twilio voice streaming audio job",
+        meta,
+    });
+    return streamResponse(job.stream(), job.mimeType);
+}
 function createTwilioPhoneBridge(options) {
     new URL(options.publicBaseUrl);
     const basePath = normalizeTwilioPhoneBasePath(options.basePath);
+    const jobs = new TwilioAudioStreamJobStore();
     return {
         async handle(request) {
             const method = request.method.toUpperCase();
@@ -488,6 +845,9 @@ function createTwilioPhoneBridge(options) {
             if (method === "GET" && requestPath.startsWith(`${basePath}/audio/`)) {
                 return handleAudio(options, basePath, requestPath);
             }
+            if (method === "GET" && requestPath.startsWith(`${basePath}/audio-stream/`)) {
+                return handleAudioStream(options, basePath, requestPath, jobs);
+            }
             if (method === "GET" && routePath === `${basePath}/health`) {
                 return textResponse(200, "ok");
             }
@@ -505,11 +865,11 @@ function createTwilioPhoneBridge(options) {
                 return textResponse(403, "invalid Twilio signature");
             }
             if (routePath === `${basePath}/incoming`)
-                return handleIncoming(options, basePath, params);
+                return handleIncoming(options, basePath, params, jobs);
             if (routePath === `${basePath}/listen`)
                 return handleListen(options, basePath);
             if (routePath === `${basePath}/recording`)
-                return handleRecording(options, basePath, params);
+                return handleRecording(options, basePath, params, jobs);
             return textResponse(404, "not found");
         },
     };
@@ -531,6 +891,35 @@ function readRequestBody(req, limitBytes = 1_000_000) {
         req.on("error", reject);
     });
 }
+/* v8 ignore start -- HTTP backpressure is platform-dependent in unit tests @preserve */
+/**
+ * Wait until a response that reported backpressure can accept more data.
+ *
+ * @param res Writable response emitting "drain", "error" and "close" events.
+ * @returns A promise that resolves on "drain". It rejects with the emitted
+ *   error on "error", and with a generic Error when the response closes (or
+ *   is already destroyed) before draining.
+ */
+function waitForDrain(res) {
+    return new Promise((resolve, reject) => {
+        // An already-destroyed response may have emitted "close" before we
+        // subscribed; reject right away instead of waiting for events that
+        // may never come.
+        if (res.destroyed) {
+            reject(new Error("response closed before it drained"));
+            return;
+        }
+        // Whichever event settles the promise removes every listener, so
+        // repeated waits on the same response do not accumulate handlers.
+        const cleanup = () => {
+            res.off("drain", onDrain);
+            res.off("error", onError);
+            res.off("close", onClose);
+        };
+        const onDrain = () => {
+            cleanup();
+            resolve();
+        };
+        const onError = (error) => {
+            cleanup();
+            reject(error);
+        };
+        // A client hang-up surfaces as "close" without "drain" or "error";
+        // without this handler the caller's write loop would wait forever.
+        const onClose = () => {
+            cleanup();
+            reject(new Error("response closed before it drained"));
+        };
+        res.once("drain", onDrain);
+        res.once("error", onError);
+        res.once("close", onClose);
+    });
+}
+/* v8 ignore stop */
+/**
+ * Send a response body and finish the response.
+ *
+ * @param res Response being written; its headers are expected to be sent by the caller.
+ * @param body Either a value accepted by `res.end`, or an async iterable of
+ *   chunks (as decided by `isAsyncIterableBody`).
+ * @returns A promise that settles once the body has been handed to `res`.
+ */
+async function writeResponseBody(res, body) {
+    if (isAsyncIterableBody(body)) {
+        // Forward chunks one at a time, pausing whenever the response reports
+        // that its buffer is full.
+        for await (const piece of body) {
+            /* v8 ignore next -- exercised only when Node reports socket backpressure @preserve */
+            if (!res.write(piece)) {
+                await waitForDrain(res);
+            }
+        }
+        res.end();
+        return;
+    }
+    res.end(body);
+}
 async function startTwilioPhoneBridgeServer(options) {
     const port = options.port ?? exports.DEFAULT_TWILIO_PHONE_PORT;
     const host = options.host ?? "127.0.0.1";
@@ -545,7 +934,7 @@ async function startTwilioPhoneBridgeServer(options) {
                 body,
             });
             res.writeHead(response.statusCode, response.headers);
-            res.end(response.body);
+            await writeResponseBody(res, response.body);
         }
         catch (error) {
             (0, runtime_1.emitNervesEvent)({
@@ -555,8 +944,14 @@ async function startTwilioPhoneBridgeServer(options) {
                 message: "Twilio voice bridge server failed a request",
                 meta: { agentName: options.agentName, error: errorMessage(error) },
             });
-            res.writeHead(500, { "content-type": "text/plain; charset=utf-8" });
-            res.end("internal server error");
+            /* v8 ignore next -- defensive path for async stream failures after headers @preserve */
+            if (res.headersSent) {
+                res.destroy(error instanceof Error ? error : new Error(String(error)));
+            }
+            else {
+                res.writeHead(500, { "content-type": "text/plain; charset=utf-8" });
+                res.end("internal server error");
+            }
         }
     });
     await new Promise((resolve, reject) => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ouro.bot/cli",
-  "version": "0.1.0-alpha.566",
+  "version": "0.1.0-alpha.568",
   "main": "dist/heart/daemon/ouro-entry.js",
   "bin": {
     "cli": "dist/heart/daemon/ouro-bot-entry.js",