@ouro.bot/cli 0.1.0-alpha.566 → 0.1.0-alpha.568

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -105,7 +105,7 @@ Task docs do not live in this repo anymore. Planning and doing docs live in the
105
105
  - Human TTY commands share one CLI surface family: bare `ouro` opens the home deck, `ouro up` uses the boot checklist, `ouro connect`/`ouro auth verify`/`ouro repair` agree on provider and vault truth, and `ouro help`/`ouro whoami`/`ouro versions`/`ouro hatch` render through the same Ouro-branded wizard/guide language instead of raw transcript walls. Orientation commands such as root `ouro connect` may use shorter live probes, while startup and verification commands own durable readiness updates.
106
106
  - Human-facing CLI commands that can wait on browser auth, vault IO, daemon startup, daemon restart, provider checks, or connector setup use a shared progress checklist. If a cursor may blink for more than a few seconds, the command should print or animate the current step instead of going quiet.
107
107
  - CLI commands that mutate bundle config, such as vault setup or `ouro connect bluebubbles`, run bundle sync after the change when `sync.enabled` is true and report a compact `bundle sync:` line.
108
- - Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.
108
+ - Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered `speak`/`settle` text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks to Twilio by default.
109
109
  - The daemon discovers bundles dynamically from `~/AgentBundles`.
110
110
  - `ouro status` reports version, last-updated time, discovered agents, senses, and workers.
111
111
  - `bundle-meta.json` tracks the runtime version that last touched a bundle.
package/changelog.json CHANGED
@@ -1,6 +1,21 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.568",
6
+ "changes": [
7
+ "Twilio phone recordings that Whisper.cpp reports as empty speech now route through an agent-authored voice reprompt instead of failing the Twilio audio stream.",
8
+ "Real STT infrastructure failures still surface as bridge errors, preserving a clear distinction between silence and broken transcription."
9
+ ]
10
+ },
11
+ {
12
+ "version": "0.1.0-alpha.567",
13
+ "changes": [
14
+ "Voice transports now receive outward `speak` and `settle` text through the shared sense delivery callback path, so voice audio is driven by the same tool-required delivery semantics as chat channels.",
15
+ "Twilio phone sessions are now keyed to the stable phone voice channel instead of CallSid, while CallSid remains the per-call artifact directory.",
16
+ "Managed Twilio playback now supports streaming Play URLs backed by ElevenLabs audio chunks, with buffered playback still available for compatibility testing."
17
+ ]
18
+ },
4
19
  {
5
20
  "version": "0.1.0-alpha.566",
6
21
  "changes": [
@@ -4289,11 +4289,12 @@ async function executeConnectVoice(agent, deps) {
4289
4289
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPublicUrl`,
4290
4290
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioBasePath --value /voice/agents/${agentPathSegment}/twilio`,
4291
4291
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPort --value 18910`,
4292
+ ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPlaybackMode --value stream`,
4292
4293
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioDefaultFriendId --value ari`,
4293
4294
  "Then enable agent.json: senses.voice.enabled = true and restart with `ouro up`.",
4294
4295
  `The managed Voice entrypoint will listen at POST <public-url>/voice/agents/${agentPathSegment}/twilio/incoming.`,
4295
4296
  `Standalone local smoke remains available with: node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>.`,
4296
- "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.",
4297
+ "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default.",
4297
4298
  ].join("\n");
4298
4299
  deps.writeStdout(message);
4299
4300
  return message;
@@ -506,7 +506,7 @@ function senseRuntimeGuidance(channel, preReadStatusLines) {
506
506
  lines.push("mail validation diagnostics: health checks, bounded mail tools, access logs, and UI inspection can support validation, but they are evidence inside those paths, not additional paths. If asked to name golden paths, do not include diagnostic commands, tool names, or status checks in the answer.");
507
507
  lines.push("mail diagnostic naming: `ouro doctor` is installation-wide; do not invent `ouro doctor --agent <agent>`.");
508
508
  lines.push("mail setup boundaries: do not invent `ouro auth verify --provider mail`, HEY OAuth, HEY IMAP, `ouro mcp call mail ...`, policy flags, autonomous sending, destructive mail actions, or production MX/DNS/forwarding changes. HEY export, HEY forwarding, DNS, MX cutover, sending, and destructive actions require explicit human confirmation.");
509
- lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
509
+ lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
510
510
  if (channel === "cli") {
511
511
  lines.push("cli is interactive: it is available when the user opens it, not something `ouro up` daemonizes.");
512
512
  }
@@ -235,17 +235,64 @@ async function runSenseTurn(options) {
235
235
  : [{ role: "system", content: (0, prompt_1.flattenSystemPrompt)(await (0, prompt_1.buildSystem)(channel, {}, undefined)) }];
236
236
  // Pending dir
237
237
  const pendingDir = (0, pending_1.getPendingDir)(agentName, friendId, channel, sessionKey);
238
- // Accumulate response text via callbacks
239
- let responseText = "";
238
+ // Accumulate outward text through the same callback boundary used by chat
239
+ // channels. `speak` flushes pending text immediately; `settle` is delivered
240
+ // once the turn completes.
241
+ let committedResponseText = "";
242
+ let pendingResponseText = "";
243
+ let terminalDeliveryKind = "text";
244
+ const deliveries = [];
245
+ const deliveryFailures = [];
246
+ const commitResponseText = (text) => {
247
+ const cleaned = stripThinkBlocks(text);
248
+ /* v8 ignore next -- deliverPending strips first; this is a defensive direct-call guard @preserve */
249
+ if (!cleaned)
250
+ return;
251
+ committedResponseText = committedResponseText
252
+ ? `${committedResponseText}\n${cleaned}`
253
+ : cleaned;
254
+ };
255
+ const deliveryErrorMessage = (error) => error instanceof Error ? error.message : String(error);
256
+ const deliverPending = async (kind, optionsForDelivery) => {
257
+ const text = stripThinkBlocks(pendingResponseText);
258
+ pendingResponseText = "";
259
+ if (!text)
260
+ return;
261
+ const delivery = { kind, text };
262
+ try {
263
+ await options.deliverySink?.onDelivery(delivery);
264
+ deliveries.push(delivery);
265
+ commitResponseText(text);
266
+ }
267
+ catch (error) {
268
+ const failure = { ...delivery, error: deliveryErrorMessage(error) };
269
+ deliveryFailures.push(failure);
270
+ (0, runtime_1.emitNervesEvent)({
271
+ level: "error",
272
+ component: "senses",
273
+ event: "senses.shared_turn_delivery_error",
274
+ message: "shared turn outward delivery failed",
275
+ meta: { agentName, channel, sessionKey, friendId, kind, error: failure.error, textLength: text.length },
276
+ });
277
+ if (optionsForDelivery.throwOnError)
278
+ throw error;
279
+ commitResponseText(text);
280
+ }
281
+ };
240
282
  /* v8 ignore start — no-op callback stubs; only onTextChunk does real work (covered via mock) */
241
283
  const callbacks = {
242
284
  onModelStart: () => { },
243
285
  onModelStreamStart: () => { },
244
- onTextChunk: (chunk) => { responseText += chunk; },
286
+ onTextChunk: (chunk) => { pendingResponseText += chunk; },
245
287
  onReasoningChunk: () => { },
246
288
  onToolStart: () => { },
247
- onToolEnd: () => { },
289
+ onToolEnd: (name, _summary, success) => {
290
+ if (name === "settle" && success)
291
+ terminalDeliveryKind = "settle";
292
+ },
248
293
  onError: () => { },
294
+ onClearText: () => { pendingResponseText = ""; },
295
+ flushNow: () => deliverPending("speak", { throwOnError: true }),
249
296
  };
250
297
  /* v8 ignore stop */
251
298
  // Run the pipeline
@@ -285,10 +332,11 @@ async function runSenseTurn(options) {
285
332
  /* v8 ignore stop */
286
333
  accumulateFriendTokens: tokens_1.accumulateFriendTokens,
287
334
  });
335
+ await deliverPending(terminalDeliveryKind, { throwOnError: false });
288
336
  const ponderDeferred = false;
289
337
  // Build response
290
338
  let finalResponse;
291
- if (responseText.length === 0) {
339
+ if (committedResponseText.length === 0) {
292
340
  // Agent settled but no text came through callbacks — check session transcript for the settle answer
293
341
  // Await deferred persist so the session file is up-to-date before readback
294
342
  /* v8 ignore next -- persistPromise set inside v8-ignored postTurn callback; tested via pipeline integration @preserve */
@@ -304,7 +352,7 @@ async function runSenseTurn(options) {
304
352
  }
305
353
  }
306
354
  else {
307
- finalResponse = responseText;
355
+ finalResponse = committedResponseText;
308
356
  }
309
357
  // Strip MiniMax-style <think>...</think> blocks from the final response.
310
358
  // When a reasoning-style model emits only a think block and no final answer
@@ -335,5 +383,5 @@ async function runSenseTurn(options) {
335
383
  message: "shared turn runner complete",
336
384
  meta: { agentName, channel, sessionKey, friendId, ponderDeferred, responseLength: finalResponse.length },
337
385
  });
338
- return { response: finalResponse, ponderDeferred };
386
+ return { response: finalResponse, ponderDeferred, deliveries, deliveryFailures };
339
387
  }
@@ -156,7 +156,19 @@ function createElevenLabsTtsClient(options) {
156
156
  try {
157
157
  const parsed = JSON.parse(payloadText(payload));
158
158
  if (typeof parsed.audio === "string" && parsed.audio.length > 0) {
159
- chunks.push(Buffer.from(parsed.audio, "base64"));
159
+ const chunk = Buffer.from(parsed.audio, "base64");
160
+ chunks.push(chunk);
161
+ if (request.onAudioChunk) {
162
+ try {
163
+ const chunkResult = request.onAudioChunk(chunk);
164
+ if (chunkResult && typeof chunkResult.then === "function") {
165
+ void chunkResult.catch(fail);
166
+ }
167
+ }
168
+ catch (error) {
169
+ fail(error);
170
+ }
171
+ }
160
172
  }
161
173
  if (parsed.isFinal === true) {
162
174
  finish();
@@ -4,6 +4,33 @@ exports.runVoiceLoopbackTurn = runVoiceLoopbackTurn;
4
4
  const runtime_1 = require("../../nerves/runtime");
5
5
  const shared_turn_1 = require("../shared-turn");
6
6
  const transcript_1 = require("./transcript");
7
+ function deliveredTts(spoken) {
8
+ return {
9
+ status: "delivered",
10
+ audio: spoken.audio,
11
+ byteLength: spoken.byteLength,
12
+ chunkCount: spoken.chunkCount,
13
+ mimeType: spoken.mimeType,
14
+ modelId: spoken.modelId,
15
+ voiceId: spoken.voiceId,
16
+ };
17
+ }
18
+ function aggregateSegments(segments) {
19
+ const first = segments[0].tts;
20
+ const audio = Buffer.concat(segments.map((segment) => Buffer.from(segment.tts.audio)));
21
+ return {
22
+ status: "delivered",
23
+ audio,
24
+ byteLength: audio.byteLength,
25
+ chunkCount: segments.reduce((sum, segment) => sum + segment.tts.chunkCount, 0),
26
+ mimeType: first.mimeType,
27
+ modelId: first.modelId,
28
+ voiceId: first.voiceId,
29
+ };
30
+ }
31
+ function deliveryErrorMessage(error) {
32
+ return error instanceof Error ? error.message : String(error);
33
+ }
7
34
  async function runVoiceLoopbackTurn(options) {
8
35
  const runSenseTurn = options.runSenseTurn ?? shared_turn_1.runSenseTurn;
9
36
  let userMessage;
@@ -31,30 +58,105 @@ async function runVoiceLoopbackTurn(options) {
31
58
  utteranceId: options.transcript.utteranceId,
32
59
  },
33
60
  });
61
+ const speechSegments = [];
62
+ const speechDeliveryErrors = [];
63
+ let deliveryIndex = 0;
64
+ const synthesizeDelivery = async (delivery) => {
65
+ deliveryIndex += 1;
66
+ const segmentUtteranceId = `${options.transcript.utteranceId}-${deliveryIndex}-${delivery.kind}`;
67
+ try {
68
+ const spoken = await options.tts.synthesize({
69
+ utteranceId: segmentUtteranceId,
70
+ text: delivery.text,
71
+ onAudioChunk: options.onAudioChunk,
72
+ });
73
+ speechSegments.push({
74
+ kind: delivery.kind,
75
+ text: delivery.text,
76
+ utteranceId: segmentUtteranceId,
77
+ tts: deliveredTts(spoken),
78
+ });
79
+ }
80
+ catch (error) {
81
+ const failure = {
82
+ kind: delivery.kind,
83
+ text: delivery.text,
84
+ utteranceId: segmentUtteranceId,
85
+ error: deliveryErrorMessage(error),
86
+ };
87
+ speechDeliveryErrors.push(failure);
88
+ throw error;
89
+ }
90
+ };
34
91
  const turn = await runSenseTurn({
35
92
  agentName: options.agentName,
36
93
  channel: "voice",
37
94
  friendId: options.friendId,
38
95
  sessionKey: options.sessionKey,
39
96
  userMessage,
97
+ deliverySink: { onDelivery: synthesizeDelivery },
40
98
  });
99
+ if (speechSegments.length > 0) {
100
+ const tts = aggregateSegments(speechSegments);
101
+ const result = {
102
+ responseText: turn.response,
103
+ ponderDeferred: turn.ponderDeferred,
104
+ tts,
105
+ speechSegments,
106
+ speechDeliveryErrors,
107
+ };
108
+ (0, runtime_1.emitNervesEvent)({
109
+ component: "senses",
110
+ event: "senses.voice_turn_end",
111
+ message: "voice loopback turn delivered speech",
112
+ meta: {
113
+ utteranceId: options.transcript.utteranceId,
114
+ responseLength: turn.response.length,
115
+ segmentCount: speechSegments.length,
116
+ byteLength: tts.byteLength,
117
+ },
118
+ });
119
+ return result;
120
+ }
121
+ const turnDeliveryFailures = turn.deliveryFailures ?? [];
122
+ if (speechDeliveryErrors.length > 0 || turnDeliveryFailures.length > 0) {
123
+ const firstError = speechDeliveryErrors[0]?.error ?? turnDeliveryFailures[0].error;
124
+ (0, runtime_1.emitNervesEvent)({
125
+ level: "error",
126
+ component: "senses",
127
+ event: "senses.voice_turn_tts_error",
128
+ message: "voice loopback TTS failed after text response",
129
+ meta: { utteranceId: options.transcript.utteranceId, error: firstError, responseLength: turn.response.length },
130
+ });
131
+ return {
132
+ responseText: turn.response,
133
+ ponderDeferred: turn.ponderDeferred,
134
+ tts: {
135
+ status: "failed",
136
+ error: firstError,
137
+ },
138
+ speechSegments,
139
+ speechDeliveryErrors,
140
+ };
141
+ }
41
142
  try {
42
143
  const spoken = await options.tts.synthesize({
43
144
  utteranceId: options.transcript.utteranceId,
44
145
  text: turn.response,
146
+ onAudioChunk: options.onAudioChunk,
45
147
  });
148
+ const tts = deliveredTts(spoken);
46
149
  const result = {
47
150
  responseText: turn.response,
48
151
  ponderDeferred: turn.ponderDeferred,
49
- tts: {
50
- status: "delivered",
51
- audio: spoken.audio,
52
- byteLength: spoken.byteLength,
53
- chunkCount: spoken.chunkCount,
54
- mimeType: spoken.mimeType,
55
- modelId: spoken.modelId,
56
- voiceId: spoken.voiceId,
57
- },
152
+ tts,
153
+ speechSegments: [{
154
+ kind: "text",
155
+ text: turn.response,
156
+ utteranceId: options.transcript.utteranceId,
157
+ tts,
158
+ }],
159
+ speechDeliveryErrors,
58
160
  };
59
161
  (0, runtime_1.emitNervesEvent)({
60
162
  component: "senses",
@@ -80,6 +182,8 @@ async function runVoiceLoopbackTurn(options) {
80
182
  status: "failed",
81
183
  error: message,
82
184
  },
185
+ speechSegments,
186
+ speechDeliveryErrors,
83
187
  };
84
188
  }
85
189
  }
@@ -193,6 +193,8 @@ function resolveTwilioPhoneTransportRuntime(options) {
193
193
  recordMaxLengthSeconds: overrides.recordMaxLengthSeconds
194
194
  ?? configNumber(options.machineConfig, "voice.twilioRecordMaxLengthSeconds")
195
195
  ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
196
+ playbackMode: overrides.playbackMode
197
+ ?? (0, twilio_phone_1.normalizeTwilioPhonePlaybackMode)(configString(options.machineConfig, "voice.twilioPlaybackMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE),
196
198
  };
197
199
  return { status: "configured", settings };
198
200
  }
@@ -259,6 +261,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
259
261
  defaultFriendId: settings.defaultFriendId,
260
262
  recordTimeoutSeconds: settings.recordTimeoutSeconds,
261
263
  recordMaxLengthSeconds: settings.recordMaxLengthSeconds,
264
+ playbackMode: settings.playbackMode,
262
265
  });
263
266
  (0, runtime_1.emitNervesEvent)({
264
267
  component: "senses",
@@ -33,9 +33,11 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
36
+ exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
37
37
  exports.normalizeTwilioPhoneBasePath = normalizeTwilioPhoneBasePath;
38
+ exports.normalizeTwilioPhonePlaybackMode = normalizeTwilioPhonePlaybackMode;
38
39
  exports.twilioPhoneWebhookUrl = twilioPhoneWebhookUrl;
40
+ exports.twilioPhoneVoiceSessionKey = twilioPhoneVoiceSessionKey;
39
41
  exports.computeTwilioSignature = computeTwilioSignature;
40
42
  exports.validateTwilioSignature = validateTwilioSignature;
41
43
  exports.twilioRecordingMediaUrl = twilioRecordingMediaUrl;
@@ -54,6 +56,7 @@ exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
54
56
  exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
55
57
  exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = 30;
56
58
  exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = "/voice/twilio";
59
+ exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = "stream";
57
60
  function bodyText(body) {
58
61
  if (body === undefined)
59
62
  return "";
@@ -104,6 +107,21 @@ function binaryResponse(body, contentType) {
104
107
  body,
105
108
  };
106
109
  }
110
+ function streamResponse(body, contentType) {
111
+ return {
112
+ statusCode: 200,
113
+ headers: {
114
+ "content-type": contentType,
115
+ "cache-control": "no-store",
116
+ },
117
+ body,
118
+ };
119
+ }
120
+ function isAsyncIterableBody(body) {
121
+ return typeof body === "object"
122
+ && body !== null
123
+ && Symbol.asyncIterator in body;
124
+ }
107
125
  function escapeXml(input) {
108
126
  return input
109
127
  .replace(/&/g, "&amp;")
@@ -127,6 +145,12 @@ function normalizeTwilioPhoneBasePath(value = exports.TWILIO_PHONE_WEBHOOK_BASE_
127
145
  }
128
146
  return withoutTrailingSlash;
129
147
  }
148
+ function normalizeTwilioPhonePlaybackMode(value) {
149
+ const normalized = (value ?? exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE).trim().toLowerCase();
150
+ if (normalized === "stream" || normalized === "buffered")
151
+ return normalized;
152
+ throw new Error(`invalid Twilio phone playback mode: ${value}`);
153
+ }
130
154
  function twilioPhoneWebhookUrl(publicBaseUrl, basePath = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
131
155
  return routeUrl(publicBaseUrl, `${normalizeTwilioPhoneBasePath(basePath)}/incoming`);
132
156
  }
@@ -179,6 +203,25 @@ function friendIdFromCaller(from, callSid) {
179
203
  function voiceFriendId(options, from, callSid) {
180
204
  return options.defaultFriendId?.trim() || friendIdFromCaller(from, callSid);
181
205
  }
206
+ function phoneIdentitySegment(input) {
207
+ const phoneish = input.replace(/[^0-9A-Za-z]+/g, "");
208
+ return phoneish || safeSegment(input);
209
+ }
210
+ function twilioPhoneVoiceSessionKey(options) {
211
+ const friendSegment = options.defaultFriendId?.trim()
212
+ ? safeSegment(options.defaultFriendId)
213
+ : options.from?.trim()
214
+ ? phoneIdentitySegment(options.from)
215
+ : "";
216
+ const lineSegment = options.to?.trim() ? phoneIdentitySegment(options.to) : "";
217
+ if (friendSegment && lineSegment)
218
+ return `twilio-phone-${friendSegment}-via-${lineSegment}`;
219
+ if (friendSegment)
220
+ return `twilio-phone-${friendSegment}`;
221
+ if (lineSegment)
222
+ return `twilio-phone-line-${lineSegment}`;
223
+ return `twilio-phone-${safeSegment(options.callSid ?? "incoming")}`;
224
+ }
182
225
  function callConnectedPrompt(params) {
183
226
  const from = params.From?.trim();
184
227
  const to = params.To?.trim();
@@ -204,6 +247,35 @@ function isNoSpeechTranscript(text) {
204
247
  || normalized === "[NO_SPEECH]"
205
248
  || normalized === "NO_SPEECH";
206
249
  }
250
+ function isNoSpeechTranscriptionError(error) {
251
+ const normalized = errorMessage(error).toLowerCase();
252
+ return normalized.includes("empty whisper.cpp transcript")
253
+ || normalized.includes("voice transcript text is empty");
254
+ }
255
+ function buildNoSpeechTranscript(utteranceId) {
256
+ return (0, transcript_1.buildVoiceTranscript)({
257
+ utteranceId: `${utteranceId}-nospeech`,
258
+ text: noSpeechPrompt(),
259
+ source: "loopback",
260
+ });
261
+ }
262
+ async function transcribeRecordingOrNoSpeech(options) {
263
+ try {
264
+ const transcript = await options.transcriber.transcribe({
265
+ utteranceId: options.utteranceId,
266
+ audioPath: options.inputPath,
267
+ });
268
+ return isNoSpeechTranscript(transcript.text)
269
+ ? buildNoSpeechTranscript(options.utteranceId)
270
+ : transcript;
271
+ }
272
+ catch (error) {
273
+ if (isNoSpeechTranscriptionError(error)) {
274
+ return buildNoSpeechTranscript(options.utteranceId);
275
+ }
276
+ throw error;
277
+ }
278
+ }
207
279
  function parseRecordingParams(params) {
208
280
  const callSid = params.CallSid?.trim();
209
281
  const recordingSid = params.RecordingSid?.trim();
@@ -215,6 +287,7 @@ function parseRecordingParams(params) {
215
287
  recordingSid,
216
288
  recordingUrl,
217
289
  from: params.From?.trim() ?? "",
290
+ to: params.To?.trim() ?? "",
218
291
  };
219
292
  }
220
293
  function recordAgainResponse(options, basePath, message) {
@@ -238,6 +311,180 @@ function nextInputTwiml(options, basePath, mode) {
238
311
  maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
239
312
  });
240
313
  }
314
+ class TwilioAudioStreamJob {
315
+ callSid;
316
+ jobId;
317
+ mimeType;
318
+ chunks = [];
319
+ waiters = new Set();
320
+ status = "pending";
321
+ failure = null;
322
+ byteLength = 0;
323
+ constructor(callSid, jobId, mimeType) {
324
+ this.callSid = callSid;
325
+ this.jobId = jobId;
326
+ this.mimeType = mimeType;
327
+ }
328
+ append(chunk) {
329
+ /* v8 ignore next -- append is only called while pending with non-empty chunks in bridge flow @preserve */
330
+ if (this.status !== "pending" || chunk.byteLength === 0)
331
+ return;
332
+ const buffered = Buffer.from(chunk);
333
+ this.chunks.push(buffered);
334
+ this.byteLength += buffered.byteLength;
335
+ this.notify();
336
+ }
337
+ complete() {
338
+ /* v8 ignore next -- completion is single-shot inside startTwilioPlaybackStreamJob @preserve */
339
+ if (this.status !== "pending")
340
+ return;
341
+ this.status = "completed";
342
+ this.notify();
343
+ }
344
+ fail(error) {
345
+ /* v8 ignore next -- failure is single-shot inside startTwilioPlaybackStreamJob @preserve */
346
+ if (this.status !== "pending")
347
+ return;
348
+ this.status = "failed";
349
+ this.failure = errorMessage(error);
350
+ this.notify();
351
+ }
352
+ async *stream() {
353
+ let index = 0;
354
+ let yielded = false;
355
+ for (;;) {
356
+ while (index < this.chunks.length) {
357
+ yielded = true;
358
+ yield this.chunks[index++];
359
+ }
360
+ if (this.status === "completed")
361
+ return;
362
+ if (this.status === "failed") {
363
+ if (yielded)
364
+ return;
365
+ throw new Error(this.failure);
366
+ }
367
+ await new Promise((resolve) => {
368
+ this.waiters.add(resolve);
369
+ });
370
+ }
371
+ }
372
+ notify() {
373
+ const waiters = [...this.waiters];
374
+ this.waiters.clear();
375
+ for (const waiter of waiters)
376
+ waiter();
377
+ }
378
+ }
379
+ class TwilioAudioStreamJobStore {
380
+ jobs = new Map();
381
+ create(callSid, jobId, mimeType = "audio/mpeg") {
382
+ const key = this.key(callSid, jobId);
383
+ const job = new TwilioAudioStreamJob(callSid, jobId, mimeType);
384
+ this.jobs.set(key, job);
385
+ return job;
386
+ }
387
+ get(callSid, jobId) {
388
+ return this.jobs.get(this.key(callSid, jobId)) ?? null;
389
+ }
390
+ /* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
391
+ delete(callSid, jobId) {
392
+ this.jobs.delete(this.key(callSid, jobId));
393
+ }
394
+ /* v8 ignore stop */
395
+ key(callSid, jobId) {
396
+ return `${callSid}/${jobId}`;
397
+ }
398
+ }
399
+ function deliveredSegments(turn) {
400
+ return turn.speechSegments.map((segment) => segment.tts);
401
+ }
402
+ async function writeVoiceTurnPlaybackArtifacts(options) {
403
+ const urls = [];
404
+ for (const segment of options.turn.speechSegments) {
405
+ const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
406
+ utteranceId: segment.utteranceId,
407
+ delivery: segment.tts,
408
+ outputDir: options.callDir,
409
+ });
410
+ urls.push(routeUrl(options.bridgeOptions.publicBaseUrl, `${options.basePath}/audio/${encodeURIComponent(options.safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`));
411
+ }
412
+ return urls;
413
+ }
414
+ function playManyTwiml(urls) {
415
+ return urls.map(playTwiml).join("");
416
+ }
417
+ function streamAudioUrl(options, basePath, safeCallSid, jobId) {
418
+ return routeUrl(options.publicBaseUrl, `${basePath}/audio-stream/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(`${jobId}.mp3`)}`);
419
+ }
420
+ function scheduleJobCleanup(jobs, safeCallSid, jobId) {
421
+ /* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
422
+ const cleanup = setTimeout(() => {
423
+ jobs.delete(safeCallSid, jobId);
424
+ }, 5 * 60_000);
425
+ cleanup.unref?.();
426
+ /* v8 ignore stop */
427
+ }
428
+ function startTwilioPlaybackStreamJob(options) {
429
+ const job = options.jobs.create(options.safeCallSid, options.jobId);
430
+ void (async () => {
431
+ try {
432
+ const turn = await options.runTurn((chunk) => job.append(chunk));
433
+ const deliveries = deliveredSegments(turn);
434
+ if (job.byteLength === 0 && deliveries.length > 0) {
435
+ for (const delivery of deliveries)
436
+ job.append(delivery.audio);
437
+ }
438
+ if (deliveries.length === 0) {
439
+ /* v8 ignore next -- runVoiceLoopbackTurn cannot return delivered TTS with zero speech segments @preserve */
440
+ if (turn.tts.status === "failed")
441
+ throw new Error(turn.tts.error);
442
+ /* v8 ignore next -- runVoiceLoopbackTurn emits a speech segment whenever TTS is delivered @preserve */
443
+ throw new Error("voice turn produced no audio");
444
+ }
445
+ try {
446
+ await writeVoiceTurnPlaybackArtifacts({
447
+ bridgeOptions: options.bridgeOptions,
448
+ basePath: options.basePath,
449
+ callDir: options.callDir,
450
+ safeCallSid: options.safeCallSid,
451
+ baseUtteranceId: options.baseUtteranceId,
452
+ turn,
453
+ });
454
+ }
455
+ catch (artifactError) {
456
+ (0, runtime_1.emitNervesEvent)({
457
+ level: "warn",
458
+ component: "senses",
459
+ event: "senses.voice_twilio_stream_artifact_error",
460
+ message: "Twilio stream audio was delivered but artifact persistence failed",
461
+ meta: { ...options.meta, error: errorMessage(artifactError) },
462
+ });
463
+ }
464
+ job.complete();
465
+ (0, runtime_1.emitNervesEvent)({
466
+ component: "senses",
467
+ event: "senses.voice_twilio_stream_end",
468
+ message: "finished Twilio streaming voice playback job",
469
+ meta: { ...options.meta, byteLength: String(job.byteLength), segmentCount: String(deliveries.length) },
470
+ });
471
+ }
472
+ catch (error) {
473
+ job.fail(error);
474
+ (0, runtime_1.emitNervesEvent)({
475
+ level: "error",
476
+ component: "senses",
477
+ event: "senses.voice_twilio_stream_error",
478
+ message: "Twilio streaming voice playback job failed",
479
+ meta: { ...options.meta, error: errorMessage(error) },
480
+ });
481
+ }
482
+ finally {
483
+ scheduleJobCleanup(options.jobs, options.safeCallSid, options.jobId);
484
+ }
485
+ })();
486
+ return job;
487
+ }
241
488
  async function runPhonePromptTurn(options) {
242
489
  const transcript = (0, transcript_1.buildVoiceTranscript)({
243
490
  utteranceId: options.utteranceId,
@@ -256,13 +503,15 @@ async function runPhonePromptTurn(options) {
256
503
  if (turn.tts.status !== "delivered") {
257
504
  return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${after}`);
258
505
  }
259
- const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
260
- utteranceId: options.utteranceId,
261
- delivery: turn.tts,
262
- outputDir: options.callDir,
506
+ const audioUrls = await writeVoiceTurnPlaybackArtifacts({
507
+ bridgeOptions: options.bridgeOptions,
508
+ basePath: options.basePath,
509
+ callDir: options.callDir,
510
+ safeCallSid: options.safeCallSid,
511
+ baseUtteranceId: options.utteranceId,
512
+ turn,
263
513
  });
264
- const audioUrl = routeUrl(options.bridgeOptions.publicBaseUrl, `${options.basePath}/audio/${encodeURIComponent(options.safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
265
- return xmlResponse(`${playTwiml(audioUrl)}${after}`);
514
+ return xmlResponse(`${playManyTwiml(audioUrls)}${after}`);
266
515
  }
267
516
  function computeTwilioSignature(input) {
268
517
  const payload = input.url + Object.keys(input.params)
@@ -309,27 +558,62 @@ function verifyRequest(options, request, params) {
309
558
  signature: headerValue(request.headers, "x-twilio-signature"),
310
559
  });
311
560
  }
312
- async function handleIncoming(options, basePath, params) {
561
+ async function handleIncoming(options, basePath, params, jobs) {
313
562
  const callSid = params.CallSid?.trim() || "incoming";
314
563
  const safeCallSid = safeSegment(callSid);
315
564
  const callDir = path.join(options.outputDir, safeCallSid);
316
565
  const utteranceId = `twilio-${safeCallSid}-connected`;
566
+ const friendId = voiceFriendId(options, params.From?.trim() ?? "", callSid);
567
+ const sessionKey = twilioPhoneVoiceSessionKey({
568
+ defaultFriendId: options.defaultFriendId,
569
+ from: params.From?.trim() ?? "",
570
+ to: params.To?.trim() ?? "",
571
+ callSid,
572
+ });
317
573
  (0, runtime_1.emitNervesEvent)({
318
574
  component: "senses",
319
575
  event: "senses.voice_twilio_incoming",
320
576
  message: "Twilio voice call connected",
321
- meta: { agentName: options.agentName, callSid: safeCallSid },
577
+ meta: { agentName: options.agentName, callSid: safeCallSid, sessionKey },
322
578
  });
323
579
  try {
324
580
  await fs.mkdir(callDir, { recursive: true });
581
+ if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
582
+ const transcript = (0, transcript_1.buildVoiceTranscript)({
583
+ utteranceId,
584
+ text: callConnectedPrompt(params),
585
+ source: "loopback",
586
+ });
587
+ const jobId = safeSegment(utteranceId);
588
+ startTwilioPlaybackStreamJob({
589
+ jobs,
590
+ bridgeOptions: options,
591
+ basePath,
592
+ callDir,
593
+ safeCallSid,
594
+ jobId,
595
+ baseUtteranceId: utteranceId,
596
+ runTurn: (onAudioChunk) => (0, turn_1.runVoiceLoopbackTurn)({
597
+ agentName: options.agentName,
598
+ friendId,
599
+ sessionKey,
600
+ transcript,
601
+ tts: options.tts,
602
+ runSenseTurn: options.runSenseTurn,
603
+ onAudioChunk,
604
+ }),
605
+ meta: { agentName: options.agentName, callSid: safeCallSid, utteranceId },
606
+ });
607
+ return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${nextInputTwiml(options, basePath, "record")}`);
608
+ }
325
609
  return await runPhonePromptTurn({
326
610
  bridgeOptions: options,
327
611
  basePath,
328
612
  callDir,
329
613
  safeCallSid,
330
614
  utteranceId,
331
- friendId: voiceFriendId(options, params.From?.trim() ?? "", callSid),
332
- sessionKey: `twilio-${safeCallSid}`,
615
+ friendId,
616
+ sessionKey,
333
617
  promptText: callConnectedPrompt(params),
334
618
  afterPlayback: "record",
335
619
  });
@@ -358,7 +642,7 @@ async function handleListen(options, basePath) {
358
642
  maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
359
643
  }));
360
644
  }
361
- async function handleRecording(options, basePath, params) {
645
+ async function handleRecording(options, basePath, params, jobs) {
362
646
  const recording = parseRecordingParams(params);
363
647
  if (!recording) {
364
648
  (0, runtime_1.emitNervesEvent)({
@@ -376,13 +660,58 @@ async function handleRecording(options, basePath, params) {
376
660
  const inputPath = path.join(callDir, `${safeRecordingSid}.wav`);
377
661
  const utteranceId = `twilio-${safeCallSid}-${safeRecordingSid}`;
378
662
  const downloadRecording = options.downloadRecording ?? defaultTwilioRecordingDownloader;
663
+ const friendId = voiceFriendId(options, recording.from, recording.callSid);
664
+ const sessionKey = twilioPhoneVoiceSessionKey({
665
+ defaultFriendId: options.defaultFriendId,
666
+ from: recording.from,
667
+ to: recording.to,
668
+ callSid: recording.callSid,
669
+ });
379
670
  (0, runtime_1.emitNervesEvent)({
380
671
  component: "senses",
381
672
  event: "senses.voice_twilio_turn_start",
382
673
  message: "starting Twilio voice turn",
383
- meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid },
674
+ meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, sessionKey },
384
675
  });
385
676
  try {
677
+ if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
678
+ const jobId = safeSegment(utteranceId);
679
+ startTwilioPlaybackStreamJob({
680
+ jobs,
681
+ bridgeOptions: options,
682
+ basePath,
683
+ callDir,
684
+ safeCallSid,
685
+ jobId,
686
+ baseUtteranceId: utteranceId,
687
+ runTurn: async (onAudioChunk) => {
688
+ await fs.mkdir(callDir, { recursive: true });
689
+ const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
690
+ const audio = await downloadRecording({
691
+ recordingUrl: mediaUrl,
692
+ accountSid: options.twilioAccountSid?.trim() || undefined,
693
+ authToken: options.twilioAuthToken?.trim() || undefined,
694
+ });
695
+ await fs.writeFile(inputPath, audio);
696
+ const turnTranscript = await transcribeRecordingOrNoSpeech({
697
+ transcriber: options.transcriber,
698
+ utteranceId,
699
+ inputPath,
700
+ });
701
+ return (0, turn_1.runVoiceLoopbackTurn)({
702
+ agentName: options.agentName,
703
+ friendId,
704
+ sessionKey,
705
+ transcript: turnTranscript,
706
+ tts: options.tts,
707
+ runSenseTurn: options.runSenseTurn,
708
+ onAudioChunk,
709
+ });
710
+ },
711
+ meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, utteranceId },
712
+ });
713
+ return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${redirectTwiml(options.publicBaseUrl, basePath)}`);
714
+ }
386
715
  await fs.mkdir(callDir, { recursive: true });
387
716
  const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
388
717
  const audio = await downloadRecording({
@@ -391,27 +720,28 @@ async function handleRecording(options, basePath, params) {
391
720
  authToken: options.twilioAuthToken?.trim() || undefined,
392
721
  });
393
722
  await fs.writeFile(inputPath, audio);
394
- const transcript = await options.transcriber.transcribe({
723
+ const transcript = await transcribeRecordingOrNoSpeech({
724
+ transcriber: options.transcriber,
395
725
  utteranceId,
396
- audioPath: inputPath,
726
+ inputPath,
397
727
  });
398
- if (isNoSpeechTranscript(transcript.text)) {
728
+ if (transcript.utteranceId === `${utteranceId}-nospeech`) {
399
729
  return await runPhonePromptTurn({
400
730
  bridgeOptions: options,
401
731
  basePath,
402
732
  callDir,
403
733
  safeCallSid,
404
734
  utteranceId: `${utteranceId}-nospeech`,
405
- friendId: voiceFriendId(options, recording.from, recording.callSid),
406
- sessionKey: `twilio-${safeCallSid}`,
735
+ friendId,
736
+ sessionKey,
407
737
  promptText: noSpeechPrompt(),
408
738
  afterPlayback: "redirect",
409
739
  });
410
740
  }
411
741
  const turn = await (0, turn_1.runVoiceLoopbackTurn)({
412
742
  agentName: options.agentName,
413
- friendId: voiceFriendId(options, recording.from, recording.callSid),
414
- sessionKey: `twilio-${safeCallSid}`,
743
+ friendId,
744
+ sessionKey,
415
745
  transcript,
416
746
  tts: options.tts,
417
747
  runSenseTurn: options.runSenseTurn,
@@ -419,19 +749,21 @@ async function handleRecording(options, basePath, params) {
419
749
  if (turn.tts.status !== "delivered") {
420
750
  return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
421
751
  }
422
- const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
423
- utteranceId,
424
- delivery: turn.tts,
425
- outputDir: callDir,
752
+ const audioUrls = await writeVoiceTurnPlaybackArtifacts({
753
+ bridgeOptions: options,
754
+ basePath,
755
+ callDir,
756
+ safeCallSid,
757
+ baseUtteranceId: utteranceId,
758
+ turn,
426
759
  });
427
- const audioUrl = routeUrl(options.publicBaseUrl, `${basePath}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
428
760
  (0, runtime_1.emitNervesEvent)({
429
761
  component: "senses",
430
762
  event: "senses.voice_twilio_turn_end",
431
763
  message: "finished Twilio voice turn",
432
- meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, audioPath: playback.audioPath },
764
+ meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, playbackCount: audioUrls.length },
433
765
  });
434
- return xmlResponse(`${playTwiml(audioUrl)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
766
+ return xmlResponse(`${playManyTwiml(audioUrls)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
435
767
  }
436
768
  catch (error) {
437
769
  (0, runtime_1.emitNervesEvent)({
@@ -477,9 +809,34 @@ async function handleAudio(options, basePath, requestPath) {
477
809
  return textResponse(404, "not found");
478
810
  }
479
811
  }
812
/**
 * Answers a GET for `${basePath}/audio-stream/<callSid>/<fileName>` with the
 * audio stream of the matching playback job.
 *
 * The caller only routes here after checking that `requestPath` starts with
 * the audio-stream prefix, so the prefix is removed by length rather than
 * re-validated. Any request that does not resolve to a registered job gets a
 * plain 404: wrong number of path segments, a segment that
 * `decodeSafeSegment` turns into a falsy value, or no job stored under the
 * resulting call/job pair.
 *
 * @param options bridge options; only `agentName` is read, for event metadata
 * @param basePath bridge base path that precedes `/audio-stream/`
 * @param requestPath request path, optionally followed by a query string
 * @param jobs job store queried through `get(callSid, jobId)`
 * @returns a 404 text response, or a stream response over the job's audio
 */
async function handleAudioStream(options, basePath, requestPath, jobs) {
    const notFound = () => textResponse(404, "not found");
    const routePrefix = `${basePath}/audio-stream/`;
    // Drop the query string first, then everything up to and including the prefix.
    const [withoutQuery] = requestPath.split("?");
    const segments = withoutQuery.slice(routePrefix.length).split("/");
    if (segments.length !== 2) {
        return notFound();
    }
    const callSid = decodeSafeSegment(segments[0]);
    const fileName = decodeSafeSegment(segments[1]);
    if (!(callSid && fileName)) {
        return notFound();
    }
    // The job id is the file name without its trailing alphanumeric extension.
    const jobId = fileName.replace(/\.[A-Za-z0-9]+$/, "");
    const job = jobs.get(callSid, jobId);
    if (!job) {
        return notFound();
    }
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_twilio_stream_served",
        message: "served Twilio voice streaming audio job",
        meta: { agentName: options.agentName, callSid, jobId },
    });
    const audio = job.stream();
    return streamResponse(audio, job.mimeType);
}
480
836
  function createTwilioPhoneBridge(options) {
481
837
  new URL(options.publicBaseUrl);
482
838
  const basePath = normalizeTwilioPhoneBasePath(options.basePath);
839
+ const jobs = new TwilioAudioStreamJobStore();
483
840
  return {
484
841
  async handle(request) {
485
842
  const method = request.method.toUpperCase();
@@ -488,6 +845,9 @@ function createTwilioPhoneBridge(options) {
488
845
  if (method === "GET" && requestPath.startsWith(`${basePath}/audio/`)) {
489
846
  return handleAudio(options, basePath, requestPath);
490
847
  }
848
+ if (method === "GET" && requestPath.startsWith(`${basePath}/audio-stream/`)) {
849
+ return handleAudioStream(options, basePath, requestPath, jobs);
850
+ }
491
851
  if (method === "GET" && routePath === `${basePath}/health`) {
492
852
  return textResponse(200, "ok");
493
853
  }
@@ -505,11 +865,11 @@ function createTwilioPhoneBridge(options) {
505
865
  return textResponse(403, "invalid Twilio signature");
506
866
  }
507
867
  if (routePath === `${basePath}/incoming`)
508
- return handleIncoming(options, basePath, params);
868
+ return handleIncoming(options, basePath, params, jobs);
509
869
  if (routePath === `${basePath}/listen`)
510
870
  return handleListen(options, basePath);
511
871
  if (routePath === `${basePath}/recording`)
512
- return handleRecording(options, basePath, params);
872
+ return handleRecording(options, basePath, params, jobs);
513
873
  return textResponse(404, "not found");
514
874
  },
515
875
  };
@@ -531,6 +891,35 @@ function readRequestBody(req, limitBytes = 1_000_000) {
531
891
  req.on("error", reject);
532
892
  });
533
893
  }
894
/* v8 ignore start -- HTTP backpressure is platform-dependent in unit tests @preserve */
/**
 * Waits until `res` is ready to accept more data after a `res.write()` call
 * reported backpressure by returning false.
 *
 * The promise settles exactly once and removes every listener it attached:
 *  - resolves on "drain";
 *  - rejects with the emitted error on "error";
 *  - rejects when the response is closed before draining, or is already
 *    destroyed when called. Without this, a client that hangs up while
 *    backpressure is pending would leave the promise (and the request
 *    handler awaiting it) pending forever, because a closed response never
 *    emits "drain".
 *
 * @param res writable response exposing `once`/`off` and, optionally, `destroyed`
 * @returns {Promise<void>} resolves when more data may be written
 * @throws {Error} (as a rejection) on "error", on "close", or if `res.destroyed`
 */
function waitForDrain(res) {
    return new Promise((resolve, reject) => {
        const closedError = () => new Error("response closed before drain");
        // A destroyed response will not emit "drain"; fail right away instead
        // of subscribing to events that can no longer fire.
        if (res.destroyed) {
            reject(closedError());
            return;
        }
        const detach = () => {
            res.off("drain", onDrain);
            res.off("error", onError);
            res.off("close", onClose);
        };
        const onDrain = () => {
            detach();
            resolve();
        };
        const onError = (error) => {
            detach();
            reject(error);
        };
        const onClose = () => {
            detach();
            reject(closedError());
        };
        res.once("drain", onDrain);
        res.once("error", onError);
        res.once("close", onClose);
    });
}
/* v8 ignore stop */
910
/**
 * Sends `body` on `res` and finishes the response.
 *
 * A body that `isAsyncIterableBody` accepts is written chunk by chunk,
 * pausing on `waitForDrain` whenever `res.write()` returns false, and the
 * response is ended once the iteration completes. Any other body is handed
 * to `res.end()` in a single call.
 *
 * @param res response object providing `write` and `end`
 * @param body either an async-iterable of chunks or a value `res.end` accepts
 * @returns {Promise<void>} settles after the response has been ended
 */
async function writeResponseBody(res, body) {
    if (isAsyncIterableBody(body)) {
        for await (const piece of body) {
            const accepted = res.write(piece);
            /* v8 ignore next -- exercised only when Node reports socket backpressure @preserve */
            if (!accepted) {
                await waitForDrain(res);
            }
        }
        res.end();
        return;
    }
    res.end(body);
}
534
923
  async function startTwilioPhoneBridgeServer(options) {
535
924
  const port = options.port ?? exports.DEFAULT_TWILIO_PHONE_PORT;
536
925
  const host = options.host ?? "127.0.0.1";
@@ -545,7 +934,7 @@ async function startTwilioPhoneBridgeServer(options) {
545
934
  body,
546
935
  });
547
936
  res.writeHead(response.statusCode, response.headers);
548
- res.end(response.body);
937
+ await writeResponseBody(res, response.body);
549
938
  }
550
939
  catch (error) {
551
940
  (0, runtime_1.emitNervesEvent)({
@@ -555,8 +944,14 @@ async function startTwilioPhoneBridgeServer(options) {
555
944
  message: "Twilio voice bridge server failed a request",
556
945
  meta: { agentName: options.agentName, error: errorMessage(error) },
557
946
  });
558
- res.writeHead(500, { "content-type": "text/plain; charset=utf-8" });
559
- res.end("internal server error");
947
+ /* v8 ignore next -- defensive path for async stream failures after headers @preserve */
948
+ if (res.headersSent) {
949
+ res.destroy(error instanceof Error ? error : new Error(String(error)));
950
+ }
951
+ else {
952
+ res.writeHead(500, { "content-type": "text/plain; charset=utf-8" });
953
+ res.end("internal server error");
954
+ }
560
955
  }
561
956
  });
562
957
  await new Promise((resolve, reject) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.566",
3
+ "version": "0.1.0-alpha.568",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",