@ouro.bot/cli 0.1.0-alpha.565 → 0.1.0-alpha.567

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -105,7 +105,7 @@ Task docs do not live in this repo anymore. Planning and doing docs live in the
105
105
  - Human TTY commands share one CLI surface family: bare `ouro` opens the home deck, `ouro up` uses the boot checklist, `ouro connect`/`ouro auth verify`/`ouro repair` agree on provider and vault truth, and `ouro help`/`ouro whoami`/`ouro versions`/`ouro hatch` render through the same Ouro-branded wizard/guide language instead of raw transcript walls. Orientation commands such as root `ouro connect` may use shorter live probes, while startup and verification commands own durable readiness updates.
106
106
  - Human-facing CLI commands that can wait on browser auth, vault IO, daemon startup, daemon restart, provider checks, or connector setup use a shared progress checklist. If a cursor may blink for more than a few seconds, the command should print or animate the current step instead of going quiet.
107
107
  - CLI commands that mutate bundle config, such as vault setup or `ouro connect bluebubbles`, run bundle sync after the change when `sync.enabled` is true and report a compact `bundle sync:` line.
108
- - Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.
108
+ - Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered `speak`/`settle` text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks to Twilio by default.
109
109
  - The daemon discovers bundles dynamically from `~/AgentBundles`.
110
110
  - `ouro status` reports version, last-updated time, discovered agents, senses, and workers.
111
111
  - `bundle-meta.json` tracks the runtime version that last touched a bundle.
package/changelog.json CHANGED
@@ -1,6 +1,21 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.567",
6
+ "changes": [
7
+ "Voice transports now receive outward `speak` and `settle` text through the shared sense delivery callback path, so voice audio is driven by the same tool-required delivery semantics as chat channels.",
8
+ "Twilio phone sessions are now keyed to the stable phone voice channel instead of CallSid, while CallSid remains the per-call artifact directory.",
9
+ "Managed Twilio playback now supports streaming Play URLs backed by ElevenLabs audio chunks, with buffered playback still available for compatibility testing."
10
+ ]
11
+ },
12
+ {
13
+ "version": "0.1.0-alpha.566",
14
+ "changes": [
15
+ "Shared sense delivery recovery is now an exported contract so replaying transports such as Voice/Twilio recover tool-required `settle`/`speak` output only after outward delivery acknowledgements.",
16
+ "Sense development docs now spell out the sense/transport boundary and the tool-required delivery rules for future Voice, meeting, Twilio, and other adapter work."
17
+ ]
18
+ },
4
19
  {
5
20
  "version": "0.1.0-alpha.565",
6
21
  "changes": [
@@ -4289,11 +4289,12 @@ async function executeConnectVoice(agent, deps) {
4289
4289
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPublicUrl`,
4290
4290
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioBasePath --value /voice/agents/${agentPathSegment}/twilio`,
4291
4291
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPort --value 18910`,
4292
+ ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPlaybackMode --value stream`,
4292
4293
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioDefaultFriendId --value ari`,
4293
4294
  "Then enable agent.json: senses.voice.enabled = true and restart with `ouro up`.",
4294
4295
  `The managed Voice entrypoint will listen at POST <public-url>/voice/agents/${agentPathSegment}/twilio/incoming.`,
4295
4296
  `Standalone local smoke remains available with: node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>.`,
4296
- "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.",
4297
+ "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default.",
4297
4298
  ].join("\n");
4298
4299
  deps.writeStdout(message);
4299
4300
  return message;
@@ -506,7 +506,7 @@ function senseRuntimeGuidance(channel, preReadStatusLines) {
506
506
  lines.push("mail validation diagnostics: health checks, bounded mail tools, access logs, and UI inspection can support validation, but they are evidence inside those paths, not additional paths. If asked to name golden paths, do not include diagnostic commands, tool names, or status checks in the answer.");
507
507
  lines.push("mail diagnostic naming: `ouro doctor` is installation-wide; do not invent `ouro doctor --agent <agent>`.");
508
508
  lines.push("mail setup boundaries: do not invent `ouro auth verify --provider mail`, HEY OAuth, HEY IMAP, `ouro mcp call mail ...`, policy flags, autonomous sending, destructive mail actions, or production MX/DNS/forwarding changes. HEY export, HEY forwarding, DNS, MX cutover, sending, and destructive actions require explicit human confirmation.");
509
- lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
509
+ lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
510
510
  if (channel === "cli") {
511
511
  lines.push("cli is interactive: it is available when the user opens it, not something `ouro up` daemonizes.");
512
512
  }
@@ -40,6 +40,7 @@ var __importStar = (this && this.__importStar) || (function () {
40
40
  })();
41
41
  Object.defineProperty(exports, "__esModule", { value: true });
42
42
  exports.stripThinkBlocks = stripThinkBlocks;
43
+ exports.extractOutwardSenseDeliveryText = extractOutwardSenseDeliveryText;
43
44
  exports.runSenseTurn = runSenseTurn;
44
45
  const os = __importStar(require("os"));
45
46
  const path = __importStar(require("path"));
@@ -61,7 +62,7 @@ const pipeline_1 = require("./pipeline");
61
62
  const mcp_manager_1 = require("../repertoire/mcp-manager");
62
63
  const runtime_1 = require("../nerves/runtime");
63
64
  const RESPONSE_CAP = 50_000;
64
- const DELIVERY_TOOL_ACKS = new Map([
65
+ const OUTWARD_DELIVERY_TOOL_ACKS = new Map([
65
66
  ["settle", "(delivered)"],
66
67
  ["speak", "(spoken)"],
67
68
  ]);
@@ -117,7 +118,7 @@ function parseToolStringArg(toolCall, toolName, argName) {
117
118
  function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName) {
118
119
  if (typeof toolCallId !== "string" || !toolCallId.trim())
119
120
  return false;
120
- const expectedAck = DELIVERY_TOOL_ACKS.get(toolName);
121
+ const expectedAck = OUTWARD_DELIVERY_TOOL_ACKS.get(toolName);
121
122
  for (let index = assistantIndex + 1; index < messages.length; index++) {
122
123
  const message = messages[index];
123
124
  if (message.role !== "tool")
@@ -130,7 +131,7 @@ function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName)
130
131
  }
131
132
  return false;
132
133
  }
133
- function deliveredTextFromAssistantTools(messages, assistantIndex) {
134
+ function outwardDeliveryTextFromAssistantTools(messages, assistantIndex) {
134
135
  const assistant = messages[assistantIndex];
135
136
  if (!Array.isArray(assistant.tool_calls))
136
137
  return null;
@@ -152,13 +153,29 @@ function deliveredTextFromAssistantTools(messages, assistantIndex) {
152
153
  }
153
154
  return delivered.length > 0 ? delivered.join("\n") : null;
154
155
  }
155
- function responseFromSessionMessages(messages) {
156
/**
 * Recover the text that actually reached a friend in an outward sense turn.
 *
 * Ouro runs outward channels in tool-required mode. That means the visible
 * response may be a `settle({ answer })` or `speak({ message })` tool call
 * whose assistant message has `content: null`. The authoritative delivery
 * signal is the following tool ack:
 *
 * - `(delivered)` for `settle.answer`
 * - `(spoken)` for `speak.message`
 *
 * Inner-dialog `(settled)`, malformed tool arguments, rejected tools, and
 * interrupted tool-call sequences are not outward speech. Sense transports
 * that need to replay the turn later (Voice/Twilio TTS, future meeting audio)
 * should use this helper instead of reading `assistant.content` directly.
 *
 * Only the LAST assistant message is inspected. Its plain content (as
 * extracted by `assistantContentText`) wins; the acknowledged tool-call text
 * is used only when that extraction yields null/undefined.
 *
 * @param {Array<object> | null | undefined} messages - session transcript.
 * @returns {string | null} the outward text, or null when the transcript is
 *   missing, is not an array, or contains no assistant message.
 */
function extractOutwardSenseDeliveryText(messages) {
    // This is an exported contract: a replaying transport may hand over a
    // missing or partially loaded transcript, which must read as "no text"
    // rather than crash the caller.
    if (!Array.isArray(messages))
        return null;
    // `?.` keeps a null/undefined entry from aborting the scan.
    const assistantIndex = messages.findLastIndex((message) => message?.role === "assistant");
    if (assistantIndex < 0)
        return null;
    const assistant = messages[assistantIndex];
    return assistantContentText(assistant.content)
        ?? outwardDeliveryTextFromAssistantTools(messages, assistantIndex);
}
163
180
  /**
164
181
  * Run a single agent turn through the inbound pipeline.
@@ -218,17 +235,64 @@ async function runSenseTurn(options) {
218
235
  : [{ role: "system", content: (0, prompt_1.flattenSystemPrompt)(await (0, prompt_1.buildSystem)(channel, {}, undefined)) }];
219
236
  // Pending dir
220
237
  const pendingDir = (0, pending_1.getPendingDir)(agentName, friendId, channel, sessionKey);
221
- // Accumulate response text via callbacks
222
- let responseText = "";
238
+ // Accumulate outward text through the same callback boundary used by chat
239
+ // channels. `speak` flushes pending text immediately; `settle` is delivered
240
+ // once the turn completes.
241
+ let committedResponseText = "";
242
+ let pendingResponseText = "";
243
+ let terminalDeliveryKind = "text";
244
+ const deliveries = [];
245
+ const deliveryFailures = [];
246
+ const commitResponseText = (text) => {
247
+ const cleaned = stripThinkBlocks(text);
248
+ /* v8 ignore next -- deliverPending strips first; this is a defensive direct-call guard @preserve */
249
+ if (!cleaned)
250
+ return;
251
+ committedResponseText = committedResponseText
252
+ ? `${committedResponseText}\n${cleaned}`
253
+ : cleaned;
254
+ };
255
+ const deliveryErrorMessage = (error) => error instanceof Error ? error.message : String(error);
256
+ const deliverPending = async (kind, optionsForDelivery) => {
257
+ const text = stripThinkBlocks(pendingResponseText);
258
+ pendingResponseText = "";
259
+ if (!text)
260
+ return;
261
+ const delivery = { kind, text };
262
+ try {
263
+ await options.deliverySink?.onDelivery(delivery);
264
+ deliveries.push(delivery);
265
+ commitResponseText(text);
266
+ }
267
+ catch (error) {
268
+ const failure = { ...delivery, error: deliveryErrorMessage(error) };
269
+ deliveryFailures.push(failure);
270
+ (0, runtime_1.emitNervesEvent)({
271
+ level: "error",
272
+ component: "senses",
273
+ event: "senses.shared_turn_delivery_error",
274
+ message: "shared turn outward delivery failed",
275
+ meta: { agentName, channel, sessionKey, friendId, kind, error: failure.error, textLength: text.length },
276
+ });
277
+ if (optionsForDelivery.throwOnError)
278
+ throw error;
279
+ commitResponseText(text);
280
+ }
281
+ };
223
282
  /* v8 ignore start — no-op callback stubs; only onTextChunk does real work (covered via mock) */
224
283
  const callbacks = {
225
284
  onModelStart: () => { },
226
285
  onModelStreamStart: () => { },
227
- onTextChunk: (chunk) => { responseText += chunk; },
286
+ onTextChunk: (chunk) => { pendingResponseText += chunk; },
228
287
  onReasoningChunk: () => { },
229
288
  onToolStart: () => { },
230
- onToolEnd: () => { },
289
+ onToolEnd: (name, _summary, success) => {
290
+ if (name === "settle" && success)
291
+ terminalDeliveryKind = "settle";
292
+ },
231
293
  onError: () => { },
294
+ onClearText: () => { pendingResponseText = ""; },
295
+ flushNow: () => deliverPending("speak", { throwOnError: true }),
232
296
  };
233
297
  /* v8 ignore stop */
234
298
  // Run the pipeline
@@ -268,10 +332,11 @@ async function runSenseTurn(options) {
268
332
  /* v8 ignore stop */
269
333
  accumulateFriendTokens: tokens_1.accumulateFriendTokens,
270
334
  });
335
+ await deliverPending(terminalDeliveryKind, { throwOnError: false });
271
336
  const ponderDeferred = false;
272
337
  // Build response
273
338
  let finalResponse;
274
- if (responseText.length === 0) {
339
+ if (committedResponseText.length === 0) {
275
340
  // Agent settled but no text came through callbacks — check session transcript for the settle answer
276
341
  // Await deferred persist so the session file is up-to-date before readback
277
342
  /* v8 ignore next -- persistPromise set inside v8-ignored postTurn callback; tested via pipeline integration @preserve */
@@ -279,7 +344,7 @@ async function runSenseTurn(options) {
279
344
  await persistPromise;
280
345
  const postTurnSession = (0, context_1.loadSession)(sessPath);
281
346
  if (postTurnSession?.messages) {
282
- finalResponse = responseFromSessionMessages(postTurnSession.messages)
347
+ finalResponse = extractOutwardSenseDeliveryText(postTurnSession.messages)
283
348
  ?? "(agent responded but response was empty)";
284
349
  }
285
350
  else {
@@ -287,7 +352,7 @@ async function runSenseTurn(options) {
287
352
  }
288
353
  }
289
354
  else {
290
- finalResponse = responseText;
355
+ finalResponse = committedResponseText;
291
356
  }
292
357
  // Strip MiniMax-style <think>...</think> blocks from the final response.
293
358
  // When a reasoning-style model emits only a think block and no final answer
@@ -318,5 +383,5 @@ async function runSenseTurn(options) {
318
383
  message: "shared turn runner complete",
319
384
  meta: { agentName, channel, sessionKey, friendId, ponderDeferred, responseLength: finalResponse.length },
320
385
  });
321
- return { response: finalResponse, ponderDeferred };
386
+ return { response: finalResponse, ponderDeferred, deliveries, deliveryFailures };
322
387
  }
@@ -156,7 +156,19 @@ function createElevenLabsTtsClient(options) {
156
156
  try {
157
157
  const parsed = JSON.parse(payloadText(payload));
158
158
  if (typeof parsed.audio === "string" && parsed.audio.length > 0) {
159
- chunks.push(Buffer.from(parsed.audio, "base64"));
159
+ const chunk = Buffer.from(parsed.audio, "base64");
160
+ chunks.push(chunk);
161
+ if (request.onAudioChunk) {
162
+ try {
163
+ const chunkResult = request.onAudioChunk(chunk);
164
+ if (chunkResult && typeof chunkResult.then === "function") {
165
+ void chunkResult.catch(fail);
166
+ }
167
+ }
168
+ catch (error) {
169
+ fail(error);
170
+ }
171
+ }
160
172
  }
161
173
  if (parsed.isFinal === true) {
162
174
  finish();
@@ -4,6 +4,33 @@ exports.runVoiceLoopbackTurn = runVoiceLoopbackTurn;
4
4
  const runtime_1 = require("../../nerves/runtime");
5
5
  const shared_turn_1 = require("../shared-turn");
6
6
  const transcript_1 = require("./transcript");
7
/**
 * Project a synthesis result onto the "delivered" TTS record shape.
 *
 * Only the audio payload and its descriptive fields are copied; any other
 * property on `spoken` is left behind.
 *
 * @param {object} spoken - result returned by the TTS client's `synthesize`.
 * @returns {object} record with `status: "delivered"` plus the copied fields.
 */
function deliveredTts(spoken) {
    const { audio, byteLength, chunkCount, mimeType, modelId, voiceId } = spoken;
    return { status: "delivered", audio, byteLength, chunkCount, mimeType, modelId, voiceId };
}
18
/**
 * Merge per-delivery speech segments into one "delivered" TTS record.
 *
 * Audio payloads are concatenated in segment order and chunk counts are
 * summed. `mimeType`, `modelId` and `voiceId` are taken from the first
 * segment only, so `segments` must contain at least one entry.
 *
 * @param {Array<{tts: object}>} segments - non-empty list of speech segments.
 * @returns {object} combined TTS record with `status: "delivered"`.
 */
function aggregateSegments(segments) {
    const head = segments[0].tts;
    const parts = [];
    let totalChunks = 0;
    for (const { tts } of segments) {
        parts.push(Buffer.from(tts.audio));
        totalChunks += tts.chunkCount;
    }
    const audio = Buffer.concat(parts);
    return {
        status: "delivered",
        audio,
        byteLength: audio.byteLength,
        chunkCount: totalChunks,
        mimeType: head.mimeType,
        modelId: head.modelId,
        voiceId: head.voiceId,
    };
}
31
/**
 * Turn any thrown value into a printable message.
 *
 * @param {unknown} error - the caught value.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function deliveryErrorMessage(error) {
    if (error instanceof Error)
        return error.message;
    return String(error);
}
7
34
  async function runVoiceLoopbackTurn(options) {
8
35
  const runSenseTurn = options.runSenseTurn ?? shared_turn_1.runSenseTurn;
9
36
  let userMessage;
@@ -31,30 +58,105 @@ async function runVoiceLoopbackTurn(options) {
31
58
  utteranceId: options.transcript.utteranceId,
32
59
  },
33
60
  });
61
+ const speechSegments = [];
62
+ const speechDeliveryErrors = [];
63
+ let deliveryIndex = 0;
64
+ const synthesizeDelivery = async (delivery) => {
65
+ deliveryIndex += 1;
66
+ const segmentUtteranceId = `${options.transcript.utteranceId}-${deliveryIndex}-${delivery.kind}`;
67
+ try {
68
+ const spoken = await options.tts.synthesize({
69
+ utteranceId: segmentUtteranceId,
70
+ text: delivery.text,
71
+ onAudioChunk: options.onAudioChunk,
72
+ });
73
+ speechSegments.push({
74
+ kind: delivery.kind,
75
+ text: delivery.text,
76
+ utteranceId: segmentUtteranceId,
77
+ tts: deliveredTts(spoken),
78
+ });
79
+ }
80
+ catch (error) {
81
+ const failure = {
82
+ kind: delivery.kind,
83
+ text: delivery.text,
84
+ utteranceId: segmentUtteranceId,
85
+ error: deliveryErrorMessage(error),
86
+ };
87
+ speechDeliveryErrors.push(failure);
88
+ throw error;
89
+ }
90
+ };
34
91
  const turn = await runSenseTurn({
35
92
  agentName: options.agentName,
36
93
  channel: "voice",
37
94
  friendId: options.friendId,
38
95
  sessionKey: options.sessionKey,
39
96
  userMessage,
97
+ deliverySink: { onDelivery: synthesizeDelivery },
40
98
  });
99
+ if (speechSegments.length > 0) {
100
+ const tts = aggregateSegments(speechSegments);
101
+ const result = {
102
+ responseText: turn.response,
103
+ ponderDeferred: turn.ponderDeferred,
104
+ tts,
105
+ speechSegments,
106
+ speechDeliveryErrors,
107
+ };
108
+ (0, runtime_1.emitNervesEvent)({
109
+ component: "senses",
110
+ event: "senses.voice_turn_end",
111
+ message: "voice loopback turn delivered speech",
112
+ meta: {
113
+ utteranceId: options.transcript.utteranceId,
114
+ responseLength: turn.response.length,
115
+ segmentCount: speechSegments.length,
116
+ byteLength: tts.byteLength,
117
+ },
118
+ });
119
+ return result;
120
+ }
121
+ const turnDeliveryFailures = turn.deliveryFailures ?? [];
122
+ if (speechDeliveryErrors.length > 0 || turnDeliveryFailures.length > 0) {
123
+ const firstError = speechDeliveryErrors[0]?.error ?? turnDeliveryFailures[0].error;
124
+ (0, runtime_1.emitNervesEvent)({
125
+ level: "error",
126
+ component: "senses",
127
+ event: "senses.voice_turn_tts_error",
128
+ message: "voice loopback TTS failed after text response",
129
+ meta: { utteranceId: options.transcript.utteranceId, error: firstError, responseLength: turn.response.length },
130
+ });
131
+ return {
132
+ responseText: turn.response,
133
+ ponderDeferred: turn.ponderDeferred,
134
+ tts: {
135
+ status: "failed",
136
+ error: firstError,
137
+ },
138
+ speechSegments,
139
+ speechDeliveryErrors,
140
+ };
141
+ }
41
142
  try {
42
143
  const spoken = await options.tts.synthesize({
43
144
  utteranceId: options.transcript.utteranceId,
44
145
  text: turn.response,
146
+ onAudioChunk: options.onAudioChunk,
45
147
  });
148
+ const tts = deliveredTts(spoken);
46
149
  const result = {
47
150
  responseText: turn.response,
48
151
  ponderDeferred: turn.ponderDeferred,
49
- tts: {
50
- status: "delivered",
51
- audio: spoken.audio,
52
- byteLength: spoken.byteLength,
53
- chunkCount: spoken.chunkCount,
54
- mimeType: spoken.mimeType,
55
- modelId: spoken.modelId,
56
- voiceId: spoken.voiceId,
57
- },
152
+ tts,
153
+ speechSegments: [{
154
+ kind: "text",
155
+ text: turn.response,
156
+ utteranceId: options.transcript.utteranceId,
157
+ tts,
158
+ }],
159
+ speechDeliveryErrors,
58
160
  };
59
161
  (0, runtime_1.emitNervesEvent)({
60
162
  component: "senses",
@@ -80,6 +182,8 @@ async function runVoiceLoopbackTurn(options) {
80
182
  status: "failed",
81
183
  error: message,
82
184
  },
185
+ speechSegments,
186
+ speechDeliveryErrors,
83
187
  };
84
188
  }
85
189
  }
@@ -193,6 +193,8 @@ function resolveTwilioPhoneTransportRuntime(options) {
193
193
  recordMaxLengthSeconds: overrides.recordMaxLengthSeconds
194
194
  ?? configNumber(options.machineConfig, "voice.twilioRecordMaxLengthSeconds")
195
195
  ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
196
+ playbackMode: overrides.playbackMode
197
+ ?? (0, twilio_phone_1.normalizeTwilioPhonePlaybackMode)(configString(options.machineConfig, "voice.twilioPlaybackMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE),
196
198
  };
197
199
  return { status: "configured", settings };
198
200
  }
@@ -259,6 +261,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
259
261
  defaultFriendId: settings.defaultFriendId,
260
262
  recordTimeoutSeconds: settings.recordTimeoutSeconds,
261
263
  recordMaxLengthSeconds: settings.recordMaxLengthSeconds,
264
+ playbackMode: settings.playbackMode,
262
265
  });
263
266
  (0, runtime_1.emitNervesEvent)({
264
267
  component: "senses",
@@ -33,9 +33,11 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
36
+ exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
37
37
  exports.normalizeTwilioPhoneBasePath = normalizeTwilioPhoneBasePath;
38
+ exports.normalizeTwilioPhonePlaybackMode = normalizeTwilioPhonePlaybackMode;
38
39
  exports.twilioPhoneWebhookUrl = twilioPhoneWebhookUrl;
40
+ exports.twilioPhoneVoiceSessionKey = twilioPhoneVoiceSessionKey;
39
41
  exports.computeTwilioSignature = computeTwilioSignature;
40
42
  exports.validateTwilioSignature = validateTwilioSignature;
41
43
  exports.twilioRecordingMediaUrl = twilioRecordingMediaUrl;
@@ -54,6 +56,7 @@ exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
54
56
  exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
55
57
  exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = 30;
56
58
  exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = "/voice/twilio";
59
+ exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = "stream";
57
60
  function bodyText(body) {
58
61
  if (body === undefined)
59
62
  return "";
@@ -104,6 +107,21 @@ function binaryResponse(body, contentType) {
104
107
  body,
105
108
  };
106
109
  }
110
/**
 * Build a 200 response descriptor for a streamed body.
 *
 * The response is marked `cache-control: no-store`.
 *
 * @param {*} body - payload handed through untouched.
 * @param {string} contentType - value for the `content-type` header.
 * @returns {{statusCode: number, headers: object, body: *}} response descriptor.
 */
function streamResponse(body, contentType) {
    const headers = {
        "content-type": contentType,
        "cache-control": "no-store",
    };
    return { statusCode: 200, headers, body };
}
120
/**
 * Report whether a response body is an object exposing `Symbol.asyncIterator`.
 *
 * Primitives, `null`/`undefined` and functions are rejected before the
 * `in` check, so this never throws.
 *
 * @param {*} body - candidate response body.
 * @returns {boolean} true for non-null objects that carry `Symbol.asyncIterator`.
 */
function isAsyncIterableBody(body) {
    if (body === null || typeof body !== "object")
        return false;
    return Symbol.asyncIterator in body;
}
107
125
  function escapeXml(input) {
108
126
  return input
109
127
  .replace(/&/g, "&amp;")
@@ -127,6 +145,12 @@ function normalizeTwilioPhoneBasePath(value = exports.TWILIO_PHONE_WEBHOOK_BASE_
127
145
  }
128
146
  return withoutTrailingSlash;
129
147
  }
148
/**
 * Normalize a configured Twilio playback mode.
 *
 * `null`/`undefined` fall back to `DEFAULT_TWILIO_PHONE_PLAYBACK_MODE`. String
 * values are trimmed and lower-cased before being matched.
 *
 * @param {string | null | undefined} value - raw mode from config or overrides.
 * @returns {"stream" | "buffered"} the normalized mode.
 * @throws {Error} when the value is not a string or is not a supported mode.
 */
function normalizeTwilioPhonePlaybackMode(value) {
    const candidate = value ?? exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE;
    // A non-string (e.g. a number or boolean from config) used to blow up on
    // `.trim()` with an opaque TypeError; report it as an invalid mode instead.
    if (typeof candidate === "string") {
        const normalized = candidate.trim().toLowerCase();
        if (normalized === "stream" || normalized === "buffered")
            return normalized;
    }
    throw new Error(`invalid Twilio phone playback mode: ${String(value)}`);
}
130
154
  function twilioPhoneWebhookUrl(publicBaseUrl, basePath = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
131
155
  return routeUrl(publicBaseUrl, `${normalizeTwilioPhoneBasePath(basePath)}/incoming`);
132
156
  }
@@ -179,6 +203,25 @@ function friendIdFromCaller(from, callSid) {
179
203
  function voiceFriendId(options, from, callSid) {
180
204
  return options.defaultFriendId?.trim() || friendIdFromCaller(from, callSid);
181
205
  }
206
/**
 * Reduce a phone-like identifier to its ASCII letters and digits.
 *
 * When nothing alphanumeric survives, the raw input is handed to
 * `safeSegment` instead so the caller still gets that helper's result.
 *
 * @param {string} input - caller or line identifier, e.g. an E.164 number.
 * @returns {string} alphanumeric-only text, or `safeSegment(input)` as fallback.
 */
function phoneIdentitySegment(input) {
    const alphanumericOnly = input.replace(/[^0-9A-Za-z]+/g, "");
    if (alphanumericOnly)
        return alphanumericOnly;
    return safeSegment(input);
}
210
/**
 * Derive the stable voice session key for a Twilio phone conversation.
 *
 * The key is built from who is talking (the configured default friend, else
 * the caller number) and which line was dialed, so it stays the same across
 * calls. CallSid is only used when neither identity is available.
 *
 * All inputs are trimmed before use. `defaultFriendId` in particular is
 * trimmed the same way `voiceFriendId` trims it, so surrounding whitespace in
 * config cannot produce a different session key for the same friend, and a
 * blank CallSid falls back to "incoming" as it does in `handleIncoming`.
 *
 * @param {{defaultFriendId?: string, from?: string, to?: string, callSid?: string}} options
 * @returns {string} one of `twilio-phone-<friend>-via-<line>`,
 *   `twilio-phone-<friend>`, `twilio-phone-line-<line>` or
 *   `twilio-phone-<callSid | "incoming">`.
 */
function twilioPhoneVoiceSessionKey(options) {
    const defaultFriendId = options.defaultFriendId?.trim();
    const from = options.from?.trim();
    const to = options.to?.trim();
    let friendSegment = "";
    if (defaultFriendId)
        friendSegment = safeSegment(defaultFriendId);
    else if (from)
        friendSegment = phoneIdentitySegment(from);
    const lineSegment = to ? phoneIdentitySegment(to) : "";
    if (friendSegment && lineSegment)
        return `twilio-phone-${friendSegment}-via-${lineSegment}`;
    if (friendSegment)
        return `twilio-phone-${friendSegment}`;
    if (lineSegment)
        return `twilio-phone-line-${lineSegment}`;
    // Last resort: no identity at all, so the key is per call.
    return `twilio-phone-${safeSegment(options.callSid?.trim() || "incoming")}`;
}
182
225
  function callConnectedPrompt(params) {
183
226
  const from = params.From?.trim();
184
227
  const to = params.To?.trim();
@@ -215,6 +258,7 @@ function parseRecordingParams(params) {
215
258
  recordingSid,
216
259
  recordingUrl,
217
260
  from: params.From?.trim() ?? "",
261
+ to: params.To?.trim() ?? "",
218
262
  };
219
263
  }
220
264
  function recordAgainResponse(options, basePath, message) {
@@ -238,6 +282,180 @@ function nextInputTwiml(options, basePath, mode) {
238
282
  maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
239
283
  });
240
284
  }
285
/**
 * In-memory audio buffer for one streamed playback job.
 *
 * A producer calls `append` as audio arrives and finishes with `complete` or
 * `fail`; a consumer iterates `stream()`. The job starts "pending" and moves
 * at most once to "completed" or "failed"; later calls are ignored.
 */
class TwilioAudioStreamJob {
    callSid;
    jobId;
    mimeType;
    chunks = [];
    waiters = new Set();
    status = "pending";
    failure = null;
    byteLength = 0;
    constructor(callSid, jobId, mimeType) {
        this.callSid = callSid;
        this.jobId = jobId;
        this.mimeType = mimeType;
    }
    /** Buffer a copy of `chunk`; empty chunks and non-pending jobs are ignored. */
    append(chunk) {
        /* v8 ignore next -- append is only called while pending with non-empty chunks in bridge flow @preserve */
        const accepted = this.status === "pending" && chunk.byteLength !== 0;
        if (!accepted)
            return;
        const copy = Buffer.from(chunk);
        this.chunks.push(copy);
        this.byteLength += copy.byteLength;
        this.notify();
    }
    /** Mark the job finished and wake any waiting readers. */
    complete() {
        /* v8 ignore next -- completion is single-shot inside startTwilioPlaybackStreamJob @preserve */
        if (this.status === "pending") {
            this.status = "completed";
            this.notify();
        }
    }
    /** Mark the job failed, remember the error message, and wake readers. */
    fail(error) {
        /* v8 ignore next -- failure is single-shot inside startTwilioPlaybackStreamJob @preserve */
        if (this.status === "pending") {
            this.status = "failed";
            this.failure = errorMessage(error);
            this.notify();
        }
    }
    /**
     * Yield every buffered chunk from the start, then wait for more.
     *
     * Ends normally once the job is completed. On failure it throws only if
     * nothing was yielded yet; after at least one chunk it simply ends.
     */
    async *stream() {
        let cursor = 0;
        let emitted = false;
        while (true) {
            for (; cursor < this.chunks.length; cursor += 1) {
                emitted = true;
                yield this.chunks[cursor];
            }
            switch (this.status) {
                case "completed":
                    return;
                case "failed":
                    if (emitted)
                        return;
                    throw new Error(this.failure);
            }
            // Still pending: park until append/complete/fail calls notify().
            await new Promise((wake) => {
                this.waiters.add(wake);
            });
        }
    }
    /** Release every parked reader; the set is emptied before they run. */
    notify() {
        const parked = Array.from(this.waiters);
        this.waiters.clear();
        parked.forEach((wake) => wake());
    }
}
350
/**
 * Registry of streaming playback jobs, addressed by call and job id.
 *
 * Creating a job under an existing call/job pair replaces the earlier entry.
 */
class TwilioAudioStreamJobStore {
    jobs = new Map();
    /** Create and register a job; `mimeType` defaults to "audio/mpeg". */
    create(callSid, jobId, mimeType = "audio/mpeg") {
        const slot = this.key(callSid, jobId);
        const created = new TwilioAudioStreamJob(callSid, jobId, mimeType);
        this.jobs.set(slot, created);
        return created;
    }
    /** Look up a job, returning null when none is registered. */
    get(callSid, jobId) {
        const found = this.jobs.get(this.key(callSid, jobId));
        return found ?? null;
    }
    /* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
    /** Forget a job; unknown pairs are a no-op. */
    delete(callSid, jobId) {
        const slot = this.key(callSid, jobId);
        this.jobs.delete(slot);
    }
    /* v8 ignore stop */
    /** Compose the map key for a call/job pair. */
    key(callSid, jobId) {
        return `${callSid}/${jobId}`;
    }
}
370
/**
 * List the TTS record of every speech segment in a voice turn, in order.
 *
 * @param {{speechSegments: Array<{tts: object}>}} turn - voice turn result.
 * @returns {Array<object>} the segments' `tts` values.
 */
function deliveredSegments(turn) {
    const { speechSegments } = turn;
    return speechSegments.map(({ tts }) => tts);
}
373
/**
 * Persist one playback artifact per speech segment and return their URLs.
 *
 * Segments are written one at a time, in order, into `options.callDir`. Each
 * URL points at `<basePath>/audio/<safeCallSid>/<artifact file name>` under
 * the bridge's public base URL.
 *
 * @param {object} options - reads `turn.speechSegments`, `callDir`,
 *   `bridgeOptions.publicBaseUrl`, `basePath` and `safeCallSid`.
 * @returns {Promise<string[]>} playback URLs in segment order.
 */
async function writeVoiceTurnPlaybackArtifacts(options) {
    const { bridgeOptions, basePath, callDir, safeCallSid, turn } = options;
    const playbackUrls = [];
    for (const { utteranceId, tts } of turn.speechSegments) {
        const { audioPath } = await (0, playback_1.writeVoicePlaybackArtifact)({
            utteranceId,
            delivery: tts,
            outputDir: callDir,
        });
        const fileName = path.basename(audioPath);
        const route = `${basePath}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(fileName)}`;
        playbackUrls.push(routeUrl(bridgeOptions.publicBaseUrl, route));
    }
    return playbackUrls;
}
385
/**
 * Render one `playTwiml` fragment per URL and concatenate them in order.
 *
 * @param {string[]} urls - audio URLs to play back to back.
 * @returns {string} the joined TwiML fragments ("" for an empty list).
 */
function playManyTwiml(urls) {
    // Call playTwiml with the URL only: passing it straight to map() would
    // also hand it the element index and the array as extra arguments.
    return urls.map((url) => playTwiml(url)).join("");
}
388
/**
 * Build the public URL Twilio fetches to play a streaming job's audio.
 *
 * The path is `<basePath>/audio-stream/<safeCallSid>/<jobId>.mp3`, with the
 * last two segments URI-encoded.
 *
 * @param {{publicBaseUrl: string}} options - bridge options.
 * @param {string} basePath - normalized webhook base path.
 * @param {string} safeCallSid - call identifier segment.
 * @param {string} jobId - stream job identifier.
 * @returns {*} whatever `routeUrl` returns for that base URL and path.
 */
function streamAudioUrl(options, basePath, safeCallSid, jobId) {
    const { publicBaseUrl } = options;
    const callSegment = encodeURIComponent(safeCallSid);
    const fileSegment = encodeURIComponent(`${jobId}.mp3`);
    return routeUrl(publicBaseUrl, `${basePath}/audio-stream/${callSegment}/${fileSegment}`);
}
391
/**
 * Drop a finished stream job from the store after a retention delay.
 *
 * The job is kept for a while after it ends so a playback request that
 * arrives late can still find it. The timer is unref'd, so a pending cleanup
 * never keeps the process alive.
 *
 * @param {{delete(callSid: string, jobId: string): void}} jobs - job store.
 * @param {string} safeCallSid - call identifier the job was created under.
 * @param {string} jobId - job identifier.
 * @param {number} [delayMs] - retention time in milliseconds; defaults to
 *   five minutes.
 * @returns {void}
 */
function scheduleJobCleanup(jobs, safeCallSid, jobId, delayMs = 5 * 60_000) {
    /* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
    const cleanup = setTimeout(() => {
        jobs.delete(safeCallSid, jobId);
    }, delayMs);
    cleanup.unref?.();
    /* v8 ignore stop */
}
399
/**
 * Register a streaming playback job and run the voice turn in the background.
 *
 * The job is returned immediately; audio reaches it through the chunk
 * callback passed to `options.runTurn`. If the turn streamed nothing but did
 * produce speech segments, their buffered audio is appended instead. A turn
 * with no speech segments fails the job. Artifact persistence is best-effort:
 * a failure there is logged as a warning and the job still completes. Cleanup
 * is scheduled whether the job completed or failed.
 *
 * @param {object} options - reads `jobs`, `safeCallSid`, `jobId`, `runTurn`,
 *   `bridgeOptions`, `basePath`, `callDir`, `baseUtteranceId` and `meta`.
 * @returns {object} the job created by `options.jobs.create`.
 */
function startTwilioPlaybackStreamJob(options) {
    const job = options.jobs.create(options.safeCallSid, options.jobId);
    const pump = async () => {
        try {
            const turn = await options.runTurn((chunk) => job.append(chunk));
            const deliveries = deliveredSegments(turn);
            if (deliveries.length === 0) {
                /* v8 ignore next -- runVoiceLoopbackTurn cannot return delivered TTS with zero speech segments @preserve */
                if (turn.tts.status === "failed")
                    throw new Error(turn.tts.error);
                /* v8 ignore next -- runVoiceLoopbackTurn emits a speech segment whenever TTS is delivered @preserve */
                throw new Error("voice turn produced no audio");
            }
            // Nothing arrived through the chunk callback: fall back to the
            // audio already buffered on each delivered segment.
            if (job.byteLength === 0) {
                deliveries.forEach((delivery) => {
                    job.append(delivery.audio);
                });
            }
            try {
                await writeVoiceTurnPlaybackArtifacts({
                    bridgeOptions: options.bridgeOptions,
                    basePath: options.basePath,
                    callDir: options.callDir,
                    safeCallSid: options.safeCallSid,
                    baseUtteranceId: options.baseUtteranceId,
                    turn,
                });
            }
            catch (artifactError) {
                // The caller already has the audio; only the on-disk copy is missing.
                (0, runtime_1.emitNervesEvent)({
                    level: "warn",
                    component: "senses",
                    event: "senses.voice_twilio_stream_artifact_error",
                    message: "Twilio stream audio was delivered but artifact persistence failed",
                    meta: { ...options.meta, error: errorMessage(artifactError) },
                });
            }
            job.complete();
            (0, runtime_1.emitNervesEvent)({
                component: "senses",
                event: "senses.voice_twilio_stream_end",
                message: "finished Twilio streaming voice playback job",
                meta: { ...options.meta, byteLength: String(job.byteLength), segmentCount: String(deliveries.length) },
            });
        }
        catch (error) {
            job.fail(error);
            (0, runtime_1.emitNervesEvent)({
                level: "error",
                component: "senses",
                event: "senses.voice_twilio_stream_error",
                message: "Twilio streaming voice playback job failed",
                meta: { ...options.meta, error: errorMessage(error) },
            });
        }
        finally {
            scheduleJobCleanup(options.jobs, options.safeCallSid, options.jobId);
        }
    };
    // Fire and forget: every failure is routed to job.fail() inside pump().
    void pump();
    return job;
}
241
459
  async function runPhonePromptTurn(options) {
242
460
  const transcript = (0, transcript_1.buildVoiceTranscript)({
243
461
  utteranceId: options.utteranceId,
@@ -256,13 +474,15 @@ async function runPhonePromptTurn(options) {
256
474
  if (turn.tts.status !== "delivered") {
257
475
  return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${after}`);
258
476
  }
259
- const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
260
- utteranceId: options.utteranceId,
261
- delivery: turn.tts,
262
- outputDir: options.callDir,
477
+ const audioUrls = await writeVoiceTurnPlaybackArtifacts({
478
+ bridgeOptions: options.bridgeOptions,
479
+ basePath: options.basePath,
480
+ callDir: options.callDir,
481
+ safeCallSid: options.safeCallSid,
482
+ baseUtteranceId: options.utteranceId,
483
+ turn,
263
484
  });
264
- const audioUrl = routeUrl(options.bridgeOptions.publicBaseUrl, `${options.basePath}/audio/${encodeURIComponent(options.safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
265
- return xmlResponse(`${playTwiml(audioUrl)}${after}`);
485
+ return xmlResponse(`${playManyTwiml(audioUrls)}${after}`);
266
486
  }
267
487
  function computeTwilioSignature(input) {
268
488
  const payload = input.url + Object.keys(input.params)
@@ -309,27 +529,62 @@ function verifyRequest(options, request, params) {
309
529
  signature: headerValue(request.headers, "x-twilio-signature"),
310
530
  });
311
531
  }
312
- async function handleIncoming(options, basePath, params) {
532
+ async function handleIncoming(options, basePath, params, jobs) {
313
533
  const callSid = params.CallSid?.trim() || "incoming";
314
534
  const safeCallSid = safeSegment(callSid);
315
535
  const callDir = path.join(options.outputDir, safeCallSid);
316
536
  const utteranceId = `twilio-${safeCallSid}-connected`;
537
+ const friendId = voiceFriendId(options, params.From?.trim() ?? "", callSid);
538
+ const sessionKey = twilioPhoneVoiceSessionKey({
539
+ defaultFriendId: options.defaultFriendId,
540
+ from: params.From?.trim() ?? "",
541
+ to: params.To?.trim() ?? "",
542
+ callSid,
543
+ });
317
544
  (0, runtime_1.emitNervesEvent)({
318
545
  component: "senses",
319
546
  event: "senses.voice_twilio_incoming",
320
547
  message: "Twilio voice call connected",
321
- meta: { agentName: options.agentName, callSid: safeCallSid },
548
+ meta: { agentName: options.agentName, callSid: safeCallSid, sessionKey },
322
549
  });
323
550
  try {
324
551
  await fs.mkdir(callDir, { recursive: true });
552
+ if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
553
+ const transcript = (0, transcript_1.buildVoiceTranscript)({
554
+ utteranceId,
555
+ text: callConnectedPrompt(params),
556
+ source: "loopback",
557
+ });
558
+ const jobId = safeSegment(utteranceId);
559
+ startTwilioPlaybackStreamJob({
560
+ jobs,
561
+ bridgeOptions: options,
562
+ basePath,
563
+ callDir,
564
+ safeCallSid,
565
+ jobId,
566
+ baseUtteranceId: utteranceId,
567
+ runTurn: (onAudioChunk) => (0, turn_1.runVoiceLoopbackTurn)({
568
+ agentName: options.agentName,
569
+ friendId,
570
+ sessionKey,
571
+ transcript,
572
+ tts: options.tts,
573
+ runSenseTurn: options.runSenseTurn,
574
+ onAudioChunk,
575
+ }),
576
+ meta: { agentName: options.agentName, callSid: safeCallSid, utteranceId },
577
+ });
578
+ return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${nextInputTwiml(options, basePath, "record")}`);
579
+ }
325
580
  return await runPhonePromptTurn({
326
581
  bridgeOptions: options,
327
582
  basePath,
328
583
  callDir,
329
584
  safeCallSid,
330
585
  utteranceId,
331
- friendId: voiceFriendId(options, params.From?.trim() ?? "", callSid),
332
- sessionKey: `twilio-${safeCallSid}`,
586
+ friendId,
587
+ sessionKey,
333
588
  promptText: callConnectedPrompt(params),
334
589
  afterPlayback: "record",
335
590
  });
@@ -358,7 +613,7 @@ async function handleListen(options, basePath) {
358
613
  maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
359
614
  }));
360
615
  }
361
- async function handleRecording(options, basePath, params) {
616
+ async function handleRecording(options, basePath, params, jobs) {
362
617
  const recording = parseRecordingParams(params);
363
618
  if (!recording) {
364
619
  (0, runtime_1.emitNervesEvent)({
@@ -376,13 +631,64 @@ async function handleRecording(options, basePath, params) {
376
631
  const inputPath = path.join(callDir, `${safeRecordingSid}.wav`);
377
632
  const utteranceId = `twilio-${safeCallSid}-${safeRecordingSid}`;
378
633
  const downloadRecording = options.downloadRecording ?? defaultTwilioRecordingDownloader;
634
+ const friendId = voiceFriendId(options, recording.from, recording.callSid);
635
+ const sessionKey = twilioPhoneVoiceSessionKey({
636
+ defaultFriendId: options.defaultFriendId,
637
+ from: recording.from,
638
+ to: recording.to,
639
+ callSid: recording.callSid,
640
+ });
379
641
  (0, runtime_1.emitNervesEvent)({
380
642
  component: "senses",
381
643
  event: "senses.voice_twilio_turn_start",
382
644
  message: "starting Twilio voice turn",
383
- meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid },
645
+ meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, sessionKey },
384
646
  });
385
647
  try {
648
+ if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
649
+ const jobId = safeSegment(utteranceId);
650
+ startTwilioPlaybackStreamJob({
651
+ jobs,
652
+ bridgeOptions: options,
653
+ basePath,
654
+ callDir,
655
+ safeCallSid,
656
+ jobId,
657
+ baseUtteranceId: utteranceId,
658
+ runTurn: async (onAudioChunk) => {
659
+ await fs.mkdir(callDir, { recursive: true });
660
+ const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
661
+ const audio = await downloadRecording({
662
+ recordingUrl: mediaUrl,
663
+ accountSid: options.twilioAccountSid?.trim() || undefined,
664
+ authToken: options.twilioAuthToken?.trim() || undefined,
665
+ });
666
+ await fs.writeFile(inputPath, audio);
667
+ const transcript = await options.transcriber.transcribe({
668
+ utteranceId,
669
+ audioPath: inputPath,
670
+ });
671
+ const turnTranscript = isNoSpeechTranscript(transcript.text)
672
+ ? (0, transcript_1.buildVoiceTranscript)({
673
+ utteranceId: `${utteranceId}-nospeech`,
674
+ text: noSpeechPrompt(),
675
+ source: "loopback",
676
+ })
677
+ : transcript;
678
+ return (0, turn_1.runVoiceLoopbackTurn)({
679
+ agentName: options.agentName,
680
+ friendId,
681
+ sessionKey,
682
+ transcript: turnTranscript,
683
+ tts: options.tts,
684
+ runSenseTurn: options.runSenseTurn,
685
+ onAudioChunk,
686
+ });
687
+ },
688
+ meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, utteranceId },
689
+ });
690
+ return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${redirectTwiml(options.publicBaseUrl, basePath)}`);
691
+ }
386
692
  await fs.mkdir(callDir, { recursive: true });
387
693
  const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
388
694
  const audio = await downloadRecording({
@@ -402,16 +708,16 @@ async function handleRecording(options, basePath, params) {
402
708
  callDir,
403
709
  safeCallSid,
404
710
  utteranceId: `${utteranceId}-nospeech`,
405
- friendId: voiceFriendId(options, recording.from, recording.callSid),
406
- sessionKey: `twilio-${safeCallSid}`,
711
+ friendId,
712
+ sessionKey,
407
713
  promptText: noSpeechPrompt(),
408
714
  afterPlayback: "redirect",
409
715
  });
410
716
  }
411
717
  const turn = await (0, turn_1.runVoiceLoopbackTurn)({
412
718
  agentName: options.agentName,
413
- friendId: voiceFriendId(options, recording.from, recording.callSid),
414
- sessionKey: `twilio-${safeCallSid}`,
719
+ friendId,
720
+ sessionKey,
415
721
  transcript,
416
722
  tts: options.tts,
417
723
  runSenseTurn: options.runSenseTurn,
@@ -419,19 +725,21 @@ async function handleRecording(options, basePath, params) {
419
725
  if (turn.tts.status !== "delivered") {
420
726
  return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
421
727
  }
422
- const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
423
- utteranceId,
424
- delivery: turn.tts,
425
- outputDir: callDir,
728
+ const audioUrls = await writeVoiceTurnPlaybackArtifacts({
729
+ bridgeOptions: options,
730
+ basePath,
731
+ callDir,
732
+ safeCallSid,
733
+ baseUtteranceId: utteranceId,
734
+ turn,
426
735
  });
427
- const audioUrl = routeUrl(options.publicBaseUrl, `${basePath}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
428
736
  (0, runtime_1.emitNervesEvent)({
429
737
  component: "senses",
430
738
  event: "senses.voice_twilio_turn_end",
431
739
  message: "finished Twilio voice turn",
432
- meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, audioPath: playback.audioPath },
740
+ meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, playbackCount: audioUrls.length },
433
741
  });
434
- return xmlResponse(`${playTwiml(audioUrl)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
742
+ return xmlResponse(`${playManyTwiml(audioUrls)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
435
743
  }
436
744
  catch (error) {
437
745
  (0, runtime_1.emitNervesEvent)({
@@ -477,9 +785,34 @@ async function handleAudio(options, basePath, requestPath) {
477
785
  return textResponse(404, "not found");
478
786
  }
479
787
  }
788
/**
 * Serves the audio of a streaming playback job over HTTP.
 *
 * The request path is expected to look like
 * `<basePath>/audio-stream/<callSid>/<fileName>`; the job id is the file name
 * with its trailing extension removed.
 *
 * @param {object} options - Bridge options; only `agentName` is read here (for the event meta).
 * @param {string} basePath - Normalized bridge base path.
 * @param {string} requestPath - Raw request path, possibly carrying a query string.
 * @param {object} jobs - Job store exposing `get(callSid, jobId)`.
 * @returns {Promise<object>} A 404 text response when the path is malformed or no
 *   matching job exists, otherwise a streaming response over the job's audio.
 */
async function handleAudioStream(options, basePath, requestPath, jobs) {
    const notFound = () => textResponse(404, "not found");
    // Ignore any query string before splitting the route into segments.
    const [pathname] = requestPath.split("?");
    const segments = pathname.slice(`${basePath}/audio-stream/`.length).split("/");
    if (segments.length !== 2) {
        return notFound();
    }
    // Both segments are decoded up front; a falsy result from either is treated as "not found".
    const callSid = decodeSafeSegment(segments[0]);
    const fileName = decodeSafeSegment(segments[1]);
    if (!(callSid && fileName)) {
        return notFound();
    }
    // The URL names a file (e.g. with an audio extension); jobs are keyed without it.
    const jobId = fileName.replace(/\.[A-Za-z0-9]+$/, "");
    const job = jobs.get(callSid, jobId);
    if (!job) {
        return notFound();
    }
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_twilio_stream_served",
        message: "served Twilio voice streaming audio job",
        meta: { agentName: options.agentName, callSid, jobId },
    });
    return streamResponse(job.stream(), job.mimeType);
}
480
812
  function createTwilioPhoneBridge(options) {
481
813
  new URL(options.publicBaseUrl);
482
814
  const basePath = normalizeTwilioPhoneBasePath(options.basePath);
815
+ const jobs = new TwilioAudioStreamJobStore();
483
816
  return {
484
817
  async handle(request) {
485
818
  const method = request.method.toUpperCase();
@@ -488,6 +821,9 @@ function createTwilioPhoneBridge(options) {
488
821
  if (method === "GET" && requestPath.startsWith(`${basePath}/audio/`)) {
489
822
  return handleAudio(options, basePath, requestPath);
490
823
  }
824
+ if (method === "GET" && requestPath.startsWith(`${basePath}/audio-stream/`)) {
825
+ return handleAudioStream(options, basePath, requestPath, jobs);
826
+ }
491
827
  if (method === "GET" && routePath === `${basePath}/health`) {
492
828
  return textResponse(200, "ok");
493
829
  }
@@ -505,11 +841,11 @@ function createTwilioPhoneBridge(options) {
505
841
  return textResponse(403, "invalid Twilio signature");
506
842
  }
507
843
  if (routePath === `${basePath}/incoming`)
508
- return handleIncoming(options, basePath, params);
844
+ return handleIncoming(options, basePath, params, jobs);
509
845
  if (routePath === `${basePath}/listen`)
510
846
  return handleListen(options, basePath);
511
847
  if (routePath === `${basePath}/recording`)
512
- return handleRecording(options, basePath, params);
848
+ return handleRecording(options, basePath, params, jobs);
513
849
  return textResponse(404, "not found");
514
850
  },
515
851
  };
@@ -531,6 +867,35 @@ function readRequestBody(req, limitBytes = 1_000_000) {
531
867
  req.on("error", reject);
532
868
  });
533
869
  }
870
+ /* v8 ignore start -- HTTP backpressure is platform-dependent in unit tests @preserve */
871
/**
 * Waits until a backpressured response can accept more data.
 *
 * Resolves on the next "drain" event. Rejects with the emitted error on
 * "error", and rejects when the response closes (or is already destroyed)
 * before draining, so a caller that hung up mid-stream cannot leave the
 * writer waiting forever.
 *
 * @param {object} res - Writable response exposing `once`/`off` and, optionally, `destroyed`.
 * @returns {Promise<void>} Settles exactly once; all listeners added here are removed on settle.
 */
function waitForDrain(res) {
    return new Promise((resolve, reject) => {
        function cleanup() {
            res.off("drain", onDrain);
            res.off("error", onError);
            res.off("close", onClose);
        }
        function onDrain() {
            cleanup();
            resolve();
        }
        function onError(error) {
            cleanup();
            reject(error);
        }
        function onClose() {
            cleanup();
            reject(new Error("response closed before drain"));
        }
        // A response that is already destroyed will never emit "drain" (and its
        // "close" may already have fired), so fail fast instead of subscribing.
        if (res.destroyed) {
            reject(new Error("response closed before drain"));
            return;
        }
        res.once("drain", onDrain);
        res.once("error", onError);
        res.once("close", onClose);
    });
}
885
+ /* v8 ignore stop */
886
/**
 * Writes a bridge response body to the HTTP response and finishes it.
 *
 * A body that is not async-iterable is sent in a single `res.end(body)` call.
 * An async-iterable body is written chunk by chunk, pausing for "drain"
 * whenever `res.write` reports backpressure, and the response is ended once
 * the iterator is exhausted.
 *
 * If the response is destroyed while streaming (for example the caller hung
 * up), iteration stops early: leaving the `for await` loop closes the body
 * iterator, and the response is not ended again.
 *
 * @param {object} res - HTTP response (`write`, `end`, `destroyed`).
 * @param {*} body - A value accepted by `res.end`, or an async iterable of chunks.
 * @returns {Promise<void>} Resolves when the body has been handed to the response
 *   or streaming was abandoned because the response was destroyed.
 */
async function writeResponseBody(res, body) {
    if (!isAsyncIterableBody(body)) {
        res.end(body);
        return;
    }
    for await (const chunk of body) {
        // Writing to a destroyed response returns false without ever emitting
        // "drain", which would park this loop forever; bail out instead so the
        // producer behind `body` is released.
        if (res.destroyed) {
            return;
        }
        /* v8 ignore next -- exercised only when Node reports socket backpressure @preserve */
        if (!res.write(chunk)) {
            await waitForDrain(res);
        }
    }
    res.end();
}
534
899
  async function startTwilioPhoneBridgeServer(options) {
535
900
  const port = options.port ?? exports.DEFAULT_TWILIO_PHONE_PORT;
536
901
  const host = options.host ?? "127.0.0.1";
@@ -545,7 +910,7 @@ async function startTwilioPhoneBridgeServer(options) {
545
910
  body,
546
911
  });
547
912
  res.writeHead(response.statusCode, response.headers);
548
- res.end(response.body);
913
+ await writeResponseBody(res, response.body);
549
914
  }
550
915
  catch (error) {
551
916
  (0, runtime_1.emitNervesEvent)({
@@ -555,8 +920,14 @@ async function startTwilioPhoneBridgeServer(options) {
555
920
  message: "Twilio voice bridge server failed a request",
556
921
  meta: { agentName: options.agentName, error: errorMessage(error) },
557
922
  });
558
- res.writeHead(500, { "content-type": "text/plain; charset=utf-8" });
559
- res.end("internal server error");
923
+ /* v8 ignore next -- defensive path for async stream failures after headers @preserve */
924
+ if (res.headersSent) {
925
+ res.destroy(error instanceof Error ? error : new Error(String(error)));
926
+ }
927
+ else {
928
+ res.writeHead(500, { "content-type": "text/plain; charset=utf-8" });
929
+ res.end("internal server error");
930
+ }
560
931
  }
561
932
  });
562
933
  await new Promise((resolve, reject) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.565",
3
+ "version": "0.1.0-alpha.567",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",