@ouro.bot/cli 0.1.0-alpha.565 → 0.1.0-alpha.567
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/changelog.json +15 -0
- package/dist/heart/daemon/cli-exec.js +2 -1
- package/dist/mind/prompt.js +1 -1
- package/dist/senses/shared-turn.js +78 -13
- package/dist/senses/voice/elevenlabs.js +13 -1
- package/dist/senses/voice/turn.js +113 -9
- package/dist/senses/voice/twilio-phone-runtime.js +3 -0
- package/dist/senses/voice/twilio-phone.js +400 -29
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -105,7 +105,7 @@ Task docs do not live in this repo anymore. Planning and doing docs live in the
|
|
|
105
105
|
- Human TTY commands share one CLI surface family: bare `ouro` opens the home deck, `ouro up` uses the boot checklist, `ouro connect`/`ouro auth verify`/`ouro repair` agree on provider and vault truth, and `ouro help`/`ouro whoami`/`ouro versions`/`ouro hatch` render through the same Ouro-branded wizard/guide language instead of raw transcript walls. Orientation commands such as root `ouro connect` may use shorter live probes, while startup and verification commands own durable readiness updates.
|
|
106
106
|
- Human-facing CLI commands that can wait on browser auth, vault IO, daemon startup, daemon restart, provider checks, or connector setup use a shared progress checklist. If a cursor may blink for more than a few seconds, the command should print or animate the current step instead of going quiet.
|
|
107
107
|
- CLI commands that mutate bundle config, such as vault setup or `ouro connect bluebubbles`, run bundle sync after the change when `sync.enabled` is true and report a compact `bundle sync:` line.
|
|
108
|
-
- Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.
|
|
108
|
+
- Voice is transcript-first: voice sessions use the ordinary `state/sessions/<friend>/voice/<key>.json` session path and appear in Ouro Mailbox as text transcripts. ElevenLabs API credentials live in portable `runtime/config` at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths live in the machine runtime item at `voice.whisperCliPath` and `voice.whisperModelPath`. Phone calls, browser meetings, and local microphone capture are transports under the single `voice` sense, not separate senses; the Twilio phone transport uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered `speak`/`settle` text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks to Twilio by default.
|
|
109
109
|
- The daemon discovers bundles dynamically from `~/AgentBundles`.
|
|
110
110
|
- `ouro status` reports version, last-updated time, discovered agents, senses, and workers.
|
|
111
111
|
- `bundle-meta.json` tracks the runtime version that last touched a bundle.
|
package/changelog.json
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.567",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Voice transports now receive outward `speak` and `settle` text through the shared sense delivery callback path, so voice audio is driven by the same tool-required delivery semantics as chat channels.",
|
|
8
|
+
"Twilio phone sessions are now keyed to the stable phone voice channel instead of CallSid, while CallSid remains the per-call artifact directory.",
|
|
9
|
+
"Managed Twilio playback now supports streaming Play URLs backed by ElevenLabs audio chunks, with buffered playback still available for compatibility testing."
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"version": "0.1.0-alpha.566",
|
|
14
|
+
"changes": [
|
|
15
|
+
"Shared sense delivery recovery is now an exported contract so replaying transports such as Voice/Twilio recover tool-required `settle`/`speak` output only after outward delivery acknowledgements.",
|
|
16
|
+
"Sense development docs now spell out the sense/transport boundary and the tool-required delivery rules for future Voice, meeting, Twilio, and other adapter work."
|
|
17
|
+
]
|
|
18
|
+
},
|
|
4
19
|
{
|
|
5
20
|
"version": "0.1.0-alpha.565",
|
|
6
21
|
"changes": [
|
package/dist/heart/daemon/cli-exec.js
CHANGED
|
@@ -4289,11 +4289,12 @@ async function executeConnectVoice(agent, deps) {
|
|
|
4289
4289
|
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPublicUrl`,
|
|
4290
4290
|
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioBasePath --value /voice/agents/${agentPathSegment}/twilio`,
|
|
4291
4291
|
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPort --value 18910`,
|
|
4292
|
+
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPlaybackMode --value stream`,
|
|
4292
4293
|
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioDefaultFriendId --value ari`,
|
|
4293
4294
|
"Then enable agent.json: senses.voice.enabled = true and restart with `ouro up`.",
|
|
4294
4295
|
`The managed Voice entrypoint will listen at POST <public-url>/voice/agents/${agentPathSegment}/twilio/incoming.`,
|
|
4295
4296
|
`Standalone local smoke remains available with: node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>.`,
|
|
4296
|
-
"Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.",
|
|
4297
|
+
"Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default.",
|
|
4297
4298
|
].join("\n");
|
|
4298
4299
|
deps.writeStdout(message);
|
|
4299
4300
|
return message;
|
package/dist/mind/prompt.js
CHANGED
|
@@ -506,7 +506,7 @@ function senseRuntimeGuidance(channel, preReadStatusLines) {
|
|
|
506
506
|
lines.push("mail validation diagnostics: health checks, bounded mail tools, access logs, and UI inspection can support validation, but they are evidence inside those paths, not additional paths. If asked to name golden paths, do not include diagnostic commands, tool names, or status checks in the answer.");
|
|
507
507
|
lines.push("mail diagnostic naming: `ouro doctor` is installation-wide; do not invent `ouro doctor --agent <agent>`.");
|
|
508
508
|
lines.push("mail setup boundaries: do not invent `ouro auth verify --provider mail`, HEY OAuth, HEY IMAP, `ouro mcp call mail ...`, policy flags, autonomous sending, destructive mail actions, or production MX/DNS/forwarding changes. HEY export, HEY forwarding, DNS, MX cutover, sending, and destructive actions require explicit human confirmation.");
|
|
509
|
-
lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
|
|
509
|
+
lines.push("voice setup truth: voice sessions are transcript-first local sessions. ElevenLabs credentials belong in portable runtime/config at `integrations.elevenLabsApiKey` and `integrations.elevenLabsVoiceId`; Whisper.cpp CLI/model paths belong in the machine runtime item under `voice.whisperCliPath` and `voice.whisperModelPath`. Meeting links have URL intake and local BlackHole/Multi-Output readiness checks; phone testing uses Twilio Record -> Whisper.cpp -> stable voice session -> tool-delivered speak/settle text -> ElevenLabs -> Twilio Play, with managed playback streaming ElevenLabs chunks by default. Live browser join/injection remains an explicit handoff edge until provider automation lands.");
|
|
510
510
|
if (channel === "cli") {
|
|
511
511
|
lines.push("cli is interactive: it is available when the user opens it, not something `ouro up` daemonizes.");
|
|
512
512
|
}
|
package/dist/senses/shared-turn.js
CHANGED
|
@@ -40,6 +40,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
40
40
|
})();
|
|
41
41
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
42
|
exports.stripThinkBlocks = stripThinkBlocks;
|
|
43
|
+
exports.extractOutwardSenseDeliveryText = extractOutwardSenseDeliveryText;
|
|
43
44
|
exports.runSenseTurn = runSenseTurn;
|
|
44
45
|
const os = __importStar(require("os"));
|
|
45
46
|
const path = __importStar(require("path"));
|
|
@@ -61,7 +62,7 @@ const pipeline_1 = require("./pipeline");
|
|
|
61
62
|
const mcp_manager_1 = require("../repertoire/mcp-manager");
|
|
62
63
|
const runtime_1 = require("../nerves/runtime");
|
|
63
64
|
const RESPONSE_CAP = 50_000;
|
|
64
|
-
const
|
|
65
|
+
const OUTWARD_DELIVERY_TOOL_ACKS = new Map([
|
|
65
66
|
["settle", "(delivered)"],
|
|
66
67
|
["speak", "(spoken)"],
|
|
67
68
|
]);
|
|
@@ -117,7 +118,7 @@ function parseToolStringArg(toolCall, toolName, argName) {
|
|
|
117
118
|
function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName) {
|
|
118
119
|
if (typeof toolCallId !== "string" || !toolCallId.trim())
|
|
119
120
|
return false;
|
|
120
|
-
const expectedAck =
|
|
121
|
+
const expectedAck = OUTWARD_DELIVERY_TOOL_ACKS.get(toolName);
|
|
121
122
|
for (let index = assistantIndex + 1; index < messages.length; index++) {
|
|
122
123
|
const message = messages[index];
|
|
123
124
|
if (message.role !== "tool")
|
|
@@ -130,7 +131,7 @@ function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName)
|
|
|
130
131
|
}
|
|
131
132
|
return false;
|
|
132
133
|
}
|
|
133
|
-
function deliveredTextFromAssistantTools(messages, assistantIndex) {
|
|
134
|
+
function outwardDeliveryTextFromAssistantTools(messages, assistantIndex) {
|
|
134
135
|
const assistant = messages[assistantIndex];
|
|
135
136
|
if (!Array.isArray(assistant.tool_calls))
|
|
136
137
|
return null;
|
|
@@ -152,13 +153,29 @@ function deliveredTextFromAssistantTools(messages, assistantIndex) {
|
|
|
152
153
|
}
|
|
153
154
|
return delivered.length > 0 ? delivered.join("\n") : null;
|
|
154
155
|
}
|
|
155
|
-
|
|
156
|
+
/**
|
|
157
|
+
* Recover the text that actually reached a friend in an outward sense turn.
|
|
158
|
+
*
|
|
159
|
+
* Ouro runs outward channels in tool-required mode. That means the visible
|
|
160
|
+
* response may be a `settle({ answer })` or `speak({ message })` tool call
|
|
161
|
+
* whose assistant message has `content: null`. The authoritative delivery
|
|
162
|
+
* signal is the following tool ack:
|
|
163
|
+
*
|
|
164
|
+
* - `(delivered)` for `settle.answer`
|
|
165
|
+
* - `(spoken)` for `speak.message`
|
|
166
|
+
*
|
|
167
|
+
* Inner-dialog `(settled)`, malformed tool arguments, rejected tools, and
|
|
168
|
+
* interrupted tool-call sequences are not outward speech. Sense transports
|
|
169
|
+
* that need to replay the turn later (Voice/Twilio TTS, future meeting audio)
|
|
170
|
+
* should use this helper instead of reading `assistant.content` directly.
|
|
171
|
+
*/
|
|
172
|
+
function extractOutwardSenseDeliveryText(messages) {
|
|
156
173
|
const assistantIndex = messages.findLastIndex((message) => message.role === "assistant");
|
|
157
174
|
if (assistantIndex < 0)
|
|
158
175
|
return null;
|
|
159
176
|
const assistant = messages[assistantIndex];
|
|
160
177
|
return assistantContentText(assistant.content)
|
|
161
|
-
?? deliveredTextFromAssistantTools(messages, assistantIndex);
|
|
178
|
+
?? outwardDeliveryTextFromAssistantTools(messages, assistantIndex);
|
|
162
179
|
}
|
|
163
180
|
/**
|
|
164
181
|
* Run a single agent turn through the inbound pipeline.
|
|
@@ -218,17 +235,64 @@ async function runSenseTurn(options) {
|
|
|
218
235
|
: [{ role: "system", content: (0, prompt_1.flattenSystemPrompt)(await (0, prompt_1.buildSystem)(channel, {}, undefined)) }];
|
|
219
236
|
// Pending dir
|
|
220
237
|
const pendingDir = (0, pending_1.getPendingDir)(agentName, friendId, channel, sessionKey);
|
|
221
|
-
// Accumulate
|
|
222
|
-
|
|
238
|
+
// Accumulate outward text through the same callback boundary used by chat
|
|
239
|
+
// channels. `speak` flushes pending text immediately; `settle` is delivered
|
|
240
|
+
// once the turn completes.
|
|
241
|
+
let committedResponseText = "";
|
|
242
|
+
let pendingResponseText = "";
|
|
243
|
+
let terminalDeliveryKind = "text";
|
|
244
|
+
const deliveries = [];
|
|
245
|
+
const deliveryFailures = [];
|
|
246
|
+
const commitResponseText = (text) => {
|
|
247
|
+
const cleaned = stripThinkBlocks(text);
|
|
248
|
+
/* v8 ignore next -- deliverPending strips first; this is a defensive direct-call guard @preserve */
|
|
249
|
+
if (!cleaned)
|
|
250
|
+
return;
|
|
251
|
+
committedResponseText = committedResponseText
|
|
252
|
+
? `${committedResponseText}\n${cleaned}`
|
|
253
|
+
: cleaned;
|
|
254
|
+
};
|
|
255
|
+
const deliveryErrorMessage = (error) => error instanceof Error ? error.message : String(error);
|
|
256
|
+
const deliverPending = async (kind, optionsForDelivery) => {
|
|
257
|
+
const text = stripThinkBlocks(pendingResponseText);
|
|
258
|
+
pendingResponseText = "";
|
|
259
|
+
if (!text)
|
|
260
|
+
return;
|
|
261
|
+
const delivery = { kind, text };
|
|
262
|
+
try {
|
|
263
|
+
await options.deliverySink?.onDelivery(delivery);
|
|
264
|
+
deliveries.push(delivery);
|
|
265
|
+
commitResponseText(text);
|
|
266
|
+
}
|
|
267
|
+
catch (error) {
|
|
268
|
+
const failure = { ...delivery, error: deliveryErrorMessage(error) };
|
|
269
|
+
deliveryFailures.push(failure);
|
|
270
|
+
(0, runtime_1.emitNervesEvent)({
|
|
271
|
+
level: "error",
|
|
272
|
+
component: "senses",
|
|
273
|
+
event: "senses.shared_turn_delivery_error",
|
|
274
|
+
message: "shared turn outward delivery failed",
|
|
275
|
+
meta: { agentName, channel, sessionKey, friendId, kind, error: failure.error, textLength: text.length },
|
|
276
|
+
});
|
|
277
|
+
if (optionsForDelivery.throwOnError)
|
|
278
|
+
throw error;
|
|
279
|
+
commitResponseText(text);
|
|
280
|
+
}
|
|
281
|
+
};
|
|
223
282
|
/* v8 ignore start — no-op callback stubs; only onTextChunk does real work (covered via mock) */
|
|
224
283
|
const callbacks = {
|
|
225
284
|
onModelStart: () => { },
|
|
226
285
|
onModelStreamStart: () => { },
|
|
227
|
-
onTextChunk: (chunk) => {
|
|
286
|
+
onTextChunk: (chunk) => { pendingResponseText += chunk; },
|
|
228
287
|
onReasoningChunk: () => { },
|
|
229
288
|
onToolStart: () => { },
|
|
230
|
-
onToolEnd: () => {
|
|
289
|
+
onToolEnd: (name, _summary, success) => {
|
|
290
|
+
if (name === "settle" && success)
|
|
291
|
+
terminalDeliveryKind = "settle";
|
|
292
|
+
},
|
|
231
293
|
onError: () => { },
|
|
294
|
+
onClearText: () => { pendingResponseText = ""; },
|
|
295
|
+
flushNow: () => deliverPending("speak", { throwOnError: true }),
|
|
232
296
|
};
|
|
233
297
|
/* v8 ignore stop */
|
|
234
298
|
// Run the pipeline
|
|
@@ -268,10 +332,11 @@ async function runSenseTurn(options) {
|
|
|
268
332
|
/* v8 ignore stop */
|
|
269
333
|
accumulateFriendTokens: tokens_1.accumulateFriendTokens,
|
|
270
334
|
});
|
|
335
|
+
await deliverPending(terminalDeliveryKind, { throwOnError: false });
|
|
271
336
|
const ponderDeferred = false;
|
|
272
337
|
// Build response
|
|
273
338
|
let finalResponse;
|
|
274
|
-
if (
|
|
339
|
+
if (committedResponseText.length === 0) {
|
|
275
340
|
// Agent settled but no text came through callbacks — check session transcript for the settle answer
|
|
276
341
|
// Await deferred persist so the session file is up-to-date before readback
|
|
277
342
|
/* v8 ignore next -- persistPromise set inside v8-ignored postTurn callback; tested via pipeline integration @preserve */
|
|
@@ -279,7 +344,7 @@ async function runSenseTurn(options) {
|
|
|
279
344
|
await persistPromise;
|
|
280
345
|
const postTurnSession = (0, context_1.loadSession)(sessPath);
|
|
281
346
|
if (postTurnSession?.messages) {
|
|
282
|
-
finalResponse =
|
|
347
|
+
finalResponse = extractOutwardSenseDeliveryText(postTurnSession.messages)
|
|
283
348
|
?? "(agent responded but response was empty)";
|
|
284
349
|
}
|
|
285
350
|
else {
|
|
@@ -287,7 +352,7 @@ async function runSenseTurn(options) {
|
|
|
287
352
|
}
|
|
288
353
|
}
|
|
289
354
|
else {
|
|
290
|
-
finalResponse =
|
|
355
|
+
finalResponse = committedResponseText;
|
|
291
356
|
}
|
|
292
357
|
// Strip MiniMax-style <think>...</think> blocks from the final response.
|
|
293
358
|
// When a reasoning-style model emits only a think block and no final answer
|
|
@@ -318,5 +383,5 @@ async function runSenseTurn(options) {
|
|
|
318
383
|
message: "shared turn runner complete",
|
|
319
384
|
meta: { agentName, channel, sessionKey, friendId, ponderDeferred, responseLength: finalResponse.length },
|
|
320
385
|
});
|
|
321
|
-
return { response: finalResponse, ponderDeferred };
|
|
386
|
+
return { response: finalResponse, ponderDeferred, deliveries, deliveryFailures };
|
|
322
387
|
}
|
package/dist/senses/voice/elevenlabs.js
CHANGED
|
@@ -156,7 +156,19 @@ function createElevenLabsTtsClient(options) {
|
|
|
156
156
|
try {
|
|
157
157
|
const parsed = JSON.parse(payloadText(payload));
|
|
158
158
|
if (typeof parsed.audio === "string" && parsed.audio.length > 0) {
|
|
159
|
-
|
|
159
|
+
const chunk = Buffer.from(parsed.audio, "base64");
|
|
160
|
+
chunks.push(chunk);
|
|
161
|
+
if (request.onAudioChunk) {
|
|
162
|
+
try {
|
|
163
|
+
const chunkResult = request.onAudioChunk(chunk);
|
|
164
|
+
if (chunkResult && typeof chunkResult.then === "function") {
|
|
165
|
+
void chunkResult.catch(fail);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
catch (error) {
|
|
169
|
+
fail(error);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
160
172
|
}
|
|
161
173
|
if (parsed.isFinal === true) {
|
|
162
174
|
finish();
|
package/dist/senses/voice/turn.js
CHANGED
|
@@ -4,6 +4,33 @@ exports.runVoiceLoopbackTurn = runVoiceLoopbackTurn;
|
|
|
4
4
|
const runtime_1 = require("../../nerves/runtime");
|
|
5
5
|
const shared_turn_1 = require("../shared-turn");
|
|
6
6
|
const transcript_1 = require("./transcript");
|
|
7
|
+
function deliveredTts(spoken) {
|
|
8
|
+
return {
|
|
9
|
+
status: "delivered",
|
|
10
|
+
audio: spoken.audio,
|
|
11
|
+
byteLength: spoken.byteLength,
|
|
12
|
+
chunkCount: spoken.chunkCount,
|
|
13
|
+
mimeType: spoken.mimeType,
|
|
14
|
+
modelId: spoken.modelId,
|
|
15
|
+
voiceId: spoken.voiceId,
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
function aggregateSegments(segments) {
|
|
19
|
+
const first = segments[0].tts;
|
|
20
|
+
const audio = Buffer.concat(segments.map((segment) => Buffer.from(segment.tts.audio)));
|
|
21
|
+
return {
|
|
22
|
+
status: "delivered",
|
|
23
|
+
audio,
|
|
24
|
+
byteLength: audio.byteLength,
|
|
25
|
+
chunkCount: segments.reduce((sum, segment) => sum + segment.tts.chunkCount, 0),
|
|
26
|
+
mimeType: first.mimeType,
|
|
27
|
+
modelId: first.modelId,
|
|
28
|
+
voiceId: first.voiceId,
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
function deliveryErrorMessage(error) {
|
|
32
|
+
return error instanceof Error ? error.message : String(error);
|
|
33
|
+
}
|
|
7
34
|
async function runVoiceLoopbackTurn(options) {
|
|
8
35
|
const runSenseTurn = options.runSenseTurn ?? shared_turn_1.runSenseTurn;
|
|
9
36
|
let userMessage;
|
|
@@ -31,30 +58,105 @@ async function runVoiceLoopbackTurn(options) {
|
|
|
31
58
|
utteranceId: options.transcript.utteranceId,
|
|
32
59
|
},
|
|
33
60
|
});
|
|
61
|
+
const speechSegments = [];
|
|
62
|
+
const speechDeliveryErrors = [];
|
|
63
|
+
let deliveryIndex = 0;
|
|
64
|
+
const synthesizeDelivery = async (delivery) => {
|
|
65
|
+
deliveryIndex += 1;
|
|
66
|
+
const segmentUtteranceId = `${options.transcript.utteranceId}-${deliveryIndex}-${delivery.kind}`;
|
|
67
|
+
try {
|
|
68
|
+
const spoken = await options.tts.synthesize({
|
|
69
|
+
utteranceId: segmentUtteranceId,
|
|
70
|
+
text: delivery.text,
|
|
71
|
+
onAudioChunk: options.onAudioChunk,
|
|
72
|
+
});
|
|
73
|
+
speechSegments.push({
|
|
74
|
+
kind: delivery.kind,
|
|
75
|
+
text: delivery.text,
|
|
76
|
+
utteranceId: segmentUtteranceId,
|
|
77
|
+
tts: deliveredTts(spoken),
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
catch (error) {
|
|
81
|
+
const failure = {
|
|
82
|
+
kind: delivery.kind,
|
|
83
|
+
text: delivery.text,
|
|
84
|
+
utteranceId: segmentUtteranceId,
|
|
85
|
+
error: deliveryErrorMessage(error),
|
|
86
|
+
};
|
|
87
|
+
speechDeliveryErrors.push(failure);
|
|
88
|
+
throw error;
|
|
89
|
+
}
|
|
90
|
+
};
|
|
34
91
|
const turn = await runSenseTurn({
|
|
35
92
|
agentName: options.agentName,
|
|
36
93
|
channel: "voice",
|
|
37
94
|
friendId: options.friendId,
|
|
38
95
|
sessionKey: options.sessionKey,
|
|
39
96
|
userMessage,
|
|
97
|
+
deliverySink: { onDelivery: synthesizeDelivery },
|
|
40
98
|
});
|
|
99
|
+
if (speechSegments.length > 0) {
|
|
100
|
+
const tts = aggregateSegments(speechSegments);
|
|
101
|
+
const result = {
|
|
102
|
+
responseText: turn.response,
|
|
103
|
+
ponderDeferred: turn.ponderDeferred,
|
|
104
|
+
tts,
|
|
105
|
+
speechSegments,
|
|
106
|
+
speechDeliveryErrors,
|
|
107
|
+
};
|
|
108
|
+
(0, runtime_1.emitNervesEvent)({
|
|
109
|
+
component: "senses",
|
|
110
|
+
event: "senses.voice_turn_end",
|
|
111
|
+
message: "voice loopback turn delivered speech",
|
|
112
|
+
meta: {
|
|
113
|
+
utteranceId: options.transcript.utteranceId,
|
|
114
|
+
responseLength: turn.response.length,
|
|
115
|
+
segmentCount: speechSegments.length,
|
|
116
|
+
byteLength: tts.byteLength,
|
|
117
|
+
},
|
|
118
|
+
});
|
|
119
|
+
return result;
|
|
120
|
+
}
|
|
121
|
+
const turnDeliveryFailures = turn.deliveryFailures ?? [];
|
|
122
|
+
if (speechDeliveryErrors.length > 0 || turnDeliveryFailures.length > 0) {
|
|
123
|
+
const firstError = speechDeliveryErrors[0]?.error ?? turnDeliveryFailures[0].error;
|
|
124
|
+
(0, runtime_1.emitNervesEvent)({
|
|
125
|
+
level: "error",
|
|
126
|
+
component: "senses",
|
|
127
|
+
event: "senses.voice_turn_tts_error",
|
|
128
|
+
message: "voice loopback TTS failed after text response",
|
|
129
|
+
meta: { utteranceId: options.transcript.utteranceId, error: firstError, responseLength: turn.response.length },
|
|
130
|
+
});
|
|
131
|
+
return {
|
|
132
|
+
responseText: turn.response,
|
|
133
|
+
ponderDeferred: turn.ponderDeferred,
|
|
134
|
+
tts: {
|
|
135
|
+
status: "failed",
|
|
136
|
+
error: firstError,
|
|
137
|
+
},
|
|
138
|
+
speechSegments,
|
|
139
|
+
speechDeliveryErrors,
|
|
140
|
+
};
|
|
141
|
+
}
|
|
41
142
|
try {
|
|
42
143
|
const spoken = await options.tts.synthesize({
|
|
43
144
|
utteranceId: options.transcript.utteranceId,
|
|
44
145
|
text: turn.response,
|
|
146
|
+
onAudioChunk: options.onAudioChunk,
|
|
45
147
|
});
|
|
148
|
+
const tts = deliveredTts(spoken);
|
|
46
149
|
const result = {
|
|
47
150
|
responseText: turn.response,
|
|
48
151
|
ponderDeferred: turn.ponderDeferred,
|
|
49
|
-
tts
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
},
|
|
152
|
+
tts,
|
|
153
|
+
speechSegments: [{
|
|
154
|
+
kind: "text",
|
|
155
|
+
text: turn.response,
|
|
156
|
+
utteranceId: options.transcript.utteranceId,
|
|
157
|
+
tts,
|
|
158
|
+
}],
|
|
159
|
+
speechDeliveryErrors,
|
|
58
160
|
};
|
|
59
161
|
(0, runtime_1.emitNervesEvent)({
|
|
60
162
|
component: "senses",
|
|
@@ -80,6 +182,8 @@ async function runVoiceLoopbackTurn(options) {
|
|
|
80
182
|
status: "failed",
|
|
81
183
|
error: message,
|
|
82
184
|
},
|
|
185
|
+
speechSegments,
|
|
186
|
+
speechDeliveryErrors,
|
|
83
187
|
};
|
|
84
188
|
}
|
|
85
189
|
}
|
package/dist/senses/voice/twilio-phone-runtime.js
CHANGED
|
@@ -193,6 +193,8 @@ function resolveTwilioPhoneTransportRuntime(options) {
|
|
|
193
193
|
recordMaxLengthSeconds: overrides.recordMaxLengthSeconds
|
|
194
194
|
?? configNumber(options.machineConfig, "voice.twilioRecordMaxLengthSeconds")
|
|
195
195
|
?? twilio_phone_1.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
|
|
196
|
+
playbackMode: overrides.playbackMode
|
|
197
|
+
?? (0, twilio_phone_1.normalizeTwilioPhonePlaybackMode)(configString(options.machineConfig, "voice.twilioPlaybackMode") ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE),
|
|
196
198
|
};
|
|
197
199
|
return { status: "configured", settings };
|
|
198
200
|
}
|
|
@@ -259,6 +261,7 @@ async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilio
|
|
|
259
261
|
defaultFriendId: settings.defaultFriendId,
|
|
260
262
|
recordTimeoutSeconds: settings.recordTimeoutSeconds,
|
|
261
263
|
recordMaxLengthSeconds: settings.recordMaxLengthSeconds,
|
|
264
|
+
playbackMode: settings.playbackMode,
|
|
262
265
|
});
|
|
263
266
|
(0, runtime_1.emitNervesEvent)({
|
|
264
267
|
component: "senses",
|
package/dist/senses/voice/twilio-phone.js
CHANGED
|
@@ -33,9 +33,11 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
|
|
36
|
+
exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
|
|
37
37
|
exports.normalizeTwilioPhoneBasePath = normalizeTwilioPhoneBasePath;
|
|
38
|
+
exports.normalizeTwilioPhonePlaybackMode = normalizeTwilioPhonePlaybackMode;
|
|
38
39
|
exports.twilioPhoneWebhookUrl = twilioPhoneWebhookUrl;
|
|
40
|
+
exports.twilioPhoneVoiceSessionKey = twilioPhoneVoiceSessionKey;
|
|
39
41
|
exports.computeTwilioSignature = computeTwilioSignature;
|
|
40
42
|
exports.validateTwilioSignature = validateTwilioSignature;
|
|
41
43
|
exports.twilioRecordingMediaUrl = twilioRecordingMediaUrl;
|
|
@@ -54,6 +56,7 @@ exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
|
|
|
54
56
|
exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
|
|
55
57
|
exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = 30;
|
|
56
58
|
exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = "/voice/twilio";
|
|
59
|
+
exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE = "stream";
|
|
57
60
|
function bodyText(body) {
|
|
58
61
|
if (body === undefined)
|
|
59
62
|
return "";
|
|
@@ -104,6 +107,21 @@ function binaryResponse(body, contentType) {
|
|
|
104
107
|
body,
|
|
105
108
|
};
|
|
106
109
|
}
|
|
110
|
+
function streamResponse(body, contentType) {
|
|
111
|
+
return {
|
|
112
|
+
statusCode: 200,
|
|
113
|
+
headers: {
|
|
114
|
+
"content-type": contentType,
|
|
115
|
+
"cache-control": "no-store",
|
|
116
|
+
},
|
|
117
|
+
body,
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
function isAsyncIterableBody(body) {
|
|
121
|
+
return typeof body === "object"
|
|
122
|
+
&& body !== null
|
|
123
|
+
&& Symbol.asyncIterator in body;
|
|
124
|
+
}
|
|
107
125
|
function escapeXml(input) {
|
|
108
126
|
return input
|
|
109
127
|
.replace(/&/g, "&amp;")
|
|
@@ -127,6 +145,12 @@ function normalizeTwilioPhoneBasePath(value = exports.TWILIO_PHONE_WEBHOOK_BASE_
|
|
|
127
145
|
}
|
|
128
146
|
return withoutTrailingSlash;
|
|
129
147
|
}
|
|
148
|
+
function normalizeTwilioPhonePlaybackMode(value) {
|
|
149
|
+
const normalized = (value ?? exports.DEFAULT_TWILIO_PHONE_PLAYBACK_MODE).trim().toLowerCase();
|
|
150
|
+
if (normalized === "stream" || normalized === "buffered")
|
|
151
|
+
return normalized;
|
|
152
|
+
throw new Error(`invalid Twilio phone playback mode: ${value}`);
|
|
153
|
+
}
|
|
130
154
|
function twilioPhoneWebhookUrl(publicBaseUrl, basePath = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
|
|
131
155
|
return routeUrl(publicBaseUrl, `${normalizeTwilioPhoneBasePath(basePath)}/incoming`);
|
|
132
156
|
}
|
|
@@ -179,6 +203,25 @@ function friendIdFromCaller(from, callSid) {
|
|
|
179
203
|
function voiceFriendId(options, from, callSid) {
|
|
180
204
|
return options.defaultFriendId?.trim() || friendIdFromCaller(from, callSid);
|
|
181
205
|
}
|
|
206
|
+
function phoneIdentitySegment(input) {
|
|
207
|
+
const phoneish = input.replace(/[^0-9A-Za-z]+/g, "");
|
|
208
|
+
return phoneish || safeSegment(input);
|
|
209
|
+
}
|
|
210
|
+
function twilioPhoneVoiceSessionKey(options) {
|
|
211
|
+
const friendSegment = options.defaultFriendId?.trim()
|
|
212
|
+
? safeSegment(options.defaultFriendId)
|
|
213
|
+
: options.from?.trim()
|
|
214
|
+
? phoneIdentitySegment(options.from)
|
|
215
|
+
: "";
|
|
216
|
+
const lineSegment = options.to?.trim() ? phoneIdentitySegment(options.to) : "";
|
|
217
|
+
if (friendSegment && lineSegment)
|
|
218
|
+
return `twilio-phone-${friendSegment}-via-${lineSegment}`;
|
|
219
|
+
if (friendSegment)
|
|
220
|
+
return `twilio-phone-${friendSegment}`;
|
|
221
|
+
if (lineSegment)
|
|
222
|
+
return `twilio-phone-line-${lineSegment}`;
|
|
223
|
+
return `twilio-phone-${safeSegment(options.callSid ?? "incoming")}`;
|
|
224
|
+
}
|
|
182
225
|
function callConnectedPrompt(params) {
|
|
183
226
|
const from = params.From?.trim();
|
|
184
227
|
const to = params.To?.trim();
|
|
@@ -215,6 +258,7 @@ function parseRecordingParams(params) {
|
|
|
215
258
|
recordingSid,
|
|
216
259
|
recordingUrl,
|
|
217
260
|
from: params.From?.trim() ?? "",
|
|
261
|
+
to: params.To?.trim() ?? "",
|
|
218
262
|
};
|
|
219
263
|
}
|
|
220
264
|
function recordAgainResponse(options, basePath, message) {
|
|
@@ -238,6 +282,180 @@ function nextInputTwiml(options, basePath, mode) {
|
|
|
238
282
|
maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
|
|
239
283
|
});
|
|
240
284
|
}
|
|
285
|
+
class TwilioAudioStreamJob {
|
|
286
|
+
callSid;
|
|
287
|
+
jobId;
|
|
288
|
+
mimeType;
|
|
289
|
+
chunks = [];
|
|
290
|
+
waiters = new Set();
|
|
291
|
+
status = "pending";
|
|
292
|
+
failure = null;
|
|
293
|
+
byteLength = 0;
|
|
294
|
+
constructor(callSid, jobId, mimeType) {
|
|
295
|
+
this.callSid = callSid;
|
|
296
|
+
this.jobId = jobId;
|
|
297
|
+
this.mimeType = mimeType;
|
|
298
|
+
}
|
|
299
|
+
append(chunk) {
|
|
300
|
+
/* v8 ignore next -- append is only called while pending with non-empty chunks in bridge flow @preserve */
|
|
301
|
+
if (this.status !== "pending" || chunk.byteLength === 0)
|
|
302
|
+
return;
|
|
303
|
+
const buffered = Buffer.from(chunk);
|
|
304
|
+
this.chunks.push(buffered);
|
|
305
|
+
this.byteLength += buffered.byteLength;
|
|
306
|
+
this.notify();
|
|
307
|
+
}
|
|
308
|
+
complete() {
|
|
309
|
+
/* v8 ignore next -- completion is single-shot inside startTwilioPlaybackStreamJob @preserve */
|
|
310
|
+
if (this.status !== "pending")
|
|
311
|
+
return;
|
|
312
|
+
this.status = "completed";
|
|
313
|
+
this.notify();
|
|
314
|
+
}
|
|
315
|
+
fail(error) {
|
|
316
|
+
/* v8 ignore next -- failure is single-shot inside startTwilioPlaybackStreamJob @preserve */
|
|
317
|
+
if (this.status !== "pending")
|
|
318
|
+
return;
|
|
319
|
+
this.status = "failed";
|
|
320
|
+
this.failure = errorMessage(error);
|
|
321
|
+
this.notify();
|
|
322
|
+
}
|
|
323
|
+
async *stream() {
|
|
324
|
+
let index = 0;
|
|
325
|
+
let yielded = false;
|
|
326
|
+
for (;;) {
|
|
327
|
+
while (index < this.chunks.length) {
|
|
328
|
+
yielded = true;
|
|
329
|
+
yield this.chunks[index++];
|
|
330
|
+
}
|
|
331
|
+
if (this.status === "completed")
|
|
332
|
+
return;
|
|
333
|
+
if (this.status === "failed") {
|
|
334
|
+
if (yielded)
|
|
335
|
+
return;
|
|
336
|
+
throw new Error(this.failure);
|
|
337
|
+
}
|
|
338
|
+
await new Promise((resolve) => {
|
|
339
|
+
this.waiters.add(resolve);
|
|
340
|
+
});
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
notify() {
|
|
344
|
+
const waiters = [...this.waiters];
|
|
345
|
+
this.waiters.clear();
|
|
346
|
+
for (const waiter of waiters)
|
|
347
|
+
waiter();
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
class TwilioAudioStreamJobStore {
|
|
351
|
+
jobs = new Map();
|
|
352
|
+
create(callSid, jobId, mimeType = "audio/mpeg") {
|
|
353
|
+
const key = this.key(callSid, jobId);
|
|
354
|
+
const job = new TwilioAudioStreamJob(callSid, jobId, mimeType);
|
|
355
|
+
this.jobs.set(key, job);
|
|
356
|
+
return job;
|
|
357
|
+
}
|
|
358
|
+
get(callSid, jobId) {
|
|
359
|
+
return this.jobs.get(this.key(callSid, jobId)) ?? null;
|
|
360
|
+
}
|
|
361
|
+
/* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
|
|
362
|
+
delete(callSid, jobId) {
|
|
363
|
+
this.jobs.delete(this.key(callSid, jobId));
|
|
364
|
+
}
|
|
365
|
+
/* v8 ignore stop */
|
|
366
|
+
key(callSid, jobId) {
|
|
367
|
+
return `${callSid}/${jobId}`;
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
function deliveredSegments(turn) {
|
|
371
|
+
return turn.speechSegments.map((segment) => segment.tts);
|
|
372
|
+
}
|
|
373
|
+
async function writeVoiceTurnPlaybackArtifacts(options) {
|
|
374
|
+
const urls = [];
|
|
375
|
+
for (const segment of options.turn.speechSegments) {
|
|
376
|
+
const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
|
|
377
|
+
utteranceId: segment.utteranceId,
|
|
378
|
+
delivery: segment.tts,
|
|
379
|
+
outputDir: options.callDir,
|
|
380
|
+
});
|
|
381
|
+
urls.push(routeUrl(options.bridgeOptions.publicBaseUrl, `${options.basePath}/audio/${encodeURIComponent(options.safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`));
|
|
382
|
+
}
|
|
383
|
+
return urls;
|
|
384
|
+
}
|
|
385
|
+
function playManyTwiml(urls) {
|
|
386
|
+
return urls.map(playTwiml).join("");
|
|
387
|
+
}
|
|
388
|
+
function streamAudioUrl(options, basePath, safeCallSid, jobId) {
|
|
389
|
+
return routeUrl(options.publicBaseUrl, `${basePath}/audio-stream/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(`${jobId}.mp3`)}`);
|
|
390
|
+
}
|
|
391
|
+
function scheduleJobCleanup(jobs, safeCallSid, jobId) {
|
|
392
|
+
/* v8 ignore start -- stream job cleanup is delayed beyond request-scope tests @preserve */
|
|
393
|
+
const cleanup = setTimeout(() => {
|
|
394
|
+
jobs.delete(safeCallSid, jobId);
|
|
395
|
+
}, 5 * 60_000);
|
|
396
|
+
cleanup.unref?.();
|
|
397
|
+
/* v8 ignore stop */
|
|
398
|
+
}
|
|
399
|
+
function startTwilioPlaybackStreamJob(options) {
|
|
400
|
+
const job = options.jobs.create(options.safeCallSid, options.jobId);
|
|
401
|
+
void (async () => {
|
|
402
|
+
try {
|
|
403
|
+
const turn = await options.runTurn((chunk) => job.append(chunk));
|
|
404
|
+
const deliveries = deliveredSegments(turn);
|
|
405
|
+
if (job.byteLength === 0 && deliveries.length > 0) {
|
|
406
|
+
for (const delivery of deliveries)
|
|
407
|
+
job.append(delivery.audio);
|
|
408
|
+
}
|
|
409
|
+
if (deliveries.length === 0) {
|
|
410
|
+
/* v8 ignore next -- runVoiceLoopbackTurn cannot return delivered TTS with zero speech segments @preserve */
|
|
411
|
+
if (turn.tts.status === "failed")
|
|
412
|
+
throw new Error(turn.tts.error);
|
|
413
|
+
/* v8 ignore next -- runVoiceLoopbackTurn emits a speech segment whenever TTS is delivered @preserve */
|
|
414
|
+
throw new Error("voice turn produced no audio");
|
|
415
|
+
}
|
|
416
|
+
try {
|
|
417
|
+
await writeVoiceTurnPlaybackArtifacts({
|
|
418
|
+
bridgeOptions: options.bridgeOptions,
|
|
419
|
+
basePath: options.basePath,
|
|
420
|
+
callDir: options.callDir,
|
|
421
|
+
safeCallSid: options.safeCallSid,
|
|
422
|
+
baseUtteranceId: options.baseUtteranceId,
|
|
423
|
+
turn,
|
|
424
|
+
});
|
|
425
|
+
}
|
|
426
|
+
catch (artifactError) {
|
|
427
|
+
(0, runtime_1.emitNervesEvent)({
|
|
428
|
+
level: "warn",
|
|
429
|
+
component: "senses",
|
|
430
|
+
event: "senses.voice_twilio_stream_artifact_error",
|
|
431
|
+
message: "Twilio stream audio was delivered but artifact persistence failed",
|
|
432
|
+
meta: { ...options.meta, error: errorMessage(artifactError) },
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
job.complete();
|
|
436
|
+
(0, runtime_1.emitNervesEvent)({
|
|
437
|
+
component: "senses",
|
|
438
|
+
event: "senses.voice_twilio_stream_end",
|
|
439
|
+
message: "finished Twilio streaming voice playback job",
|
|
440
|
+
meta: { ...options.meta, byteLength: String(job.byteLength), segmentCount: String(deliveries.length) },
|
|
441
|
+
});
|
|
442
|
+
}
|
|
443
|
+
catch (error) {
|
|
444
|
+
job.fail(error);
|
|
445
|
+
(0, runtime_1.emitNervesEvent)({
|
|
446
|
+
level: "error",
|
|
447
|
+
component: "senses",
|
|
448
|
+
event: "senses.voice_twilio_stream_error",
|
|
449
|
+
message: "Twilio streaming voice playback job failed",
|
|
450
|
+
meta: { ...options.meta, error: errorMessage(error) },
|
|
451
|
+
});
|
|
452
|
+
}
|
|
453
|
+
finally {
|
|
454
|
+
scheduleJobCleanup(options.jobs, options.safeCallSid, options.jobId);
|
|
455
|
+
}
|
|
456
|
+
})();
|
|
457
|
+
return job;
|
|
458
|
+
}
|
|
241
459
|
async function runPhonePromptTurn(options) {
|
|
242
460
|
const transcript = (0, transcript_1.buildVoiceTranscript)({
|
|
243
461
|
utteranceId: options.utteranceId,
|
|
@@ -256,13 +474,15 @@ async function runPhonePromptTurn(options) {
|
|
|
256
474
|
if (turn.tts.status !== "delivered") {
|
|
257
475
|
return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${after}`);
|
|
258
476
|
}
|
|
259
|
-
const
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
477
|
+
const audioUrls = await writeVoiceTurnPlaybackArtifacts({
|
|
478
|
+
bridgeOptions: options.bridgeOptions,
|
|
479
|
+
basePath: options.basePath,
|
|
480
|
+
callDir: options.callDir,
|
|
481
|
+
safeCallSid: options.safeCallSid,
|
|
482
|
+
baseUtteranceId: options.utteranceId,
|
|
483
|
+
turn,
|
|
263
484
|
});
|
|
264
|
-
|
|
265
|
-
return xmlResponse(`${playTwiml(audioUrl)}${after}`);
|
|
485
|
+
return xmlResponse(`${playManyTwiml(audioUrls)}${after}`);
|
|
266
486
|
}
|
|
267
487
|
function computeTwilioSignature(input) {
|
|
268
488
|
const payload = input.url + Object.keys(input.params)
|
|
@@ -309,27 +529,62 @@ function verifyRequest(options, request, params) {
|
|
|
309
529
|
signature: headerValue(request.headers, "x-twilio-signature"),
|
|
310
530
|
});
|
|
311
531
|
}
|
|
312
|
-
async function handleIncoming(options, basePath, params) {
|
|
532
|
+
async function handleIncoming(options, basePath, params, jobs) {
|
|
313
533
|
const callSid = params.CallSid?.trim() || "incoming";
|
|
314
534
|
const safeCallSid = safeSegment(callSid);
|
|
315
535
|
const callDir = path.join(options.outputDir, safeCallSid);
|
|
316
536
|
const utteranceId = `twilio-${safeCallSid}-connected`;
|
|
537
|
+
const friendId = voiceFriendId(options, params.From?.trim() ?? "", callSid);
|
|
538
|
+
const sessionKey = twilioPhoneVoiceSessionKey({
|
|
539
|
+
defaultFriendId: options.defaultFriendId,
|
|
540
|
+
from: params.From?.trim() ?? "",
|
|
541
|
+
to: params.To?.trim() ?? "",
|
|
542
|
+
callSid,
|
|
543
|
+
});
|
|
317
544
|
(0, runtime_1.emitNervesEvent)({
|
|
318
545
|
component: "senses",
|
|
319
546
|
event: "senses.voice_twilio_incoming",
|
|
320
547
|
message: "Twilio voice call connected",
|
|
321
|
-
meta: { agentName: options.agentName, callSid: safeCallSid },
|
|
548
|
+
meta: { agentName: options.agentName, callSid: safeCallSid, sessionKey },
|
|
322
549
|
});
|
|
323
550
|
try {
|
|
324
551
|
await fs.mkdir(callDir, { recursive: true });
|
|
552
|
+
if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
|
|
553
|
+
const transcript = (0, transcript_1.buildVoiceTranscript)({
|
|
554
|
+
utteranceId,
|
|
555
|
+
text: callConnectedPrompt(params),
|
|
556
|
+
source: "loopback",
|
|
557
|
+
});
|
|
558
|
+
const jobId = safeSegment(utteranceId);
|
|
559
|
+
startTwilioPlaybackStreamJob({
|
|
560
|
+
jobs,
|
|
561
|
+
bridgeOptions: options,
|
|
562
|
+
basePath,
|
|
563
|
+
callDir,
|
|
564
|
+
safeCallSid,
|
|
565
|
+
jobId,
|
|
566
|
+
baseUtteranceId: utteranceId,
|
|
567
|
+
runTurn: (onAudioChunk) => (0, turn_1.runVoiceLoopbackTurn)({
|
|
568
|
+
agentName: options.agentName,
|
|
569
|
+
friendId,
|
|
570
|
+
sessionKey,
|
|
571
|
+
transcript,
|
|
572
|
+
tts: options.tts,
|
|
573
|
+
runSenseTurn: options.runSenseTurn,
|
|
574
|
+
onAudioChunk,
|
|
575
|
+
}),
|
|
576
|
+
meta: { agentName: options.agentName, callSid: safeCallSid, utteranceId },
|
|
577
|
+
});
|
|
578
|
+
return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${nextInputTwiml(options, basePath, "record")}`);
|
|
579
|
+
}
|
|
325
580
|
return await runPhonePromptTurn({
|
|
326
581
|
bridgeOptions: options,
|
|
327
582
|
basePath,
|
|
328
583
|
callDir,
|
|
329
584
|
safeCallSid,
|
|
330
585
|
utteranceId,
|
|
331
|
-
friendId
|
|
332
|
-
sessionKey
|
|
586
|
+
friendId,
|
|
587
|
+
sessionKey,
|
|
333
588
|
promptText: callConnectedPrompt(params),
|
|
334
589
|
afterPlayback: "record",
|
|
335
590
|
});
|
|
@@ -358,7 +613,7 @@ async function handleListen(options, basePath) {
|
|
|
358
613
|
maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
|
|
359
614
|
}));
|
|
360
615
|
}
|
|
361
|
-
async function handleRecording(options, basePath, params) {
|
|
616
|
+
async function handleRecording(options, basePath, params, jobs) {
|
|
362
617
|
const recording = parseRecordingParams(params);
|
|
363
618
|
if (!recording) {
|
|
364
619
|
(0, runtime_1.emitNervesEvent)({
|
|
@@ -376,13 +631,64 @@ async function handleRecording(options, basePath, params) {
|
|
|
376
631
|
const inputPath = path.join(callDir, `${safeRecordingSid}.wav`);
|
|
377
632
|
const utteranceId = `twilio-${safeCallSid}-${safeRecordingSid}`;
|
|
378
633
|
const downloadRecording = options.downloadRecording ?? defaultTwilioRecordingDownloader;
|
|
634
|
+
const friendId = voiceFriendId(options, recording.from, recording.callSid);
|
|
635
|
+
const sessionKey = twilioPhoneVoiceSessionKey({
|
|
636
|
+
defaultFriendId: options.defaultFriendId,
|
|
637
|
+
from: recording.from,
|
|
638
|
+
to: recording.to,
|
|
639
|
+
callSid: recording.callSid,
|
|
640
|
+
});
|
|
379
641
|
(0, runtime_1.emitNervesEvent)({
|
|
380
642
|
component: "senses",
|
|
381
643
|
event: "senses.voice_twilio_turn_start",
|
|
382
644
|
message: "starting Twilio voice turn",
|
|
383
|
-
meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid },
|
|
645
|
+
meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, sessionKey },
|
|
384
646
|
});
|
|
385
647
|
try {
|
|
648
|
+
if (normalizeTwilioPhonePlaybackMode(options.playbackMode) === "stream") {
|
|
649
|
+
const jobId = safeSegment(utteranceId);
|
|
650
|
+
startTwilioPlaybackStreamJob({
|
|
651
|
+
jobs,
|
|
652
|
+
bridgeOptions: options,
|
|
653
|
+
basePath,
|
|
654
|
+
callDir,
|
|
655
|
+
safeCallSid,
|
|
656
|
+
jobId,
|
|
657
|
+
baseUtteranceId: utteranceId,
|
|
658
|
+
runTurn: async (onAudioChunk) => {
|
|
659
|
+
await fs.mkdir(callDir, { recursive: true });
|
|
660
|
+
const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
|
|
661
|
+
const audio = await downloadRecording({
|
|
662
|
+
recordingUrl: mediaUrl,
|
|
663
|
+
accountSid: options.twilioAccountSid?.trim() || undefined,
|
|
664
|
+
authToken: options.twilioAuthToken?.trim() || undefined,
|
|
665
|
+
});
|
|
666
|
+
await fs.writeFile(inputPath, audio);
|
|
667
|
+
const transcript = await options.transcriber.transcribe({
|
|
668
|
+
utteranceId,
|
|
669
|
+
audioPath: inputPath,
|
|
670
|
+
});
|
|
671
|
+
const turnTranscript = isNoSpeechTranscript(transcript.text)
|
|
672
|
+
? (0, transcript_1.buildVoiceTranscript)({
|
|
673
|
+
utteranceId: `${utteranceId}-nospeech`,
|
|
674
|
+
text: noSpeechPrompt(),
|
|
675
|
+
source: "loopback",
|
|
676
|
+
})
|
|
677
|
+
: transcript;
|
|
678
|
+
return (0, turn_1.runVoiceLoopbackTurn)({
|
|
679
|
+
agentName: options.agentName,
|
|
680
|
+
friendId,
|
|
681
|
+
sessionKey,
|
|
682
|
+
transcript: turnTranscript,
|
|
683
|
+
tts: options.tts,
|
|
684
|
+
runSenseTurn: options.runSenseTurn,
|
|
685
|
+
onAudioChunk,
|
|
686
|
+
});
|
|
687
|
+
},
|
|
688
|
+
meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, utteranceId },
|
|
689
|
+
});
|
|
690
|
+
return xmlResponse(`${playTwiml(streamAudioUrl(options, basePath, safeCallSid, jobId))}${redirectTwiml(options.publicBaseUrl, basePath)}`);
|
|
691
|
+
}
|
|
386
692
|
await fs.mkdir(callDir, { recursive: true });
|
|
387
693
|
const mediaUrl = twilioRecordingMediaUrl(recording.recordingUrl);
|
|
388
694
|
const audio = await downloadRecording({
|
|
@@ -402,16 +708,16 @@ async function handleRecording(options, basePath, params) {
|
|
|
402
708
|
callDir,
|
|
403
709
|
safeCallSid,
|
|
404
710
|
utteranceId: `${utteranceId}-nospeech`,
|
|
405
|
-
friendId
|
|
406
|
-
sessionKey
|
|
711
|
+
friendId,
|
|
712
|
+
sessionKey,
|
|
407
713
|
promptText: noSpeechPrompt(),
|
|
408
714
|
afterPlayback: "redirect",
|
|
409
715
|
});
|
|
410
716
|
}
|
|
411
717
|
const turn = await (0, turn_1.runVoiceLoopbackTurn)({
|
|
412
718
|
agentName: options.agentName,
|
|
413
|
-
friendId
|
|
414
|
-
sessionKey
|
|
719
|
+
friendId,
|
|
720
|
+
sessionKey,
|
|
415
721
|
transcript,
|
|
416
722
|
tts: options.tts,
|
|
417
723
|
runSenseTurn: options.runSenseTurn,
|
|
@@ -419,19 +725,21 @@ async function handleRecording(options, basePath, params) {
|
|
|
419
725
|
if (turn.tts.status !== "delivered") {
|
|
420
726
|
return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
|
|
421
727
|
}
|
|
422
|
-
const
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
728
|
+
const audioUrls = await writeVoiceTurnPlaybackArtifacts({
|
|
729
|
+
bridgeOptions: options,
|
|
730
|
+
basePath,
|
|
731
|
+
callDir,
|
|
732
|
+
safeCallSid,
|
|
733
|
+
baseUtteranceId: utteranceId,
|
|
734
|
+
turn,
|
|
426
735
|
});
|
|
427
|
-
const audioUrl = routeUrl(options.publicBaseUrl, `${basePath}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
|
|
428
736
|
(0, runtime_1.emitNervesEvent)({
|
|
429
737
|
component: "senses",
|
|
430
738
|
event: "senses.voice_twilio_turn_end",
|
|
431
739
|
message: "finished Twilio voice turn",
|
|
432
|
-
meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid,
|
|
740
|
+
meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, playbackCount: audioUrls.length },
|
|
433
741
|
});
|
|
434
|
-
return xmlResponse(`${
|
|
742
|
+
return xmlResponse(`${playManyTwiml(audioUrls)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
|
|
435
743
|
}
|
|
436
744
|
catch (error) {
|
|
437
745
|
(0, runtime_1.emitNervesEvent)({
|
|
@@ -477,9 +785,34 @@ async function handleAudio(options, basePath, requestPath) {
|
|
|
477
785
|
return textResponse(404, "not found");
|
|
478
786
|
}
|
|
479
787
|
}
|
|
788
|
+
async function handleAudioStream(options, basePath, requestPath, jobs) {
|
|
789
|
+
const prefix = `${basePath}/audio-stream/`;
|
|
790
|
+
const pathOnly = requestPath.split("?")[0];
|
|
791
|
+
const rest = pathOnly.slice(prefix.length);
|
|
792
|
+
const parts = rest.split("/");
|
|
793
|
+
if (parts.length !== 2)
|
|
794
|
+
return textResponse(404, "not found");
|
|
795
|
+
const [callSidPart, fileNamePart] = parts;
|
|
796
|
+
const callSid = decodeSafeSegment(callSidPart);
|
|
797
|
+
const fileName = decodeSafeSegment(fileNamePart);
|
|
798
|
+
if (!callSid || !fileName)
|
|
799
|
+
return textResponse(404, "not found");
|
|
800
|
+
const jobId = fileName.replace(/\.[A-Za-z0-9]+$/, "");
|
|
801
|
+
const job = jobs.get(callSid, jobId);
|
|
802
|
+
if (!job)
|
|
803
|
+
return textResponse(404, "not found");
|
|
804
|
+
(0, runtime_1.emitNervesEvent)({
|
|
805
|
+
component: "senses",
|
|
806
|
+
event: "senses.voice_twilio_stream_served",
|
|
807
|
+
message: "served Twilio voice streaming audio job",
|
|
808
|
+
meta: { agentName: options.agentName, callSid, jobId },
|
|
809
|
+
});
|
|
810
|
+
return streamResponse(job.stream(), job.mimeType);
|
|
811
|
+
}
|
|
480
812
|
function createTwilioPhoneBridge(options) {
|
|
481
813
|
new URL(options.publicBaseUrl);
|
|
482
814
|
const basePath = normalizeTwilioPhoneBasePath(options.basePath);
|
|
815
|
+
const jobs = new TwilioAudioStreamJobStore();
|
|
483
816
|
return {
|
|
484
817
|
async handle(request) {
|
|
485
818
|
const method = request.method.toUpperCase();
|
|
@@ -488,6 +821,9 @@ function createTwilioPhoneBridge(options) {
|
|
|
488
821
|
if (method === "GET" && requestPath.startsWith(`${basePath}/audio/`)) {
|
|
489
822
|
return handleAudio(options, basePath, requestPath);
|
|
490
823
|
}
|
|
824
|
+
if (method === "GET" && requestPath.startsWith(`${basePath}/audio-stream/`)) {
|
|
825
|
+
return handleAudioStream(options, basePath, requestPath, jobs);
|
|
826
|
+
}
|
|
491
827
|
if (method === "GET" && routePath === `${basePath}/health`) {
|
|
492
828
|
return textResponse(200, "ok");
|
|
493
829
|
}
|
|
@@ -505,11 +841,11 @@ function createTwilioPhoneBridge(options) {
|
|
|
505
841
|
return textResponse(403, "invalid Twilio signature");
|
|
506
842
|
}
|
|
507
843
|
if (routePath === `${basePath}/incoming`)
|
|
508
|
-
return handleIncoming(options, basePath, params);
|
|
844
|
+
return handleIncoming(options, basePath, params, jobs);
|
|
509
845
|
if (routePath === `${basePath}/listen`)
|
|
510
846
|
return handleListen(options, basePath);
|
|
511
847
|
if (routePath === `${basePath}/recording`)
|
|
512
|
-
return handleRecording(options, basePath, params);
|
|
848
|
+
return handleRecording(options, basePath, params, jobs);
|
|
513
849
|
return textResponse(404, "not found");
|
|
514
850
|
},
|
|
515
851
|
};
|
|
@@ -531,6 +867,35 @@ function readRequestBody(req, limitBytes = 1_000_000) {
|
|
|
531
867
|
req.on("error", reject);
|
|
532
868
|
});
|
|
533
869
|
}
|
|
870
|
+
/* v8 ignore start -- HTTP backpressure is platform-dependent in unit tests @preserve */
|
|
871
|
+
function waitForDrain(res) {
|
|
872
|
+
return new Promise((resolve, reject) => {
|
|
873
|
+
const onDrain = () => {
|
|
874
|
+
res.off("error", onError);
|
|
875
|
+
resolve();
|
|
876
|
+
};
|
|
877
|
+
const onError = (error) => {
|
|
878
|
+
res.off("drain", onDrain);
|
|
879
|
+
reject(error);
|
|
880
|
+
};
|
|
881
|
+
res.once("drain", onDrain);
|
|
882
|
+
res.once("error", onError);
|
|
883
|
+
});
|
|
884
|
+
}
|
|
885
|
+
/* v8 ignore stop */
|
|
886
|
+
async function writeResponseBody(res, body) {
|
|
887
|
+
if (!isAsyncIterableBody(body)) {
|
|
888
|
+
res.end(body);
|
|
889
|
+
return;
|
|
890
|
+
}
|
|
891
|
+
for await (const chunk of body) {
|
|
892
|
+
/* v8 ignore next -- exercised only when Node reports socket backpressure @preserve */
|
|
893
|
+
if (!res.write(chunk)) {
|
|
894
|
+
await waitForDrain(res);
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
res.end();
|
|
898
|
+
}
|
|
534
899
|
async function startTwilioPhoneBridgeServer(options) {
|
|
535
900
|
const port = options.port ?? exports.DEFAULT_TWILIO_PHONE_PORT;
|
|
536
901
|
const host = options.host ?? "127.0.0.1";
|
|
@@ -545,7 +910,7 @@ async function startTwilioPhoneBridgeServer(options) {
|
|
|
545
910
|
body,
|
|
546
911
|
});
|
|
547
912
|
res.writeHead(response.statusCode, response.headers);
|
|
548
|
-
res
|
|
913
|
+
await writeResponseBody(res, response.body);
|
|
549
914
|
}
|
|
550
915
|
catch (error) {
|
|
551
916
|
(0, runtime_1.emitNervesEvent)({
|
|
@@ -555,8 +920,14 @@ async function startTwilioPhoneBridgeServer(options) {
|
|
|
555
920
|
message: "Twilio voice bridge server failed a request",
|
|
556
921
|
meta: { agentName: options.agentName, error: errorMessage(error) },
|
|
557
922
|
});
|
|
558
|
-
|
|
559
|
-
res.
|
|
923
|
+
/* v8 ignore next -- defensive path for async stream failures after headers @preserve */
|
|
924
|
+
if (res.headersSent) {
|
|
925
|
+
res.destroy(error instanceof Error ? error : new Error(String(error)));
|
|
926
|
+
}
|
|
927
|
+
else {
|
|
928
|
+
res.writeHead(500, { "content-type": "text/plain; charset=utf-8" });
|
|
929
|
+
res.end("internal server error");
|
|
930
|
+
}
|
|
560
931
|
}
|
|
561
932
|
});
|
|
562
933
|
await new Promise((resolve, reject) => {
|