@ouro.bot/cli 0.1.0-alpha.563 → 0.1.0-alpha.565
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/changelog.json +16 -0
- package/dist/heart/daemon/cli-exec.js +5 -3
- package/dist/heart/daemon/sense-manager.js +7 -1
- package/dist/senses/shared-turn.js +78 -7
- package/dist/senses/voice/index.js +1 -0
- package/dist/senses/voice/twilio-phone-runtime.js +282 -0
- package/dist/senses/voice/twilio-phone.js +159 -34
- package/dist/senses/voice-entry.js +11 -10
- package/dist/senses/voice-twilio-entry.js +25 -157
- package/package.json +1 -1
package/changelog.json
CHANGED
|
@@ -1,6 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
|
|
3
3
|
"versions": [
|
|
4
|
+
{
|
|
5
|
+
"version": "0.1.0-alpha.565",
|
|
6
|
+
"changes": [
|
|
7
|
+
"Voice shared-turn readback now recovers tool-required `settle` and `speak` deliveries from acknowledged tool calls instead of treating null assistant content as an empty response.",
|
|
8
|
+
"Twilio phone calls now start a real agent voice turn for the call-connected greeting, so the agent greets through the voice channel rather than a hardcoded transport prompt.",
|
|
9
|
+
"Twilio phone no-speech transcripts such as `[BLANK_AUDIO]` now become agent-authored voice reprompts instead of being passed through as caller speech."
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"version": "0.1.0-alpha.564",
|
|
14
|
+
"changes": [
|
|
15
|
+
"Voice now keeps Twilio phone as a transport under the canonical `voice` sense, with configurable agent-scoped webhook paths such as `/voice/agents/slugger/twilio/incoming`.",
|
|
16
|
+
"The managed Voice entrypoint now starts the Twilio phone bridge when this machine has a public voice URL, sharing the same Whisper.cpp STT, voice session, and ElevenLabs TTS code as the standalone local smoke bridge.",
|
|
17
|
+
"Voice setup docs, daemon status, connect guidance, and tests now cover managed Twilio transport readiness while keeping the one-off standalone Twilio bridge available for local smoke testing."
|
|
18
|
+
]
|
|
19
|
+
},
|
|
4
20
|
{
|
|
5
21
|
"version": "0.1.0-alpha.563",
|
|
6
22
|
"changes": [
|
|
@@ -4274,6 +4274,7 @@ function connectMenuTarget(answer) {
|
|
|
4274
4274
|
return "cancel";
|
|
4275
4275
|
}
|
|
4276
4276
|
async function executeConnectVoice(agent, deps) {
|
|
4277
|
+
const agentPathSegment = agent.trim().toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "agent";
|
|
4277
4278
|
const message = [
|
|
4278
4279
|
`Voice foundation for ${agent}`,
|
|
4279
4280
|
"Configure portable ElevenLabs settings with:",
|
|
@@ -4282,15 +4283,16 @@ async function executeConnectVoice(agent, deps) {
|
|
|
4282
4283
|
"Configure this machine's Whisper.cpp attachment with:",
|
|
4283
4284
|
` ouro vault config set --agent ${agent} --scope machine --key voice.whisperCliPath`,
|
|
4284
4285
|
` ouro vault config set --agent ${agent} --scope machine --key voice.whisperModelPath`,
|
|
4285
|
-
"Optional Twilio phone
|
|
4286
|
+
"Optional managed Twilio phone transport setup:",
|
|
4286
4287
|
` ouro vault config set --agent ${agent} --key voice.twilioAccountSid`,
|
|
4287
4288
|
` ouro vault config set --agent ${agent} --key voice.twilioAuthToken`,
|
|
4288
4289
|
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPublicUrl`,
|
|
4290
|
+
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioBasePath --value /voice/agents/${agentPathSegment}/twilio`,
|
|
4289
4291
|
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPort --value 18910`,
|
|
4290
4292
|
` ouro vault config set --agent ${agent} --scope machine --key voice.twilioDefaultFriendId --value ari`,
|
|
4291
|
-
` node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>`,
|
|
4292
|
-
`Set the Twilio number's Voice webhook to POST https://<cloudflare-tunnel>/voice/twilio/incoming.`,
|
|
4293
4293
|
"Then enable agent.json: senses.voice.enabled = true and restart with `ouro up`.",
|
|
4294
|
+
`The managed Voice entrypoint will listen at POST <public-url>/voice/agents/${agentPathSegment}/twilio/incoming.`,
|
|
4295
|
+
`Standalone local smoke remains available with: node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>.`,
|
|
4294
4296
|
"Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.",
|
|
4295
4297
|
].join("\n");
|
|
4296
4298
|
deps.writeStdout(message);
|
|
@@ -204,8 +204,14 @@ function senseFactsFromRuntimeConfig(agent, senses, runtimeConfig, machineRuntim
|
|
|
204
204
|
missing.push("voice.whisperCliPath");
|
|
205
205
|
if (!textField(voice, "whisperModelPath"))
|
|
206
206
|
missing.push("voice.whisperModelPath");
|
|
207
|
+
const twilioPublicUrl = textField(voice, "twilioPublicUrl");
|
|
207
208
|
base.voice = missing.length === 0
|
|
208
|
-
? {
|
|
209
|
+
? {
|
|
210
|
+
configured: true,
|
|
211
|
+
detail: twilioPublicUrl
|
|
212
|
+
? "local Whisper.cpp STT + ElevenLabs TTS; Twilio phone transport attached"
|
|
213
|
+
: "local Whisper.cpp STT + ElevenLabs TTS",
|
|
214
|
+
}
|
|
209
215
|
: {
|
|
210
216
|
configured: false,
|
|
211
217
|
optional: !machineRuntimeConfig.ok && machineRuntimeConfig.reason === "missing",
|
|
@@ -61,6 +61,10 @@ const pipeline_1 = require("./pipeline");
|
|
|
61
61
|
const mcp_manager_1 = require("../repertoire/mcp-manager");
|
|
62
62
|
const runtime_1 = require("../nerves/runtime");
|
|
63
63
|
const RESPONSE_CAP = 50_000;
|
|
64
|
+
/**
 * Acknowledgement text expected in the tool-result message of each
 * delivery tool, keyed by tool name.
 */
const DELIVERY_TOOL_ACKS = new Map(Object.entries({
    settle: "(delivered)",
    speak: "(spoken)",
}));
|
|
64
68
|
/**
|
|
65
69
|
* Strip MiniMax-style `<think>...</think>` reasoning blocks from a response
|
|
66
70
|
* string. Handles unclosed open tags (treats everything from `<think>` to
|
|
@@ -84,6 +88,78 @@ function stripThinkBlocks(input) {
|
|
|
84
88
|
}
|
|
85
89
|
return out.trim();
|
|
86
90
|
}
|
|
91
|
+
/**
 * Return the trimmed assistant content when it is a non-blank string.
 *
 * @param {unknown} content - Raw `content` field of an assistant message.
 * @returns {string | null} Trimmed text, or `null` for non-strings and blank strings.
 */
function assistantContentText(content) {
    const text = typeof content === "string" ? content.trim() : "";
    return text.length > 0 ? text : null;
}
|
|
97
|
+
/**
 * Extract one non-blank string argument from a tool call's JSON arguments.
 *
 * @param {unknown} toolCall - Tool-call entry carrying a `function` descriptor.
 * @param {string} toolName - Function name the call must have.
 * @param {string} argName - Key to read from the parsed arguments object.
 * @returns {string | null} Trimmed argument value, or `null` when the call is
 *   not an object, names another tool, has non-string or unparsable arguments,
 *   parses to something other than a plain object, or the value is missing,
 *   non-string or blank.
 */
function parseToolStringArg(toolCall, toolName, argName) {
    if (toolCall === null || typeof toolCall !== "object") {
        return null;
    }
    const descriptor = toolCall.function;
    if (descriptor?.name !== toolName || typeof descriptor.arguments !== "string") {
        return null;
    }
    let decoded;
    try {
        decoded = JSON.parse(descriptor.arguments);
    }
    catch {
        // Malformed JSON arguments are treated as "no argument".
        return null;
    }
    const isPlainRecord = Boolean(decoded) && typeof decoded === "object" && !Array.isArray(decoded);
    if (!isPlainRecord) {
        return null;
    }
    const raw = decoded[argName];
    const text = typeof raw === "string" ? raw.trim() : "";
    return text ? text : null;
}
|
|
117
|
+
/**
 * Check whether a delivery tool call was acknowledged by a tool-result message.
 *
 * Only the run of `tool` messages directly after the assistant message is
 * inspected; the scan stops at the first message with any other role.
 *
 * @param {Array<object>} messages - Session messages.
 * @param {number} assistantIndex - Index of the assistant message that issued the call.
 * @param {unknown} toolCallId - Id of the tool call to look for.
 * @param {string} toolName - Tool name used to look up the expected acknowledgement text.
 * @returns {boolean} `true` when a matching tool result carries the expected acknowledgement.
 */
function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName) {
    const hasUsableId = typeof toolCallId === "string" && toolCallId.trim() !== "";
    if (!hasUsableId) {
        return false;
    }
    const ack = DELIVERY_TOOL_ACKS.get(toolName);
    let cursor = assistantIndex + 1;
    while (cursor < messages.length) {
        const followUp = messages[cursor];
        if (followUp.role !== "tool") {
            break;
        }
        const acknowledged = followUp.tool_call_id === toolCallId
            && typeof followUp.content === "string"
            && followUp.content.trim() === ack;
        if (acknowledged) {
            return true;
        }
        cursor += 1;
    }
    return false;
}
|
|
133
|
+
/**
 * Recover the text an assistant delivered through `settle` / `speak` tool calls.
 *
 * For every tool call on the assistant message, the `settle.answer` argument is
 * tried first and the `speak.message` argument second; a text is kept only when
 * the corresponding tool result acknowledged delivery.
 *
 * @param {Array<object>} messages - Session messages.
 * @param {number} assistantIndex - Index of the assistant message to inspect.
 * @returns {string | null} Delivered texts joined by newlines, or `null` when
 *   there are no tool calls or none was acknowledged.
 */
function deliveredTextFromAssistantTools(messages, assistantIndex) {
    const { tool_calls: toolCalls } = messages[assistantIndex];
    if (!Array.isArray(toolCalls)) {
        return null;
    }
    const deliveryArgs = [
        ["settle", "answer"],
        ["speak", "message"],
    ];
    const parts = [];
    for (const toolCall of toolCalls) {
        const callId = toolCall && typeof toolCall === "object" ? toolCall.id : undefined;
        for (const [toolName, argName] of deliveryArgs) {
            const text = parseToolStringArg(toolCall, toolName, argName);
            if (text && hasDeliveredToolResult(messages, assistantIndex, callId, toolName)) {
                parts.push(text);
                break;
            }
        }
    }
    return parts.length === 0 ? null : parts.join("\n");
}
|
|
155
|
+
/**
 * Derive the final response text from the last assistant message of a session.
 *
 * Plain assistant content wins; when it is absent or blank, text delivered
 * through acknowledged delivery tool calls is used instead.
 *
 * @param {Array<object>} messages - Session messages.
 * @returns {string | null} Response text, or `null` when there is no assistant
 *   message or nothing could be recovered from it.
 */
function responseFromSessionMessages(messages) {
    const lastAssistantAt = messages.findLastIndex((entry) => entry.role === "assistant");
    if (lastAssistantAt === -1) {
        return null;
    }
    const directText = assistantContentText(messages[lastAssistantAt].content);
    if (directText !== null && directText !== undefined) {
        return directText;
    }
    return deliveredTextFromAssistantTools(messages, lastAssistantAt);
}
|
|
87
163
|
/**
|
|
88
164
|
* Run a single agent turn through the inbound pipeline.
|
|
89
165
|
* Caller provides channel, session key, friend, and message;
|
|
@@ -203,13 +279,8 @@ async function runSenseTurn(options) {
|
|
|
203
279
|
await persistPromise;
|
|
204
280
|
const postTurnSession = (0, context_1.loadSession)(sessPath);
|
|
205
281
|
if (postTurnSession?.messages) {
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
finalResponse = lastAssistant.content;
|
|
209
|
-
}
|
|
210
|
-
else {
|
|
211
|
-
finalResponse = "(agent responded but response was empty)";
|
|
212
|
-
}
|
|
282
|
+
finalResponse = responseFromSessionMessages(postTurnSession.messages)
|
|
283
|
+
?? "(agent responded but response was empty)";
|
|
213
284
|
}
|
|
214
285
|
else {
|
|
215
286
|
finalResponse = "(agent responded but response was empty)";
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.agentScopedTwilioPhoneBasePath = agentScopedTwilioPhoneBasePath;
|
|
37
|
+
exports.resolveTwilioPhoneTransportRuntime = resolveTwilioPhoneTransportRuntime;
|
|
38
|
+
exports.startConfiguredTwilioPhoneTransport = startConfiguredTwilioPhoneTransport;
|
|
39
|
+
exports.closeTwilioPhoneBridgeServer = closeTwilioPhoneBridgeServer;
|
|
40
|
+
const path = __importStar(require("path"));
|
|
41
|
+
const identity_1 = require("../../heart/identity");
|
|
42
|
+
const machine_identity_1 = require("../../heart/machine-identity");
|
|
43
|
+
const provider_credentials_1 = require("../../heart/provider-credentials");
|
|
44
|
+
const runtime_credentials_1 = require("../../heart/runtime-credentials");
|
|
45
|
+
const runtime_1 = require("../../nerves/runtime");
|
|
46
|
+
const elevenlabs_1 = require("./elevenlabs");
|
|
47
|
+
const twilio_phone_1 = require("./twilio-phone");
|
|
48
|
+
const whisper_1 = require("./whisper");
|
|
49
|
+
/**
 * Narrow a value to a non-array object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object | null} The same value when it is a non-null, non-array object; otherwise `null`.
 */
function asRecord(value) {
    const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return isRecord ? value : null;
}
|
|
52
|
+
/**
 * Read a non-blank string from a nested config object.
 *
 * @param {unknown} config - Root config value.
 * @param {string} dottedPath - Dot-separated key path, e.g. `"voice.twilioPublicUrl"`.
 * @returns {string | undefined} Trimmed string, or `undefined` when the path is
 *   broken or the value is not a non-blank string.
 */
function configString(config, dottedPath) {
    let node = config;
    for (const key of dottedPath.split(".")) {
        const container = asRecord(node);
        if (!container) {
            return undefined;
        }
        node = container[key];
    }
    if (typeof node !== "string") {
        return undefined;
    }
    const text = node.trim();
    return text ? text : undefined;
}
|
|
62
|
+
/**
 * Read a finite number from a nested config object.
 *
 * Numeric values are returned as-is; non-blank strings are converted with `Number()`.
 *
 * @param {unknown} config - Root config value.
 * @param {string} dottedPath - Dot-separated key path.
 * @returns {number | undefined} The finite number, or `undefined` when the path
 *   is broken or the value does not yield a finite number.
 */
function configNumber(config, dottedPath) {
    let node = config;
    for (const key of dottedPath.split(".")) {
        const container = asRecord(node);
        if (!container) {
            return undefined;
        }
        node = container[key];
    }
    if (typeof node === "number") {
        return Number.isFinite(node) ? node : undefined;
    }
    if (typeof node !== "string" || !node.trim()) {
        return undefined;
    }
    const numeric = Number(node);
    return Number.isFinite(numeric) ? numeric : undefined;
}
|
|
78
|
+
/**
 * Read a boolean from a nested config object.
 *
 * Accepts real booleans and the strings `"true"` / `"false"` (case-insensitive,
 * surrounding whitespace ignored).
 *
 * @param {unknown} config - Root config value.
 * @param {string} dottedPath - Dot-separated key path.
 * @returns {boolean | undefined} The boolean, or `undefined` when the path is
 *   broken or the value is not a recognised boolean.
 */
function configBoolean(config, dottedPath) {
    let node = config;
    for (const key of dottedPath.split(".")) {
        const container = asRecord(node);
        if (!container) {
            return undefined;
        }
        node = container[key];
    }
    if (typeof node === "boolean") {
        return node;
    }
    if (typeof node !== "string") {
        return undefined;
    }
    switch (node.trim().toLowerCase()) {
        case "true":
            return true;
        case "false":
            return false;
        default:
            return undefined;
    }
}
|
|
97
|
+
/**
 * Unwrap a config read result or fail with a labelled error.
 *
 * @param {{ ok: boolean, config?: unknown, error?: unknown }} result - Read result.
 * @param {string} label - Human-readable name of the config used in the error message.
 * @returns {unknown} `result.config` when `result.ok` is truthy.
 * @throws {Error} When `result.ok` is falsy.
 */
function requireConfig(result, label) {
    if (!result.ok) {
        throw new Error(`${label} unavailable: ${result.error}`);
    }
    return result.config;
}
|
|
102
|
+
/**
 * Return a truthy value or throw with the supplied guidance.
 *
 * @param {unknown} value - Value that must be truthy.
 * @param {string} guidance - Error message used when the value is falsy.
 * @returns {unknown} The value unchanged.
 * @throws {Error} When `value` is falsy.
 */
function required(value, guidance) {
    if (!value) {
        throw new Error(guidance);
    }
    return value;
}
|
|
107
|
+
/**
 * List the distinct providers an agent config selects.
 *
 * @param {{ humanFacing: { provider: unknown }, agentFacing: { provider: unknown }, provider?: unknown }} config
 *   Agent config; `provider` is included only when truthy.
 * @returns {Array<unknown>} Unique providers in first-seen order
 *   (human-facing, agent-facing, then `provider`).
 */
function selectedAgentProviders(config) {
    const candidates = [config.humanFacing.provider, config.agentFacing.provider];
    if (config.provider) {
        candidates.push(config.provider);
    }
    return [...new Set(candidates)];
}
|
|
115
|
+
/**
 * Refresh the credential pool for the providers selected by the agent config
 * and verify every selected provider has an entry.
 *
 * @param {string} agentName - Agent whose credential pool is refreshed.
 * @returns {Promise<void>}
 * @throws {Error} When the pool refresh reports failure, or when any selected
 *   provider is missing from the refreshed pool.
 */
async function cacheSelectedProviderCredentials(agentName) {
    const agentConfig = (0, identity_1.loadAgentConfig)();
    const providers = selectedAgentProviders(agentConfig);
    const refreshed = await (0, provider_credentials_1.refreshProviderCredentialPool)(agentName, { providers });
    if (!refreshed.ok) {
        throw new Error(`provider credentials unavailable for phone voice: ${refreshed.error}`);
    }
    const available = refreshed.pool.providers;
    const missing = providers.filter((provider) => !available[provider]);
    if (missing.length === 0) {
        return;
    }
    throw new Error(`missing provider credentials for phone voice: ${missing.join(", ")}`);
}
|
|
126
|
+
/**
 * Turn an agent name into a single URL path segment.
 *
 * The name is trimmed and lower-cased, every run of characters outside
 * `[a-z0-9._-]` becomes one `-`, and leading/trailing dashes are removed.
 *
 * @param {string} agentName - Agent name as configured.
 * @returns {string} The sanitised segment, or `"agent"` when nothing usable
 *   remains or the result would be a URL dot-segment.
 */
function agentPathSegment(agentName) {
    const segment = agentName
        .trim()
        .toLowerCase()
        .replace(/[^a-z0-9._-]+/g, "-")
        .replace(/^-+|-+$/g, "");
    // "." and ".." pass the character filter but are dot-segments: URL parsing
    // collapses them, so a path built from them would not survive `new URL()`.
    if (segment === "" || segment === "." || segment === "..") {
        return "agent";
    }
    return segment;
}
|
|
129
|
+
/**
 * Trim an optional string, collapsing blank results to `undefined`.
 *
 * @param {string | null | undefined} value - Optional text.
 * @returns {string | undefined} Trimmed text, or `undefined` when the input is
 *   nullish or trims to an empty string.
 */
function trimOptional(value) {
    const text = value?.trim();
    return text ? text : undefined;
}
|
|
132
|
+
/**
 * Build the agent-scoped Twilio webhook base path.
 *
 * @param {string} agentName - Agent name; it is sanitised into a path segment first.
 * @returns {string} Path of the form `/voice/agents/<segment>/twilio`.
 */
function agentScopedTwilioPhoneBasePath(agentName) {
    const segment = agentPathSegment(agentName);
    return `/voice/agents/${segment}/twilio`;
}
|
|
135
|
+
/**
 * Resolve the effective Twilio phone transport settings from overrides,
 * the machine runtime config and the portable runtime config.
 *
 * Precedence for each setting is: explicit override, then config, then a
 * built-in default where one exists. The transport counts as enabled when
 * `overrides.enabled` / `voice.twilioEnabled` says so, or, when neither is set,
 * whenever a public URL is configured.
 *
 * @param {object} options
 * @param {string} options.agentName - Agent the transport belongs to.
 * @param {unknown} options.runtimeConfig - Portable runtime config (ElevenLabs and Twilio credentials).
 * @param {unknown} options.machineConfig - Machine runtime config (`voice.*` keys).
 * @param {object} [options.overrides] - Per-call overrides for individual settings.
 * @param {string} [options.defaultBasePath] - Base path used when neither override nor config supplies one.
 * @param {boolean} [options.requirePublicUrl] - Throw instead of reporting "disabled" when no public URL exists.
 * @returns {{ status: "disabled", reason: string } | { status: "configured", settings: object }}
 * @throws {Error} When the public URL is required but missing, malformed or not
 *   https, when the base path is invalid, or when a required ElevenLabs /
 *   Whisper.cpp setting is missing.
 */
function resolveTwilioPhoneTransportRuntime(options) {
    const overrides = options.overrides ?? {};
    const configuredPublicBaseUrl = trimOptional(overrides.publicBaseUrl)
        ?? configString(options.machineConfig, "voice.twilioPublicUrl");
    const explicitEnabled = overrides.enabled ?? configBoolean(options.machineConfig, "voice.twilioEnabled");
    if (!configuredPublicBaseUrl && options.requirePublicUrl) {
        throw new Error("missing voice.twilioPublicUrl in this machine's runtime config");
    }
    const enabled = explicitEnabled ?? !!configuredPublicBaseUrl;
    if (!enabled) {
        // A configured URL with a disabled transport can only come from an
        // explicit opt-out, so report that rather than a missing URL.
        const reason = configuredPublicBaseUrl
            ? "Twilio phone transport is explicitly disabled (voice.twilioEnabled is false)"
            : "voice.twilioPublicUrl is not configured";
        return { status: "disabled", reason };
    }
    if (!configuredPublicBaseUrl) {
        // Explicitly enabled without a URL: nothing to attach to.
        throw new Error("missing voice.twilioPublicUrl in this machine's runtime config");
    }
    let publicUrl;
    try {
        publicUrl = new URL(configuredPublicBaseUrl);
    }
    catch (error) {
        // Name the offending config key instead of surfacing a bare "Invalid URL".
        throw new Error(`voice.twilioPublicUrl is not a valid URL: ${configuredPublicBaseUrl}`, { cause: error });
    }
    if (publicUrl.protocol !== "https:") {
        throw new Error("voice.twilioPublicUrl must be an https URL");
    }
    const publicBaseUrl = publicUrl.toString();
    const basePath = (0, twilio_phone_1.normalizeTwilioPhoneBasePath)(overrides.basePath
        ?? configString(options.machineConfig, "voice.twilioBasePath")
        ?? options.defaultBasePath
        ?? twilio_phone_1.TWILIO_PHONE_WEBHOOK_BASE_PATH);
    const elevenLabsApiKey = required(configString(options.runtimeConfig, "integrations.elevenLabsApiKey"), "missing integrations.elevenLabsApiKey; run 'ouro connect voice --agent <agent>' for setup guidance");
    const elevenLabsVoiceId = required(trimOptional(overrides.elevenLabsVoiceId)
        ?? configString(options.runtimeConfig, "integrations.elevenLabsVoiceId")
        ?? configString(options.runtimeConfig, "voice.elevenLabsVoiceId"), "missing integrations.elevenLabsVoiceId; save the ElevenLabs voice ID before starting phone voice");
    const whisperCliPath = required(trimOptional(overrides.whisperCliPath)
        ?? configString(options.machineConfig, "voice.whisperCliPath"), "missing voice.whisperCliPath in this machine's runtime config");
    const whisperModelPath = required(trimOptional(overrides.whisperModelPath)
        ?? configString(options.machineConfig, "voice.whisperModelPath"), "missing voice.whisperModelPath in this machine's runtime config");
    const outputDir = trimOptional(overrides.outputDir)
        ?? configString(options.machineConfig, "voice.twilioOutputDir")
        ?? path.join((0, identity_1.getAgentRoot)(options.agentName), "state", "voice", "twilio-phone");
    const settings = {
        agentName: options.agentName,
        publicBaseUrl,
        basePath,
        webhookUrl: (0, twilio_phone_1.twilioPhoneWebhookUrl)(publicBaseUrl, basePath),
        outputDir,
        port: overrides.port
            ?? configNumber(options.machineConfig, "voice.twilioPort")
            ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PORT,
        host: trimOptional(overrides.host)
            ?? configString(options.machineConfig, "voice.twilioHost")
            ?? "127.0.0.1",
        elevenLabsApiKey,
        elevenLabsVoiceId,
        whisperCliPath,
        whisperModelPath,
        twilioAccountSid: configString(options.runtimeConfig, "voice.twilioAccountSid"),
        twilioAuthToken: configString(options.runtimeConfig, "voice.twilioAuthToken"),
        defaultFriendId: trimOptional(overrides.defaultFriendId)
            ?? configString(options.machineConfig, "voice.twilioDefaultFriendId"),
        recordTimeoutSeconds: overrides.recordTimeoutSeconds
            ?? configNumber(options.machineConfig, "voice.twilioRecordTimeoutSeconds")
            ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
        recordMaxLengthSeconds: overrides.recordMaxLengthSeconds
            ?? configNumber(options.machineConfig, "voice.twilioRecordMaxLengthSeconds")
            ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
    };
    return { status: "configured", settings };
}
|
|
199
|
+
/**
 * Production collaborators for `startConfiguredTwilioPhoneTransport`.
 * Callers may pass a replacement object with the same keys as the second
 * argument of that function.
 */
const defaultTwilioPhoneTransportRuntimeDeps = {
    // Runtime credential bootstrap and machine identity.
    waitForRuntimeCredentialBootstrap: runtime_credentials_1.waitForRuntimeCredentialBootstrap,
    loadMachineIdentity: machine_identity_1.loadOrCreateMachineIdentity,
    // Config refresh, then read.
    refreshRuntimeConfig: runtime_credentials_1.refreshRuntimeCredentialConfig,
    refreshMachineRuntimeConfig: runtime_credentials_1.refreshMachineRuntimeCredentialConfig,
    readRuntimeConfig: runtime_credentials_1.readRuntimeCredentialConfig,
    readMachineRuntimeConfig: runtime_credentials_1.readMachineRuntimeCredentialConfig,
    // Provider credentials for the agent turn.
    cacheSelectedProviderCredentials: cacheSelectedProviderCredentials,
    // Speech-to-text, text-to-speech and the HTTP bridge.
    createTranscriber: whisper_1.createWhisperCppTranscriber,
    createTts: elevenlabs_1.createElevenLabsTtsClient,
    startBridgeServer: twilio_phone_1.startTwilioPhoneBridgeServer,
};
|
|
211
|
+
/**
 * Start the Twilio phone bridge for an agent when this machine is configured for it.
 *
 * Steps: wait for the runtime credential bootstrap, refresh both runtime
 * configs (refresh rejections are ignored; the configs are read afterwards
 * regardless), resolve the transport settings, and, unless the transport
 * resolves as disabled, cache provider credentials, build the transcriber
 * and TTS client, and start the bridge server. A nerves event is emitted for
 * both the disabled and the ready outcome.
 *
 * @param {object} options
 * @param {string} options.agentName - Agent to start the transport for.
 * @param {object} [options.overrides] - Setting overrides forwarded to the resolver.
 * @param {string} [options.defaultBasePath] - Fallback webhook base path.
 * @param {boolean} [options.requirePublicUrl] - Forwarded to the resolver.
 * @param {object} [deps] - Collaborators; defaults to the production set.
 * @returns {Promise<object>} The disabled resolution, or
 *   `{ status: "started", settings, bridge }`.
 * @throws {Error} When either config cannot be read, or whatever the resolver
 *   and the collaborators throw.
 */
async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilioPhoneTransportRuntimeDeps) {
    const { agentName } = options;
    await deps.waitForRuntimeCredentialBootstrap(agentName);
    const machine = deps.loadMachineIdentity();
    // Best-effort refresh: a rejected refresh must not prevent reading the config below.
    const ignoreFailure = () => undefined;
    await Promise.all([
        deps.refreshRuntimeConfig(agentName, { preserveCachedOnFailure: true }).catch(ignoreFailure),
        deps.refreshMachineRuntimeConfig(agentName, machine.machineId, { preserveCachedOnFailure: true }).catch(ignoreFailure),
    ]);
    const runtimeConfig = requireConfig(deps.readRuntimeConfig(agentName), "portable runtime/config");
    const machineConfig = requireConfig(deps.readMachineRuntimeConfig(agentName), "machine runtime config");
    const resolution = resolveTwilioPhoneTransportRuntime({
        agentName,
        runtimeConfig,
        machineConfig,
        overrides: options.overrides,
        defaultBasePath: options.defaultBasePath,
        requirePublicUrl: options.requirePublicUrl,
    });
    if (resolution.status === "disabled") {
        (0, runtime_1.emitNervesEvent)({
            component: "senses",
            event: "senses.voice_twilio_transport_disabled",
            message: "Twilio phone voice transport is not attached on this machine",
            meta: { agentName, reason: resolution.reason },
        });
        return resolution;
    }
    await deps.cacheSelectedProviderCredentials(agentName);
    const { settings } = resolution;
    const transcriber = deps.createTranscriber({
        whisperCliPath: settings.whisperCliPath,
        modelPath: settings.whisperModelPath,
    });
    const tts = deps.createTts({
        apiKey: settings.elevenLabsApiKey,
        voiceId: settings.elevenLabsVoiceId,
        outputFormat: "mp3_44100_128",
    });
    const bridgeOptions = {
        agentName: settings.agentName,
        publicBaseUrl: settings.publicBaseUrl,
        basePath: settings.basePath,
        outputDir: settings.outputDir,
        transcriber,
        tts,
        port: settings.port,
        host: settings.host,
        twilioAccountSid: settings.twilioAccountSid,
        twilioAuthToken: settings.twilioAuthToken,
        defaultFriendId: settings.defaultFriendId,
        recordTimeoutSeconds: settings.recordTimeoutSeconds,
        recordMaxLengthSeconds: settings.recordMaxLengthSeconds,
    };
    const bridge = await deps.startBridgeServer(bridgeOptions);
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_twilio_transport_ready",
        message: "Twilio phone voice transport is ready",
        meta: {
            agentName: settings.agentName,
            localUrl: bridge.localUrl,
            publicBaseUrl: settings.publicBaseUrl,
            basePath: settings.basePath,
            webhookUrl: settings.webhookUrl,
        },
    });
    return { status: "started", settings, bridge };
}
|
|
277
|
+
/**
 * Close a running Twilio phone bridge.
 *
 * @param {{ server: { close(callback: (error?: Error) => void): unknown } }} server
 *   Bridge handle whose `server` property exposes a callback-style `close`.
 * @returns {Promise<void>} Resolves when `close` calls back without an error;
 *   rejects with the error otherwise.
 */
function closeTwilioPhoneBridgeServer(server) {
    return new Promise((resolve, reject) => {
        const onClosed = (error) => {
            if (error) {
                reject(error);
                return;
            }
            resolve();
        };
        server.server.close(onClosed);
    });
}
|
|
@@ -34,6 +34,8 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
|
|
37
|
+
exports.normalizeTwilioPhoneBasePath = normalizeTwilioPhoneBasePath;
|
|
38
|
+
exports.twilioPhoneWebhookUrl = twilioPhoneWebhookUrl;
|
|
37
39
|
exports.computeTwilioSignature = computeTwilioSignature;
|
|
38
40
|
exports.validateTwilioSignature = validateTwilioSignature;
|
|
39
41
|
exports.twilioRecordingMediaUrl = twilioRecordingMediaUrl;
|
|
@@ -46,6 +48,7 @@ const http = __importStar(require("http"));
|
|
|
46
48
|
const path = __importStar(require("path"));
|
|
47
49
|
const runtime_1 = require("../../nerves/runtime");
|
|
48
50
|
const playback_1 = require("./playback");
|
|
51
|
+
const transcript_1 = require("./transcript");
|
|
49
52
|
const turn_1 = require("./turn");
|
|
50
53
|
exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
|
|
51
54
|
exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
|
|
@@ -112,14 +115,29 @@ function escapeXml(input) {
|
|
|
112
115
|
function routeUrl(publicBaseUrl, route) {
|
|
113
116
|
return new URL(route, publicBaseUrl).toString();
|
|
114
117
|
}
|
|
118
|
+
/**
 * Normalise and validate a Twilio webhook base path.
 *
 * The value is trimmed, given a leading slash and stripped of trailing slashes.
 * The result must consist of `/`-separated segments made of unreserved URL
 * characters (`A-Z a-z 0-9 . _ ~ -`), with no empty segment and no `.` / `..`
 * segment.
 *
 * @param {string} [value] - Base path; defaults to the module's standard webhook base path.
 * @returns {string} Normalised path such as `/voice/agents/slugger/twilio`.
 * @throws {Error} When the path is empty, contains a disallowed character, an
 *   empty segment or a dot-segment.
 */
function normalizeTwilioPhoneBasePath(value = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
    const trimmed = value.trim();
    const withLeadingSlash = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
    const withoutTrailingSlash = withLeadingSlash.replace(/\/+$/, "");
    if (!withoutTrailingSlash || withoutTrailingSlash === "/") {
        throw new Error("Twilio phone webhook base path is empty");
    }
    const hasOnlyAllowedCharacters = /^\/[A-Za-z0-9._~/-]+$/.test(withoutTrailingSlash);
    const hasEmptySegment = withoutTrailingSlash.includes("//");
    // Routes under this path are resolved with `new URL()`, which collapses
    // "." and ".." segments; accepting them would make the generated URLs
    // point somewhere other than the configured base path.
    const hasDotSegment = withoutTrailingSlash
        .split("/")
        .some((segment) => segment === "." || segment === "..");
    if (!hasOnlyAllowedCharacters || hasEmptySegment || hasDotSegment) {
        throw new Error(`invalid Twilio phone webhook base path: ${value}`);
    }
    return withoutTrailingSlash;
}
|
|
130
|
+
/**
 * Build the public URL of the incoming-call webhook.
 *
 * @param {string} publicBaseUrl - Public base URL of this bridge.
 * @param {string} [basePath] - Webhook base path; defaults to the module's standard base path.
 * @returns {string} URL of `<normalised base path>/incoming` resolved against `publicBaseUrl`.
 */
function twilioPhoneWebhookUrl(publicBaseUrl, basePath = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
    const normalizedBasePath = normalizeTwilioPhoneBasePath(basePath);
    return routeUrl(publicBaseUrl, `${normalizedBasePath}/incoming`);
}
|
|
115
133
|
function requestPublicUrl(publicBaseUrl, requestPath) {
|
|
116
134
|
return routeUrl(publicBaseUrl, requestPath);
|
|
117
135
|
}
|
|
118
136
|
function recordTwiml(options) {
|
|
119
|
-
return `<Record action="${escapeXml(routeUrl(options.publicBaseUrl, `${
|
|
137
|
+
return `<Record action="${escapeXml(routeUrl(options.publicBaseUrl, `${options.basePath}/recording`))}" method="POST" playBeep="false" timeout="${options.timeoutSeconds}" maxLength="${options.maxLengthSeconds}" trim="trim-silence" />`;
|
|
120
138
|
}
|
|
121
|
-
function redirectTwiml(publicBaseUrl) {
|
|
122
|
-
return `<Redirect method="POST">${escapeXml(routeUrl(publicBaseUrl, `${
|
|
139
|
+
function redirectTwiml(publicBaseUrl, basePath) {
|
|
140
|
+
return `<Redirect method="POST">${escapeXml(routeUrl(publicBaseUrl, `${basePath}/listen`))}</Redirect>`;
|
|
123
141
|
}
|
|
124
142
|
function sayTwiml(message) {
|
|
125
143
|
return `<Say>${escapeXml(message)}</Say>`;
|
|
@@ -158,6 +176,34 @@ function friendIdFromCaller(from, callSid) {
|
|
|
158
176
|
const phoneish = from.replace(/[^0-9A-Za-z]+/g, "");
|
|
159
177
|
return phoneish ? `twilio-${phoneish}` : `twilio-${safeSegment(callSid)}`;
|
|
160
178
|
}
|
|
179
|
+
/**
 * Pick the friend id for a phone call.
 *
 * @param {{ defaultFriendId?: string }} options - Bridge options.
 * @param {string} from - Caller id reported by Twilio.
 * @param {string} callSid - Twilio call SID.
 * @returns {string} The trimmed `defaultFriendId` when it is non-blank;
 *   otherwise the id derived from the caller and call SID.
 */
function voiceFriendId(options, from, callSid) {
    const pinnedFriendId = options.defaultFriendId?.trim();
    return pinnedFriendId ? pinnedFriendId : friendIdFromCaller(from, callSid);
}
|
|
182
|
+
/**
 * Compose the prompt given to the agent when a phone call connects.
 *
 * @param {{ From?: string, To?: string }} params - Twilio webhook parameters.
 * @returns {string} Newline-joined prompt stating the caller id and dialed
 *   line (or their absence) and asking the agent to greet the caller.
 */
function callConnectedPrompt(params) {
    const callerId = params.From?.trim();
    const dialedLine = params.To?.trim();
    const lines = [
        "A Twilio phone voice call just connected.",
        "This is the first audible turn in the call.",
    ];
    lines.push(callerId ? `Twilio caller ID: ${callerId}.` : "Twilio did not provide caller ID.");
    lines.push(dialedLine ? `Dialed line: ${dialedLine}.` : "Twilio did not provide the dialed line.");
    lines.push("Respond through the voice channel as yourself. Greet the caller naturally and briefly, then invite them to speak.");
    return lines.join("\n");
}
|
|
193
|
+
/**
 * Compose the prompt given to the agent when a recording held no intelligible speech.
 *
 * @returns {string} Newline-joined prompt asking the agent to reprompt the caller.
 */
function noSpeechPrompt() {
    const lines = [
        "The last Twilio phone recording contained no intelligible speech.",
        "The caller is still on the line.",
        "Respond through the voice channel as yourself. Briefly ask them to try again or check whether they are there.",
    ];
    return lines.join("\n");
}
|
|
200
|
+
/**
 * Tell whether a transcript is only a "no speech" marker.
 *
 * The text is trimmed, stripped of trailing `.`, `!` and `?`, and upper-cased
 * before being compared with the known markers.
 *
 * @param {string} text - Transcript text.
 * @returns {boolean} `true` for `[BLANK_AUDIO]`, `BLANK_AUDIO`, `[NO_SPEECH]` or `NO_SPEECH`.
 */
function isNoSpeechTranscript(text) {
    const marker = text.trim().replace(/[.!?]+$/g, "").toUpperCase();
    return ["[BLANK_AUDIO]", "BLANK_AUDIO", "[NO_SPEECH]", "NO_SPEECH"].includes(marker);
}
|
|
161
207
|
function parseRecordingParams(params) {
|
|
162
208
|
const callSid = params.CallSid?.trim();
|
|
163
209
|
const recordingSid = params.RecordingSid?.trim();
|
|
@@ -171,16 +217,53 @@ function parseRecordingParams(params) {
|
|
|
171
217
|
from: params.From?.trim() ?? "",
|
|
172
218
|
};
|
|
173
219
|
}
|
|
174
|
-
function recordAgainResponse(
|
|
220
|
+
function recordAgainResponse(options, basePath, message) {
|
|
175
221
|
return xmlResponse(`${sayTwiml(message)}${recordTwiml({
|
|
176
|
-
publicBaseUrl,
|
|
177
|
-
|
|
178
|
-
|
|
222
|
+
publicBaseUrl: options.publicBaseUrl,
|
|
223
|
+
basePath,
|
|
224
|
+
timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
|
|
225
|
+
maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
|
|
179
226
|
})}`);
|
|
180
227
|
}
|
|
181
228
|
function errorMessage(error) {
|
|
182
229
|
return error instanceof Error ? error.message : String(error);
|
|
183
230
|
}
|
|
231
|
+
/**
 * Build the TwiML fragment that collects the caller's next input.
 *
 * @param {object} options - Bridge options (`publicBaseUrl` plus optional record timings).
 * @param {string} basePath - Webhook base path.
 * @param {string} mode - `"redirect"` yields a redirect fragment; any other value yields a record fragment.
 * @returns {string} TwiML fragment.
 */
function nextInputTwiml(options, basePath, mode) {
    const { publicBaseUrl } = options;
    if (mode === "redirect") {
        return redirectTwiml(publicBaseUrl, basePath);
    }
    const timeoutSeconds = options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS;
    const maxLengthSeconds = options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS;
    return recordTwiml({ publicBaseUrl, basePath, timeoutSeconds, maxLengthSeconds });
}
|
|
241
|
+
/**
 * Runs one voice turn whose input is a prompt string rather than caller audio,
 * then answers with markup that plays the synthesized reply.
 *
 * Reads from `options`: `bridgeOptions` (agentName, tts, runSenseTurn,
 * publicBaseUrl and record limits), `utteranceId`, `promptText`, `friendId`,
 * `sessionKey`, `basePath`, `callDir`, `safeCallSid` and `afterPlayback`
 * (the mode passed to `nextInputTwiml`).
 *
 * @returns The `xmlResponse(...)` value: a spoken failure notice when TTS was
 *   not delivered, otherwise a play instruction for the written audio file; in
 *   both cases followed by the next-input markup.
 */
async function runPhonePromptTurn(options) {
    const bridge = options.bridgeOptions;
    // The prompt is wrapped as a "loopback" transcript so it flows through the normal turn runner.
    const promptTranscript = (0, transcript_1.buildVoiceTranscript)({
        utteranceId: options.utteranceId,
        text: options.promptText,
        source: "loopback",
    });
    const turn = await (0, turn_1.runVoiceLoopbackTurn)({
        agentName: bridge.agentName,
        friendId: options.friendId,
        sessionKey: options.sessionKey,
        transcript: promptTranscript,
        tts: bridge.tts,
        runSenseTurn: bridge.runSenseTurn,
    });
    const followUp = nextInputTwiml(bridge, options.basePath, options.afterPlayback);
    const delivered = turn.tts.status === "delivered";
    if (!delivered) {
        const notice = sayTwiml("voice output failed after the text response was captured.");
        return xmlResponse(`${notice}${followUp}`);
    }
    const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
        utteranceId: options.utteranceId,
        delivery: turn.tts,
        outputDir: options.callDir,
    });
    // Audio is addressed as <basePath>/audio/<callSid>/<file name>.
    const callSegment = encodeURIComponent(options.safeCallSid);
    const fileSegment = encodeURIComponent(path.basename(playback.audioPath));
    const audioUrl = routeUrl(bridge.publicBaseUrl, `${options.basePath}/audio/${callSegment}/${fileSegment}`);
    return xmlResponse(`${playTwiml(audioUrl)}${followUp}`);
}
|
|
184
267
|
function computeTwilioSignature(input) {
|
|
185
268
|
const payload = input.url + Object.keys(input.params)
|
|
186
269
|
.sort()
|
|
@@ -226,28 +309,56 @@ function verifyRequest(options, request, params) {
|
|
|
226
309
|
signature: headerValue(request.headers, "x-twilio-signature"),
|
|
227
310
|
});
|
|
228
311
|
}
|
|
229
|
-
async function handleIncoming(options) {
|
|
230
|
-
const
|
|
312
|
+
/**
 * Handles the call-connected webhook by running a greeting voice turn.
 *
 * Emits `senses.voice_twilio_incoming`, creates the per-call output directory,
 * and delegates to `runPhonePromptTurn` with the call-connected prompt. If any
 * of that throws, emits `senses.voice_twilio_incoming_error` and answers with a
 * bare record instruction instead.
 *
 * @param options - Bridge options (agentName, outputDir, publicBaseUrl, record limits, ...).
 * @param basePath - Webhook base path used for follow-up routes.
 * @param params - Webhook form parameters; `CallSid` and `From` are read here.
 * @returns The XML response for the greeting, or the record-only fallback.
 */
async function handleIncoming(options, basePath, params) {
    // Calls without a CallSid share the literal "incoming" segment.
    const trimmedCallSid = params.CallSid?.trim();
    const callSid = trimmedCallSid || "incoming";
    const safeCallSid = safeSegment(callSid);
    const callDir = path.join(options.outputDir, safeCallSid);
    const utteranceId = `twilio-${safeCallSid}-connected`;
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_twilio_incoming",
        message: "Twilio voice call connected",
        meta: { agentName: options.agentName, callSid: safeCallSid },
    });
    try {
        await fs.mkdir(callDir, { recursive: true });
        const greetingTurn = {
            bridgeOptions: options,
            basePath,
            callDir,
            safeCallSid,
            utteranceId,
            friendId: voiceFriendId(options, params.From?.trim() ?? "", callSid),
            sessionKey: `twilio-${safeCallSid}`,
            promptText: callConnectedPrompt(params),
            afterPlayback: "record",
        };
        return await runPhonePromptTurn(greetingTurn);
    }
    catch (error) {
        (0, runtime_1.emitNervesEvent)({
            level: "error",
            component: "senses",
            event: "senses.voice_twilio_incoming_error",
            message: "Twilio incoming voice greeting turn failed",
            meta: { agentName: options.agentName, callSid: safeCallSid, error: errorMessage(error) },
        });
        // The greeting failed; fall back to a plain record instruction.
        return xmlResponse(nextInputTwiml(options, basePath, "record"));
    }
}
|
|
243
|
-
async function handleListen(options) {
|
|
353
|
+
/**
 * Handles the listen webhook by answering with a single record instruction.
 *
 * @param options - Bridge options; reads `publicBaseUrl` and the optional record
 *   limits, which fall back to the exported defaults.
 * @param basePath - Webhook base path forwarded to `recordTwiml`.
 * @returns The value produced by `xmlResponse` for the record markup.
 */
async function handleListen(options, basePath) {
    const { publicBaseUrl, recordTimeoutSeconds, recordMaxLengthSeconds } = options;
    const recordMarkup = recordTwiml({
        publicBaseUrl,
        basePath,
        timeoutSeconds: recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
        maxLengthSeconds: recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
    });
    return xmlResponse(recordMarkup);
}
|
|
250
|
-
async function handleRecording(options, params) {
|
|
361
|
+
async function handleRecording(options, basePath, params) {
|
|
251
362
|
const recording = parseRecordingParams(params);
|
|
252
363
|
if (!recording) {
|
|
253
364
|
(0, runtime_1.emitNervesEvent)({
|
|
@@ -257,7 +368,7 @@ async function handleRecording(options, params) {
|
|
|
257
368
|
message: "Twilio recording callback was missing required fields",
|
|
258
369
|
meta: { agentName: options.agentName },
|
|
259
370
|
});
|
|
260
|
-
return recordAgainResponse(options
|
|
371
|
+
return recordAgainResponse(options, basePath, "I did not receive audio. Please try again.");
|
|
261
372
|
}
|
|
262
373
|
const safeCallSid = safeSegment(recording.callSid);
|
|
263
374
|
const safeRecordingSid = safeSegment(recording.recordingSid);
|
|
@@ -284,30 +395,43 @@ async function handleRecording(options, params) {
|
|
|
284
395
|
utteranceId,
|
|
285
396
|
audioPath: inputPath,
|
|
286
397
|
});
|
|
398
|
+
if (isNoSpeechTranscript(transcript.text)) {
|
|
399
|
+
return await runPhonePromptTurn({
|
|
400
|
+
bridgeOptions: options,
|
|
401
|
+
basePath,
|
|
402
|
+
callDir,
|
|
403
|
+
safeCallSid,
|
|
404
|
+
utteranceId: `${utteranceId}-nospeech`,
|
|
405
|
+
friendId: voiceFriendId(options, recording.from, recording.callSid),
|
|
406
|
+
sessionKey: `twilio-${safeCallSid}`,
|
|
407
|
+
promptText: noSpeechPrompt(),
|
|
408
|
+
afterPlayback: "redirect",
|
|
409
|
+
});
|
|
410
|
+
}
|
|
287
411
|
const turn = await (0, turn_1.runVoiceLoopbackTurn)({
|
|
288
412
|
agentName: options.agentName,
|
|
289
|
-
friendId: options
|
|
413
|
+
friendId: voiceFriendId(options, recording.from, recording.callSid),
|
|
290
414
|
sessionKey: `twilio-${safeCallSid}`,
|
|
291
415
|
transcript,
|
|
292
416
|
tts: options.tts,
|
|
293
417
|
runSenseTurn: options.runSenseTurn,
|
|
294
418
|
});
|
|
295
419
|
if (turn.tts.status !== "delivered") {
|
|
296
|
-
return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl)}`);
|
|
420
|
+
return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
|
|
297
421
|
}
|
|
298
422
|
const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
|
|
299
423
|
utteranceId,
|
|
300
424
|
delivery: turn.tts,
|
|
301
425
|
outputDir: callDir,
|
|
302
426
|
});
|
|
303
|
-
const audioUrl = routeUrl(options.publicBaseUrl, `${
|
|
427
|
+
const audioUrl = routeUrl(options.publicBaseUrl, `${basePath}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
|
|
304
428
|
(0, runtime_1.emitNervesEvent)({
|
|
305
429
|
component: "senses",
|
|
306
430
|
event: "senses.voice_twilio_turn_end",
|
|
307
431
|
message: "finished Twilio voice turn",
|
|
308
432
|
meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, audioPath: playback.audioPath },
|
|
309
433
|
});
|
|
310
|
-
return xmlResponse(`${playTwiml(audioUrl)}${redirectTwiml(options.publicBaseUrl)}`);
|
|
434
|
+
return xmlResponse(`${playTwiml(audioUrl)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
|
|
311
435
|
}
|
|
312
436
|
catch (error) {
|
|
313
437
|
(0, runtime_1.emitNervesEvent)({
|
|
@@ -322,11 +446,11 @@ async function handleRecording(options, params) {
|
|
|
322
446
|
error: errorMessage(error),
|
|
323
447
|
},
|
|
324
448
|
});
|
|
325
|
-
return xmlResponse(`${sayTwiml("I could not process that audio. Please try again.")}${redirectTwiml(options.publicBaseUrl)}`);
|
|
449
|
+
return xmlResponse(`${sayTwiml("I could not process that audio. Please try again.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
|
|
326
450
|
}
|
|
327
451
|
}
|
|
328
|
-
async function handleAudio(options, requestPath) {
|
|
329
|
-
const prefix = `${
|
|
452
|
+
async function handleAudio(options, basePath, requestPath) {
|
|
453
|
+
const prefix = `${basePath}/audio/`;
|
|
330
454
|
const pathOnly = requestPath.split("?")[0];
|
|
331
455
|
const rest = pathOnly.slice(prefix.length);
|
|
332
456
|
const parts = rest.split("/");
|
|
@@ -355,15 +479,16 @@ async function handleAudio(options, requestPath) {
|
|
|
355
479
|
}
|
|
356
480
|
function createTwilioPhoneBridge(options) {
|
|
357
481
|
new URL(options.publicBaseUrl);
|
|
482
|
+
const basePath = normalizeTwilioPhoneBasePath(options.basePath);
|
|
358
483
|
return {
|
|
359
484
|
async handle(request) {
|
|
360
485
|
const method = request.method.toUpperCase();
|
|
361
486
|
const requestPath = request.path.startsWith("/") ? request.path : `/${request.path}`;
|
|
362
487
|
const routePath = requestPath.split("?")[0];
|
|
363
|
-
if (method === "GET" && requestPath.startsWith(`${
|
|
364
|
-
return handleAudio(options, requestPath);
|
|
488
|
+
if (method === "GET" && requestPath.startsWith(`${basePath}/audio/`)) {
|
|
489
|
+
return handleAudio(options, basePath, requestPath);
|
|
365
490
|
}
|
|
366
|
-
if (method === "GET" && routePath === `${
|
|
491
|
+
if (method === "GET" && routePath === `${basePath}/health`) {
|
|
367
492
|
return textResponse(200, "ok");
|
|
368
493
|
}
|
|
369
494
|
if (method !== "POST")
|
|
@@ -379,12 +504,12 @@ function createTwilioPhoneBridge(options) {
|
|
|
379
504
|
});
|
|
380
505
|
return textResponse(403, "invalid Twilio signature");
|
|
381
506
|
}
|
|
382
|
-
if (routePath === `${
|
|
383
|
-
return handleIncoming(options);
|
|
384
|
-
if (routePath === `${
|
|
385
|
-
return handleListen(options);
|
|
386
|
-
if (routePath === `${
|
|
387
|
-
return handleRecording(options, params);
|
|
507
|
+
if (routePath === `${basePath}/incoming`)
|
|
508
|
+
return handleIncoming(options, basePath, params);
|
|
509
|
+
if (routePath === `${basePath}/listen`)
|
|
510
|
+
return handleListen(options, basePath);
|
|
511
|
+
if (routePath === `${basePath}/recording`)
|
|
512
|
+
return handleRecording(options, basePath, params);
|
|
388
513
|
return textResponse(404, "not found");
|
|
389
514
|
},
|
|
390
515
|
};
|
|
@@ -49,20 +49,21 @@ const runtime_1 = require("../nerves/runtime");
|
|
|
49
49
|
message: "booting Voice entrypoint",
|
|
50
50
|
meta: { entry: "voice", agentName },
|
|
51
51
|
});
|
|
52
|
-
Promise.resolve().then(() => __importStar(require("
|
|
53
|
-
await
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
if (!machineConfig.ok) {
|
|
58
|
-
await refreshMachineRuntimeCredentialConfig(agentName, machine.machineId, { preserveCachedOnFailure: true }).catch(() => undefined);
|
|
59
|
-
}
|
|
60
|
-
void refreshRuntimeCredentialConfig(agentName, { preserveCachedOnFailure: true }).catch(() => undefined);
|
|
52
|
+
Promise.resolve().then(() => __importStar(require("./voice/twilio-phone-runtime"))).then(async ({ agentScopedTwilioPhoneBasePath, startConfiguredTwilioPhoneTransport, }) => {
|
|
53
|
+
const twilioPhone = await startConfiguredTwilioPhoneTransport({
|
|
54
|
+
agentName,
|
|
55
|
+
defaultBasePath: agentScopedTwilioPhoneBasePath(agentName),
|
|
56
|
+
});
|
|
61
57
|
(0, runtime_1.emitNervesEvent)({
|
|
62
58
|
component: "senses",
|
|
63
59
|
event: "senses.voice_entry_ready",
|
|
64
60
|
message: "Voice entrypoint is ready for managed voice turns",
|
|
65
|
-
meta: {
|
|
61
|
+
meta: {
|
|
62
|
+
entry: "voice",
|
|
63
|
+
agentName,
|
|
64
|
+
twilioPhone: twilioPhone.status,
|
|
65
|
+
webhookUrl: twilioPhone.status === "started" ? twilioPhone.settings.webhookUrl : undefined,
|
|
66
|
+
},
|
|
66
67
|
});
|
|
67
68
|
setInterval(() => undefined, 60_000);
|
|
68
69
|
})
|
|
@@ -1,37 +1,4 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
3
|
function readRequiredAgentName() {
|
|
37
4
|
const agentArgIndex = process.argv.indexOf("--agent");
|
|
@@ -42,16 +9,9 @@ function readRequiredAgentName() {
|
|
|
42
9
|
process.exit(1);
|
|
43
10
|
}
|
|
44
11
|
const agentName = readRequiredAgentName();
|
|
45
|
-
const path = __importStar(require("path"));
|
|
46
|
-
const identity_1 = require("../heart/identity");
|
|
47
|
-
const machine_identity_1 = require("../heart/machine-identity");
|
|
48
12
|
const runtime_logging_1 = require("../heart/daemon/runtime-logging");
|
|
49
|
-
const provider_credentials_1 = require("../heart/provider-credentials");
|
|
50
|
-
const runtime_credentials_1 = require("../heart/runtime-credentials");
|
|
51
13
|
const runtime_1 = require("../nerves/runtime");
|
|
52
|
-
const
|
|
53
|
-
const whisper_1 = require("./voice/whisper");
|
|
54
|
-
const twilio_phone_1 = require("./voice/twilio-phone");
|
|
14
|
+
const voice_1 = require("./voice");
|
|
55
15
|
function argValue(name) {
|
|
56
16
|
const index = process.argv.indexOf(name);
|
|
57
17
|
if (index < 0)
|
|
@@ -59,45 +19,6 @@ function argValue(name) {
|
|
|
59
19
|
const value = process.argv[index + 1];
|
|
60
20
|
return value && !value.startsWith("--") ? value : undefined;
|
|
61
21
|
}
|
|
62
|
-
function asRecord(value) {
|
|
63
|
-
return value && typeof value === "object" && !Array.isArray(value) ? value : null;
|
|
64
|
-
}
|
|
65
|
-
function configString(config, dottedPath) {
|
|
66
|
-
let cursor = config;
|
|
67
|
-
for (const segment of dottedPath.split(".")) {
|
|
68
|
-
const record = asRecord(cursor);
|
|
69
|
-
if (!record)
|
|
70
|
-
return undefined;
|
|
71
|
-
cursor = record[segment];
|
|
72
|
-
}
|
|
73
|
-
return typeof cursor === "string" && cursor.trim() ? cursor.trim() : undefined;
|
|
74
|
-
}
|
|
75
|
-
function configNumber(config, dottedPath) {
|
|
76
|
-
let cursor = config;
|
|
77
|
-
for (const segment of dottedPath.split(".")) {
|
|
78
|
-
const record = asRecord(cursor);
|
|
79
|
-
if (!record)
|
|
80
|
-
return undefined;
|
|
81
|
-
cursor = record[segment];
|
|
82
|
-
}
|
|
83
|
-
if (typeof cursor === "number" && Number.isFinite(cursor))
|
|
84
|
-
return cursor;
|
|
85
|
-
if (typeof cursor === "string" && cursor.trim()) {
|
|
86
|
-
const parsed = Number(cursor);
|
|
87
|
-
return Number.isFinite(parsed) ? parsed : undefined;
|
|
88
|
-
}
|
|
89
|
-
return undefined;
|
|
90
|
-
}
|
|
91
|
-
function requireConfig(result, label) {
|
|
92
|
-
if (result.ok)
|
|
93
|
-
return result.config;
|
|
94
|
-
throw new Error(`${label} unavailable: ${result.error}`);
|
|
95
|
-
}
|
|
96
|
-
function required(value, guidance) {
|
|
97
|
-
if (value)
|
|
98
|
-
return value;
|
|
99
|
-
throw new Error(guidance);
|
|
100
|
-
}
|
|
101
22
|
function numberArg(name) {
|
|
102
23
|
const raw = argValue(name);
|
|
103
24
|
if (!raw)
|
|
@@ -107,31 +28,27 @@ function numberArg(name) {
|
|
|
107
28
|
throw new Error(`${name} must be a number`);
|
|
108
29
|
return parsed;
|
|
109
30
|
}
|
|
110
|
-
function
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
}
|
|
124
|
-
const missing = providers.filter((provider) => !pool.pool.providers[provider]);
|
|
125
|
-
if (missing.length > 0) {
|
|
126
|
-
throw new Error(`missing provider credentials for phone voice: ${missing.join(", ")}`);
|
|
127
|
-
}
|
|
31
|
+
/**
 * Collects the command-line overrides for the standalone entrypoint.
 *
 * Each row maps a result key to its flag and to the reader used for it
 * (`argValue` for text, `numberArg` for numbers). Rows are read top to bottom,
 * and every key is present in the result even when its flag was not supplied.
 *
 * @returns An object keyed by override name with the parsed flag values.
 */
function standaloneOverrides() {
    const flagTable = [
        ["publicBaseUrl", "--public-url", argValue],
        ["basePath", "--base-path", argValue],
        ["port", "--port", numberArg],
        ["host", "--host", argValue],
        ["outputDir", "--output-dir", argValue],
        ["defaultFriendId", "--friend", argValue],
        ["elevenLabsVoiceId", "--elevenlabs-voice-id", argValue],
        ["whisperCliPath", "--whisper-cli-path", argValue],
        ["whisperModelPath", "--whisper-model-path", argValue],
        ["recordTimeoutSeconds", "--record-timeout", numberArg],
        ["recordMaxLengthSeconds", "--record-max-length", numberArg],
    ];
    const overrides = {};
    for (const [key, flag, read] of flagTable) {
        overrides[key] = read(flag);
    }
    return overrides;
}
|
|
129
|
-
function writeReadyInstructions(localUrl, publicBaseUrl) {
|
|
46
|
+
/**
 * Prints the ready banner for the standalone phone bridge to stdout.
 *
 * @param {string} localUrl - Local address shown on the `local:` line.
 * @param {string} publicBaseUrl - Public address shown on the `public:` line.
 * @param {string} webhookUrl - URL shown on the `Twilio Voice webhook: POST` line.
 */
function writeReadyInstructions(localUrl, publicBaseUrl, webhookUrl) {
    const banner = "Twilio phone voice bridge ready.\n"
        + `local: ${localUrl}\n`
        + `public: ${publicBaseUrl}\n`
        + `Twilio Voice webhook: POST ${webhookUrl}\n`;
    process.stdout.write(banner);
}
|
|
@@ -143,65 +60,16 @@ function writeReadyInstructions(localUrl, publicBaseUrl) {
|
|
|
143
60
|
meta: { entry: "voice-twilio", agentName },
|
|
144
61
|
});
|
|
145
62
|
/**
 * Starts the phone transport for the standalone entrypoint and prints the
 * ready banner.
 *
 * Command-line overrides come from `standaloneOverrides()`; the default base
 * path is `voice_1.TWILIO_PHONE_WEBHOOK_BASE_PATH` and a public URL is required.
 *
 * @throws {Error} When the transport reports any status other than `"started"`;
 *   the message carries `transport.reason`.
 */
async function main() {
    const startRequest = {
        agentName,
        overrides: standaloneOverrides(),
        defaultBasePath: voice_1.TWILIO_PHONE_WEBHOOK_BASE_PATH,
        requirePublicUrl: true,
    };
    const transport = await (0, voice_1.startConfiguredTwilioPhoneTransport)(startRequest);
    if (transport.status === "started") {
        const { bridge, settings } = transport;
        writeReadyInstructions(bridge.localUrl, settings.publicBaseUrl, settings.webhookUrl);
        return;
    }
    throw new Error(`Twilio phone voice transport did not start: ${transport.reason}`);
}
|
|
206
74
|
main().catch((error) => {
|
|
207
75
|
(0, runtime_1.emitNervesEvent)({
|