@ouro.bot/cli 0.1.0-alpha.563 → 0.1.0-alpha.565

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,22 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.565",
6
+ "changes": [
7
+ "Voice shared-turn readback now recovers tool-required `settle` and `speak` deliveries from acknowledged tool calls instead of treating null assistant content as an empty response.",
8
+ "Twilio phone calls now start a real agent voice turn for the call-connected greeting, so the agent greets through the voice channel rather than a hardcoded transport prompt.",
9
+ "Twilio phone no-speech transcripts such as `[BLANK_AUDIO]` now become agent-authored voice reprompts instead of being passed through as caller speech."
10
+ ]
11
+ },
12
+ {
13
+ "version": "0.1.0-alpha.564",
14
+ "changes": [
15
+ "Voice now keeps Twilio phone as a transport under the canonical `voice` sense, with configurable agent-scoped webhook paths such as `/voice/agents/slugger/twilio/incoming`.",
16
+ "The managed Voice entrypoint now starts the Twilio phone bridge when this machine has a public voice URL, sharing the same Whisper.cpp STT, voice session, and ElevenLabs TTS code as the standalone local smoke bridge.",
17
+ "Voice setup docs, daemon status, connect guidance, and tests now cover managed Twilio transport readiness while keeping the one-off standalone Twilio bridge available for local smoke testing."
18
+ ]
19
+ },
4
20
  {
5
21
  "version": "0.1.0-alpha.563",
6
22
  "changes": [
@@ -4274,6 +4274,7 @@ function connectMenuTarget(answer) {
4274
4274
  return "cancel";
4275
4275
  }
4276
4276
  async function executeConnectVoice(agent, deps) {
4277
+ const agentPathSegment = agent.trim().toLowerCase().replace(/[^a-z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "agent";
4277
4278
  const message = [
4278
4279
  `Voice foundation for ${agent}`,
4279
4280
  "Configure portable ElevenLabs settings with:",
@@ -4282,15 +4283,16 @@ async function executeConnectVoice(agent, deps) {
4282
4283
  "Configure this machine's Whisper.cpp attachment with:",
4283
4284
  ` ouro vault config set --agent ${agent} --scope machine --key voice.whisperCliPath`,
4284
4285
  ` ouro vault config set --agent ${agent} --scope machine --key voice.whisperModelPath`,
4285
- "Optional Twilio phone testing setup:",
4286
+ "Optional managed Twilio phone transport setup:",
4286
4287
  ` ouro vault config set --agent ${agent} --key voice.twilioAccountSid`,
4287
4288
  ` ouro vault config set --agent ${agent} --key voice.twilioAuthToken`,
4288
4289
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPublicUrl`,
4290
+ ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioBasePath --value /voice/agents/${agentPathSegment}/twilio`,
4289
4291
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioPort --value 18910`,
4290
4292
  ` ouro vault config set --agent ${agent} --scope machine --key voice.twilioDefaultFriendId --value ari`,
4291
- ` node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>`,
4292
- `Set the Twilio number's Voice webhook to POST https://<cloudflare-tunnel>/voice/twilio/incoming.`,
4293
4293
  "Then enable agent.json: senses.voice.enabled = true and restart with `ouro up`.",
4294
+ `The managed Voice entrypoint will listen at POST <public-url>/voice/agents/${agentPathSegment}/twilio/incoming.`,
4295
+ `Standalone local smoke remains available with: node dist/senses/voice-twilio-entry.js --agent ${agent} --port 18910 --public-url https://<cloudflare-tunnel>.`,
4294
4296
  "Meeting links use URL intake plus BlackHole/Multi-Output readiness checks. Phone testing uses Twilio Record -> Whisper.cpp -> voice session -> ElevenLabs -> Twilio Play.",
4295
4297
  ].join("\n");
4296
4298
  deps.writeStdout(message);
@@ -204,8 +204,14 @@ function senseFactsFromRuntimeConfig(agent, senses, runtimeConfig, machineRuntim
204
204
  missing.push("voice.whisperCliPath");
205
205
  if (!textField(voice, "whisperModelPath"))
206
206
  missing.push("voice.whisperModelPath");
207
+ const twilioPublicUrl = textField(voice, "twilioPublicUrl");
207
208
  base.voice = missing.length === 0
208
- ? { configured: true, detail: "local Whisper.cpp STT + ElevenLabs TTS" }
209
+ ? {
210
+ configured: true,
211
+ detail: twilioPublicUrl
212
+ ? "local Whisper.cpp STT + ElevenLabs TTS; Twilio phone transport attached"
213
+ : "local Whisper.cpp STT + ElevenLabs TTS",
214
+ }
209
215
  : {
210
216
  configured: false,
211
217
  optional: !machineRuntimeConfig.ok && machineRuntimeConfig.reason === "missing",
@@ -61,6 +61,10 @@ const pipeline_1 = require("./pipeline");
61
61
  const mcp_manager_1 = require("../repertoire/mcp-manager");
62
62
  const runtime_1 = require("../nerves/runtime");
63
63
  const RESPONSE_CAP = 50_000;
64
+ const DELIVERY_TOOL_ACKS = new Map([
65
+ ["settle", "(delivered)"],
66
+ ["speak", "(spoken)"],
67
+ ]);
64
68
  /**
65
69
  * Strip MiniMax-style `<think>...</think>` reasoning blocks from a response
66
70
  * string. Handles unclosed open tags (treats everything from `<think>` to
@@ -84,6 +88,78 @@ function stripThinkBlocks(input) {
84
88
  }
85
89
  return out.trim();
86
90
  }
91
/**
 * Normalize an assistant message's `content` field into usable text.
 *
 * @param {unknown} content - Raw content value from a session message.
 * @returns {string|null} The trimmed text, or null when the content is not a
 *   string or contains only whitespace.
 */
function assistantContentText(content) {
    if (typeof content === "string") {
        const text = content.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return null;
}
97
/**
 * Extract one non-blank string argument from a tool call, provided the call
 * targets the expected tool.
 *
 * @param {unknown} toolCall - Tool-call entry; expected to carry a `function`
 *   object with `name` and a JSON-encoded `arguments` string.
 * @param {string} toolName - Tool name the call must match.
 * @param {string} argName - Key to read from the parsed arguments object.
 * @returns {string|null} The trimmed argument value, or null when the call does
 *   not match, the arguments are not a JSON object, or the value is missing,
 *   non-string, or blank.
 */
function parseToolStringArg(toolCall, toolName, argName) {
    if (!toolCall || typeof toolCall !== "object") {
        return null;
    }
    const { function: fn } = toolCall;
    if (fn?.name !== toolName) {
        return null;
    }
    if (typeof fn.arguments !== "string") {
        return null;
    }
    let args;
    try {
        args = JSON.parse(fn.arguments);
    }
    catch {
        // Malformed JSON is treated the same as "no such argument".
        return null;
    }
    if (args === null || typeof args !== "object" || Array.isArray(args)) {
        return null;
    }
    const raw = args[argName];
    if (typeof raw !== "string") {
        return null;
    }
    const text = raw.trim();
    return text || null;
}
117
/**
 * Check whether a tool call was acknowledged by a matching tool result.
 *
 * Only the contiguous run of `tool` messages directly after the assistant
 * message is inspected; the scan stops at the first message with another role.
 *
 * @param {Array<object>} messages - Session messages.
 * @param {number} assistantIndex - Index of the assistant message that issued the call.
 * @param {unknown} toolCallId - Identifier of the tool call to look for.
 * @param {string} toolName - Tool name used to look up the expected
 *   acknowledgement text in DELIVERY_TOOL_ACKS.
 * @returns {boolean} True when a tool message with the same `tool_call_id` has
 *   trimmed string content equal to the expected acknowledgement.
 */
function hasDeliveredToolResult(messages, assistantIndex, toolCallId, toolName) {
    const hasUsableId = typeof toolCallId === "string" && toolCallId.trim() !== "";
    if (!hasUsableId) {
        return false;
    }
    const ack = DELIVERY_TOOL_ACKS.get(toolName);
    let cursor = assistantIndex + 1;
    while (cursor < messages.length) {
        const candidate = messages[cursor];
        if (candidate.role !== "tool") {
            // Tool results must directly follow the assistant message.
            break;
        }
        const matchesCall = candidate.tool_call_id === toolCallId;
        if (matchesCall && typeof candidate.content === "string" && candidate.content.trim() === ack) {
            return true;
        }
        cursor += 1;
    }
    return false;
}
133
/**
 * Recover the text an assistant delivered through `settle` / `speak` tool
 * calls that were acknowledged by tool results.
 *
 * @param {Array<object>} messages - Session messages.
 * @param {number} assistantIndex - Index of the assistant message to inspect.
 * @returns {string|null} Delivered texts joined with newlines in tool-call
 *   order, or null when the message has no `tool_calls` array or nothing was
 *   acknowledged.
 */
function deliveredTextFromAssistantTools(messages, assistantIndex) {
    const assistant = messages[assistantIndex];
    const toolCalls = assistant.tool_calls;
    if (!Array.isArray(toolCalls)) {
        return null;
    }
    const texts = [];
    for (const call of toolCalls) {
        const callId = call && typeof call === "object" ? call.id : undefined;
        const settled = parseToolStringArg(call, "settle", "answer");
        if (settled && hasDeliveredToolResult(messages, assistantIndex, callId, "settle")) {
            texts.push(settled);
            continue;
        }
        const spoken = parseToolStringArg(call, "speak", "message");
        if (spoken && hasDeliveredToolResult(messages, assistantIndex, callId, "speak")) {
            texts.push(spoken);
        }
    }
    return texts.length === 0 ? null : texts.join("\n");
}
155
/**
 * Derive the final response text from a session's message list.
 *
 * The last assistant message is used: its own text content when non-blank,
 * otherwise whatever text can be recovered from its acknowledged tool calls.
 *
 * @param {Array<object>} messages - Session messages.
 * @returns {string|null} Response text, or null when there is no assistant
 *   message or nothing could be recovered from it.
 */
function responseFromSessionMessages(messages) {
    const lastAssistantAt = messages.findLastIndex((entry) => entry.role === "assistant");
    if (lastAssistantAt === -1) {
        return null;
    }
    const text = assistantContentText(messages[lastAssistantAt].content);
    if (text !== null && text !== undefined) {
        return text;
    }
    return deliveredTextFromAssistantTools(messages, lastAssistantAt);
}
87
163
  /**
88
164
  * Run a single agent turn through the inbound pipeline.
89
165
  * Caller provides channel, session key, friend, and message;
@@ -203,13 +279,8 @@ async function runSenseTurn(options) {
203
279
  await persistPromise;
204
280
  const postTurnSession = (0, context_1.loadSession)(sessPath);
205
281
  if (postTurnSession?.messages) {
206
- const lastAssistant = [...postTurnSession.messages].reverse().find(m => m.role === "assistant");
207
- if (lastAssistant && typeof lastAssistant.content === "string" && lastAssistant.content.trim()) {
208
- finalResponse = lastAssistant.content;
209
- }
210
- else {
211
- finalResponse = "(agent responded but response was empty)";
212
- }
282
+ finalResponse = responseFromSessionMessages(postTurnSession.messages)
283
+ ?? "(agent responded but response was empty)";
213
284
  }
214
285
  else {
215
286
  finalResponse = "(agent responded but response was empty)";
@@ -25,3 +25,4 @@ __exportStar(require("./audio-routing"), exports);
25
25
  __exportStar(require("./playback"), exports);
26
26
  __exportStar(require("./golden-path"), exports);
27
27
  __exportStar(require("./twilio-phone"), exports);
28
+ __exportStar(require("./twilio-phone-runtime"), exports);
@@ -0,0 +1,282 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.agentScopedTwilioPhoneBasePath = agentScopedTwilioPhoneBasePath;
37
+ exports.resolveTwilioPhoneTransportRuntime = resolveTwilioPhoneTransportRuntime;
38
+ exports.startConfiguredTwilioPhoneTransport = startConfiguredTwilioPhoneTransport;
39
+ exports.closeTwilioPhoneBridgeServer = closeTwilioPhoneBridgeServer;
40
+ const path = __importStar(require("path"));
41
+ const identity_1 = require("../../heart/identity");
42
+ const machine_identity_1 = require("../../heart/machine-identity");
43
+ const provider_credentials_1 = require("../../heart/provider-credentials");
44
+ const runtime_credentials_1 = require("../../heart/runtime-credentials");
45
+ const runtime_1 = require("../../nerves/runtime");
46
+ const elevenlabs_1 = require("./elevenlabs");
47
+ const twilio_phone_1 = require("./twilio-phone");
48
+ const whisper_1 = require("./whisper");
49
/**
 * Narrow a value to a non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {object|null} The same value when it is a truthy, non-array object;
 *   otherwise null.
 */
function asRecord(value) {
    const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return isRecord ? value : null;
}
52
/**
 * Read a non-blank string from a nested config object by dotted path.
 *
 * @param {unknown} config - Root config value.
 * @param {string} dottedPath - Path such as `"voice.twilioPublicUrl"`.
 * @returns {string|undefined} The trimmed string, or undefined when any
 *   segment's parent is not a non-array object, or the leaf is not a non-blank
 *   string.
 */
function configString(config, dottedPath) {
    let node = config;
    const reachable = dottedPath.split(".").every((key) => {
        const container = asRecord(node);
        if (!container) {
            return false;
        }
        node = container[key];
        return true;
    });
    if (!reachable || typeof node !== "string") {
        return undefined;
    }
    const text = node.trim();
    return text === "" ? undefined : text;
}
62
/**
 * Read a finite number from a nested config object by dotted path.
 *
 * Accepts a finite number, or a non-blank string that `Number()` converts to a
 * finite number.
 *
 * @param {unknown} config - Root config value.
 * @param {string} dottedPath - Path such as `"voice.twilioPort"`.
 * @returns {number|undefined} The number, or undefined when the path cannot be
 *   walked or the leaf is not numeric.
 */
function configNumber(config, dottedPath) {
    let node = config;
    const reachable = dottedPath.split(".").every((key) => {
        const container = asRecord(node);
        if (!container) {
            return false;
        }
        node = container[key];
        return true;
    });
    if (!reachable) {
        return undefined;
    }
    if (typeof node === "number") {
        return Number.isFinite(node) ? node : undefined;
    }
    if (typeof node === "string" && node.trim() !== "") {
        const converted = Number(node);
        return Number.isFinite(converted) ? converted : undefined;
    }
    return undefined;
}
78
/**
 * Read a boolean from a nested config object by dotted path.
 *
 * Accepts a real boolean, or the strings `"true"` / `"false"` in any letter
 * case and with surrounding whitespace.
 *
 * @param {unknown} config - Root config value.
 * @param {string} dottedPath - Path such as `"voice.twilioEnabled"`.
 * @returns {boolean|undefined} The boolean, or undefined when the path cannot
 *   be walked or the leaf is not a recognised boolean.
 */
function configBoolean(config, dottedPath) {
    let node = config;
    const reachable = dottedPath.split(".").every((key) => {
        const container = asRecord(node);
        if (!container) {
            return false;
        }
        node = container[key];
        return true;
    });
    if (!reachable) {
        return undefined;
    }
    if (typeof node === "boolean") {
        return node;
    }
    if (typeof node !== "string") {
        return undefined;
    }
    switch (node.trim().toLowerCase()) {
        case "true":
            return true;
        case "false":
            return false;
        default:
            return undefined;
    }
}
97
/**
 * Unwrap a config read result or fail with a labelled error.
 *
 * @param {{ok: boolean, config?: unknown, error?: unknown}} result - Read result.
 * @param {string} label - Human-readable name of the config, used in the error.
 * @returns {unknown} `result.config` when `result.ok` is truthy.
 * @throws {Error} `"<label> unavailable: <error>"` when `result.ok` is falsy.
 */
function requireConfig(result, label) {
    if (!result.ok) {
        throw new Error(`${label} unavailable: ${result.error}`);
    }
    return result.config;
}
102
/**
 * Return a value that must be present, or throw with setup guidance.
 *
 * @param {*} value - Value to check for truthiness.
 * @param {string} guidance - Error message to throw when the value is falsy.
 * @returns {*} The value, unchanged, when truthy.
 * @throws {Error} With `guidance` as the message when the value is falsy.
 */
function required(value, guidance) {
    if (!value) {
        throw new Error(guidance);
    }
    return value;
}
107
/**
 * Collect the distinct provider identifiers an agent config selects.
 *
 * @param {{humanFacing: {provider: *}, agentFacing: {provider: *}, provider?: *}} config
 *   Agent configuration.
 * @returns {Array<*>} De-duplicated providers in first-seen order: human-facing,
 *   agent-facing, then the top-level `provider` when it is truthy.
 */
function selectedAgentProviders(config) {
    const candidates = [config.humanFacing.provider, config.agentFacing.provider];
    if (config.provider) {
        candidates.push(config.provider);
    }
    return Array.from(new Set(candidates));
}
115
/**
 * Refresh the credential pool for every provider the agent config selects and
 * fail when any of them is unavailable.
 *
 * @param {string} agentName - Agent whose provider credentials are refreshed.
 * @returns {Promise<void>} Resolves when every selected provider has an entry
 *   in the refreshed pool.
 * @throws {Error} When the refresh result is not ok, or when one or more
 *   selected providers have no entry in the pool.
 */
async function cacheSelectedProviderCredentials(agentName) {
    const agentConfig = (0, identity_1.loadAgentConfig)();
    const providers = selectedAgentProviders(agentConfig);
    const pool = await (0, provider_credentials_1.refreshProviderCredentialPool)(agentName, { providers });
    if (!pool.ok) {
        throw new Error(`provider credentials unavailable for phone voice: ${pool.error}`);
    }
    const missing = [];
    for (const provider of providers) {
        if (!pool.pool.providers[provider]) {
            missing.push(provider);
        }
    }
    if (missing.length > 0) {
        throw new Error(`missing provider credentials for phone voice: ${missing.join(", ")}`);
    }
}
126
/**
 * Convert an agent name into a URL path segment.
 *
 * The name is trimmed and lower-cased, each run of characters outside
 * `a-z0-9._-` becomes a single hyphen, and leading/trailing hyphens are removed.
 *
 * @param {string} agentName - Agent name.
 * @returns {string} The resulting segment, or `"agent"` when nothing remains.
 */
function agentPathSegment(agentName) {
    const lowered = agentName.trim().toLowerCase();
    const hyphenated = lowered.replace(/[^a-z0-9._-]+/g, "-");
    const stripped = hyphenated.replace(/^-+|-+$/g, "");
    return stripped === "" ? "agent" : stripped;
}
129
/**
 * Trim an optional string, mapping blank or absent values to undefined.
 *
 * @param {string|null|undefined} value - Optional string.
 * @returns {string|undefined} The trimmed string, or undefined when the value
 *   is nullish or trims to an empty string.
 */
function trimOptional(value) {
    const text = value?.trim();
    return text ? text : undefined;
}
132
/**
 * Build the agent-scoped Twilio webhook base path.
 *
 * @param {string} agentName - Agent name; converted with `agentPathSegment`.
 * @returns {string} Path of the form `/voice/agents/<segment>/twilio`.
 */
function agentScopedTwilioPhoneBasePath(agentName) {
    const segment = agentPathSegment(agentName);
    return ["", "voice", "agents", segment, "twilio"].join("/");
}
135
/**
 * Resolve the settings for the Twilio phone voice transport from overrides,
 * machine-scoped config, and portable runtime config.
 *
 * Lookup precedence for each setting is: explicit override, then config, then
 * the built-in default (where one exists).
 *
 * @param {object} options
 * @param {string} options.agentName - Agent the transport belongs to.
 * @param {unknown} options.runtimeConfig - Portable runtime config; read for
 *   ElevenLabs and Twilio credentials.
 * @param {unknown} options.machineConfig - Machine runtime config; read for
 *   `voice.*` settings.
 * @param {object} [options.overrides] - Explicit values that win over config.
 * @param {string} [options.defaultBasePath] - Base path used when neither an
 *   override nor `voice.twilioBasePath` is set.
 * @param {boolean} [options.requirePublicUrl] - When truthy, a missing public
 *   URL is an error rather than a reason to report "disabled".
 * @returns {{status: "disabled", reason: string} | {status: "configured", settings: object}}
 * @throws {Error} When a required value is missing, the public URL cannot be
 *   parsed or is not https, or the base path is invalid.
 */
function resolveTwilioPhoneTransportRuntime(options) {
    const overrides = options.overrides ?? {};
    const configuredPublicBaseUrl = trimOptional(overrides.publicBaseUrl)
        ?? configString(options.machineConfig, "voice.twilioPublicUrl");
    const explicitEnabled = overrides.enabled ?? configBoolean(options.machineConfig, "voice.twilioEnabled");
    if (!configuredPublicBaseUrl && options.requirePublicUrl) {
        throw new Error("missing voice.twilioPublicUrl in this machine's runtime config");
    }
    // Without an explicit switch, the transport is enabled whenever a public URL exists.
    const enabled = explicitEnabled ?? !!configuredPublicBaseUrl;
    if (!enabled) {
        // A configured URL here means the transport was switched off explicitly;
        // report that instead of claiming the URL is missing.
        const reason = configuredPublicBaseUrl
            ? "voice.twilioEnabled is false"
            : "voice.twilioPublicUrl is not configured";
        return { status: "disabled", reason };
    }
    if (!configuredPublicBaseUrl) {
        throw new Error("missing voice.twilioPublicUrl in this machine's runtime config");
    }
    let publicUrl;
    try {
        publicUrl = new URL(configuredPublicBaseUrl);
    }
    catch (error) {
        // `new URL` throws a bare "Invalid URL" TypeError; name the offending setting.
        throw new Error(`voice.twilioPublicUrl is not a valid URL: ${configuredPublicBaseUrl}`, { cause: error });
    }
    if (publicUrl.protocol !== "https:") {
        throw new Error("voice.twilioPublicUrl must be an https URL");
    }
    const publicBaseUrl = publicUrl.toString();
    const basePath = (0, twilio_phone_1.normalizeTwilioPhoneBasePath)(overrides.basePath
        ?? configString(options.machineConfig, "voice.twilioBasePath")
        ?? options.defaultBasePath
        ?? twilio_phone_1.TWILIO_PHONE_WEBHOOK_BASE_PATH);
    const elevenLabsApiKey = required(configString(options.runtimeConfig, "integrations.elevenLabsApiKey"), "missing integrations.elevenLabsApiKey; run 'ouro connect voice --agent <agent>' for setup guidance");
    const elevenLabsVoiceId = required(trimOptional(overrides.elevenLabsVoiceId)
        ?? configString(options.runtimeConfig, "integrations.elevenLabsVoiceId")
        ?? configString(options.runtimeConfig, "voice.elevenLabsVoiceId"), "missing integrations.elevenLabsVoiceId; save the ElevenLabs voice ID before starting phone voice");
    const whisperCliPath = required(trimOptional(overrides.whisperCliPath)
        ?? configString(options.machineConfig, "voice.whisperCliPath"), "missing voice.whisperCliPath in this machine's runtime config");
    const whisperModelPath = required(trimOptional(overrides.whisperModelPath)
        ?? configString(options.machineConfig, "voice.whisperModelPath"), "missing voice.whisperModelPath in this machine's runtime config");
    const outputDir = trimOptional(overrides.outputDir)
        ?? configString(options.machineConfig, "voice.twilioOutputDir")
        ?? path.join((0, identity_1.getAgentRoot)(options.agentName), "state", "voice", "twilio-phone");
    const settings = {
        agentName: options.agentName,
        publicBaseUrl,
        basePath,
        webhookUrl: (0, twilio_phone_1.twilioPhoneWebhookUrl)(publicBaseUrl, basePath),
        outputDir,
        port: overrides.port
            ?? configNumber(options.machineConfig, "voice.twilioPort")
            ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PORT,
        host: trimOptional(overrides.host)
            ?? configString(options.machineConfig, "voice.twilioHost")
            ?? "127.0.0.1",
        elevenLabsApiKey,
        elevenLabsVoiceId,
        whisperCliPath,
        whisperModelPath,
        // Twilio credentials are optional here; they are passed through as-is.
        twilioAccountSid: configString(options.runtimeConfig, "voice.twilioAccountSid"),
        twilioAuthToken: configString(options.runtimeConfig, "voice.twilioAuthToken"),
        defaultFriendId: trimOptional(overrides.defaultFriendId)
            ?? configString(options.machineConfig, "voice.twilioDefaultFriendId"),
        recordTimeoutSeconds: overrides.recordTimeoutSeconds
            ?? configNumber(options.machineConfig, "voice.twilioRecordTimeoutSeconds")
            ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
        recordMaxLengthSeconds: overrides.recordMaxLengthSeconds
            ?? configNumber(options.machineConfig, "voice.twilioRecordMaxLengthSeconds")
            ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
    };
    return { status: "configured", settings };
}
199
+ const defaultTwilioPhoneTransportRuntimeDeps = {
200
+ waitForRuntimeCredentialBootstrap: runtime_credentials_1.waitForRuntimeCredentialBootstrap,
201
+ loadMachineIdentity: machine_identity_1.loadOrCreateMachineIdentity,
202
+ refreshRuntimeConfig: runtime_credentials_1.refreshRuntimeCredentialConfig,
203
+ refreshMachineRuntimeConfig: runtime_credentials_1.refreshMachineRuntimeCredentialConfig,
204
+ readRuntimeConfig: runtime_credentials_1.readRuntimeCredentialConfig,
205
+ readMachineRuntimeConfig: runtime_credentials_1.readMachineRuntimeCredentialConfig,
206
+ cacheSelectedProviderCredentials,
207
+ createTranscriber: whisper_1.createWhisperCppTranscriber,
208
+ createTts: elevenlabs_1.createElevenLabsTtsClient,
209
+ startBridgeServer: twilio_phone_1.startTwilioPhoneBridgeServer,
210
+ };
211
/**
 * Resolve the Twilio phone transport configuration for an agent and, when it
 * is enabled, start the bridge server.
 *
 * Steps: wait for credential bootstrap, refresh both runtime configs (refresh
 * rejections are deliberately ignored and the configs are then read back),
 * resolve settings, cache provider credentials, build the transcriber and TTS
 * client, start the bridge, and emit a readiness event.
 *
 * @param {object} options - `agentName` plus optional `overrides`,
 *   `defaultBasePath` and `requirePublicUrl`, forwarded to
 *   `resolveTwilioPhoneTransportRuntime`.
 * @param {object} [deps] - Injectable collaborators; defaults to
 *   `defaultTwilioPhoneTransportRuntimeDeps`.
 * @returns {Promise<object>} The `disabled` resolution unchanged, or
 *   `{ status: "started", settings, bridge }`.
 * @throws {Error} When a runtime config cannot be read, settings are invalid,
 *   or provider credentials are unavailable.
 */
async function startConfiguredTwilioPhoneTransport(options, deps = defaultTwilioPhoneTransportRuntimeDeps) {
    const { agentName } = options;
    await deps.waitForRuntimeCredentialBootstrap(agentName);
    const machine = deps.loadMachineIdentity();
    // Best-effort refresh: a rejected refresh is swallowed and the read below decides the outcome.
    const portableRefresh = deps
        .refreshRuntimeConfig(agentName, { preserveCachedOnFailure: true })
        .catch(() => undefined);
    const machineRefresh = deps
        .refreshMachineRuntimeConfig(agentName, machine.machineId, { preserveCachedOnFailure: true })
        .catch(() => undefined);
    await Promise.all([portableRefresh, machineRefresh]);
    const runtimeConfig = requireConfig(deps.readRuntimeConfig(agentName), "portable runtime/config");
    const machineConfig = requireConfig(deps.readMachineRuntimeConfig(agentName), "machine runtime config");
    const resolution = resolveTwilioPhoneTransportRuntime({
        agentName,
        runtimeConfig,
        machineConfig,
        overrides: options.overrides,
        defaultBasePath: options.defaultBasePath,
        requirePublicUrl: options.requirePublicUrl,
    });
    if (resolution.status === "disabled") {
        (0, runtime_1.emitNervesEvent)({
            component: "senses",
            event: "senses.voice_twilio_transport_disabled",
            message: "Twilio phone voice transport is not attached on this machine",
            meta: { agentName, reason: resolution.reason },
        });
        return resolution;
    }
    await deps.cacheSelectedProviderCredentials(agentName);
    const { settings } = resolution;
    const transcriber = deps.createTranscriber({
        whisperCliPath: settings.whisperCliPath,
        modelPath: settings.whisperModelPath,
    });
    const tts = deps.createTts({
        apiKey: settings.elevenLabsApiKey,
        voiceId: settings.elevenLabsVoiceId,
        outputFormat: "mp3_44100_128",
    });
    const bridgeConfig = {
        agentName: settings.agentName,
        publicBaseUrl: settings.publicBaseUrl,
        basePath: settings.basePath,
        outputDir: settings.outputDir,
        transcriber,
        tts,
        port: settings.port,
        host: settings.host,
        twilioAccountSid: settings.twilioAccountSid,
        twilioAuthToken: settings.twilioAuthToken,
        defaultFriendId: settings.defaultFriendId,
        recordTimeoutSeconds: settings.recordTimeoutSeconds,
        recordMaxLengthSeconds: settings.recordMaxLengthSeconds,
    };
    const bridge = await deps.startBridgeServer(bridgeConfig);
    (0, runtime_1.emitNervesEvent)({
        component: "senses",
        event: "senses.voice_twilio_transport_ready",
        message: "Twilio phone voice transport is ready",
        meta: {
            agentName: settings.agentName,
            localUrl: bridge.localUrl,
            publicBaseUrl: settings.publicBaseUrl,
            basePath: settings.basePath,
            webhookUrl: settings.webhookUrl,
        },
    });
    return { status: "started", settings, bridge };
}
277
/**
 * Close a running Twilio phone bridge server.
 *
 * @param {{server: {close: Function}}} server - Bridge handle whose `server`
 *   exposes a callback-style `close(callback)`.
 * @returns {Promise<void>} Resolves when `close` calls back without an error;
 *   rejects with the error it reports otherwise.
 */
function closeTwilioPhoneBridgeServer(server) {
    return new Promise((resolve, reject) => {
        const onClosed = (error) => {
            if (error) {
                reject(error);
                return;
            }
            resolve();
        };
        server.server.close(onClosed);
    });
}
@@ -34,6 +34,8 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.TWILIO_PHONE_WEBHOOK_BASE_PATH = exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS = exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = exports.DEFAULT_TWILIO_PHONE_PORT = void 0;
37
+ exports.normalizeTwilioPhoneBasePath = normalizeTwilioPhoneBasePath;
38
+ exports.twilioPhoneWebhookUrl = twilioPhoneWebhookUrl;
37
39
  exports.computeTwilioSignature = computeTwilioSignature;
38
40
  exports.validateTwilioSignature = validateTwilioSignature;
39
41
  exports.twilioRecordingMediaUrl = twilioRecordingMediaUrl;
@@ -46,6 +48,7 @@ const http = __importStar(require("http"));
46
48
  const path = __importStar(require("path"));
47
49
  const runtime_1 = require("../../nerves/runtime");
48
50
  const playback_1 = require("./playback");
51
+ const transcript_1 = require("./transcript");
49
52
  const turn_1 = require("./turn");
50
53
  exports.DEFAULT_TWILIO_PHONE_PORT = 18910;
51
54
  exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS = 2;
@@ -112,14 +115,29 @@ function escapeXml(input) {
112
115
  function routeUrl(publicBaseUrl, route) {
113
116
  return new URL(route, publicBaseUrl).toString();
114
117
  }
118
/**
 * Normalize and validate the base path under which Twilio webhook routes live.
 *
 * The value is trimmed, given a leading slash, and stripped of trailing
 * slashes. It may only contain `A-Z a-z 0-9 . _ ~ / -`, must not contain empty
 * segments (`//`), and must not contain `.` or `..` segments. URL resolution
 * collapses dot segments, so a path containing them would yield a public
 * webhook URL that no longer matches the configured base path.
 *
 * @param {string} [value] - Base path; defaults to TWILIO_PHONE_WEBHOOK_BASE_PATH.
 * @returns {string} Normalized path, e.g. `/voice/agents/slugger/twilio`.
 * @throws {Error} When the path is empty after normalization or fails validation.
 */
function normalizeTwilioPhoneBasePath(value = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
    const trimmed = value.trim();
    const withLeadingSlash = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
    const normalized = withLeadingSlash.replace(/\/+$/, "");
    if (!normalized || normalized === "/") {
        throw new Error("Twilio phone webhook base path is empty");
    }
    const hasDotSegment = normalized
        .slice(1)
        .split("/")
        .some((segment) => segment === "." || segment === "..");
    const hasOnlyAllowedCharacters = /^\/[A-Za-z0-9._~/-]+$/.test(normalized);
    if (!hasOnlyAllowedCharacters || normalized.includes("//") || hasDotSegment) {
        throw new Error(`invalid Twilio phone webhook base path: ${value}`);
    }
    return normalized;
}
130
/**
 * Build the public URL of the `incoming` webhook route.
 *
 * @param {string} publicBaseUrl - Public base URL the route is resolved against.
 * @param {string} [basePath] - Webhook base path; defaults to
 *   TWILIO_PHONE_WEBHOOK_BASE_PATH and is normalized before use.
 * @returns {string} Absolute URL of `<basePath>/incoming`.
 */
function twilioPhoneWebhookUrl(publicBaseUrl, basePath = exports.TWILIO_PHONE_WEBHOOK_BASE_PATH) {
    const incomingRoute = `${normalizeTwilioPhoneBasePath(basePath)}/incoming`;
    return new URL(incomingRoute, publicBaseUrl).toString();
}
115
133
  function requestPublicUrl(publicBaseUrl, requestPath) {
116
134
  return routeUrl(publicBaseUrl, requestPath);
117
135
  }
118
136
  function recordTwiml(options) {
119
- return `<Record action="${escapeXml(routeUrl(options.publicBaseUrl, `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/recording`))}" method="POST" playBeep="false" timeout="${options.timeoutSeconds}" maxLength="${options.maxLengthSeconds}" trim="trim-silence" />`;
137
+ return `<Record action="${escapeXml(routeUrl(options.publicBaseUrl, `${options.basePath}/recording`))}" method="POST" playBeep="false" timeout="${options.timeoutSeconds}" maxLength="${options.maxLengthSeconds}" trim="trim-silence" />`;
120
138
  }
121
- function redirectTwiml(publicBaseUrl) {
122
- return `<Redirect method="POST">${escapeXml(routeUrl(publicBaseUrl, `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/listen`))}</Redirect>`;
139
+ function redirectTwiml(publicBaseUrl, basePath) {
140
+ return `<Redirect method="POST">${escapeXml(routeUrl(publicBaseUrl, `${basePath}/listen`))}</Redirect>`;
123
141
  }
124
142
  function sayTwiml(message) {
125
143
  return `<Say>${escapeXml(message)}</Say>`;
@@ -158,6 +176,34 @@ function friendIdFromCaller(from, callSid) {
158
176
  const phoneish = from.replace(/[^0-9A-Za-z]+/g, "");
159
177
  return phoneish ? `twilio-${phoneish}` : `twilio-${safeSegment(callSid)}`;
160
178
  }
179
/**
 * Choose the friend identifier for a phone call.
 *
 * @param {{defaultFriendId?: string}} options - Bridge options.
 * @param {string} from - Caller ID reported by Twilio.
 * @param {string} callSid - Twilio call SID.
 * @returns {string} The trimmed `defaultFriendId` when it is non-blank,
 *   otherwise the identifier from `friendIdFromCaller(from, callSid)`.
 */
function voiceFriendId(options, from, callSid) {
    const configured = options.defaultFriendId?.trim();
    return configured ? configured : friendIdFromCaller(from, callSid);
}
182
/**
 * Compose the prompt used for the call-connected greeting turn.
 *
 * @param {{From?: string, To?: string}} params - Twilio webhook parameters.
 * @returns {string} Newline-joined prompt that includes the caller ID and
 *   dialed line when Twilio supplied them, or a note that it did not.
 */
function callConnectedPrompt(params) {
    const callerId = params.From?.trim();
    const dialedLine = params.To?.trim();
    const lines = [];
    lines.push("A Twilio phone voice call just connected.");
    lines.push("This is the first audible turn in the call.");
    if (callerId) {
        lines.push(`Twilio caller ID: ${callerId}.`);
    }
    else {
        lines.push("Twilio did not provide caller ID.");
    }
    if (dialedLine) {
        lines.push(`Dialed line: ${dialedLine}.`);
    }
    else {
        lines.push("Twilio did not provide the dialed line.");
    }
    lines.push("Respond through the voice channel as yourself. Greet the caller naturally and briefly, then invite them to speak.");
    return lines.join("\n");
}
193
/**
 * Compose the prompt used when a recording contained no intelligible speech.
 *
 * @returns {string} Three newline-joined sentences asking for a brief reprompt.
 */
function noSpeechPrompt() {
    const situation = "The last Twilio phone recording contained no intelligible speech.";
    const callerState = "The caller is still on the line.";
    const instruction = "Respond through the voice channel as yourself. Briefly ask them to try again or check whether they are there.";
    return `${situation}\n${callerState}\n${instruction}`;
}
200
/**
 * Detect transcripts that are only a "no speech" marker.
 *
 * The text is trimmed, trailing `.`, `!` and `?` characters are removed, and
 * the comparison is case-insensitive.
 *
 * @param {string} text - Transcript text.
 * @returns {boolean} True for `[BLANK_AUDIO]`, `BLANK_AUDIO`, `[NO_SPEECH]`
 *   or `NO_SPEECH`.
 */
function isNoSpeechTranscript(text) {
    const marker = text.trim().replace(/[.!?]+$/g, "").toUpperCase();
    const noSpeechMarkers = ["[BLANK_AUDIO]", "BLANK_AUDIO", "[NO_SPEECH]", "NO_SPEECH"];
    return noSpeechMarkers.includes(marker);
}
161
207
  function parseRecordingParams(params) {
162
208
  const callSid = params.CallSid?.trim();
163
209
  const recordingSid = params.RecordingSid?.trim();
@@ -171,16 +217,53 @@ function parseRecordingParams(params) {
171
217
  from: params.From?.trim() ?? "",
172
218
  };
173
219
  }
174
- function recordAgainResponse(publicBaseUrl, message) {
220
+ function recordAgainResponse(options, basePath, message) {
175
221
  return xmlResponse(`${sayTwiml(message)}${recordTwiml({
176
- publicBaseUrl,
177
- timeoutSeconds: exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
178
- maxLengthSeconds: exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
222
+ publicBaseUrl: options.publicBaseUrl,
223
+ basePath,
224
+ timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
225
+ maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
179
226
  })}`);
180
227
  }
181
228
  function errorMessage(error) {
182
229
  return error instanceof Error ? error.message : String(error);
183
230
  }
231
/**
 * Build the TwiML fragment that collects the caller's next input.
 *
 * @param {object} options - Bridge options; reads `publicBaseUrl`,
 *   `recordTimeoutSeconds` and `recordMaxLengthSeconds`.
 * @param {string} basePath - Webhook base path.
 * @param {string} mode - `"redirect"` yields a redirect fragment; any other
 *   value yields a record fragment.
 * @returns {string} TwiML fragment from `redirectTwiml` or `recordTwiml`.
 */
function nextInputTwiml(options, basePath, mode) {
    if (mode !== "redirect") {
        const timeoutSeconds = options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS;
        const maxLengthSeconds = options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS;
        return recordTwiml({
            publicBaseUrl: options.publicBaseUrl,
            basePath,
            timeoutSeconds,
            maxLengthSeconds,
        });
    }
    return redirectTwiml(options.publicBaseUrl, basePath);
}
241
/**
 * Run one agent voice turn from a text prompt and turn the result into a
 * TwiML response.
 *
 * The prompt is wrapped as a `loopback` transcript and passed to
 * `runVoiceLoopbackTurn`. When TTS delivery succeeded, the audio is written as
 * a playback artifact and played back; otherwise a spoken failure notice is
 * returned. Either way the response ends with the next-input fragment.
 *
 * @param {object} options
 * @param {object} options.bridgeOptions - Bridge options (`agentName`, `tts`,
 *   `runSenseTurn`, `publicBaseUrl`, record settings).
 * @param {string} options.basePath - Webhook base path.
 * @param {string} options.callDir - Directory the playback artifact is written to.
 * @param {string} options.safeCallSid - Call SID used in the audio URL.
 * @param {string} options.utteranceId - Identifier for the transcript and artifact.
 * @param {string} options.friendId - Friend identifier for the turn.
 * @param {string} options.sessionKey - Session key for the turn.
 * @param {string} options.promptText - Prompt text given to the agent.
 * @param {string} options.afterPlayback - Mode passed to `nextInputTwiml`.
 * @returns {Promise<*>} Whatever `xmlResponse` returns for the composed TwiML.
 */
async function runPhonePromptTurn(options) {
    const { bridgeOptions, basePath, utteranceId } = options;
    const transcript = (0, transcript_1.buildVoiceTranscript)({
        utteranceId,
        text: options.promptText,
        source: "loopback",
    });
    const turn = await (0, turn_1.runVoiceLoopbackTurn)({
        agentName: bridgeOptions.agentName,
        friendId: options.friendId,
        sessionKey: options.sessionKey,
        transcript,
        tts: bridgeOptions.tts,
        runSenseTurn: bridgeOptions.runSenseTurn,
    });
    const followUp = nextInputTwiml(bridgeOptions, basePath, options.afterPlayback);
    if (turn.tts.status === "delivered") {
        const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
            utteranceId,
            delivery: turn.tts,
            outputDir: options.callDir,
        });
        const callSegment = encodeURIComponent(options.safeCallSid);
        const fileSegment = encodeURIComponent(path.basename(playback.audioPath));
        const audioUrl = routeUrl(bridgeOptions.publicBaseUrl, `${basePath}/audio/${callSegment}/${fileSegment}`);
        return xmlResponse(`${playTwiml(audioUrl)}${followUp}`);
    }
    // No audio was produced, so fall back to a spoken notice.
    return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${followUp}`);
}
184
267
  function computeTwilioSignature(input) {
185
268
  const payload = input.url + Object.keys(input.params)
186
269
  .sort()
@@ -226,28 +309,56 @@ function verifyRequest(options, request, params) {
226
309
  signature: headerValue(request.headers, "x-twilio-signature"),
227
310
  });
228
311
  }
229
- async function handleIncoming(options) {
230
- const greeting = options.greetingText ?? "Connected to Ouro voice. Speak after the prompt.";
312
+ async function handleIncoming(options, basePath, params) {
313
+ const callSid = params.CallSid?.trim() || "incoming";
314
+ const safeCallSid = safeSegment(callSid);
315
+ const callDir = path.join(options.outputDir, safeCallSid);
316
+ const utteranceId = `twilio-${safeCallSid}-connected`;
231
317
  (0, runtime_1.emitNervesEvent)({
232
318
  component: "senses",
233
319
  event: "senses.voice_twilio_incoming",
234
320
  message: "Twilio voice call connected",
235
- meta: { agentName: options.agentName },
321
+ meta: { agentName: options.agentName, callSid: safeCallSid },
236
322
  });
237
- return xmlResponse(`${sayTwiml(greeting)}${recordTwiml({
238
- publicBaseUrl: options.publicBaseUrl,
239
- timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
240
- maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
241
- })}`);
323
+ try {
324
+ await fs.mkdir(callDir, { recursive: true });
325
+ return await runPhonePromptTurn({
326
+ bridgeOptions: options,
327
+ basePath,
328
+ callDir,
329
+ safeCallSid,
330
+ utteranceId,
331
+ friendId: voiceFriendId(options, params.From?.trim() ?? "", callSid),
332
+ sessionKey: `twilio-${safeCallSid}`,
333
+ promptText: callConnectedPrompt(params),
334
+ afterPlayback: "record",
335
+ });
336
+ }
337
+ catch (error) {
338
+ (0, runtime_1.emitNervesEvent)({
339
+ level: "error",
340
+ component: "senses",
341
+ event: "senses.voice_twilio_incoming_error",
342
+ message: "Twilio incoming voice greeting turn failed",
343
+ meta: { agentName: options.agentName, callSid: safeCallSid, error: errorMessage(error) },
344
+ });
345
+ return xmlResponse(recordTwiml({
346
+ publicBaseUrl: options.publicBaseUrl,
347
+ basePath,
348
+ timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
349
+ maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
350
+ }));
351
+ }
242
352
  }
243
- async function handleListen(options) {
353
+ async function handleListen(options, basePath) {
244
354
  return xmlResponse(recordTwiml({
245
355
  publicBaseUrl: options.publicBaseUrl,
356
+ basePath,
246
357
  timeoutSeconds: options.recordTimeoutSeconds ?? exports.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS,
247
358
  maxLengthSeconds: options.recordMaxLengthSeconds ?? exports.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS,
248
359
  }));
249
360
  }
250
- async function handleRecording(options, params) {
361
+ async function handleRecording(options, basePath, params) {
251
362
  const recording = parseRecordingParams(params);
252
363
  if (!recording) {
253
364
  (0, runtime_1.emitNervesEvent)({
@@ -257,7 +368,7 @@ async function handleRecording(options, params) {
257
368
  message: "Twilio recording callback was missing required fields",
258
369
  meta: { agentName: options.agentName },
259
370
  });
260
- return recordAgainResponse(options.publicBaseUrl, "I did not receive audio. Please try again.");
371
+ return recordAgainResponse(options, basePath, "I did not receive audio. Please try again.");
261
372
  }
262
373
  const safeCallSid = safeSegment(recording.callSid);
263
374
  const safeRecordingSid = safeSegment(recording.recordingSid);
@@ -284,30 +395,43 @@ async function handleRecording(options, params) {
284
395
  utteranceId,
285
396
  audioPath: inputPath,
286
397
  });
398
+ if (isNoSpeechTranscript(transcript.text)) {
399
+ return await runPhonePromptTurn({
400
+ bridgeOptions: options,
401
+ basePath,
402
+ callDir,
403
+ safeCallSid,
404
+ utteranceId: `${utteranceId}-nospeech`,
405
+ friendId: voiceFriendId(options, recording.from, recording.callSid),
406
+ sessionKey: `twilio-${safeCallSid}`,
407
+ promptText: noSpeechPrompt(),
408
+ afterPlayback: "redirect",
409
+ });
410
+ }
287
411
  const turn = await (0, turn_1.runVoiceLoopbackTurn)({
288
412
  agentName: options.agentName,
289
- friendId: options.defaultFriendId?.trim() || friendIdFromCaller(recording.from, recording.callSid),
413
+ friendId: voiceFriendId(options, recording.from, recording.callSid),
290
414
  sessionKey: `twilio-${safeCallSid}`,
291
415
  transcript,
292
416
  tts: options.tts,
293
417
  runSenseTurn: options.runSenseTurn,
294
418
  });
295
419
  if (turn.tts.status !== "delivered") {
296
- return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl)}`);
420
+ return xmlResponse(`${sayTwiml("voice output failed after the text response was captured.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
297
421
  }
298
422
  const playback = await (0, playback_1.writeVoicePlaybackArtifact)({
299
423
  utteranceId,
300
424
  delivery: turn.tts,
301
425
  outputDir: callDir,
302
426
  });
303
- const audioUrl = routeUrl(options.publicBaseUrl, `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
427
+ const audioUrl = routeUrl(options.publicBaseUrl, `${basePath}/audio/${encodeURIComponent(safeCallSid)}/${encodeURIComponent(path.basename(playback.audioPath))}`);
304
428
  (0, runtime_1.emitNervesEvent)({
305
429
  component: "senses",
306
430
  event: "senses.voice_twilio_turn_end",
307
431
  message: "finished Twilio voice turn",
308
432
  meta: { agentName: options.agentName, callSid: safeCallSid, recordingSid: safeRecordingSid, audioPath: playback.audioPath },
309
433
  });
310
- return xmlResponse(`${playTwiml(audioUrl)}${redirectTwiml(options.publicBaseUrl)}`);
434
+ return xmlResponse(`${playTwiml(audioUrl)}${redirectTwiml(options.publicBaseUrl, basePath)}`);
311
435
  }
312
436
  catch (error) {
313
437
  (0, runtime_1.emitNervesEvent)({
@@ -322,11 +446,11 @@ async function handleRecording(options, params) {
322
446
  error: errorMessage(error),
323
447
  },
324
448
  });
325
- return xmlResponse(`${sayTwiml("I could not process that audio. Please try again.")}${redirectTwiml(options.publicBaseUrl)}`);
449
+ return xmlResponse(`${sayTwiml("I could not process that audio. Please try again.")}${redirectTwiml(options.publicBaseUrl, basePath)}`);
326
450
  }
327
451
  }
328
- async function handleAudio(options, requestPath) {
329
- const prefix = `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/audio/`;
452
+ async function handleAudio(options, basePath, requestPath) {
453
+ const prefix = `${basePath}/audio/`;
330
454
  const pathOnly = requestPath.split("?")[0];
331
455
  const rest = pathOnly.slice(prefix.length);
332
456
  const parts = rest.split("/");
@@ -355,15 +479,16 @@ async function handleAudio(options, requestPath) {
355
479
  }
356
480
  function createTwilioPhoneBridge(options) {
357
481
  new URL(options.publicBaseUrl);
482
+ const basePath = normalizeTwilioPhoneBasePath(options.basePath);
358
483
  return {
359
484
  async handle(request) {
360
485
  const method = request.method.toUpperCase();
361
486
  const requestPath = request.path.startsWith("/") ? request.path : `/${request.path}`;
362
487
  const routePath = requestPath.split("?")[0];
363
- if (method === "GET" && requestPath.startsWith(`${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/audio/`)) {
364
- return handleAudio(options, requestPath);
488
+ if (method === "GET" && requestPath.startsWith(`${basePath}/audio/`)) {
489
+ return handleAudio(options, basePath, requestPath);
365
490
  }
366
- if (method === "GET" && routePath === `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/health`) {
491
+ if (method === "GET" && routePath === `${basePath}/health`) {
367
492
  return textResponse(200, "ok");
368
493
  }
369
494
  if (method !== "POST")
@@ -379,12 +504,12 @@ function createTwilioPhoneBridge(options) {
379
504
  });
380
505
  return textResponse(403, "invalid Twilio signature");
381
506
  }
382
- if (routePath === `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/incoming`)
383
- return handleIncoming(options);
384
- if (routePath === `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/listen`)
385
- return handleListen(options);
386
- if (routePath === `${exports.TWILIO_PHONE_WEBHOOK_BASE_PATH}/recording`)
387
- return handleRecording(options, params);
507
+ if (routePath === `${basePath}/incoming`)
508
+ return handleIncoming(options, basePath, params);
509
+ if (routePath === `${basePath}/listen`)
510
+ return handleListen(options, basePath);
511
+ if (routePath === `${basePath}/recording`)
512
+ return handleRecording(options, basePath, params);
388
513
  return textResponse(404, "not found");
389
514
  },
390
515
  };
@@ -49,20 +49,21 @@ const runtime_1 = require("../nerves/runtime");
49
49
  message: "booting Voice entrypoint",
50
50
  meta: { entry: "voice", agentName },
51
51
  });
52
- Promise.resolve().then(() => __importStar(require("../heart/runtime-credentials"))).then(async ({ readMachineRuntimeCredentialConfig, refreshMachineRuntimeCredentialConfig, refreshRuntimeCredentialConfig, waitForRuntimeCredentialBootstrap, }) => {
53
- await waitForRuntimeCredentialBootstrap(agentName);
54
- const { loadOrCreateMachineIdentity } = await Promise.resolve().then(() => __importStar(require("../heart/machine-identity")));
55
- const machine = loadOrCreateMachineIdentity();
56
- const machineConfig = readMachineRuntimeCredentialConfig(agentName);
57
- if (!machineConfig.ok) {
58
- await refreshMachineRuntimeCredentialConfig(agentName, machine.machineId, { preserveCachedOnFailure: true }).catch(() => undefined);
59
- }
60
- void refreshRuntimeCredentialConfig(agentName, { preserveCachedOnFailure: true }).catch(() => undefined);
52
+ Promise.resolve().then(() => __importStar(require("./voice/twilio-phone-runtime"))).then(async ({ agentScopedTwilioPhoneBasePath, startConfiguredTwilioPhoneTransport, }) => {
53
+ const twilioPhone = await startConfiguredTwilioPhoneTransport({
54
+ agentName,
55
+ defaultBasePath: agentScopedTwilioPhoneBasePath(agentName),
56
+ });
61
57
  (0, runtime_1.emitNervesEvent)({
62
58
  component: "senses",
63
59
  event: "senses.voice_entry_ready",
64
60
  message: "Voice entrypoint is ready for managed voice turns",
65
- meta: { entry: "voice", agentName, machineId: machine.machineId },
61
+ meta: {
62
+ entry: "voice",
63
+ agentName,
64
+ twilioPhone: twilioPhone.status,
65
+ webhookUrl: twilioPhone.status === "started" ? twilioPhone.settings.webhookUrl : undefined,
66
+ },
66
67
  });
67
68
  setInterval(() => undefined, 60_000);
68
69
  })
@@ -1,37 +1,4 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
2
  Object.defineProperty(exports, "__esModule", { value: true });
36
3
  function readRequiredAgentName() {
37
4
  const agentArgIndex = process.argv.indexOf("--agent");
@@ -42,16 +9,9 @@ function readRequiredAgentName() {
42
9
  process.exit(1);
43
10
  }
44
11
  const agentName = readRequiredAgentName();
45
- const path = __importStar(require("path"));
46
- const identity_1 = require("../heart/identity");
47
- const machine_identity_1 = require("../heart/machine-identity");
48
12
  const runtime_logging_1 = require("../heart/daemon/runtime-logging");
49
- const provider_credentials_1 = require("../heart/provider-credentials");
50
- const runtime_credentials_1 = require("../heart/runtime-credentials");
51
13
  const runtime_1 = require("../nerves/runtime");
52
- const elevenlabs_1 = require("./voice/elevenlabs");
53
- const whisper_1 = require("./voice/whisper");
54
- const twilio_phone_1 = require("./voice/twilio-phone");
14
+ const voice_1 = require("./voice");
55
15
  function argValue(name) {
56
16
  const index = process.argv.indexOf(name);
57
17
  if (index < 0)
@@ -59,45 +19,6 @@ function argValue(name) {
59
19
  const value = process.argv[index + 1];
60
20
  return value && !value.startsWith("--") ? value : undefined;
61
21
  }
62
- function asRecord(value) {
63
- return value && typeof value === "object" && !Array.isArray(value) ? value : null;
64
- }
65
- function configString(config, dottedPath) {
66
- let cursor = config;
67
- for (const segment of dottedPath.split(".")) {
68
- const record = asRecord(cursor);
69
- if (!record)
70
- return undefined;
71
- cursor = record[segment];
72
- }
73
- return typeof cursor === "string" && cursor.trim() ? cursor.trim() : undefined;
74
- }
75
- function configNumber(config, dottedPath) {
76
- let cursor = config;
77
- for (const segment of dottedPath.split(".")) {
78
- const record = asRecord(cursor);
79
- if (!record)
80
- return undefined;
81
- cursor = record[segment];
82
- }
83
- if (typeof cursor === "number" && Number.isFinite(cursor))
84
- return cursor;
85
- if (typeof cursor === "string" && cursor.trim()) {
86
- const parsed = Number(cursor);
87
- return Number.isFinite(parsed) ? parsed : undefined;
88
- }
89
- return undefined;
90
- }
91
- function requireConfig(result, label) {
92
- if (result.ok)
93
- return result.config;
94
- throw new Error(`${label} unavailable: ${result.error}`);
95
- }
96
- function required(value, guidance) {
97
- if (value)
98
- return value;
99
- throw new Error(guidance);
100
- }
101
22
  function numberArg(name) {
102
23
  const raw = argValue(name);
103
24
  if (!raw)
@@ -107,31 +28,27 @@ function numberArg(name) {
107
28
  throw new Error(`${name} must be a number`);
108
29
  return parsed;
109
30
  }
110
- function selectedAgentProviders(config) {
111
- const providers = new Set();
112
- providers.add(config.humanFacing.provider);
113
- providers.add(config.agentFacing.provider);
114
- if (config.provider)
115
- providers.add(config.provider);
116
- return [...providers];
117
- }
118
- async function cacheSelectedProviderCredentials(agentName) {
119
- const providers = selectedAgentProviders((0, identity_1.loadAgentConfig)());
120
- const pool = await (0, provider_credentials_1.refreshProviderCredentialPool)(agentName, { providers });
121
- if (!pool.ok) {
122
- throw new Error(`provider credentials unavailable for phone voice: ${pool.error}`);
123
- }
124
- const missing = providers.filter((provider) => !pool.pool.providers[provider]);
125
- if (missing.length > 0) {
126
- throw new Error(`missing provider credentials for phone voice: ${missing.join(", ")}`);
127
- }
31
+ function standaloneOverrides() {
32
+ return {
33
+ publicBaseUrl: argValue("--public-url"),
34
+ basePath: argValue("--base-path"),
35
+ port: numberArg("--port"),
36
+ host: argValue("--host"),
37
+ outputDir: argValue("--output-dir"),
38
+ defaultFriendId: argValue("--friend"),
39
+ elevenLabsVoiceId: argValue("--elevenlabs-voice-id"),
40
+ whisperCliPath: argValue("--whisper-cli-path"),
41
+ whisperModelPath: argValue("--whisper-model-path"),
42
+ recordTimeoutSeconds: numberArg("--record-timeout"),
43
+ recordMaxLengthSeconds: numberArg("--record-max-length"),
44
+ };
128
45
  }
129
- function writeReadyInstructions(localUrl, publicBaseUrl) {
46
+ function writeReadyInstructions(localUrl, publicBaseUrl, webhookUrl) {
130
47
  process.stdout.write([
131
48
  "Twilio phone voice bridge ready.",
132
49
  `local: ${localUrl}`,
133
50
  `public: ${publicBaseUrl}`,
134
- `Twilio Voice webhook: POST ${new URL(`${twilio_phone_1.TWILIO_PHONE_WEBHOOK_BASE_PATH}/incoming`, publicBaseUrl).toString()}`,
51
+ `Twilio Voice webhook: POST ${webhookUrl}`,
135
52
  "",
136
53
  ].join("\n"));
137
54
  }
@@ -143,65 +60,16 @@ function writeReadyInstructions(localUrl, publicBaseUrl) {
143
60
  meta: { entry: "voice-twilio", agentName },
144
61
  });
145
62
  async function main() {
146
- await (0, runtime_credentials_1.waitForRuntimeCredentialBootstrap)(agentName);
147
- const machine = (0, machine_identity_1.loadOrCreateMachineIdentity)();
148
- await Promise.all([
149
- (0, runtime_credentials_1.refreshRuntimeCredentialConfig)(agentName, { preserveCachedOnFailure: true }).catch(() => undefined),
150
- (0, runtime_credentials_1.refreshMachineRuntimeCredentialConfig)(agentName, machine.machineId, { preserveCachedOnFailure: true }).catch(() => undefined),
151
- ]);
152
- await cacheSelectedProviderCredentials(agentName);
153
- const runtimeConfig = requireConfig((0, runtime_credentials_1.readRuntimeCredentialConfig)(agentName), "portable runtime/config");
154
- const machineConfig = requireConfig((0, runtime_credentials_1.readMachineRuntimeCredentialConfig)(agentName), "machine runtime config");
155
- const port = numberArg("--port")
156
- ?? configNumber(machineConfig, "voice.twilioPort")
157
- ?? twilio_phone_1.DEFAULT_TWILIO_PHONE_PORT;
158
- const host = argValue("--host")
159
- ?? configString(machineConfig, "voice.twilioHost")
160
- ?? "127.0.0.1";
161
- const publicBaseUrl = required(argValue("--public-url") ?? configString(machineConfig, "voice.twilioPublicUrl"), `missing public URL; run 'cloudflared tunnel --url http://127.0.0.1:${port}' and restart with --public-url https://<tunnel>`);
162
- const elevenLabsApiKey = required(configString(runtimeConfig, "integrations.elevenLabsApiKey"), "missing integrations.elevenLabsApiKey; run 'ouro connect voice --agent <agent>' for setup guidance");
163
- const elevenLabsVoiceId = required(argValue("--elevenlabs-voice-id")
164
- ?? configString(runtimeConfig, "integrations.elevenLabsVoiceId")
165
- ?? configString(runtimeConfig, "voice.elevenLabsVoiceId"), "missing integrations.elevenLabsVoiceId; save the ElevenLabs voice ID before starting phone voice");
166
- const whisperCliPath = required(configString(machineConfig, "voice.whisperCliPath"), "missing voice.whisperCliPath in this machine's runtime config");
167
- const whisperModelPath = required(configString(machineConfig, "voice.whisperModelPath"), "missing voice.whisperModelPath in this machine's runtime config");
168
- const outputDir = argValue("--output-dir")
169
- ?? configString(machineConfig, "voice.twilioOutputDir")
170
- ?? path.join((0, identity_1.getAgentRoot)(agentName), "state", "voice", "twilio-phone");
171
- const defaultFriendId = argValue("--friend")
172
- ?? configString(machineConfig, "voice.twilioDefaultFriendId");
173
- const twilioAccountSid = configString(runtimeConfig, "voice.twilioAccountSid");
174
- const twilioAuthToken = configString(runtimeConfig, "voice.twilioAuthToken");
175
- const recordTimeoutSeconds = numberArg("--record-timeout")
176
- ?? configNumber(machineConfig, "voice.twilioRecordTimeoutSeconds")
177
- ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_TIMEOUT_SECONDS;
178
- const recordMaxLengthSeconds = numberArg("--record-max-length")
179
- ?? configNumber(machineConfig, "voice.twilioRecordMaxLengthSeconds")
180
- ?? twilio_phone_1.DEFAULT_TWILIO_RECORD_MAX_LENGTH_SECONDS;
181
- const transcriber = (0, whisper_1.createWhisperCppTranscriber)({
182
- whisperCliPath,
183
- modelPath: whisperModelPath,
184
- });
185
- const tts = (0, elevenlabs_1.createElevenLabsTtsClient)({
186
- apiKey: elevenLabsApiKey,
187
- voiceId: elevenLabsVoiceId,
188
- outputFormat: "mp3_44100_128",
189
- });
190
- const bridge = await (0, twilio_phone_1.startTwilioPhoneBridgeServer)({
63
+ const transport = await (0, voice_1.startConfiguredTwilioPhoneTransport)({
191
64
  agentName,
192
- publicBaseUrl,
193
- outputDir,
194
- transcriber,
195
- tts,
196
- port,
197
- host,
198
- twilioAccountSid,
199
- twilioAuthToken,
200
- defaultFriendId,
201
- recordTimeoutSeconds,
202
- recordMaxLengthSeconds,
65
+ overrides: standaloneOverrides(),
66
+ defaultBasePath: voice_1.TWILIO_PHONE_WEBHOOK_BASE_PATH,
67
+ requirePublicUrl: true,
203
68
  });
204
- writeReadyInstructions(bridge.localUrl, publicBaseUrl);
69
+ if (transport.status !== "started") {
70
+ throw new Error(`Twilio phone voice transport did not start: ${transport.reason}`);
71
+ }
72
+ writeReadyInstructions(transport.bridge.localUrl, transport.settings.publicBaseUrl, transport.settings.webhookUrl);
205
73
  }
206
74
  main().catch((error) => {
207
75
  (0, runtime_1.emitNervesEvent)({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ouro.bot/cli",
3
- "version": "0.1.0-alpha.563",
3
+ "version": "0.1.0-alpha.565",
4
4
  "main": "dist/heart/daemon/ouro-entry.js",
5
5
  "bin": {
6
6
  "cli": "dist/heart/daemon/ouro-bot-entry.js",