voicecc 1.1.36 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/voicecc.js +94 -1
- package/dashboard/dist/assets/index-DCeOdulF.js +28 -0
- package/dashboard/dist/index.html +1 -1
- package/dashboard/routes/agents.ts +28 -8
- package/dashboard/routes/browser-call.ts +3 -2
- package/dashboard/routes/chat.ts +75 -55
- package/dashboard/routes/providers.ts +5 -74
- package/dashboard/routes/twilio.ts +104 -5
- package/dashboard/routes/voice.ts +98 -0
- package/dashboard/server.ts +48 -1
- package/package.json +2 -3
- package/server/index.ts +96 -8
- package/server/services/twilio-manager.ts +29 -10
- package/dashboard/dist/assets/index-C62C9Gp0.js +0 -28
- package/dashboard/dist/audio-processor.js +0 -126
- package/server/services/heartbeat.ts +0 -403
- package/server/voice/assets/chime.wav +0 -0
- package/server/voice/assets/startup.pcm +0 -0
- package/server/voice/audio-adapter.ts +0 -60
- package/server/voice/audio-inactivity.test.ts +0 -108
- package/server/voice/audio-inactivity.ts +0 -91
- package/server/voice/browser-audio-playback.test.ts +0 -149
- package/server/voice/browser-audio.ts +0 -147
- package/server/voice/browser-server.ts +0 -311
- package/server/voice/chat-server.ts +0 -236
- package/server/voice/chime.test.ts +0 -69
- package/server/voice/chime.ts +0 -36
- package/server/voice/claude-session.ts +0 -293
- package/server/voice/endpointing.ts +0 -163
- package/server/voice/mic-vpio +0 -0
- package/server/voice/narration.ts +0 -204
- package/server/voice/prompt-builder.ts +0 -108
- package/server/voice/session-lock.ts +0 -123
- package/server/voice/stt-elevenlabs.ts +0 -210
- package/server/voice/stt-provider.ts +0 -106
- package/server/voice/tts-elevenlabs-hiss.test.ts +0 -183
- package/server/voice/tts-elevenlabs.ts +0 -397
- package/server/voice/tts-provider.ts +0 -155
- package/server/voice/twilio-audio.ts +0 -338
- package/server/voice/twilio-server.ts +0 -540
- package/server/voice/types.ts +0 -282
- package/server/voice/vad.ts +0 -101
- package/server/voice/voice-loop-bugs.test.ts +0 -348
- package/server/voice/voice-server.ts +0 -129
- package/server/voice/voice-session.ts +0 -539
|
@@ -1,540 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Twilio voice call handlers for the unified voice server.
|
|
3
|
-
*
|
|
4
|
-
* Provides HTTP request handlers and WebSocket upgrade logic for Twilio
|
|
5
|
-
* phone calls. Used by voice-server.ts which owns the HTTP server.
|
|
6
|
-
*
|
|
7
|
-
* Responsibilities:
|
|
8
|
-
* - Handle incoming call webhooks, validating the Twilio request signature
|
|
9
|
-
* - Generate per-call UUID tokens for secure WebSocket upgrade
|
|
10
|
-
* - Accept Twilio media stream WebSocket connections
|
|
11
|
-
* - Create a TwilioAudioAdapter + VoiceSession per call
|
|
12
|
-
* - Enforce global session limit via session locks
|
|
13
|
-
* - Tear down sessions on hangup, stop phrase, or error
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
import { randomUUID } from "crypto";
|
|
17
|
-
import { join } from "path";
|
|
18
|
-
|
|
19
|
-
import twilio from "twilio";
|
|
20
|
-
import { WebSocketServer } from "ws";
|
|
21
|
-
|
|
22
|
-
import { createTwilioAudioAdapter } from "./twilio-audio.js";
|
|
23
|
-
import { createVoiceSession } from "./voice-session.js";
|
|
24
|
-
import { createAudioInactivityWatchdog } from "./audio-inactivity.js";
|
|
25
|
-
import { buildAgentPrompt, buildDefaultPrompt } from "./prompt-builder.js";
|
|
26
|
-
import { getAgent, AGENTS_DIR } from "../services/agent-store.js";
|
|
27
|
-
import { getTunnelUrl } from "../services/tunnel.js";
|
|
28
|
-
import { readEnv } from "../services/env.js";
|
|
29
|
-
|
|
30
|
-
import type { IncomingMessage, ServerResponse } from "http";
|
|
31
|
-
import type { Duplex } from "stream";
|
|
32
|
-
import type { WebSocket } from "ws";
|
|
33
|
-
import type { VoiceSession } from "./voice-session.js";
|
|
34
|
-
import type { TtsProviderConfig, SttProviderConfig } from "./types.js";
|
|
35
|
-
|
|
36
|
-
// ============================================================================
|
|
37
|
-
// CONSTANTS
|
|
38
|
-
// ============================================================================
|
|
39
|
-
|
|
40
|
-
/**
 * Interruption threshold (ms) applied to phone calls. Higher than the
 * local-mic threshold because phone audio gets no VPIO echo cancellation.
 */
const PHONE_INTERRUPTION_THRESHOLD_MS = 2000;

/** If no Twilio audio frame arrives within this many ms, the WebSocket is closed */
const AUDIO_INACTIVITY_TIMEOUT_MS = 5000;

/** Interval (ms) between audio-inactivity checks */
const AUDIO_INACTIVITY_CHECK_INTERVAL_MS = 2000;

/** ElevenLabs voice ID used when .env does not provide one */
const DEFAULT_ELEVENLABS_VOICE_ID = "WrjxnKxK0m1uiaH0uteU";

/** ElevenLabs TTS model ID used when .env does not provide one */
const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_turbo_v2_5";

/** ElevenLabs STT model ID used when .env does not provide one */
const DEFAULT_ELEVENLABS_STT_MODEL_ID = "scribe_v1";
|
|
57
|
-
|
|
58
|
-
// ============================================================================
|
|
59
|
-
// TYPES
|
|
60
|
-
// ============================================================================
|
|
61
|
-
|
|
62
|
-
/**
 * Per-call state, kept from the moment a call token is registered (Twilio
 * webhook or /register-call) until the media-stream WebSocket is cleaned up.
 */
interface ActiveCall {
  /** Twilio call SID; stays "" until the WebSocket "start" event supplies it */
  callSid: string;

  /** Voice session for this call; null until the "start" event has created one */
  session: VoiceSession | null;

  /** Agent to use for agent-initiated calls; left undefined for default inbound calls */
  agentId?: string;

  /** Prompt the agent should speak first (e.g. "Call Me" or a heartbeat reason) */
  initialPrompt?: string;

  /** Claude session already opened by the heartbeat; reused by the voice session instead of creating a new one */
  claudeSession?: import("./claude-session.js").ClaudeSession;
}
|
|
75
|
-
|
|
76
|
-
// ============================================================================
|
|
77
|
-
// STATE
|
|
78
|
-
// ============================================================================
|
|
79
|
-
|
|
80
|
-
/** Registry of in-flight calls; each key is that call's per-call UUID token */
const activeCalls: Map<string, ActiveCall> = new Map();
|
|
82
|
-
|
|
83
|
-
// ============================================================================
|
|
84
|
-
// EXPORTED HANDLERS
|
|
85
|
-
// ============================================================================
|
|
86
|
-
|
|
87
|
-
/**
 * Associate an already-running Claude session with a registered call token.
 *
 * The heartbeat scheduler calls this after registering a token so that the
 * voice session picks up the same Claude session rather than starting a
 * fresh one. Unknown tokens are ignored silently.
 *
 * @param token - Call token previously registered via /register-call
 * @param session - Live Claude session produced by the heartbeat check
 */
export function setCallClaudeSession(token: string, session: import("./claude-session.js").ClaudeSession): void {
  const registeredCall = activeCalls.get(token);
  if (!registeredCall) {
    return;
  }

  registeredCall.claudeSession = session;
}
|
|
101
|
-
|
|
102
|
-
/**
 * Dispatch Twilio-related HTTP requests.
 *
 * Recognises POST /twilio/incoming-call and POST /register-call (exact URL
 * match). The boolean result tells the caller whether the request was
 * consumed here, so unhandled requests can fall through to other handlers
 * such as the dashboard proxy.
 *
 * @param req - HTTP request
 * @param res - HTTP response
 * @returns true when one of the Twilio routes handled the request, false otherwise
 */
export function handleTwilioHttpRequest(req: IncomingMessage, res: ServerResponse): boolean {
  if (req.method !== "POST") {
    return false;
  }

  switch (req.url) {
    case "/twilio/incoming-call":
      handleIncomingCall(req, res);
      return true;

    case "/register-call":
      handleRegisterCall(req, res);
      return true;

    default:
      return false;
  }
}
|
|
126
|
-
|
|
127
|
-
/**
 * Public entry point for Twilio media-stream WebSocket upgrades.
 *
 * Forwards all arguments unchanged to the module-internal
 * handleWebSocketUpgrade, which validates the call token and accepts or
 * rejects the connection.
 *
 * @param req - HTTP upgrade request
 * @param socket - Underlying TCP socket
 * @param head - First packet of the upgraded stream
 * @param wss - WebSocketServer instance that will accept the upgrade
 */
export function handleTwilioUpgrade(
  req: IncomingMessage,
  socket: Duplex,
  head: Buffer,
  wss: WebSocketServer,
): void {
  // Pure delegation -- all validation lives in handleWebSocketUpgrade
  handleWebSocketUpgrade(req, socket, head, wss);
}
|
|
145
|
-
|
|
146
|
-
// ============================================================================
|
|
147
|
-
// MAIN HANDLERS
|
|
148
|
-
// ============================================================================
|
|
149
|
-
|
|
150
|
-
/**
 * Handle an incoming call webhook from Twilio (POST /twilio/incoming-call).
 *
 * Validates the Twilio request signature, generates a per-call token, and
 * responds with TwiML that tells Twilio to connect a media stream WebSocket.
 * Reads auth token and tunnel URL lazily per-request so values are always current.
 *
 * Responses:
 * - 200 text/xml  -- TwiML with a <Stream> pointing at /media/:token
 * - 403           -- missing or invalid X-Twilio-Signature
 * - 500           -- TWILIO_AUTH_TOKEN or tunnel URL unavailable, or an
 *                    unexpected error while processing the request
 *
 * @param req - HTTP request from Twilio
 * @param res - HTTP response to send TwiML back
 */
function handleIncomingCall(
  req: IncomingMessage,
  res: ServerResponse,
): void {
  // Keep the raw chunks and decode once at the end, so a multi-byte UTF-8
  // character that straddles two chunks is not corrupted.
  const chunks: Buffer[] = [];
  req.on("data", (chunk: Buffer | string) => {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
  });

  req.on("end", async () => {
    // The whole callback is guarded: a rejection here would otherwise be
    // unhandled and Twilio would never receive a response.
    try {
      const body = Buffer.concat(chunks).toString("utf-8");

      // Read auth token and tunnel URL lazily per-request
      const env = await readEnv();
      const authToken = env.TWILIO_AUTH_TOKEN;
      const tunnelUrl = getTunnelUrl();

      if (!authToken) {
        console.log("Rejected incoming call: TWILIO_AUTH_TOKEN not set");
        res.writeHead(500, { "Content-Type": "text/plain" });
        res.end("Server misconfigured");
        return;
      }

      if (!tunnelUrl) {
        console.log("Rejected incoming call: no tunnel URL available");
        res.writeHead(500, { "Content-Type": "text/plain" });
        res.end("Server misconfigured");
        return;
      }

      const webhookHost = new URL(tunnelUrl).host;

      // Parse URL-encoded POST body into key-value params
      const params = parseUrlEncodedBody(body);

      // Validate Twilio signature (use full URL -- Twilio signs against the complete endpoint URL)
      const webhookUrl = tunnelUrl.replace(/\/$/, "");
      const validationUrl = webhookUrl + req.url;
      const signature = req.headers["x-twilio-signature"] as string;
      if (!signature || !twilio.validateRequest(authToken, signature, validationUrl, params)) {
        console.log("Rejected incoming call: invalid Twilio signature");
        console.log("  validationUrl:", validationUrl);
        console.log("  signature:", signature);
        res.writeHead(403, { "Content-Type": "text/plain" });
        res.end("Forbidden");
        return;
      }

      // Generate per-call token and register in active calls
      const token = randomUUID();
      activeCalls.set(token, { callSid: "", session: null });

      console.log(`Incoming call accepted, token: ${token}`);

      // Respond with TwiML to connect a media stream
      const twiml = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        "<Response>",
        "  <Connect>",
        `    <Stream url="wss://${webhookHost}/media/${token}" />`,
        "  </Connect>",
        "</Response>",
      ].join("\n");

      res.writeHead(200, { "Content-Type": "text/xml" });
      res.end(twiml);
    } catch (err) {
      console.error(`Error handling incoming call: ${err}`);
      if (!res.headersSent) {
        res.writeHead(500, { "Content-Type": "text/plain" });
        res.end("Internal server error");
      } else if (!res.writableEnded) {
        res.end();
      }
    }
  });
}
|
|
228
|
-
|
|
229
|
-
/**
 * Handle a POST /register-call request to pre-register an outbound call token.
 *
 * Called by the heartbeat scheduler or API before placing an outbound Twilio call.
 * Registers the token in activeCalls so the subsequent WebSocket upgrade is accepted.
 *
 * Responses:
 * - 200 { success: true }          -- token registered
 * - 400 { success: false, error }  -- body is not valid JSON, is not an
 *                                     object, or has no non-empty string token
 *
 * @param req - HTTP request with JSON body { token, agentId, initialPrompt? }
 * @param res - HTTP response
 */
function handleRegisterCall(req: IncomingMessage, res: ServerResponse): void {
  // Keep the raw chunks and decode once so multi-byte characters that
  // straddle a chunk boundary survive intact.
  const chunks: Buffer[] = [];
  req.on("data", (chunk: Buffer | string) => {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
  });

  req.on("end", () => {
    const reject = (message: string): void => {
      console.log(`Rejected register-call request: ${message}`);
      res.writeHead(400, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ success: false, error: message }));
    };

    // A throw inside this event handler would escape as an uncaught
    // exception, so bad input is answered with 400 instead.
    let payload: unknown;
    try {
      payload = JSON.parse(Buffer.concat(chunks).toString("utf-8"));
    } catch {
      reject("Invalid JSON body");
      return;
    }

    if (typeof payload !== "object" || payload === null) {
      reject("Body must be a JSON object");
      return;
    }

    const { token, agentId, initialPrompt } = payload as Record<string, unknown>;

    if (typeof token !== "string" || token.length === 0) {
      reject("Missing or invalid token");
      return;
    }

    // Non-string optional fields are dropped rather than stored, so later
    // code can rely on the ActiveCall field types.
    activeCalls.set(token, {
      callSid: "",
      session: null,
      agentId: typeof agentId === "string" ? agentId : undefined,
      initialPrompt: typeof initialPrompt === "string" ? initialPrompt : undefined,
    });

    console.log(`Registered outbound call token: ${token}, agentId: ${agentId}`);

    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ success: true }));
  });
}
|
|
254
|
-
|
|
255
|
-
/**
 * Decide whether a WebSocket upgrade for a Twilio media stream is accepted.
 *
 * The request path must look like /media/:token (a query string is
 * allowed) and the token must already be present in activeCalls; otherwise
 * the socket is destroyed. An `agentId` query parameter, when present,
 * is stored on the call entry. Accepted connections are handed to
 * handleCallSession.
 *
 * @param req - HTTP upgrade request
 * @param socket - Underlying TCP socket
 * @param head - First packet of the upgraded stream
 * @param wss - WebSocketServer instance to accept the upgrade
 */
function handleWebSocketUpgrade(
  req: IncomingMessage,
  socket: Duplex,
  head: Buffer,
  wss: WebSocketServer,
): void {
  const requestUrl = req.url ?? "";

  // Path shape: /media/:token, optionally followed by ?query
  const tokenMatch = /^\/media\/([a-f0-9-]+)(?:\?.*)?$/.exec(requestUrl);
  if (tokenMatch === null) {
    console.log(`Rejected WebSocket upgrade: invalid path ${requestUrl}`);
    socket.destroy();
    return;
  }

  const token = tokenMatch[1];
  const call = activeCalls.get(token);
  if (call === undefined) {
    console.log(`Rejected WebSocket upgrade: unknown token ${token}`);
    socket.destroy();
    return;
  }

  // Outbound agent calls may carry the agent in the query string
  const queryAgentId = new URL(requestUrl, "http://localhost").searchParams.get("agentId");
  if (queryAgentId) {
    call.agentId = queryAgentId;
  }

  // Complete the handshake, then start the per-call session handling
  wss.handleUpgrade(req, socket, head, (ws: WebSocket) => {
    wss.emit("connection", ws, req);
    handleCallSession(ws, token);
  });
}
|
|
303
|
-
|
|
304
|
-
/**
 * Handle a connected Twilio media stream WebSocket session.
 *
 * Listens for Twilio WebSocket events (start, media, stop) and manages
 * the voice session lifecycle. On the "start" event, delegates to
 * handleStreamStart, which creates the TwilioAudioAdapter and VoiceSession.
 * On "stop" or WebSocket close, tears down the session and cleans up.
 * Frames that are not valid JSON objects are logged and ignored.
 *
 * @param ws - Connected WebSocket for the Twilio media stream
 * @param token - Per-call UUID token identifying this call
 */
function handleCallSession(ws: WebSocket, token: string): void {
  let cleaned = false;

  // Detect stale calls: if Twilio stops sending audio frames (caller hung up
  // but WebSocket didn't close cleanly), close the WebSocket to trigger cleanup.
  const watchdog = createAudioInactivityWatchdog({
    timeoutMs: AUDIO_INACTIVITY_TIMEOUT_MS,
    checkIntervalMs: AUDIO_INACTIVITY_CHECK_INTERVAL_MS,
    onTimeout: () => {
      console.log(`[twilio-server] No audio received, closing stale call (token: ${token})`);
      ws.close();
    },
  });

  /**
   * Clean up the call session. Disposes the watchdog, stops the voice
   * session if one exists, and removes the call from the activeCalls map.
   * Uses the cleaned flag to prevent double-cleanup.
   */
  async function cleanup(): Promise<void> {
    if (cleaned) return;
    cleaned = true;

    watchdog.dispose();

    const call = activeCalls.get(token);
    if (call?.session) {
      await call.session.stop();
    }

    activeCalls.delete(token);
    console.log(`Call session cleaned up, token: ${token}`);
  }

  // WebSocket close handler -- always runs cleanup regardless of cause
  ws.on("close", () => {
    cleanup().catch((err) => {
      console.error(`Error during call cleanup: ${err}`);
    });
  });

  ws.on("error", (err) => {
    console.error(`WebSocket error for token ${token}: ${err}`);
    ws.close();
  });

  // Listen for Twilio media stream events
  ws.on("message", (data: Buffer | string) => {
    // A throw inside this listener would escape as an uncaught exception,
    // so a malformed frame is logged and dropped instead.
    let msg: { event?: unknown; start?: { streamSid: string; callSid: string } } | null;
    try {
      msg = JSON.parse(typeof data === "string" ? data : data.toString("utf-8"));
    } catch (err) {
      console.error(`Ignoring malformed WebSocket message for token ${token}: ${err}`);
      return;
    }

    if (msg === null || typeof msg !== "object") {
      console.error(`Ignoring non-object WebSocket message for token ${token}`);
      return;
    }

    if (msg.event === "media") {
      watchdog.ping();
      // Don't return -- TwilioAudioAdapter's onAudio listener also handles media events
    }

    if (msg.event === "start") {
      watchdog.ping();

      if (!msg.start) {
        console.error(`Error handling stream start: missing start payload (token: ${token})`);
        return;
      }

      handleStreamStart(ws, token, { start: msg.start }).catch((err) => {
        console.error(`Error handling stream start: ${err}`);
      });
      return;
    }

    if (msg.event === "stop") {
      console.log(`Twilio stream stopped for token: ${token}`);
      ws.close();
      return;
    }
  });
}
|
|
384
|
-
|
|
385
|
-
// ============================================================================
|
|
386
|
-
// HELPER FUNCTIONS
|
|
387
|
-
// ============================================================================
|
|
388
|
-
|
|
389
|
-
/**
 * Assemble the TTS and STT provider configs from the current .env contents.
 *
 * Runs once per session, so edits to the API key, voice ID, or model IDs
 * are picked up without restarting the server. Missing voice/model values
 * fall back to the DEFAULT_ELEVENLABS_* constants; a missing API key
 * becomes an empty string.
 *
 * @returns TTS and STT provider configs built from the current .env values
 */
async function buildProviderConfig(): Promise<{ ttsProvider: TtsProviderConfig; sttProvider: SttProviderConfig }> {
  const env = await readEnv();

  // Both providers share the same ElevenLabs API key
  const apiKey = env.ELEVENLABS_API_KEY ?? "";

  const ttsProvider: TtsProviderConfig = {
    provider: "elevenlabs",
    elevenlabs: {
      apiKey,
      voiceId: env.ELEVENLABS_VOICE_ID ?? DEFAULT_ELEVENLABS_VOICE_ID,
      modelId: env.ELEVENLABS_MODEL_ID ?? DEFAULT_ELEVENLABS_MODEL_ID,
    },
  };

  const sttProvider: SttProviderConfig = {
    provider: "elevenlabs",
    elevenlabs: {
      apiKey,
      modelId: env.ELEVENLABS_STT_MODEL_ID ?? DEFAULT_ELEVENLABS_STT_MODEL_ID,
    },
  };

  return { ttsProvider, sttProvider };
}
|
|
409
|
-
|
|
410
|
-
/**
 * Handle the Twilio "start" event on a media stream WebSocket.
 *
 * Extracts the streamSid and callSid, creates a TwilioAudioAdapter and
 * VoiceSession. If any step fails (provider config cannot be read, session
 * limit reached, ...), logs the error and closes the WebSocket. If the call
 * was cleaned up while setup was still in flight, the freshly created
 * session is stopped immediately instead of being attached to a dead call.
 *
 * @param ws - Connected WebSocket for the Twilio media stream
 * @param token - Per-call UUID token
 * @param msg - Parsed Twilio "start" event message
 */
async function handleStreamStart(
  ws: WebSocket,
  token: string,
  msg: { start: { streamSid: string; callSid: string } },
): Promise<void> {
  const { streamSid, callSid } = msg.start;
  console.log(`Stream started -- callSid: ${callSid}, streamSid: ${streamSid}`);

  // Update the active call entry with the callSid
  const call = activeCalls.get(token);
  if (!call) return;
  call.callSid = callSid;

  try {
    // Read provider config fresh from .env so key/model/voice changes take effect without restart
    const { ttsProvider, sttProvider } = await buildProviderConfig();

    const defaultConfig = {
      stopPhrase: "stop listening",
      ttsProvider,
      sttProvider,
      interruptionThresholdMs: PHONE_INTERRUPTION_THRESHOLD_MS,
      endpointing: {
        silenceThresholdMs: 700,
        maxSilenceBeforeTimeoutMs: 1200,
        minWordCountForFastPath: 2,
        enableHaikuFallback: false,
      },
      narration: {
        summaryIntervalMs: 12000,
      },
      claudeSession: {
        allowedTools: [] as string[],
        permissionMode: "bypassPermissions",
        systemPrompt: buildDefaultPrompt("voice"),
      } as import("./types.js").ClaudeSessionConfig,
    };

    // Build session config -- use agent personality if agentId is set, otherwise default
    const agentId = call.agentId;
    let sessionConfig: Parameters<typeof createVoiceSession>[1] = { ...defaultConfig, onSessionEnd: () => ws.close() };

    if (agentId) {
      try {
        const agentPrompt = await buildAgentPrompt(agentId, "voice");
        const agentDir = join(AGENTS_DIR, agentId);
        sessionConfig = {
          ...defaultConfig,
          claudeSession: {
            ...defaultConfig.claudeSession,
            customSystemPrompt: agentPrompt,
            cwd: agentDir,
          },
          onSessionEnd: () => ws.close(),
        };
        // Override TTS voice if the agent has a preference
        const agent = await getAgent(agentId);
        if (agent.config.voice?.elevenlabs) {
          const voicePref = agent.config.voice.elevenlabs;
          const overriddenTts: TtsProviderConfig = {
            ...ttsProvider,
            elevenlabs: { ...ttsProvider.elevenlabs, voiceId: voicePref.id },
          };
          sessionConfig = { ...sessionConfig, ttsProvider: overriddenTts };
          console.log(`Using voice "${voicePref.name}" (${voicePref.id}) for agent "${agentId}"`);
        }

        // If heartbeat attached a live Claude session, pass it through
        if (call.claudeSession) {
          sessionConfig.existingClaudeSession = call.claudeSession;
          sessionConfig.initialPrompt = "The user just answered your call. Greet them and briefly explain why you're calling.";
          console.log(`Using existing heartbeat Claude session for agent "${agentId}" call ${callSid}`);
        } else if (call.initialPrompt) {
          sessionConfig.initialPrompt = call.initialPrompt;
          console.log(`Using agent "${agentId}" with initial prompt for call ${callSid}`);
        } else {
          console.log(`Using agent "${agentId}" personality for call ${callSid}`);
        }
      } catch (err) {
        console.error(`Failed to load agent "${agentId}", using default config:`, err);
      }
    }

    // The awaits above may have outlasted the call: cleanup removes the entry
    // from activeCalls, so a missing or replaced entry means there is nothing
    // left to attach a session to.
    if (activeCalls.get(token) !== call) {
      console.log(`Call ${callSid} ended before its voice session was created, skipping`);
      return;
    }

    // Create the Twilio audio adapter
    const adapter = createTwilioAudioAdapter({ ws, streamSid });

    // Create the voice session (acquires a session lock -- may throw if limit reached)
    const session = await createVoiceSession(adapter, sessionConfig);

    // Same check again after the await: cleanup saw session === null and
    // stopped nothing, so this session must be stopped here or it would stay
    // alive and keep holding its session lock.
    if (activeCalls.get(token) !== call) {
      console.log(`Call ${callSid} ended during voice session creation, stopping session`);
      await session.stop();
      return;
    }

    call.session = session;
  } catch (err) {
    console.error(`Failed to create voice session for call ${callSid}: ${err}`);

    // A TwiML-style rejection cannot be sent over the media WebSocket, so
    // just close it. The caller will hear silence and Twilio will eventually
    // disconnect.
    ws.close();
  }
}
|
|
520
|
-
|
|
521
|
-
/**
 * Parse an application/x-www-form-urlencoded POST body into a key-value record.
 *
 * Decoding follows the form-urlencoded rules: `+` becomes a space, each pair
 * is split on its first `=` only (so values may contain `=`), and malformed
 * percent-escapes never throw. Pairs with an empty key are skipped, a key
 * without `=` maps to "", and when a key repeats the last value wins.
 * A single leading `?` is ignored.
 *
 * @param body - URL-encoded string (e.g. "key1=value1&key2=value2")
 * @returns Record of decoded key-value pairs (empty for an empty body)
 */
function parseUrlEncodedBody(body: string): Record<string, string> {
  const params: Record<string, string> = {};

  if (!body) return params;

  new URLSearchParams(body).forEach((value, key) => {
    if (key) {
      params[key] = value;
    }
  });

  return params;
}
|