@openclaw/voice-call 2026.5.2 → 2026.5.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/dist/api.js +2 -0
  2. package/dist/call-status-CXldV5o8.js +32 -0
  3. package/dist/cli-metadata.js +12 -0
  4. package/dist/config-7w04YpHh.js +548 -0
  5. package/dist/config-compat-B0me39_4.js +129 -0
  6. package/dist/guarded-json-api-Btx5EE4w.js +591 -0
  7. package/dist/http-headers-BrnxBasF.js +10 -0
  8. package/dist/index.js +1284 -0
  9. package/dist/mock-CeKvfVEd.js +135 -0
  10. package/dist/plivo-B-a7KFoT.js +393 -0
  11. package/dist/realtime-handler-B63CIDP2.js +325 -0
  12. package/dist/realtime-transcription.runtime-B2h70y2W.js +2 -0
  13. package/dist/realtime-voice.runtime-Bkh4nvLn.js +2 -0
  14. package/dist/response-generator-BrcmwDZU.js +182 -0
  15. package/dist/response-model-CyF5K80p.js +12 -0
  16. package/dist/runtime-api.js +6 -0
  17. package/dist/runtime-entry-88ytYAQa.js +3119 -0
  18. package/dist/runtime-entry.js +2 -0
  19. package/dist/setup-api.js +37 -0
  20. package/dist/telnyx-jjBE8boz.js +260 -0
  21. package/dist/twilio-1OqbcXLL.js +676 -0
  22. package/dist/voice-mapping-BYDGdWGx.js +40 -0
  23. package/package.json +14 -6
  24. package/api.ts +0 -16
  25. package/cli-metadata.ts +0 -10
  26. package/config-api.ts +0 -12
  27. package/index.test.ts +0 -943
  28. package/index.ts +0 -794
  29. package/runtime-api.ts +0 -20
  30. package/runtime-entry.ts +0 -1
  31. package/setup-api.ts +0 -47
  32. package/src/allowlist.test.ts +0 -18
  33. package/src/allowlist.ts +0 -19
  34. package/src/cli.ts +0 -845
  35. package/src/config-compat.test.ts +0 -120
  36. package/src/config-compat.ts +0 -227
  37. package/src/config.test.ts +0 -479
  38. package/src/config.ts +0 -808
  39. package/src/core-bridge.ts +0 -14
  40. package/src/deep-merge.test.ts +0 -40
  41. package/src/deep-merge.ts +0 -23
  42. package/src/gateway-continue-operation.ts +0 -200
  43. package/src/http-headers.test.ts +0 -16
  44. package/src/http-headers.ts +0 -15
  45. package/src/manager/context.ts +0 -42
  46. package/src/manager/events.test.ts +0 -581
  47. package/src/manager/events.ts +0 -288
  48. package/src/manager/lifecycle.ts +0 -53
  49. package/src/manager/lookup.test.ts +0 -52
  50. package/src/manager/lookup.ts +0 -35
  51. package/src/manager/outbound.test.ts +0 -528
  52. package/src/manager/outbound.ts +0 -486
  53. package/src/manager/state.ts +0 -48
  54. package/src/manager/store.ts +0 -106
  55. package/src/manager/timers.test.ts +0 -129
  56. package/src/manager/timers.ts +0 -113
  57. package/src/manager/twiml.test.ts +0 -13
  58. package/src/manager/twiml.ts +0 -17
  59. package/src/manager.closed-loop.test.ts +0 -236
  60. package/src/manager.inbound-allowlist.test.ts +0 -188
  61. package/src/manager.notify.test.ts +0 -377
  62. package/src/manager.restore.test.ts +0 -183
  63. package/src/manager.test-harness.ts +0 -127
  64. package/src/manager.ts +0 -392
  65. package/src/media-stream.test.ts +0 -768
  66. package/src/media-stream.ts +0 -708
  67. package/src/providers/base.ts +0 -97
  68. package/src/providers/mock.test.ts +0 -78
  69. package/src/providers/mock.ts +0 -185
  70. package/src/providers/plivo.test.ts +0 -93
  71. package/src/providers/plivo.ts +0 -601
  72. package/src/providers/shared/call-status.test.ts +0 -24
  73. package/src/providers/shared/call-status.ts +0 -24
  74. package/src/providers/shared/guarded-json-api.test.ts +0 -106
  75. package/src/providers/shared/guarded-json-api.ts +0 -42
  76. package/src/providers/telnyx.test.ts +0 -340
  77. package/src/providers/telnyx.ts +0 -394
  78. package/src/providers/twilio/api.test.ts +0 -145
  79. package/src/providers/twilio/api.ts +0 -93
  80. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  81. package/src/providers/twilio/twiml-policy.ts +0 -87
  82. package/src/providers/twilio/webhook.ts +0 -34
  83. package/src/providers/twilio.test.ts +0 -591
  84. package/src/providers/twilio.ts +0 -861
  85. package/src/providers/twilio.types.ts +0 -17
  86. package/src/realtime-defaults.ts +0 -3
  87. package/src/realtime-fast-context.test.ts +0 -88
  88. package/src/realtime-fast-context.ts +0 -165
  89. package/src/realtime-transcription.runtime.ts +0 -4
  90. package/src/realtime-voice.runtime.ts +0 -5
  91. package/src/response-generator.test.ts +0 -321
  92. package/src/response-generator.ts +0 -318
  93. package/src/response-model.test.ts +0 -71
  94. package/src/response-model.ts +0 -23
  95. package/src/runtime.test.ts +0 -536
  96. package/src/runtime.ts +0 -510
  97. package/src/telephony-audio.test.ts +0 -61
  98. package/src/telephony-audio.ts +0 -12
  99. package/src/telephony-tts.test.ts +0 -196
  100. package/src/telephony-tts.ts +0 -235
  101. package/src/test-fixtures.ts +0 -73
  102. package/src/tts-provider-voice.test.ts +0 -34
  103. package/src/tts-provider-voice.ts +0 -21
  104. package/src/tunnel.test.ts +0 -166
  105. package/src/tunnel.ts +0 -314
  106. package/src/types.ts +0 -291
  107. package/src/utils.test.ts +0 -17
  108. package/src/utils.ts +0 -14
  109. package/src/voice-mapping.test.ts +0 -34
  110. package/src/voice-mapping.ts +0 -68
  111. package/src/webhook/realtime-handler.test.ts +0 -598
  112. package/src/webhook/realtime-handler.ts +0 -485
  113. package/src/webhook/stale-call-reaper.test.ts +0 -88
  114. package/src/webhook/stale-call-reaper.ts +0 -38
  115. package/src/webhook/tailscale.test.ts +0 -214
  116. package/src/webhook/tailscale.ts +0 -129
  117. package/src/webhook-exposure.test.ts +0 -33
  118. package/src/webhook-exposure.ts +0 -84
  119. package/src/webhook-security.test.ts +0 -770
  120. package/src/webhook-security.ts +0 -994
  121. package/src/webhook.hangup-once.lifecycle.test.ts +0 -135
  122. package/src/webhook.test.ts +0 -1470
  123. package/src/webhook.ts +0 -908
  124. package/src/webhook.types.ts +0 -5
  125. package/src/websocket-test-support.ts +0 -72
  126. package/tsconfig.json +0 -16
@@ -1,708 +0,0 @@
1
- /**
2
- * Media Stream Handler
3
- *
4
- * Handles bidirectional audio streaming between Twilio and the AI services.
5
- * - Receives mu-law audio from Twilio via WebSocket
6
- * - Forwards to the selected realtime transcription provider
7
- * - Sends TTS audio back to Twilio
8
- */
9
-
10
- import type { IncomingMessage } from "node:http";
11
- import type { Duplex } from "node:stream";
12
- import type {
13
- RealtimeTranscriptionProviderConfig,
14
- RealtimeTranscriptionProviderPlugin,
15
- RealtimeTranscriptionSession,
16
- } from "openclaw/plugin-sdk/realtime-transcription";
17
- import { type RawData, WebSocket, WebSocketServer } from "ws";
18
-
19
/**
 * Options accepted by the media stream handler.
 *
 * Only the transcription provider and its config blob are required; the
 * numeric limits fall back to module defaults when omitted, and every
 * callback is optional.
 */
export interface MediaStreamConfig {
  /** Plugin used to create one streaming speech-to-text session per media stream. */
  transcriptionProvider: RealtimeTranscriptionProviderPlugin;
  /** Opaque, provider-owned settings handed to each transcription session. */
  providerConfig: RealtimeTranscriptionProviderConfig;
  /** How long (ms) a socket may stay open without an accepted `start` frame before it is closed. */
  preStartTimeoutMs?: number;
  /** Ceiling on sockets that are simultaneously waiting for their `start` frame. */
  maxPendingConnections?: number;
  /** Same ceiling as above, applied per source IP. */
  maxPendingConnectionsPerIp?: number;
  /** Ceiling on all open sockets, counting both pending and active ones. */
  maxConnections?: number;
  /** Trusted hook that yields the source IP used for the pending-connection limits. */
  resolveClientIp?: (request: IncomingMessage) => string | undefined;
  /** Gatekeeper deciding whether a stream for the given call may be accepted. */
  shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
  /** Invoked with each transcript delivered by the transcription session. */
  onTranscript?: (callId: string, transcript: string) => void;
  /** Invoked with interim (partial) transcripts, e.g. for a streaming UI. */
  onPartialTranscript?: (callId: string, partial: string) => void;
  /** Invoked once a stream has been accepted and registered. */
  onConnect?: (callId: string, streamSid: string) => void;
  /** Invoked once the transcription session for the stream has connected. */
  onTranscriptionReady?: (callId: string, streamSid: string) => void;
  /** Invoked when the caller starts speaking (barge-in signal). */
  onSpeechStart?: (callId: string) => void;
  /** Invoked when a stream is stopped or its socket closes. */
  onDisconnect?: (callId: string, streamSid: string) => void;
}
52
-
53
/**
 * Bookkeeping record for one accepted media stream: the provider call it
 * belongs to, the stream identifier, the socket carrying it, and the
 * transcription session that receives its audio.
 */
interface StreamSession {
  /** Call identifier taken from the `start` frame (`callSid`). */
  callId: string;
  /** Stream identifier; also the key under which the session is stored. */
  streamSid: string;
  /** Socket the stream arrived on. */
  ws: WebSocket;
  /** Realtime transcription session fed with this stream's audio. */
  sttSession: RealtimeTranscriptionSession;
}
62
-
63
/** One queued TTS playback job together with the handles needed to cancel or settle it. */
type TtsQueueEntry = {
  /** Performs the playback; receives the signal of `controller` so it can stop early. */
  playFn: (signal: AbortSignal) => Promise<void>;
  /** Aborted to interrupt or cancel this entry. */
  controller: AbortController;
  /** Settles the promise handed back to the caller that queued the entry. */
  resolve: () => void;
  /** Fails that same promise with a playback error. */
  reject: (error: unknown) => void;
};

/** Outcome of an attempt to write one frame to a stream's socket. */
type StreamSendResult = {
  /** Whether the frame counts as delivered to the socket. */
  sent: boolean;
  /** Socket ready state observed at send time; absent when no session exists. */
  readyState?: number;
  /** Socket `bufferedAmount` sampled before the write. */
  bufferedBeforeBytes: number;
  /** Socket `bufferedAmount` sampled after the write (or after the failed attempt). */
  bufferedAfterBytes: number;
};

/** Tracking record for a socket that has not yet sent an accepted `start` frame. */
type PendingConnection = {
  /** Source IP the socket is accounted against. */
  ip: string;
  /** Timer that closes the socket if no `start` frame arrives in time. */
  timeout: ReturnType<typeof setTimeout>;
};
81
-
82
/** Default grace period (ms) for a socket to send an accepted `start` frame. */
const DEFAULT_PRE_START_TIMEOUT_MS = 5_000;
/** Default cap on sockets waiting for their `start` frame. */
const DEFAULT_MAX_PENDING_CONNECTIONS = 32;
/** Default cap on waiting sockets per source IP. */
const DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP = 4;
/** Default cap on all open sockets. */
const DEFAULT_MAX_CONNECTIONS = 128;
/** Largest inbound WebSocket payload accepted (64 KiB). */
const MAX_INBOUND_MESSAGE_BYTES = 64 * 1024;
/** Outbound buffered-byte ceiling (1 MiB) beyond which a socket is closed. */
const MAX_WS_BUFFERED_BYTES = 1024 * 1024;
/** Longest close-reason text echoed into a log line before truncation. */
const CLOSE_REASON_LOG_MAX_CHARS = 120;
89
-
90
/**
 * Make untrusted text safe to embed in a single log line.
 *
 * Control characters (Unicode category `Cc`) are replaced with spaces, runs of
 * whitespace are collapsed to one space, and the result is trimmed. Text longer
 * than `maxChars` UTF-16 code units is cut and suffixed with `...`.
 *
 * @param value - Text to sanitize (for example a peer-supplied close reason).
 * @param maxChars - Maximum number of UTF-16 code units kept before the `...`
 *   suffix. Negative or NaN values are treated as 0.
 * @returns The sanitized, possibly truncated text.
 */
export function sanitizeLogText(value: string, maxChars: number): string {
  const sanitized = value
    .replace(/\p{Cc}/gu, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (sanitized.length <= maxChars) {
    return sanitized;
  }

  // A negative budget would make slice() count from the end of the string and
  // NaN is not a meaningful budget either; both mean "keep nothing".
  let cut = Math.max(0, Math.trunc(maxChars) || 0);

  // Never cut between the two halves of a surrogate pair: a lone high
  // surrogate is not valid text and renders as garbage in log sinks.
  if (cut > 0) {
    const before = sanitized.charCodeAt(cut - 1);
    const after = sanitized.charCodeAt(cut);
    const isHighSurrogate = before >= 0xd800 && before <= 0xdbff;
    const isLowSurrogate = after >= 0xdc00 && after <= 0xdfff;
    if (isHighSurrogate && isLowSurrogate) {
      cut -= 1;
    }
  }

  return `${sanitized.slice(0, cut)}...`;
}
100
-
101
/**
 * Coerce an inbound WebSocket payload into a single `Buffer`.
 *
 * A `Buffer` is returned unchanged, an array of buffers is concatenated, and
 * anything else is passed to `Buffer.from`.
 */
function normalizeWsMessageData(data: RawData): Buffer {
  if (Array.isArray(data)) {
    // Payload delivered as several chunks: join them into one buffer.
    return Buffer.concat(data);
  }
  return Buffer.isBuffer(data) ? data : Buffer.from(data);
}
110
-
111
/**
 * Manages WebSocket connections for Twilio media streams.
 *
 * Responsibilities implemented here:
 * - admission control: a cap on total connections at upgrade time, plus global
 *   and per-IP caps and a timeout for sockets that have not yet sent an
 *   accepted `start` frame;
 * - routing inbound `media` frames to a per-stream realtime transcription session;
 * - writing `media` / `mark` / `clear` frames back to the socket with a ceiling
 *   on the socket's buffered bytes;
 * - serializing TTS playback per stream through an abortable queue.
 */
export class MediaStreamHandler {
  /** Max length of untrusted identifiers (stream/call SIDs) echoed into log lines. */
  private static readonly LOG_ID_MAX_CHARS = 120;

  private wss: WebSocketServer | null = null;
  /** Accepted streams keyed by stream SID. */
  private sessions = new Map<string, StreamSession>();
  private config: MediaStreamConfig;
  /** Pending sockets that have upgraded but not yet sent an accepted `start` frame. */
  private pendingConnections = new Map<WebSocket, PendingConnection>();
  /** Pending socket count per remote IP for pre-auth throttling. */
  private pendingByIp = new Map<string, number>();
  private preStartTimeoutMs: number;
  private maxPendingConnections: number;
  private maxPendingConnectionsPerIp: number;
  private maxConnections: number;
  /** Upgrades handed to the WebSocket server whose completion callback has not run yet. */
  private inflightUpgrades = 0;
  /** TTS playback queues per stream (serialize audio to prevent overlap) */
  private ttsQueues = new Map<string, TtsQueueEntry[]>();
  /** Whether TTS is currently playing per stream (an absent key means "not playing") */
  private ttsPlaying = new Map<string, boolean>();
  /** Active TTS playback controllers per stream */
  private ttsActiveControllers = new Map<string, AbortController>();

  /**
   * @param config - Handler options; omitted numeric limits fall back to the
   *   module-level defaults.
   */
  constructor(config: MediaStreamConfig) {
    this.config = config;
    this.preStartTimeoutMs = config.preStartTimeoutMs ?? DEFAULT_PRE_START_TIMEOUT_MS;
    this.maxPendingConnections = config.maxPendingConnections ?? DEFAULT_MAX_PENDING_CONNECTIONS;
    this.maxPendingConnectionsPerIp =
      config.maxPendingConnectionsPerIp ?? DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP;
    this.maxConnections = config.maxConnections ?? DEFAULT_MAX_CONNECTIONS;
  }

  /**
   * Handle WebSocket upgrade for media stream connections.
   *
   * Lazily creates the WebSocket server, rejects the upgrade with a 503 when
   * the connection cap is reached, and otherwise reserves an in-flight slot
   * until the upgrade completes, the raw socket errors/closes, or the upgrade
   * call throws.
   *
   * @throws Rethrows any error thrown synchronously by the server's `handleUpgrade`.
   */
  handleUpgrade(request: IncomingMessage, socket: Duplex, head: Buffer): void {
    if (!this.wss) {
      this.wss = new WebSocketServer({
        noServer: true,
        // Reject oversized frames before app-level parsing runs on unauthenticated sockets.
        maxPayload: MAX_INBOUND_MESSAGE_BYTES,
      });
      this.wss.on("connection", (ws, req) => this.handleConnection(ws, req));
    }

    const currentConnections = this.getCurrentConnectionCount();
    if (currentConnections >= this.maxConnections) {
      this.rejectUpgrade(socket, 503, "Too many media stream connections");
      return;
    }

    // Count this upgrade against the cap while it is still in flight; the
    // `released` flag makes the release idempotent across the three exit paths.
    this.inflightUpgrades += 1;
    let released = false;
    const releaseUpgradeReservation = () => {
      if (released) {
        return;
      }
      released = true;
      this.inflightUpgrades = Math.max(0, this.inflightUpgrades - 1);
    };
    const handleUpgradeAbort = () => {
      socket.removeListener("error", handleUpgradeAbort);
      socket.removeListener("close", handleUpgradeAbort);
      releaseUpgradeReservation();
    };
    socket.once("error", handleUpgradeAbort);
    socket.once("close", handleUpgradeAbort);

    try {
      this.wss.handleUpgrade(request, socket, head, (ws) => {
        socket.removeListener("error", handleUpgradeAbort);
        socket.removeListener("close", handleUpgradeAbort);
        releaseUpgradeReservation();
        this.wss?.emit("connection", ws, request);
      });
    } catch (error) {
      socket.removeListener("error", handleUpgradeAbort);
      socket.removeListener("close", handleUpgradeAbort);
      releaseUpgradeReservation();
      throw error;
    }
  }

  /**
   * Handle new WebSocket connection from Twilio.
   *
   * Registers the socket as pending (closing it with 1013 when the pending
   * limits are hit) and wires the message, close and error handlers. Errors
   * raised while processing a frame are logged and do not close the socket.
   */
  private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
    let session: StreamSession | null = null;
    const streamToken = this.getStreamToken(_request);
    const ip = this.getClientIp(_request);

    if (!this.registerPendingConnection(ws, ip)) {
      ws.close(1013, "Too many pending media stream connections");
      return;
    }

    ws.on("message", async (data: RawData) => {
      try {
        const raw = normalizeWsMessageData(data);
        const message = JSON.parse(raw.toString("utf8")) as TwilioMediaMessage;

        switch (message.event) {
          case "connected":
            console.log("[MediaStream] Twilio connected");
            break;

          case "start":
            if (session) {
              // This socket already carries an active stream. Starting again
              // would create a second transcription session and overwrite the
              // stored one, leaving the first session open with nothing left
              // to close it.
              console.warn(
                `[MediaStream] Ignoring duplicate start frame on active stream: ${this.formatLogId(session.streamSid)}`,
              );
              break;
            }
            session = this.handleStart(ws, message, streamToken);
            if (session) {
              this.clearPendingConnection(ws);
            }
            break;

          case "media":
            if (session && message.media?.payload) {
              // Forward audio to STT
              const audioBuffer = Buffer.from(message.media.payload, "base64");
              session.sttSession.sendAudio(audioBuffer);
            }
            break;

          case "stop":
            if (session) {
              this.handleStop(session);
              // Cleared so the close handler does not stop the same session twice.
              session = null;
            }
            break;

          case "clear":
          case "mark":
            break;
        }
      } catch (error) {
        console.error("[MediaStream] Error processing message:", error);
      }
    });

    ws.on("close", (code, reason) => {
      const rawReason = Buffer.isBuffer(reason) ? reason.toString("utf8") : String(reason || "");
      const reasonText = sanitizeLogText(rawReason, CLOSE_REASON_LOG_MAX_CHARS);
      console.log(
        `[MediaStream] WebSocket closed (code: ${code}, reason: ${reasonText || "none"})`,
      );
      this.clearPendingConnection(ws);
      if (session) {
        this.handleStop(session);
      }
    });

    ws.on("error", (error) => {
      console.error("[MediaStream] WebSocket error:", error);
    });
  }

  /**
   * Handle stream start event.
   *
   * Closes the socket with 1008 and returns `null` when the frame has no
   * `callSid` or when `shouldAcceptStream` rejects it. Otherwise creates the
   * transcription session, registers the stream, fires `onConnect`, and starts
   * connecting the transcription session in the background.
   *
   * @returns The registered session, or `null` when the stream was rejected.
   */
  private handleStart(
    ws: WebSocket,
    message: TwilioMediaMessage,
    streamToken?: string,
  ): StreamSession | null {
    const streamSid = message.streamSid || "";
    const callSid = message.start?.callSid || "";

    // Prefer token from start message customParameters (set via TwiML <Parameter>),
    // falling back to query string token. Twilio strips query params from WebSocket
    // URLs but reliably delivers <Parameter> values in customParameters.
    const effectiveToken = message.start?.customParameters?.token ?? streamToken;

    // Both identifiers come straight from a frame that has not been validated
    // yet, so they are sanitized before being written to the log.
    console.log(
      `[MediaStream] Stream started: ${this.formatLogId(streamSid)} (call: ${this.formatLogId(callSid)})`,
    );
    if (!callSid) {
      console.warn("[MediaStream] Missing callSid; closing stream");
      ws.close(1008, "Missing callSid");
      return null;
    }
    if (
      this.config.shouldAcceptStream &&
      !this.config.shouldAcceptStream({ callId: callSid, streamSid, token: effectiveToken })
    ) {
      console.warn(
        `[MediaStream] Rejecting stream for unknown call: ${this.formatLogId(callSid)}`,
      );
      ws.close(1008, "Unknown call");
      return null;
    }

    const sttSession = this.config.transcriptionProvider.createSession({
      providerConfig: this.config.providerConfig,
      onPartial: (partial) => {
        this.config.onPartialTranscript?.(callSid, partial);
      },
      onTranscript: (transcript) => {
        this.config.onTranscript?.(callSid, transcript);
      },
      onSpeechStart: () => {
        this.config.onSpeechStart?.(callSid);
      },
      onError: (error) => {
        console.warn("[MediaStream] Transcription session error:", error.message);
      },
    });

    const session: StreamSession = {
      callId: callSid,
      streamSid,
      ws,
      sttSession,
    };

    this.sessions.set(streamSid, session);
    this.config.onConnect?.(callSid, streamSid);
    void this.connectTranscriptionAndNotify(session);

    return session;
  }

  /**
   * Connect the session's transcription backend and announce readiness.
   *
   * On connection failure the socket is closed with 1011 if the session is
   * still the registered one and its socket is open; otherwise only the
   * transcription session is closed. If the stream went away while connecting,
   * the transcription session is closed and `onTranscriptionReady` is not fired.
   */
  private async connectTranscriptionAndNotify(session: StreamSession): Promise<void> {
    try {
      await session.sttSession.connect();
    } catch (error) {
      console.warn(
        "[MediaStream] STT connection failed; closing media stream:",
        error instanceof Error ? error.message : String(error),
      );
      if (
        this.sessions.get(session.streamSid) === session &&
        session.ws.readyState === WebSocket.OPEN
      ) {
        session.ws.close(1011, "STT connection failed");
      } else {
        session.sttSession.close();
      }
      return;
    }

    if (
      this.sessions.get(session.streamSid) !== session ||
      session.ws.readyState !== WebSocket.OPEN
    ) {
      session.sttSession.close();
      return;
    }

    this.config.onTranscriptionReady?.(session.callId, session.streamSid);
  }

  /**
   * Handle stream stop event.
   *
   * Drops the stream's TTS state, closes its transcription session, removes it
   * from the session map, and fires `onDisconnect`.
   */
  private handleStop(session: StreamSession): void {
    console.log(`[MediaStream] Stream stopped: ${this.formatLogId(session.streamSid)}`);

    this.clearTtsState(session.streamSid);
    session.sttSession.close();
    this.sessions.delete(session.streamSid);
    this.config.onDisconnect?.(session.callId, session.streamSid);
  }

  /** Sanitize and bound an untrusted identifier before it is written to a log line. */
  private formatLogId(value: string): string {
    return sanitizeLogText(value, MediaStreamHandler.LOG_ID_MAX_CHARS);
  }

  /**
   * Read the `token` query parameter from the upgrade request URL.
   *
   * @returns The token, or `undefined` when the URL/host is missing, the URL
   *   cannot be parsed, or no token is present.
   */
  private getStreamToken(request: IncomingMessage): string | undefined {
    if (!request.url || !request.headers.host) {
      return undefined;
    }
    try {
      const url = new URL(request.url, `http://${request.headers.host}`);
      return url.searchParams.get("token") ?? undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Resolve the source IP used for pending-connection accounting: the trimmed
   * result of `resolveClientIp` when non-empty, else the socket's remote
   * address, else the literal `"unknown"`.
   */
  private getClientIp(request: IncomingMessage): string {
    const resolvedIp = this.config.resolveClientIp?.(request)?.trim();
    if (resolvedIp) {
      return resolvedIp;
    }
    return request.socket.remoteAddress || "unknown";
  }

  /** Number of sockets tracked by the WebSocket server plus upgrades still in flight. */
  private getCurrentConnectionCount(): number {
    return this.wss ? this.wss.clients.size + this.inflightUpgrades : this.inflightUpgrades;
  }

  /**
   * Track a freshly upgraded socket until it sends an accepted `start` frame.
   *
   * @returns `false` (nothing is tracked) when the global or per-IP pending
   *   limit is already reached; `true` once the socket is registered and its
   *   pre-start timeout is armed.
   */
  private registerPendingConnection(ws: WebSocket, ip: string): boolean {
    if (this.pendingConnections.size >= this.maxPendingConnections) {
      console.warn("[MediaStream] Rejecting connection: pending connection limit reached");
      return false;
    }

    const pendingForIp = this.pendingByIp.get(ip) ?? 0;
    if (pendingForIp >= this.maxPendingConnectionsPerIp) {
      console.warn(`[MediaStream] Rejecting connection: pending per-IP limit reached (${ip})`);
      return false;
    }

    const timeout = setTimeout(() => {
      // The socket may have been promoted or closed before the timer fired.
      if (!this.pendingConnections.has(ws)) {
        return;
      }
      console.warn(
        `[MediaStream] Closing pre-start idle connection after ${this.preStartTimeoutMs}ms (${ip})`,
      );
      ws.close(1008, "Start timeout");
    }, this.preStartTimeoutMs);

    // Do not let this timer alone keep the process alive.
    timeout.unref?.();
    this.pendingConnections.set(ws, { ip, timeout });
    this.pendingByIp.set(ip, pendingForIp + 1);
    return true;
  }

  /**
   * Stop tracking a pending socket: cancel its timeout and decrement (or
   * remove) its IP counter. No-op when the socket is not pending.
   */
  private clearPendingConnection(ws: WebSocket): void {
    const pending = this.pendingConnections.get(ws);
    if (!pending) {
      return;
    }

    clearTimeout(pending.timeout);
    this.pendingConnections.delete(ws);

    const current = this.pendingByIp.get(pending.ip) ?? 0;
    if (current <= 1) {
      this.pendingByIp.delete(pending.ip);
      return;
    }
    this.pendingByIp.set(pending.ip, current - 1);
  }

  /** Write a minimal plain-text HTTP error response to the raw socket and destroy it. */
  private rejectUpgrade(socket: Duplex, statusCode: 429 | 503, message: string): void {
    const statusText = statusCode === 429 ? "Too Many Requests" : "Service Unavailable";
    const body = `${message}\n`;
    socket.write(
      `HTTP/1.1 ${statusCode} ${statusText}\r\n` +
        "Connection: close\r\n" +
        "Content-Type: text/plain; charset=utf-8\r\n" +
        `Content-Length: ${Buffer.byteLength(body)}\r\n` +
        "\r\n" +
        body,
    );
    socket.destroy();
  }

  /**
   * Get an active session with an open WebSocket, or undefined if unavailable.
   */
  private getOpenSession(streamSid: string): StreamSession | undefined {
    const session = this.sessions.get(streamSid);
    return session?.ws.readyState === WebSocket.OPEN ? session : undefined;
  }

  /**
   * Send a message to a stream's WebSocket if available.
   *
   * The frame is not written when the stream is unknown, its socket is not
   * open, or the socket already buffers more than `MAX_WS_BUFFERED_BYTES`; in
   * the last case the socket is also closed with 1013. If the buffer exceeds
   * the ceiling after the write, the socket is closed and `sent` is reported
   * as `false` even though the frame was handed to the socket.
   *
   * @returns Delivery flag plus the ready state and buffered-byte samples.
   */
  private sendToStream(streamSid: string, message: unknown): StreamSendResult {
    const session = this.sessions.get(streamSid);
    if (!session) {
      return {
        sent: false,
        bufferedBeforeBytes: 0,
        bufferedAfterBytes: 0,
      };
    }

    const readyState = session.ws.readyState;
    const bufferedBeforeBytes = session.ws.bufferedAmount;
    if (readyState !== WebSocket.OPEN) {
      return {
        sent: false,
        readyState,
        bufferedBeforeBytes,
        bufferedAfterBytes: session.ws.bufferedAmount,
      };
    }
    if (bufferedBeforeBytes > MAX_WS_BUFFERED_BYTES) {
      try {
        session.ws.close(1013, "Backpressure: send buffer exceeded");
      } catch {
        // Best-effort close; caller still receives sent:false.
      }
      return {
        sent: false,
        readyState,
        bufferedBeforeBytes,
        bufferedAfterBytes: session.ws.bufferedAmount,
      };
    }

    try {
      session.ws.send(JSON.stringify(message));
      const bufferedAfterBytes = session.ws.bufferedAmount;
      if (bufferedAfterBytes > MAX_WS_BUFFERED_BYTES) {
        try {
          session.ws.close(1013, "Backpressure: send buffer exceeded");
        } catch {
          // Best-effort close; caller still receives sent:false.
        }
        return {
          sent: false,
          readyState,
          bufferedBeforeBytes,
          bufferedAfterBytes,
        };
      }
      return {
        sent: true,
        readyState,
        bufferedBeforeBytes,
        bufferedAfterBytes,
      };
    } catch {
      return {
        sent: false,
        readyState,
        bufferedBeforeBytes,
        bufferedAfterBytes: session.ws.bufferedAmount,
      };
    }
  }

  /**
   * Send audio to a specific stream (for TTS playback).
   * Audio should be mu-law encoded at 8kHz mono.
   */
  sendAudio(streamSid: string, muLawAudio: Buffer): StreamSendResult {
    return this.sendToStream(streamSid, {
      event: "media",
      streamSid,
      media: { payload: muLawAudio.toString("base64") },
    });
  }

  /**
   * Send a mark event to track audio playback position.
   */
  sendMark(streamSid: string, name: string): StreamSendResult {
    return this.sendToStream(streamSid, {
      event: "mark",
      streamSid,
      mark: { name },
    });
  }

  /**
   * Clear audio buffer (interrupt playback).
   */
  clearAudio(streamSid: string): StreamSendResult {
    return this.sendToStream(streamSid, { event: "clear", streamSid });
  }

  /**
   * Queue a TTS operation for sequential playback.
   * Only one TTS operation plays at a time per stream to prevent overlap.
   *
   * @returns A promise that resolves when the operation finishes or is
   *   cancelled, and rejects when `playFn` fails without having been aborted.
   */
  async queueTts(streamSid: string, playFn: (signal: AbortSignal) => Promise<void>): Promise<void> {
    const queue = this.getTtsQueue(streamSid);
    let resolveEntry: () => void;
    let rejectEntry: (error: unknown) => void;
    const promise = new Promise<void>((resolve, reject) => {
      resolveEntry = resolve;
      rejectEntry = reject;
    });

    queue.push({
      playFn,
      controller: new AbortController(),
      // The Promise executor runs synchronously, so both handles are assigned here.
      resolve: resolveEntry!,
      reject: rejectEntry!,
    });

    if (!this.ttsPlaying.get(streamSid)) {
      void this.processQueue(streamSid);
    }

    return promise;
  }

  /**
   * Clear TTS queue and interrupt current playback (barge-in).
   *
   * Queued entries are aborted and resolved, the active playback (if any) is
   * aborted, and a `clear` frame is sent to the stream.
   */
  clearTtsQueue(streamSid: string, _reason = "unspecified"): void {
    // Look the queue up without creating one: a barge-in for a stream that was
    // never queued (or is already gone) must not leave an empty queue behind.
    const queue = this.ttsQueues.get(streamSid);
    if (queue) {
      this.resolveQueuedTtsEntries(queue);
    }
    this.ttsActiveControllers.get(streamSid)?.abort();
    this.clearAudio(streamSid);
  }

  /**
   * Get active session by call ID.
   */
  getSessionByCallId(callId: string): StreamSession | undefined {
    return [...this.sessions.values()].find((session) => session.callId === callId);
  }

  /**
   * Close all sessions.
   *
   * Clears each stream's TTS state, closes its transcription session and
   * socket, then empties the session map. `onDisconnect` is not invoked from
   * here directly.
   */
  closeAll(): void {
    for (const session of this.sessions.values()) {
      this.clearTtsState(session.streamSid);
      session.sttSession.close();
      session.ws.close();
    }
    this.sessions.clear();
  }

  /** Return the stream's TTS queue, creating and storing an empty one if needed. */
  private getTtsQueue(streamSid: string): TtsQueueEntry[] {
    const existing = this.ttsQueues.get(streamSid);
    if (existing) {
      return existing;
    }
    const queue: TtsQueueEntry[] = [];
    this.ttsQueues.set(streamSid, queue);
    return queue;
  }

  /**
   * Process the TTS queue for a stream.
   * Uses iterative approach to avoid stack accumulation from recursion.
   */
  private async processQueue(streamSid: string): Promise<void> {
    this.ttsPlaying.set(streamSid, true);

    while (true) {
      const queue = this.ttsQueues.get(streamSid);
      if (!queue || queue.length === 0) {
        // Remove the flag rather than storing `false`: readers treat a missing
        // key as "not playing", and a stream torn down mid-playback would
        // otherwise get its entry re-created here after cleanup removed it.
        this.ttsPlaying.delete(streamSid);
        this.ttsActiveControllers.delete(streamSid);
        return;
      }

      const entry = queue.shift()!;
      this.ttsActiveControllers.set(streamSid, entry.controller);

      try {
        await entry.playFn(entry.controller.signal);
        entry.resolve();
      } catch (error) {
        if (entry.controller.signal.aborted) {
          // An interrupted playback is an expected outcome, not a failure.
          entry.resolve();
        } else {
          console.error("[MediaStream] TTS playback error:", error);
          entry.reject(error);
        }
      } finally {
        if (this.ttsActiveControllers.get(streamSid) === entry.controller) {
          this.ttsActiveControllers.delete(streamSid);
        }
      }
    }
  }

  /** Cancel queued and active TTS for a stream and drop all of its TTS bookkeeping. */
  private clearTtsState(streamSid: string): void {
    const queue = this.ttsQueues.get(streamSid);
    if (queue) {
      this.resolveQueuedTtsEntries(queue);
    }
    this.ttsActiveControllers.get(streamSid)?.abort();
    this.ttsActiveControllers.delete(streamSid);
    this.ttsPlaying.delete(streamSid);
    this.ttsQueues.delete(streamSid);
  }

  /** Empty the queue in place, aborting and resolving every entry that had not started. */
  private resolveQueuedTtsEntries(queue: TtsQueueEntry[]): void {
    const pending = queue.splice(0);
    for (const entry of pending) {
      entry.controller.abort();
      entry.resolve();
    }
  }
}
679
-
680
/**
 * Twilio Media Stream message format.
 *
 * One shape covers every frame exchanged on the stream socket; `event`
 * selects which of the optional payload members is populated. The `dtmf`
 * event and payload are included so keypad frames are representable instead
 * of falling outside the declared union.
 */
interface TwilioMediaMessage {
  event: "connected" | "start" | "media" | "stop" | "dtmf" | "mark" | "clear";
  sequenceNumber?: string;
  streamSid?: string;
  /** Present on `start` frames. */
  start?: {
    streamSid: string;
    accountSid: string;
    callSid: string;
    tracks: string[];
    /** Values supplied through TwiML `<Parameter>` elements. */
    customParameters?: Record<string, string>;
    mediaFormat: {
      encoding: string;
      sampleRate: number;
      channels: number;
    };
  };
  /** Present on `media` frames; `payload` is base64-encoded audio. */
  media?: {
    track?: string;
    chunk?: string;
    timestamp?: string;
    payload?: string;
  };
  /** Present on `dtmf` frames. */
  dtmf?: {
    track?: string;
    digit?: string;
  };
  /** Present on `mark` frames. */
  mark?: {
    name: string;
  };
}