@openclaw/voice-call 2026.5.2 → 2026.5.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.js +2 -0
- package/dist/call-status-CXldV5o8.js +32 -0
- package/dist/cli-metadata.js +12 -0
- package/dist/config-7w04YpHh.js +548 -0
- package/dist/config-compat-B0me39_4.js +129 -0
- package/dist/guarded-json-api-Btx5EE4w.js +591 -0
- package/dist/http-headers-BrnxBasF.js +10 -0
- package/dist/index.js +1284 -0
- package/dist/mock-CeKvfVEd.js +135 -0
- package/dist/plivo-B-a7KFoT.js +393 -0
- package/dist/realtime-handler-B63CIDP2.js +325 -0
- package/dist/realtime-transcription.runtime-B2h70y2W.js +2 -0
- package/dist/realtime-voice.runtime-Bkh4nvLn.js +2 -0
- package/dist/response-generator-BrcmwDZU.js +182 -0
- package/dist/response-model-CyF5K80p.js +12 -0
- package/dist/runtime-api.js +6 -0
- package/dist/runtime-entry-88ytYAQa.js +3119 -0
- package/dist/runtime-entry.js +2 -0
- package/dist/setup-api.js +37 -0
- package/dist/telnyx-jjBE8boz.js +260 -0
- package/dist/twilio-1OqbcXLL.js +676 -0
- package/dist/voice-mapping-BYDGdWGx.js +40 -0
- package/package.json +14 -6
- package/api.ts +0 -16
- package/cli-metadata.ts +0 -10
- package/config-api.ts +0 -12
- package/index.test.ts +0 -943
- package/index.ts +0 -794
- package/runtime-api.ts +0 -20
- package/runtime-entry.ts +0 -1
- package/setup-api.ts +0 -47
- package/src/allowlist.test.ts +0 -18
- package/src/allowlist.ts +0 -19
- package/src/cli.ts +0 -845
- package/src/config-compat.test.ts +0 -120
- package/src/config-compat.ts +0 -227
- package/src/config.test.ts +0 -479
- package/src/config.ts +0 -808
- package/src/core-bridge.ts +0 -14
- package/src/deep-merge.test.ts +0 -40
- package/src/deep-merge.ts +0 -23
- package/src/gateway-continue-operation.ts +0 -200
- package/src/http-headers.test.ts +0 -16
- package/src/http-headers.ts +0 -15
- package/src/manager/context.ts +0 -42
- package/src/manager/events.test.ts +0 -581
- package/src/manager/events.ts +0 -288
- package/src/manager/lifecycle.ts +0 -53
- package/src/manager/lookup.test.ts +0 -52
- package/src/manager/lookup.ts +0 -35
- package/src/manager/outbound.test.ts +0 -528
- package/src/manager/outbound.ts +0 -486
- package/src/manager/state.ts +0 -48
- package/src/manager/store.ts +0 -106
- package/src/manager/timers.test.ts +0 -129
- package/src/manager/timers.ts +0 -113
- package/src/manager/twiml.test.ts +0 -13
- package/src/manager/twiml.ts +0 -17
- package/src/manager.closed-loop.test.ts +0 -236
- package/src/manager.inbound-allowlist.test.ts +0 -188
- package/src/manager.notify.test.ts +0 -377
- package/src/manager.restore.test.ts +0 -183
- package/src/manager.test-harness.ts +0 -127
- package/src/manager.ts +0 -392
- package/src/media-stream.test.ts +0 -768
- package/src/media-stream.ts +0 -708
- package/src/providers/base.ts +0 -97
- package/src/providers/mock.test.ts +0 -78
- package/src/providers/mock.ts +0 -185
- package/src/providers/plivo.test.ts +0 -93
- package/src/providers/plivo.ts +0 -601
- package/src/providers/shared/call-status.test.ts +0 -24
- package/src/providers/shared/call-status.ts +0 -24
- package/src/providers/shared/guarded-json-api.test.ts +0 -106
- package/src/providers/shared/guarded-json-api.ts +0 -42
- package/src/providers/telnyx.test.ts +0 -340
- package/src/providers/telnyx.ts +0 -394
- package/src/providers/twilio/api.test.ts +0 -145
- package/src/providers/twilio/api.ts +0 -93
- package/src/providers/twilio/twiml-policy.test.ts +0 -84
- package/src/providers/twilio/twiml-policy.ts +0 -87
- package/src/providers/twilio/webhook.ts +0 -34
- package/src/providers/twilio.test.ts +0 -591
- package/src/providers/twilio.ts +0 -861
- package/src/providers/twilio.types.ts +0 -17
- package/src/realtime-defaults.ts +0 -3
- package/src/realtime-fast-context.test.ts +0 -88
- package/src/realtime-fast-context.ts +0 -165
- package/src/realtime-transcription.runtime.ts +0 -4
- package/src/realtime-voice.runtime.ts +0 -5
- package/src/response-generator.test.ts +0 -321
- package/src/response-generator.ts +0 -318
- package/src/response-model.test.ts +0 -71
- package/src/response-model.ts +0 -23
- package/src/runtime.test.ts +0 -536
- package/src/runtime.ts +0 -510
- package/src/telephony-audio.test.ts +0 -61
- package/src/telephony-audio.ts +0 -12
- package/src/telephony-tts.test.ts +0 -196
- package/src/telephony-tts.ts +0 -235
- package/src/test-fixtures.ts +0 -73
- package/src/tts-provider-voice.test.ts +0 -34
- package/src/tts-provider-voice.ts +0 -21
- package/src/tunnel.test.ts +0 -166
- package/src/tunnel.ts +0 -314
- package/src/types.ts +0 -291
- package/src/utils.test.ts +0 -17
- package/src/utils.ts +0 -14
- package/src/voice-mapping.test.ts +0 -34
- package/src/voice-mapping.ts +0 -68
- package/src/webhook/realtime-handler.test.ts +0 -598
- package/src/webhook/realtime-handler.ts +0 -485
- package/src/webhook/stale-call-reaper.test.ts +0 -88
- package/src/webhook/stale-call-reaper.ts +0 -38
- package/src/webhook/tailscale.test.ts +0 -214
- package/src/webhook/tailscale.ts +0 -129
- package/src/webhook-exposure.test.ts +0 -33
- package/src/webhook-exposure.ts +0 -84
- package/src/webhook-security.test.ts +0 -770
- package/src/webhook-security.ts +0 -994
- package/src/webhook.hangup-once.lifecycle.test.ts +0 -135
- package/src/webhook.test.ts +0 -1470
- package/src/webhook.ts +0 -908
- package/src/webhook.types.ts +0 -5
- package/src/websocket-test-support.ts +0 -72
- package/tsconfig.json +0 -16
package/src/media-stream.ts
DELETED
|
@@ -1,708 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Media Stream Handler
|
|
3
|
-
*
|
|
4
|
-
* Handles bidirectional audio streaming between Twilio and the AI services.
|
|
5
|
-
* - Receives mu-law audio from Twilio via WebSocket
|
|
6
|
-
* - Forwards to the selected realtime transcription provider
|
|
7
|
-
* - Sends TTS audio back to Twilio
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import type { IncomingMessage } from "node:http";
|
|
11
|
-
import type { Duplex } from "node:stream";
|
|
12
|
-
import type {
|
|
13
|
-
RealtimeTranscriptionProviderConfig,
|
|
14
|
-
RealtimeTranscriptionProviderPlugin,
|
|
15
|
-
RealtimeTranscriptionSession,
|
|
16
|
-
} from "openclaw/plugin-sdk/realtime-transcription";
|
|
17
|
-
import { type RawData, WebSocket, WebSocketServer } from "ws";
|
|
18
|
-
|
|
19
|
-
/**
 * Options accepted by {@link MediaStreamHandler}.
 *
 * The numeric limits are optional; the handler substitutes module defaults
 * when a value is omitted. All callbacks are optional and invoked synchronously
 * by the handler.
 */
export interface MediaStreamConfig {
  /** Plugin used to create one realtime transcription (STT) session per accepted stream. */
  transcriptionProvider: RealtimeTranscriptionProviderPlugin;
  /** Opaque provider configuration handed to `transcriptionProvider.createSession`. */
  providerConfig: RealtimeTranscriptionProviderConfig;
  /**
   * How long (ms) a socket may stay open without an accepted `start` frame
   * before the handler closes it. Defaults to 5000.
   */
  preStartTimeoutMs?: number;
  /** Upper bound on sockets that are waiting for their `start` frame. Defaults to 32. */
  maxPendingConnections?: number;
  /** Upper bound on waiting sockets that share one source IP. Defaults to 4. */
  maxPendingConnectionsPerIp?: number;
  /** Upper bound on all open sockets, pending and active combined. Defaults to 128. */
  maxConnections?: number;
  /**
   * Trusted source-IP resolver used by the pending-connection limits.
   * When it is absent or yields an empty value, the socket's remote address is used.
   */
  resolveClientIp?: (request: IncomingMessage) => string | undefined;
  /**
   * Gatekeeper consulted for every `start` frame that carries a call ID.
   * Returning `false` makes the handler close the socket instead of creating a session.
   */
  shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
  /** Receives each final transcript reported by the transcription session. */
  onTranscript?: (callId: string, transcript: string) => void;
  /** Receives each partial (in-progress) transcript reported by the transcription session. */
  onPartialTranscript?: (callId: string, partial: string) => void;
  /** Fired once a stream has been accepted and its session registered. */
  onConnect?: (callId: string, streamSid: string) => void;
  /** Fired after the transcription session connected while the stream is still open. */
  onTranscriptionReady?: (callId: string, streamSid: string) => void;
  /** Fired when the transcription session reports the start of speech (barge-in). */
  onSpeechStart?: (callId: string) => void;
  /** Fired when an accepted stream is torn down. */
  onDisconnect?: (callId: string, streamSid: string) => void;
}
|
|
52
|
-
|
|
53
|
-
/**
 * State kept for one accepted media stream.
 */
interface StreamSession {
  /** Call identifier taken from the `start` frame's `callSid`. */
  callId: string;
  /** Stream identifier taken from the `start` frame; key of the handler's session map. */
  streamSid: string;
  /** Socket the stream arrived on; outbound frames are written to it. */
  ws: WebSocket;
  /** Transcription session that receives the stream's inbound audio. */
  sttSession: RealtimeTranscriptionSession;
}
|
|
62
|
-
|
|
63
|
-
/** One queued TTS playback job together with the handles needed to settle or cancel it. */
interface TtsQueueEntry {
  /** Performs the playback; receives the abort signal of `controller`. */
  playFn: (signal: AbortSignal) => Promise<void>;
  /** Aborted when the job is cancelled (queue cleared or stream stopped). */
  controller: AbortController;
  /** Settles the promise handed to the caller that queued the job. */
  resolve: () => void;
  /** Rejects the promise handed to the caller that queued the job. */
  reject: (error: unknown) => void;
}
|
|
69
|
-
|
|
70
|
-
/** Outcome of an attempt to write one frame to a stream's socket. */
interface StreamSendResult {
  /** `true` only when the frame was handed to the socket and the send buffer stayed within bounds. */
  sent: boolean;
  /** Socket `readyState` observed before sending; absent when no session exists for the stream. */
  readyState?: number;
  /** Socket `bufferedAmount` observed before the attempt. */
  bufferedBeforeBytes: number;
  /** Socket `bufferedAmount` observed after the attempt. */
  bufferedAfterBytes: number;
}
|
|
76
|
-
|
|
77
|
-
/** Bookkeeping for a socket that has not yet delivered an accepted `start` frame. */
interface PendingConnection {
  /** Source IP the socket is counted against. */
  ip: string;
  /** Timer that closes the socket when no `start` frame arrives in time. */
  timeout: ReturnType<typeof setTimeout>;
}
|
|
81
|
-
|
|
82
|
-
/** Default window (ms) in which a new socket must deliver an accepted `start` frame. */
const DEFAULT_PRE_START_TIMEOUT_MS = 5_000;
/** Default cap on sockets waiting for their `start` frame. */
const DEFAULT_MAX_PENDING_CONNECTIONS = 32;
/** Default cap on waiting sockets per source IP. */
const DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP = 4;
/** Default cap on all open sockets (pending + active). */
const DEFAULT_MAX_CONNECTIONS = 128;
/** Largest inbound WebSocket payload accepted (64 KiB). */
const MAX_INBOUND_MESSAGE_BYTES = 1024 * 64;
/** Outbound buffered bytes above which a socket is closed for backpressure (1 MiB). */
const MAX_WS_BUFFERED_BYTES = 1024 ** 2;
/** Longest close-reason text written to the log before truncation. */
const CLOSE_REASON_LOG_MAX_CHARS = 120;
|
|
89
|
-
|
|
90
|
-
/**
 * Make untrusted text safe to embed in a single log line.
 *
 * Control characters are replaced by spaces, runs of whitespace are collapsed
 * to one space, and the result is trimmed. Text longer than `maxChars` UTF-16
 * code units is cut and suffixed with `...` (so a truncated result may be up
 * to three characters longer than `maxChars`).
 *
 * @param value - Raw text, e.g. a WebSocket close reason supplied by the peer.
 * @param maxChars - Maximum number of characters kept before the `...` suffix.
 *   Negative values are treated as 0.
 * @returns The sanitized, possibly truncated text.
 */
export function sanitizeLogText(value: string, maxChars: number): string {
  const sanitized = value
    .replace(/\p{Cc}/gu, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (sanitized.length <= maxChars) {
    return sanitized;
  }

  // A negative limit would make `slice` count from the end of the string and
  // return almost everything; clamp it so "keep nothing" is the worst case.
  let cut = Math.max(0, Math.trunc(maxChars));

  // Never cut between the two halves of a surrogate pair: a lone high
  // surrogate is not valid text and shows up as garbage in log output.
  if (cut > 0) {
    const before = sanitized.charCodeAt(cut - 1);
    const after = sanitized.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) {
      cut -= 1;
    }
  }

  return `${sanitized.slice(0, cut)}...`;
}
|
|
100
|
-
|
|
101
|
-
/**
 * Coerce a WebSocket message payload into a single `Buffer`.
 *
 * A list of buffer fragments is concatenated, an existing `Buffer` is returned
 * as-is, and anything else is wrapped via `Buffer.from`.
 */
function normalizeWsMessageData(data: RawData): Buffer {
  if (Array.isArray(data)) {
    return Buffer.concat(data);
  }
  return Buffer.isBuffer(data) ? data : Buffer.from(data);
}
|
|
110
|
-
|
|
111
|
-
/**
 * Manages WebSocket connections for Twilio media streams.
 *
 * Lifecycle of a socket:
 * 1. `handleUpgrade` admits it when the total connection cap allows.
 * 2. It is tracked as "pending" (global cap, per-IP cap, pre-start timeout)
 *    until an accepted `start` frame arrives.
 * 3. An accepted `start` frame creates a `StreamSession` and a transcription
 *    session; subsequent `media` frames are forwarded to that session.
 * 4. A `stop` frame or the socket closing tears the session down.
 *
 * Outbound TTS playback is serialized per stream through `queueTts`.
 */
export class MediaStreamHandler {
  /** Lazily created on the first upgrade. */
  private wss: WebSocketServer | null = null;
  /** Accepted sessions keyed by stream SID. */
  private sessions = new Map<string, StreamSession>();
  private config: MediaStreamConfig;
  /** Pending sockets that have upgraded but not yet sent an accepted `start` frame. */
  private pendingConnections = new Map<WebSocket, PendingConnection>();
  /** Pending socket count per remote IP for pre-auth throttling. */
  private pendingByIp = new Map<string, number>();
  private preStartTimeoutMs: number;
  private maxPendingConnections: number;
  private maxPendingConnectionsPerIp: number;
  private maxConnections: number;
  /** Upgrades handed to the WebSocket server that have not completed yet. */
  private inflightUpgrades = 0;
  /** TTS playback queues per stream (serialize audio to prevent overlap) */
  private ttsQueues = new Map<string, TtsQueueEntry[]>();
  /** Whether TTS is currently playing per stream */
  private ttsPlaying = new Map<string, boolean>();
  /** Active TTS playback controllers per stream */
  private ttsActiveControllers = new Map<string, AbortController>();

  /**
   * @param config - Provider, limits and callbacks; omitted limits fall back
   *   to the module defaults.
   */
  constructor(config: MediaStreamConfig) {
    this.config = config;
    this.preStartTimeoutMs = config.preStartTimeoutMs ?? DEFAULT_PRE_START_TIMEOUT_MS;
    this.maxPendingConnections = config.maxPendingConnections ?? DEFAULT_MAX_PENDING_CONNECTIONS;
    this.maxPendingConnectionsPerIp =
      config.maxPendingConnectionsPerIp ?? DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP;
    this.maxConnections = config.maxConnections ?? DEFAULT_MAX_CONNECTIONS;
  }

  /**
   * Handle WebSocket upgrade for media stream connections.
   *
   * Rejects the upgrade with HTTP 503 when the total connection cap is reached.
   * Otherwise reserves a slot for the in-flight upgrade and releases it exactly
   * once: on success, on socket error/close, or when the upgrade throws.
   *
   * @throws Re-throws whatever the underlying WebSocket server throws.
   */
  handleUpgrade(request: IncomingMessage, socket: Duplex, head: Buffer): void {
    if (!this.wss) {
      this.wss = new WebSocketServer({
        noServer: true,
        // Reject oversized frames before app-level parsing runs on unauthenticated sockets.
        maxPayload: MAX_INBOUND_MESSAGE_BYTES,
      });
      this.wss.on("connection", (ws, req) => {
        // handleConnection is async: without a catch, a throwing resolver would
        // surface as an unhandled rejection and leave the socket open with no handlers.
        this.handleConnection(ws, req).catch((error: unknown) => {
          console.error("[MediaStream] Failed to set up connection:", error);
          ws.close(1011, "Internal error");
        });
      });
    }

    const currentConnections = this.getCurrentConnectionCount();
    if (currentConnections >= this.maxConnections) {
      this.rejectUpgrade(socket, 503, "Too many media stream connections");
      return;
    }

    this.inflightUpgrades += 1;
    let released = false;
    const releaseUpgradeReservation = () => {
      if (released) {
        return;
      }
      released = true;
      this.inflightUpgrades = Math.max(0, this.inflightUpgrades - 1);
    };
    const detachAbortListeners = () => {
      socket.removeListener("error", handleUpgradeAbort);
      socket.removeListener("close", handleUpgradeAbort);
    };
    const handleUpgradeAbort = () => {
      detachAbortListeners();
      releaseUpgradeReservation();
    };
    socket.once("error", handleUpgradeAbort);
    socket.once("close", handleUpgradeAbort);

    try {
      this.wss.handleUpgrade(request, socket, head, (ws) => {
        detachAbortListeners();
        releaseUpgradeReservation();
        this.wss?.emit("connection", ws, request);
      });
    } catch (error) {
      detachAbortListeners();
      releaseUpgradeReservation();
      throw error;
    }
  }

  /**
   * Handle new WebSocket connection from Twilio.
   *
   * Registers the socket as pending and wires its message/close handlers.
   * All listeners are attached synchronously.
   */
  private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
    let session: StreamSession | null = null;
    const streamToken = this.getStreamToken(_request);
    const ip = this.getClientIp(_request);

    // Attach before any early return: a socket rejected below can still emit
    // "error", and an "error" event without a listener throws.
    ws.on("error", (error) => {
      console.error("[MediaStream] WebSocket error:", error);
    });

    if (!this.registerPendingConnection(ws, ip)) {
      ws.close(1013, "Too many pending media stream connections");
      return;
    }

    ws.on("message", (data: RawData) => {
      try {
        const raw = normalizeWsMessageData(data);
        const parsed: unknown = JSON.parse(raw.toString("utf8"));
        if (typeof parsed !== "object" || parsed === null) {
          // Valid JSON but not a frame object (e.g. `null`); nothing to dispatch.
          return;
        }
        const message = parsed as TwilioMediaMessage;

        switch (message.event) {
          case "connected":
            console.log("[MediaStream] Twilio connected");
            break;

          case "start":
            if (session) {
              // A second start would replace `session` and orphan the first
              // transcription session, which nothing would ever close.
              console.warn(
                `[MediaStream] Ignoring duplicate start frame for stream: ${this.toLogText(session.streamSid)}`,
              );
              break;
            }
            session = this.handleStart(ws, message, streamToken);
            if (session) {
              this.clearPendingConnection(ws);
            }
            break;

          case "media":
            if (session && message.media?.payload) {
              // Forward audio to STT
              const audioBuffer = Buffer.from(message.media.payload, "base64");
              session.sttSession.sendAudio(audioBuffer);
            }
            break;

          case "stop":
            if (session) {
              this.handleStop(session);
              // Prevents the close handler from tearing the session down twice.
              session = null;
            }
            break;

          case "clear":
          case "mark":
            break;
        }
      } catch (error) {
        console.error("[MediaStream] Error processing message:", error);
      }
    });

    ws.on("close", (code, reason) => {
      const rawReason = Buffer.isBuffer(reason) ? reason.toString("utf8") : String(reason || "");
      const reasonText = sanitizeLogText(rawReason, CLOSE_REASON_LOG_MAX_CHARS);
      console.log(
        `[MediaStream] WebSocket closed (code: ${code}, reason: ${reasonText || "none"})`,
      );
      this.clearPendingConnection(ws);
      if (session) {
        this.handleStop(session);
      }
    });
  }

  /**
   * Handle stream start event.
   *
   * Validates the frame, asks `shouldAcceptStream` (when configured) whether
   * the stream is allowed, then creates and registers the session and starts
   * connecting the transcription session in the background.
   *
   * @returns The new session, or `null` when the stream was rejected (the
   *   socket is closed with code 1008 in that case).
   */
  private handleStart(
    ws: WebSocket,
    message: TwilioMediaMessage,
    streamToken?: string,
  ): StreamSession | null {
    const streamSid = message.streamSid || "";
    const callSid = message.start?.callSid || "";

    // Prefer token from start message customParameters (set via TwiML <Parameter>),
    // falling back to query string token. Twilio strips query params from WebSocket
    // URLs but reliably delivers <Parameter> values in customParameters.
    const effectiveToken = message.start?.customParameters?.token ?? streamToken;

    // Both IDs are client-supplied and not yet validated here, so they are
    // sanitized before being written to the log.
    console.log(
      `[MediaStream] Stream started: ${this.toLogText(streamSid)} (call: ${this.toLogText(callSid)})`,
    );
    if (!callSid) {
      console.warn("[MediaStream] Missing callSid; closing stream");
      ws.close(1008, "Missing callSid");
      return null;
    }
    if (
      this.config.shouldAcceptStream &&
      !this.config.shouldAcceptStream({ callId: callSid, streamSid, token: effectiveToken })
    ) {
      console.warn(
        `[MediaStream] Rejecting stream for unknown call: ${this.toLogText(callSid)}`,
      );
      ws.close(1008, "Unknown call");
      return null;
    }

    const sttSession = this.config.transcriptionProvider.createSession({
      providerConfig: this.config.providerConfig,
      onPartial: (partial) => {
        this.config.onPartialTranscript?.(callSid, partial);
      },
      onTranscript: (transcript) => {
        this.config.onTranscript?.(callSid, transcript);
      },
      onSpeechStart: () => {
        this.config.onSpeechStart?.(callSid);
      },
      onError: (error) => {
        console.warn("[MediaStream] Transcription session error:", error.message);
      },
    });

    const session: StreamSession = {
      callId: callSid,
      streamSid,
      ws,
      sttSession,
    };

    this.sessions.set(streamSid, session);
    this.config.onConnect?.(callSid, streamSid);
    // Fire-and-forget: the method handles its own failures and never rejects.
    void this.connectTranscriptionAndNotify(session);

    return session;
  }

  /**
   * Connect the session's transcription backend and report readiness.
   *
   * On failure the media stream is closed (code 1011) if it is still the
   * registered, open session; otherwise only the transcription session is
   * closed. On success `onTranscriptionReady` fires, unless the stream went
   * away while connecting, in which case the transcription session is closed.
   */
  private async connectTranscriptionAndNotify(session: StreamSession): Promise<void> {
    const isStillLive = () =>
      this.sessions.get(session.streamSid) === session &&
      session.ws.readyState === WebSocket.OPEN;

    try {
      await session.sttSession.connect();
    } catch (error) {
      console.warn(
        "[MediaStream] STT connection failed; closing media stream:",
        error instanceof Error ? error.message : String(error),
      );
      if (isStillLive()) {
        // The socket's close handler performs the session teardown.
        session.ws.close(1011, "STT connection failed");
      } else {
        session.sttSession.close();
      }
      return;
    }

    if (!isStillLive()) {
      session.sttSession.close();
      return;
    }

    this.config.onTranscriptionReady?.(session.callId, session.streamSid);
  }

  /**
   * Handle stream stop event: cancel TTS, close transcription, unregister the
   * session and notify `onDisconnect`.
   */
  private handleStop(session: StreamSession): void {
    console.log(`[MediaStream] Stream stopped: ${session.streamSid}`);

    this.clearTtsState(session.streamSid);
    session.sttSession.close();
    this.sessions.delete(session.streamSid);
    this.config.onDisconnect?.(session.callId, session.streamSid);
  }

  /** Bound untrusted identifiers before they are interpolated into a log line. */
  private toLogText(value: string): string {
    return sanitizeLogText(value, CLOSE_REASON_LOG_MAX_CHARS);
  }

  /**
   * Read the `token` query parameter from the upgrade request URL.
   *
   * @returns The token, or `undefined` when the URL/host is missing, the URL
   *   cannot be parsed, or no token is present.
   */
  private getStreamToken(request: IncomingMessage): string | undefined {
    if (!request.url || !request.headers.host) {
      return undefined;
    }
    try {
      const url = new URL(request.url, `http://${request.headers.host}`);
      return url.searchParams.get("token") ?? undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Determine the source IP used for per-IP throttling: the configured
   * resolver's non-empty result, else the socket's remote address, else `"unknown"`.
   */
  private getClientIp(request: IncomingMessage): string {
    const resolvedIp = this.config.resolveClientIp?.(request)?.trim();
    if (resolvedIp) {
      return resolvedIp;
    }
    return request.socket.remoteAddress || "unknown";
  }

  /** Open sockets known to the WebSocket server plus upgrades still in flight. */
  private getCurrentConnectionCount(): number {
    return this.wss ? this.wss.clients.size + this.inflightUpgrades : this.inflightUpgrades;
  }

  /**
   * Track a freshly upgraded socket as pending and arm its pre-start timeout.
   *
   * @returns `false` when the global or per-IP pending limit is already
   *   reached (nothing is registered in that case), otherwise `true`.
   */
  private registerPendingConnection(ws: WebSocket, ip: string): boolean {
    if (this.pendingConnections.size >= this.maxPendingConnections) {
      console.warn("[MediaStream] Rejecting connection: pending connection limit reached");
      return false;
    }

    const pendingForIp = this.pendingByIp.get(ip) ?? 0;
    if (pendingForIp >= this.maxPendingConnectionsPerIp) {
      console.warn(`[MediaStream] Rejecting connection: pending per-IP limit reached (${ip})`);
      return false;
    }

    const timeout = setTimeout(() => {
      if (!this.pendingConnections.has(ws)) {
        return;
      }
      console.warn(
        `[MediaStream] Closing pre-start idle connection after ${this.preStartTimeoutMs}ms (${ip})`,
      );
      ws.close(1008, "Start timeout");
    }, this.preStartTimeoutMs);

    // Do not let the timer alone keep the process alive.
    timeout.unref?.();
    this.pendingConnections.set(ws, { ip, timeout });
    this.pendingByIp.set(ip, pendingForIp + 1);
    return true;
  }

  /**
   * Stop tracking a socket as pending: cancel its timeout and decrement the
   * per-IP counter. No-op for sockets that are not pending.
   */
  private clearPendingConnection(ws: WebSocket): void {
    const pending = this.pendingConnections.get(ws);
    if (!pending) {
      return;
    }

    clearTimeout(pending.timeout);
    this.pendingConnections.delete(ws);

    const current = this.pendingByIp.get(pending.ip) ?? 0;
    if (current <= 1) {
      this.pendingByIp.delete(pending.ip);
      return;
    }
    this.pendingByIp.set(pending.ip, current - 1);
  }

  /**
   * Answer an upgrade request with a plain-text HTTP error and drop the socket.
   */
  private rejectUpgrade(socket: Duplex, statusCode: 429 | 503, message: string): void {
    const statusText = statusCode === 429 ? "Too Many Requests" : "Service Unavailable";
    const body = `${message}\n`;
    const response =
      `HTTP/1.1 ${statusCode} ${statusText}\r\n` +
      "Connection: close\r\n" +
      "Content-Type: text/plain; charset=utf-8\r\n" +
      `Content-Length: ${Buffer.byteLength(body)}\r\n` +
      "\r\n" +
      body;

    // Destroying right after write() can discard the response before it is
    // flushed. End the stream instead and destroy once it has finished, or as
    // soon as it errors (the listener also keeps a write error from being an
    // unhandled "error" event).
    const destroySocket = () => {
      socket.destroy();
    };
    socket.on("error", destroySocket);
    socket.once("finish", destroySocket);
    socket.end(response);
  }

  /**
   * Get an active session with an open WebSocket, or undefined if unavailable.
   */
  private getOpenSession(streamSid: string): StreamSession | undefined {
    const session = this.sessions.get(streamSid);
    return session?.ws.readyState === WebSocket.OPEN ? session : undefined;
  }

  /**
   * Send a message to a stream's WebSocket if available.
   *
   * The frame is not sent when the stream is unknown, its socket is not open,
   * or more than `MAX_WS_BUFFERED_BYTES` are already buffered. When the buffer
   * limit is exceeded before or after sending, the socket is closed with code
   * 1013 and the result reports `sent: false`.
   */
  private sendToStream(streamSid: string, message: unknown): StreamSendResult {
    const session = this.sessions.get(streamSid);
    if (!session) {
      return { sent: false, bufferedBeforeBytes: 0, bufferedAfterBytes: 0 };
    }

    const { ws } = session;
    const readyState = ws.readyState;
    const bufferedBeforeBytes = ws.bufferedAmount;

    const notSent = (): StreamSendResult => ({
      sent: false,
      readyState,
      bufferedBeforeBytes,
      bufferedAfterBytes: ws.bufferedAmount,
    });
    const closeForBackpressure = () => {
      try {
        ws.close(1013, "Backpressure: send buffer exceeded");
      } catch {
        // Best-effort close; caller still receives sent:false.
      }
    };

    if (readyState !== WebSocket.OPEN) {
      return notSent();
    }
    if (bufferedBeforeBytes > MAX_WS_BUFFERED_BYTES) {
      closeForBackpressure();
      return notSent();
    }

    try {
      ws.send(JSON.stringify(message));
    } catch {
      return notSent();
    }

    const bufferedAfterBytes = ws.bufferedAmount;
    if (bufferedAfterBytes > MAX_WS_BUFFERED_BYTES) {
      closeForBackpressure();
      return { sent: false, readyState, bufferedBeforeBytes, bufferedAfterBytes };
    }
    return { sent: true, readyState, bufferedBeforeBytes, bufferedAfterBytes };
  }

  /**
   * Send audio to a specific stream (for TTS playback).
   * Audio should be mu-law encoded at 8kHz mono.
   */
  sendAudio(streamSid: string, muLawAudio: Buffer): StreamSendResult {
    return this.sendToStream(streamSid, {
      event: "media",
      streamSid,
      media: { payload: muLawAudio.toString("base64") },
    });
  }

  /**
   * Send a mark event to track audio playback position.
   */
  sendMark(streamSid: string, name: string): StreamSendResult {
    return this.sendToStream(streamSid, {
      event: "mark",
      streamSid,
      mark: { name },
    });
  }

  /**
   * Clear audio buffer (interrupt playback).
   */
  clearAudio(streamSid: string): StreamSendResult {
    return this.sendToStream(streamSid, { event: "clear", streamSid });
  }

  /**
   * Queue a TTS operation for sequential playback.
   * Only one TTS operation plays at a time per stream to prevent overlap.
   *
   * @returns A promise that resolves when the operation finished or was
   *   cancelled, and rejects when `playFn` fails without having been aborted.
   */
  async queueTts(streamSid: string, playFn: (signal: AbortSignal) => Promise<void>): Promise<void> {
    const queue = this.getTtsQueue(streamSid);
    // The executor runs synchronously, so the entry is queued before the
    // playing check below.
    const settled = new Promise<void>((resolve, reject) => {
      queue.push({ playFn, controller: new AbortController(), resolve, reject });
    });

    if (!this.ttsPlaying.get(streamSid)) {
      void this.processQueue(streamSid);
    }

    return settled;
  }

  /**
   * Clear TTS queue and interrupt current playback (barge-in).
   */
  clearTtsQueue(streamSid: string, _reason = "unspecified"): void {
    // Look up without creating: clearing a stream that never queued anything
    // must not leave an empty queue behind.
    const queue = this.ttsQueues.get(streamSid);
    if (queue) {
      this.resolveQueuedTtsEntries(queue);
    }
    this.ttsActiveControllers.get(streamSid)?.abort();
    this.clearAudio(streamSid);
  }

  /**
   * Get active session by call ID.
   */
  getSessionByCallId(callId: string): StreamSession | undefined {
    for (const session of this.sessions.values()) {
      if (session.callId === callId) {
        return session;
      }
    }
    return undefined;
  }

  /**
   * Close all sessions.
   */
  closeAll(): void {
    for (const session of this.sessions.values()) {
      this.clearTtsState(session.streamSid);
      session.sttSession.close();
      session.ws.close();
    }
    this.sessions.clear();
  }

  /** Return the stream's TTS queue, creating an empty one on first use. */
  private getTtsQueue(streamSid: string): TtsQueueEntry[] {
    const existing = this.ttsQueues.get(streamSid);
    if (existing) {
      return existing;
    }
    const queue: TtsQueueEntry[] = [];
    this.ttsQueues.set(streamSid, queue);
    return queue;
  }

  /**
   * Process the TTS queue for a stream.
   * Uses iterative approach to avoid stack accumulation from recursion.
   *
   * Each entry's promise is resolved on success or abort and rejected on any
   * other playback failure.
   */
  private async processQueue(streamSid: string): Promise<void> {
    this.ttsPlaying.set(streamSid, true);

    while (true) {
      const entry = this.ttsQueues.get(streamSid)?.shift();
      if (!entry) {
        // Drained (or the stream was torn down meanwhile). Drop all per-stream
        // TTS state so finished streams leave nothing behind; the queue is
        // recreated lazily by the next queueTts call.
        this.ttsQueues.delete(streamSid);
        this.ttsPlaying.delete(streamSid);
        this.ttsActiveControllers.delete(streamSid);
        return;
      }

      this.ttsActiveControllers.set(streamSid, entry.controller);

      try {
        await entry.playFn(entry.controller.signal);
        entry.resolve();
      } catch (error) {
        if (entry.controller.signal.aborted) {
          // Cancellation is an expected outcome, not a failure.
          entry.resolve();
        } else {
          console.error("[MediaStream] TTS playback error:", error);
          entry.reject(error);
        }
      } finally {
        if (this.ttsActiveControllers.get(streamSid) === entry.controller) {
          this.ttsActiveControllers.delete(streamSid);
        }
      }
    }
  }

  /** Cancel queued and active TTS for a stream and drop all of its TTS state. */
  private clearTtsState(streamSid: string): void {
    const queue = this.ttsQueues.get(streamSid);
    if (queue) {
      this.resolveQueuedTtsEntries(queue);
    }
    this.ttsActiveControllers.get(streamSid)?.abort();
    this.ttsActiveControllers.delete(streamSid);
    this.ttsPlaying.delete(streamSid);
    this.ttsQueues.delete(streamSid);
  }

  /** Empty the queue in place, aborting and resolving every entry that had not started. */
  private resolveQueuedTtsEntries(queue: TtsQueueEntry[]): void {
    const pending = queue.splice(0);
    for (const entry of pending) {
      entry.controller.abort();
      entry.resolve();
    }
  }
}
|
|
679
|
-
|
|
680
|
-
/**
 * JSON frame exchanged on a Twilio Media Stream WebSocket, as parsed by this module.
 *
 * Which optional section is populated depends on `event`.
 */
interface TwilioMediaMessage {
  /** Frame discriminator. */
  event: "connected" | "start" | "media" | "stop" | "mark" | "clear";
  sequenceNumber?: string;
  /** Identifier of the stream the frame belongs to. */
  streamSid?: string;
  /** Stream metadata carried by a `start` frame. */
  start?: {
    streamSid: string;
    accountSid: string;
    /** Identifier of the call; used as the session's call ID. */
    callSid: string;
    tracks: string[];
    /** Custom key/value parameters; the `token` entry is read as the stream token. */
    customParameters?: Record<string, string>;
    mediaFormat: {
      encoding: string;
      sampleRate: number;
      channels: number;
    };
  };
  /** Audio chunk carried by a `media` frame. */
  media?: {
    track?: string;
    chunk?: string;
    timestamp?: string;
    /** Base64-encoded audio. */
    payload?: string;
  };
  /** Marker carried by a `mark` frame. */
  mark?: {
    name: string;
  };
}
|