@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +943 -0
- package/index.ts +379 -149
- package/openclaw.plugin.json +384 -157
- package/package.json +35 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +273 -12
- package/src/config.ts +355 -72
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +243 -19
- package/src/manager/events.ts +61 -31
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +528 -0
- package/src/manager/outbound.ts +163 -57
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +321 -0
- package/src/response-generator.ts +213 -53
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +429 -0
- package/src/runtime.ts +270 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +28 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +523 -102
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
package/src/media-stream.ts
CHANGED
|
@@ -3,24 +3,27 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Handles bidirectional audio streaming between Twilio and the AI services.
|
|
5
5
|
* - Receives mu-law audio from Twilio via WebSocket
|
|
6
|
-
* - Forwards to
|
|
6
|
+
* - Forwards to the selected realtime transcription provider
|
|
7
7
|
* - Sends TTS audio back to Twilio
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import type { IncomingMessage } from "node:http";
|
|
11
11
|
import type { Duplex } from "node:stream";
|
|
12
|
-
import { WebSocket, WebSocketServer } from "ws";
|
|
13
12
|
import type {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
RealtimeTranscriptionProviderConfig,
|
|
14
|
+
RealtimeTranscriptionProviderPlugin,
|
|
15
|
+
RealtimeTranscriptionSession,
|
|
16
|
+
} from "openclaw/plugin-sdk/realtime-transcription";
|
|
17
|
+
import { type RawData, WebSocket, WebSocketServer } from "ws";
|
|
17
18
|
|
|
18
19
|
/**
|
|
19
20
|
* Configuration for the media stream handler.
|
|
20
21
|
*/
|
|
21
22
|
export interface MediaStreamConfig {
|
|
22
|
-
/**
|
|
23
|
-
|
|
23
|
+
/** Realtime transcription provider for streaming STT. */
|
|
24
|
+
transcriptionProvider: RealtimeTranscriptionProviderPlugin;
|
|
25
|
+
/** Provider-owned config blob passed into the transcription session. */
|
|
26
|
+
providerConfig: RealtimeTranscriptionProviderConfig;
|
|
24
27
|
/** Close sockets that never send a valid `start` frame within this window. */
|
|
25
28
|
preStartTimeoutMs?: number;
|
|
26
29
|
/** Max concurrent pre-start sockets. */
|
|
@@ -29,6 +32,8 @@ export interface MediaStreamConfig {
|
|
|
29
32
|
maxPendingConnectionsPerIp?: number;
|
|
30
33
|
/** Max total open sockets (pending + active sessions). */
|
|
31
34
|
maxConnections?: number;
|
|
35
|
+
/** Optional trusted resolver for the source IP used by pending-connection guards. */
|
|
36
|
+
resolveClientIp?: (request: IncomingMessage) => string | undefined;
|
|
32
37
|
/** Validate whether to accept a media stream for the given call ID */
|
|
33
38
|
shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
|
|
34
39
|
/** Callback when transcript is received */
|
|
@@ -37,10 +42,12 @@ export interface MediaStreamConfig {
|
|
|
37
42
|
onPartialTranscript?: (callId: string, partial: string) => void;
|
|
38
43
|
/** Callback when stream connects */
|
|
39
44
|
onConnect?: (callId: string, streamSid: string) => void;
|
|
45
|
+
/** Callback when realtime transcription is ready for the stream */
|
|
46
|
+
onTranscriptionReady?: (callId: string, streamSid: string) => void;
|
|
40
47
|
/** Callback when speech starts (barge-in) */
|
|
41
48
|
onSpeechStart?: (callId: string) => void;
|
|
42
49
|
/** Callback when stream disconnects */
|
|
43
|
-
onDisconnect?: (callId: string) => void;
|
|
50
|
+
onDisconnect?: (callId: string, streamSid: string) => void;
|
|
44
51
|
}
|
|
45
52
|
|
|
46
53
|
/**
|
|
@@ -50,7 +57,7 @@ interface StreamSession {
|
|
|
50
57
|
callId: string;
|
|
51
58
|
streamSid: string;
|
|
52
59
|
ws: WebSocket;
|
|
53
|
-
sttSession:
|
|
60
|
+
sttSession: RealtimeTranscriptionSession;
|
|
54
61
|
}
|
|
55
62
|
|
|
56
63
|
type TtsQueueEntry = {
|
|
@@ -60,6 +67,13 @@ type TtsQueueEntry = {
|
|
|
60
67
|
reject: (error: unknown) => void;
|
|
61
68
|
};
|
|
62
69
|
|
|
70
|
+
type StreamSendResult = {
|
|
71
|
+
sent: boolean;
|
|
72
|
+
readyState?: number;
|
|
73
|
+
bufferedBeforeBytes: number;
|
|
74
|
+
bufferedAfterBytes: number;
|
|
75
|
+
};
|
|
76
|
+
|
|
63
77
|
type PendingConnection = {
|
|
64
78
|
ip: string;
|
|
65
79
|
timeout: ReturnType<typeof setTimeout>;
|
|
@@ -69,6 +83,30 @@ const DEFAULT_PRE_START_TIMEOUT_MS = 5000;
|
|
|
69
83
|
const DEFAULT_MAX_PENDING_CONNECTIONS = 32;
|
|
70
84
|
const DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP = 4;
|
|
71
85
|
const DEFAULT_MAX_CONNECTIONS = 128;
|
|
86
|
+
const MAX_INBOUND_MESSAGE_BYTES = 64 * 1024;
|
|
87
|
+
const MAX_WS_BUFFERED_BYTES = 1024 * 1024;
|
|
88
|
+
const CLOSE_REASON_LOG_MAX_CHARS = 120;
|
|
89
|
+
|
|
90
|
+
export function sanitizeLogText(value: string, maxChars: number): string {
|
|
91
|
+
const sanitized = value
|
|
92
|
+
.replace(/\p{Cc}/gu, " ")
|
|
93
|
+
.replace(/\s+/g, " ")
|
|
94
|
+
.trim();
|
|
95
|
+
if (sanitized.length <= maxChars) {
|
|
96
|
+
return sanitized;
|
|
97
|
+
}
|
|
98
|
+
return `${sanitized.slice(0, maxChars)}...`;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
function normalizeWsMessageData(data: RawData): Buffer {
|
|
102
|
+
if (Buffer.isBuffer(data)) {
|
|
103
|
+
return data;
|
|
104
|
+
}
|
|
105
|
+
if (Array.isArray(data)) {
|
|
106
|
+
return Buffer.concat(data);
|
|
107
|
+
}
|
|
108
|
+
return Buffer.from(data);
|
|
109
|
+
}
|
|
72
110
|
|
|
73
111
|
/**
|
|
74
112
|
* Manages WebSocket connections for Twilio media streams.
|
|
@@ -85,6 +123,7 @@ export class MediaStreamHandler {
|
|
|
85
123
|
private maxPendingConnections: number;
|
|
86
124
|
private maxPendingConnectionsPerIp: number;
|
|
87
125
|
private maxConnections: number;
|
|
126
|
+
private inflightUpgrades = 0;
|
|
88
127
|
/** TTS playback queues per stream (serialize audio to prevent overlap) */
|
|
89
128
|
private ttsQueues = new Map<string, TtsQueueEntry[]>();
|
|
90
129
|
/** Whether TTS is currently playing per stream */
|
|
@@ -106,19 +145,50 @@ export class MediaStreamHandler {
|
|
|
106
145
|
*/
|
|
107
146
|
handleUpgrade(request: IncomingMessage, socket: Duplex, head: Buffer): void {
|
|
108
147
|
if (!this.wss) {
|
|
109
|
-
this.wss = new WebSocketServer({
|
|
148
|
+
this.wss = new WebSocketServer({
|
|
149
|
+
noServer: true,
|
|
150
|
+
// Reject oversized frames before app-level parsing runs on unauthenticated sockets.
|
|
151
|
+
maxPayload: MAX_INBOUND_MESSAGE_BYTES,
|
|
152
|
+
});
|
|
110
153
|
this.wss.on("connection", (ws, req) => this.handleConnection(ws, req));
|
|
111
154
|
}
|
|
112
155
|
|
|
113
|
-
const currentConnections = this.
|
|
156
|
+
const currentConnections = this.getCurrentConnectionCount();
|
|
114
157
|
if (currentConnections >= this.maxConnections) {
|
|
115
158
|
this.rejectUpgrade(socket, 503, "Too many media stream connections");
|
|
116
159
|
return;
|
|
117
160
|
}
|
|
118
161
|
|
|
119
|
-
this.
|
|
120
|
-
|
|
121
|
-
|
|
162
|
+
this.inflightUpgrades += 1;
|
|
163
|
+
let released = false;
|
|
164
|
+
const releaseUpgradeReservation = () => {
|
|
165
|
+
if (released) {
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
released = true;
|
|
169
|
+
this.inflightUpgrades = Math.max(0, this.inflightUpgrades - 1);
|
|
170
|
+
};
|
|
171
|
+
const handleUpgradeAbort = () => {
|
|
172
|
+
socket.removeListener("error", handleUpgradeAbort);
|
|
173
|
+
socket.removeListener("close", handleUpgradeAbort);
|
|
174
|
+
releaseUpgradeReservation();
|
|
175
|
+
};
|
|
176
|
+
socket.once("error", handleUpgradeAbort);
|
|
177
|
+
socket.once("close", handleUpgradeAbort);
|
|
178
|
+
|
|
179
|
+
try {
|
|
180
|
+
this.wss.handleUpgrade(request, socket, head, (ws) => {
|
|
181
|
+
socket.removeListener("error", handleUpgradeAbort);
|
|
182
|
+
socket.removeListener("close", handleUpgradeAbort);
|
|
183
|
+
releaseUpgradeReservation();
|
|
184
|
+
this.wss?.emit("connection", ws, request);
|
|
185
|
+
});
|
|
186
|
+
} catch (error) {
|
|
187
|
+
socket.removeListener("error", handleUpgradeAbort);
|
|
188
|
+
socket.removeListener("close", handleUpgradeAbort);
|
|
189
|
+
releaseUpgradeReservation();
|
|
190
|
+
throw error;
|
|
191
|
+
}
|
|
122
192
|
}
|
|
123
193
|
|
|
124
194
|
/**
|
|
@@ -134,9 +204,10 @@ export class MediaStreamHandler {
|
|
|
134
204
|
return;
|
|
135
205
|
}
|
|
136
206
|
|
|
137
|
-
ws.on("message", async (data:
|
|
207
|
+
ws.on("message", async (data: RawData) => {
|
|
138
208
|
try {
|
|
139
|
-
const
|
|
209
|
+
const raw = normalizeWsMessageData(data);
|
|
210
|
+
const message = JSON.parse(raw.toString("utf8")) as TwilioMediaMessage;
|
|
140
211
|
|
|
141
212
|
switch (message.event) {
|
|
142
213
|
case "connected":
|
|
@@ -144,7 +215,7 @@ export class MediaStreamHandler {
|
|
|
144
215
|
break;
|
|
145
216
|
|
|
146
217
|
case "start":
|
|
147
|
-
session =
|
|
218
|
+
session = this.handleStart(ws, message, streamToken);
|
|
148
219
|
if (session) {
|
|
149
220
|
this.clearPendingConnection(ws);
|
|
150
221
|
}
|
|
@@ -164,13 +235,22 @@ export class MediaStreamHandler {
|
|
|
164
235
|
session = null;
|
|
165
236
|
}
|
|
166
237
|
break;
|
|
238
|
+
|
|
239
|
+
case "clear":
|
|
240
|
+
case "mark":
|
|
241
|
+
break;
|
|
167
242
|
}
|
|
168
243
|
} catch (error) {
|
|
169
244
|
console.error("[MediaStream] Error processing message:", error);
|
|
170
245
|
}
|
|
171
246
|
});
|
|
172
247
|
|
|
173
|
-
ws.on("close", () => {
|
|
248
|
+
ws.on("close", (code, reason) => {
|
|
249
|
+
const rawReason = Buffer.isBuffer(reason) ? reason.toString("utf8") : String(reason || "");
|
|
250
|
+
const reasonText = sanitizeLogText(rawReason, CLOSE_REASON_LOG_MAX_CHARS);
|
|
251
|
+
console.log(
|
|
252
|
+
`[MediaStream] WebSocket closed (code: ${code}, reason: ${reasonText || "none"})`,
|
|
253
|
+
);
|
|
174
254
|
this.clearPendingConnection(ws);
|
|
175
255
|
if (session) {
|
|
176
256
|
this.handleStop(session);
|
|
@@ -185,11 +265,11 @@ export class MediaStreamHandler {
|
|
|
185
265
|
/**
|
|
186
266
|
* Handle stream start event.
|
|
187
267
|
*/
|
|
188
|
-
private
|
|
268
|
+
private handleStart(
|
|
189
269
|
ws: WebSocket,
|
|
190
270
|
message: TwilioMediaMessage,
|
|
191
271
|
streamToken?: string,
|
|
192
|
-
):
|
|
272
|
+
): StreamSession | null {
|
|
193
273
|
const streamSid = message.streamSid || "";
|
|
194
274
|
const callSid = message.start?.callSid || "";
|
|
195
275
|
|
|
@@ -213,20 +293,20 @@ export class MediaStreamHandler {
|
|
|
213
293
|
return null;
|
|
214
294
|
}
|
|
215
295
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
296
|
+
const sttSession = this.config.transcriptionProvider.createSession({
|
|
297
|
+
providerConfig: this.config.providerConfig,
|
|
298
|
+
onPartial: (partial) => {
|
|
299
|
+
this.config.onPartialTranscript?.(callSid, partial);
|
|
300
|
+
},
|
|
301
|
+
onTranscript: (transcript) => {
|
|
302
|
+
this.config.onTranscript?.(callSid, transcript);
|
|
303
|
+
},
|
|
304
|
+
onSpeechStart: () => {
|
|
305
|
+
this.config.onSpeechStart?.(callSid);
|
|
306
|
+
},
|
|
307
|
+
onError: (error) => {
|
|
308
|
+
console.warn("[MediaStream] Transcription session error:", error.message);
|
|
309
|
+
},
|
|
230
310
|
});
|
|
231
311
|
|
|
232
312
|
const session: StreamSession = {
|
|
@@ -237,18 +317,42 @@ export class MediaStreamHandler {
|
|
|
237
317
|
};
|
|
238
318
|
|
|
239
319
|
this.sessions.set(streamSid, session);
|
|
240
|
-
|
|
241
|
-
// Notify connection BEFORE STT connect so TTS can work even if STT fails
|
|
242
320
|
this.config.onConnect?.(callSid, streamSid);
|
|
243
|
-
|
|
244
|
-
// Connect to OpenAI STT (non-blocking, log errors but don't fail the call)
|
|
245
|
-
sttSession.connect().catch((err) => {
|
|
246
|
-
console.warn(`[MediaStream] STT connection failed (TTS still works):`, err.message);
|
|
247
|
-
});
|
|
321
|
+
void this.connectTranscriptionAndNotify(session);
|
|
248
322
|
|
|
249
323
|
return session;
|
|
250
324
|
}
|
|
251
325
|
|
|
326
|
+
private async connectTranscriptionAndNotify(session: StreamSession): Promise<void> {
|
|
327
|
+
try {
|
|
328
|
+
await session.sttSession.connect();
|
|
329
|
+
} catch (error) {
|
|
330
|
+
console.warn(
|
|
331
|
+
"[MediaStream] STT connection failed; closing media stream:",
|
|
332
|
+
error instanceof Error ? error.message : String(error),
|
|
333
|
+
);
|
|
334
|
+
if (
|
|
335
|
+
this.sessions.get(session.streamSid) === session &&
|
|
336
|
+
session.ws.readyState === WebSocket.OPEN
|
|
337
|
+
) {
|
|
338
|
+
session.ws.close(1011, "STT connection failed");
|
|
339
|
+
} else {
|
|
340
|
+
session.sttSession.close();
|
|
341
|
+
}
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
if (
|
|
346
|
+
this.sessions.get(session.streamSid) !== session ||
|
|
347
|
+
session.ws.readyState !== WebSocket.OPEN
|
|
348
|
+
) {
|
|
349
|
+
session.sttSession.close();
|
|
350
|
+
return;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
this.config.onTranscriptionReady?.(session.callId, session.streamSid);
|
|
354
|
+
}
|
|
355
|
+
|
|
252
356
|
/**
|
|
253
357
|
* Handle stream stop event.
|
|
254
358
|
*/
|
|
@@ -258,7 +362,7 @@ export class MediaStreamHandler {
|
|
|
258
362
|
this.clearTtsState(session.streamSid);
|
|
259
363
|
session.sttSession.close();
|
|
260
364
|
this.sessions.delete(session.streamSid);
|
|
261
|
-
this.config.onDisconnect?.(session.callId);
|
|
365
|
+
this.config.onDisconnect?.(session.callId, session.streamSid);
|
|
262
366
|
}
|
|
263
367
|
|
|
264
368
|
private getStreamToken(request: IncomingMessage): string | undefined {
|
|
@@ -274,9 +378,17 @@ export class MediaStreamHandler {
|
|
|
274
378
|
}
|
|
275
379
|
|
|
276
380
|
private getClientIp(request: IncomingMessage): string {
|
|
381
|
+
const resolvedIp = this.config.resolveClientIp?.(request)?.trim();
|
|
382
|
+
if (resolvedIp) {
|
|
383
|
+
return resolvedIp;
|
|
384
|
+
}
|
|
277
385
|
return request.socket.remoteAddress || "unknown";
|
|
278
386
|
}
|
|
279
387
|
|
|
388
|
+
private getCurrentConnectionCount(): number {
|
|
389
|
+
return this.wss ? this.wss.clients.size + this.inflightUpgrades : this.inflightUpgrades;
|
|
390
|
+
}
|
|
391
|
+
|
|
280
392
|
private registerPendingConnection(ws: WebSocket, ip: string): boolean {
|
|
281
393
|
if (this.pendingConnections.size >= this.maxPendingConnections) {
|
|
282
394
|
console.warn("[MediaStream] Rejecting connection: pending connection limit reached");
|
|
@@ -347,17 +459,78 @@ export class MediaStreamHandler {
|
|
|
347
459
|
/**
|
|
348
460
|
* Send a message to a stream's WebSocket if available.
|
|
349
461
|
*/
|
|
350
|
-
private sendToStream(streamSid: string, message: unknown):
|
|
351
|
-
const session = this.
|
|
352
|
-
session
|
|
462
|
+
private sendToStream(streamSid: string, message: unknown): StreamSendResult {
|
|
463
|
+
const session = this.sessions.get(streamSid);
|
|
464
|
+
if (!session) {
|
|
465
|
+
return {
|
|
466
|
+
sent: false,
|
|
467
|
+
bufferedBeforeBytes: 0,
|
|
468
|
+
bufferedAfterBytes: 0,
|
|
469
|
+
};
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
const readyState = session.ws.readyState;
|
|
473
|
+
const bufferedBeforeBytes = session.ws.bufferedAmount;
|
|
474
|
+
if (readyState !== WebSocket.OPEN) {
|
|
475
|
+
return {
|
|
476
|
+
sent: false,
|
|
477
|
+
readyState,
|
|
478
|
+
bufferedBeforeBytes,
|
|
479
|
+
bufferedAfterBytes: session.ws.bufferedAmount,
|
|
480
|
+
};
|
|
481
|
+
}
|
|
482
|
+
if (bufferedBeforeBytes > MAX_WS_BUFFERED_BYTES) {
|
|
483
|
+
try {
|
|
484
|
+
session.ws.close(1013, "Backpressure: send buffer exceeded");
|
|
485
|
+
} catch {
|
|
486
|
+
// Best-effort close; caller still receives sent:false.
|
|
487
|
+
}
|
|
488
|
+
return {
|
|
489
|
+
sent: false,
|
|
490
|
+
readyState,
|
|
491
|
+
bufferedBeforeBytes,
|
|
492
|
+
bufferedAfterBytes: session.ws.bufferedAmount,
|
|
493
|
+
};
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
try {
|
|
497
|
+
session.ws.send(JSON.stringify(message));
|
|
498
|
+
const bufferedAfterBytes = session.ws.bufferedAmount;
|
|
499
|
+
if (bufferedAfterBytes > MAX_WS_BUFFERED_BYTES) {
|
|
500
|
+
try {
|
|
501
|
+
session.ws.close(1013, "Backpressure: send buffer exceeded");
|
|
502
|
+
} catch {
|
|
503
|
+
// Best-effort close; caller still receives sent:false.
|
|
504
|
+
}
|
|
505
|
+
return {
|
|
506
|
+
sent: false,
|
|
507
|
+
readyState,
|
|
508
|
+
bufferedBeforeBytes,
|
|
509
|
+
bufferedAfterBytes,
|
|
510
|
+
};
|
|
511
|
+
}
|
|
512
|
+
return {
|
|
513
|
+
sent: true,
|
|
514
|
+
readyState,
|
|
515
|
+
bufferedBeforeBytes,
|
|
516
|
+
bufferedAfterBytes,
|
|
517
|
+
};
|
|
518
|
+
} catch {
|
|
519
|
+
return {
|
|
520
|
+
sent: false,
|
|
521
|
+
readyState,
|
|
522
|
+
bufferedBeforeBytes,
|
|
523
|
+
bufferedAfterBytes: session.ws.bufferedAmount,
|
|
524
|
+
};
|
|
525
|
+
}
|
|
353
526
|
}
|
|
354
527
|
|
|
355
528
|
/**
|
|
356
529
|
* Send audio to a specific stream (for TTS playback).
|
|
357
530
|
* Audio should be mu-law encoded at 8kHz mono.
|
|
358
531
|
*/
|
|
359
|
-
sendAudio(streamSid: string, muLawAudio: Buffer):
|
|
360
|
-
this.sendToStream(streamSid, {
|
|
532
|
+
sendAudio(streamSid: string, muLawAudio: Buffer): StreamSendResult {
|
|
533
|
+
return this.sendToStream(streamSid, {
|
|
361
534
|
event: "media",
|
|
362
535
|
streamSid,
|
|
363
536
|
media: { payload: muLawAudio.toString("base64") },
|
|
@@ -367,8 +540,8 @@ export class MediaStreamHandler {
|
|
|
367
540
|
/**
|
|
368
541
|
* Send a mark event to track audio playback position.
|
|
369
542
|
*/
|
|
370
|
-
sendMark(streamSid: string, name: string):
|
|
371
|
-
this.sendToStream(streamSid, {
|
|
543
|
+
sendMark(streamSid: string, name: string): StreamSendResult {
|
|
544
|
+
return this.sendToStream(streamSid, {
|
|
372
545
|
event: "mark",
|
|
373
546
|
streamSid,
|
|
374
547
|
mark: { name },
|
|
@@ -378,8 +551,8 @@ export class MediaStreamHandler {
|
|
|
378
551
|
/**
|
|
379
552
|
* Clear audio buffer (interrupt playback).
|
|
380
553
|
*/
|
|
381
|
-
clearAudio(streamSid: string):
|
|
382
|
-
this.sendToStream(streamSid, { event: "clear", streamSid });
|
|
554
|
+
clearAudio(streamSid: string): StreamSendResult {
|
|
555
|
+
return this.sendToStream(streamSid, { event: "clear", streamSid });
|
|
383
556
|
}
|
|
384
557
|
|
|
385
558
|
/**
|
|
@@ -412,9 +585,9 @@ export class MediaStreamHandler {
|
|
|
412
585
|
/**
|
|
413
586
|
* Clear TTS queue and interrupt current playback (barge-in).
|
|
414
587
|
*/
|
|
415
|
-
clearTtsQueue(streamSid: string): void {
|
|
588
|
+
clearTtsQueue(streamSid: string, _reason = "unspecified"): void {
|
|
416
589
|
const queue = this.getTtsQueue(streamSid);
|
|
417
|
-
queue
|
|
590
|
+
this.resolveQueuedTtsEntries(queue);
|
|
418
591
|
this.ttsActiveControllers.get(streamSid)?.abort();
|
|
419
592
|
this.clearAudio(streamSid);
|
|
420
593
|
}
|
|
@@ -487,13 +660,21 @@ export class MediaStreamHandler {
|
|
|
487
660
|
private clearTtsState(streamSid: string): void {
|
|
488
661
|
const queue = this.ttsQueues.get(streamSid);
|
|
489
662
|
if (queue) {
|
|
490
|
-
queue
|
|
663
|
+
this.resolveQueuedTtsEntries(queue);
|
|
491
664
|
}
|
|
492
665
|
this.ttsActiveControllers.get(streamSid)?.abort();
|
|
493
666
|
this.ttsActiveControllers.delete(streamSid);
|
|
494
667
|
this.ttsPlaying.delete(streamSid);
|
|
495
668
|
this.ttsQueues.delete(streamSid);
|
|
496
669
|
}
|
|
670
|
+
|
|
671
|
+
private resolveQueuedTtsEntries(queue: TtsQueueEntry[]): void {
|
|
672
|
+
const pending = queue.splice(0);
|
|
673
|
+
for (const entry of pending) {
|
|
674
|
+
entry.controller.abort();
|
|
675
|
+
entry.resolve();
|
|
676
|
+
}
|
|
677
|
+
}
|
|
497
678
|
}
|
|
498
679
|
|
|
499
680
|
/**
|
package/src/providers/base.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type {
|
|
2
|
+
AnswerCallInput,
|
|
2
3
|
GetCallStatusInput,
|
|
3
4
|
GetCallStatusResult,
|
|
4
5
|
HangupCallInput,
|
|
@@ -6,6 +7,7 @@ import type {
|
|
|
6
7
|
InitiateCallResult,
|
|
7
8
|
PlayTtsInput,
|
|
8
9
|
ProviderName,
|
|
10
|
+
SendDtmfInput,
|
|
9
11
|
WebhookParseOptions,
|
|
10
12
|
ProviderWebhookParseResult,
|
|
11
13
|
StartListeningInput,
|
|
@@ -41,12 +43,24 @@ export interface VoiceCallProvider {
|
|
|
41
43
|
*/
|
|
42
44
|
parseWebhookEvent(ctx: WebhookContext, options?: WebhookParseOptions): ProviderWebhookParseResult;
|
|
43
45
|
|
|
46
|
+
/**
|
|
47
|
+
* Consume one-time TwiML that must be served before shortcut handlers such as
|
|
48
|
+
* realtime media streams take over the webhook response.
|
|
49
|
+
*/
|
|
50
|
+
consumeInitialTwiML?: (ctx: WebhookContext) => string | null;
|
|
51
|
+
|
|
44
52
|
/**
|
|
45
53
|
* Initiate an outbound call.
|
|
46
54
|
* @returns Provider call ID and status
|
|
47
55
|
*/
|
|
48
56
|
initiateCall(input: InitiateCallInput): Promise<InitiateCallResult>;
|
|
49
57
|
|
|
58
|
+
/**
|
|
59
|
+
* Answer an accepted inbound call when the provider requires an explicit
|
|
60
|
+
* answer command after the initial webhook.
|
|
61
|
+
*/
|
|
62
|
+
answerCall?: (input: AnswerCallInput) => Promise<void>;
|
|
63
|
+
|
|
50
64
|
/**
|
|
51
65
|
* Hang up an active call.
|
|
52
66
|
*/
|
|
@@ -58,6 +72,11 @@ export interface VoiceCallProvider {
|
|
|
58
72
|
*/
|
|
59
73
|
playTts(input: PlayTtsInput): Promise<void>;
|
|
60
74
|
|
|
75
|
+
/**
|
|
76
|
+
* Send DTMF digits to an active call.
|
|
77
|
+
*/
|
|
78
|
+
sendDtmf?: (input: SendDtmfInput) => Promise<void>;
|
|
79
|
+
|
|
61
80
|
/**
|
|
62
81
|
* Start listening for user speech (activate STT).
|
|
63
82
|
*/
|
package/src/providers/mock.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
+
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
|
|
2
3
|
import type {
|
|
3
4
|
EndReason,
|
|
4
5
|
GetCallStatusInput,
|
|
@@ -10,6 +11,7 @@ import type {
|
|
|
10
11
|
PlayTtsInput,
|
|
11
12
|
WebhookParseOptions,
|
|
12
13
|
ProviderWebhookParseResult,
|
|
14
|
+
SendDtmfInput,
|
|
13
15
|
StartListeningInput,
|
|
14
16
|
StopListeningInput,
|
|
15
17
|
WebhookContext,
|
|
@@ -161,6 +163,10 @@ export class MockProvider implements VoiceCallProvider {
|
|
|
161
163
|
// No-op for mock
|
|
162
164
|
}
|
|
163
165
|
|
|
166
|
+
async sendDtmf(_input: SendDtmfInput): Promise<void> {
|
|
167
|
+
// No-op for mock
|
|
168
|
+
}
|
|
169
|
+
|
|
164
170
|
async startListening(_input: StartListeningInput): Promise<void> {
|
|
165
171
|
// No-op for mock
|
|
166
172
|
}
|
|
@@ -170,7 +176,7 @@ export class MockProvider implements VoiceCallProvider {
|
|
|
170
176
|
}
|
|
171
177
|
|
|
172
178
|
async getCallStatus(input: GetCallStatusInput): Promise<GetCallStatusResult> {
|
|
173
|
-
const id = input.providerCallId
|
|
179
|
+
const id = normalizeLowercaseStringOrEmpty(input.providerCallId);
|
|
174
180
|
if (id.includes("stale") || id.includes("ended") || id.includes("completed")) {
|
|
175
181
|
return { status: "completed", isTerminal: true };
|
|
176
182
|
}
|
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
2
|
import { PlivoProvider } from "./plivo.js";
|
|
3
3
|
|
|
4
|
+
function requireEvent<T>(event: T | undefined, message: string): T {
|
|
5
|
+
if (!event) {
|
|
6
|
+
throw new Error(message);
|
|
7
|
+
}
|
|
8
|
+
return event;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
function requireResponseBody(body: string | undefined): string {
|
|
12
|
+
if (!body) {
|
|
13
|
+
throw new Error("Plivo provider did not return a response body");
|
|
14
|
+
}
|
|
15
|
+
return body;
|
|
16
|
+
}
|
|
17
|
+
|
|
4
18
|
describe("PlivoProvider", () => {
|
|
5
19
|
it("parses answer callback into call.answered and returns keep-alive XML", () => {
|
|
6
20
|
const provider = new PlivoProvider({
|
|
@@ -18,11 +32,13 @@ describe("PlivoProvider", () => {
|
|
|
18
32
|
});
|
|
19
33
|
|
|
20
34
|
expect(result.events).toHaveLength(1);
|
|
21
|
-
|
|
22
|
-
expect(
|
|
23
|
-
expect(
|
|
24
|
-
expect(
|
|
25
|
-
|
|
35
|
+
const event = requireEvent(result.events[0], "expected Plivo answer event");
|
|
36
|
+
expect(event.type).toBe("call.answered");
|
|
37
|
+
expect(event.callId).toBe("internal-call-id");
|
|
38
|
+
expect(event.providerCallId).toBe("call-uuid");
|
|
39
|
+
const responseBody = requireResponseBody(result.providerResponseBody);
|
|
40
|
+
expect(responseBody).toContain("<Wait");
|
|
41
|
+
expect(responseBody).toContain('length="300"');
|
|
26
42
|
});
|
|
27
43
|
|
|
28
44
|
it("uses verified request key when provided", () => {
|
|
@@ -44,6 +60,34 @@ describe("PlivoProvider", () => {
|
|
|
44
60
|
);
|
|
45
61
|
|
|
46
62
|
expect(result.events).toHaveLength(1);
|
|
47
|
-
expect(result.events[0]
|
|
63
|
+
expect(requireEvent(result.events[0], "expected verified Plivo event").dedupeKey).toBe(
|
|
64
|
+
"plivo:v3:verified",
|
|
65
|
+
);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
it("pins stored callback bases to publicUrl instead of request Host", () => {
|
|
69
|
+
const provider = new PlivoProvider(
|
|
70
|
+
{
|
|
71
|
+
authId: "MA000000000000000000",
|
|
72
|
+
authToken: "test-token",
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
publicUrl: "https://voice.openclaw.ai/voice/webhook?provider=plivo",
|
|
76
|
+
},
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
provider.parseWebhookEvent({
|
|
80
|
+
headers: { host: "attacker.example" },
|
|
81
|
+
rawBody:
|
|
82
|
+
"CallUUID=call-uuid&CallStatus=in-progress&Direction=outbound&From=%2B15550000000&To=%2B15550000001&Event=StartApp",
|
|
83
|
+
url: "https://attacker.example/voice/webhook?provider=plivo&flow=answer&callId=internal-call-id",
|
|
84
|
+
method: "POST",
|
|
85
|
+
query: { provider: "plivo", flow: "answer", callId: "internal-call-id" },
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
const callbackMap = (provider as unknown as { callUuidToWebhookUrl: Map<string, string> })
|
|
89
|
+
.callUuidToWebhookUrl;
|
|
90
|
+
|
|
91
|
+
expect(callbackMap.get("call-uuid")).toBe("https://voice.openclaw.ai/voice/webhook");
|
|
48
92
|
});
|
|
49
93
|
});
|
package/src/providers/plivo.ts
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
+
import {
|
|
3
|
+
normalizeLowercaseStringOrEmpty,
|
|
4
|
+
normalizeOptionalString,
|
|
5
|
+
} from "openclaw/plugin-sdk/text-runtime";
|
|
2
6
|
import type { PlivoConfig, WebhookSecurityConfig } from "../config.js";
|
|
3
7
|
import { getHeader } from "../http-headers.js";
|
|
4
8
|
import type {
|
|
@@ -129,7 +133,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
129
133
|
ctx: WebhookContext,
|
|
130
134
|
options?: WebhookParseOptions,
|
|
131
135
|
): ProviderWebhookParseResult {
|
|
132
|
-
const flow =
|
|
136
|
+
const flow = normalizeOptionalString(ctx.query?.flow) ?? "";
|
|
133
137
|
|
|
134
138
|
const parsed = this.parseBody(ctx.rawBody);
|
|
135
139
|
if (!parsed) {
|
|
@@ -480,7 +484,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
480
484
|
|
|
481
485
|
private static normalizeNumber(numberOrSip: string): string {
|
|
482
486
|
const trimmed = numberOrSip.trim();
|
|
483
|
-
if (trimmed
|
|
487
|
+
if (normalizeLowercaseStringOrEmpty(trimmed).startsWith("sip:")) {
|
|
484
488
|
return trimmed;
|
|
485
489
|
}
|
|
486
490
|
return trimmed.replace(/[^\d+]/g, "");
|
|
@@ -517,10 +521,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
517
521
|
}
|
|
518
522
|
|
|
519
523
|
private getCallIdFromQuery(ctx: WebhookContext): string | undefined {
|
|
520
|
-
const callId =
|
|
521
|
-
typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
|
|
522
|
-
? ctx.query.callId.trim()
|
|
523
|
-
: undefined;
|
|
524
|
+
const callId = normalizeOptionalString(ctx.query?.callId);
|
|
524
525
|
return callId || undefined;
|
|
525
526
|
}
|
|
526
527
|
|
|
@@ -544,6 +545,13 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
544
545
|
|
|
545
546
|
private baseWebhookUrlFromCtx(ctx: WebhookContext): string | null {
|
|
546
547
|
try {
|
|
548
|
+
if (this.options.publicUrl) {
|
|
549
|
+
const base = new URL(this.options.publicUrl);
|
|
550
|
+
const requestUrl = new URL(ctx.url);
|
|
551
|
+
base.pathname = requestUrl.pathname;
|
|
552
|
+
return `${base.origin}${base.pathname}`;
|
|
553
|
+
}
|
|
554
|
+
|
|
547
555
|
const u = new URL(
|
|
548
556
|
reconstructWebhookUrl(ctx, {
|
|
549
557
|
allowedHosts: this.options.webhookSecurity?.allowedHosts,
|