@openclaw/voice-call 2026.2.21 → 2026.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +13 -0
- package/package.json +1 -1
- package/src/cli.ts +29 -16
- package/src/config.test.ts +4 -0
- package/src/config.ts +15 -0
- package/src/manager/context.ts +1 -0
- package/src/manager/events.test.ts +100 -71
- package/src/manager/events.ts +17 -4
- package/src/manager/outbound.ts +76 -36
- package/src/manager/timers.ts +13 -4
- package/src/manager.test.ts +109 -127
- package/src/media-stream.test.ts +175 -0
- package/src/media-stream.ts +110 -0
- package/src/providers/plivo.ts +84 -39
- package/src/providers/twilio/webhook.ts +1 -0
- package/src/providers/twilio.test.ts +34 -0
- package/src/providers/twilio.ts +54 -3
- package/src/types.ts +8 -0
- package/src/webhook-security.test.ts +76 -0
- package/src/webhook-security.ts +100 -17
- package/src/webhook.test.ts +51 -1
- package/src/webhook.ts +24 -8
package/src/media-stream.ts
CHANGED
|
@@ -21,6 +21,14 @@ import type {
|
|
|
21
21
|
export interface MediaStreamConfig {
|
|
22
22
|
/** STT provider for transcription */
|
|
23
23
|
sttProvider: OpenAIRealtimeSTTProvider;
|
|
24
|
+
/** Close sockets that never send a valid `start` frame within this window. */
|
|
25
|
+
preStartTimeoutMs?: number;
|
|
26
|
+
/** Max concurrent pre-start sockets. */
|
|
27
|
+
maxPendingConnections?: number;
|
|
28
|
+
/** Max concurrent pre-start sockets from a single source IP. */
|
|
29
|
+
maxPendingConnectionsPerIp?: number;
|
|
30
|
+
/** Max total open sockets (pending + active sessions). */
|
|
31
|
+
maxConnections?: number;
|
|
24
32
|
/** Validate whether to accept a media stream for the given call ID */
|
|
25
33
|
shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
|
|
26
34
|
/** Callback when transcript is received */
|
|
@@ -52,6 +60,16 @@ type TtsQueueEntry = {
|
|
|
52
60
|
reject: (error: unknown) => void;
|
|
53
61
|
};
|
|
54
62
|
|
|
63
|
+
/**
 * Bookkeeping kept for a socket while it is still "pending", i.e. it has
 * upgraded but has not yet sent an accepted `start` frame.
 */
type PendingConnection = {
  /** Remote address the socket is counted against for the per-IP pending limit. */
  ip: string;
  /** Timer that closes the socket when it stays pending for too long. */
  timeout: ReturnType<typeof setTimeout>;
};

/** Default window (ms) a socket may stay pending before it is closed. */
const DEFAULT_PRE_START_TIMEOUT_MS = 5_000;
/** Default cap on concurrently pending sockets across all sources. */
const DEFAULT_MAX_PENDING_CONNECTIONS = 32;
/** Default cap on concurrently pending sockets from one remote IP. */
const DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP = 4;
/** Default cap on total open sockets (pending + active sessions). */
const DEFAULT_MAX_CONNECTIONS = 128;
|
|
72
|
+
|
|
55
73
|
/**
|
|
56
74
|
* Manages WebSocket connections for Twilio media streams.
|
|
57
75
|
*/
|
|
@@ -59,6 +77,14 @@ export class MediaStreamHandler {
|
|
|
59
77
|
private wss: WebSocketServer | null = null;
|
|
60
78
|
private sessions = new Map<string, StreamSession>();
|
|
61
79
|
private config: MediaStreamConfig;
|
|
80
|
+
/** Pending sockets that have upgraded but not yet sent an accepted `start` frame. */
|
|
81
|
+
private pendingConnections = new Map<WebSocket, PendingConnection>();
|
|
82
|
+
/** Pending socket count per remote IP for pre-auth throttling. */
|
|
83
|
+
private pendingByIp = new Map<string, number>();
|
|
84
|
+
private preStartTimeoutMs: number;
|
|
85
|
+
private maxPendingConnections: number;
|
|
86
|
+
private maxPendingConnectionsPerIp: number;
|
|
87
|
+
private maxConnections: number;
|
|
62
88
|
/** TTS playback queues per stream (serialize audio to prevent overlap) */
|
|
63
89
|
private ttsQueues = new Map<string, TtsQueueEntry[]>();
|
|
64
90
|
/** Whether TTS is currently playing per stream */
|
|
@@ -68,6 +94,11 @@ export class MediaStreamHandler {
|
|
|
68
94
|
|
|
69
95
|
constructor(config: MediaStreamConfig) {
|
|
70
96
|
this.config = config;
|
|
97
|
+
this.preStartTimeoutMs = config.preStartTimeoutMs ?? DEFAULT_PRE_START_TIMEOUT_MS;
|
|
98
|
+
this.maxPendingConnections = config.maxPendingConnections ?? DEFAULT_MAX_PENDING_CONNECTIONS;
|
|
99
|
+
this.maxPendingConnectionsPerIp =
|
|
100
|
+
config.maxPendingConnectionsPerIp ?? DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP;
|
|
101
|
+
this.maxConnections = config.maxConnections ?? DEFAULT_MAX_CONNECTIONS;
|
|
71
102
|
}
|
|
72
103
|
|
|
73
104
|
/**
|
|
@@ -79,6 +110,12 @@ export class MediaStreamHandler {
|
|
|
79
110
|
this.wss.on("connection", (ws, req) => this.handleConnection(ws, req));
|
|
80
111
|
}
|
|
81
112
|
|
|
113
|
+
const currentConnections = this.wss.clients.size;
|
|
114
|
+
if (currentConnections >= this.maxConnections) {
|
|
115
|
+
this.rejectUpgrade(socket, 503, "Too many media stream connections");
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
|
|
82
119
|
this.wss.handleUpgrade(request, socket, head, (ws) => {
|
|
83
120
|
this.wss?.emit("connection", ws, request);
|
|
84
121
|
});
|
|
@@ -90,6 +127,12 @@ export class MediaStreamHandler {
|
|
|
90
127
|
private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
|
|
91
128
|
let session: StreamSession | null = null;
|
|
92
129
|
const streamToken = this.getStreamToken(_request);
|
|
130
|
+
const ip = this.getClientIp(_request);
|
|
131
|
+
|
|
132
|
+
if (!this.registerPendingConnection(ws, ip)) {
|
|
133
|
+
ws.close(1013, "Too many pending media stream connections");
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
93
136
|
|
|
94
137
|
ws.on("message", async (data: Buffer) => {
|
|
95
138
|
try {
|
|
@@ -102,6 +145,9 @@ export class MediaStreamHandler {
|
|
|
102
145
|
|
|
103
146
|
case "start":
|
|
104
147
|
session = await this.handleStart(ws, message, streamToken);
|
|
148
|
+
if (session) {
|
|
149
|
+
this.clearPendingConnection(ws);
|
|
150
|
+
}
|
|
105
151
|
break;
|
|
106
152
|
|
|
107
153
|
case "media":
|
|
@@ -125,6 +171,7 @@ export class MediaStreamHandler {
|
|
|
125
171
|
});
|
|
126
172
|
|
|
127
173
|
ws.on("close", () => {
|
|
174
|
+
this.clearPendingConnection(ws);
|
|
128
175
|
if (session) {
|
|
129
176
|
this.handleStop(session);
|
|
130
177
|
}
|
|
@@ -226,6 +273,69 @@ export class MediaStreamHandler {
|
|
|
226
273
|
}
|
|
227
274
|
}
|
|
228
275
|
|
|
276
|
+
/**
 * Resolve the remote address used to key per-IP pending-connection limits.
 *
 * @param request - The HTTP request that initiated the upgrade.
 * @returns The socket's remote address, or the literal "unknown" when the
 *   socket reports none (undefined or empty).
 */
private getClientIp(request: IncomingMessage): string {
  const { remoteAddress } = request.socket;
  if (!remoteAddress) {
    return "unknown";
  }
  return remoteAddress;
}
|
|
279
|
+
|
|
280
|
+
/**
 * Start tracking a socket as pending and arm its pre-start timeout.
 *
 * @param ws - Socket that has not yet sent an accepted `start` frame.
 * @param ip - Remote address the socket is counted against.
 * @returns `true` when the socket was registered; `false` (with a warning
 *   logged and nothing recorded) when the global or per-IP pending limit
 *   has already been reached.
 */
private registerPendingConnection(ws: WebSocket, ip: string): boolean {
  const globalLimitReached = this.pendingConnections.size >= this.maxPendingConnections;
  if (globalLimitReached) {
    console.warn("[MediaStream] Rejecting connection: pending connection limit reached");
    return false;
  }

  const alreadyPendingForIp = this.pendingByIp.get(ip) ?? 0;
  const ipLimitReached = alreadyPendingForIp >= this.maxPendingConnectionsPerIp;
  if (ipLimitReached) {
    console.warn(`[MediaStream] Rejecting connection: pending per-IP limit reached (${ip})`);
    return false;
  }

  // Fires only if the socket is still pending when the window elapses;
  // a socket that was cleared in the meantime is left alone.
  const onStartTimeout = (): void => {
    if (this.pendingConnections.has(ws)) {
      console.warn(
        `[MediaStream] Closing pre-start idle connection after ${this.preStartTimeoutMs}ms (${ip})`,
      );
      ws.close(1008, "Start timeout");
    }
  };
  const timeout = setTimeout(onStartTimeout, this.preStartTimeoutMs);

  // Where the timer supports it, do not let it keep the process alive.
  timeout.unref?.();

  this.pendingByIp.set(ip, alreadyPendingForIp + 1);
  this.pendingConnections.set(ws, { ip, timeout });
  return true;
}
|
|
307
|
+
|
|
308
|
+
/**
 * Stop tracking a socket as pending: cancel its pre-start timer and release
 * its slot in the per-IP counter. Does nothing for a socket that is not
 * currently tracked, so it is safe to call more than once.
 *
 * @param ws - Socket to remove from the pending set.
 */
private clearPendingConnection(ws: WebSocket): void {
  const entry = this.pendingConnections.get(ws);
  if (entry === undefined) {
    return;
  }

  clearTimeout(entry.timeout);
  this.pendingConnections.delete(ws);

  // Drop the per-IP entry entirely once its count would reach zero so the
  // map does not accumulate stale addresses.
  const remainingForIp = (this.pendingByIp.get(entry.ip) ?? 0) - 1;
  if (remainingForIp > 0) {
    this.pendingByIp.set(entry.ip, remainingForIp);
  } else {
    this.pendingByIp.delete(entry.ip);
  }
}
|
|
324
|
+
|
|
325
|
+
/**
 * Refuse a WebSocket upgrade by answering with a plain-text HTTP error on the
 * raw socket and then closing it.
 *
 * @param socket - Raw upgrade socket to answer on.
 * @param statusCode - HTTP status to report (429 or 503).
 * @param message - Human-readable reason; sent as the body followed by a newline.
 */
private rejectUpgrade(socket: Duplex, statusCode: 429 | 503, message: string): void {
  // Nothing can be sent on a socket that is already torn down; just release it.
  if (socket.destroyed || !socket.writable) {
    socket.destroy();
    return;
  }

  const statusText = statusCode === 429 ? "Too Many Requests" : "Service Unavailable";
  const body = `${message}\n`;
  const response =
    `HTTP/1.1 ${statusCode} ${statusText}\r\n` +
    "Connection: close\r\n" +
    "Content-Type: text/plain; charset=utf-8\r\n" +
    `Content-Length: ${Buffer.byteLength(body)}\r\n` +
    "\r\n" +
    body;

  const release = (): void => {
    socket.destroy();
  };

  // Calling destroy() straight after write() can discard bytes that are still
  // buffered, so the client might never see the status line. end() flushes the
  // response first; the socket is destroyed once the writable side finishes.
  socket.once("finish", release);
  // A peer that resets mid-response must not surface as an unhandled "error" event.
  socket.once("error", release);
  socket.end(response);
}
|
|
338
|
+
|
|
229
339
|
/**
|
|
230
340
|
* Get an active session with an open WebSocket, or undefined if unavailable.
|
|
231
341
|
*/
|
package/src/providers/plivo.ts
CHANGED
|
@@ -30,6 +30,29 @@ export interface PlivoProviderOptions {
|
|
|
30
30
|
type PendingSpeak = { text: string; locale?: string };
|
|
31
31
|
type PendingListen = { language?: string };
|
|
32
32
|
|
|
33
|
+
/**
 * Read a single header value by name.
 *
 * The lookup key is the lower-cased `name`, so the header map is expected to
 * use lower-case keys. When the header holds several values, the first one
 * is returned.
 *
 * @param headers - Header map whose values may be a string, a string array, or undefined.
 * @param name - Header name in any letter case.
 * @returns The (first) header value, or `undefined` when absent.
 */
function getHeader(
  headers: Record<string, string | string[] | undefined>,
  name: string,
): string | undefined {
  const raw = headers[name.toLowerCase()];
  return Array.isArray(raw) ? raw[0] : raw;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Derive a stable dedupe key for a Plivo webhook request.
 *
 * Preference order: the V3 signature nonce header, then the V2 signature
 * nonce header, and finally a SHA-256 hex digest of the raw request body.
 *
 * @param ctx - Webhook request context (headers and raw body are read).
 * @returns A key prefixed with `plivo:v3:`, `plivo:v2:` or `plivo:fallback:`.
 */
function createPlivoRequestDedupeKey(ctx: WebhookContext): string {
  const v3Nonce = getHeader(ctx.headers, "x-plivo-signature-v3-nonce");
  if (v3Nonce) {
    return `plivo:v3:${v3Nonce}`;
  }

  const v2Nonce = getHeader(ctx.headers, "x-plivo-signature-v2-nonce");
  if (v2Nonce) {
    return `plivo:v2:${v2Nonce}`;
  }

  // No nonce header present: fall back to hashing the body itself.
  const bodyDigest = crypto.createHash("sha256").update(ctx.rawBody).digest("hex");
  return `plivo:fallback:${bodyDigest}`;
}
|
|
55
|
+
|
|
33
56
|
export class PlivoProvider implements VoiceCallProvider {
|
|
34
57
|
readonly name = "plivo" as const;
|
|
35
58
|
|
|
@@ -104,7 +127,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
104
127
|
console.warn(`[plivo] Webhook verification failed: ${result.reason}`);
|
|
105
128
|
}
|
|
106
129
|
|
|
107
|
-
return { ok: result.ok, reason: result.reason };
|
|
130
|
+
return { ok: result.ok, reason: result.reason, isReplay: result.isReplay };
|
|
108
131
|
}
|
|
109
132
|
|
|
110
133
|
parseWebhookEvent(ctx: WebhookContext): ProviderWebhookParseResult {
|
|
@@ -173,7 +196,8 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
173
196
|
|
|
174
197
|
// Normal events.
|
|
175
198
|
const callIdFromQuery = this.getCallIdFromQuery(ctx);
|
|
176
|
-
const
|
|
199
|
+
const dedupeKey = createPlivoRequestDedupeKey(ctx);
|
|
200
|
+
const event = this.normalizeEvent(parsed, callIdFromQuery, dedupeKey);
|
|
177
201
|
|
|
178
202
|
return {
|
|
179
203
|
events: event ? [event] : [],
|
|
@@ -186,7 +210,11 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
186
210
|
};
|
|
187
211
|
}
|
|
188
212
|
|
|
189
|
-
private normalizeEvent(
|
|
213
|
+
private normalizeEvent(
|
|
214
|
+
params: URLSearchParams,
|
|
215
|
+
callIdOverride?: string,
|
|
216
|
+
dedupeKey?: string,
|
|
217
|
+
): NormalizedEvent | null {
|
|
190
218
|
const callUuid = params.get("CallUUID") || "";
|
|
191
219
|
const requestUuid = params.get("RequestUUID") || "";
|
|
192
220
|
|
|
@@ -201,6 +229,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
201
229
|
|
|
202
230
|
const baseEvent = {
|
|
203
231
|
id: crypto.randomUUID(),
|
|
232
|
+
dedupeKey,
|
|
204
233
|
callId: callIdOverride || callUuid || requestUuid,
|
|
205
234
|
providerCallId: callUuid || requestUuid || undefined,
|
|
206
235
|
timestamp: Date.now(),
|
|
@@ -331,31 +360,40 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
331
360
|
});
|
|
332
361
|
}
|
|
333
362
|
|
|
334
|
-
|
|
335
|
-
|
|
363
|
+
/**
 * Resolve the Plivo call UUID and webhook base URL needed to act on a call.
 *
 * The call UUID is looked up from the provider call ID via
 * `requestUuidToCallUuid`, falling back to the provider call ID itself. The
 * webhook base is looked up by call UUID first, then by internal call ID.
 *
 * @param params.providerCallId - Provider-side identifier for the call.
 * @param params.callId - Internal call identifier.
 * @param params.operation - Operation name, used only in the error message.
 * @returns The resolved `callUuid` and `webhookBase`.
 * @throws Error when no webhook URL is known for the call, or when the
 *   resolved call UUID is empty.
 */
private resolveCallContext(params: { providerCallId: string; callId: string; operation: string }): {
  callUuid: string;
  webhookBase: string;
} {
  const { providerCallId, callId, operation } = params;
  const callUuid = this.requestUuidToCallUuid.get(providerCallId) ?? providerCallId;

  const webhookByUuid = this.callUuidToWebhookUrl.get(callUuid);
  const webhookBase = webhookByUuid || this.callIdToWebhookUrl.get(callId);
  if (!webhookBase) {
    throw new Error("Missing webhook URL for this call (provider state missing)");
  }
  if (!callUuid) {
    throw new Error(`Missing Plivo CallUUID for ${operation}`);
  }

  return { callUuid, webhookBase };
}
|
|
345
382
|
|
|
346
|
-
|
|
383
|
+
private async transferCallLeg(params: {
|
|
384
|
+
callUuid: string;
|
|
385
|
+
webhookBase: string;
|
|
386
|
+
callId: string;
|
|
387
|
+
flow: "xml-speak" | "xml-listen";
|
|
388
|
+
}): Promise<void> {
|
|
389
|
+
const transferUrl = new URL(params.webhookBase);
|
|
347
390
|
transferUrl.searchParams.set("provider", "plivo");
|
|
348
|
-
transferUrl.searchParams.set("flow",
|
|
349
|
-
transferUrl.searchParams.set("callId",
|
|
350
|
-
|
|
351
|
-
this.pendingSpeakByCallId.set(input.callId, {
|
|
352
|
-
text: input.text,
|
|
353
|
-
locale: input.locale,
|
|
354
|
-
});
|
|
391
|
+
transferUrl.searchParams.set("flow", params.flow);
|
|
392
|
+
transferUrl.searchParams.set("callId", params.callId);
|
|
355
393
|
|
|
356
394
|
await this.apiRequest({
|
|
357
395
|
method: "POST",
|
|
358
|
-
endpoint: `/Call/${callUuid}/`,
|
|
396
|
+
endpoint: `/Call/${params.callUuid}/`,
|
|
359
397
|
body: {
|
|
360
398
|
legs: "aleg",
|
|
361
399
|
aleg_url: transferUrl.toString(),
|
|
@@ -364,35 +402,42 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
364
402
|
});
|
|
365
403
|
}
|
|
366
404
|
|
|
367
|
-
async
|
|
368
|
-
const callUuid = this.
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
}
|
|
405
|
+
/**
 * Speak text on a call.
 *
 * Stores the text and locale under the call ID in `pendingSpeakByCallId`,
 * then transfers the call leg with the `xml-speak` flow.
 *
 * @param input - Call identifiers plus the text and optional locale to speak.
 * @throws Error when the call context cannot be resolved (see `resolveCallContext`).
 */
async playTts(input: PlayTtsInput): Promise<void> {
  const { callId, providerCallId, text, locale } = input;
  const context = this.resolveCallContext({
    providerCallId,
    callId,
    operation: "playTts",
  });

  // Record what to say before triggering the transfer that consumes it.
  this.pendingSpeakByCallId.set(callId, { text, locale });

  await this.transferCallLeg({
    callUuid: context.callUuid,
    webhookBase: context.webhookBase,
    callId,
    flow: "xml-speak",
  });
}
|
|
424
|
+
|
|
425
|
+
/**
 * Begin listening for caller speech on a call.
 *
 * Stores the requested language under the call ID in
 * `pendingListenByCallId`, then transfers the call leg with the
 * `xml-listen` flow.
 *
 * @param input - Call identifiers plus the optional recognition language.
 * @throws Error when the call context cannot be resolved (see `resolveCallContext`).
 */
async startListening(input: StartListeningInput): Promise<void> {
  const { callId, providerCallId, language } = input;
  const context = this.resolveCallContext({
    providerCallId,
    callId,
    operation: "startListening",
  });

  // Record the listen settings before triggering the transfer that consumes them.
  this.pendingListenByCallId.set(callId, { language });

  await this.transferCallLeg({
    callUuid: context.callUuid,
    webhookBase: context.webhookBase,
    callId,
    flow: "xml-listen",
  });
}
|
|
398
443
|
|
|
@@ -59,4 +59,38 @@ describe("TwilioProvider", () => {
|
|
|
59
59
|
expect(result.providerResponseBody).toContain('<Parameter name="token" value="');
|
|
60
60
|
expect(result.providerResponseBody).toContain("<Connect>");
|
|
61
61
|
});
|
|
62
|
+
|
|
63
|
+
it("uses a stable dedupeKey for identical request payloads", () => {
|
|
64
|
+
const provider = createProvider();
|
|
65
|
+
const rawBody = "CallSid=CA789&Direction=inbound&SpeechResult=hello";
|
|
66
|
+
const ctxA = {
|
|
67
|
+
...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
|
|
68
|
+
headers: { "i-twilio-idempotency-token": "idem-123" },
|
|
69
|
+
};
|
|
70
|
+
const ctxB = {
|
|
71
|
+
...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
|
|
72
|
+
headers: { "i-twilio-idempotency-token": "idem-123" },
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
const eventA = provider.parseWebhookEvent(ctxA).events[0];
|
|
76
|
+
const eventB = provider.parseWebhookEvent(ctxB).events[0];
|
|
77
|
+
|
|
78
|
+
expect(eventA).toBeDefined();
|
|
79
|
+
expect(eventB).toBeDefined();
|
|
80
|
+
expect(eventA?.id).not.toBe(eventB?.id);
|
|
81
|
+
expect(eventA?.dedupeKey).toBe("twilio:idempotency:idem-123");
|
|
82
|
+
expect(eventA?.dedupeKey).toBe(eventB?.dedupeKey);
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
it("keeps turnToken from query on speech events", () => {
|
|
86
|
+
const provider = createProvider();
|
|
87
|
+
const ctx = createContext("CallSid=CA222&Direction=inbound&SpeechResult=hello", {
|
|
88
|
+
callId: "call-2",
|
|
89
|
+
turnToken: "turn-xyz",
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
const event = provider.parseWebhookEvent(ctx).events[0];
|
|
93
|
+
expect(event?.type).toBe("call.speech");
|
|
94
|
+
expect(event?.turnToken).toBe("turn-xyz");
|
|
95
|
+
});
|
|
62
96
|
});
|
package/src/providers/twilio.ts
CHANGED
|
@@ -20,6 +20,33 @@ import type { VoiceCallProvider } from "./base.js";
|
|
|
20
20
|
import { twilioApiRequest } from "./twilio/api.js";
|
|
21
21
|
import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
|
|
22
22
|
|
|
23
|
+
/**
 * Read a single header value by name.
 *
 * The lookup key is the lower-cased `name`, so the header map is expected to
 * use lower-case keys. When the header holds several values, the first one
 * is returned.
 *
 * @param headers - Header map whose values may be a string, a string array, or undefined.
 * @param name - Header name in any letter case.
 * @returns The (first) header value, or `undefined` when absent.
 */
function getHeader(
  headers: Record<string, string | string[] | undefined>,
  name: string,
): string | undefined {
  const found = headers[name.toLowerCase()];
  if (!Array.isArray(found)) {
    return found;
  }
  return found[0];
}
|
|
33
|
+
|
|
34
|
+
/**
 * Derive a stable dedupe key for a Twilio webhook request.
 *
 * Uses the `i-twilio-idempotency-token` header when present. Otherwise the
 * key is a SHA-256 hex digest over the signature header, the trimmed
 * `callId`, `flow` and `turnToken` query values (empty when not strings),
 * and the raw body, joined by newlines.
 *
 * @param ctx - Webhook request context (headers, query and raw body are read).
 * @returns A key prefixed with `twilio:idempotency:` or `twilio:fallback:`.
 */
function createTwilioRequestDedupeKey(ctx: WebhookContext): string {
  const idempotencyToken = getHeader(ctx.headers, "i-twilio-idempotency-token");
  if (idempotencyToken) {
    return `twilio:idempotency:${idempotencyToken}`;
  }

  // Non-string query values (missing, arrays, ...) contribute an empty field.
  const trimmedQueryValue = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const signature = getHeader(ctx.headers, "x-twilio-signature") ?? "";
  const callId = trimmedQueryValue(ctx.query?.callId);
  const flow = trimmedQueryValue(ctx.query?.flow);
  const turnToken = trimmedQueryValue(ctx.query?.turnToken);

  const material = `${signature}\n${callId}\n${flow}\n${turnToken}\n${ctx.rawBody}`;
  const digest = crypto.createHash("sha256").update(material).digest("hex");
  return `twilio:fallback:${digest}`;
}
|
|
49
|
+
|
|
23
50
|
/**
|
|
24
51
|
* Twilio Voice API provider implementation.
|
|
25
52
|
*
|
|
@@ -212,7 +239,16 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
212
239
|
typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
|
|
213
240
|
? ctx.query.callId.trim()
|
|
214
241
|
: undefined;
|
|
215
|
-
const
|
|
242
|
+
const turnTokenFromQuery =
|
|
243
|
+
typeof ctx.query?.turnToken === "string" && ctx.query.turnToken.trim()
|
|
244
|
+
? ctx.query.turnToken.trim()
|
|
245
|
+
: undefined;
|
|
246
|
+
const dedupeKey = createTwilioRequestDedupeKey(ctx);
|
|
247
|
+
const event = this.normalizeEvent(params, {
|
|
248
|
+
callIdOverride: callIdFromQuery,
|
|
249
|
+
dedupeKey,
|
|
250
|
+
turnToken: turnTokenFromQuery,
|
|
251
|
+
});
|
|
216
252
|
|
|
217
253
|
// For Twilio, we must return TwiML. Most actions are driven by Calls API updates,
|
|
218
254
|
// so the webhook response is typically a pause to keep the call alive.
|
|
@@ -245,14 +281,24 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
245
281
|
/**
|
|
246
282
|
* Convert Twilio webhook params to normalized event format.
|
|
247
283
|
*/
|
|
248
|
-
private normalizeEvent(
|
|
284
|
+
private normalizeEvent(
|
|
285
|
+
params: URLSearchParams,
|
|
286
|
+
options?: {
|
|
287
|
+
callIdOverride?: string;
|
|
288
|
+
dedupeKey?: string;
|
|
289
|
+
turnToken?: string;
|
|
290
|
+
},
|
|
291
|
+
): NormalizedEvent | null {
|
|
249
292
|
const callSid = params.get("CallSid") || "";
|
|
293
|
+
const callIdOverride = options?.callIdOverride;
|
|
250
294
|
|
|
251
295
|
const baseEvent = {
|
|
252
296
|
id: crypto.randomUUID(),
|
|
297
|
+
dedupeKey: options?.dedupeKey,
|
|
253
298
|
callId: callIdOverride || callSid,
|
|
254
299
|
providerCallId: callSid,
|
|
255
300
|
timestamp: Date.now(),
|
|
301
|
+
turnToken: options?.turnToken,
|
|
256
302
|
direction: TwilioProvider.parseDirection(params.get("Direction")),
|
|
257
303
|
from: params.get("From") || undefined,
|
|
258
304
|
to: params.get("To") || undefined,
|
|
@@ -603,9 +649,14 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
603
649
|
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
|
604
650
|
}
|
|
605
651
|
|
|
652
|
+
const actionUrl = new URL(webhookUrl);
|
|
653
|
+
if (input.turnToken) {
|
|
654
|
+
actionUrl.searchParams.set("turnToken", input.turnToken);
|
|
655
|
+
}
|
|
656
|
+
|
|
606
657
|
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
|
|
607
658
|
<Response>
|
|
608
|
-
<Gather input="speech" speechTimeout="auto" language="${input.language || "en-US"}" action="${escapeXml(
|
|
659
|
+
<Gather input="speech" speechTimeout="auto" language="${input.language || "en-US"}" action="${escapeXml(actionUrl.toString())}" method="POST">
|
|
609
660
|
</Gather>
|
|
610
661
|
</Response>`;
|
|
611
662
|
|
package/src/types.ts
CHANGED
|
@@ -74,9 +74,13 @@ export type EndReason = z.infer<typeof EndReasonSchema>;
|
|
|
74
74
|
|
|
75
75
|
const BaseEventSchema = z.object({
|
|
76
76
|
id: z.string(),
|
|
77
|
+
// Stable provider-derived key for idempotency/replay dedupe.
|
|
78
|
+
dedupeKey: z.string().optional(),
|
|
77
79
|
callId: z.string(),
|
|
78
80
|
providerCallId: z.string().optional(),
|
|
79
81
|
timestamp: z.number(),
|
|
82
|
+
// Optional per-turn nonce for speech events (Twilio <Gather> replay hardening).
|
|
83
|
+
turnToken: z.string().optional(),
|
|
80
84
|
// Optional fields for inbound call detection
|
|
81
85
|
direction: z.enum(["inbound", "outbound"]).optional(),
|
|
82
86
|
from: z.string().optional(),
|
|
@@ -171,6 +175,8 @@ export type CallRecord = z.infer<typeof CallRecordSchema>;
|
|
|
171
175
|
/** Outcome of verifying an incoming provider webhook request. */
export type WebhookVerificationResult = {
  /** Whether the request passed verification. */
  ok: boolean;
  /** Optional explanation, e.g. why verification failed. */
  reason?: string;
  /** Signature is valid, but request was seen before within replay window. */
  isReplay?: boolean;
};
|
|
175
181
|
|
|
176
182
|
export type WebhookContext = {
|
|
@@ -226,6 +232,8 @@ export type StartListeningInput = {
|
|
|
226
232
|
callId: CallId;
|
|
227
233
|
providerCallId: ProviderCallId;
|
|
228
234
|
language?: string;
|
|
235
|
+
/** Optional per-turn nonce for provider callbacks (replay hardening). */
|
|
236
|
+
turnToken?: string;
|
|
229
237
|
};
|
|
230
238
|
|
|
231
239
|
export type StopListeningInput = {
|
|
@@ -163,6 +163,40 @@ describe("verifyPlivoWebhook", () => {
|
|
|
163
163
|
expect(result.ok).toBe(false);
|
|
164
164
|
expect(result.reason).toMatch(/Missing Plivo signature headers/);
|
|
165
165
|
});
|
|
166
|
+
|
|
167
|
+
it("marks replayed valid V3 requests as replay without failing auth", () => {
|
|
168
|
+
const authToken = "test-auth-token";
|
|
169
|
+
const nonce = "nonce-replay-v3";
|
|
170
|
+
const urlWithQuery = "https://example.com/voice/webhook?flow=answer&callId=abc";
|
|
171
|
+
const postBody = "CallUUID=uuid&CallStatus=in-progress&From=%2B15550000000";
|
|
172
|
+
const signature = plivoV3Signature({
|
|
173
|
+
authToken,
|
|
174
|
+
urlWithQuery,
|
|
175
|
+
postBody,
|
|
176
|
+
nonce,
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
const ctx = {
|
|
180
|
+
headers: {
|
|
181
|
+
host: "example.com",
|
|
182
|
+
"x-forwarded-proto": "https",
|
|
183
|
+
"x-plivo-signature-v3": signature,
|
|
184
|
+
"x-plivo-signature-v3-nonce": nonce,
|
|
185
|
+
},
|
|
186
|
+
rawBody: postBody,
|
|
187
|
+
url: urlWithQuery,
|
|
188
|
+
method: "POST" as const,
|
|
189
|
+
query: { flow: "answer", callId: "abc" },
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
const first = verifyPlivoWebhook(ctx, authToken);
|
|
193
|
+
const second = verifyPlivoWebhook(ctx, authToken);
|
|
194
|
+
|
|
195
|
+
expect(first.ok).toBe(true);
|
|
196
|
+
expect(first.isReplay).toBeFalsy();
|
|
197
|
+
expect(second.ok).toBe(true);
|
|
198
|
+
expect(second.isReplay).toBe(true);
|
|
199
|
+
});
|
|
166
200
|
});
|
|
167
201
|
|
|
168
202
|
describe("verifyTwilioWebhook", () => {
|
|
@@ -197,6 +231,48 @@ describe("verifyTwilioWebhook", () => {
|
|
|
197
231
|
expect(result.ok).toBe(true);
|
|
198
232
|
});
|
|
199
233
|
|
|
234
|
+
it("marks replayed valid requests as replay without failing auth", () => {
|
|
235
|
+
const authToken = "test-auth-token";
|
|
236
|
+
const publicUrl = "https://example.com/voice/webhook";
|
|
237
|
+
const urlWithQuery = `${publicUrl}?callId=abc`;
|
|
238
|
+
const postBody = "CallSid=CS777&CallStatus=completed&From=%2B15550000000";
|
|
239
|
+
const signature = twilioSignature({ authToken, url: urlWithQuery, postBody });
|
|
240
|
+
const headers = {
|
|
241
|
+
host: "example.com",
|
|
242
|
+
"x-forwarded-proto": "https",
|
|
243
|
+
"x-twilio-signature": signature,
|
|
244
|
+
"i-twilio-idempotency-token": "idem-replay-1",
|
|
245
|
+
};
|
|
246
|
+
|
|
247
|
+
const first = verifyTwilioWebhook(
|
|
248
|
+
{
|
|
249
|
+
headers,
|
|
250
|
+
rawBody: postBody,
|
|
251
|
+
url: "http://local/voice/webhook?callId=abc",
|
|
252
|
+
method: "POST",
|
|
253
|
+
query: { callId: "abc" },
|
|
254
|
+
},
|
|
255
|
+
authToken,
|
|
256
|
+
{ publicUrl },
|
|
257
|
+
);
|
|
258
|
+
const second = verifyTwilioWebhook(
|
|
259
|
+
{
|
|
260
|
+
headers,
|
|
261
|
+
rawBody: postBody,
|
|
262
|
+
url: "http://local/voice/webhook?callId=abc",
|
|
263
|
+
method: "POST",
|
|
264
|
+
query: { callId: "abc" },
|
|
265
|
+
},
|
|
266
|
+
authToken,
|
|
267
|
+
{ publicUrl },
|
|
268
|
+
);
|
|
269
|
+
|
|
270
|
+
expect(first.ok).toBe(true);
|
|
271
|
+
expect(first.isReplay).toBeFalsy();
|
|
272
|
+
expect(second.ok).toBe(true);
|
|
273
|
+
expect(second.isReplay).toBe(true);
|
|
274
|
+
});
|
|
275
|
+
|
|
200
276
|
it("rejects invalid signatures even when attacker injects forwarded host", () => {
|
|
201
277
|
const authToken = "test-auth-token";
|
|
202
278
|
const postBody = "CallSid=CS123&CallStatus=completed&From=%2B15550000000";
|