@openclaw/voice-call 2026.2.22 → 2026.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/package.json +1 -1
- package/src/cli.ts +29 -16
- package/src/manager/context.ts +1 -0
- package/src/manager/events.test.ts +64 -21
- package/src/manager/events.ts +17 -4
- package/src/manager/outbound.ts +36 -16
- package/src/manager/timers.ts +13 -4
- package/src/manager.test.ts +60 -1
- package/src/providers/plivo.ts +84 -39
- package/src/providers/twilio/webhook.ts +1 -0
- package/src/providers/twilio.test.ts +34 -0
- package/src/providers/twilio.ts +54 -3
- package/src/types.ts +8 -0
- package/src/webhook-security.test.ts +76 -0
- package/src/webhook-security.ts +100 -17
- package/src/webhook.test.ts +51 -1
- package/src/webhook.ts +9 -5
package/README.md
CHANGED
|
@@ -175,5 +175,7 @@ Actions:
|
|
|
175
175
|
## Notes
|
|
176
176
|
|
|
177
177
|
- Uses webhook signature verification for Twilio/Telnyx/Plivo.
|
|
178
|
+
- Adds replay protection for Twilio and Plivo webhooks (valid duplicate callbacks are ignored safely).
|
|
179
|
+
- Twilio speech turns include a per-turn token so stale/replayed callbacks cannot complete a newer turn.
|
|
178
180
|
- `responseModel` / `responseSystemPrompt` control AI auto-responses.
|
|
179
181
|
- Media streaming requires `ws` and OpenAI Realtime API key.
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -81,6 +81,27 @@ function summarizeSeries(values: number[]): {
|
|
|
81
81
|
};
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
+
function resolveCallMode(mode?: string): "notify" | "conversation" | undefined {
|
|
85
|
+
return mode === "notify" || mode === "conversation" ? mode : undefined;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
async function initiateCallAndPrintId(params: {
|
|
89
|
+
runtime: VoiceCallRuntime;
|
|
90
|
+
to: string;
|
|
91
|
+
message?: string;
|
|
92
|
+
mode?: string;
|
|
93
|
+
}) {
|
|
94
|
+
const result = await params.runtime.manager.initiateCall(params.to, undefined, {
|
|
95
|
+
message: params.message,
|
|
96
|
+
mode: resolveCallMode(params.mode),
|
|
97
|
+
});
|
|
98
|
+
if (!result.success) {
|
|
99
|
+
throw new Error(result.error || "initiate failed");
|
|
100
|
+
}
|
|
101
|
+
// eslint-disable-next-line no-console
|
|
102
|
+
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
|
103
|
+
}
|
|
104
|
+
|
|
84
105
|
export function registerVoiceCallCli(params: {
|
|
85
106
|
program: Command;
|
|
86
107
|
config: VoiceCallConfig;
|
|
@@ -112,16 +133,12 @@ export function registerVoiceCallCli(params: {
|
|
|
112
133
|
if (!to) {
|
|
113
134
|
throw new Error("Missing --to and no toNumber configured");
|
|
114
135
|
}
|
|
115
|
-
|
|
136
|
+
await initiateCallAndPrintId({
|
|
137
|
+
runtime: rt,
|
|
138
|
+
to,
|
|
116
139
|
message: options.message,
|
|
117
|
-
mode:
|
|
118
|
-
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
|
|
140
|
+
mode: options.mode,
|
|
119
141
|
});
|
|
120
|
-
if (!result.success) {
|
|
121
|
-
throw new Error(result.error || "initiate failed");
|
|
122
|
-
}
|
|
123
|
-
// eslint-disable-next-line no-console
|
|
124
|
-
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
|
125
142
|
});
|
|
126
143
|
|
|
127
144
|
root
|
|
@@ -136,16 +153,12 @@ export function registerVoiceCallCli(params: {
|
|
|
136
153
|
)
|
|
137
154
|
.action(async (options: { to: string; message?: string; mode?: string }) => {
|
|
138
155
|
const rt = await ensureRuntime();
|
|
139
|
-
|
|
156
|
+
await initiateCallAndPrintId({
|
|
157
|
+
runtime: rt,
|
|
158
|
+
to: options.to,
|
|
140
159
|
message: options.message,
|
|
141
|
-
mode:
|
|
142
|
-
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
|
|
160
|
+
mode: options.mode,
|
|
143
161
|
});
|
|
144
|
-
if (!result.success) {
|
|
145
|
-
throw new Error(result.error || "initiate failed");
|
|
146
|
-
}
|
|
147
|
-
// eslint-disable-next-line no-console
|
|
148
|
-
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
|
149
162
|
});
|
|
150
163
|
|
|
151
164
|
root
|
package/src/manager/context.ts
CHANGED
|
@@ -71,19 +71,26 @@ function createInboundInitiatedEvent(params: {
|
|
|
71
71
|
};
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
function createRejectingInboundContext(): {
|
|
75
|
+
ctx: CallManagerContext;
|
|
76
|
+
hangupCalls: HangupCallInput[];
|
|
77
|
+
} {
|
|
78
|
+
const hangupCalls: HangupCallInput[] = [];
|
|
79
|
+
const provider = createProvider({
|
|
80
|
+
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
81
|
+
hangupCalls.push(input);
|
|
82
|
+
},
|
|
83
|
+
});
|
|
84
|
+
const ctx = createContext({
|
|
85
|
+
config: createInboundDisabledConfig(),
|
|
86
|
+
provider,
|
|
87
|
+
});
|
|
88
|
+
return { ctx, hangupCalls };
|
|
89
|
+
}
|
|
90
|
+
|
|
74
91
|
describe("processEvent (functional)", () => {
|
|
75
92
|
it("calls provider hangup when rejecting inbound call", () => {
|
|
76
|
-
const hangupCalls
|
|
77
|
-
const provider = createProvider({
|
|
78
|
-
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
79
|
-
hangupCalls.push(input);
|
|
80
|
-
},
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
const ctx = createContext({
|
|
84
|
-
config: createInboundDisabledConfig(),
|
|
85
|
-
provider,
|
|
86
|
-
});
|
|
93
|
+
const { ctx, hangupCalls } = createRejectingInboundContext();
|
|
87
94
|
const event = createInboundInitiatedEvent({
|
|
88
95
|
id: "evt-1",
|
|
89
96
|
providerCallId: "prov-1",
|
|
@@ -118,16 +125,7 @@ describe("processEvent (functional)", () => {
|
|
|
118
125
|
});
|
|
119
126
|
|
|
120
127
|
it("calls hangup only once for duplicate events for same rejected call", () => {
|
|
121
|
-
const hangupCalls
|
|
122
|
-
const provider = createProvider({
|
|
123
|
-
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
124
|
-
hangupCalls.push(input);
|
|
125
|
-
},
|
|
126
|
-
});
|
|
127
|
-
const ctx = createContext({
|
|
128
|
-
config: createInboundDisabledConfig(),
|
|
129
|
-
provider,
|
|
130
|
-
});
|
|
128
|
+
const { ctx, hangupCalls } = createRejectingInboundContext();
|
|
131
129
|
const event1 = createInboundInitiatedEvent({
|
|
132
130
|
id: "evt-init",
|
|
133
131
|
providerCallId: "prov-dup",
|
|
@@ -236,4 +234,49 @@ describe("processEvent (functional)", () => {
|
|
|
236
234
|
expect(() => processEvent(ctx, event)).not.toThrow();
|
|
237
235
|
expect(ctx.activeCalls.size).toBe(0);
|
|
238
236
|
});
|
|
237
|
+
|
|
238
|
+
it("deduplicates by dedupeKey even when event IDs differ", () => {
|
|
239
|
+
const now = Date.now();
|
|
240
|
+
const ctx = createContext();
|
|
241
|
+
ctx.activeCalls.set("call-dedupe", {
|
|
242
|
+
callId: "call-dedupe",
|
|
243
|
+
providerCallId: "provider-dedupe",
|
|
244
|
+
provider: "plivo",
|
|
245
|
+
direction: "outbound",
|
|
246
|
+
state: "answered",
|
|
247
|
+
from: "+15550000000",
|
|
248
|
+
to: "+15550000001",
|
|
249
|
+
startedAt: now,
|
|
250
|
+
transcript: [],
|
|
251
|
+
processedEventIds: [],
|
|
252
|
+
metadata: {},
|
|
253
|
+
});
|
|
254
|
+
ctx.providerCallIdMap.set("provider-dedupe", "call-dedupe");
|
|
255
|
+
|
|
256
|
+
processEvent(ctx, {
|
|
257
|
+
id: "evt-1",
|
|
258
|
+
dedupeKey: "stable-key-1",
|
|
259
|
+
type: "call.speech",
|
|
260
|
+
callId: "call-dedupe",
|
|
261
|
+
providerCallId: "provider-dedupe",
|
|
262
|
+
timestamp: now + 1,
|
|
263
|
+
transcript: "hello",
|
|
264
|
+
isFinal: true,
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
processEvent(ctx, {
|
|
268
|
+
id: "evt-2",
|
|
269
|
+
dedupeKey: "stable-key-1",
|
|
270
|
+
type: "call.speech",
|
|
271
|
+
callId: "call-dedupe",
|
|
272
|
+
providerCallId: "provider-dedupe",
|
|
273
|
+
timestamp: now + 2,
|
|
274
|
+
transcript: "hello",
|
|
275
|
+
isFinal: true,
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
const call = ctx.activeCalls.get("call-dedupe");
|
|
279
|
+
expect(call?.transcript).toHaveLength(1);
|
|
280
|
+
expect(Array.from(ctx.processedEventIds)).toEqual(["stable-key-1"]);
|
|
281
|
+
});
|
|
239
282
|
});
|
package/src/manager/events.ts
CHANGED
|
@@ -92,10 +92,11 @@ function createInboundCall(params: {
|
|
|
92
92
|
}
|
|
93
93
|
|
|
94
94
|
export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
|
95
|
-
|
|
95
|
+
const dedupeKey = event.dedupeKey || event.id;
|
|
96
|
+
if (ctx.processedEventIds.has(dedupeKey)) {
|
|
96
97
|
return;
|
|
97
98
|
}
|
|
98
|
-
ctx.processedEventIds.add(
|
|
99
|
+
ctx.processedEventIds.add(dedupeKey);
|
|
99
100
|
|
|
100
101
|
let call = findCall({
|
|
101
102
|
activeCalls: ctx.activeCalls,
|
|
@@ -158,7 +159,7 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
|
|
158
159
|
}
|
|
159
160
|
}
|
|
160
161
|
|
|
161
|
-
call.processedEventIds.push(
|
|
162
|
+
call.processedEventIds.push(dedupeKey);
|
|
162
163
|
|
|
163
164
|
switch (event.type) {
|
|
164
165
|
case "call.initiated":
|
|
@@ -192,8 +193,20 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
|
|
192
193
|
|
|
193
194
|
case "call.speech":
|
|
194
195
|
if (event.isFinal) {
|
|
196
|
+
const hadWaiter = ctx.transcriptWaiters.has(call.callId);
|
|
197
|
+
const resolved = resolveTranscriptWaiter(
|
|
198
|
+
ctx,
|
|
199
|
+
call.callId,
|
|
200
|
+
event.transcript,
|
|
201
|
+
event.turnToken,
|
|
202
|
+
);
|
|
203
|
+
if (hadWaiter && !resolved) {
|
|
204
|
+
console.warn(
|
|
205
|
+
`[voice-call] Ignoring speech event with mismatched turn token for ${call.callId}`,
|
|
206
|
+
);
|
|
207
|
+
break;
|
|
208
|
+
}
|
|
195
209
|
addTranscriptEntry(call, "user", event.transcript);
|
|
196
|
-
resolveTranscriptWaiter(ctx, call.callId, event.transcript);
|
|
197
210
|
}
|
|
198
211
|
transitionState(call, "listening");
|
|
199
212
|
break;
|
package/src/manager/outbound.ts
CHANGED
|
@@ -63,6 +63,15 @@ type ConnectedCallLookup =
|
|
|
63
63
|
provider: NonNullable<ConnectedCallContext["provider"]>;
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
+
type ConnectedCallResolution =
|
|
67
|
+
| { ok: false; error: string }
|
|
68
|
+
| {
|
|
69
|
+
ok: true;
|
|
70
|
+
call: CallRecord;
|
|
71
|
+
providerCallId: string;
|
|
72
|
+
provider: NonNullable<ConnectedCallContext["provider"]>;
|
|
73
|
+
};
|
|
74
|
+
|
|
66
75
|
function lookupConnectedCall(ctx: ConnectedCallContext, callId: CallId): ConnectedCallLookup {
|
|
67
76
|
const call = ctx.activeCalls.get(callId);
|
|
68
77
|
if (!call) {
|
|
@@ -77,6 +86,22 @@ function lookupConnectedCall(ctx: ConnectedCallContext, callId: CallId): Connect
|
|
|
77
86
|
return { kind: "ok", call, providerCallId: call.providerCallId, provider: ctx.provider };
|
|
78
87
|
}
|
|
79
88
|
|
|
89
|
+
function requireConnectedCall(ctx: ConnectedCallContext, callId: CallId): ConnectedCallResolution {
|
|
90
|
+
const lookup = lookupConnectedCall(ctx, callId);
|
|
91
|
+
if (lookup.kind === "error") {
|
|
92
|
+
return { ok: false, error: lookup.error };
|
|
93
|
+
}
|
|
94
|
+
if (lookup.kind === "ended") {
|
|
95
|
+
return { ok: false, error: "Call has ended" };
|
|
96
|
+
}
|
|
97
|
+
return {
|
|
98
|
+
ok: true,
|
|
99
|
+
call: lookup.call,
|
|
100
|
+
providerCallId: lookup.providerCallId,
|
|
101
|
+
provider: lookup.provider,
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
|
|
80
105
|
export async function initiateCall(
|
|
81
106
|
ctx: InitiateContext,
|
|
82
107
|
to: string,
|
|
@@ -175,14 +200,11 @@ export async function speak(
|
|
|
175
200
|
callId: CallId,
|
|
176
201
|
text: string,
|
|
177
202
|
): Promise<{ success: boolean; error?: string }> {
|
|
178
|
-
const
|
|
179
|
-
if (
|
|
180
|
-
return { success: false, error:
|
|
203
|
+
const connected = requireConnectedCall(ctx, callId);
|
|
204
|
+
if (!connected.ok) {
|
|
205
|
+
return { success: false, error: connected.error };
|
|
181
206
|
}
|
|
182
|
-
|
|
183
|
-
return { success: false, error: "Call has ended" };
|
|
184
|
-
}
|
|
185
|
-
const { call, providerCallId, provider } = lookup;
|
|
207
|
+
const { call, providerCallId, provider } = connected;
|
|
186
208
|
|
|
187
209
|
try {
|
|
188
210
|
transitionState(call, "speaking");
|
|
@@ -257,14 +279,11 @@ export async function continueCall(
|
|
|
257
279
|
callId: CallId,
|
|
258
280
|
prompt: string,
|
|
259
281
|
): Promise<{ success: boolean; transcript?: string; error?: string }> {
|
|
260
|
-
const
|
|
261
|
-
if (
|
|
262
|
-
return { success: false, error:
|
|
282
|
+
const connected = requireConnectedCall(ctx, callId);
|
|
283
|
+
if (!connected.ok) {
|
|
284
|
+
return { success: false, error: connected.error };
|
|
263
285
|
}
|
|
264
|
-
|
|
265
|
-
return { success: false, error: "Call has ended" };
|
|
266
|
-
}
|
|
267
|
-
const { call, providerCallId, provider } = lookup;
|
|
286
|
+
const { call, providerCallId, provider } = connected;
|
|
268
287
|
|
|
269
288
|
if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
|
|
270
289
|
return { success: false, error: "Already waiting for transcript" };
|
|
@@ -272,6 +291,7 @@ export async function continueCall(
|
|
|
272
291
|
ctx.activeTurnCalls.add(callId);
|
|
273
292
|
|
|
274
293
|
const turnStartedAt = Date.now();
|
|
294
|
+
const turnToken = provider.name === "twilio" ? crypto.randomUUID() : undefined;
|
|
275
295
|
|
|
276
296
|
try {
|
|
277
297
|
await speak(ctx, callId, prompt);
|
|
@@ -280,9 +300,9 @@ export async function continueCall(
|
|
|
280
300
|
persistCallRecord(ctx.storePath, call);
|
|
281
301
|
|
|
282
302
|
const listenStartedAt = Date.now();
|
|
283
|
-
await provider.startListening({ callId, providerCallId });
|
|
303
|
+
await provider.startListening({ callId, providerCallId, turnToken });
|
|
284
304
|
|
|
285
|
-
const transcript = await waitForFinalTranscript(ctx, callId);
|
|
305
|
+
const transcript = await waitForFinalTranscript(ctx, callId, turnToken);
|
|
286
306
|
const transcriptReceivedAt = Date.now();
|
|
287
307
|
|
|
288
308
|
// Best-effort: stop listening after final transcript.
|
package/src/manager/timers.ts
CHANGED
|
@@ -77,16 +77,25 @@ export function resolveTranscriptWaiter(
|
|
|
77
77
|
ctx: TranscriptWaiterContext,
|
|
78
78
|
callId: CallId,
|
|
79
79
|
transcript: string,
|
|
80
|
-
|
|
80
|
+
turnToken?: string,
|
|
81
|
+
): boolean {
|
|
81
82
|
const waiter = ctx.transcriptWaiters.get(callId);
|
|
82
83
|
if (!waiter) {
|
|
83
|
-
return;
|
|
84
|
+
return false;
|
|
85
|
+
}
|
|
86
|
+
if (waiter.turnToken && waiter.turnToken !== turnToken) {
|
|
87
|
+
return false;
|
|
84
88
|
}
|
|
85
89
|
clearTranscriptWaiter(ctx, callId);
|
|
86
90
|
waiter.resolve(transcript);
|
|
91
|
+
return true;
|
|
87
92
|
}
|
|
88
93
|
|
|
89
|
-
export function waitForFinalTranscript(
|
|
94
|
+
export function waitForFinalTranscript(
|
|
95
|
+
ctx: TimerContext,
|
|
96
|
+
callId: CallId,
|
|
97
|
+
turnToken?: string,
|
|
98
|
+
): Promise<string> {
|
|
90
99
|
if (ctx.transcriptWaiters.has(callId)) {
|
|
91
100
|
return Promise.reject(new Error("Already waiting for transcript"));
|
|
92
101
|
}
|
|
@@ -98,6 +107,6 @@ export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promi
|
|
|
98
107
|
reject(new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
|
|
99
108
|
}, timeoutMs);
|
|
100
109
|
|
|
101
|
-
ctx.transcriptWaiters.set(callId, { resolve, reject, timeout });
|
|
110
|
+
ctx.transcriptWaiters.set(callId, { resolve, reject, timeout, turnToken });
|
|
102
111
|
});
|
|
103
112
|
}
|
package/src/manager.test.ts
CHANGED
|
@@ -17,12 +17,16 @@ import type {
|
|
|
17
17
|
} from "./types.js";
|
|
18
18
|
|
|
19
19
|
class FakeProvider implements VoiceCallProvider {
|
|
20
|
-
readonly name
|
|
20
|
+
readonly name: "plivo" | "twilio";
|
|
21
21
|
readonly playTtsCalls: PlayTtsInput[] = [];
|
|
22
22
|
readonly hangupCalls: HangupCallInput[] = [];
|
|
23
23
|
readonly startListeningCalls: StartListeningInput[] = [];
|
|
24
24
|
readonly stopListeningCalls: StopListeningInput[] = [];
|
|
25
25
|
|
|
26
|
+
constructor(name: "plivo" | "twilio" = "plivo") {
|
|
27
|
+
this.name = name;
|
|
28
|
+
}
|
|
29
|
+
|
|
26
30
|
verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
|
|
27
31
|
return { ok: true };
|
|
28
32
|
}
|
|
@@ -319,6 +323,61 @@ describe("CallManager", () => {
|
|
|
319
323
|
expect(provider.stopListeningCalls).toHaveLength(1);
|
|
320
324
|
});
|
|
321
325
|
|
|
326
|
+
it("ignores speech events with mismatched turnToken while waiting for transcript", async () => {
|
|
327
|
+
const { manager, provider } = createManagerHarness(
|
|
328
|
+
{
|
|
329
|
+
transcriptTimeoutMs: 5000,
|
|
330
|
+
},
|
|
331
|
+
new FakeProvider("twilio"),
|
|
332
|
+
);
|
|
333
|
+
|
|
334
|
+
const started = await manager.initiateCall("+15550000004");
|
|
335
|
+
expect(started.success).toBe(true);
|
|
336
|
+
|
|
337
|
+
markCallAnswered(manager, started.callId, "evt-turn-token-answered");
|
|
338
|
+
|
|
339
|
+
const turnPromise = manager.continueCall(started.callId, "Prompt");
|
|
340
|
+
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
341
|
+
|
|
342
|
+
const expectedTurnToken = provider.startListeningCalls[0]?.turnToken;
|
|
343
|
+
expect(typeof expectedTurnToken).toBe("string");
|
|
344
|
+
|
|
345
|
+
manager.processEvent({
|
|
346
|
+
id: "evt-turn-token-bad",
|
|
347
|
+
type: "call.speech",
|
|
348
|
+
callId: started.callId,
|
|
349
|
+
providerCallId: "request-uuid",
|
|
350
|
+
timestamp: Date.now(),
|
|
351
|
+
transcript: "stale replay",
|
|
352
|
+
isFinal: true,
|
|
353
|
+
turnToken: "wrong-token",
|
|
354
|
+
});
|
|
355
|
+
|
|
356
|
+
const pendingState = await Promise.race([
|
|
357
|
+
turnPromise.then(() => "resolved"),
|
|
358
|
+
new Promise<"pending">((resolve) => setTimeout(() => resolve("pending"), 0)),
|
|
359
|
+
]);
|
|
360
|
+
expect(pendingState).toBe("pending");
|
|
361
|
+
|
|
362
|
+
manager.processEvent({
|
|
363
|
+
id: "evt-turn-token-good",
|
|
364
|
+
type: "call.speech",
|
|
365
|
+
callId: started.callId,
|
|
366
|
+
providerCallId: "request-uuid",
|
|
367
|
+
timestamp: Date.now(),
|
|
368
|
+
transcript: "final answer",
|
|
369
|
+
isFinal: true,
|
|
370
|
+
turnToken: expectedTurnToken,
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
const turnResult = await turnPromise;
|
|
374
|
+
expect(turnResult.success).toBe(true);
|
|
375
|
+
expect(turnResult.transcript).toBe("final answer");
|
|
376
|
+
|
|
377
|
+
const call = manager.getCall(started.callId);
|
|
378
|
+
expect(call?.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
|
|
379
|
+
});
|
|
380
|
+
|
|
322
381
|
it("tracks latency metadata across multiple closed-loop turns", async () => {
|
|
323
382
|
const { manager, provider } = createManagerHarness({
|
|
324
383
|
transcriptTimeoutMs: 5000,
|
package/src/providers/plivo.ts
CHANGED
|
@@ -30,6 +30,29 @@ export interface PlivoProviderOptions {
|
|
|
30
30
|
type PendingSpeak = { text: string; locale?: string };
|
|
31
31
|
type PendingListen = { language?: string };
|
|
32
32
|
|
|
33
|
+
function getHeader(
|
|
34
|
+
headers: Record<string, string | string[] | undefined>,
|
|
35
|
+
name: string,
|
|
36
|
+
): string | undefined {
|
|
37
|
+
const value = headers[name.toLowerCase()];
|
|
38
|
+
if (Array.isArray(value)) {
|
|
39
|
+
return value[0];
|
|
40
|
+
}
|
|
41
|
+
return value;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
function createPlivoRequestDedupeKey(ctx: WebhookContext): string {
|
|
45
|
+
const nonceV3 = getHeader(ctx.headers, "x-plivo-signature-v3-nonce");
|
|
46
|
+
if (nonceV3) {
|
|
47
|
+
return `plivo:v3:${nonceV3}`;
|
|
48
|
+
}
|
|
49
|
+
const nonceV2 = getHeader(ctx.headers, "x-plivo-signature-v2-nonce");
|
|
50
|
+
if (nonceV2) {
|
|
51
|
+
return `plivo:v2:${nonceV2}`;
|
|
52
|
+
}
|
|
53
|
+
return `plivo:fallback:${crypto.createHash("sha256").update(ctx.rawBody).digest("hex")}`;
|
|
54
|
+
}
|
|
55
|
+
|
|
33
56
|
export class PlivoProvider implements VoiceCallProvider {
|
|
34
57
|
readonly name = "plivo" as const;
|
|
35
58
|
|
|
@@ -104,7 +127,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
104
127
|
console.warn(`[plivo] Webhook verification failed: ${result.reason}`);
|
|
105
128
|
}
|
|
106
129
|
|
|
107
|
-
return { ok: result.ok, reason: result.reason };
|
|
130
|
+
return { ok: result.ok, reason: result.reason, isReplay: result.isReplay };
|
|
108
131
|
}
|
|
109
132
|
|
|
110
133
|
parseWebhookEvent(ctx: WebhookContext): ProviderWebhookParseResult {
|
|
@@ -173,7 +196,8 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
173
196
|
|
|
174
197
|
// Normal events.
|
|
175
198
|
const callIdFromQuery = this.getCallIdFromQuery(ctx);
|
|
176
|
-
const
|
|
199
|
+
const dedupeKey = createPlivoRequestDedupeKey(ctx);
|
|
200
|
+
const event = this.normalizeEvent(parsed, callIdFromQuery, dedupeKey);
|
|
177
201
|
|
|
178
202
|
return {
|
|
179
203
|
events: event ? [event] : [],
|
|
@@ -186,7 +210,11 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
186
210
|
};
|
|
187
211
|
}
|
|
188
212
|
|
|
189
|
-
private normalizeEvent(
|
|
213
|
+
private normalizeEvent(
|
|
214
|
+
params: URLSearchParams,
|
|
215
|
+
callIdOverride?: string,
|
|
216
|
+
dedupeKey?: string,
|
|
217
|
+
): NormalizedEvent | null {
|
|
190
218
|
const callUuid = params.get("CallUUID") || "";
|
|
191
219
|
const requestUuid = params.get("RequestUUID") || "";
|
|
192
220
|
|
|
@@ -201,6 +229,7 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
201
229
|
|
|
202
230
|
const baseEvent = {
|
|
203
231
|
id: crypto.randomUUID(),
|
|
232
|
+
dedupeKey,
|
|
204
233
|
callId: callIdOverride || callUuid || requestUuid,
|
|
205
234
|
providerCallId: callUuid || requestUuid || undefined,
|
|
206
235
|
timestamp: Date.now(),
|
|
@@ -331,31 +360,40 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
331
360
|
});
|
|
332
361
|
}
|
|
333
362
|
|
|
334
|
-
|
|
335
|
-
|
|
363
|
+
private resolveCallContext(params: {
|
|
364
|
+
providerCallId: string;
|
|
365
|
+
callId: string;
|
|
366
|
+
operation: string;
|
|
367
|
+
}): {
|
|
368
|
+
callUuid: string;
|
|
369
|
+
webhookBase: string;
|
|
370
|
+
} {
|
|
371
|
+
const callUuid = this.requestUuidToCallUuid.get(params.providerCallId) ?? params.providerCallId;
|
|
336
372
|
const webhookBase =
|
|
337
|
-
this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(
|
|
373
|
+
this.callUuidToWebhookUrl.get(callUuid) || this.callIdToWebhookUrl.get(params.callId);
|
|
338
374
|
if (!webhookBase) {
|
|
339
375
|
throw new Error("Missing webhook URL for this call (provider state missing)");
|
|
340
376
|
}
|
|
341
|
-
|
|
342
377
|
if (!callUuid) {
|
|
343
|
-
throw new Error(
|
|
378
|
+
throw new Error(`Missing Plivo CallUUID for ${params.operation}`);
|
|
344
379
|
}
|
|
380
|
+
return { callUuid, webhookBase };
|
|
381
|
+
}
|
|
345
382
|
|
|
346
|
-
|
|
383
|
+
private async transferCallLeg(params: {
|
|
384
|
+
callUuid: string;
|
|
385
|
+
webhookBase: string;
|
|
386
|
+
callId: string;
|
|
387
|
+
flow: "xml-speak" | "xml-listen";
|
|
388
|
+
}): Promise<void> {
|
|
389
|
+
const transferUrl = new URL(params.webhookBase);
|
|
347
390
|
transferUrl.searchParams.set("provider", "plivo");
|
|
348
|
-
transferUrl.searchParams.set("flow",
|
|
349
|
-
transferUrl.searchParams.set("callId",
|
|
350
|
-
|
|
351
|
-
this.pendingSpeakByCallId.set(input.callId, {
|
|
352
|
-
text: input.text,
|
|
353
|
-
locale: input.locale,
|
|
354
|
-
});
|
|
391
|
+
transferUrl.searchParams.set("flow", params.flow);
|
|
392
|
+
transferUrl.searchParams.set("callId", params.callId);
|
|
355
393
|
|
|
356
394
|
await this.apiRequest({
|
|
357
395
|
method: "POST",
|
|
358
|
-
endpoint: `/Call/${callUuid}/`,
|
|
396
|
+
endpoint: `/Call/${params.callUuid}/`,
|
|
359
397
|
body: {
|
|
360
398
|
legs: "aleg",
|
|
361
399
|
aleg_url: transferUrl.toString(),
|
|
@@ -364,35 +402,42 @@ export class PlivoProvider implements VoiceCallProvider {
|
|
|
364
402
|
});
|
|
365
403
|
}
|
|
366
404
|
|
|
367
|
-
async
|
|
368
|
-
const callUuid = this.
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
}
|
|
405
|
+
async playTts(input: PlayTtsInput): Promise<void> {
|
|
406
|
+
const { callUuid, webhookBase } = this.resolveCallContext({
|
|
407
|
+
providerCallId: input.providerCallId,
|
|
408
|
+
callId: input.callId,
|
|
409
|
+
operation: "playTts",
|
|
410
|
+
});
|
|
374
411
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
412
|
+
this.pendingSpeakByCallId.set(input.callId, {
|
|
413
|
+
text: input.text,
|
|
414
|
+
locale: input.locale,
|
|
415
|
+
});
|
|
378
416
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
417
|
+
await this.transferCallLeg({
|
|
418
|
+
callUuid,
|
|
419
|
+
webhookBase,
|
|
420
|
+
callId: input.callId,
|
|
421
|
+
flow: "xml-speak",
|
|
422
|
+
});
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
async startListening(input: StartListeningInput): Promise<void> {
|
|
426
|
+
const { callUuid, webhookBase } = this.resolveCallContext({
|
|
427
|
+
providerCallId: input.providerCallId,
|
|
428
|
+
callId: input.callId,
|
|
429
|
+
operation: "startListening",
|
|
430
|
+
});
|
|
383
431
|
|
|
384
432
|
this.pendingListenByCallId.set(input.callId, {
|
|
385
433
|
language: input.language,
|
|
386
434
|
});
|
|
387
435
|
|
|
388
|
-
await this.
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
aleg_url: transferUrl.toString(),
|
|
394
|
-
aleg_method: "POST",
|
|
395
|
-
},
|
|
436
|
+
await this.transferCallLeg({
|
|
437
|
+
callUuid,
|
|
438
|
+
webhookBase,
|
|
439
|
+
callId: input.callId,
|
|
440
|
+
flow: "xml-listen",
|
|
396
441
|
});
|
|
397
442
|
}
|
|
398
443
|
|
|
@@ -59,4 +59,38 @@ describe("TwilioProvider", () => {
|
|
|
59
59
|
expect(result.providerResponseBody).toContain('<Parameter name="token" value="');
|
|
60
60
|
expect(result.providerResponseBody).toContain("<Connect>");
|
|
61
61
|
});
|
|
62
|
+
|
|
63
|
+
it("uses a stable dedupeKey for identical request payloads", () => {
|
|
64
|
+
const provider = createProvider();
|
|
65
|
+
const rawBody = "CallSid=CA789&Direction=inbound&SpeechResult=hello";
|
|
66
|
+
const ctxA = {
|
|
67
|
+
...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
|
|
68
|
+
headers: { "i-twilio-idempotency-token": "idem-123" },
|
|
69
|
+
};
|
|
70
|
+
const ctxB = {
|
|
71
|
+
...createContext(rawBody, { callId: "call-1", turnToken: "turn-1" }),
|
|
72
|
+
headers: { "i-twilio-idempotency-token": "idem-123" },
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
const eventA = provider.parseWebhookEvent(ctxA).events[0];
|
|
76
|
+
const eventB = provider.parseWebhookEvent(ctxB).events[0];
|
|
77
|
+
|
|
78
|
+
expect(eventA).toBeDefined();
|
|
79
|
+
expect(eventB).toBeDefined();
|
|
80
|
+
expect(eventA?.id).not.toBe(eventB?.id);
|
|
81
|
+
expect(eventA?.dedupeKey).toBe("twilio:idempotency:idem-123");
|
|
82
|
+
expect(eventA?.dedupeKey).toBe(eventB?.dedupeKey);
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
it("keeps turnToken from query on speech events", () => {
|
|
86
|
+
const provider = createProvider();
|
|
87
|
+
const ctx = createContext("CallSid=CA222&Direction=inbound&SpeechResult=hello", {
|
|
88
|
+
callId: "call-2",
|
|
89
|
+
turnToken: "turn-xyz",
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
const event = provider.parseWebhookEvent(ctx).events[0];
|
|
93
|
+
expect(event?.type).toBe("call.speech");
|
|
94
|
+
expect(event?.turnToken).toBe("turn-xyz");
|
|
95
|
+
});
|
|
62
96
|
});
|
package/src/providers/twilio.ts
CHANGED
|
@@ -20,6 +20,33 @@ import type { VoiceCallProvider } from "./base.js";
|
|
|
20
20
|
import { twilioApiRequest } from "./twilio/api.js";
|
|
21
21
|
import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
|
|
22
22
|
|
|
23
|
+
function getHeader(
|
|
24
|
+
headers: Record<string, string | string[] | undefined>,
|
|
25
|
+
name: string,
|
|
26
|
+
): string | undefined {
|
|
27
|
+
const value = headers[name.toLowerCase()];
|
|
28
|
+
if (Array.isArray(value)) {
|
|
29
|
+
return value[0];
|
|
30
|
+
}
|
|
31
|
+
return value;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
function createTwilioRequestDedupeKey(ctx: WebhookContext): string {
|
|
35
|
+
const idempotencyToken = getHeader(ctx.headers, "i-twilio-idempotency-token");
|
|
36
|
+
if (idempotencyToken) {
|
|
37
|
+
return `twilio:idempotency:${idempotencyToken}`;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
const signature = getHeader(ctx.headers, "x-twilio-signature") ?? "";
|
|
41
|
+
const callId = typeof ctx.query?.callId === "string" ? ctx.query.callId.trim() : "";
|
|
42
|
+
const flow = typeof ctx.query?.flow === "string" ? ctx.query.flow.trim() : "";
|
|
43
|
+
const turnToken = typeof ctx.query?.turnToken === "string" ? ctx.query.turnToken.trim() : "";
|
|
44
|
+
return `twilio:fallback:${crypto
|
|
45
|
+
.createHash("sha256")
|
|
46
|
+
.update(`${signature}\n${callId}\n${flow}\n${turnToken}\n${ctx.rawBody}`)
|
|
47
|
+
.digest("hex")}`;
|
|
48
|
+
}
|
|
49
|
+
|
|
23
50
|
/**
|
|
24
51
|
* Twilio Voice API provider implementation.
|
|
25
52
|
*
|
|
@@ -212,7 +239,16 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
212
239
|
typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
|
|
213
240
|
? ctx.query.callId.trim()
|
|
214
241
|
: undefined;
|
|
215
|
-
const
|
|
242
|
+
const turnTokenFromQuery =
|
|
243
|
+
typeof ctx.query?.turnToken === "string" && ctx.query.turnToken.trim()
|
|
244
|
+
? ctx.query.turnToken.trim()
|
|
245
|
+
: undefined;
|
|
246
|
+
const dedupeKey = createTwilioRequestDedupeKey(ctx);
|
|
247
|
+
const event = this.normalizeEvent(params, {
|
|
248
|
+
callIdOverride: callIdFromQuery,
|
|
249
|
+
dedupeKey,
|
|
250
|
+
turnToken: turnTokenFromQuery,
|
|
251
|
+
});
|
|
216
252
|
|
|
217
253
|
// For Twilio, we must return TwiML. Most actions are driven by Calls API updates,
|
|
218
254
|
// so the webhook response is typically a pause to keep the call alive.
|
|
@@ -245,14 +281,24 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
245
281
|
/**
|
|
246
282
|
* Convert Twilio webhook params to normalized event format.
|
|
247
283
|
*/
|
|
248
|
-
private normalizeEvent(
|
|
284
|
+
private normalizeEvent(
|
|
285
|
+
params: URLSearchParams,
|
|
286
|
+
options?: {
|
|
287
|
+
callIdOverride?: string;
|
|
288
|
+
dedupeKey?: string;
|
|
289
|
+
turnToken?: string;
|
|
290
|
+
},
|
|
291
|
+
): NormalizedEvent | null {
|
|
249
292
|
const callSid = params.get("CallSid") || "";
|
|
293
|
+
const callIdOverride = options?.callIdOverride;
|
|
250
294
|
|
|
251
295
|
const baseEvent = {
|
|
252
296
|
id: crypto.randomUUID(),
|
|
297
|
+
dedupeKey: options?.dedupeKey,
|
|
253
298
|
callId: callIdOverride || callSid,
|
|
254
299
|
providerCallId: callSid,
|
|
255
300
|
timestamp: Date.now(),
|
|
301
|
+
turnToken: options?.turnToken,
|
|
256
302
|
direction: TwilioProvider.parseDirection(params.get("Direction")),
|
|
257
303
|
from: params.get("From") || undefined,
|
|
258
304
|
to: params.get("To") || undefined,
|
|
@@ -603,9 +649,14 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
603
649
|
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
|
604
650
|
}
|
|
605
651
|
|
|
652
|
+
const actionUrl = new URL(webhookUrl);
|
|
653
|
+
if (input.turnToken) {
|
|
654
|
+
actionUrl.searchParams.set("turnToken", input.turnToken);
|
|
655
|
+
}
|
|
656
|
+
|
|
606
657
|
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
|
|
607
658
|
<Response>
|
|
608
|
-
<Gather input="speech" speechTimeout="auto" language="${input.language || "en-US"}" action="${escapeXml(
|
|
659
|
+
<Gather input="speech" speechTimeout="auto" language="${input.language || "en-US"}" action="${escapeXml(actionUrl.toString())}" method="POST">
|
|
609
660
|
</Gather>
|
|
610
661
|
</Response>`;
|
|
611
662
|
|
package/src/types.ts
CHANGED
|
@@ -74,9 +74,13 @@ export type EndReason = z.infer<typeof EndReasonSchema>;
|
|
|
74
74
|
|
|
75
75
|
const BaseEventSchema = z.object({
|
|
76
76
|
id: z.string(),
|
|
77
|
+
// Stable provider-derived key for idempotency/replay dedupe.
|
|
78
|
+
dedupeKey: z.string().optional(),
|
|
77
79
|
callId: z.string(),
|
|
78
80
|
providerCallId: z.string().optional(),
|
|
79
81
|
timestamp: z.number(),
|
|
82
|
+
// Optional per-turn nonce for speech events (Twilio <Gather> replay hardening).
|
|
83
|
+
turnToken: z.string().optional(),
|
|
80
84
|
// Optional fields for inbound call detection
|
|
81
85
|
direction: z.enum(["inbound", "outbound"]).optional(),
|
|
82
86
|
from: z.string().optional(),
|
|
@@ -171,6 +175,8 @@ export type CallRecord = z.infer<typeof CallRecordSchema>;
|
|
|
171
175
|
export type WebhookVerificationResult = {
|
|
172
176
|
ok: boolean;
|
|
173
177
|
reason?: string;
|
|
178
|
+
/** Signature is valid, but request was seen before within replay window. */
|
|
179
|
+
isReplay?: boolean;
|
|
174
180
|
};
|
|
175
181
|
|
|
176
182
|
export type WebhookContext = {
|
|
@@ -226,6 +232,8 @@ export type StartListeningInput = {
|
|
|
226
232
|
callId: CallId;
|
|
227
233
|
providerCallId: ProviderCallId;
|
|
228
234
|
language?: string;
|
|
235
|
+
/** Optional per-turn nonce for provider callbacks (replay hardening). */
|
|
236
|
+
turnToken?: string;
|
|
229
237
|
};
|
|
230
238
|
|
|
231
239
|
export type StopListeningInput = {
|
|
@@ -163,6 +163,40 @@ describe("verifyPlivoWebhook", () => {
|
|
|
163
163
|
expect(result.ok).toBe(false);
|
|
164
164
|
expect(result.reason).toMatch(/Missing Plivo signature headers/);
|
|
165
165
|
});
|
|
166
|
+
|
|
167
|
+
it("marks replayed valid V3 requests as replay without failing auth", () => {
|
|
168
|
+
const authToken = "test-auth-token";
|
|
169
|
+
const nonce = "nonce-replay-v3";
|
|
170
|
+
const urlWithQuery = "https://example.com/voice/webhook?flow=answer&callId=abc";
|
|
171
|
+
const postBody = "CallUUID=uuid&CallStatus=in-progress&From=%2B15550000000";
|
|
172
|
+
const signature = plivoV3Signature({
|
|
173
|
+
authToken,
|
|
174
|
+
urlWithQuery,
|
|
175
|
+
postBody,
|
|
176
|
+
nonce,
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
const ctx = {
|
|
180
|
+
headers: {
|
|
181
|
+
host: "example.com",
|
|
182
|
+
"x-forwarded-proto": "https",
|
|
183
|
+
"x-plivo-signature-v3": signature,
|
|
184
|
+
"x-plivo-signature-v3-nonce": nonce,
|
|
185
|
+
},
|
|
186
|
+
rawBody: postBody,
|
|
187
|
+
url: urlWithQuery,
|
|
188
|
+
method: "POST" as const,
|
|
189
|
+
query: { flow: "answer", callId: "abc" },
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
const first = verifyPlivoWebhook(ctx, authToken);
|
|
193
|
+
const second = verifyPlivoWebhook(ctx, authToken);
|
|
194
|
+
|
|
195
|
+
expect(first.ok).toBe(true);
|
|
196
|
+
expect(first.isReplay).toBeFalsy();
|
|
197
|
+
expect(second.ok).toBe(true);
|
|
198
|
+
expect(second.isReplay).toBe(true);
|
|
199
|
+
});
|
|
166
200
|
});
|
|
167
201
|
|
|
168
202
|
describe("verifyTwilioWebhook", () => {
|
|
@@ -197,6 +231,48 @@ describe("verifyTwilioWebhook", () => {
|
|
|
197
231
|
expect(result.ok).toBe(true);
|
|
198
232
|
});
|
|
199
233
|
|
|
234
|
+
it("marks replayed valid requests as replay without failing auth", () => {
|
|
235
|
+
const authToken = "test-auth-token";
|
|
236
|
+
const publicUrl = "https://example.com/voice/webhook";
|
|
237
|
+
const urlWithQuery = `${publicUrl}?callId=abc`;
|
|
238
|
+
const postBody = "CallSid=CS777&CallStatus=completed&From=%2B15550000000";
|
|
239
|
+
const signature = twilioSignature({ authToken, url: urlWithQuery, postBody });
|
|
240
|
+
const headers = {
|
|
241
|
+
host: "example.com",
|
|
242
|
+
"x-forwarded-proto": "https",
|
|
243
|
+
"x-twilio-signature": signature,
|
|
244
|
+
"i-twilio-idempotency-token": "idem-replay-1",
|
|
245
|
+
};
|
|
246
|
+
|
|
247
|
+
const first = verifyTwilioWebhook(
|
|
248
|
+
{
|
|
249
|
+
headers,
|
|
250
|
+
rawBody: postBody,
|
|
251
|
+
url: "http://local/voice/webhook?callId=abc",
|
|
252
|
+
method: "POST",
|
|
253
|
+
query: { callId: "abc" },
|
|
254
|
+
},
|
|
255
|
+
authToken,
|
|
256
|
+
{ publicUrl },
|
|
257
|
+
);
|
|
258
|
+
const second = verifyTwilioWebhook(
|
|
259
|
+
{
|
|
260
|
+
headers,
|
|
261
|
+
rawBody: postBody,
|
|
262
|
+
url: "http://local/voice/webhook?callId=abc",
|
|
263
|
+
method: "POST",
|
|
264
|
+
query: { callId: "abc" },
|
|
265
|
+
},
|
|
266
|
+
authToken,
|
|
267
|
+
{ publicUrl },
|
|
268
|
+
);
|
|
269
|
+
|
|
270
|
+
expect(first.ok).toBe(true);
|
|
271
|
+
expect(first.isReplay).toBeFalsy();
|
|
272
|
+
expect(second.ok).toBe(true);
|
|
273
|
+
expect(second.isReplay).toBe(true);
|
|
274
|
+
});
|
|
275
|
+
|
|
200
276
|
it("rejects invalid signatures even when attacker injects forwarded host", () => {
|
|
201
277
|
const authToken = "test-auth-token";
|
|
202
278
|
const postBody = "CallSid=CS123&CallStatus=completed&From=%2B15550000000";
|
package/src/webhook-security.ts
CHANGED
|
@@ -1,6 +1,63 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
2
|
import type { WebhookContext } from "./types.js";
|
|
3
3
|
|
|
4
|
+
const REPLAY_WINDOW_MS = 10 * 60 * 1000;
|
|
5
|
+
const REPLAY_CACHE_MAX_ENTRIES = 10_000;
|
|
6
|
+
const REPLAY_CACHE_PRUNE_INTERVAL = 64;
|
|
7
|
+
|
|
8
|
+
type ReplayCache = {
|
|
9
|
+
seenUntil: Map<string, number>;
|
|
10
|
+
calls: number;
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
const twilioReplayCache: ReplayCache = {
|
|
14
|
+
seenUntil: new Map<string, number>(),
|
|
15
|
+
calls: 0,
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
const plivoReplayCache: ReplayCache = {
|
|
19
|
+
seenUntil: new Map<string, number>(),
|
|
20
|
+
calls: 0,
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
function sha256Hex(input: string): string {
|
|
24
|
+
return crypto.createHash("sha256").update(input).digest("hex");
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
function pruneReplayCache(cache: ReplayCache, now: number): void {
|
|
28
|
+
for (const [key, expiresAt] of cache.seenUntil) {
|
|
29
|
+
if (expiresAt <= now) {
|
|
30
|
+
cache.seenUntil.delete(key);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
while (cache.seenUntil.size > REPLAY_CACHE_MAX_ENTRIES) {
|
|
34
|
+
const oldest = cache.seenUntil.keys().next().value;
|
|
35
|
+
if (!oldest) {
|
|
36
|
+
break;
|
|
37
|
+
}
|
|
38
|
+
cache.seenUntil.delete(oldest);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
function markReplay(cache: ReplayCache, replayKey: string): boolean {
|
|
43
|
+
const now = Date.now();
|
|
44
|
+
cache.calls += 1;
|
|
45
|
+
if (cache.calls % REPLAY_CACHE_PRUNE_INTERVAL === 0) {
|
|
46
|
+
pruneReplayCache(cache, now);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const existing = cache.seenUntil.get(replayKey);
|
|
50
|
+
if (existing && existing > now) {
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
cache.seenUntil.set(replayKey, now + REPLAY_WINDOW_MS);
|
|
55
|
+
if (cache.seenUntil.size > REPLAY_CACHE_MAX_ENTRIES) {
|
|
56
|
+
pruneReplayCache(cache, now);
|
|
57
|
+
}
|
|
58
|
+
return false;
|
|
59
|
+
}
|
|
60
|
+
|
|
4
61
|
/**
|
|
5
62
|
* Validate Twilio webhook signature using HMAC-SHA1.
|
|
6
63
|
*
|
|
@@ -328,6 +385,8 @@ export interface TwilioVerificationResult {
|
|
|
328
385
|
verificationUrl?: string;
|
|
329
386
|
/** Whether we're running behind ngrok free tier */
|
|
330
387
|
isNgrokFreeTier?: boolean;
|
|
388
|
+
/** Request is cryptographically valid but was already processed recently. */
|
|
389
|
+
isReplay?: boolean;
|
|
331
390
|
}
|
|
332
391
|
|
|
333
392
|
export interface TelnyxVerificationResult {
|
|
@@ -335,6 +394,20 @@ export interface TelnyxVerificationResult {
|
|
|
335
394
|
reason?: string;
|
|
336
395
|
}
|
|
337
396
|
|
|
397
|
+
function createTwilioReplayKey(params: {
|
|
398
|
+
ctx: WebhookContext;
|
|
399
|
+
signature: string;
|
|
400
|
+
verificationUrl: string;
|
|
401
|
+
}): string {
|
|
402
|
+
const idempotencyToken = getHeader(params.ctx.headers, "i-twilio-idempotency-token");
|
|
403
|
+
if (idempotencyToken) {
|
|
404
|
+
return `twilio:idempotency:${idempotencyToken}`;
|
|
405
|
+
}
|
|
406
|
+
return `twilio:fallback:${sha256Hex(
|
|
407
|
+
`${params.verificationUrl}\n${params.signature}\n${params.ctx.rawBody}`,
|
|
408
|
+
)}`;
|
|
409
|
+
}
|
|
410
|
+
|
|
338
411
|
function decodeBase64OrBase64Url(input: string): Buffer {
|
|
339
412
|
// Telnyx docs say Base64; some tooling emits Base64URL. Accept both.
|
|
340
413
|
const normalized = input.replace(/-/g, "+").replace(/_/g, "/");
|
|
@@ -505,7 +578,9 @@ export function verifyTwilioWebhook(
|
|
|
505
578
|
const isValid = validateTwilioSignature(authToken, signature, verificationUrl, params);
|
|
506
579
|
|
|
507
580
|
if (isValid) {
|
|
508
|
-
|
|
581
|
+
const replayKey = createTwilioReplayKey({ ctx, signature, verificationUrl });
|
|
582
|
+
const isReplay = markReplay(twilioReplayCache, replayKey);
|
|
583
|
+
return { ok: true, verificationUrl, isReplay };
|
|
509
584
|
}
|
|
510
585
|
|
|
511
586
|
// Check if this is ngrok free tier - the URL might have different format
|
|
@@ -533,6 +608,8 @@ export interface PlivoVerificationResult {
|
|
|
533
608
|
verificationUrl?: string;
|
|
534
609
|
/** Signature version used for verification */
|
|
535
610
|
version?: "v3" | "v2";
|
|
611
|
+
/** Request is cryptographically valid but was already processed recently. */
|
|
612
|
+
isReplay?: boolean;
|
|
536
613
|
}
|
|
537
614
|
|
|
538
615
|
function normalizeSignatureBase64(input: string): string {
|
|
@@ -753,14 +830,17 @@ export function verifyPlivoWebhook(
|
|
|
753
830
|
url: verificationUrl,
|
|
754
831
|
postParams,
|
|
755
832
|
});
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
833
|
+
if (!ok) {
|
|
834
|
+
return {
|
|
835
|
+
ok: false,
|
|
836
|
+
version: "v3",
|
|
837
|
+
verificationUrl,
|
|
838
|
+
reason: "Invalid Plivo V3 signature",
|
|
839
|
+
};
|
|
840
|
+
}
|
|
841
|
+
const replayKey = `plivo:v3:${sha256Hex(`${verificationUrl}\n${nonceV3}`)}`;
|
|
842
|
+
const isReplay = markReplay(plivoReplayCache, replayKey);
|
|
843
|
+
return { ok: true, version: "v3", verificationUrl, isReplay };
|
|
764
844
|
}
|
|
765
845
|
|
|
766
846
|
if (signatureV2 && nonceV2) {
|
|
@@ -770,14 +850,17 @@ export function verifyPlivoWebhook(
|
|
|
770
850
|
nonce: nonceV2,
|
|
771
851
|
url: verificationUrl,
|
|
772
852
|
});
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
853
|
+
if (!ok) {
|
|
854
|
+
return {
|
|
855
|
+
ok: false,
|
|
856
|
+
version: "v2",
|
|
857
|
+
verificationUrl,
|
|
858
|
+
reason: "Invalid Plivo V2 signature",
|
|
859
|
+
};
|
|
860
|
+
}
|
|
861
|
+
const replayKey = `plivo:v2:${sha256Hex(`${verificationUrl}\n${nonceV2}`)}`;
|
|
862
|
+
const isReplay = markReplay(plivoReplayCache, replayKey);
|
|
863
|
+
return { ok: true, version: "v2", verificationUrl, isReplay };
|
|
781
864
|
}
|
|
782
865
|
|
|
783
866
|
return {
|
package/src/webhook.test.ts
CHANGED
|
@@ -45,12 +45,14 @@ const createCall = (startedAt: number): CallRecord => ({
|
|
|
45
45
|
|
|
46
46
|
const createManager = (calls: CallRecord[]) => {
|
|
47
47
|
const endCall = vi.fn(async () => ({ success: true }));
|
|
48
|
+
const processEvent = vi.fn();
|
|
48
49
|
const manager = {
|
|
49
50
|
getActiveCalls: () => calls,
|
|
50
51
|
endCall,
|
|
52
|
+
processEvent,
|
|
51
53
|
} as unknown as CallManager;
|
|
52
54
|
|
|
53
|
-
return { manager, endCall };
|
|
55
|
+
return { manager, endCall, processEvent };
|
|
54
56
|
};
|
|
55
57
|
|
|
56
58
|
describe("VoiceCallWebhookServer stale call reaper", () => {
|
|
@@ -116,3 +118,51 @@ describe("VoiceCallWebhookServer stale call reaper", () => {
|
|
|
116
118
|
}
|
|
117
119
|
});
|
|
118
120
|
});
|
|
121
|
+
|
|
122
|
+
describe("VoiceCallWebhookServer replay handling", () => {
|
|
123
|
+
it("acknowledges replayed webhook requests and skips event side effects", async () => {
|
|
124
|
+
const replayProvider: VoiceCallProvider = {
|
|
125
|
+
...provider,
|
|
126
|
+
verifyWebhook: () => ({ ok: true, isReplay: true }),
|
|
127
|
+
parseWebhookEvent: () => ({
|
|
128
|
+
events: [
|
|
129
|
+
{
|
|
130
|
+
id: "evt-replay",
|
|
131
|
+
dedupeKey: "stable-replay",
|
|
132
|
+
type: "call.speech",
|
|
133
|
+
callId: "call-1",
|
|
134
|
+
providerCallId: "provider-call-1",
|
|
135
|
+
timestamp: Date.now(),
|
|
136
|
+
transcript: "hello",
|
|
137
|
+
isFinal: true,
|
|
138
|
+
},
|
|
139
|
+
],
|
|
140
|
+
statusCode: 200,
|
|
141
|
+
}),
|
|
142
|
+
};
|
|
143
|
+
const { manager, processEvent } = createManager([]);
|
|
144
|
+
const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
|
|
145
|
+
const server = new VoiceCallWebhookServer(config, manager, replayProvider);
|
|
146
|
+
|
|
147
|
+
try {
|
|
148
|
+
const baseUrl = await server.start();
|
|
149
|
+
const address = (
|
|
150
|
+
server as unknown as { server?: { address?: () => unknown } }
|
|
151
|
+
).server?.address?.();
|
|
152
|
+
const requestUrl = new URL(baseUrl);
|
|
153
|
+
if (address && typeof address === "object" && "port" in address && address.port) {
|
|
154
|
+
requestUrl.port = String(address.port);
|
|
155
|
+
}
|
|
156
|
+
const response = await fetch(requestUrl.toString(), {
|
|
157
|
+
method: "POST",
|
|
158
|
+
headers: { "content-type": "application/x-www-form-urlencoded" },
|
|
159
|
+
body: "CallSid=CA123&SpeechResult=hello",
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
expect(response.status).toBe(200);
|
|
163
|
+
expect(processEvent).not.toHaveBeenCalled();
|
|
164
|
+
} finally {
|
|
165
|
+
await server.stop();
|
|
166
|
+
}
|
|
167
|
+
});
|
|
168
|
+
});
|
package/src/webhook.ts
CHANGED
|
@@ -346,11 +346,15 @@ export class VoiceCallWebhookServer {
|
|
|
346
346
|
const result = this.provider.parseWebhookEvent(ctx);
|
|
347
347
|
|
|
348
348
|
// Process each event
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
349
|
+
if (verification.isReplay) {
|
|
350
|
+
console.warn("[voice-call] Replay detected; skipping event side effects");
|
|
351
|
+
} else {
|
|
352
|
+
for (const event of result.events) {
|
|
353
|
+
try {
|
|
354
|
+
this.manager.processEvent(event);
|
|
355
|
+
} catch (err) {
|
|
356
|
+
console.error(`[voice-call] Error processing event ${event.type}:`, err);
|
|
357
|
+
}
|
|
354
358
|
}
|
|
355
359
|
}
|
|
356
360
|
|