@openclaw/voice-call 2026.5.2 → 2026.5.3-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/dist/api.js +2 -0
  2. package/dist/call-status-CXldV5o8.js +32 -0
  3. package/dist/cli-metadata.js +12 -0
  4. package/dist/config-7w04YpHh.js +548 -0
  5. package/dist/config-compat-B0me39_4.js +129 -0
  6. package/dist/guarded-json-api-Btx5EE4w.js +591 -0
  7. package/dist/http-headers-BrnxBasF.js +10 -0
  8. package/dist/index.js +1284 -0
  9. package/dist/mock-CeKvfVEd.js +135 -0
  10. package/dist/plivo-B-a7KFoT.js +393 -0
  11. package/dist/realtime-handler-B63CIDP2.js +325 -0
  12. package/dist/realtime-transcription.runtime-B2h70y2W.js +2 -0
  13. package/dist/realtime-voice.runtime-Bkh4nvLn.js +2 -0
  14. package/dist/response-generator-BrcmwDZU.js +182 -0
  15. package/dist/response-model-CyF5K80p.js +12 -0
  16. package/dist/runtime-api.js +6 -0
  17. package/dist/runtime-entry-88ytYAQa.js +3119 -0
  18. package/dist/runtime-entry.js +2 -0
  19. package/dist/setup-api.js +37 -0
  20. package/dist/telnyx-jjBE8boz.js +260 -0
  21. package/dist/twilio-1OqbcXLL.js +676 -0
  22. package/dist/voice-mapping-BYDGdWGx.js +40 -0
  23. package/package.json +14 -6
  24. package/api.ts +0 -16
  25. package/cli-metadata.ts +0 -10
  26. package/config-api.ts +0 -12
  27. package/index.test.ts +0 -943
  28. package/index.ts +0 -794
  29. package/runtime-api.ts +0 -20
  30. package/runtime-entry.ts +0 -1
  31. package/setup-api.ts +0 -47
  32. package/src/allowlist.test.ts +0 -18
  33. package/src/allowlist.ts +0 -19
  34. package/src/cli.ts +0 -845
  35. package/src/config-compat.test.ts +0 -120
  36. package/src/config-compat.ts +0 -227
  37. package/src/config.test.ts +0 -479
  38. package/src/config.ts +0 -808
  39. package/src/core-bridge.ts +0 -14
  40. package/src/deep-merge.test.ts +0 -40
  41. package/src/deep-merge.ts +0 -23
  42. package/src/gateway-continue-operation.ts +0 -200
  43. package/src/http-headers.test.ts +0 -16
  44. package/src/http-headers.ts +0 -15
  45. package/src/manager/context.ts +0 -42
  46. package/src/manager/events.test.ts +0 -581
  47. package/src/manager/events.ts +0 -288
  48. package/src/manager/lifecycle.ts +0 -53
  49. package/src/manager/lookup.test.ts +0 -52
  50. package/src/manager/lookup.ts +0 -35
  51. package/src/manager/outbound.test.ts +0 -528
  52. package/src/manager/outbound.ts +0 -486
  53. package/src/manager/state.ts +0 -48
  54. package/src/manager/store.ts +0 -106
  55. package/src/manager/timers.test.ts +0 -129
  56. package/src/manager/timers.ts +0 -113
  57. package/src/manager/twiml.test.ts +0 -13
  58. package/src/manager/twiml.ts +0 -17
  59. package/src/manager.closed-loop.test.ts +0 -236
  60. package/src/manager.inbound-allowlist.test.ts +0 -188
  61. package/src/manager.notify.test.ts +0 -377
  62. package/src/manager.restore.test.ts +0 -183
  63. package/src/manager.test-harness.ts +0 -127
  64. package/src/manager.ts +0 -392
  65. package/src/media-stream.test.ts +0 -768
  66. package/src/media-stream.ts +0 -708
  67. package/src/providers/base.ts +0 -97
  68. package/src/providers/mock.test.ts +0 -78
  69. package/src/providers/mock.ts +0 -185
  70. package/src/providers/plivo.test.ts +0 -93
  71. package/src/providers/plivo.ts +0 -601
  72. package/src/providers/shared/call-status.test.ts +0 -24
  73. package/src/providers/shared/call-status.ts +0 -24
  74. package/src/providers/shared/guarded-json-api.test.ts +0 -106
  75. package/src/providers/shared/guarded-json-api.ts +0 -42
  76. package/src/providers/telnyx.test.ts +0 -340
  77. package/src/providers/telnyx.ts +0 -394
  78. package/src/providers/twilio/api.test.ts +0 -145
  79. package/src/providers/twilio/api.ts +0 -93
  80. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  81. package/src/providers/twilio/twiml-policy.ts +0 -87
  82. package/src/providers/twilio/webhook.ts +0 -34
  83. package/src/providers/twilio.test.ts +0 -591
  84. package/src/providers/twilio.ts +0 -861
  85. package/src/providers/twilio.types.ts +0 -17
  86. package/src/realtime-defaults.ts +0 -3
  87. package/src/realtime-fast-context.test.ts +0 -88
  88. package/src/realtime-fast-context.ts +0 -165
  89. package/src/realtime-transcription.runtime.ts +0 -4
  90. package/src/realtime-voice.runtime.ts +0 -5
  91. package/src/response-generator.test.ts +0 -321
  92. package/src/response-generator.ts +0 -318
  93. package/src/response-model.test.ts +0 -71
  94. package/src/response-model.ts +0 -23
  95. package/src/runtime.test.ts +0 -536
  96. package/src/runtime.ts +0 -510
  97. package/src/telephony-audio.test.ts +0 -61
  98. package/src/telephony-audio.ts +0 -12
  99. package/src/telephony-tts.test.ts +0 -196
  100. package/src/telephony-tts.ts +0 -235
  101. package/src/test-fixtures.ts +0 -73
  102. package/src/tts-provider-voice.test.ts +0 -34
  103. package/src/tts-provider-voice.ts +0 -21
  104. package/src/tunnel.test.ts +0 -166
  105. package/src/tunnel.ts +0 -314
  106. package/src/types.ts +0 -291
  107. package/src/utils.test.ts +0 -17
  108. package/src/utils.ts +0 -14
  109. package/src/voice-mapping.test.ts +0 -34
  110. package/src/voice-mapping.ts +0 -68
  111. package/src/webhook/realtime-handler.test.ts +0 -598
  112. package/src/webhook/realtime-handler.ts +0 -485
  113. package/src/webhook/stale-call-reaper.test.ts +0 -88
  114. package/src/webhook/stale-call-reaper.ts +0 -38
  115. package/src/webhook/tailscale.test.ts +0 -214
  116. package/src/webhook/tailscale.ts +0 -129
  117. package/src/webhook-exposure.test.ts +0 -33
  118. package/src/webhook-exposure.ts +0 -84
  119. package/src/webhook-security.test.ts +0 -770
  120. package/src/webhook-security.ts +0 -994
  121. package/src/webhook.hangup-once.lifecycle.test.ts +0 -135
  122. package/src/webhook.test.ts +0 -1470
  123. package/src/webhook.ts +0 -908
  124. package/src/webhook.types.ts +0 -5
  125. package/src/websocket-test-support.ts +0 -72
  126. package/tsconfig.json +0 -16
@@ -1,1470 +0,0 @@
1
- import { request, type IncomingMessage } from "node:http";
2
- import type { RealtimeTranscriptionProviderPlugin } from "openclaw/plugin-sdk/realtime-transcription";
3
- import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
4
- import {
5
- VoiceCallConfigSchema,
6
- type VoiceCallConfig,
7
- type VoiceCallConfigInput,
8
- } from "./config.js";
9
- import type { CallManager } from "./manager.js";
10
- import type { VoiceCallProvider } from "./providers/base.js";
11
- import type { TwilioProvider } from "./providers/twilio.js";
12
- import type { CallRecord, NormalizedEvent } from "./types.js";
13
- import { VoiceCallWebhookServer } from "./webhook.js";
14
- import type { RealtimeCallHandler } from "./webhook/realtime-handler.js";
15
-
16
- const mocks = vi.hoisted(() => {
17
- const realtimeTranscriptionProvider: RealtimeTranscriptionProviderPlugin = {
18
- id: "openai",
19
- label: "OpenAI",
20
- aliases: ["openai-realtime"],
21
- isConfigured: () => true,
22
- resolveConfig: ({ rawConfig }) => rawConfig,
23
- createSession: () => ({
24
- connect: async () => {},
25
- sendAudio: () => {},
26
- close: () => {},
27
- isConnected: () => true,
28
- }),
29
- };
30
-
31
- return {
32
- getRealtimeTranscriptionProvider: vi.fn<(...args: unknown[]) => unknown>(
33
- () => realtimeTranscriptionProvider,
34
- ),
35
- listRealtimeTranscriptionProviders: vi.fn(() => [realtimeTranscriptionProvider]),
36
- };
37
- });
38
-
39
- vi.mock("./realtime-transcription.runtime.js", () => ({
40
- getRealtimeTranscriptionProvider: mocks.getRealtimeTranscriptionProvider,
41
- listRealtimeTranscriptionProviders: mocks.listRealtimeTranscriptionProviders,
42
- }));
43
-
44
- const provider: VoiceCallProvider = {
45
- name: "mock",
46
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "mock:req:base" }),
47
- parseWebhookEvent: () => ({ events: [] }),
48
- initiateCall: async () => ({ providerCallId: "provider-call", status: "initiated" }),
49
- hangupCall: async () => {},
50
- playTts: async () => {},
51
- startListening: async () => {},
52
- stopListening: async () => {},
53
- getCallStatus: async () => ({ status: "in-progress", isTerminal: false }),
54
- };
55
-
56
- type TwilioProviderTestDouble = VoiceCallProvider &
57
- Pick<
58
- TwilioProvider,
59
- | "isValidStreamToken"
60
- | "registerCallStream"
61
- | "unregisterCallStream"
62
- | "hasRegisteredStream"
63
- | "clearTtsQueue"
64
- >;
65
-
66
- const createConfig = (overrides: VoiceCallConfigInput = {}): VoiceCallConfig => {
67
- const base = VoiceCallConfigSchema.parse({});
68
- base.serve.port = 0;
69
-
70
- const merged = {
71
- ...base,
72
- ...overrides,
73
- serve: {
74
- ...base.serve,
75
- ...overrides.serve,
76
- },
77
- realtime: {
78
- ...base.realtime,
79
- ...overrides.realtime,
80
- tools: overrides.realtime?.tools ?? base.realtime.tools,
81
- fastContext: {
82
- ...base.realtime.fastContext,
83
- ...overrides.realtime?.fastContext,
84
- sources: overrides.realtime?.fastContext?.sources ?? base.realtime.fastContext.sources,
85
- },
86
- providers: overrides.realtime?.providers ?? base.realtime.providers,
87
- },
88
- };
89
- const parsed = VoiceCallConfigSchema.parse({
90
- ...merged,
91
- serve: { ...merged.serve, port: merged.serve.port === 0 ? 1 : merged.serve.port },
92
- });
93
- parsed.serve.port = merged.serve.port;
94
- return parsed;
95
- };
96
-
97
- const createCall = (startedAt: number): CallRecord => ({
98
- callId: "call-1",
99
- providerCallId: "provider-call-1",
100
- provider: "mock",
101
- direction: "outbound",
102
- state: "initiated",
103
- from: "+15550001234",
104
- to: "+15550005678",
105
- startedAt,
106
- transcript: [],
107
- processedEventIds: [],
108
- });
109
-
110
- const createManager = (calls: CallRecord[]) => {
111
- const endCall = vi.fn(async () => ({ success: true }));
112
- const processEvent = vi.fn();
113
- const manager = {
114
- getActiveCalls: () => calls,
115
- endCall,
116
- processEvent,
117
- } as unknown as CallManager;
118
-
119
- return { manager, endCall, processEvent };
120
- };
121
-
122
- function hasPort(value: unknown): value is { port: number | string } {
123
- if (!value || typeof value !== "object") {
124
- return false;
125
- }
126
- const maybeAddress = value as { port?: unknown };
127
- return typeof maybeAddress.port === "number" || typeof maybeAddress.port === "string";
128
- }
129
-
130
- function requireBoundRequestUrl(server: VoiceCallWebhookServer, baseUrl: string) {
131
- const address = (
132
- server as unknown as { server?: { address?: () => unknown } }
133
- ).server?.address?.();
134
- if (!hasPort(address) || !address.port) {
135
- throw new Error("voice webhook server did not expose a bound port");
136
- }
137
- const requestUrl = new URL(baseUrl);
138
- requestUrl.port = String(address.port);
139
- return requestUrl;
140
- }
141
-
142
- function expectWebhookUrl(url: string, expectedPath: string) {
143
- const parsed = new URL(url);
144
- expect(parsed.pathname).toBe(expectedPath);
145
- expect(parsed.port).not.toBe("");
146
- expect(parsed.port).not.toBe("0");
147
- }
148
-
149
- function createTwilioVerificationProvider(
150
- overrides: Partial<TwilioProviderTestDouble> = {},
151
- ): VoiceCallProvider {
152
- return {
153
- ...provider,
154
- name: "twilio",
155
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:test" }),
156
- ...overrides,
157
- };
158
- }
159
-
160
- function createTwilioStreamingProvider(
161
- overrides: Partial<TwilioProviderTestDouble> = {},
162
- ): TwilioProviderTestDouble {
163
- return {
164
- ...createTwilioVerificationProvider({
165
- parseWebhookEvent: () => ({ events: [] }),
166
- initiateCall: async () => ({ providerCallId: "provider-call", status: "initiated" as const }),
167
- hangupCall: async () => {},
168
- playTts: async () => {},
169
- startListening: async () => {},
170
- stopListening: async () => {},
171
- getCallStatus: async () => ({ status: "in-progress", isTerminal: false }),
172
- }),
173
- isValidStreamToken: () => true,
174
- registerCallStream: () => {},
175
- unregisterCallStream: () => {},
176
- hasRegisteredStream: () => true,
177
- clearTtsQueue: () => {},
178
- ...overrides,
179
- };
180
- }
181
-
182
- describe("VoiceCallWebhookServer realtime transcription provider selection", () => {
183
- it("auto-selects the first registered provider when streaming.provider is unset", async () => {
184
- const { manager } = createManager([]);
185
- const config = createConfig({
186
- streaming: {
187
- ...createConfig().streaming,
188
- enabled: true,
189
- providers: {
190
- openai: {
191
- apiKey: "sk-test", // pragma: allowlist secret
192
- },
193
- },
194
- },
195
- });
196
- const autoSelectedProvider: RealtimeTranscriptionProviderPlugin = {
197
- id: "openai",
198
- label: "OpenAI",
199
- autoSelectOrder: 5,
200
- isConfigured: () => true,
201
- resolveConfig: ({ rawConfig }) => rawConfig,
202
- createSession: () => ({
203
- connect: async () => {},
204
- sendAudio: () => {},
205
- close: () => {},
206
- isConnected: () => true,
207
- }),
208
- };
209
- mocks.getRealtimeTranscriptionProvider.mockReturnValueOnce(undefined);
210
- mocks.listRealtimeTranscriptionProviders.mockReturnValueOnce([autoSelectedProvider]);
211
-
212
- const server = new VoiceCallWebhookServer(config, manager, provider);
213
- try {
214
- await server.start();
215
- expect(mocks.getRealtimeTranscriptionProvider).not.toHaveBeenCalled();
216
- expect(mocks.listRealtimeTranscriptionProviders).toHaveBeenCalledWith(null);
217
- expect(server.getMediaStreamHandler()).toBeTruthy();
218
- } finally {
219
- await server.stop();
220
- }
221
- });
222
- });
223
-
224
- describe("VoiceCallWebhookServer media stream client IP resolution", () => {
225
- type MediaStreamRequestDouble = {
226
- headers: Record<string, string>;
227
- socket: { remoteAddress?: string };
228
- };
229
-
230
- const resolveMediaStreamClientIp = (
231
- configOverrides: Partial<VoiceCallConfig>,
232
- requestOverrides: Partial<MediaStreamRequestDouble> = {},
233
- ): string | undefined => {
234
- const { manager } = createManager([]);
235
- const server = new VoiceCallWebhookServer(
236
- createConfig(configOverrides),
237
- manager,
238
- createTwilioStreamingProvider(),
239
- );
240
- const request = {
241
- headers: {},
242
- socket: { remoteAddress: "127.0.0.1" },
243
- ...requestOverrides,
244
- };
245
-
246
- return (
247
- server as unknown as {
248
- resolveMediaStreamClientIp: (request: MediaStreamRequestDouble) => string | undefined;
249
- }
250
- ).resolveMediaStreamClientIp(request as never);
251
- };
252
-
253
- it("uses forwarded IPs only when forwarding trust is explicitly enabled", () => {
254
- const ip = resolveMediaStreamClientIp(
255
- {
256
- webhookSecurity: {
257
- allowedHosts: [],
258
- trustForwardingHeaders: true,
259
- trustedProxyIPs: ["127.0.0.1"],
260
- },
261
- },
262
- {
263
- headers: {
264
- "x-forwarded-for": "198.51.100.10, 203.0.113.10",
265
- },
266
- },
267
- );
268
-
269
- expect(ip).toBe("203.0.113.10");
270
- });
271
-
272
- it("does not trust forwarded IPs when only allowedHosts is configured", () => {
273
- const ip = resolveMediaStreamClientIp(
274
- {
275
- webhookSecurity: {
276
- allowedHosts: ["voice.example.com"],
277
- trustForwardingHeaders: false,
278
- trustedProxyIPs: ["127.0.0.1"],
279
- },
280
- },
281
- {
282
- headers: {
283
- "x-forwarded-for": "198.51.100.10",
284
- "x-real-ip": "198.51.100.11",
285
- },
286
- },
287
- );
288
-
289
- expect(ip).toBe("127.0.0.1");
290
- });
291
-
292
- it("ignores spoofed forwarded IPs from untrusted remotes", () => {
293
- const ip = resolveMediaStreamClientIp(
294
- {
295
- webhookSecurity: {
296
- allowedHosts: [],
297
- trustForwardingHeaders: true,
298
- trustedProxyIPs: ["203.0.113.10"],
299
- },
300
- },
301
- {
302
- headers: {
303
- "x-forwarded-for": "198.51.100.10",
304
- },
305
- socket: { remoteAddress: "127.0.0.1" },
306
- },
307
- );
308
-
309
- expect(ip).toBe("127.0.0.1");
310
- });
311
-
312
- it("walks the forwarded chain from the right to support trusted multi-proxy deployments", () => {
313
- const ip = resolveMediaStreamClientIp(
314
- {
315
- webhookSecurity: {
316
- allowedHosts: [],
317
- trustForwardingHeaders: true,
318
- trustedProxyIPs: ["127.0.0.1", "203.0.113.10"],
319
- },
320
- },
321
- {
322
- headers: {
323
- "x-forwarded-for": "198.51.100.10, 203.0.113.10",
324
- },
325
- },
326
- );
327
-
328
- expect(ip).toBe("198.51.100.10");
329
- });
330
-
331
- it("ignores forwarded IPs when no trusted proxy is configured", () => {
332
- const ip = resolveMediaStreamClientIp(
333
- {
334
- webhookSecurity: {
335
- allowedHosts: [],
336
- trustForwardingHeaders: true,
337
- trustedProxyIPs: [],
338
- },
339
- },
340
- {
341
- headers: {
342
- "x-forwarded-for": "198.51.100.10",
343
- "x-real-ip": "198.51.100.11",
344
- },
345
- socket: { remoteAddress: "127.0.0.1" },
346
- },
347
- );
348
-
349
- expect(ip).toBe("127.0.0.1");
350
- });
351
-
352
- it("matches trusted proxies when the remote uses an IPv4-mapped form", () => {
353
- const ip = resolveMediaStreamClientIp(
354
- {
355
- webhookSecurity: {
356
- allowedHosts: [],
357
- trustForwardingHeaders: true,
358
- trustedProxyIPs: ["127.0.0.1", "203.0.113.10"],
359
- },
360
- },
361
- {
362
- headers: {
363
- "x-forwarded-for": "198.51.100.10, 203.0.113.10",
364
- },
365
- socket: { remoteAddress: "::ffff:127.0.0.1" },
366
- },
367
- );
368
-
369
- expect(ip).toBe("198.51.100.10");
370
- });
371
- });
372
-
373
- async function runStaleCallReaperCase(params: {
374
- callAgeMs: number;
375
- staleCallReaperSeconds: number;
376
- advanceMs: number;
377
- callOverrides?: Partial<CallRecord>;
378
- }) {
379
- const now = new Date("2026-02-16T00:00:00Z");
380
- vi.setSystemTime(now);
381
-
382
- const call = { ...createCall(now.getTime() - params.callAgeMs), ...params.callOverrides };
383
- const { manager, endCall } = createManager([call]);
384
- const config = createConfig({ staleCallReaperSeconds: params.staleCallReaperSeconds });
385
- const server = new VoiceCallWebhookServer(config, manager, provider);
386
-
387
- try {
388
- await server.start();
389
- await vi.advanceTimersByTimeAsync(params.advanceMs);
390
- return { call, endCall };
391
- } finally {
392
- await server.stop();
393
- }
394
- }
395
-
396
- async function postWebhookForm(server: VoiceCallWebhookServer, baseUrl: string, body: string) {
397
- const requestUrl = requireBoundRequestUrl(server, baseUrl);
398
- return await fetch(requestUrl.toString(), {
399
- method: "POST",
400
- headers: { "content-type": "application/x-www-form-urlencoded" },
401
- body,
402
- });
403
- }
404
-
405
- async function postWebhookFormWithHeaders(
406
- server: VoiceCallWebhookServer,
407
- baseUrl: string,
408
- body: string,
409
- headers: Record<string, string>,
410
- ) {
411
- const requestUrl = requireBoundRequestUrl(server, baseUrl);
412
- return await fetch(requestUrl.toString(), {
413
- method: "POST",
414
- headers: {
415
- "content-type": "application/x-www-form-urlencoded",
416
- ...headers,
417
- },
418
- body,
419
- });
420
- }
421
-
422
- async function postWebhookFormWithHeadersResult(
423
- server: VoiceCallWebhookServer,
424
- baseUrl: string,
425
- body: string,
426
- headers: Record<string, string>,
427
- ): Promise<
428
- | { kind: "response"; statusCode: number; body: string }
429
- | { kind: "error"; code: string | undefined }
430
- > {
431
- const requestUrl = requireBoundRequestUrl(server, baseUrl);
432
- return await new Promise((resolve) => {
433
- const req = request(
434
- {
435
- hostname: requestUrl.hostname,
436
- port: requestUrl.port,
437
- path: requestUrl.pathname + requestUrl.search,
438
- method: "POST",
439
- headers: {
440
- "content-type": "application/x-www-form-urlencoded",
441
- ...headers,
442
- },
443
- },
444
- (res) => {
445
- res.setEncoding("utf8");
446
- let responseBody = "";
447
- res.on("data", (chunk) => {
448
- responseBody += chunk;
449
- });
450
- res.on("end", () => {
451
- resolve({
452
- kind: "response",
453
- statusCode: res.statusCode ?? 0,
454
- body: responseBody,
455
- });
456
- });
457
- },
458
- );
459
- req.on("error", (error: NodeJS.ErrnoException) => {
460
- resolve({ kind: "error", code: error.code });
461
- });
462
- req.end(body);
463
- });
464
- }
465
-
466
- describe("VoiceCallWebhookServer stale call reaper", () => {
467
- beforeEach(() => {
468
- vi.useFakeTimers();
469
- });
470
-
471
- afterEach(() => {
472
- vi.useRealTimers();
473
- });
474
-
475
- it("ends calls older than staleCallReaperSeconds", async () => {
476
- const { call, endCall } = await runStaleCallReaperCase({
477
- callAgeMs: 120_000,
478
- staleCallReaperSeconds: 60,
479
- advanceMs: 30_000,
480
- });
481
- expect(endCall).toHaveBeenCalledWith(call.callId);
482
- });
483
-
484
- it("skips calls that are younger than the threshold", async () => {
485
- const { endCall } = await runStaleCallReaperCase({
486
- callAgeMs: 10_000,
487
- staleCallReaperSeconds: 60,
488
- advanceMs: 30_000,
489
- });
490
- expect(endCall).not.toHaveBeenCalled();
491
- });
492
-
493
- it("does not run when staleCallReaperSeconds is disabled", async () => {
494
- const now = new Date("2026-02-16T00:00:00Z");
495
- vi.setSystemTime(now);
496
-
497
- const call = createCall(now.getTime() - 120_000);
498
- const { manager, endCall } = createManager([call]);
499
- const config = createConfig({ staleCallReaperSeconds: 0 });
500
- const server = new VoiceCallWebhookServer(config, manager, provider);
501
-
502
- try {
503
- await server.start();
504
- await vi.advanceTimersByTimeAsync(60_000);
505
- expect(endCall).not.toHaveBeenCalled();
506
- } finally {
507
- await server.stop();
508
- }
509
- });
510
-
511
- it("does not reap calls that reached the answered state", async () => {
512
- const { endCall } = await runStaleCallReaperCase({
513
- callAgeMs: 120_000,
514
- staleCallReaperSeconds: 60,
515
- advanceMs: 30_000,
516
- callOverrides: {
517
- state: "answered",
518
- answeredAt: new Date("2026-02-15T23:58:30Z").getTime(),
519
- },
520
- });
521
- expect(endCall).not.toHaveBeenCalled();
522
- });
523
- });
524
-
525
- describe("VoiceCallWebhookServer path matching", () => {
526
- it("rejects lookalike webhook paths that only match by prefix", async () => {
527
- const verifyWebhook = vi.fn(() => ({ ok: true, verifiedRequestKey: "verified:req:prefix" }));
528
- const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
529
- const strictProvider: VoiceCallProvider = {
530
- ...provider,
531
- verifyWebhook,
532
- parseWebhookEvent,
533
- };
534
- const { manager } = createManager([]);
535
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
536
- const server = new VoiceCallWebhookServer(config, manager, strictProvider);
537
-
538
- try {
539
- const baseUrl = await server.start();
540
- const requestUrl = requireBoundRequestUrl(server, baseUrl);
541
- requestUrl.pathname = "/voice/webhook-evil";
542
-
543
- const response = await fetch(requestUrl.toString(), {
544
- method: "POST",
545
- headers: { "content-type": "application/x-www-form-urlencoded" },
546
- body: "CallSid=CA123&SpeechResult=hello",
547
- });
548
-
549
- expect(response.status).toBe(404);
550
- expect(verifyWebhook).not.toHaveBeenCalled();
551
- expect(parseWebhookEvent).not.toHaveBeenCalled();
552
- } finally {
553
- await server.stop();
554
- }
555
- });
556
- });
557
-
558
- describe("VoiceCallWebhookServer replay handling", () => {
559
- it("acknowledges replayed webhook requests and skips event side effects", async () => {
560
- const replayProvider: VoiceCallProvider = {
561
- ...provider,
562
- verifyWebhook: () => ({ ok: true, isReplay: true, verifiedRequestKey: "mock:req:replay" }),
563
- parseWebhookEvent: () => ({
564
- events: [
565
- {
566
- id: "evt-replay",
567
- dedupeKey: "stable-replay",
568
- type: "call.speech",
569
- callId: "call-1",
570
- providerCallId: "provider-call-1",
571
- timestamp: Date.now(),
572
- transcript: "hello",
573
- isFinal: true,
574
- },
575
- ],
576
- statusCode: 200,
577
- }),
578
- };
579
- const { manager, processEvent } = createManager([]);
580
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
581
- const server = new VoiceCallWebhookServer(config, manager, replayProvider);
582
-
583
- try {
584
- const baseUrl = await server.start();
585
- const response = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");
586
-
587
- expect(response.status).toBe(200);
588
- expect(processEvent).not.toHaveBeenCalled();
589
- } finally {
590
- await server.stop();
591
- }
592
- });
593
-
594
- it("returns realtime TwiML for replayed inbound twilio webhooks", async () => {
595
- const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
596
- const twilioProvider: VoiceCallProvider = {
597
- ...provider,
598
- name: "twilio",
599
- verifyWebhook: () => ({ ok: true, isReplay: true, verifiedRequestKey: "twilio:req:replay" }),
600
- parseWebhookEvent,
601
- };
602
- const { manager, processEvent } = createManager([]);
603
- const config = createConfig({
604
- provider: "twilio",
605
- inboundPolicy: "open",
606
- realtime: {
607
- enabled: true,
608
- streamPath: "/voice/stream/realtime",
609
- instructions: "Be helpful.",
610
- toolPolicy: "safe-read-only",
611
- tools: [],
612
- providers: {},
613
- },
614
- });
615
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
616
- server.setRealtimeHandler({
617
- buildTwiMLPayload: () => ({
618
- statusCode: 200,
619
- headers: { "Content-Type": "text/xml" },
620
- body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
621
- }),
622
- getStreamPathPattern: () => "/voice/stream/realtime",
623
- handleWebSocketUpgrade: () => {},
624
- registerToolHandler: () => {},
625
- setPublicUrl: () => {},
626
- } as unknown as RealtimeCallHandler);
627
-
628
- try {
629
- const baseUrl = await server.start();
630
- const response = await postWebhookFormWithHeaders(
631
- server,
632
- baseUrl,
633
- "CallSid=CA123&Direction=inbound&CallStatus=ringing",
634
- { "x-twilio-signature": "sig" },
635
- );
636
-
637
- expect(response.status).toBe(200);
638
- expect(await response.text()).toContain("<Connect><Stream");
639
- expect(parseWebhookEvent).not.toHaveBeenCalled();
640
- expect(processEvent).not.toHaveBeenCalled();
641
- } finally {
642
- await server.stop();
643
- }
644
- });
645
-
646
- it.each(["outbound-api", "outbound-dial"] as const)(
647
- "returns realtime TwiML for %s twilio TwiML fetches",
648
- async (direction) => {
649
- const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
650
- const buildTwiMLPayload = vi.fn(() => ({
651
- statusCode: 200,
652
- headers: { "Content-Type": "text/xml" },
653
- body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
654
- }));
655
- const twilioProvider: VoiceCallProvider = {
656
- ...provider,
657
- name: "twilio",
658
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-outbound" }),
659
- parseWebhookEvent,
660
- };
661
- const { manager, processEvent } = createManager([]);
662
- const config = createConfig({
663
- provider: "twilio",
664
- inboundPolicy: "disabled",
665
- realtime: {
666
- enabled: true,
667
- streamPath: "/voice/stream/realtime",
668
- instructions: "Be helpful.",
669
- toolPolicy: "safe-read-only",
670
- tools: [],
671
- providers: {},
672
- },
673
- });
674
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
675
- server.setRealtimeHandler({
676
- buildTwiMLPayload,
677
- getStreamPathPattern: () => "/voice/stream/realtime",
678
- handleWebSocketUpgrade: () => {},
679
- registerToolHandler: () => {},
680
- setPublicUrl: () => {},
681
- } as unknown as RealtimeCallHandler);
682
-
683
- try {
684
- const baseUrl = await server.start();
685
- const response = await postWebhookFormWithHeaders(
686
- server,
687
- baseUrl,
688
- `CallSid=CA123&Direction=${direction}&CallStatus=in-progress&From=%2B15550001111&To=%2B15550002222`,
689
- { "x-twilio-signature": "sig" },
690
- );
691
-
692
- expect(response.status).toBe(200);
693
- expect(await response.text()).toContain("<Connect><Stream");
694
- expect(buildTwiMLPayload).toHaveBeenCalledTimes(1);
695
- expect(parseWebhookEvent).not.toHaveBeenCalled();
696
- expect(processEvent).not.toHaveBeenCalled();
697
- } finally {
698
- await server.stop();
699
- }
700
- },
701
- );
702
-
703
- it("serves initial provider TwiML before the realtime shortcut", async () => {
704
- const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
705
- const consumeInitialTwiML = vi.fn(
706
- () =>
707
- '<Response><Play digits="ww123456#" /><Redirect method="POST">https://example.test</Redirect></Response>',
708
- );
709
- const buildTwiMLPayload = vi.fn(() => ({
710
- statusCode: 200,
711
- headers: { "Content-Type": "text/xml" },
712
- body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
713
- }));
714
- const twilioProvider: VoiceCallProvider = {
715
- ...provider,
716
- name: "twilio",
717
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-stored" }),
718
- parseWebhookEvent,
719
- consumeInitialTwiML,
720
- };
721
- const { manager, processEvent } = createManager([]);
722
- const config = createConfig({
723
- provider: "twilio",
724
- inboundPolicy: "disabled",
725
- realtime: {
726
- enabled: true,
727
- streamPath: "/voice/stream/realtime",
728
- instructions: "Be helpful.",
729
- toolPolicy: "safe-read-only",
730
- tools: [],
731
- providers: {},
732
- },
733
- });
734
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
735
- server.setRealtimeHandler({
736
- buildTwiMLPayload,
737
- getStreamPathPattern: () => "/voice/stream/realtime",
738
- handleWebSocketUpgrade: () => {},
739
- registerToolHandler: () => {},
740
- setPublicUrl: () => {},
741
- } as unknown as RealtimeCallHandler);
742
-
743
- try {
744
- const baseUrl = await server.start();
745
- const requestUrl = requireBoundRequestUrl(server, baseUrl);
746
- requestUrl.searchParams.set("callId", "call-1");
747
- const response = await fetch(requestUrl.toString(), {
748
- method: "POST",
749
- headers: {
750
- "content-type": "application/x-www-form-urlencoded",
751
- "x-twilio-signature": "sig",
752
- },
753
- body: "CallSid=CA123&Direction=outbound-api&CallStatus=in-progress&From=%2B15550001111&To=%2B15550002222",
754
- });
755
-
756
- expect(response.status).toBe(200);
757
- const body = await response.text();
758
- expect(body).toContain('<Play digits="ww123456#"');
759
- expect(consumeInitialTwiML).toHaveBeenCalledTimes(1);
760
- expect(buildTwiMLPayload).not.toHaveBeenCalled();
761
- expect(parseWebhookEvent).not.toHaveBeenCalled();
762
- expect(processEvent).not.toHaveBeenCalled();
763
- } finally {
764
- await server.stop();
765
- }
766
- });
767
-
768
- it("rejects non-allowlisted inbound realtime calls before creating a stream token", async () => {
769
- const buildTwiMLPayload = vi.fn(() => ({
770
- statusCode: 200,
771
- headers: { "Content-Type": "text/xml" },
772
- body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
773
- }));
774
- const twilioProvider: VoiceCallProvider = {
775
- ...provider,
776
- name: "twilio",
777
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-deny" }),
778
- };
779
- const { manager } = createManager([]);
780
- const config = createConfig({
781
- provider: "twilio",
782
- inboundPolicy: "allowlist",
783
- allowFrom: ["+15550001111"],
784
- realtime: {
785
- enabled: true,
786
- streamPath: "/voice/stream/realtime",
787
- instructions: "Be helpful.",
788
- toolPolicy: "safe-read-only",
789
- tools: [],
790
- providers: {},
791
- },
792
- });
793
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
794
- server.setRealtimeHandler({
795
- buildTwiMLPayload,
796
- getStreamPathPattern: () => "/voice/stream/realtime",
797
- handleWebSocketUpgrade: () => {},
798
- registerToolHandler: () => {},
799
- setPublicUrl: () => {},
800
- } as unknown as RealtimeCallHandler);
801
-
802
- try {
803
- const baseUrl = await server.start();
804
- const response = await postWebhookFormWithHeaders(
805
- server,
806
- baseUrl,
807
- "CallSid=CA123&Direction=inbound&CallStatus=ringing&From=%2B15550002222",
808
- { "x-twilio-signature": "sig" },
809
- );
810
- const body = await response.text();
811
-
812
- expect(response.status).toBe(200);
813
- expect(body).toContain("<Reject");
814
- expect(buildTwiMLPayload).not.toHaveBeenCalled();
815
- } finally {
816
- await server.stop();
817
- }
818
- });
819
-
820
- it("creates a realtime stream only for allowlisted inbound callers", async () => {
821
- const buildTwiMLPayload = vi.fn(() => ({
822
- statusCode: 200,
823
- headers: { "Content-Type": "text/xml" },
824
- body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
825
- }));
826
- const twilioProvider: VoiceCallProvider = {
827
- ...provider,
828
- name: "twilio",
829
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-allow" }),
830
- };
831
- const { manager } = createManager([]);
832
- const config = createConfig({
833
- provider: "twilio",
834
- inboundPolicy: "allowlist",
835
- allowFrom: ["+15550002222"],
836
- realtime: {
837
- enabled: true,
838
- streamPath: "/voice/stream/realtime",
839
- instructions: "Be helpful.",
840
- toolPolicy: "safe-read-only",
841
- tools: [],
842
- providers: {},
843
- },
844
- });
845
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
846
- server.setRealtimeHandler({
847
- buildTwiMLPayload,
848
- getStreamPathPattern: () => "/voice/stream/realtime",
849
- handleWebSocketUpgrade: () => {},
850
- registerToolHandler: () => {},
851
- setPublicUrl: () => {},
852
- } as unknown as RealtimeCallHandler);
853
-
854
- try {
855
- const baseUrl = await server.start();
856
- const response = await postWebhookFormWithHeaders(
857
- server,
858
- baseUrl,
859
- "CallSid=CA123&Direction=inbound&CallStatus=ringing&From=%2B15550002222",
860
- { "x-twilio-signature": "sig" },
861
- );
862
- const body = await response.text();
863
-
864
- expect(response.status).toBe(200);
865
- expect(body).toContain("<Connect><Stream");
866
- expect(buildTwiMLPayload).toHaveBeenCalledTimes(1);
867
- } finally {
868
- await server.stop();
869
- }
870
- });
871
-
872
- it("passes verified request key from verifyWebhook into parseWebhookEvent", async () => {
873
- const parseWebhookEvent = vi.fn((_ctx: unknown, options?: { verifiedRequestKey?: string }) => ({
874
- events: [
875
- {
876
- id: "evt-verified",
877
- dedupeKey: options?.verifiedRequestKey,
878
- type: "call.speech" as const,
879
- callId: "call-1",
880
- providerCallId: "provider-call-1",
881
- timestamp: Date.now(),
882
- transcript: "hello",
883
- isFinal: true,
884
- },
885
- ],
886
- statusCode: 200,
887
- }));
888
- const verifiedProvider: VoiceCallProvider = {
889
- ...provider,
890
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "verified:req:123" }),
891
- parseWebhookEvent,
892
- };
893
- const { manager, processEvent } = createManager([]);
894
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
895
- const server = new VoiceCallWebhookServer(config, manager, verifiedProvider);
896
-
897
- try {
898
- const baseUrl = await server.start();
899
- const response = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");
900
-
901
- expect(response.status).toBe(200);
902
- expect(parseWebhookEvent).toHaveBeenCalledTimes(1);
903
- const parseOptions = parseWebhookEvent.mock.calls[0]?.[1];
904
- if (!parseOptions) {
905
- throw new Error("webhook server did not pass verified parse options");
906
- }
907
- expect(parseOptions).toEqual({
908
- verifiedRequestKey: "verified:req:123",
909
- });
910
- expect(processEvent).toHaveBeenCalledTimes(1);
911
- const firstEvent = processEvent.mock.calls[0]?.[0];
912
- if (!firstEvent) {
913
- throw new Error("webhook server did not forward the parsed event");
914
- }
915
- expect(firstEvent.dedupeKey).toBe("verified:req:123");
916
- } finally {
917
- await server.stop();
918
- }
919
- });
920
-
921
- it("rejects requests when verification succeeds without a request key", async () => {
922
- const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
923
- const badProvider: VoiceCallProvider = {
924
- ...provider,
925
- verifyWebhook: () => ({ ok: true }),
926
- parseWebhookEvent,
927
- };
928
- const { manager } = createManager([]);
929
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
930
- const server = new VoiceCallWebhookServer(config, manager, badProvider);
931
-
932
- try {
933
- const baseUrl = await server.start();
934
- const response = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");
935
-
936
- expect(response.status).toBe(401);
937
- expect(parseWebhookEvent).not.toHaveBeenCalled();
938
- } finally {
939
- await server.stop();
940
- }
941
- });
942
- });
943
-
944
- describe("VoiceCallWebhookServer pre-auth webhook guards", () => {
945
- it("rejects missing signature headers before reading the request body", async () => {
946
- const verifyWebhook = vi.fn(() => ({ ok: true, verifiedRequestKey: "twilio:req:test" }));
947
- const twilioProvider = createTwilioVerificationProvider({ verifyWebhook });
948
- const { manager } = createManager([]);
949
- const config = createConfig({ provider: "twilio" });
950
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
951
- const readBodySpy = vi.spyOn(
952
- server as unknown as {
953
- readBody: (req: unknown, maxBytes: number, timeoutMs?: number) => Promise<string>;
954
- },
955
- "readBody",
956
- );
957
-
958
- try {
959
- const baseUrl = await server.start();
960
- const response = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");
961
-
962
- expect(response.status).toBe(401);
963
- expect(await response.text()).toBe("Unauthorized");
964
- expect(readBodySpy).not.toHaveBeenCalled();
965
- expect(verifyWebhook).not.toHaveBeenCalled();
966
- } finally {
967
- readBodySpy.mockRestore();
968
- await server.stop();
969
- }
970
- });
971
-
972
- it("uses the shared pre-auth body cap before verification", async () => {
973
- const verifyWebhook = vi.fn(() => ({ ok: true, verifiedRequestKey: "twilio:req:test" }));
974
- const twilioProvider = createTwilioVerificationProvider({ verifyWebhook });
975
- const { manager } = createManager([]);
976
- const config = createConfig({ provider: "twilio" });
977
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
978
-
979
- try {
980
- const baseUrl = await server.start();
981
- const responseOrError = await postWebhookFormWithHeadersResult(
982
- server,
983
- baseUrl,
984
- "CallSid=CA123&SpeechResult=".padEnd(70 * 1024, "a"),
985
- { "x-twilio-signature": "sig" },
986
- );
987
-
988
- if (responseOrError.kind === "response") {
989
- expect(responseOrError.statusCode).toBe(413);
990
- expect(responseOrError.body).toBe("Payload Too Large");
991
- } else {
992
- expect(responseOrError.code).toBeOneOf(["ECONNRESET", "EPIPE"]);
993
- }
994
- expect(verifyWebhook).not.toHaveBeenCalled();
995
- } finally {
996
- await server.stop();
997
- }
998
- });
999
-
1000
- it("limits concurrent pre-auth requests per source IP", async () => {
1001
- const twilioProvider: VoiceCallProvider = {
1002
- ...provider,
1003
- name: "twilio",
1004
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:test" }),
1005
- };
1006
- const { manager } = createManager([]);
1007
- const config = createConfig({ provider: "twilio" });
1008
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
1009
-
1010
- let enteredReads = 0;
1011
- let releaseReads!: () => void;
1012
- let unblockReadBodies!: () => void;
1013
- const enteredEightReads = new Promise<void>((resolve) => {
1014
- releaseReads = resolve;
1015
- });
1016
- const unblockReads = new Promise<void>((resolve) => {
1017
- unblockReadBodies = resolve;
1018
- });
1019
- const readBodySpy = vi.spyOn(
1020
- server as unknown as {
1021
- readBody: (req: unknown, maxBytes: number, timeoutMs?: number) => Promise<string>;
1022
- },
1023
- "readBody",
1024
- );
1025
- readBodySpy.mockImplementation(async () => {
1026
- enteredReads += 1;
1027
- if (enteredReads === 8) {
1028
- releaseReads();
1029
- }
1030
- if (enteredReads <= 8) {
1031
- await unblockReads;
1032
- }
1033
- return "CallSid=CA123&SpeechResult=hello";
1034
- });
1035
-
1036
- try {
1037
- const baseUrl = await server.start();
1038
- const headers = { "x-twilio-signature": "sig" };
1039
- const inFlightRequests = Array.from({ length: 8 }, () =>
1040
- postWebhookFormWithHeaders(server, baseUrl, "CallSid=CA123", headers),
1041
- );
1042
- await enteredEightReads;
1043
-
1044
- const rejected = await postWebhookFormWithHeaders(server, baseUrl, "CallSid=CA999", headers);
1045
- expect(rejected.status).toBe(429);
1046
- expect(await rejected.text()).toBe("Too Many Requests");
1047
-
1048
- unblockReadBodies();
1049
-
1050
- const settled = await Promise.all(inFlightRequests);
1051
- expect(settled.every((response) => response.status === 200)).toBe(true);
1052
- } finally {
1053
- unblockReadBodies();
1054
- readBodySpy.mockRestore();
1055
- await server.stop();
1056
- }
1057
- });
1058
-
1059
- it("limits missing remote addresses with a shared fallback bucket", async () => {
1060
- const twilioProvider: VoiceCallProvider = {
1061
- ...provider,
1062
- name: "twilio",
1063
- verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:test" }),
1064
- };
1065
- const { manager } = createManager([]);
1066
- const config = createConfig({ provider: "twilio" });
1067
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
1068
- const runWebhookPipeline = (
1069
- server as unknown as {
1070
- runWebhookPipeline: (
1071
- req: IncomingMessage,
1072
- webhookPath: string,
1073
- ) => Promise<{ statusCode: number; body: string }>;
1074
- }
1075
- ).runWebhookPipeline.bind(server);
1076
-
1077
- let enteredReads = 0;
1078
- let releaseReads!: () => void;
1079
- let unblockReadBodies!: () => void;
1080
- const enteredEightReads = new Promise<void>((resolve) => {
1081
- releaseReads = resolve;
1082
- });
1083
- const unblockReads = new Promise<void>((resolve) => {
1084
- unblockReadBodies = resolve;
1085
- });
1086
- const readBodySpy = vi.spyOn(
1087
- server as unknown as {
1088
- readBody: (req: unknown, maxBytes: number, timeoutMs?: number) => Promise<string>;
1089
- },
1090
- "readBody",
1091
- );
1092
- readBodySpy.mockImplementation(async () => {
1093
- enteredReads += 1;
1094
- if (enteredReads === 8) {
1095
- releaseReads();
1096
- }
1097
- await unblockReads;
1098
- return "CallSid=CA123&SpeechResult=hello";
1099
- });
1100
-
1101
- const makeRequestWithoutRemoteAddress = () =>
1102
- ({
1103
- method: "POST",
1104
- url: "/voice/webhook",
1105
- headers: { "x-twilio-signature": "sig" },
1106
- socket: { remoteAddress: undefined },
1107
- }) as unknown as IncomingMessage;
1108
-
1109
- try {
1110
- const inFlightRequests = Array.from({ length: 8 }, () =>
1111
- runWebhookPipeline(makeRequestWithoutRemoteAddress(), "/voice/webhook"),
1112
- );
1113
- await enteredEightReads;
1114
-
1115
- const rejected = await runWebhookPipeline(
1116
- makeRequestWithoutRemoteAddress(),
1117
- "/voice/webhook",
1118
- );
1119
- expect(rejected.statusCode).toBe(429);
1120
- expect(rejected.body).toBe("Too Many Requests");
1121
- expect(readBodySpy).toHaveBeenCalledTimes(8);
1122
-
1123
- unblockReadBodies();
1124
-
1125
- const settled = await Promise.all(inFlightRequests);
1126
- expect(settled.every((response) => response.statusCode === 200)).toBe(true);
1127
- } finally {
1128
- unblockReadBodies();
1129
- readBodySpy.mockRestore();
1130
- }
1131
- });
1132
- });
1133
-
1134
- describe("VoiceCallWebhookServer response normalization", () => {
1135
- it("preserves explicit empty provider response bodies", async () => {
1136
- const responseProvider: VoiceCallProvider = {
1137
- ...provider,
1138
- parseWebhookEvent: () => ({
1139
- events: [],
1140
- statusCode: 204,
1141
- providerResponseBody: "",
1142
- }),
1143
- };
1144
- const { manager } = createManager([]);
1145
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
1146
- const server = new VoiceCallWebhookServer(config, manager, responseProvider);
1147
-
1148
- try {
1149
- const baseUrl = await server.start();
1150
- const response = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");
1151
-
1152
- expect(response.status).toBe(204);
1153
- expect(await response.text()).toBe("");
1154
- } finally {
1155
- await server.stop();
1156
- }
1157
- });
1158
- });
1159
-
1160
- describe("VoiceCallWebhookServer start idempotency", () => {
1161
- it("returns existing URL when start() is called twice without stop()", async () => {
1162
- const { manager } = createManager([]);
1163
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
1164
- const server = new VoiceCallWebhookServer(config, manager, provider);
1165
-
1166
- try {
1167
- const firstUrl = await server.start();
1168
- // Second call should return immediately without EADDRINUSE
1169
- const secondUrl = await server.start();
1170
-
1171
- // Dynamic port allocations should resolve to a real listening port.
1172
- expectWebhookUrl(firstUrl, "/voice/webhook");
1173
- // Idempotent re-start should return the same already-bound URL.
1174
- expect(secondUrl).toBe(firstUrl);
1175
- expectWebhookUrl(secondUrl, "/voice/webhook");
1176
- } finally {
1177
- await server.stop();
1178
- }
1179
- });
1180
-
1181
- it("supports concurrent start() calls without double-binding the port", async () => {
1182
- const { manager } = createManager([]);
1183
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
1184
- const server = new VoiceCallWebhookServer(config, manager, provider);
1185
-
1186
- try {
1187
- const [firstUrl, secondUrl] = await Promise.all([server.start(), server.start()]);
1188
-
1189
- expectWebhookUrl(firstUrl, "/voice/webhook");
1190
- expect(secondUrl).toBe(firstUrl);
1191
- } finally {
1192
- await server.stop();
1193
- }
1194
- });
1195
-
1196
- it("can start again after stop()", async () => {
1197
- const { manager } = createManager([]);
1198
- const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
1199
- const server = new VoiceCallWebhookServer(config, manager, provider);
1200
-
1201
- const firstUrl = await server.start();
1202
- expectWebhookUrl(firstUrl, "/voice/webhook");
1203
- await server.stop();
1204
-
1205
- // After stopping, a new start should succeed
1206
- const secondUrl = await server.start();
1207
- expectWebhookUrl(secondUrl, "/voice/webhook");
1208
- await server.stop();
1209
- });
1210
-
1211
- it("stop() is safe to call when server was never started", async () => {
1212
- const { manager } = createManager([]);
1213
- const config = createConfig();
1214
- const server = new VoiceCallWebhookServer(config, manager, provider);
1215
-
1216
- // Should not throw
1217
- await server.stop();
1218
- });
1219
- });
1220
-
1221
- describe("VoiceCallWebhookServer stream disconnect grace", () => {
1222
- beforeEach(() => {
1223
- vi.useFakeTimers();
1224
- });
1225
-
1226
- afterEach(() => {
1227
- vi.useRealTimers();
1228
- });
1229
-
1230
- it("ignores stale stream disconnects after reconnect and only hangs up on current stream disconnect", async () => {
1231
- const call = createCall(Date.now() - 1_000);
1232
- call.providerCallId = "CA-stream-1";
1233
-
1234
- const endCall = vi.fn(async () => ({ success: true }));
1235
- const speakInitialMessage = vi.fn(async () => {});
1236
- const getCallByProviderCallId = vi.fn((providerCallId: string) =>
1237
- providerCallId === "CA-stream-1" ? call : undefined,
1238
- );
1239
-
1240
- const manager = {
1241
- getActiveCalls: () => [call],
1242
- getCallByProviderCallId,
1243
- endCall,
1244
- speakInitialMessage,
1245
- processEvent: vi.fn(),
1246
- } as unknown as CallManager;
1247
-
1248
- let currentStreamSid: string | null = "MZ-old";
1249
- const twilioProvider = createTwilioStreamingProvider({
1250
- registerCallStream: (_callSid: string, streamSid: string) => {
1251
- currentStreamSid = streamSid;
1252
- },
1253
- unregisterCallStream: (_callSid: string, streamSid?: string) => {
1254
- if (!currentStreamSid) {
1255
- return;
1256
- }
1257
- if (streamSid && currentStreamSid !== streamSid) {
1258
- return;
1259
- }
1260
- currentStreamSid = null;
1261
- },
1262
- hasRegisteredStream: () => currentStreamSid !== null,
1263
- });
1264
-
1265
- const config = createConfig({
1266
- provider: "twilio",
1267
- streaming: {
1268
- ...createConfig().streaming,
1269
- enabled: true,
1270
- providers: {
1271
- openai: {
1272
- apiKey: "test-key", // pragma: allowlist secret
1273
- },
1274
- },
1275
- },
1276
- });
1277
- const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
1278
- await server.start();
1279
-
1280
- const mediaHandler = server.getMediaStreamHandler() as unknown as {
1281
- config: {
1282
- onDisconnect?: (providerCallId: string, streamSid: string) => void;
1283
- onConnect?: (providerCallId: string, streamSid: string) => void;
1284
- onTranscriptionReady?: (providerCallId: string, streamSid: string) => void;
1285
- };
1286
- };
1287
- if (!mediaHandler) {
1288
- throw new Error("expected webhook server to expose a media stream handler");
1289
- }
1290
-
1291
- mediaHandler.config.onDisconnect?.("CA-stream-1", "MZ-old");
1292
- await vi.advanceTimersByTimeAsync(1_000);
1293
- mediaHandler.config.onConnect?.("CA-stream-1", "MZ-new");
1294
- await vi.advanceTimersByTimeAsync(2_100);
1295
- expect(endCall).not.toHaveBeenCalled();
1296
- expect(speakInitialMessage).not.toHaveBeenCalled();
1297
-
1298
- mediaHandler.config.onTranscriptionReady?.("CA-stream-1", "MZ-new");
1299
- expect(speakInitialMessage).toHaveBeenCalledTimes(1);
1300
- expect(speakInitialMessage).toHaveBeenCalledWith("CA-stream-1");
1301
-
1302
- mediaHandler.config.onDisconnect?.("CA-stream-1", "MZ-new");
1303
- await vi.advanceTimersByTimeAsync(2_100);
1304
- expect(endCall).toHaveBeenCalledTimes(1);
1305
- expect(endCall).toHaveBeenCalledWith(call.callId);
1306
-
1307
- await server.stop();
1308
- });
1309
- });
1310
-
1311
- describe("VoiceCallWebhookServer barge-in suppression during initial message", () => {
1312
- const createTwilioProvider = (
1313
- clearTtsQueue: ReturnType<typeof vi.fn<TwilioProviderTestDouble["clearTtsQueue"]>>,
1314
- ) =>
1315
- createTwilioStreamingProvider({
1316
- clearTtsQueue,
1317
- });
1318
-
1319
- const getMediaCallbacks = (server: VoiceCallWebhookServer) =>
1320
- server.getMediaStreamHandler() as unknown as {
1321
- config: {
1322
- onSpeechStart?: (providerCallId: string) => void;
1323
- onTranscript?: (providerCallId: string, transcript: string) => void;
1324
- };
1325
- };
1326
-
1327
- it("suppresses barge-in clear while outbound conversation initial message is pending", async () => {
1328
- const call = createCall(Date.now() - 1_000);
1329
- call.callId = "call-barge";
1330
- call.providerCallId = "CA-barge";
1331
- call.direction = "outbound";
1332
- call.state = "speaking";
1333
- call.metadata = {
1334
- mode: "conversation",
1335
- initialMessage: "Hi, this is OpenClaw.",
1336
- };
1337
-
1338
- const clearTtsQueue = vi.fn<TwilioProviderTestDouble["clearTtsQueue"]>();
1339
- const processEvent = vi.fn((event: NormalizedEvent) => {
1340
- if (event.type === "call.speech") {
1341
- // Mirrors manager behavior: call.speech transitions to listening.
1342
- call.state = "listening";
1343
- }
1344
- });
1345
- const manager = {
1346
- getActiveCalls: () => [call],
1347
- getCallByProviderCallId: (providerCallId: string) =>
1348
- providerCallId === call.providerCallId ? call : undefined,
1349
- getCall: (callId: string) => (callId === call.callId ? call : undefined),
1350
- endCall: vi.fn(async () => ({ success: true })),
1351
- speakInitialMessage: vi.fn(async () => {}),
1352
- processEvent,
1353
- } as unknown as CallManager;
1354
-
1355
- const config = createConfig({
1356
- provider: "twilio",
1357
- streaming: {
1358
- ...createConfig().streaming,
1359
- enabled: true,
1360
- providers: {
1361
- openai: {
1362
- apiKey: "test-key", // pragma: allowlist secret
1363
- },
1364
- },
1365
- },
1366
- });
1367
- const server = new VoiceCallWebhookServer(config, manager, createTwilioProvider(clearTtsQueue));
1368
- await server.start();
1369
- const handleInboundResponse = vi.fn(async () => {});
1370
- (
1371
- server as unknown as {
1372
- handleInboundResponse: (
1373
- callId: string,
1374
- transcript: string,
1375
- timing?: unknown,
1376
- ) => Promise<void>;
1377
- }
1378
- ).handleInboundResponse = handleInboundResponse;
1379
-
1380
- try {
1381
- const media = getMediaCallbacks(server);
1382
- media.config.onSpeechStart?.("CA-barge");
1383
- media.config.onTranscript?.("CA-barge", "hello");
1384
- media.config.onSpeechStart?.("CA-barge");
1385
- media.config.onTranscript?.("CA-barge", "hello again");
1386
- expect(clearTtsQueue).not.toHaveBeenCalled();
1387
- expect(handleInboundResponse).not.toHaveBeenCalled();
1388
- expect(processEvent).not.toHaveBeenCalled();
1389
-
1390
- if (call.metadata) {
1391
- delete call.metadata.initialMessage;
1392
- }
1393
- call.state = "listening";
1394
-
1395
- media.config.onSpeechStart?.("CA-barge");
1396
- media.config.onTranscript?.("CA-barge", "hello after greeting");
1397
- expect(clearTtsQueue).toHaveBeenCalledTimes(2);
1398
- expect(handleInboundResponse).toHaveBeenCalledTimes(1);
1399
- expect(processEvent).toHaveBeenCalledTimes(1);
1400
- const [calledCallId, calledTranscript] = (handleInboundResponse.mock.calls[0] ??
1401
- []) as unknown as [string | undefined, string | undefined];
1402
- expect(calledCallId).toBe(call.callId);
1403
- expect(calledTranscript).toBe("hello after greeting");
1404
- } finally {
1405
- await server.stop();
1406
- }
1407
- });
1408
-
1409
- it("keeps barge-in clear enabled for inbound calls", async () => {
1410
- const call = createCall(Date.now() - 1_000);
1411
- call.callId = "call-inbound";
1412
- call.providerCallId = "CA-inbound";
1413
- call.direction = "inbound";
1414
- call.metadata = {
1415
- initialMessage: "Hello from inbound greeting.",
1416
- };
1417
-
1418
- const clearTtsQueue = vi.fn<TwilioProviderTestDouble["clearTtsQueue"]>();
1419
- const processEvent = vi.fn();
1420
- const manager = {
1421
- getActiveCalls: () => [call],
1422
- getCallByProviderCallId: (providerCallId: string) =>
1423
- providerCallId === call.providerCallId ? call : undefined,
1424
- getCall: (callId: string) => (callId === call.callId ? call : undefined),
1425
- endCall: vi.fn(async () => ({ success: true })),
1426
- speakInitialMessage: vi.fn(async () => {}),
1427
- processEvent,
1428
- } as unknown as CallManager;
1429
-
1430
- const config = createConfig({
1431
- provider: "twilio",
1432
- streaming: {
1433
- ...createConfig().streaming,
1434
- enabled: true,
1435
- providers: {
1436
- openai: {
1437
- apiKey: "test-key", // pragma: allowlist secret
1438
- },
1439
- },
1440
- },
1441
- });
1442
- const server = new VoiceCallWebhookServer(config, manager, createTwilioProvider(clearTtsQueue));
1443
- await server.start();
1444
- const handleInboundResponse = vi.fn(async () => {});
1445
- (
1446
- server as unknown as {
1447
- handleInboundResponse: (callId: string, transcript: string) => Promise<void>;
1448
- }
1449
- ).handleInboundResponse = handleInboundResponse;
1450
-
1451
- try {
1452
- const media = getMediaCallbacks(server);
1453
- media.config.onSpeechStart?.("CA-inbound");
1454
- media.config.onTranscript?.("CA-inbound", "hello");
1455
- expect(clearTtsQueue).toHaveBeenCalledTimes(2);
1456
- expect(processEvent).toHaveBeenCalledWith(
1457
- expect.objectContaining({
1458
- type: "call.speech",
1459
- callId: "call-inbound",
1460
- providerCallId: "CA-inbound",
1461
- transcript: "hello",
1462
- isFinal: true,
1463
- }),
1464
- );
1465
- expect(handleInboundResponse).toHaveBeenCalledWith("call-inbound", "hello");
1466
- } finally {
1467
- await server.stop();
1468
- }
1469
- });
1470
- });