@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +27 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +943 -0
  6. package/index.ts +379 -149
  7. package/openclaw.plugin.json +384 -157
  8. package/package.json +35 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +273 -12
  17. package/src/config.ts +355 -72
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +243 -19
  24. package/src/manager/events.ts +61 -31
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +528 -0
  28. package/src/manager/outbound.ts +163 -57
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +321 -0
  64. package/src/response-generator.ts +213 -53
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +429 -0
  68. package/src/runtime.ts +270 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +28 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +523 -102
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
@@ -1,9 +1,45 @@
1
+ import { request, type IncomingMessage } from "node:http";
2
+ import type { RealtimeTranscriptionProviderPlugin } from "openclaw/plugin-sdk/realtime-transcription";
1
3
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
- import { VoiceCallConfigSchema, type VoiceCallConfig } from "./config.js";
4
+ import {
5
+ VoiceCallConfigSchema,
6
+ type VoiceCallConfig,
7
+ type VoiceCallConfigInput,
8
+ } from "./config.js";
3
9
  import type { CallManager } from "./manager.js";
4
10
  import type { VoiceCallProvider } from "./providers/base.js";
5
- import type { CallRecord } from "./types.js";
11
+ import type { TwilioProvider } from "./providers/twilio.js";
12
+ import type { CallRecord, NormalizedEvent } from "./types.js";
6
13
  import { VoiceCallWebhookServer } from "./webhook.js";
14
+ import type { RealtimeCallHandler } from "./webhook/realtime-handler.js";
15
+
16
+ const mocks = vi.hoisted(() => {
17
+ const realtimeTranscriptionProvider: RealtimeTranscriptionProviderPlugin = {
18
+ id: "openai",
19
+ label: "OpenAI",
20
+ aliases: ["openai-realtime"],
21
+ isConfigured: () => true,
22
+ resolveConfig: ({ rawConfig }) => rawConfig,
23
+ createSession: () => ({
24
+ connect: async () => {},
25
+ sendAudio: () => {},
26
+ close: () => {},
27
+ isConnected: () => true,
28
+ }),
29
+ };
30
+
31
+ return {
32
+ getRealtimeTranscriptionProvider: vi.fn<(...args: unknown[]) => unknown>(
33
+ () => realtimeTranscriptionProvider,
34
+ ),
35
+ listRealtimeTranscriptionProviders: vi.fn(() => [realtimeTranscriptionProvider]),
36
+ };
37
+ });
38
+
39
+ vi.mock("./realtime-transcription.runtime.js", () => ({
40
+ getRealtimeTranscriptionProvider: mocks.getRealtimeTranscriptionProvider,
41
+ listRealtimeTranscriptionProviders: mocks.listRealtimeTranscriptionProviders,
42
+ }));
7
43
 
8
44
  const provider: VoiceCallProvider = {
9
45
  name: "mock",
@@ -17,18 +53,45 @@ const provider: VoiceCallProvider = {
17
53
  getCallStatus: async () => ({ status: "in-progress", isTerminal: false }),
18
54
  };
19
55
 
20
- const createConfig = (overrides: Partial<VoiceCallConfig> = {}): VoiceCallConfig => {
56
+ type TwilioProviderTestDouble = VoiceCallProvider &
57
+ Pick<
58
+ TwilioProvider,
59
+ | "isValidStreamToken"
60
+ | "registerCallStream"
61
+ | "unregisterCallStream"
62
+ | "hasRegisteredStream"
63
+ | "clearTtsQueue"
64
+ >;
65
+
66
+ const createConfig = (overrides: VoiceCallConfigInput = {}): VoiceCallConfig => {
21
67
  const base = VoiceCallConfigSchema.parse({});
22
68
  base.serve.port = 0;
23
69
 
24
- return {
70
+ const merged = {
25
71
  ...base,
26
72
  ...overrides,
27
73
  serve: {
28
74
  ...base.serve,
29
- ...(overrides.serve ?? {}),
75
+ ...overrides.serve,
76
+ },
77
+ realtime: {
78
+ ...base.realtime,
79
+ ...overrides.realtime,
80
+ tools: overrides.realtime?.tools ?? base.realtime.tools,
81
+ fastContext: {
82
+ ...base.realtime.fastContext,
83
+ ...overrides.realtime?.fastContext,
84
+ sources: overrides.realtime?.fastContext?.sources ?? base.realtime.fastContext.sources,
85
+ },
86
+ providers: overrides.realtime?.providers ?? base.realtime.providers,
30
87
  },
31
88
  };
89
+ const parsed = VoiceCallConfigSchema.parse({
90
+ ...merged,
91
+ serve: { ...merged.serve, port: merged.serve.port === 0 ? 1 : merged.serve.port },
92
+ });
93
+ parsed.serve.port = merged.serve.port;
94
+ return parsed;
32
95
  };
33
96
 
34
97
  const createCall = (startedAt: number): CallRecord => ({
@@ -56,15 +119,267 @@ const createManager = (calls: CallRecord[]) => {
56
119
  return { manager, endCall, processEvent };
57
120
  };
58
121
 
122
+ function hasPort(value: unknown): value is { port: number | string } {
123
+ if (!value || typeof value !== "object") {
124
+ return false;
125
+ }
126
+ const maybeAddress = value as { port?: unknown };
127
+ return typeof maybeAddress.port === "number" || typeof maybeAddress.port === "string";
128
+ }
129
+
130
+ function requireBoundRequestUrl(server: VoiceCallWebhookServer, baseUrl: string) {
131
+ const address = (
132
+ server as unknown as { server?: { address?: () => unknown } }
133
+ ).server?.address?.();
134
+ if (!hasPort(address) || !address.port) {
135
+ throw new Error("voice webhook server did not expose a bound port");
136
+ }
137
+ const requestUrl = new URL(baseUrl);
138
+ requestUrl.port = String(address.port);
139
+ return requestUrl;
140
+ }
141
+
142
+ function expectWebhookUrl(url: string, expectedPath: string) {
143
+ const parsed = new URL(url);
144
+ expect(parsed.pathname).toBe(expectedPath);
145
+ expect(parsed.port).not.toBe("");
146
+ expect(parsed.port).not.toBe("0");
147
+ }
148
+
149
+ function createTwilioVerificationProvider(
150
+ overrides: Partial<TwilioProviderTestDouble> = {},
151
+ ): VoiceCallProvider {
152
+ return {
153
+ ...provider,
154
+ name: "twilio",
155
+ verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:test" }),
156
+ ...overrides,
157
+ };
158
+ }
159
+
160
+ function createTwilioStreamingProvider(
161
+ overrides: Partial<TwilioProviderTestDouble> = {},
162
+ ): TwilioProviderTestDouble {
163
+ return {
164
+ ...createTwilioVerificationProvider({
165
+ parseWebhookEvent: () => ({ events: [] }),
166
+ initiateCall: async () => ({ providerCallId: "provider-call", status: "initiated" as const }),
167
+ hangupCall: async () => {},
168
+ playTts: async () => {},
169
+ startListening: async () => {},
170
+ stopListening: async () => {},
171
+ getCallStatus: async () => ({ status: "in-progress", isTerminal: false }),
172
+ }),
173
+ isValidStreamToken: () => true,
174
+ registerCallStream: () => {},
175
+ unregisterCallStream: () => {},
176
+ hasRegisteredStream: () => true,
177
+ clearTtsQueue: () => {},
178
+ ...overrides,
179
+ };
180
+ }
181
+
182
+ describe("VoiceCallWebhookServer realtime transcription provider selection", () => {
183
+ it("auto-selects the first registered provider when streaming.provider is unset", async () => {
184
+ const { manager } = createManager([]);
185
+ const config = createConfig({
186
+ streaming: {
187
+ ...createConfig().streaming,
188
+ enabled: true,
189
+ providers: {
190
+ openai: {
191
+ apiKey: "sk-test", // pragma: allowlist secret
192
+ },
193
+ },
194
+ },
195
+ });
196
+ const autoSelectedProvider: RealtimeTranscriptionProviderPlugin = {
197
+ id: "openai",
198
+ label: "OpenAI",
199
+ autoSelectOrder: 5,
200
+ isConfigured: () => true,
201
+ resolveConfig: ({ rawConfig }) => rawConfig,
202
+ createSession: () => ({
203
+ connect: async () => {},
204
+ sendAudio: () => {},
205
+ close: () => {},
206
+ isConnected: () => true,
207
+ }),
208
+ };
209
+ mocks.getRealtimeTranscriptionProvider.mockReturnValueOnce(undefined);
210
+ mocks.listRealtimeTranscriptionProviders.mockReturnValueOnce([autoSelectedProvider]);
211
+
212
+ const server = new VoiceCallWebhookServer(config, manager, provider);
213
+ try {
214
+ await server.start();
215
+ expect(mocks.getRealtimeTranscriptionProvider).not.toHaveBeenCalled();
216
+ expect(mocks.listRealtimeTranscriptionProviders).toHaveBeenCalledWith(null);
217
+ expect(server.getMediaStreamHandler()).toBeTruthy();
218
+ } finally {
219
+ await server.stop();
220
+ }
221
+ });
222
+ });
223
+
224
+ describe("VoiceCallWebhookServer media stream client IP resolution", () => {
225
+ type MediaStreamRequestDouble = {
226
+ headers: Record<string, string>;
227
+ socket: { remoteAddress?: string };
228
+ };
229
+
230
+ const resolveMediaStreamClientIp = (
231
+ configOverrides: Partial<VoiceCallConfig>,
232
+ requestOverrides: Partial<MediaStreamRequestDouble> = {},
233
+ ): string | undefined => {
234
+ const { manager } = createManager([]);
235
+ const server = new VoiceCallWebhookServer(
236
+ createConfig(configOverrides),
237
+ manager,
238
+ createTwilioStreamingProvider(),
239
+ );
240
+ const request = {
241
+ headers: {},
242
+ socket: { remoteAddress: "127.0.0.1" },
243
+ ...requestOverrides,
244
+ };
245
+
246
+ return (
247
+ server as unknown as {
248
+ resolveMediaStreamClientIp: (request: MediaStreamRequestDouble) => string | undefined;
249
+ }
250
+ ).resolveMediaStreamClientIp(request as never);
251
+ };
252
+
253
+ it("uses forwarded IPs only when forwarding trust is explicitly enabled", () => {
254
+ const ip = resolveMediaStreamClientIp(
255
+ {
256
+ webhookSecurity: {
257
+ allowedHosts: [],
258
+ trustForwardingHeaders: true,
259
+ trustedProxyIPs: ["127.0.0.1"],
260
+ },
261
+ },
262
+ {
263
+ headers: {
264
+ "x-forwarded-for": "198.51.100.10, 203.0.113.10",
265
+ },
266
+ },
267
+ );
268
+
269
+ expect(ip).toBe("203.0.113.10");
270
+ });
271
+
272
+ it("does not trust forwarded IPs when only allowedHosts is configured", () => {
273
+ const ip = resolveMediaStreamClientIp(
274
+ {
275
+ webhookSecurity: {
276
+ allowedHosts: ["voice.example.com"],
277
+ trustForwardingHeaders: false,
278
+ trustedProxyIPs: ["127.0.0.1"],
279
+ },
280
+ },
281
+ {
282
+ headers: {
283
+ "x-forwarded-for": "198.51.100.10",
284
+ "x-real-ip": "198.51.100.11",
285
+ },
286
+ },
287
+ );
288
+
289
+ expect(ip).toBe("127.0.0.1");
290
+ });
291
+
292
+ it("ignores spoofed forwarded IPs from untrusted remotes", () => {
293
+ const ip = resolveMediaStreamClientIp(
294
+ {
295
+ webhookSecurity: {
296
+ allowedHosts: [],
297
+ trustForwardingHeaders: true,
298
+ trustedProxyIPs: ["203.0.113.10"],
299
+ },
300
+ },
301
+ {
302
+ headers: {
303
+ "x-forwarded-for": "198.51.100.10",
304
+ },
305
+ socket: { remoteAddress: "127.0.0.1" },
306
+ },
307
+ );
308
+
309
+ expect(ip).toBe("127.0.0.1");
310
+ });
311
+
312
+ it("walks the forwarded chain from the right to support trusted multi-proxy deployments", () => {
313
+ const ip = resolveMediaStreamClientIp(
314
+ {
315
+ webhookSecurity: {
316
+ allowedHosts: [],
317
+ trustForwardingHeaders: true,
318
+ trustedProxyIPs: ["127.0.0.1", "203.0.113.10"],
319
+ },
320
+ },
321
+ {
322
+ headers: {
323
+ "x-forwarded-for": "198.51.100.10, 203.0.113.10",
324
+ },
325
+ },
326
+ );
327
+
328
+ expect(ip).toBe("198.51.100.10");
329
+ });
330
+
331
+ it("ignores forwarded IPs when no trusted proxy is configured", () => {
332
+ const ip = resolveMediaStreamClientIp(
333
+ {
334
+ webhookSecurity: {
335
+ allowedHosts: [],
336
+ trustForwardingHeaders: true,
337
+ trustedProxyIPs: [],
338
+ },
339
+ },
340
+ {
341
+ headers: {
342
+ "x-forwarded-for": "198.51.100.10",
343
+ "x-real-ip": "198.51.100.11",
344
+ },
345
+ socket: { remoteAddress: "127.0.0.1" },
346
+ },
347
+ );
348
+
349
+ expect(ip).toBe("127.0.0.1");
350
+ });
351
+
352
+ it("matches trusted proxies when the remote uses an IPv4-mapped form", () => {
353
+ const ip = resolveMediaStreamClientIp(
354
+ {
355
+ webhookSecurity: {
356
+ allowedHosts: [],
357
+ trustForwardingHeaders: true,
358
+ trustedProxyIPs: ["127.0.0.1", "203.0.113.10"],
359
+ },
360
+ },
361
+ {
362
+ headers: {
363
+ "x-forwarded-for": "198.51.100.10, 203.0.113.10",
364
+ },
365
+ socket: { remoteAddress: "::ffff:127.0.0.1" },
366
+ },
367
+ );
368
+
369
+ expect(ip).toBe("198.51.100.10");
370
+ });
371
+ });
372
+
59
373
  async function runStaleCallReaperCase(params: {
60
374
  callAgeMs: number;
61
375
  staleCallReaperSeconds: number;
62
376
  advanceMs: number;
377
+ callOverrides?: Partial<CallRecord>;
63
378
  }) {
64
379
  const now = new Date("2026-02-16T00:00:00Z");
65
380
  vi.setSystemTime(now);
66
381
 
67
- const call = createCall(now.getTime() - params.callAgeMs);
382
+ const call = { ...createCall(now.getTime() - params.callAgeMs), ...params.callOverrides };
68
383
  const { manager, endCall } = createManager([call]);
69
384
  const config = createConfig({ staleCallReaperSeconds: params.staleCallReaperSeconds });
70
385
  const server = new VoiceCallWebhookServer(config, manager, provider);
@@ -79,13 +394,7 @@ async function runStaleCallReaperCase(params: {
79
394
  }
80
395
 
81
396
  async function postWebhookForm(server: VoiceCallWebhookServer, baseUrl: string, body: string) {
82
- const address = (
83
- server as unknown as { server?: { address?: () => unknown } }
84
- ).server?.address?.();
85
- const requestUrl = new URL(baseUrl);
86
- if (address && typeof address === "object" && "port" in address && address.port) {
87
- requestUrl.port = String(address.port);
88
- }
397
+ const requestUrl = requireBoundRequestUrl(server, baseUrl);
89
398
  return await fetch(requestUrl.toString(), {
90
399
  method: "POST",
91
400
  headers: { "content-type": "application/x-www-form-urlencoded" },
@@ -93,6 +402,67 @@ async function postWebhookForm(server: VoiceCallWebhookServer, baseUrl: string,
93
402
  });
94
403
  }
95
404
 
405
+ async function postWebhookFormWithHeaders(
406
+ server: VoiceCallWebhookServer,
407
+ baseUrl: string,
408
+ body: string,
409
+ headers: Record<string, string>,
410
+ ) {
411
+ const requestUrl = requireBoundRequestUrl(server, baseUrl);
412
+ return await fetch(requestUrl.toString(), {
413
+ method: "POST",
414
+ headers: {
415
+ "content-type": "application/x-www-form-urlencoded",
416
+ ...headers,
417
+ },
418
+ body,
419
+ });
420
+ }
421
+
422
+ async function postWebhookFormWithHeadersResult(
423
+ server: VoiceCallWebhookServer,
424
+ baseUrl: string,
425
+ body: string,
426
+ headers: Record<string, string>,
427
+ ): Promise<
428
+ | { kind: "response"; statusCode: number; body: string }
429
+ | { kind: "error"; code: string | undefined }
430
+ > {
431
+ const requestUrl = requireBoundRequestUrl(server, baseUrl);
432
+ return await new Promise((resolve) => {
433
+ const req = request(
434
+ {
435
+ hostname: requestUrl.hostname,
436
+ port: requestUrl.port,
437
+ path: requestUrl.pathname + requestUrl.search,
438
+ method: "POST",
439
+ headers: {
440
+ "content-type": "application/x-www-form-urlencoded",
441
+ ...headers,
442
+ },
443
+ },
444
+ (res) => {
445
+ res.setEncoding("utf8");
446
+ let responseBody = "";
447
+ res.on("data", (chunk) => {
448
+ responseBody += chunk;
449
+ });
450
+ res.on("end", () => {
451
+ resolve({
452
+ kind: "response",
453
+ statusCode: res.statusCode ?? 0,
454
+ body: responseBody,
455
+ });
456
+ });
457
+ },
458
+ );
459
+ req.on("error", (error: NodeJS.ErrnoException) => {
460
+ resolve({ kind: "error", code: error.code });
461
+ });
462
+ req.end(body);
463
+ });
464
+ }
465
+
96
466
  describe("VoiceCallWebhookServer stale call reaper", () => {
97
467
  beforeEach(() => {
98
468
  vi.useFakeTimers();
@@ -137,6 +507,19 @@ describe("VoiceCallWebhookServer stale call reaper", () => {
137
507
  await server.stop();
138
508
  }
139
509
  });
510
+
511
+ it("does not reap calls that reached the answered state", async () => {
512
+ const { endCall } = await runStaleCallReaperCase({
513
+ callAgeMs: 120_000,
514
+ staleCallReaperSeconds: 60,
515
+ advanceMs: 30_000,
516
+ callOverrides: {
517
+ state: "answered",
518
+ answeredAt: new Date("2026-02-15T23:58:30Z").getTime(),
519
+ },
520
+ });
521
+ expect(endCall).not.toHaveBeenCalled();
522
+ });
140
523
  });
141
524
 
142
525
  describe("VoiceCallWebhookServer path matching", () => {
@@ -154,13 +537,7 @@ describe("VoiceCallWebhookServer path matching", () => {
154
537
 
155
538
  try {
156
539
  const baseUrl = await server.start();
157
- const address = (
158
- server as unknown as { server?: { address?: () => unknown } }
159
- ).server?.address?.();
160
- const requestUrl = new URL(baseUrl);
161
- if (address && typeof address === "object" && "port" in address && address.port) {
162
- requestUrl.port = String(address.port);
163
- }
540
+ const requestUrl = requireBoundRequestUrl(server, baseUrl);
164
541
  requestUrl.pathname = "/voice/webhook-evil";
165
542
 
166
543
  const response = await fetch(requestUrl.toString(), {
@@ -214,6 +591,284 @@ describe("VoiceCallWebhookServer replay handling", () => {
214
591
  }
215
592
  });
216
593
 
594
+ it("returns realtime TwiML for replayed inbound twilio webhooks", async () => {
595
+ const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
596
+ const twilioProvider: VoiceCallProvider = {
597
+ ...provider,
598
+ name: "twilio",
599
+ verifyWebhook: () => ({ ok: true, isReplay: true, verifiedRequestKey: "twilio:req:replay" }),
600
+ parseWebhookEvent,
601
+ };
602
+ const { manager, processEvent } = createManager([]);
603
+ const config = createConfig({
604
+ provider: "twilio",
605
+ inboundPolicy: "open",
606
+ realtime: {
607
+ enabled: true,
608
+ streamPath: "/voice/stream/realtime",
609
+ instructions: "Be helpful.",
610
+ toolPolicy: "safe-read-only",
611
+ tools: [],
612
+ providers: {},
613
+ },
614
+ });
615
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
616
+ server.setRealtimeHandler({
617
+ buildTwiMLPayload: () => ({
618
+ statusCode: 200,
619
+ headers: { "Content-Type": "text/xml" },
620
+ body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
621
+ }),
622
+ getStreamPathPattern: () => "/voice/stream/realtime",
623
+ handleWebSocketUpgrade: () => {},
624
+ registerToolHandler: () => {},
625
+ setPublicUrl: () => {},
626
+ } as unknown as RealtimeCallHandler);
627
+
628
+ try {
629
+ const baseUrl = await server.start();
630
+ const response = await postWebhookFormWithHeaders(
631
+ server,
632
+ baseUrl,
633
+ "CallSid=CA123&Direction=inbound&CallStatus=ringing",
634
+ { "x-twilio-signature": "sig" },
635
+ );
636
+
637
+ expect(response.status).toBe(200);
638
+ expect(await response.text()).toContain("<Connect><Stream");
639
+ expect(parseWebhookEvent).not.toHaveBeenCalled();
640
+ expect(processEvent).not.toHaveBeenCalled();
641
+ } finally {
642
+ await server.stop();
643
+ }
644
+ });
645
+
646
+ it.each(["outbound-api", "outbound-dial"] as const)(
647
+ "returns realtime TwiML for %s twilio TwiML fetches",
648
+ async (direction) => {
649
+ const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
650
+ const buildTwiMLPayload = vi.fn(() => ({
651
+ statusCode: 200,
652
+ headers: { "Content-Type": "text/xml" },
653
+ body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
654
+ }));
655
+ const twilioProvider: VoiceCallProvider = {
656
+ ...provider,
657
+ name: "twilio",
658
+ verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-outbound" }),
659
+ parseWebhookEvent,
660
+ };
661
+ const { manager, processEvent } = createManager([]);
662
+ const config = createConfig({
663
+ provider: "twilio",
664
+ inboundPolicy: "disabled",
665
+ realtime: {
666
+ enabled: true,
667
+ streamPath: "/voice/stream/realtime",
668
+ instructions: "Be helpful.",
669
+ toolPolicy: "safe-read-only",
670
+ tools: [],
671
+ providers: {},
672
+ },
673
+ });
674
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
675
+ server.setRealtimeHandler({
676
+ buildTwiMLPayload,
677
+ getStreamPathPattern: () => "/voice/stream/realtime",
678
+ handleWebSocketUpgrade: () => {},
679
+ registerToolHandler: () => {},
680
+ setPublicUrl: () => {},
681
+ } as unknown as RealtimeCallHandler);
682
+
683
+ try {
684
+ const baseUrl = await server.start();
685
+ const response = await postWebhookFormWithHeaders(
686
+ server,
687
+ baseUrl,
688
+ `CallSid=CA123&Direction=${direction}&CallStatus=in-progress&From=%2B15550001111&To=%2B15550002222`,
689
+ { "x-twilio-signature": "sig" },
690
+ );
691
+
692
+ expect(response.status).toBe(200);
693
+ expect(await response.text()).toContain("<Connect><Stream");
694
+ expect(buildTwiMLPayload).toHaveBeenCalledTimes(1);
695
+ expect(parseWebhookEvent).not.toHaveBeenCalled();
696
+ expect(processEvent).not.toHaveBeenCalled();
697
+ } finally {
698
+ await server.stop();
699
+ }
700
+ },
701
+ );
702
+
703
+ it("serves initial provider TwiML before the realtime shortcut", async () => {
704
+ const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
705
+ const consumeInitialTwiML = vi.fn(
706
+ () =>
707
+ '<Response><Play digits="ww123456#" /><Redirect method="POST">https://example.test</Redirect></Response>',
708
+ );
709
+ const buildTwiMLPayload = vi.fn(() => ({
710
+ statusCode: 200,
711
+ headers: { "Content-Type": "text/xml" },
712
+ body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
713
+ }));
714
+ const twilioProvider: VoiceCallProvider = {
715
+ ...provider,
716
+ name: "twilio",
717
+ verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-stored" }),
718
+ parseWebhookEvent,
719
+ consumeInitialTwiML,
720
+ };
721
+ const { manager, processEvent } = createManager([]);
722
+ const config = createConfig({
723
+ provider: "twilio",
724
+ inboundPolicy: "disabled",
725
+ realtime: {
726
+ enabled: true,
727
+ streamPath: "/voice/stream/realtime",
728
+ instructions: "Be helpful.",
729
+ toolPolicy: "safe-read-only",
730
+ tools: [],
731
+ providers: {},
732
+ },
733
+ });
734
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
735
+ server.setRealtimeHandler({
736
+ buildTwiMLPayload,
737
+ getStreamPathPattern: () => "/voice/stream/realtime",
738
+ handleWebSocketUpgrade: () => {},
739
+ registerToolHandler: () => {},
740
+ setPublicUrl: () => {},
741
+ } as unknown as RealtimeCallHandler);
742
+
743
+ try {
744
+ const baseUrl = await server.start();
745
+ const requestUrl = requireBoundRequestUrl(server, baseUrl);
746
+ requestUrl.searchParams.set("callId", "call-1");
747
+ const response = await fetch(requestUrl.toString(), {
748
+ method: "POST",
749
+ headers: {
750
+ "content-type": "application/x-www-form-urlencoded",
751
+ "x-twilio-signature": "sig",
752
+ },
753
+ body: "CallSid=CA123&Direction=outbound-api&CallStatus=in-progress&From=%2B15550001111&To=%2B15550002222",
754
+ });
755
+
756
+ expect(response.status).toBe(200);
757
+ const body = await response.text();
758
+ expect(body).toContain('<Play digits="ww123456#"');
759
+ expect(consumeInitialTwiML).toHaveBeenCalledTimes(1);
760
+ expect(buildTwiMLPayload).not.toHaveBeenCalled();
761
+ expect(parseWebhookEvent).not.toHaveBeenCalled();
762
+ expect(processEvent).not.toHaveBeenCalled();
763
+ } finally {
764
+ await server.stop();
765
+ }
766
+ });
767
+
768
+ it("rejects non-allowlisted inbound realtime calls before creating a stream token", async () => {
769
+ const buildTwiMLPayload = vi.fn(() => ({
770
+ statusCode: 200,
771
+ headers: { "Content-Type": "text/xml" },
772
+ body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
773
+ }));
774
+ const twilioProvider: VoiceCallProvider = {
775
+ ...provider,
776
+ name: "twilio",
777
+ verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-deny" }),
778
+ };
779
+ const { manager } = createManager([]);
780
+ const config = createConfig({
781
+ provider: "twilio",
782
+ inboundPolicy: "allowlist",
783
+ allowFrom: ["+15550001111"],
784
+ realtime: {
785
+ enabled: true,
786
+ streamPath: "/voice/stream/realtime",
787
+ instructions: "Be helpful.",
788
+ toolPolicy: "safe-read-only",
789
+ tools: [],
790
+ providers: {},
791
+ },
792
+ });
793
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
794
+ server.setRealtimeHandler({
795
+ buildTwiMLPayload,
796
+ getStreamPathPattern: () => "/voice/stream/realtime",
797
+ handleWebSocketUpgrade: () => {},
798
+ registerToolHandler: () => {},
799
+ setPublicUrl: () => {},
800
+ } as unknown as RealtimeCallHandler);
801
+
802
+ try {
803
+ const baseUrl = await server.start();
804
+ const response = await postWebhookFormWithHeaders(
805
+ server,
806
+ baseUrl,
807
+ "CallSid=CA123&Direction=inbound&CallStatus=ringing&From=%2B15550002222",
808
+ { "x-twilio-signature": "sig" },
809
+ );
810
+ const body = await response.text();
811
+
812
+ expect(response.status).toBe(200);
813
+ expect(body).toContain("<Reject");
814
+ expect(buildTwiMLPayload).not.toHaveBeenCalled();
815
+ } finally {
816
+ await server.stop();
817
+ }
818
+ });
819
+
820
+ it("creates a realtime stream only for allowlisted inbound callers", async () => {
821
+ const buildTwiMLPayload = vi.fn(() => ({
822
+ statusCode: 200,
823
+ headers: { "Content-Type": "text/xml" },
824
+ body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
825
+ }));
826
+ const twilioProvider: VoiceCallProvider = {
827
+ ...provider,
828
+ name: "twilio",
829
+ verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-allow" }),
830
+ };
831
+ const { manager } = createManager([]);
832
+ const config = createConfig({
833
+ provider: "twilio",
834
+ inboundPolicy: "allowlist",
835
+ allowFrom: ["+15550002222"],
836
+ realtime: {
837
+ enabled: true,
838
+ streamPath: "/voice/stream/realtime",
839
+ instructions: "Be helpful.",
840
+ toolPolicy: "safe-read-only",
841
+ tools: [],
842
+ providers: {},
843
+ },
844
+ });
845
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
846
+ server.setRealtimeHandler({
847
+ buildTwiMLPayload,
848
+ getStreamPathPattern: () => "/voice/stream/realtime",
849
+ handleWebSocketUpgrade: () => {},
850
+ registerToolHandler: () => {},
851
+ setPublicUrl: () => {},
852
+ } as unknown as RealtimeCallHandler);
853
+
854
+ try {
855
+ const baseUrl = await server.start();
856
+ const response = await postWebhookFormWithHeaders(
857
+ server,
858
+ baseUrl,
859
+ "CallSid=CA123&Direction=inbound&CallStatus=ringing&From=%2B15550002222",
860
+ { "x-twilio-signature": "sig" },
861
+ );
862
+ const body = await response.text();
863
+
864
+ expect(response.status).toBe(200);
865
+ expect(body).toContain("<Connect><Stream");
866
+ expect(buildTwiMLPayload).toHaveBeenCalledTimes(1);
867
+ } finally {
868
+ await server.stop();
869
+ }
870
+ });
871
+
217
872
  it("passes verified request key from verifyWebhook into parseWebhookEvent", async () => {
218
873
  const parseWebhookEvent = vi.fn((_ctx: unknown, options?: { verifiedRequestKey?: string }) => ({
219
874
  events: [
@@ -245,11 +900,19 @@ describe("VoiceCallWebhookServer replay handling", () => {
245
900
 
246
901
  expect(response.status).toBe(200);
247
902
  expect(parseWebhookEvent).toHaveBeenCalledTimes(1);
248
- expect(parseWebhookEvent.mock.calls[0]?.[1]).toEqual({
903
+ const parseOptions = parseWebhookEvent.mock.calls[0]?.[1];
904
+ if (!parseOptions) {
905
+ throw new Error("webhook server did not pass verified parse options");
906
+ }
907
+ expect(parseOptions).toEqual({
249
908
  verifiedRequestKey: "verified:req:123",
250
909
  });
251
910
  expect(processEvent).toHaveBeenCalledTimes(1);
252
- expect(processEvent.mock.calls[0]?.[0]?.dedupeKey).toBe("verified:req:123");
911
+ const firstEvent = processEvent.mock.calls[0]?.[0];
912
+ if (!firstEvent) {
913
+ throw new Error("webhook server did not forward the parsed event");
914
+ }
915
+ expect(firstEvent.dedupeKey).toBe("verified:req:123");
253
916
  } finally {
254
917
  await server.stop();
255
918
  }
@@ -278,6 +941,196 @@ describe("VoiceCallWebhookServer replay handling", () => {
278
941
  });
279
942
  });
280
943
 
944
+ describe("VoiceCallWebhookServer pre-auth webhook guards", () => {
945
+ it("rejects missing signature headers before reading the request body", async () => {
946
+ const verifyWebhook = vi.fn(() => ({ ok: true, verifiedRequestKey: "twilio:req:test" }));
947
+ const twilioProvider = createTwilioVerificationProvider({ verifyWebhook });
948
+ const { manager } = createManager([]);
949
+ const config = createConfig({ provider: "twilio" });
950
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
951
+ const readBodySpy = vi.spyOn(
952
+ server as unknown as {
953
+ readBody: (req: unknown, maxBytes: number, timeoutMs?: number) => Promise<string>;
954
+ },
955
+ "readBody",
956
+ );
957
+
958
+ try {
959
+ const baseUrl = await server.start();
960
+ const response = await postWebhookForm(server, baseUrl, "CallSid=CA123&SpeechResult=hello");
961
+
962
+ expect(response.status).toBe(401);
963
+ expect(await response.text()).toBe("Unauthorized");
964
+ expect(readBodySpy).not.toHaveBeenCalled();
965
+ expect(verifyWebhook).not.toHaveBeenCalled();
966
+ } finally {
967
+ readBodySpy.mockRestore();
968
+ await server.stop();
969
+ }
970
+ });
971
+
972
+ it("uses the shared pre-auth body cap before verification", async () => {
973
+ const verifyWebhook = vi.fn(() => ({ ok: true, verifiedRequestKey: "twilio:req:test" }));
974
+ const twilioProvider = createTwilioVerificationProvider({ verifyWebhook });
975
+ const { manager } = createManager([]);
976
+ const config = createConfig({ provider: "twilio" });
977
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
978
+
979
+ try {
980
+ const baseUrl = await server.start();
981
+ const responseOrError = await postWebhookFormWithHeadersResult(
982
+ server,
983
+ baseUrl,
984
+ "CallSid=CA123&SpeechResult=".padEnd(70 * 1024, "a"),
985
+ { "x-twilio-signature": "sig" },
986
+ );
987
+
988
+ if (responseOrError.kind === "response") {
989
+ expect(responseOrError.statusCode).toBe(413);
990
+ expect(responseOrError.body).toBe("Payload Too Large");
991
+ } else {
992
+ expect(responseOrError.code).toBeOneOf(["ECONNRESET", "EPIPE"]);
993
+ }
994
+ expect(verifyWebhook).not.toHaveBeenCalled();
995
+ } finally {
996
+ await server.stop();
997
+ }
998
+ });
999
+
1000
+ it("limits concurrent pre-auth requests per source IP", async () => {
1001
+ const twilioProvider: VoiceCallProvider = {
1002
+ ...provider,
1003
+ name: "twilio",
1004
+ verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:test" }),
1005
+ };
1006
+ const { manager } = createManager([]);
1007
+ const config = createConfig({ provider: "twilio" });
1008
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
1009
+
1010
+ let enteredReads = 0;
1011
+ let releaseReads!: () => void;
1012
+ let unblockReadBodies!: () => void;
1013
+ const enteredEightReads = new Promise<void>((resolve) => {
1014
+ releaseReads = resolve;
1015
+ });
1016
+ const unblockReads = new Promise<void>((resolve) => {
1017
+ unblockReadBodies = resolve;
1018
+ });
1019
+ const readBodySpy = vi.spyOn(
1020
+ server as unknown as {
1021
+ readBody: (req: unknown, maxBytes: number, timeoutMs?: number) => Promise<string>;
1022
+ },
1023
+ "readBody",
1024
+ );
1025
+ readBodySpy.mockImplementation(async () => {
1026
+ enteredReads += 1;
1027
+ if (enteredReads === 8) {
1028
+ releaseReads();
1029
+ }
1030
+ if (enteredReads <= 8) {
1031
+ await unblockReads;
1032
+ }
1033
+ return "CallSid=CA123&SpeechResult=hello";
1034
+ });
1035
+
1036
+ try {
1037
+ const baseUrl = await server.start();
1038
+ const headers = { "x-twilio-signature": "sig" };
1039
+ const inFlightRequests = Array.from({ length: 8 }, () =>
1040
+ postWebhookFormWithHeaders(server, baseUrl, "CallSid=CA123", headers),
1041
+ );
1042
+ await enteredEightReads;
1043
+
1044
+ const rejected = await postWebhookFormWithHeaders(server, baseUrl, "CallSid=CA999", headers);
1045
+ expect(rejected.status).toBe(429);
1046
+ expect(await rejected.text()).toBe("Too Many Requests");
1047
+
1048
+ unblockReadBodies();
1049
+
1050
+ const settled = await Promise.all(inFlightRequests);
1051
+ expect(settled.every((response) => response.status === 200)).toBe(true);
1052
+ } finally {
1053
+ unblockReadBodies();
1054
+ readBodySpy.mockRestore();
1055
+ await server.stop();
1056
+ }
1057
+ });
1058
+
1059
+ it("limits missing remote addresses with a shared fallback bucket", async () => {
1060
+ const twilioProvider: VoiceCallProvider = {
1061
+ ...provider,
1062
+ name: "twilio",
1063
+ verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:test" }),
1064
+ };
1065
+ const { manager } = createManager([]);
1066
+ const config = createConfig({ provider: "twilio" });
1067
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
1068
+ const runWebhookPipeline = (
1069
+ server as unknown as {
1070
+ runWebhookPipeline: (
1071
+ req: IncomingMessage,
1072
+ webhookPath: string,
1073
+ ) => Promise<{ statusCode: number; body: string }>;
1074
+ }
1075
+ ).runWebhookPipeline.bind(server);
1076
+
1077
+ let enteredReads = 0;
1078
+ let releaseReads!: () => void;
1079
+ let unblockReadBodies!: () => void;
1080
+ const enteredEightReads = new Promise<void>((resolve) => {
1081
+ releaseReads = resolve;
1082
+ });
1083
+ const unblockReads = new Promise<void>((resolve) => {
1084
+ unblockReadBodies = resolve;
1085
+ });
1086
+ const readBodySpy = vi.spyOn(
1087
+ server as unknown as {
1088
+ readBody: (req: unknown, maxBytes: number, timeoutMs?: number) => Promise<string>;
1089
+ },
1090
+ "readBody",
1091
+ );
1092
+ readBodySpy.mockImplementation(async () => {
1093
+ enteredReads += 1;
1094
+ if (enteredReads === 8) {
1095
+ releaseReads();
1096
+ }
1097
+ await unblockReads;
1098
+ return "CallSid=CA123&SpeechResult=hello";
1099
+ });
1100
+
1101
+ const makeRequestWithoutRemoteAddress = () =>
1102
+ ({
1103
+ method: "POST",
1104
+ url: "/voice/webhook",
1105
+ headers: { "x-twilio-signature": "sig" },
1106
+ socket: { remoteAddress: undefined },
1107
+ }) as unknown as IncomingMessage;
1108
+
1109
+ try {
1110
+ const inFlightRequests = Array.from({ length: 8 }, () =>
1111
+ runWebhookPipeline(makeRequestWithoutRemoteAddress(), "/voice/webhook"),
1112
+ );
1113
+ await enteredEightReads;
1114
+
1115
+ const rejected = await runWebhookPipeline(
1116
+ makeRequestWithoutRemoteAddress(),
1117
+ "/voice/webhook",
1118
+ );
1119
+ expect(rejected.statusCode).toBe(429);
1120
+ expect(rejected.body).toBe("Too Many Requests");
1121
+ expect(readBodySpy).toHaveBeenCalledTimes(8);
1122
+
1123
+ unblockReadBodies();
1124
+
1125
+ const settled = await Promise.all(inFlightRequests);
1126
+ expect(settled.every((response) => response.statusCode === 200)).toBe(true);
1127
+ } finally {
1128
+ unblockReadBodies();
1129
+ readBodySpy.mockRestore();
1130
+ }
1131
+ });
1132
+ });
1133
+
281
1134
  describe("VoiceCallWebhookServer response normalization", () => {
282
1135
  it("preserves explicit empty provider response bodies", async () => {
283
1136
  const responseProvider: VoiceCallProvider = {
@@ -316,11 +1169,25 @@ describe("VoiceCallWebhookServer start idempotency", () => {
316
1169
  const secondUrl = await server.start();
317
1170
 
318
1171
  // Dynamic port allocations should resolve to a real listening port.
319
- expect(firstUrl).toContain("/voice/webhook");
320
- expect(firstUrl).not.toContain(":0/");
1172
+ expectWebhookUrl(firstUrl, "/voice/webhook");
321
1173
  // Idempotent re-start should return the same already-bound URL.
322
1174
  expect(secondUrl).toBe(firstUrl);
323
- expect(secondUrl).toContain("/voice/webhook");
1175
+ expectWebhookUrl(secondUrl, "/voice/webhook");
1176
+ } finally {
1177
+ await server.stop();
1178
+ }
1179
+ });
1180
+
1181
+ it("supports concurrent start() calls without double-binding the port", async () => {
1182
+ const { manager } = createManager([]);
1183
+ const config = createConfig({ serve: { port: 0, bind: "127.0.0.1", path: "/voice/webhook" } });
1184
+ const server = new VoiceCallWebhookServer(config, manager, provider);
1185
+
1186
+ try {
1187
+ const [firstUrl, secondUrl] = await Promise.all([server.start(), server.start()]);
1188
+
1189
+ expectWebhookUrl(firstUrl, "/voice/webhook");
1190
+ expect(secondUrl).toBe(firstUrl);
324
1191
  } finally {
325
1192
  await server.stop();
326
1193
  }
@@ -332,12 +1199,12 @@ describe("VoiceCallWebhookServer start idempotency", () => {
332
1199
  const server = new VoiceCallWebhookServer(config, manager, provider);
333
1200
 
334
1201
  const firstUrl = await server.start();
335
- expect(firstUrl).toContain("/voice/webhook");
1202
+ expectWebhookUrl(firstUrl, "/voice/webhook");
336
1203
  await server.stop();
337
1204
 
338
1205
  // After stopping, a new start should succeed
339
1206
  const secondUrl = await server.start();
340
- expect(secondUrl).toContain("/voice/webhook");
1207
+ expectWebhookUrl(secondUrl, "/voice/webhook");
341
1208
  await server.stop();
342
1209
  });
343
1210
 
@@ -350,3 +1217,254 @@ describe("VoiceCallWebhookServer start idempotency", () => {
350
1217
  await server.stop();
351
1218
  });
352
1219
  });
1220
+
1221
+ describe("VoiceCallWebhookServer stream disconnect grace", () => {
1222
+ beforeEach(() => {
1223
+ vi.useFakeTimers();
1224
+ });
1225
+
1226
+ afterEach(() => {
1227
+ vi.useRealTimers();
1228
+ });
1229
+
1230
+ it("ignores stale stream disconnects after reconnect and only hangs up on current stream disconnect", async () => {
1231
+ const call = createCall(Date.now() - 1_000);
1232
+ call.providerCallId = "CA-stream-1";
1233
+
1234
+ const endCall = vi.fn(async () => ({ success: true }));
1235
+ const speakInitialMessage = vi.fn(async () => {});
1236
+ const getCallByProviderCallId = vi.fn((providerCallId: string) =>
1237
+ providerCallId === "CA-stream-1" ? call : undefined,
1238
+ );
1239
+
1240
+ const manager = {
1241
+ getActiveCalls: () => [call],
1242
+ getCallByProviderCallId,
1243
+ endCall,
1244
+ speakInitialMessage,
1245
+ processEvent: vi.fn(),
1246
+ } as unknown as CallManager;
1247
+
1248
+ let currentStreamSid: string | null = "MZ-old";
1249
+ const twilioProvider = createTwilioStreamingProvider({
1250
+ registerCallStream: (_callSid: string, streamSid: string) => {
1251
+ currentStreamSid = streamSid;
1252
+ },
1253
+ unregisterCallStream: (_callSid: string, streamSid?: string) => {
1254
+ if (!currentStreamSid) {
1255
+ return;
1256
+ }
1257
+ if (streamSid && currentStreamSid !== streamSid) {
1258
+ return;
1259
+ }
1260
+ currentStreamSid = null;
1261
+ },
1262
+ hasRegisteredStream: () => currentStreamSid !== null,
1263
+ });
1264
+
1265
+ const config = createConfig({
1266
+ provider: "twilio",
1267
+ streaming: {
1268
+ ...createConfig().streaming,
1269
+ enabled: true,
1270
+ providers: {
1271
+ openai: {
1272
+ apiKey: "test-key", // pragma: allowlist secret
1273
+ },
1274
+ },
1275
+ },
1276
+ });
1277
+ const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
1278
+ await server.start();
1279
+
1280
+ const mediaHandler = server.getMediaStreamHandler() as unknown as {
1281
+ config: {
1282
+ onDisconnect?: (providerCallId: string, streamSid: string) => void;
1283
+ onConnect?: (providerCallId: string, streamSid: string) => void;
1284
+ onTranscriptionReady?: (providerCallId: string, streamSid: string) => void;
1285
+ };
1286
+ };
1287
+ if (!mediaHandler) {
1288
+ throw new Error("expected webhook server to expose a media stream handler");
1289
+ }
1290
+
1291
+ mediaHandler.config.onDisconnect?.("CA-stream-1", "MZ-old");
1292
+ await vi.advanceTimersByTimeAsync(1_000);
1293
+ mediaHandler.config.onConnect?.("CA-stream-1", "MZ-new");
1294
+ await vi.advanceTimersByTimeAsync(2_100);
1295
+ expect(endCall).not.toHaveBeenCalled();
1296
+ expect(speakInitialMessage).not.toHaveBeenCalled();
1297
+
1298
+ mediaHandler.config.onTranscriptionReady?.("CA-stream-1", "MZ-new");
1299
+ expect(speakInitialMessage).toHaveBeenCalledTimes(1);
1300
+ expect(speakInitialMessage).toHaveBeenCalledWith("CA-stream-1");
1301
+
1302
+ mediaHandler.config.onDisconnect?.("CA-stream-1", "MZ-new");
1303
+ await vi.advanceTimersByTimeAsync(2_100);
1304
+ expect(endCall).toHaveBeenCalledTimes(1);
1305
+ expect(endCall).toHaveBeenCalledWith(call.callId);
1306
+
1307
+ await server.stop();
1308
+ });
1309
+ });
1310
+
1311
+ describe("VoiceCallWebhookServer barge-in suppression during initial message", () => {
1312
+ const createTwilioProvider = (
1313
+ clearTtsQueue: ReturnType<typeof vi.fn<TwilioProviderTestDouble["clearTtsQueue"]>>,
1314
+ ) =>
1315
+ createTwilioStreamingProvider({
1316
+ clearTtsQueue,
1317
+ });
1318
+
1319
+ const getMediaCallbacks = (server: VoiceCallWebhookServer) =>
1320
+ server.getMediaStreamHandler() as unknown as {
1321
+ config: {
1322
+ onSpeechStart?: (providerCallId: string) => void;
1323
+ onTranscript?: (providerCallId: string, transcript: string) => void;
1324
+ };
1325
+ };
1326
+
1327
+ it("suppresses barge-in clear while outbound conversation initial message is pending", async () => {
1328
+ const call = createCall(Date.now() - 1_000);
1329
+ call.callId = "call-barge";
1330
+ call.providerCallId = "CA-barge";
1331
+ call.direction = "outbound";
1332
+ call.state = "speaking";
1333
+ call.metadata = {
1334
+ mode: "conversation",
1335
+ initialMessage: "Hi, this is OpenClaw.",
1336
+ };
1337
+
1338
+ const clearTtsQueue = vi.fn<TwilioProviderTestDouble["clearTtsQueue"]>();
1339
+ const processEvent = vi.fn((event: NormalizedEvent) => {
1340
+ if (event.type === "call.speech") {
1341
+ // Mirrors manager behavior: call.speech transitions to listening.
1342
+ call.state = "listening";
1343
+ }
1344
+ });
1345
+ const manager = {
1346
+ getActiveCalls: () => [call],
1347
+ getCallByProviderCallId: (providerCallId: string) =>
1348
+ providerCallId === call.providerCallId ? call : undefined,
1349
+ getCall: (callId: string) => (callId === call.callId ? call : undefined),
1350
+ endCall: vi.fn(async () => ({ success: true })),
1351
+ speakInitialMessage: vi.fn(async () => {}),
1352
+ processEvent,
1353
+ } as unknown as CallManager;
1354
+
1355
+ const config = createConfig({
1356
+ provider: "twilio",
1357
+ streaming: {
1358
+ ...createConfig().streaming,
1359
+ enabled: true,
1360
+ providers: {
1361
+ openai: {
1362
+ apiKey: "test-key", // pragma: allowlist secret
1363
+ },
1364
+ },
1365
+ },
1366
+ });
1367
+ const server = new VoiceCallWebhookServer(config, manager, createTwilioProvider(clearTtsQueue));
1368
+ await server.start();
1369
+ const handleInboundResponse = vi.fn(async () => {});
1370
+ (
1371
+ server as unknown as {
1372
+ handleInboundResponse: (
1373
+ callId: string,
1374
+ transcript: string,
1375
+ timing?: unknown,
1376
+ ) => Promise<void>;
1377
+ }
1378
+ ).handleInboundResponse = handleInboundResponse;
1379
+
1380
+ try {
1381
+ const media = getMediaCallbacks(server);
1382
+ media.config.onSpeechStart?.("CA-barge");
1383
+ media.config.onTranscript?.("CA-barge", "hello");
1384
+ media.config.onSpeechStart?.("CA-barge");
1385
+ media.config.onTranscript?.("CA-barge", "hello again");
1386
+ expect(clearTtsQueue).not.toHaveBeenCalled();
1387
+ expect(handleInboundResponse).not.toHaveBeenCalled();
1388
+ expect(processEvent).not.toHaveBeenCalled();
1389
+
1390
+ if (call.metadata) {
1391
+ delete call.metadata.initialMessage;
1392
+ }
1393
+ call.state = "listening";
1394
+
1395
+ media.config.onSpeechStart?.("CA-barge");
1396
+ media.config.onTranscript?.("CA-barge", "hello after greeting");
1397
+ expect(clearTtsQueue).toHaveBeenCalledTimes(2);
1398
+ expect(handleInboundResponse).toHaveBeenCalledTimes(1);
1399
+ expect(processEvent).toHaveBeenCalledTimes(1);
1400
+ const [calledCallId, calledTranscript] = (handleInboundResponse.mock.calls[0] ??
1401
+ []) as unknown as [string | undefined, string | undefined];
1402
+ expect(calledCallId).toBe(call.callId);
1403
+ expect(calledTranscript).toBe("hello after greeting");
1404
+ } finally {
1405
+ await server.stop();
1406
+ }
1407
+ });
1408
+
1409
+ it("keeps barge-in clear enabled for inbound calls", async () => {
1410
+ const call = createCall(Date.now() - 1_000);
1411
+ call.callId = "call-inbound";
1412
+ call.providerCallId = "CA-inbound";
1413
+ call.direction = "inbound";
1414
+ call.metadata = {
1415
+ initialMessage: "Hello from inbound greeting.",
1416
+ };
1417
+
1418
+ const clearTtsQueue = vi.fn<TwilioProviderTestDouble["clearTtsQueue"]>();
1419
+ const processEvent = vi.fn();
1420
+ const manager = {
1421
+ getActiveCalls: () => [call],
1422
+ getCallByProviderCallId: (providerCallId: string) =>
1423
+ providerCallId === call.providerCallId ? call : undefined,
1424
+ getCall: (callId: string) => (callId === call.callId ? call : undefined),
1425
+ endCall: vi.fn(async () => ({ success: true })),
1426
+ speakInitialMessage: vi.fn(async () => {}),
1427
+ processEvent,
1428
+ } as unknown as CallManager;
1429
+
1430
+ const config = createConfig({
1431
+ provider: "twilio",
1432
+ streaming: {
1433
+ ...createConfig().streaming,
1434
+ enabled: true,
1435
+ providers: {
1436
+ openai: {
1437
+ apiKey: "test-key", // pragma: allowlist secret
1438
+ },
1439
+ },
1440
+ },
1441
+ });
1442
+ const server = new VoiceCallWebhookServer(config, manager, createTwilioProvider(clearTtsQueue));
1443
+ await server.start();
1444
+ const handleInboundResponse = vi.fn(async () => {});
1445
+ (
1446
+ server as unknown as {
1447
+ handleInboundResponse: (callId: string, transcript: string) => Promise<void>;
1448
+ }
1449
+ ).handleInboundResponse = handleInboundResponse;
1450
+
1451
+ try {
1452
+ const media = getMediaCallbacks(server);
1453
+ media.config.onSpeechStart?.("CA-inbound");
1454
+ media.config.onTranscript?.("CA-inbound", "hello");
1455
+ expect(clearTtsQueue).toHaveBeenCalledTimes(2);
1456
+ expect(processEvent).toHaveBeenCalledWith(
1457
+ expect.objectContaining({
1458
+ type: "call.speech",
1459
+ callId: "call-inbound",
1460
+ providerCallId: "CA-inbound",
1461
+ transcript: "hello",
1462
+ isFinal: true,
1463
+ }),
1464
+ );
1465
+ expect(handleInboundResponse).toHaveBeenCalledWith("call-inbound", "hello");
1466
+ } finally {
1467
+ await server.stop();
1468
+ }
1469
+ });
1470
+ });