@openclaw/voice-call 2026.5.2 → 2026.5.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/dist/api.js +2 -0
  2. package/dist/call-status-CXldV5o8.js +32 -0
  3. package/dist/cli-metadata.js +12 -0
  4. package/dist/config-7w04YpHh.js +548 -0
  5. package/dist/config-compat-B0me39_4.js +129 -0
  6. package/dist/guarded-json-api-Btx5EE4w.js +591 -0
  7. package/dist/http-headers-BrnxBasF.js +10 -0
  8. package/dist/index.js +1284 -0
  9. package/dist/mock-CeKvfVEd.js +135 -0
  10. package/dist/plivo-B-a7KFoT.js +393 -0
  11. package/dist/realtime-handler-B63CIDP2.js +325 -0
  12. package/dist/realtime-transcription.runtime-B2h70y2W.js +2 -0
  13. package/dist/realtime-voice.runtime-Bkh4nvLn.js +2 -0
  14. package/dist/response-generator-BrcmwDZU.js +182 -0
  15. package/dist/response-model-CyF5K80p.js +12 -0
  16. package/dist/runtime-api.js +6 -0
  17. package/dist/runtime-entry-88ytYAQa.js +3119 -0
  18. package/dist/runtime-entry.js +2 -0
  19. package/dist/setup-api.js +37 -0
  20. package/dist/telnyx-jjBE8boz.js +260 -0
  21. package/dist/twilio-1OqbcXLL.js +676 -0
  22. package/dist/voice-mapping-BYDGdWGx.js +40 -0
  23. package/package.json +14 -6
  24. package/api.ts +0 -16
  25. package/cli-metadata.ts +0 -10
  26. package/config-api.ts +0 -12
  27. package/index.test.ts +0 -943
  28. package/index.ts +0 -794
  29. package/runtime-api.ts +0 -20
  30. package/runtime-entry.ts +0 -1
  31. package/setup-api.ts +0 -47
  32. package/src/allowlist.test.ts +0 -18
  33. package/src/allowlist.ts +0 -19
  34. package/src/cli.ts +0 -845
  35. package/src/config-compat.test.ts +0 -120
  36. package/src/config-compat.ts +0 -227
  37. package/src/config.test.ts +0 -479
  38. package/src/config.ts +0 -808
  39. package/src/core-bridge.ts +0 -14
  40. package/src/deep-merge.test.ts +0 -40
  41. package/src/deep-merge.ts +0 -23
  42. package/src/gateway-continue-operation.ts +0 -200
  43. package/src/http-headers.test.ts +0 -16
  44. package/src/http-headers.ts +0 -15
  45. package/src/manager/context.ts +0 -42
  46. package/src/manager/events.test.ts +0 -581
  47. package/src/manager/events.ts +0 -288
  48. package/src/manager/lifecycle.ts +0 -53
  49. package/src/manager/lookup.test.ts +0 -52
  50. package/src/manager/lookup.ts +0 -35
  51. package/src/manager/outbound.test.ts +0 -528
  52. package/src/manager/outbound.ts +0 -486
  53. package/src/manager/state.ts +0 -48
  54. package/src/manager/store.ts +0 -106
  55. package/src/manager/timers.test.ts +0 -129
  56. package/src/manager/timers.ts +0 -113
  57. package/src/manager/twiml.test.ts +0 -13
  58. package/src/manager/twiml.ts +0 -17
  59. package/src/manager.closed-loop.test.ts +0 -236
  60. package/src/manager.inbound-allowlist.test.ts +0 -188
  61. package/src/manager.notify.test.ts +0 -377
  62. package/src/manager.restore.test.ts +0 -183
  63. package/src/manager.test-harness.ts +0 -127
  64. package/src/manager.ts +0 -392
  65. package/src/media-stream.test.ts +0 -768
  66. package/src/media-stream.ts +0 -708
  67. package/src/providers/base.ts +0 -97
  68. package/src/providers/mock.test.ts +0 -78
  69. package/src/providers/mock.ts +0 -185
  70. package/src/providers/plivo.test.ts +0 -93
  71. package/src/providers/plivo.ts +0 -601
  72. package/src/providers/shared/call-status.test.ts +0 -24
  73. package/src/providers/shared/call-status.ts +0 -24
  74. package/src/providers/shared/guarded-json-api.test.ts +0 -106
  75. package/src/providers/shared/guarded-json-api.ts +0 -42
  76. package/src/providers/telnyx.test.ts +0 -340
  77. package/src/providers/telnyx.ts +0 -394
  78. package/src/providers/twilio/api.test.ts +0 -145
  79. package/src/providers/twilio/api.ts +0 -93
  80. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  81. package/src/providers/twilio/twiml-policy.ts +0 -87
  82. package/src/providers/twilio/webhook.ts +0 -34
  83. package/src/providers/twilio.test.ts +0 -591
  84. package/src/providers/twilio.ts +0 -861
  85. package/src/providers/twilio.types.ts +0 -17
  86. package/src/realtime-defaults.ts +0 -3
  87. package/src/realtime-fast-context.test.ts +0 -88
  88. package/src/realtime-fast-context.ts +0 -165
  89. package/src/realtime-transcription.runtime.ts +0 -4
  90. package/src/realtime-voice.runtime.ts +0 -5
  91. package/src/response-generator.test.ts +0 -321
  92. package/src/response-generator.ts +0 -318
  93. package/src/response-model.test.ts +0 -71
  94. package/src/response-model.ts +0 -23
  95. package/src/runtime.test.ts +0 -536
  96. package/src/runtime.ts +0 -510
  97. package/src/telephony-audio.test.ts +0 -61
  98. package/src/telephony-audio.ts +0 -12
  99. package/src/telephony-tts.test.ts +0 -196
  100. package/src/telephony-tts.ts +0 -235
  101. package/src/test-fixtures.ts +0 -73
  102. package/src/tts-provider-voice.test.ts +0 -34
  103. package/src/tts-provider-voice.ts +0 -21
  104. package/src/tunnel.test.ts +0 -166
  105. package/src/tunnel.ts +0 -314
  106. package/src/types.ts +0 -291
  107. package/src/utils.test.ts +0 -17
  108. package/src/utils.ts +0 -14
  109. package/src/voice-mapping.test.ts +0 -34
  110. package/src/voice-mapping.ts +0 -68
  111. package/src/webhook/realtime-handler.test.ts +0 -598
  112. package/src/webhook/realtime-handler.ts +0 -485
  113. package/src/webhook/stale-call-reaper.test.ts +0 -88
  114. package/src/webhook/stale-call-reaper.ts +0 -38
  115. package/src/webhook/tailscale.test.ts +0 -214
  116. package/src/webhook/tailscale.ts +0 -129
  117. package/src/webhook-exposure.test.ts +0 -33
  118. package/src/webhook-exposure.ts +0 -84
  119. package/src/webhook-security.test.ts +0 -770
  120. package/src/webhook-security.ts +0 -994
  121. package/src/webhook.hangup-once.lifecycle.test.ts +0 -135
  122. package/src/webhook.test.ts +0 -1470
  123. package/src/webhook.ts +0 -908
  124. package/src/webhook.types.ts +0 -5
  125. package/src/websocket-test-support.ts +0 -72
  126. package/tsconfig.json +0 -16
@@ -1,768 +0,0 @@
1
- import type { IncomingMessage } from "node:http";
2
- import net from "node:net";
3
- import type {
4
- RealtimeTranscriptionProviderPlugin,
5
- RealtimeTranscriptionSession,
6
- } from "openclaw/plugin-sdk/realtime-transcription";
7
- import { describe, expect, it, vi } from "vitest";
8
- import { WebSocket } from "ws";
9
- import { MediaStreamHandler, sanitizeLogText } from "./media-stream.js";
10
- import {
11
- connectWs,
12
- startUpgradeWsServer,
13
- waitForClose,
14
- withTimeout,
15
- } from "./websocket-test-support.js";
16
-
17
- const createStubSession = (): RealtimeTranscriptionSession => ({
18
- connect: async () => {},
19
- sendAudio: () => {},
20
- close: () => {},
21
- isConnected: () => true,
22
- });
23
-
24
- const createStubSttProvider = (): RealtimeTranscriptionProviderPlugin =>
25
- ({
26
- createSession: () => createStubSession(),
27
- id: "openai",
28
- label: "OpenAI",
29
- isConfigured: () => true,
30
- }) as unknown as RealtimeTranscriptionProviderPlugin;
31
-
32
- const flush = async (): Promise<void> => {
33
- await new Promise((resolve) => setTimeout(resolve, 0));
34
- };
35
-
36
- const createDeferred = (): {
37
- promise: Promise<void>;
38
- resolve: () => void;
39
- reject: (error: Error) => void;
40
- } => {
41
- let resolve!: () => void;
42
- let reject!: (error: Error) => void;
43
- const promise = new Promise<void>((resolvePromise, rejectPromise) => {
44
- resolve = resolvePromise;
45
- reject = rejectPromise;
46
- });
47
- return { promise, resolve, reject };
48
- };
49
-
50
- const waitForAbort = (signal: AbortSignal): Promise<void> =>
51
- new Promise((resolve) => {
52
- if (signal.aborted) {
53
- resolve();
54
- return;
55
- }
56
- signal.addEventListener("abort", () => resolve(), { once: true });
57
- });
58
-
59
- const startWsServer = async (
60
- handler: MediaStreamHandler,
61
- ): Promise<{
62
- url: string;
63
- close: () => Promise<void>;
64
- }> =>
65
- startUpgradeWsServer({
66
- urlPath: "/voice/stream",
67
- onUpgrade: (request, socket, head) => {
68
- handler.handleUpgrade(request, socket, head);
69
- },
70
- });
71
-
72
- describe("MediaStreamHandler TTS queue", () => {
73
- it("serializes TTS playback and resolves in order", async () => {
74
- const handler = new MediaStreamHandler({
75
- transcriptionProvider: createStubSttProvider(),
76
- providerConfig: {},
77
- });
78
- const started: number[] = [];
79
- const finished: number[] = [];
80
-
81
- let resolveFirst!: () => void;
82
- const firstGate = new Promise<void>((resolve) => {
83
- resolveFirst = resolve;
84
- });
85
-
86
- const first = handler.queueTts("stream-1", async () => {
87
- started.push(1);
88
- await firstGate;
89
- finished.push(1);
90
- });
91
- const second = handler.queueTts("stream-1", async () => {
92
- started.push(2);
93
- finished.push(2);
94
- });
95
-
96
- await flush();
97
- expect(started).toEqual([1]);
98
-
99
- resolveFirst();
100
- await first;
101
- await second;
102
-
103
- expect(started).toEqual([1, 2]);
104
- expect(finished).toEqual([1, 2]);
105
- });
106
-
107
- it("cancels active playback and clears queued items", async () => {
108
- const handler = new MediaStreamHandler({
109
- transcriptionProvider: createStubSttProvider(),
110
- providerConfig: {},
111
- });
112
-
113
- let queuedRan = false;
114
- const started: string[] = [];
115
-
116
- const active = handler.queueTts("stream-1", async (signal) => {
117
- started.push("active");
118
- await waitForAbort(signal);
119
- });
120
- const queued = handler.queueTts("stream-1", async () => {
121
- queuedRan = true;
122
- });
123
-
124
- await flush();
125
- expect(started).toEqual(["active"]);
126
-
127
- handler.clearTtsQueue("stream-1");
128
- await active;
129
- await withTimeout(queued);
130
- await flush();
131
-
132
- expect(queuedRan).toBe(false);
133
- });
134
-
135
- it("resolves pending queued playback during stream teardown", async () => {
136
- const handler = new MediaStreamHandler({
137
- transcriptionProvider: createStubSttProvider(),
138
- providerConfig: {},
139
- });
140
-
141
- let queuedRan = false;
142
- const active = handler.queueTts("stream-1", async (signal) => {
143
- await waitForAbort(signal);
144
- });
145
- const queued = handler.queueTts("stream-1", async () => {
146
- queuedRan = true;
147
- });
148
-
149
- await flush();
150
- (
151
- handler as unknown as {
152
- clearTtsState(streamSid: string): void;
153
- }
154
- ).clearTtsState("stream-1");
155
-
156
- await withTimeout(active);
157
- await withTimeout(queued);
158
- expect(queuedRan).toBe(false);
159
- });
160
- });
161
-
162
- describe("MediaStreamHandler security hardening", () => {
163
- it("fails sends and closes stream when buffered bytes already exceed the cap", () => {
164
- const handler = new MediaStreamHandler({
165
- transcriptionProvider: createStubSttProvider(),
166
- providerConfig: {},
167
- });
168
- const ws = {
169
- readyState: WebSocket.OPEN,
170
- bufferedAmount: 2 * 1024 * 1024,
171
- send: vi.fn(),
172
- close: vi.fn(),
173
- } as unknown as WebSocket;
174
- (
175
- handler as unknown as {
176
- sessions: Map<
177
- string,
178
- {
179
- callId: string;
180
- streamSid: string;
181
- ws: WebSocket;
182
- sttSession: RealtimeTranscriptionSession;
183
- }
184
- >;
185
- }
186
- ).sessions.set("MZ-backpressure", {
187
- callId: "CA-backpressure",
188
- streamSid: "MZ-backpressure",
189
- ws,
190
- sttSession: createStubSession(),
191
- });
192
-
193
- const result = handler.sendAudio("MZ-backpressure", Buffer.alloc(160, 0xff));
194
-
195
- expect(result.sent).toBe(false);
196
- expect(ws.send).not.toHaveBeenCalled();
197
- expect(ws.close).toHaveBeenCalledWith(1013, "Backpressure: send buffer exceeded");
198
- });
199
-
200
- it("fails sends when buffered bytes exceed cap after enqueueing a frame", () => {
201
- const handler = new MediaStreamHandler({
202
- transcriptionProvider: createStubSttProvider(),
203
- providerConfig: {},
204
- });
205
- const ws = {
206
- readyState: WebSocket.OPEN,
207
- bufferedAmount: 0,
208
- send: vi.fn(() => {
209
- (
210
- ws as unknown as {
211
- bufferedAmount: number;
212
- }
213
- ).bufferedAmount = 2 * 1024 * 1024;
214
- }),
215
- close: vi.fn(),
216
- } as unknown as WebSocket;
217
- (
218
- handler as unknown as {
219
- sessions: Map<
220
- string,
221
- {
222
- callId: string;
223
- streamSid: string;
224
- ws: WebSocket;
225
- sttSession: RealtimeTranscriptionSession;
226
- }
227
- >;
228
- }
229
- ).sessions.set("MZ-overflow", {
230
- callId: "CA-overflow",
231
- streamSid: "MZ-overflow",
232
- ws,
233
- sttSession: createStubSession(),
234
- });
235
-
236
- const result = handler.sendMark("MZ-overflow", "mark-1");
237
-
238
- expect(ws.send).toHaveBeenCalledTimes(1);
239
- expect(result.sent).toBe(false);
240
- expect(ws.close).toHaveBeenCalledWith(1013, "Backpressure: send buffer exceeded");
241
- });
242
-
243
- it("sanitizes websocket close reason before logging", () => {
244
- const reason = sanitizeLogText("forged\nline\r\tentry", 120);
245
- expect(reason).not.toContain("\n");
246
- expect(reason).not.toContain("\r");
247
- expect(reason).not.toContain("\t");
248
- expect(reason).toContain("forged line entry");
249
- });
250
-
251
- it("closes idle pre-start connections after timeout", async () => {
252
- const shouldAcceptStreamCalls: Array<{ callId: string; streamSid: string; token?: string }> =
253
- [];
254
- const handler = new MediaStreamHandler({
255
- transcriptionProvider: createStubSttProvider(),
256
- providerConfig: {},
257
- preStartTimeoutMs: 40,
258
- shouldAcceptStream: (params) => {
259
- shouldAcceptStreamCalls.push(params);
260
- return true;
261
- },
262
- });
263
- const server = await startWsServer(handler);
264
-
265
- try {
266
- const ws = await connectWs(server.url);
267
- const closed = await waitForClose(ws);
268
-
269
- expect(closed.code).toBe(1008);
270
- expect(closed.reason).toBe("Start timeout");
271
- expect(shouldAcceptStreamCalls).toEqual([]);
272
- } finally {
273
- await server.close();
274
- }
275
- });
276
-
277
- it("enforces pending connection limits", async () => {
278
- const handler = new MediaStreamHandler({
279
- transcriptionProvider: createStubSttProvider(),
280
- providerConfig: {},
281
- preStartTimeoutMs: 5_000,
282
- maxPendingConnections: 1,
283
- maxPendingConnectionsPerIp: 1,
284
- });
285
- const server = await startWsServer(handler);
286
-
287
- try {
288
- const first = await connectWs(server.url);
289
- const second = await connectWs(server.url);
290
- const secondClosed = await waitForClose(second);
291
-
292
- expect(secondClosed.code).toBe(1013);
293
- expect(secondClosed.reason).toContain("Too many pending");
294
- expect(first.readyState).toBe(WebSocket.OPEN);
295
-
296
- first.close();
297
- await waitForClose(first);
298
- } finally {
299
- await server.close();
300
- }
301
- });
302
-
303
- it("uses resolved client IPs for per-IP pending limits", async () => {
304
- const handler = new MediaStreamHandler({
305
- transcriptionProvider: createStubSttProvider(),
306
- providerConfig: {},
307
- preStartTimeoutMs: 5_000,
308
- maxPendingConnections: 10,
309
- maxPendingConnectionsPerIp: 1,
310
- resolveClientIp: (request) => String(request.headers["x-forwarded-for"] ?? ""),
311
- });
312
- const server = await startWsServer(handler);
313
-
314
- try {
315
- const first = new WebSocket(server.url, {
316
- headers: { "x-forwarded-for": "198.51.100.10" },
317
- });
318
- await withTimeout(new Promise((resolve) => first.once("open", resolve)));
319
-
320
- const second = new WebSocket(server.url, {
321
- headers: { "x-forwarded-for": "203.0.113.20" },
322
- });
323
- await withTimeout(new Promise((resolve) => second.once("open", resolve)));
324
-
325
- expect(first.readyState).toBe(WebSocket.OPEN);
326
- expect(second.readyState).toBe(WebSocket.OPEN);
327
-
328
- const firstClosed = waitForClose(first);
329
- const secondClosed = waitForClose(second);
330
- first.close();
331
- second.close();
332
- await firstClosed;
333
- await secondClosed;
334
- } finally {
335
- await server.close();
336
- }
337
- });
338
-
339
- it("rejects upgrades when max connection cap is reached", async () => {
340
- const handler = new MediaStreamHandler({
341
- transcriptionProvider: createStubSttProvider(),
342
- providerConfig: {},
343
- preStartTimeoutMs: 5_000,
344
- maxConnections: 1,
345
- maxPendingConnections: 10,
346
- maxPendingConnectionsPerIp: 10,
347
- });
348
- const server = await startWsServer(handler);
349
-
350
- try {
351
- const first = await connectWs(server.url);
352
- const secondError = await withTimeout(
353
- new Promise<Error>((resolve) => {
354
- const ws = new WebSocket(server.url);
355
- ws.once("error", (err) => resolve(err));
356
- }),
357
- );
358
-
359
- expect(secondError.message).toContain("Unexpected server response: 503");
360
-
361
- first.close();
362
- await waitForClose(first);
363
- } finally {
364
- await server.close();
365
- }
366
- });
367
-
368
- it("counts in-flight upgrades against the max connection cap", () => {
369
- const handler = new MediaStreamHandler({
370
- transcriptionProvider: createStubSttProvider(),
371
- providerConfig: {},
372
- maxConnections: 2,
373
- maxPendingConnections: 10,
374
- maxPendingConnectionsPerIp: 10,
375
- });
376
-
377
- const fakeWss = {
378
- clients: new Set([{}]),
379
- handleUpgrade: vi.fn(),
380
- emit: vi.fn(),
381
- on: vi.fn(),
382
- };
383
- let upgradeCallback: ((ws: WebSocket) => void) | null = null;
384
- fakeWss.handleUpgrade.mockImplementation(
385
- (
386
- _request: IncomingMessage,
387
- _socket: unknown,
388
- _head: Buffer,
389
- callback: (ws: WebSocket) => void,
390
- ) => {
391
- upgradeCallback = callback;
392
- },
393
- );
394
-
395
- (
396
- handler as unknown as {
397
- wss: typeof fakeWss;
398
- }
399
- ).wss = fakeWss;
400
-
401
- const firstSocket = {
402
- once: vi.fn(),
403
- removeListener: vi.fn(),
404
- write: vi.fn(),
405
- destroy: vi.fn(),
406
- };
407
- handler.handleUpgrade(
408
- { socket: { remoteAddress: "127.0.0.1" } } as IncomingMessage,
409
- firstSocket as never,
410
- Buffer.alloc(0),
411
- );
412
-
413
- const secondSocket = {
414
- once: vi.fn(),
415
- removeListener: vi.fn(),
416
- write: vi.fn(),
417
- destroy: vi.fn(),
418
- };
419
- handler.handleUpgrade(
420
- { socket: { remoteAddress: "127.0.0.1" } } as IncomingMessage,
421
- secondSocket as never,
422
- Buffer.alloc(0),
423
- );
424
-
425
- expect(fakeWss.handleUpgrade).toHaveBeenCalledTimes(1);
426
- expect(secondSocket.write).toHaveBeenCalledOnce();
427
- expect(secondSocket.destroy).toHaveBeenCalledOnce();
428
-
429
- expect(upgradeCallback).not.toBeNull();
430
- const completeUpgrade = upgradeCallback as ((ws: WebSocket) => void) | null;
431
- if (!completeUpgrade) {
432
- throw new Error("Expected upgrade callback to be registered");
433
- }
434
- completeUpgrade({} as WebSocket);
435
- expect(fakeWss.emit).toHaveBeenCalledWith(
436
- "connection",
437
- expect.anything(),
438
- expect.objectContaining({ socket: { remoteAddress: "127.0.0.1" } }),
439
- );
440
- });
441
-
442
- it("releases in-flight reservations when ws rejects a malformed upgrade before the callback", async () => {
443
- const handler = new MediaStreamHandler({
444
- transcriptionProvider: createStubSttProvider(),
445
- providerConfig: {},
446
- preStartTimeoutMs: 5_000,
447
- maxConnections: 1,
448
- maxPendingConnections: 10,
449
- maxPendingConnectionsPerIp: 10,
450
- });
451
- const server = await startWsServer(handler);
452
- const serverUrl = new URL(server.url);
453
-
454
- try {
455
- await withTimeout(
456
- new Promise<void>((resolve, reject) => {
457
- const socket = net.createConnection(
458
- { host: serverUrl.hostname, port: Number(serverUrl.port) },
459
- () => {
460
- socket.write(
461
- [
462
- "GET /voice/stream HTTP/1.1",
463
- `Host: ${serverUrl.host}`,
464
- "Upgrade: websocket",
465
- "Connection: Upgrade",
466
- "Sec-WebSocket-Version: 13",
467
- "",
468
- "",
469
- ].join("\r\n"),
470
- );
471
- },
472
- );
473
- socket.once("error", reject);
474
- socket.once("data", () => {
475
- socket.end();
476
- });
477
- socket.once("close", () => resolve());
478
- }),
479
- );
480
-
481
- const ws = await connectWs(server.url);
482
- expect(ws.readyState).toBe(WebSocket.OPEN);
483
- ws.close();
484
- await waitForClose(ws);
485
- } finally {
486
- await server.close();
487
- }
488
- });
489
-
490
- it("clears pending state after valid start", async () => {
491
- const handler = new MediaStreamHandler({
492
- transcriptionProvider: createStubSttProvider(),
493
- providerConfig: {},
494
- preStartTimeoutMs: 40,
495
- shouldAcceptStream: () => true,
496
- });
497
- const server = await startWsServer(handler);
498
-
499
- try {
500
- const ws = await connectWs(server.url);
501
- ws.send(
502
- JSON.stringify({
503
- event: "start",
504
- streamSid: "MZ123",
505
- start: { callSid: "CA123", customParameters: { token: "token-123" } },
506
- }),
507
- );
508
-
509
- await new Promise((resolve) => setTimeout(resolve, 80));
510
- expect(ws.readyState).toBe(WebSocket.OPEN);
511
-
512
- ws.close();
513
- await waitForClose(ws);
514
- } finally {
515
- await server.close();
516
- }
517
- });
518
-
519
- it("keeps accepted streams alive while STT readiness exceeds the pre-start timeout", async () => {
520
- const sttReady = createDeferred();
521
- const sttConnectStarted = createDeferred();
522
- const transcriptionReady = createDeferred();
523
- const events: string[] = [];
524
-
525
- const session: RealtimeTranscriptionSession = {
526
- connect: async () => {
527
- events.push("stt-connect-start");
528
- sttConnectStarted.resolve();
529
- await sttReady.promise;
530
- events.push("stt-connect-ready");
531
- },
532
- sendAudio: () => {},
533
- close: () => {},
534
- isConnected: () => false,
535
- };
536
-
537
- const handler = new MediaStreamHandler({
538
- transcriptionProvider: {
539
- createSession: () => session,
540
- id: "openai",
541
- label: "OpenAI",
542
- isConfigured: () => true,
543
- },
544
- providerConfig: {},
545
- preStartTimeoutMs: 40,
546
- shouldAcceptStream: () => true,
547
- onConnect: () => {
548
- events.push("onConnect");
549
- },
550
- onTranscriptionReady: () => {
551
- events.push("onTranscriptionReady");
552
- transcriptionReady.resolve();
553
- },
554
- });
555
- const server = await startWsServer(handler);
556
-
557
- try {
558
- const ws = await connectWs(server.url);
559
- ws.send(
560
- JSON.stringify({
561
- event: "start",
562
- streamSid: "MZ-slow-stt",
563
- start: { callSid: "CA-slow-stt" },
564
- }),
565
- );
566
-
567
- await withTimeout(sttConnectStarted.promise);
568
- await new Promise((resolve) => setTimeout(resolve, 80));
569
- expect(ws.readyState).toBe(WebSocket.OPEN);
570
- expect(events).toEqual(["onConnect", "stt-connect-start"]);
571
-
572
- sttReady.resolve();
573
- await withTimeout(transcriptionReady.promise);
574
- expect(events).toEqual([
575
- "onConnect",
576
- "stt-connect-start",
577
- "stt-connect-ready",
578
- "onTranscriptionReady",
579
- ]);
580
-
581
- ws.close();
582
- await waitForClose(ws);
583
- } finally {
584
- await server.close();
585
- }
586
- });
587
-
588
- it("forwards early Twilio media into the STT session before readiness", async () => {
589
- const sttReady = createDeferred();
590
- const sttConnectStarted = createDeferred();
591
- const transcriptionReady = createDeferred();
592
- const audioReceived = createDeferred();
593
- const receivedAudio: Buffer[] = [];
594
- let onConnectCalls = 0;
595
- let onTranscriptionReadyCalls = 0;
596
-
597
- const session: RealtimeTranscriptionSession = {
598
- connect: async () => {
599
- sttConnectStarted.resolve();
600
- await sttReady.promise;
601
- },
602
- sendAudio: (audio) => {
603
- receivedAudio.push(Buffer.from(audio));
604
- audioReceived.resolve();
605
- },
606
- close: () => {},
607
- isConnected: () => false,
608
- };
609
-
610
- const handler = new MediaStreamHandler({
611
- transcriptionProvider: {
612
- createSession: () => session,
613
- id: "openai",
614
- label: "OpenAI",
615
- isConfigured: () => true,
616
- },
617
- providerConfig: {},
618
- shouldAcceptStream: () => true,
619
- onConnect: () => {
620
- onConnectCalls += 1;
621
- },
622
- onTranscriptionReady: () => {
623
- onTranscriptionReadyCalls += 1;
624
- transcriptionReady.resolve();
625
- },
626
- });
627
- const server = await startWsServer(handler);
628
- let ws: WebSocket | undefined;
629
-
630
- try {
631
- ws = await connectWs(server.url);
632
- ws.send(
633
- JSON.stringify({
634
- event: "start",
635
- streamSid: "MZ-early-media",
636
- start: { callSid: "CA-early-media" },
637
- }),
638
- );
639
-
640
- await withTimeout(sttConnectStarted.promise);
641
- ws.send(
642
- JSON.stringify({
643
- event: "media",
644
- streamSid: "MZ-early-media",
645
- media: { payload: Buffer.from("early").toString("base64") },
646
- }),
647
- );
648
- await withTimeout(audioReceived.promise);
649
-
650
- expect(Buffer.concat(receivedAudio).toString()).toBe("early");
651
- expect(onConnectCalls).toBe(1);
652
- expect(onTranscriptionReadyCalls).toBe(0);
653
-
654
- sttReady.resolve();
655
- await withTimeout(transcriptionReady.promise);
656
- expect(onConnectCalls).toBe(1);
657
- expect(onTranscriptionReadyCalls).toBe(1);
658
- } finally {
659
- sttReady.resolve();
660
- if (ws) {
661
- if (ws.readyState === WebSocket.OPEN) {
662
- ws.close();
663
- }
664
- if (ws.readyState !== WebSocket.CLOSED) {
665
- await waitForClose(ws).catch(() => {});
666
- }
667
- }
668
- await server.close();
669
- }
670
- });
671
-
672
- it("closes the media stream and disconnects once when STT readiness fails", async () => {
673
- const sttConnectStarted = createDeferred();
674
- const onDisconnectReady = createDeferred();
675
- const onConnect = vi.fn();
676
- const onTranscriptionReady = vi.fn();
677
- const onDisconnect = vi.fn(() => {
678
- onDisconnectReady.resolve();
679
- });
680
-
681
- const session: RealtimeTranscriptionSession = {
682
- connect: async () => {
683
- sttConnectStarted.resolve();
684
- throw new Error("provider unavailable");
685
- },
686
- sendAudio: () => {},
687
- close: vi.fn(),
688
- isConnected: () => false,
689
- };
690
-
691
- const handler = new MediaStreamHandler({
692
- transcriptionProvider: {
693
- createSession: () => session,
694
- id: "openai",
695
- label: "OpenAI",
696
- isConfigured: () => true,
697
- },
698
- providerConfig: {},
699
- shouldAcceptStream: () => true,
700
- onConnect,
701
- onTranscriptionReady,
702
- onDisconnect,
703
- });
704
- const server = await startWsServer(handler);
705
-
706
- try {
707
- const ws = await connectWs(server.url);
708
- ws.send(
709
- JSON.stringify({
710
- event: "start",
711
- streamSid: "MZ-stt-fail",
712
- start: { callSid: "CA-stt-fail" },
713
- }),
714
- );
715
-
716
- await withTimeout(sttConnectStarted.promise);
717
- const closed = await waitForClose(ws);
718
- await withTimeout(onDisconnectReady.promise);
719
-
720
- expect(closed.code).toBe(1011);
721
- expect(closed.reason).toBe("STT connection failed");
722
- expect(onConnect).toHaveBeenCalledTimes(1);
723
- expect(onConnect).toHaveBeenCalledWith("CA-stt-fail", "MZ-stt-fail");
724
- expect(onTranscriptionReady).not.toHaveBeenCalled();
725
- expect(onDisconnect).toHaveBeenCalledTimes(1);
726
- expect(onDisconnect).toHaveBeenCalledWith("CA-stt-fail", "MZ-stt-fail");
727
- expect(session.close).toHaveBeenCalledTimes(1);
728
- } finally {
729
- await server.close();
730
- }
731
- });
732
-
733
- it("rejects oversized pre-start frames at the websocket maxPayload guard before validation runs", async () => {
734
- const shouldAcceptStreamCalls: Array<{ callId: string; streamSid: string; token?: string }> =
735
- [];
736
- const handler = new MediaStreamHandler({
737
- transcriptionProvider: createStubSttProvider(),
738
- providerConfig: {},
739
- preStartTimeoutMs: 1_000,
740
- shouldAcceptStream: (params) => {
741
- shouldAcceptStreamCalls.push(params);
742
- return true;
743
- },
744
- });
745
- const server = await startWsServer(handler);
746
-
747
- try {
748
- const ws = await connectWs(server.url);
749
- ws.send(
750
- JSON.stringify({
751
- event: "start",
752
- streamSid: "MZ-oversized",
753
- start: {
754
- callSid: "CA-oversized",
755
- customParameters: { token: "token-oversized", padding: "A".repeat(256 * 1024) },
756
- },
757
- }),
758
- );
759
-
760
- const closed = await waitForClose(ws);
761
-
762
- expect(closed.code).toBe(1009);
763
- expect(shouldAcceptStreamCalls).toEqual([]);
764
- } finally {
765
- await server.close();
766
- }
767
- });
768
- });