@openclaw/voice-call 2026.2.21 → 2026.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,12 +17,16 @@ import type {
17
17
  } from "./types.js";
18
18
 
19
19
  class FakeProvider implements VoiceCallProvider {
20
- readonly name = "plivo" as const;
20
+ readonly name: "plivo" | "twilio";
21
21
  readonly playTtsCalls: PlayTtsInput[] = [];
22
22
  readonly hangupCalls: HangupCallInput[] = [];
23
23
  readonly startListeningCalls: StartListeningInput[] = [];
24
24
  readonly stopListeningCalls: StopListeningInput[] = [];
25
25
 
26
+ constructor(name: "plivo" | "twilio" = "plivo") {
27
+ this.name = name;
28
+ }
29
+
26
30
  verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
27
31
  return { ok: true };
28
32
  }
@@ -46,17 +50,44 @@ class FakeProvider implements VoiceCallProvider {
46
50
  }
47
51
  }
48
52
 
53
+ let storeSeq = 0;
54
+
55
+ function createTestStorePath(): string {
56
+ storeSeq += 1;
57
+ return path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}-${storeSeq}`);
58
+ }
59
+
60
+ function createManagerHarness(
61
+ configOverrides: Record<string, unknown> = {},
62
+ provider = new FakeProvider(),
63
+ ): {
64
+ manager: CallManager;
65
+ provider: FakeProvider;
66
+ } {
67
+ const config = VoiceCallConfigSchema.parse({
68
+ enabled: true,
69
+ provider: "plivo",
70
+ fromNumber: "+15550000000",
71
+ ...configOverrides,
72
+ });
73
+ const manager = new CallManager(config, createTestStorePath());
74
+ manager.initialize(provider, "https://example.com/voice/webhook");
75
+ return { manager, provider };
76
+ }
77
+
78
+ function markCallAnswered(manager: CallManager, callId: string, eventId: string): void {
79
+ manager.processEvent({
80
+ id: eventId,
81
+ type: "call.answered",
82
+ callId,
83
+ providerCallId: "request-uuid",
84
+ timestamp: Date.now(),
85
+ });
86
+ }
87
+
49
88
  describe("CallManager", () => {
50
89
  it("upgrades providerCallId mapping when provider ID changes", async () => {
51
- const config = VoiceCallConfigSchema.parse({
52
- enabled: true,
53
- provider: "plivo",
54
- fromNumber: "+15550000000",
55
- });
56
-
57
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
58
- const manager = new CallManager(config, storePath);
59
- manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
90
+ const { manager } = createManagerHarness();
60
91
 
61
92
  const { callId, success, error } = await manager.initiateCall("+15550000001");
62
93
  expect(success).toBe(true);
@@ -81,16 +112,7 @@ describe("CallManager", () => {
81
112
  });
82
113
 
83
114
  it("speaks initial message on answered for notify mode (non-Twilio)", async () => {
84
- const config = VoiceCallConfigSchema.parse({
85
- enabled: true,
86
- provider: "plivo",
87
- fromNumber: "+15550000000",
88
- });
89
-
90
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
91
- const provider = new FakeProvider();
92
- const manager = new CallManager(config, storePath);
93
- manager.initialize(provider, "https://example.com/voice/webhook");
115
+ const { manager, provider } = createManagerHarness();
94
116
 
95
117
  const { callId, success } = await manager.initiateCall("+15550000002", undefined, {
96
118
  message: "Hello there",
@@ -113,19 +135,11 @@ describe("CallManager", () => {
113
135
  });
114
136
 
115
137
  it("rejects inbound calls with missing caller ID when allowlist enabled", () => {
116
- const config = VoiceCallConfigSchema.parse({
117
- enabled: true,
118
- provider: "plivo",
119
- fromNumber: "+15550000000",
138
+ const { manager, provider } = createManagerHarness({
120
139
  inboundPolicy: "allowlist",
121
140
  allowFrom: ["+15550001234"],
122
141
  });
123
142
 
124
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
125
- const provider = new FakeProvider();
126
- const manager = new CallManager(config, storePath);
127
- manager.initialize(provider, "https://example.com/voice/webhook");
128
-
129
143
  manager.processEvent({
130
144
  id: "evt-allowlist-missing",
131
145
  type: "call.initiated",
@@ -142,19 +156,11 @@ describe("CallManager", () => {
142
156
  });
143
157
 
144
158
  it("rejects inbound calls with anonymous caller ID when allowlist enabled", () => {
145
- const config = VoiceCallConfigSchema.parse({
146
- enabled: true,
147
- provider: "plivo",
148
- fromNumber: "+15550000000",
159
+ const { manager, provider } = createManagerHarness({
149
160
  inboundPolicy: "allowlist",
150
161
  allowFrom: ["+15550001234"],
151
162
  });
152
163
 
153
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
154
- const provider = new FakeProvider();
155
- const manager = new CallManager(config, storePath);
156
- manager.initialize(provider, "https://example.com/voice/webhook");
157
-
158
164
  manager.processEvent({
159
165
  id: "evt-allowlist-anon",
160
166
  type: "call.initiated",
@@ -172,19 +178,11 @@ describe("CallManager", () => {
172
178
  });
173
179
 
174
180
  it("rejects inbound calls that only match allowlist suffixes", () => {
175
- const config = VoiceCallConfigSchema.parse({
176
- enabled: true,
177
- provider: "plivo",
178
- fromNumber: "+15550000000",
181
+ const { manager, provider } = createManagerHarness({
179
182
  inboundPolicy: "allowlist",
180
183
  allowFrom: ["+15550001234"],
181
184
  });
182
185
 
183
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
184
- const provider = new FakeProvider();
185
- const manager = new CallManager(config, storePath);
186
- manager.initialize(provider, "https://example.com/voice/webhook");
187
-
188
186
  manager.processEvent({
189
187
  id: "evt-allowlist-suffix",
190
188
  type: "call.initiated",
@@ -202,18 +200,10 @@ describe("CallManager", () => {
202
200
  });
203
201
 
204
202
  it("rejects duplicate inbound events with a single hangup call", () => {
205
- const config = VoiceCallConfigSchema.parse({
206
- enabled: true,
207
- provider: "plivo",
208
- fromNumber: "+15550000000",
203
+ const { manager, provider } = createManagerHarness({
209
204
  inboundPolicy: "disabled",
210
205
  });
211
206
 
212
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
213
- const provider = new FakeProvider();
214
- const manager = new CallManager(config, storePath);
215
- manager.initialize(provider, "https://example.com/voice/webhook");
216
-
217
207
  manager.processEvent({
218
208
  id: "evt-reject-init",
219
209
  type: "call.initiated",
@@ -242,18 +232,11 @@ describe("CallManager", () => {
242
232
  });
243
233
 
244
234
  it("accepts inbound calls that exactly match the allowlist", () => {
245
- const config = VoiceCallConfigSchema.parse({
246
- enabled: true,
247
- provider: "plivo",
248
- fromNumber: "+15550000000",
235
+ const { manager } = createManagerHarness({
249
236
  inboundPolicy: "allowlist",
250
237
  allowFrom: ["+15550001234"],
251
238
  });
252
239
 
253
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
254
- const manager = new CallManager(config, storePath);
255
- manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
256
-
257
240
  manager.processEvent({
258
241
  id: "evt-allowlist-exact",
259
242
  type: "call.initiated",
@@ -269,28 +252,14 @@ describe("CallManager", () => {
269
252
  });
270
253
 
271
254
  it("completes a closed-loop turn without live audio", async () => {
272
- const config = VoiceCallConfigSchema.parse({
273
- enabled: true,
274
- provider: "plivo",
275
- fromNumber: "+15550000000",
255
+ const { manager, provider } = createManagerHarness({
276
256
  transcriptTimeoutMs: 5000,
277
257
  });
278
258
 
279
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
280
- const provider = new FakeProvider();
281
- const manager = new CallManager(config, storePath);
282
- manager.initialize(provider, "https://example.com/voice/webhook");
283
-
284
259
  const started = await manager.initiateCall("+15550000003");
285
260
  expect(started.success).toBe(true);
286
261
 
287
- manager.processEvent({
288
- id: "evt-closed-loop-answered",
289
- type: "call.answered",
290
- callId: started.callId,
291
- providerCallId: "request-uuid",
292
- timestamp: Date.now(),
293
- });
262
+ markCallAnswered(manager, started.callId, "evt-closed-loop-answered");
294
263
 
295
264
  const turnPromise = manager.continueCall(started.callId, "How can I help?");
296
265
  await new Promise((resolve) => setTimeout(resolve, 0));
@@ -323,28 +292,14 @@ describe("CallManager", () => {
323
292
  });
324
293
 
325
294
  it("rejects overlapping continueCall requests for the same call", async () => {
326
- const config = VoiceCallConfigSchema.parse({
327
- enabled: true,
328
- provider: "plivo",
329
- fromNumber: "+15550000000",
295
+ const { manager, provider } = createManagerHarness({
330
296
  transcriptTimeoutMs: 5000,
331
297
  });
332
298
 
333
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
334
- const provider = new FakeProvider();
335
- const manager = new CallManager(config, storePath);
336
- manager.initialize(provider, "https://example.com/voice/webhook");
337
-
338
299
  const started = await manager.initiateCall("+15550000004");
339
300
  expect(started.success).toBe(true);
340
301
 
341
- manager.processEvent({
342
- id: "evt-overlap-answered",
343
- type: "call.answered",
344
- callId: started.callId,
345
- providerCallId: "request-uuid",
346
- timestamp: Date.now(),
347
- });
302
+ markCallAnswered(manager, started.callId, "evt-overlap-answered");
348
303
 
349
304
  const first = manager.continueCall(started.callId, "First prompt");
350
305
  const second = await manager.continueCall(started.callId, "Second prompt");
@@ -368,30 +323,71 @@ describe("CallManager", () => {
368
323
  expect(provider.stopListeningCalls).toHaveLength(1);
369
324
  });
370
325
 
371
- it("tracks latency metadata across multiple closed-loop turns", async () => {
372
- const config = VoiceCallConfigSchema.parse({
373
- enabled: true,
374
- provider: "plivo",
375
- fromNumber: "+15550000000",
376
- transcriptTimeoutMs: 5000,
377
- });
378
-
379
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
380
- const provider = new FakeProvider();
381
- const manager = new CallManager(config, storePath);
382
- manager.initialize(provider, "https://example.com/voice/webhook");
326
+ it("ignores speech events with mismatched turnToken while waiting for transcript", async () => {
327
+ const { manager, provider } = createManagerHarness(
328
+ {
329
+ transcriptTimeoutMs: 5000,
330
+ },
331
+ new FakeProvider("twilio"),
332
+ );
383
333
 
384
- const started = await manager.initiateCall("+15550000005");
334
+ const started = await manager.initiateCall("+15550000004");
385
335
  expect(started.success).toBe(true);
386
336
 
337
+ markCallAnswered(manager, started.callId, "evt-turn-token-answered");
338
+
339
+ const turnPromise = manager.continueCall(started.callId, "Prompt");
340
+ await new Promise((resolve) => setTimeout(resolve, 0));
341
+
342
+ const expectedTurnToken = provider.startListeningCalls[0]?.turnToken;
343
+ expect(typeof expectedTurnToken).toBe("string");
344
+
387
345
  manager.processEvent({
388
- id: "evt-multi-answered",
389
- type: "call.answered",
346
+ id: "evt-turn-token-bad",
347
+ type: "call.speech",
390
348
  callId: started.callId,
391
349
  providerCallId: "request-uuid",
392
350
  timestamp: Date.now(),
351
+ transcript: "stale replay",
352
+ isFinal: true,
353
+ turnToken: "wrong-token",
354
+ });
355
+
356
+ const pendingState = await Promise.race([
357
+ turnPromise.then(() => "resolved"),
358
+ new Promise<"pending">((resolve) => setTimeout(() => resolve("pending"), 0)),
359
+ ]);
360
+ expect(pendingState).toBe("pending");
361
+
362
+ manager.processEvent({
363
+ id: "evt-turn-token-good",
364
+ type: "call.speech",
365
+ callId: started.callId,
366
+ providerCallId: "request-uuid",
367
+ timestamp: Date.now(),
368
+ transcript: "final answer",
369
+ isFinal: true,
370
+ turnToken: expectedTurnToken,
393
371
  });
394
372
 
373
+ const turnResult = await turnPromise;
374
+ expect(turnResult.success).toBe(true);
375
+ expect(turnResult.transcript).toBe("final answer");
376
+
377
+ const call = manager.getCall(started.callId);
378
+ expect(call?.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
379
+ });
380
+
381
+ it("tracks latency metadata across multiple closed-loop turns", async () => {
382
+ const { manager, provider } = createManagerHarness({
383
+ transcriptTimeoutMs: 5000,
384
+ });
385
+
386
+ const started = await manager.initiateCall("+15550000005");
387
+ expect(started.success).toBe(true);
388
+
389
+ markCallAnswered(manager, started.callId, "evt-multi-answered");
390
+
395
391
  const firstTurn = manager.continueCall(started.callId, "First question");
396
392
  await new Promise((resolve) => setTimeout(resolve, 0));
397
393
  manager.processEvent({
@@ -436,28 +432,14 @@ describe("CallManager", () => {
436
432
  });
437
433
 
438
434
  it("handles repeated closed-loop turns without waiter churn", async () => {
439
- const config = VoiceCallConfigSchema.parse({
440
- enabled: true,
441
- provider: "plivo",
442
- fromNumber: "+15550000000",
435
+ const { manager, provider } = createManagerHarness({
443
436
  transcriptTimeoutMs: 5000,
444
437
  });
445
438
 
446
- const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
447
- const provider = new FakeProvider();
448
- const manager = new CallManager(config, storePath);
449
- manager.initialize(provider, "https://example.com/voice/webhook");
450
-
451
439
  const started = await manager.initiateCall("+15550000006");
452
440
  expect(started.success).toBe(true);
453
441
 
454
- manager.processEvent({
455
- id: "evt-loop-answered",
456
- type: "call.answered",
457
- callId: started.callId,
458
- providerCallId: "request-uuid",
459
- timestamp: Date.now(),
460
- });
442
+ markCallAnswered(manager, started.callId, "evt-loop-answered");
461
443
 
462
444
  for (let i = 1; i <= 5; i++) {
463
445
  const turnPromise = manager.continueCall(started.callId, `Prompt ${i}`);
@@ -1,4 +1,7 @@
1
+ import { once } from "node:events";
2
+ import http from "node:http";
1
3
  import { describe, expect, it } from "vitest";
4
+ import { WebSocket } from "ws";
2
5
  import { MediaStreamHandler } from "./media-stream.js";
3
6
  import type {
4
7
  OpenAIRealtimeSTTProvider,
@@ -34,6 +37,70 @@ const waitForAbort = (signal: AbortSignal): Promise<void> =>
34
37
  signal.addEventListener("abort", () => resolve(), { once: true });
35
38
  });
36
39
 
40
+ const withTimeout = async <T>(promise: Promise<T>, timeoutMs = 2000): Promise<T> => {
41
+ let timer: ReturnType<typeof setTimeout> | null = null;
42
+ const timeout = new Promise<never>((_, reject) => {
43
+ timer = setTimeout(() => reject(new Error(`Timed out after ${timeoutMs}ms`)), timeoutMs);
44
+ });
45
+
46
+ try {
47
+ return await Promise.race([promise, timeout]);
48
+ } finally {
49
+ if (timer) {
50
+ clearTimeout(timer);
51
+ }
52
+ }
53
+ };
54
+
55
+ const startWsServer = async (
56
+ handler: MediaStreamHandler,
57
+ ): Promise<{
58
+ url: string;
59
+ close: () => Promise<void>;
60
+ }> => {
61
+ const server = http.createServer();
62
+ server.on("upgrade", (request, socket, head) => {
63
+ handler.handleUpgrade(request, socket, head);
64
+ });
65
+
66
+ await new Promise<void>((resolve) => {
67
+ server.listen(0, "127.0.0.1", resolve);
68
+ });
69
+
70
+ const address = server.address();
71
+ if (!address || typeof address === "string") {
72
+ throw new Error("Failed to resolve test server address");
73
+ }
74
+
75
+ return {
76
+ url: `ws://127.0.0.1:${address.port}/voice/stream`,
77
+ close: async () => {
78
+ await new Promise<void>((resolve, reject) => {
79
+ server.close((err) => (err ? reject(err) : resolve()));
80
+ });
81
+ },
82
+ };
83
+ };
84
+
85
+ const connectWs = async (url: string): Promise<WebSocket> => {
86
+ const ws = new WebSocket(url);
87
+ await withTimeout(once(ws, "open") as Promise<[unknown]>);
88
+ return ws;
89
+ };
90
+
91
+ const waitForClose = async (
92
+ ws: WebSocket,
93
+ ): Promise<{
94
+ code: number;
95
+ reason: string;
96
+ }> => {
97
+ const [code, reason] = (await withTimeout(once(ws, "close") as Promise<[number, Buffer]>)) ?? [];
98
+ return {
99
+ code,
100
+ reason: Buffer.isBuffer(reason) ? reason.toString() : String(reason || ""),
101
+ };
102
+ };
103
+
37
104
  describe("MediaStreamHandler TTS queue", () => {
38
105
  it("serializes TTS playback and resolves in order", async () => {
39
106
  const handler = new MediaStreamHandler({
@@ -94,3 +161,111 @@ describe("MediaStreamHandler TTS queue", () => {
94
161
  expect(queuedRan).toBe(false);
95
162
  });
96
163
  });
164
+
165
+ describe("MediaStreamHandler security hardening", () => {
166
+ it("closes idle pre-start connections after timeout", async () => {
167
+ const shouldAcceptStreamCalls: Array<{ callId: string; streamSid: string; token?: string }> =
168
+ [];
169
+ const handler = new MediaStreamHandler({
170
+ sttProvider: createStubSttProvider(),
171
+ preStartTimeoutMs: 40,
172
+ shouldAcceptStream: (params) => {
173
+ shouldAcceptStreamCalls.push(params);
174
+ return true;
175
+ },
176
+ });
177
+ const server = await startWsServer(handler);
178
+
179
+ try {
180
+ const ws = await connectWs(server.url);
181
+ const closed = await waitForClose(ws);
182
+
183
+ expect(closed.code).toBe(1008);
184
+ expect(closed.reason).toBe("Start timeout");
185
+ expect(shouldAcceptStreamCalls).toEqual([]);
186
+ } finally {
187
+ await server.close();
188
+ }
189
+ });
190
+
191
+ it("enforces pending connection limits", async () => {
192
+ const handler = new MediaStreamHandler({
193
+ sttProvider: createStubSttProvider(),
194
+ preStartTimeoutMs: 5_000,
195
+ maxPendingConnections: 1,
196
+ maxPendingConnectionsPerIp: 1,
197
+ });
198
+ const server = await startWsServer(handler);
199
+
200
+ try {
201
+ const first = await connectWs(server.url);
202
+ const second = await connectWs(server.url);
203
+ const secondClosed = await waitForClose(second);
204
+
205
+ expect(secondClosed.code).toBe(1013);
206
+ expect(secondClosed.reason).toContain("Too many pending");
207
+ expect(first.readyState).toBe(WebSocket.OPEN);
208
+
209
+ first.close();
210
+ await waitForClose(first);
211
+ } finally {
212
+ await server.close();
213
+ }
214
+ });
215
+
216
+ it("rejects upgrades when max connection cap is reached", async () => {
217
+ const handler = new MediaStreamHandler({
218
+ sttProvider: createStubSttProvider(),
219
+ preStartTimeoutMs: 5_000,
220
+ maxConnections: 1,
221
+ maxPendingConnections: 10,
222
+ maxPendingConnectionsPerIp: 10,
223
+ });
224
+ const server = await startWsServer(handler);
225
+
226
+ try {
227
+ const first = await connectWs(server.url);
228
+ const secondError = await withTimeout(
229
+ new Promise<Error>((resolve) => {
230
+ const ws = new WebSocket(server.url);
231
+ ws.once("error", (err) => resolve(err as Error));
232
+ }),
233
+ );
234
+
235
+ expect(secondError.message).toContain("Unexpected server response: 503");
236
+
237
+ first.close();
238
+ await waitForClose(first);
239
+ } finally {
240
+ await server.close();
241
+ }
242
+ });
243
+
244
+ it("clears pending state after valid start", async () => {
245
+ const handler = new MediaStreamHandler({
246
+ sttProvider: createStubSttProvider(),
247
+ preStartTimeoutMs: 40,
248
+ shouldAcceptStream: () => true,
249
+ });
250
+ const server = await startWsServer(handler);
251
+
252
+ try {
253
+ const ws = await connectWs(server.url);
254
+ ws.send(
255
+ JSON.stringify({
256
+ event: "start",
257
+ streamSid: "MZ123",
258
+ start: { callSid: "CA123", customParameters: { token: "token-123" } },
259
+ }),
260
+ );
261
+
262
+ await new Promise((resolve) => setTimeout(resolve, 80));
263
+ expect(ws.readyState).toBe(WebSocket.OPEN);
264
+
265
+ ws.close();
266
+ await waitForClose(ws);
267
+ } finally {
268
+ await server.close();
269
+ }
270
+ });
271
+ });