@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1

This diff compares publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +27 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +943 -0
  6. package/index.ts +379 -149
  7. package/openclaw.plugin.json +384 -157
  8. package/package.json +35 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +273 -12
  17. package/src/config.ts +355 -72
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +243 -19
  24. package/src/manager/events.ts +61 -31
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +528 -0
  28. package/src/manager/outbound.ts +163 -57
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +321 -0
  64. package/src/response-generator.ts +213 -53
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +429 -0
  68. package/src/runtime.ts +270 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +28 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +523 -102
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
@@ -1,6 +1,13 @@
1
1
  import crypto from "node:crypto";
2
- import type { CallMode } from "../config.js";
2
+ import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
3
3
  import {
4
+ resolveVoiceCallEffectiveConfig,
5
+ resolveVoiceCallSessionKey,
6
+ type CallMode,
7
+ } from "../config.js";
8
+ import { resolvePreferredTtsVoice } from "../tts-provider-voice.js";
9
+ import {
10
+ type EndReason,
4
11
  TerminalStates,
5
12
  type CallId,
6
13
  type CallRecord,
@@ -8,16 +15,12 @@ import {
8
15
  } from "../types.js";
9
16
  import { mapVoiceToPolly } from "../voice-mapping.js";
10
17
  import type { CallManagerContext } from "./context.js";
18
+ import { finalizeCall } from "./lifecycle.js";
11
19
  import { getCallByProviderCallId } from "./lookup.js";
12
20
  import { addTranscriptEntry, transitionState } from "./state.js";
13
21
  import { persistCallRecord } from "./store.js";
14
- import {
15
- clearMaxDurationTimer,
16
- clearTranscriptWaiter,
17
- rejectTranscriptWaiter,
18
- waitForFinalTranscript,
19
- } from "./timers.js";
20
- import { generateNotifyTwiml } from "./twiml.js";
22
+ import { clearTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
23
+ import { generateDtmfRedirectTwiml, generateNotifyTwiml } from "./twiml.js";
21
24
 
22
25
  type InitiateContext = Pick<
23
26
  CallManagerContext,
@@ -39,6 +42,7 @@ type ConversationContext = Pick<
39
42
  | "activeTurnCalls"
40
43
  | "transcriptWaiters"
41
44
  | "maxDurationTimers"
45
+ | "initialMessageInFlight"
42
46
  >;
43
47
 
44
48
  type EndCallContext = Pick<
@@ -102,6 +106,12 @@ function requireConnectedCall(ctx: ConnectedCallContext, callId: CallId): Connec
102
106
  };
103
107
  }
104
108
 
109
+ function validateDtmfDigits(digits: string): string | null {
110
+ return /^[0-9*#wWpP,]+$/.test(digits)
111
+ ? null
112
+ : "digits may only contain digits, *, #, comma, w, p";
113
+ }
114
+
105
115
  export async function initiateCall(
106
116
  ctx: InitiateContext,
107
117
  to: string,
@@ -112,6 +122,20 @@ export async function initiateCall(
112
122
  typeof options === "string" ? { message: options } : (options ?? {});
113
123
  const initialMessage = opts.message;
114
124
  const mode = opts.mode ?? ctx.config.outbound.defaultMode;
125
+ const dtmfSequence = opts.dtmfSequence;
126
+ if (dtmfSequence) {
127
+ const validationError = validateDtmfDigits(dtmfSequence);
128
+ if (validationError) {
129
+ return { callId: "", success: false, error: validationError };
130
+ }
131
+ if (mode !== "conversation") {
132
+ return {
133
+ callId: "",
134
+ success: false,
135
+ error: "dtmfSequence requires conversation mode",
136
+ };
137
+ }
138
+ }
115
139
 
116
140
  if (!ctx.provider) {
117
141
  return { callId: "", success: false, error: "Provider not initialized" };
@@ -142,7 +166,12 @@ export async function initiateCall(
142
166
  state: "initiated",
143
167
  from,
144
168
  to,
145
- sessionKey,
169
+ sessionKey: resolveVoiceCallSessionKey({
170
+ config: ctx.config,
171
+ callId,
172
+ phone: to,
173
+ explicitSessionKey: sessionKey,
174
+ }),
146
175
  startedAt: Date.now(),
147
176
  transcript: [],
148
177
  processedEventIds: [],
@@ -158,10 +187,16 @@ export async function initiateCall(
158
187
  try {
159
188
  // For notify mode with a message, use inline TwiML with <Say>.
160
189
  let inlineTwiml: string | undefined;
190
+ let preConnectTwiml: string | undefined;
161
191
  if (mode === "notify" && initialMessage) {
162
- const pollyVoice = mapVoiceToPolly(ctx.config.tts?.openai?.voice);
192
+ const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
163
193
  inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
164
194
  console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
195
+ } else if (dtmfSequence) {
196
+ preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
197
+ console.log(
198
+ `[voice-call] Using pre-connect DTMF TwiML for call ${callId} (digits=${dtmfSequence.length}, initialMessage=${initialMessage ? "yes" : "no"})`,
199
+ );
165
200
  }
166
201
 
167
202
  const result = await ctx.provider.initiateCall({
@@ -170,27 +205,28 @@ export async function initiateCall(
170
205
  to,
171
206
  webhookUrl: ctx.webhookUrl,
172
207
  inlineTwiml,
208
+ preConnectTwiml,
173
209
  });
174
210
 
175
211
  callRecord.providerCallId = result.providerCallId;
176
212
  ctx.providerCallIdMap.set(result.providerCallId, callId);
177
213
  persistCallRecord(ctx.storePath, callRecord);
214
+ console.log(
215
+ `[voice-call] Outbound call initiated: callId=${callId} providerCallId=${result.providerCallId} mode=${mode} preConnectDtmf=${preConnectTwiml ? "yes" : "no"} initialMessage=${initialMessage ? "yes" : "no"}`,
216
+ );
178
217
 
179
218
  return { callId, success: true };
180
219
  } catch (err) {
181
- callRecord.state = "failed";
182
- callRecord.endedAt = Date.now();
183
- callRecord.endReason = "failed";
184
- persistCallRecord(ctx.storePath, callRecord);
185
- ctx.activeCalls.delete(callId);
186
- if (callRecord.providerCallId) {
187
- ctx.providerCallIdMap.delete(callRecord.providerCallId);
188
- }
220
+ finalizeCall({
221
+ ctx,
222
+ call: callRecord,
223
+ endReason: "failed",
224
+ });
189
225
 
190
226
  return {
191
227
  callId,
192
228
  success: false,
193
- error: err instanceof Error ? err.message : String(err),
229
+ error: formatErrorMessage(err),
194
230
  };
195
231
  }
196
232
  }
@@ -210,9 +246,11 @@ export async function speak(
210
246
  transitionState(call, "speaking");
211
247
  persistCallRecord(ctx.storePath, call);
212
248
 
213
- addTranscriptEntry(call, "bot", text);
214
-
215
- const voice = provider.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
249
+ const numberRouteKey =
250
+ typeof call.metadata?.numberRouteKey === "string" ? call.metadata.numberRouteKey : call.to;
251
+ const voice = resolvePreferredTtsVoice(
252
+ resolveVoiceCallEffectiveConfig(ctx.config, numberRouteKey).config,
253
+ );
216
254
  await provider.playTts({
217
255
  callId,
218
256
  providerCallId,
@@ -220,9 +258,57 @@ export async function speak(
220
258
  voice,
221
259
  });
222
260
 
261
+ addTranscriptEntry(call, "bot", text);
262
+ persistCallRecord(ctx.storePath, call);
263
+
264
+ return { success: true };
265
+ } catch (err) {
266
+ // A failed playback should not leave the call stuck in speaking state.
267
+ transitionState(call, "listening");
268
+ persistCallRecord(ctx.storePath, call);
269
+ return { success: false, error: formatErrorMessage(err) };
270
+ }
271
+ }
272
+
273
+ function shouldStartListeningAfterInitialMessage(ctx: ConversationContext): boolean {
274
+ if (ctx.provider?.name !== "twilio") {
275
+ return true;
276
+ }
277
+ if (!ctx.config.streaming.enabled) {
278
+ return true;
279
+ }
280
+ const streamAwareProvider = ctx.provider as typeof ctx.provider & {
281
+ isConversationStreamConnectEnabled?: () => boolean;
282
+ };
283
+ return streamAwareProvider.isConversationStreamConnectEnabled?.() !== true;
284
+ }
285
+
286
+ export async function sendDtmf(
287
+ ctx: SpeakContext,
288
+ callId: CallId,
289
+ digits: string,
290
+ ): Promise<{ success: boolean; error?: string }> {
291
+ const validationError = validateDtmfDigits(digits);
292
+ if (validationError) {
293
+ return { success: false, error: validationError };
294
+ }
295
+ const connected = requireConnectedCall(ctx, callId);
296
+ if (!connected.ok) {
297
+ return { success: false, error: connected.error };
298
+ }
299
+ if (!connected.provider.sendDtmf) {
300
+ return { success: false, error: `${connected.provider.name} does not support outbound DTMF` };
301
+ }
302
+
303
+ try {
304
+ await connected.provider.sendDtmf({
305
+ callId,
306
+ providerCallId: connected.providerCallId,
307
+ digits,
308
+ });
223
309
  return { success: true };
224
310
  } catch (err) {
225
- return { success: false, error: err instanceof Error ? err.message : String(err) };
311
+ return { success: false, error: formatErrorMessage(err) };
226
312
  }
227
313
  }
228
314
 
@@ -248,29 +334,52 @@ export async function speakInitialMessage(
248
334
  return;
249
335
  }
250
336
 
251
- // Clear so we don't speak it again if the provider reconnects.
252
- if (call.metadata) {
253
- delete call.metadata.initialMessage;
254
- persistCallRecord(ctx.storePath, call);
255
- }
256
-
257
- console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
258
- const result = await speak(ctx, call.callId, initialMessage);
259
- if (!result.success) {
260
- console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
337
+ if (ctx.initialMessageInFlight.has(call.callId)) {
338
+ console.log(
339
+ `[voice-call] speakInitialMessage: initial message already in flight for ${call.callId}`,
340
+ );
261
341
  return;
262
342
  }
343
+ ctx.initialMessageInFlight.add(call.callId);
263
344
 
264
- if (mode === "notify") {
265
- const delaySec = ctx.config.outbound.notifyHangupDelaySec;
266
- console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
267
- setTimeout(async () => {
268
- const currentCall = ctx.activeCalls.get(call.callId);
269
- if (currentCall && !TerminalStates.has(currentCall.state)) {
270
- console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
271
- await endCall(ctx, call.callId);
272
- }
273
- }, delaySec * 1000);
345
+ try {
346
+ console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
347
+ const result = await speak(ctx, call.callId, initialMessage);
348
+ if (!result.success) {
349
+ console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
350
+ return;
351
+ }
352
+
353
+ // Clear only after successful playback so transient provider failures can retry.
354
+ if (call.metadata) {
355
+ delete call.metadata.initialMessage;
356
+ persistCallRecord(ctx.storePath, call);
357
+ }
358
+
359
+ if (mode === "notify") {
360
+ const delaySec = ctx.config.outbound.notifyHangupDelaySec;
361
+ console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
362
+ setTimeout(async () => {
363
+ const currentCall = ctx.activeCalls.get(call.callId);
364
+ if (currentCall && !TerminalStates.has(currentCall.state)) {
365
+ console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
366
+ await endCall(ctx, call.callId);
367
+ }
368
+ }, delaySec * 1000);
369
+ } else if (
370
+ mode === "conversation" &&
371
+ ctx.provider &&
372
+ shouldStartListeningAfterInitialMessage(ctx)
373
+ ) {
374
+ transitionState(call, "listening");
375
+ persistCallRecord(ctx.storePath, call);
376
+ await ctx.provider.startListening({
377
+ callId: call.callId,
378
+ providerCallId,
379
+ });
380
+ }
381
+ } finally {
382
+ ctx.initialMessageInFlight.delete(call.callId);
274
383
  }
275
384
  }
276
385
 
@@ -316,7 +425,7 @@ export async function continueCall(
316
425
  : 1;
317
426
 
318
427
  call.metadata = {
319
- ...(call.metadata ?? {}),
428
+ ...call.metadata,
320
429
  turnCount,
321
430
  lastTurnLatencyMs,
322
431
  lastTurnListenWaitMs,
@@ -335,7 +444,7 @@ export async function continueCall(
335
444
 
336
445
  return { success: true, transcript };
337
446
  } catch (err) {
338
- return { success: false, error: err instanceof Error ? err.message : String(err) };
447
+ return { success: false, error: formatErrorMessage(err) };
339
448
  } finally {
340
449
  ctx.activeTurnCalls.delete(callId);
341
450
  clearTranscriptWaiter(ctx, callId);
@@ -345,6 +454,7 @@ export async function continueCall(
345
454
  export async function endCall(
346
455
  ctx: EndCallContext,
347
456
  callId: CallId,
457
+ options?: { reason?: EndReason },
348
458
  ): Promise<{ success: boolean; error?: string }> {
349
459
  const lookup = lookupConnectedCall(ctx, callId);
350
460
  if (lookup.kind === "error") {
@@ -354,27 +464,23 @@ export async function endCall(
354
464
  return { success: true };
355
465
  }
356
466
  const { call, providerCallId, provider } = lookup;
467
+ const reason = options?.reason ?? "hangup-bot";
357
468
 
358
469
  try {
359
470
  await provider.hangupCall({
360
471
  callId,
361
472
  providerCallId,
362
- reason: "hangup-bot",
473
+ reason,
363
474
  });
364
475
 
365
- call.state = "hangup-bot";
366
- call.endedAt = Date.now();
367
- call.endReason = "hangup-bot";
368
- persistCallRecord(ctx.storePath, call);
369
-
370
- clearMaxDurationTimer(ctx, callId);
371
- rejectTranscriptWaiter(ctx, callId, "Call ended: hangup-bot");
372
-
373
- ctx.activeCalls.delete(callId);
374
- ctx.providerCallIdMap.delete(providerCallId);
476
+ finalizeCall({
477
+ ctx,
478
+ call,
479
+ endReason: reason,
480
+ });
375
481
 
376
482
  return { success: true };
377
483
  } catch (err) {
378
- return { success: false, error: err instanceof Error ? err.message : String(err) };
484
+ return { success: false, error: formatErrorMessage(err) };
379
485
  }
380
486
  }
@@ -3,13 +3,25 @@ import fsp from "node:fs/promises";
3
3
  import path from "node:path";
4
4
  import { CallRecordSchema, TerminalStates, type CallId, type CallRecord } from "../types.js";
5
5
 
6
+ const pendingPersistWrites = new Set<Promise<void>>();
7
+
6
8
  export function persistCallRecord(storePath: string, call: CallRecord): void {
7
9
  const logPath = path.join(storePath, "calls.jsonl");
8
10
  const line = `${JSON.stringify(call)}\n`;
9
11
  // Fire-and-forget async write to avoid blocking event loop.
10
- fsp.appendFile(logPath, line).catch((err) => {
11
- console.error("[voice-call] Failed to persist call record:", err);
12
- });
12
+ const write = fsp
13
+ .appendFile(logPath, line)
14
+ .catch((err) => {
15
+ console.error("[voice-call] Failed to persist call record:", err);
16
+ })
17
+ .finally(() => {
18
+ pendingPersistWrites.delete(write);
19
+ });
20
+ pendingPersistWrites.add(write);
21
+ }
22
+
23
+ export async function flushPendingCallRecordWritesForTest(): Promise<void> {
24
+ await Promise.allSettled(pendingPersistWrites);
13
25
  }
14
26
 
15
27
  export function loadActiveCallsFromStore(storePath: string): {
@@ -50,6 +62,9 @@ export function loadActiveCallsFromStore(storePath: string): {
50
62
  const rejectedProviderCallIds = new Set<string>();
51
63
 
52
64
  for (const [callId, call] of callMap) {
65
+ for (const eventId of call.processedEventIds) {
66
+ processedEventIds.add(eventId);
67
+ }
53
68
  if (TerminalStates.has(call.state)) {
54
69
  continue;
55
70
  }
@@ -57,9 +72,6 @@ export function loadActiveCallsFromStore(storePath: string): {
57
72
  if (call.providerCallId) {
58
73
  providerCallIdMap.set(call.providerCallId, callId);
59
74
  }
60
- for (const eventId of call.processedEventIds) {
61
- processedEventIds.add(eventId);
62
- }
63
75
  }
64
76
 
65
77
  return { activeCalls, providerCallIdMap, processedEventIds, rejectedProviderCallIds };
@@ -0,0 +1,129 @@
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
+
3
+ const { persistCallRecordMock } = vi.hoisted(() => ({
4
+ persistCallRecordMock: vi.fn(),
5
+ }));
6
+
7
+ vi.mock("./store.js", () => ({
8
+ persistCallRecord: persistCallRecordMock,
9
+ }));
10
+
11
+ import {
12
+ clearMaxDurationTimer,
13
+ clearTranscriptWaiter,
14
+ rejectTranscriptWaiter,
15
+ resolveTranscriptWaiter,
16
+ startMaxDurationTimer,
17
+ waitForFinalTranscript,
18
+ } from "./timers.js";
19
+
20
+ describe("voice-call manager timers", () => {
21
+ beforeEach(() => {
22
+ vi.useFakeTimers();
23
+ vi.clearAllMocks();
24
+ });
25
+
26
+ afterEach(() => {
27
+ vi.useRealTimers();
28
+ });
29
+
30
+ it("starts and clears max duration timers, persisting timeout metadata before delegation", async () => {
31
+ const call = { id: "call-1", state: "active" };
32
+ const ctx = {
33
+ activeCalls: new Map([["call-1", call]]),
34
+ maxDurationTimers: new Map(),
35
+ config: { maxDurationSeconds: 5 },
36
+ storePath: "/tmp/voice-call",
37
+ };
38
+ const onTimeout = vi.fn(async () => {});
39
+
40
+ startMaxDurationTimer({
41
+ ctx: ctx as never,
42
+ callId: "call-1",
43
+ onTimeout,
44
+ });
45
+
46
+ expect(ctx.maxDurationTimers.has("call-1")).toBe(true);
47
+
48
+ await vi.advanceTimersByTimeAsync(5_000);
49
+
50
+ expect(call).toEqual({ id: "call-1", state: "active", endReason: "timeout" });
51
+ expect(persistCallRecordMock).toHaveBeenCalledWith("/tmp/voice-call", call);
52
+ expect(onTimeout).toHaveBeenCalledWith("call-1");
53
+ expect(ctx.maxDurationTimers.has("call-1")).toBe(false);
54
+
55
+ startMaxDurationTimer({
56
+ ctx: ctx as never,
57
+ callId: "call-1",
58
+ onTimeout,
59
+ });
60
+ clearMaxDurationTimer(ctx as never, "call-1");
61
+ expect(ctx.maxDurationTimers.has("call-1")).toBe(false);
62
+ });
63
+
64
+ it("does not time out terminal calls", async () => {
65
+ const ctx = {
66
+ activeCalls: new Map([["call-1", { id: "call-1", state: "completed" }]]),
67
+ maxDurationTimers: new Map(),
68
+ config: { maxDurationSeconds: 5 },
69
+ storePath: "/tmp/voice-call",
70
+ };
71
+ const onTimeout = vi.fn(async () => {});
72
+
73
+ startMaxDurationTimer({
74
+ ctx: ctx as never,
75
+ callId: "call-1",
76
+ onTimeout,
77
+ });
78
+
79
+ await vi.advanceTimersByTimeAsync(5_000);
80
+
81
+ expect(persistCallRecordMock).not.toHaveBeenCalled();
82
+ expect(onTimeout).not.toHaveBeenCalled();
83
+ });
84
+
85
+ it("waits for transcripts, resolves matching tokens, rejects mismatches and timeouts", async () => {
86
+ const ctx = {
87
+ transcriptWaiters: new Map(),
88
+ config: { transcriptTimeoutMs: 1_000 },
89
+ };
90
+
91
+ const pending = waitForFinalTranscript(ctx as never, "call-1", "turn-1");
92
+ expect(resolveTranscriptWaiter(ctx as never, "call-1", "ignored", "turn-2")).toBe(false);
93
+ expect(resolveTranscriptWaiter(ctx as never, "call-1", "final transcript", "turn-1")).toBe(
94
+ true,
95
+ );
96
+ await expect(pending).resolves.toBe("final transcript");
97
+
98
+ const another = waitForFinalTranscript(ctx as never, "call-2");
99
+ rejectTranscriptWaiter(ctx as never, "call-2", "provider failed");
100
+ await expect(another).rejects.toThrow("provider failed");
101
+
102
+ const timedOut = waitForFinalTranscript(ctx as never, "call-3").catch((error) => error);
103
+ await vi.advanceTimersByTimeAsync(1_000);
104
+ await expect(timedOut).resolves.toEqual(
105
+ expect.objectContaining({
106
+ message: "Timed out waiting for transcript after 1000ms",
107
+ }),
108
+ );
109
+
110
+ const toClear = waitForFinalTranscript(ctx as never, "call-4");
111
+ clearTranscriptWaiter(ctx as never, "call-4");
112
+ expect(ctx.transcriptWaiters.has("call-4")).toBe(false);
113
+ void toClear.catch(() => {});
114
+ });
115
+
116
+ it("rejects duplicate transcript waiters for the same call", async () => {
117
+ const ctx = {
118
+ transcriptWaiters: new Map(),
119
+ config: { transcriptTimeoutMs: 1_000 },
120
+ };
121
+
122
+ const pending = waitForFinalTranscript(ctx as never, "call-1");
123
+ await expect(waitForFinalTranscript(ctx as never, "call-1")).rejects.toThrow(
124
+ "Already waiting for transcript",
125
+ );
126
+ rejectTranscriptWaiter(ctx as never, "call-1", "done");
127
+ await expect(pending).rejects.toThrow("done");
128
+ });
129
+ });
@@ -27,12 +27,13 @@ export function startMaxDurationTimer(params: {
27
27
  ctx: MaxDurationTimerContext;
28
28
  callId: CallId;
29
29
  onTimeout: (callId: CallId) => Promise<void>;
30
+ timeoutMs?: number;
30
31
  }): void {
31
32
  clearMaxDurationTimer(params.ctx, params.callId);
32
33
 
33
- const maxDurationMs = params.ctx.config.maxDurationSeconds * 1000;
34
+ const maxDurationMs = params.timeoutMs ?? params.ctx.config.maxDurationSeconds * 1000;
34
35
  console.log(
35
- `[voice-call] Starting max duration timer (${params.ctx.config.maxDurationSeconds}s) for call ${params.callId}`,
36
+ `[voice-call] Starting max duration timer (${Math.ceil(maxDurationMs / 1000)}s) for call ${params.callId}`,
36
37
  );
37
38
 
38
39
  const timer = setTimeout(async () => {
@@ -40,7 +41,7 @@ export function startMaxDurationTimer(params: {
40
41
  const call = params.ctx.activeCalls.get(params.callId);
41
42
  if (call && !TerminalStates.has(call.state)) {
42
43
  console.log(
43
- `[voice-call] Max duration reached (${params.ctx.config.maxDurationSeconds}s), ending call ${params.callId}`,
44
+ `[voice-call] Max duration reached (${Math.ceil(maxDurationMs / 1000)}s), ending call ${params.callId}`,
44
45
  );
45
46
  call.endReason = "timeout";
46
47
  persistCallRecord(params.ctx.storePath, call);
@@ -0,0 +1,13 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { generateNotifyTwiml } from "./twiml.js";
3
+
4
+ describe("generateNotifyTwiml", () => {
5
+ it("renders escaped xml with the requested voice", () => {
6
+ expect(generateNotifyTwiml(`Call <ended> & "logged"`, "Polly.Joanna"))
7
+ .toBe(`<?xml version="1.0" encoding="UTF-8"?>
8
+ <Response>
9
+ <Say voice="Polly.Joanna">Call &lt;ended&gt; &amp; &quot;logged&quot;</Say>
10
+ <Hangup/>
11
+ </Response>`);
12
+ });
13
+ });
@@ -7,3 +7,11 @@ export function generateNotifyTwiml(message: string, voice: string): string {
7
7
  <Hangup/>
8
8
  </Response>`;
9
9
  }
10
+
11
+ export function generateDtmfRedirectTwiml(digits: string, webhookUrl: string): string {
12
+ return `<?xml version="1.0" encoding="UTF-8"?>
13
+ <Response>
14
+ <Play digits="${escapeXml(digits)}" />
15
+ <Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>
16
+ </Response>`;
17
+ }
@@ -1,6 +1,25 @@
1
1
  import { describe, expect, it } from "vitest";
2
2
  import { createManagerHarness, FakeProvider, markCallAnswered } from "./manager.test-harness.js";
3
3
 
4
+ function requireCall(
5
+ manager: Awaited<ReturnType<typeof createManagerHarness>>["manager"],
6
+ callId: string,
7
+ ) {
8
+ const call = manager.getCall(callId);
9
+ if (!call) {
10
+ throw new Error(`expected active call ${callId}`);
11
+ }
12
+ return call;
13
+ }
14
+
15
+ function requireTurnToken(provider: Awaited<ReturnType<typeof createManagerHarness>>["provider"]) {
16
+ const firstStart = provider.startListeningCalls[0];
17
+ if (!firstStart?.turnToken) {
18
+ throw new Error("expected closed-loop turn to capture a turn token");
19
+ }
20
+ return firstStart.turnToken;
21
+ }
22
+
4
23
  describe("CallManager closed-loop turns", () => {
5
24
  it("completes a closed-loop turn without live audio", async () => {
6
25
  const { manager, provider } = await createManagerHarness({
@@ -31,12 +50,12 @@ describe("CallManager closed-loop turns", () => {
31
50
  expect(provider.startListeningCalls).toHaveLength(1);
32
51
  expect(provider.stopListeningCalls).toHaveLength(1);
33
52
 
34
- const call = manager.getCall(started.callId);
35
- expect(call?.transcript.map((entry) => entry.text)).toEqual([
53
+ const call = requireCall(manager, started.callId);
54
+ expect(call.transcript.map((entry) => entry.text)).toEqual([
36
55
  "How can I help?",
37
56
  "Please check status",
38
57
  ]);
39
- const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
58
+ const metadata = call.metadata ?? {};
40
59
  expect(typeof metadata.lastTurnLatencyMs).toBe("number");
41
60
  expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
42
61
  expect(metadata.turnCount).toBe(1);
@@ -90,8 +109,7 @@ describe("CallManager closed-loop turns", () => {
90
109
  const turnPromise = manager.continueCall(started.callId, "Prompt");
91
110
  await new Promise((resolve) => setTimeout(resolve, 0));
92
111
 
93
- const expectedTurnToken = provider.startListeningCalls[0]?.turnToken;
94
- expect(typeof expectedTurnToken).toBe("string");
112
+ const expectedTurnToken = requireTurnToken(provider);
95
113
 
96
114
  manager.processEvent({
97
115
  id: "evt-turn-token-bad",
@@ -125,8 +143,8 @@ describe("CallManager closed-loop turns", () => {
125
143
  expect(turnResult.success).toBe(true);
126
144
  expect(turnResult.transcript).toBe("final answer");
127
145
 
128
- const call = manager.getCall(started.callId);
129
- expect(call?.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
146
+ const call = requireCall(manager, started.callId);
147
+ expect(call.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
130
148
  });
131
149
 
132
150
  it("tracks latency metadata across multiple closed-loop turns", async () => {
@@ -167,14 +185,14 @@ describe("CallManager closed-loop turns", () => {
167
185
 
168
186
  expect(secondResult.success).toBe(true);
169
187
 
170
- const call = manager.getCall(started.callId);
171
- expect(call?.transcript.map((entry) => entry.text)).toEqual([
188
+ const call = requireCall(manager, started.callId);
189
+ expect(call.transcript.map((entry) => entry.text)).toEqual([
172
190
  "First question",
173
191
  "First answer",
174
192
  "Second question",
175
193
  "Second answer",
176
194
  ]);
177
- const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
195
+ const metadata = call.metadata ?? {};
178
196
  expect(metadata.turnCount).toBe(2);
179
197
  expect(typeof metadata.lastTurnLatencyMs).toBe("number");
180
198
  expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
@@ -209,8 +227,8 @@ describe("CallManager closed-loop turns", () => {
209
227
  expect(result.transcript).toBe(`Answer ${i}`);
210
228
  }
211
229
 
212
- const call = manager.getCall(started.callId);
213
- const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
230
+ const call = requireCall(manager, started.callId);
231
+ const metadata = call.metadata ?? {};
214
232
  expect(metadata.turnCount).toBe(5);
215
233
  expect(provider.startListeningCalls).toHaveLength(5);
216
234
  expect(provider.stopListeningCalls).toHaveLength(5);