@openclaw/voice-call 2026.3.13 → 2026.5.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +25 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +866 -0
  6. package/index.ts +353 -148
  7. package/openclaw.plugin.json +336 -157
  8. package/package.json +33 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +160 -12
  17. package/src/config.ts +243 -74
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +179 -19
  24. package/src/manager/events.ts +48 -30
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +464 -0
  28. package/src/manager/outbound.ts +148 -55
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +277 -0
  64. package/src/response-generator.ts +186 -40
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +351 -0
  68. package/src/runtime.ts +254 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +26 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +513 -100
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
@@ -1,6 +1,9 @@
1
1
  import crypto from "node:crypto";
2
+ import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
2
3
  import type { CallMode } from "../config.js";
4
+ import { resolvePreferredTtsVoice } from "../tts-provider-voice.js";
3
5
  import {
6
+ type EndReason,
4
7
  TerminalStates,
5
8
  type CallId,
6
9
  type CallRecord,
@@ -8,16 +11,12 @@ import {
8
11
  } from "../types.js";
9
12
  import { mapVoiceToPolly } from "../voice-mapping.js";
10
13
  import type { CallManagerContext } from "./context.js";
14
+ import { finalizeCall } from "./lifecycle.js";
11
15
  import { getCallByProviderCallId } from "./lookup.js";
12
16
  import { addTranscriptEntry, transitionState } from "./state.js";
13
17
  import { persistCallRecord } from "./store.js";
14
- import {
15
- clearMaxDurationTimer,
16
- clearTranscriptWaiter,
17
- rejectTranscriptWaiter,
18
- waitForFinalTranscript,
19
- } from "./timers.js";
20
- import { generateNotifyTwiml } from "./twiml.js";
18
+ import { clearTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
19
+ import { generateDtmfRedirectTwiml, generateNotifyTwiml } from "./twiml.js";
21
20
 
22
21
  type InitiateContext = Pick<
23
22
  CallManagerContext,
@@ -39,6 +38,7 @@ type ConversationContext = Pick<
39
38
  | "activeTurnCalls"
40
39
  | "transcriptWaiters"
41
40
  | "maxDurationTimers"
41
+ | "initialMessageInFlight"
42
42
  >;
43
43
 
44
44
  type EndCallContext = Pick<
@@ -102,6 +102,12 @@ function requireConnectedCall(ctx: ConnectedCallContext, callId: CallId): Connec
102
102
  };
103
103
  }
104
104
 
105
+ function validateDtmfDigits(digits: string): string | null {
106
+ return /^[0-9*#wWpP,]+$/.test(digits)
107
+ ? null
108
+ : "digits may only contain digits, *, #, comma, w, p";
109
+ }
110
+
105
111
  export async function initiateCall(
106
112
  ctx: InitiateContext,
107
113
  to: string,
@@ -112,6 +118,20 @@ export async function initiateCall(
112
118
  typeof options === "string" ? { message: options } : (options ?? {});
113
119
  const initialMessage = opts.message;
114
120
  const mode = opts.mode ?? ctx.config.outbound.defaultMode;
121
+ const dtmfSequence = opts.dtmfSequence;
122
+ if (dtmfSequence) {
123
+ const validationError = validateDtmfDigits(dtmfSequence);
124
+ if (validationError) {
125
+ return { callId: "", success: false, error: validationError };
126
+ }
127
+ if (mode !== "conversation") {
128
+ return {
129
+ callId: "",
130
+ success: false,
131
+ error: "dtmfSequence requires conversation mode",
132
+ };
133
+ }
134
+ }
115
135
 
116
136
  if (!ctx.provider) {
117
137
  return { callId: "", success: false, error: "Provider not initialized" };
@@ -158,10 +178,16 @@ export async function initiateCall(
158
178
  try {
159
179
  // For notify mode with a message, use inline TwiML with <Say>.
160
180
  let inlineTwiml: string | undefined;
181
+ let preConnectTwiml: string | undefined;
161
182
  if (mode === "notify" && initialMessage) {
162
- const pollyVoice = mapVoiceToPolly(ctx.config.tts?.openai?.voice);
183
+ const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
163
184
  inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
164
185
  console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
186
+ } else if (dtmfSequence) {
187
+ preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
188
+ console.log(
189
+ `[voice-call] Using pre-connect DTMF TwiML for call ${callId} (digits=${dtmfSequence.length}, initialMessage=${initialMessage ? "yes" : "no"})`,
190
+ );
165
191
  }
166
192
 
167
193
  const result = await ctx.provider.initiateCall({
@@ -170,27 +196,28 @@ export async function initiateCall(
170
196
  to,
171
197
  webhookUrl: ctx.webhookUrl,
172
198
  inlineTwiml,
199
+ preConnectTwiml,
173
200
  });
174
201
 
175
202
  callRecord.providerCallId = result.providerCallId;
176
203
  ctx.providerCallIdMap.set(result.providerCallId, callId);
177
204
  persistCallRecord(ctx.storePath, callRecord);
205
+ console.log(
206
+ `[voice-call] Outbound call initiated: callId=${callId} providerCallId=${result.providerCallId} mode=${mode} preConnectDtmf=${preConnectTwiml ? "yes" : "no"} initialMessage=${initialMessage ? "yes" : "no"}`,
207
+ );
178
208
 
179
209
  return { callId, success: true };
180
210
  } catch (err) {
181
- callRecord.state = "failed";
182
- callRecord.endedAt = Date.now();
183
- callRecord.endReason = "failed";
184
- persistCallRecord(ctx.storePath, callRecord);
185
- ctx.activeCalls.delete(callId);
186
- if (callRecord.providerCallId) {
187
- ctx.providerCallIdMap.delete(callRecord.providerCallId);
188
- }
211
+ finalizeCall({
212
+ ctx,
213
+ call: callRecord,
214
+ endReason: "failed",
215
+ });
189
216
 
190
217
  return {
191
218
  callId,
192
219
  success: false,
193
- error: err instanceof Error ? err.message : String(err),
220
+ error: formatErrorMessage(err),
194
221
  };
195
222
  }
196
223
  }
@@ -210,9 +237,7 @@ export async function speak(
210
237
  transitionState(call, "speaking");
211
238
  persistCallRecord(ctx.storePath, call);
212
239
 
213
- addTranscriptEntry(call, "bot", text);
214
-
215
- const voice = provider.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
240
+ const voice = resolvePreferredTtsVoice(ctx.config);
216
241
  await provider.playTts({
217
242
  callId,
218
243
  providerCallId,
@@ -220,9 +245,57 @@ export async function speak(
220
245
  voice,
221
246
  });
222
247
 
248
+ addTranscriptEntry(call, "bot", text);
249
+ persistCallRecord(ctx.storePath, call);
250
+
223
251
  return { success: true };
224
252
  } catch (err) {
225
- return { success: false, error: err instanceof Error ? err.message : String(err) };
253
+ // A failed playback should not leave the call stuck in speaking state.
254
+ transitionState(call, "listening");
255
+ persistCallRecord(ctx.storePath, call);
256
+ return { success: false, error: formatErrorMessage(err) };
257
+ }
258
+ }
259
+
260
+ function shouldStartListeningAfterInitialMessage(ctx: ConversationContext): boolean {
261
+ if (ctx.provider?.name !== "twilio") {
262
+ return true;
263
+ }
264
+ if (!ctx.config.streaming.enabled) {
265
+ return true;
266
+ }
267
+ const streamAwareProvider = ctx.provider as typeof ctx.provider & {
268
+ isConversationStreamConnectEnabled?: () => boolean;
269
+ };
270
+ return streamAwareProvider.isConversationStreamConnectEnabled?.() !== true;
271
+ }
272
+
273
+ export async function sendDtmf(
274
+ ctx: SpeakContext,
275
+ callId: CallId,
276
+ digits: string,
277
+ ): Promise<{ success: boolean; error?: string }> {
278
+ const validationError = validateDtmfDigits(digits);
279
+ if (validationError) {
280
+ return { success: false, error: validationError };
281
+ }
282
+ const connected = requireConnectedCall(ctx, callId);
283
+ if (!connected.ok) {
284
+ return { success: false, error: connected.error };
285
+ }
286
+ if (!connected.provider.sendDtmf) {
287
+ return { success: false, error: `${connected.provider.name} does not support outbound DTMF` };
288
+ }
289
+
290
+ try {
291
+ await connected.provider.sendDtmf({
292
+ callId,
293
+ providerCallId: connected.providerCallId,
294
+ digits,
295
+ });
296
+ return { success: true };
297
+ } catch (err) {
298
+ return { success: false, error: formatErrorMessage(err) };
226
299
  }
227
300
  }
228
301
 
@@ -248,29 +321,52 @@ export async function speakInitialMessage(
248
321
  return;
249
322
  }
250
323
 
251
- // Clear so we don't speak it again if the provider reconnects.
252
- if (call.metadata) {
253
- delete call.metadata.initialMessage;
254
- persistCallRecord(ctx.storePath, call);
255
- }
256
-
257
- console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
258
- const result = await speak(ctx, call.callId, initialMessage);
259
- if (!result.success) {
260
- console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
324
+ if (ctx.initialMessageInFlight.has(call.callId)) {
325
+ console.log(
326
+ `[voice-call] speakInitialMessage: initial message already in flight for ${call.callId}`,
327
+ );
261
328
  return;
262
329
  }
330
+ ctx.initialMessageInFlight.add(call.callId);
331
+
332
+ try {
333
+ console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
334
+ const result = await speak(ctx, call.callId, initialMessage);
335
+ if (!result.success) {
336
+ console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
337
+ return;
338
+ }
339
+
340
+ // Clear only after successful playback so transient provider failures can retry.
341
+ if (call.metadata) {
342
+ delete call.metadata.initialMessage;
343
+ persistCallRecord(ctx.storePath, call);
344
+ }
263
345
 
264
- if (mode === "notify") {
265
- const delaySec = ctx.config.outbound.notifyHangupDelaySec;
266
- console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
267
- setTimeout(async () => {
268
- const currentCall = ctx.activeCalls.get(call.callId);
269
- if (currentCall && !TerminalStates.has(currentCall.state)) {
270
- console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
271
- await endCall(ctx, call.callId);
272
- }
273
- }, delaySec * 1000);
346
+ if (mode === "notify") {
347
+ const delaySec = ctx.config.outbound.notifyHangupDelaySec;
348
+ console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
349
+ setTimeout(async () => {
350
+ const currentCall = ctx.activeCalls.get(call.callId);
351
+ if (currentCall && !TerminalStates.has(currentCall.state)) {
352
+ console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
353
+ await endCall(ctx, call.callId);
354
+ }
355
+ }, delaySec * 1000);
356
+ } else if (
357
+ mode === "conversation" &&
358
+ ctx.provider &&
359
+ shouldStartListeningAfterInitialMessage(ctx)
360
+ ) {
361
+ transitionState(call, "listening");
362
+ persistCallRecord(ctx.storePath, call);
363
+ await ctx.provider.startListening({
364
+ callId: call.callId,
365
+ providerCallId,
366
+ });
367
+ }
368
+ } finally {
369
+ ctx.initialMessageInFlight.delete(call.callId);
274
370
  }
275
371
  }
276
372
 
@@ -316,7 +412,7 @@ export async function continueCall(
316
412
  : 1;
317
413
 
318
414
  call.metadata = {
319
- ...(call.metadata ?? {}),
415
+ ...call.metadata,
320
416
  turnCount,
321
417
  lastTurnLatencyMs,
322
418
  lastTurnListenWaitMs,
@@ -335,7 +431,7 @@ export async function continueCall(
335
431
 
336
432
  return { success: true, transcript };
337
433
  } catch (err) {
338
- return { success: false, error: err instanceof Error ? err.message : String(err) };
434
+ return { success: false, error: formatErrorMessage(err) };
339
435
  } finally {
340
436
  ctx.activeTurnCalls.delete(callId);
341
437
  clearTranscriptWaiter(ctx, callId);
@@ -345,6 +441,7 @@ export async function continueCall(
345
441
  export async function endCall(
346
442
  ctx: EndCallContext,
347
443
  callId: CallId,
444
+ options?: { reason?: EndReason },
348
445
  ): Promise<{ success: boolean; error?: string }> {
349
446
  const lookup = lookupConnectedCall(ctx, callId);
350
447
  if (lookup.kind === "error") {
@@ -354,27 +451,23 @@ export async function endCall(
354
451
  return { success: true };
355
452
  }
356
453
  const { call, providerCallId, provider } = lookup;
454
+ const reason = options?.reason ?? "hangup-bot";
357
455
 
358
456
  try {
359
457
  await provider.hangupCall({
360
458
  callId,
361
459
  providerCallId,
362
- reason: "hangup-bot",
460
+ reason,
363
461
  });
364
462
 
365
- call.state = "hangup-bot";
366
- call.endedAt = Date.now();
367
- call.endReason = "hangup-bot";
368
- persistCallRecord(ctx.storePath, call);
369
-
370
- clearMaxDurationTimer(ctx, callId);
371
- rejectTranscriptWaiter(ctx, callId, "Call ended: hangup-bot");
372
-
373
- ctx.activeCalls.delete(callId);
374
- ctx.providerCallIdMap.delete(providerCallId);
463
+ finalizeCall({
464
+ ctx,
465
+ call,
466
+ endReason: reason,
467
+ });
375
468
 
376
469
  return { success: true };
377
470
  } catch (err) {
378
- return { success: false, error: err instanceof Error ? err.message : String(err) };
471
+ return { success: false, error: formatErrorMessage(err) };
379
472
  }
380
473
  }
@@ -3,13 +3,25 @@ import fsp from "node:fs/promises";
3
3
  import path from "node:path";
4
4
  import { CallRecordSchema, TerminalStates, type CallId, type CallRecord } from "../types.js";
5
5
 
6
+ const pendingPersistWrites = new Set<Promise<void>>();
7
+
6
8
  export function persistCallRecord(storePath: string, call: CallRecord): void {
7
9
  const logPath = path.join(storePath, "calls.jsonl");
8
10
  const line = `${JSON.stringify(call)}\n`;
9
11
  // Fire-and-forget async write to avoid blocking event loop.
10
- fsp.appendFile(logPath, line).catch((err) => {
11
- console.error("[voice-call] Failed to persist call record:", err);
12
- });
12
+ const write = fsp
13
+ .appendFile(logPath, line)
14
+ .catch((err) => {
15
+ console.error("[voice-call] Failed to persist call record:", err);
16
+ })
17
+ .finally(() => {
18
+ pendingPersistWrites.delete(write);
19
+ });
20
+ pendingPersistWrites.add(write);
21
+ }
22
+
23
+ export async function flushPendingCallRecordWritesForTest(): Promise<void> {
24
+ await Promise.allSettled(pendingPersistWrites);
13
25
  }
14
26
 
15
27
  export function loadActiveCallsFromStore(storePath: string): {
@@ -50,6 +62,9 @@ export function loadActiveCallsFromStore(storePath: string): {
50
62
  const rejectedProviderCallIds = new Set<string>();
51
63
 
52
64
  for (const [callId, call] of callMap) {
65
+ for (const eventId of call.processedEventIds) {
66
+ processedEventIds.add(eventId);
67
+ }
53
68
  if (TerminalStates.has(call.state)) {
54
69
  continue;
55
70
  }
@@ -57,9 +72,6 @@ export function loadActiveCallsFromStore(storePath: string): {
57
72
  if (call.providerCallId) {
58
73
  providerCallIdMap.set(call.providerCallId, callId);
59
74
  }
60
- for (const eventId of call.processedEventIds) {
61
- processedEventIds.add(eventId);
62
- }
63
75
  }
64
76
 
65
77
  return { activeCalls, providerCallIdMap, processedEventIds, rejectedProviderCallIds };
@@ -0,0 +1,129 @@
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
+
3
+ const { persistCallRecordMock } = vi.hoisted(() => ({
4
+ persistCallRecordMock: vi.fn(),
5
+ }));
6
+
7
+ vi.mock("./store.js", () => ({
8
+ persistCallRecord: persistCallRecordMock,
9
+ }));
10
+
11
+ import {
12
+ clearMaxDurationTimer,
13
+ clearTranscriptWaiter,
14
+ rejectTranscriptWaiter,
15
+ resolveTranscriptWaiter,
16
+ startMaxDurationTimer,
17
+ waitForFinalTranscript,
18
+ } from "./timers.js";
19
+
20
+ describe("voice-call manager timers", () => {
21
+ beforeEach(() => {
22
+ vi.useFakeTimers();
23
+ vi.clearAllMocks();
24
+ });
25
+
26
+ afterEach(() => {
27
+ vi.useRealTimers();
28
+ });
29
+
30
+ it("starts and clears max duration timers, persisting timeout metadata before delegation", async () => {
31
+ const call = { id: "call-1", state: "active" };
32
+ const ctx = {
33
+ activeCalls: new Map([["call-1", call]]),
34
+ maxDurationTimers: new Map(),
35
+ config: { maxDurationSeconds: 5 },
36
+ storePath: "/tmp/voice-call",
37
+ };
38
+ const onTimeout = vi.fn(async () => {});
39
+
40
+ startMaxDurationTimer({
41
+ ctx: ctx as never,
42
+ callId: "call-1",
43
+ onTimeout,
44
+ });
45
+
46
+ expect(ctx.maxDurationTimers.has("call-1")).toBe(true);
47
+
48
+ await vi.advanceTimersByTimeAsync(5_000);
49
+
50
+ expect(call).toEqual({ id: "call-1", state: "active", endReason: "timeout" });
51
+ expect(persistCallRecordMock).toHaveBeenCalledWith("/tmp/voice-call", call);
52
+ expect(onTimeout).toHaveBeenCalledWith("call-1");
53
+ expect(ctx.maxDurationTimers.has("call-1")).toBe(false);
54
+
55
+ startMaxDurationTimer({
56
+ ctx: ctx as never,
57
+ callId: "call-1",
58
+ onTimeout,
59
+ });
60
+ clearMaxDurationTimer(ctx as never, "call-1");
61
+ expect(ctx.maxDurationTimers.has("call-1")).toBe(false);
62
+ });
63
+
64
+ it("does not time out terminal calls", async () => {
65
+ const ctx = {
66
+ activeCalls: new Map([["call-1", { id: "call-1", state: "completed" }]]),
67
+ maxDurationTimers: new Map(),
68
+ config: { maxDurationSeconds: 5 },
69
+ storePath: "/tmp/voice-call",
70
+ };
71
+ const onTimeout = vi.fn(async () => {});
72
+
73
+ startMaxDurationTimer({
74
+ ctx: ctx as never,
75
+ callId: "call-1",
76
+ onTimeout,
77
+ });
78
+
79
+ await vi.advanceTimersByTimeAsync(5_000);
80
+
81
+ expect(persistCallRecordMock).not.toHaveBeenCalled();
82
+ expect(onTimeout).not.toHaveBeenCalled();
83
+ });
84
+
85
+ it("waits for transcripts, resolves matching tokens, rejects mismatches and timeouts", async () => {
86
+ const ctx = {
87
+ transcriptWaiters: new Map(),
88
+ config: { transcriptTimeoutMs: 1_000 },
89
+ };
90
+
91
+ const pending = waitForFinalTranscript(ctx as never, "call-1", "turn-1");
92
+ expect(resolveTranscriptWaiter(ctx as never, "call-1", "ignored", "turn-2")).toBe(false);
93
+ expect(resolveTranscriptWaiter(ctx as never, "call-1", "final transcript", "turn-1")).toBe(
94
+ true,
95
+ );
96
+ await expect(pending).resolves.toBe("final transcript");
97
+
98
+ const another = waitForFinalTranscript(ctx as never, "call-2");
99
+ rejectTranscriptWaiter(ctx as never, "call-2", "provider failed");
100
+ await expect(another).rejects.toThrow("provider failed");
101
+
102
+ const timedOut = waitForFinalTranscript(ctx as never, "call-3").catch((error) => error);
103
+ await vi.advanceTimersByTimeAsync(1_000);
104
+ await expect(timedOut).resolves.toEqual(
105
+ expect.objectContaining({
106
+ message: "Timed out waiting for transcript after 1000ms",
107
+ }),
108
+ );
109
+
110
+ const toClear = waitForFinalTranscript(ctx as never, "call-4");
111
+ clearTranscriptWaiter(ctx as never, "call-4");
112
+ expect(ctx.transcriptWaiters.has("call-4")).toBe(false);
113
+ void toClear.catch(() => {});
114
+ });
115
+
116
+ it("rejects duplicate transcript waiters for the same call", async () => {
117
+ const ctx = {
118
+ transcriptWaiters: new Map(),
119
+ config: { transcriptTimeoutMs: 1_000 },
120
+ };
121
+
122
+ const pending = waitForFinalTranscript(ctx as never, "call-1");
123
+ await expect(waitForFinalTranscript(ctx as never, "call-1")).rejects.toThrow(
124
+ "Already waiting for transcript",
125
+ );
126
+ rejectTranscriptWaiter(ctx as never, "call-1", "done");
127
+ await expect(pending).rejects.toThrow("done");
128
+ });
129
+ });
@@ -27,12 +27,13 @@ export function startMaxDurationTimer(params: {
27
27
  ctx: MaxDurationTimerContext;
28
28
  callId: CallId;
29
29
  onTimeout: (callId: CallId) => Promise<void>;
30
+ timeoutMs?: number;
30
31
  }): void {
31
32
  clearMaxDurationTimer(params.ctx, params.callId);
32
33
 
33
- const maxDurationMs = params.ctx.config.maxDurationSeconds * 1000;
34
+ const maxDurationMs = params.timeoutMs ?? params.ctx.config.maxDurationSeconds * 1000;
34
35
  console.log(
35
- `[voice-call] Starting max duration timer (${params.ctx.config.maxDurationSeconds}s) for call ${params.callId}`,
36
+ `[voice-call] Starting max duration timer (${Math.ceil(maxDurationMs / 1000)}s) for call ${params.callId}`,
36
37
  );
37
38
 
38
39
  const timer = setTimeout(async () => {
@@ -40,7 +41,7 @@ export function startMaxDurationTimer(params: {
40
41
  const call = params.ctx.activeCalls.get(params.callId);
41
42
  if (call && !TerminalStates.has(call.state)) {
42
43
  console.log(
43
- `[voice-call] Max duration reached (${params.ctx.config.maxDurationSeconds}s), ending call ${params.callId}`,
44
+ `[voice-call] Max duration reached (${Math.ceil(maxDurationMs / 1000)}s), ending call ${params.callId}`,
44
45
  );
45
46
  call.endReason = "timeout";
46
47
  persistCallRecord(params.ctx.storePath, call);
@@ -0,0 +1,13 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { generateNotifyTwiml } from "./twiml.js";
3
+
4
+ describe("generateNotifyTwiml", () => {
5
+ it("renders escaped xml with the requested voice", () => {
6
+ expect(generateNotifyTwiml(`Call <ended> & "logged"`, "Polly.Joanna"))
7
+ .toBe(`<?xml version="1.0" encoding="UTF-8"?>
8
+ <Response>
9
+ <Say voice="Polly.Joanna">Call &lt;ended&gt; &amp; &quot;logged&quot;</Say>
10
+ <Hangup/>
11
+ </Response>`);
12
+ });
13
+ });
@@ -7,3 +7,11 @@ export function generateNotifyTwiml(message: string, voice: string): string {
7
7
  <Hangup/>
8
8
  </Response>`;
9
9
  }
10
+
11
+ export function generateDtmfRedirectTwiml(digits: string, webhookUrl: string): string {
12
+ return `<?xml version="1.0" encoding="UTF-8"?>
13
+ <Response>
14
+ <Play digits="${escapeXml(digits)}" />
15
+ <Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>
16
+ </Response>`;
17
+ }
@@ -1,6 +1,25 @@
1
1
  import { describe, expect, it } from "vitest";
2
2
  import { createManagerHarness, FakeProvider, markCallAnswered } from "./manager.test-harness.js";
3
3
 
4
+ function requireCall(
5
+ manager: Awaited<ReturnType<typeof createManagerHarness>>["manager"],
6
+ callId: string,
7
+ ) {
8
+ const call = manager.getCall(callId);
9
+ if (!call) {
10
+ throw new Error(`expected active call ${callId}`);
11
+ }
12
+ return call;
13
+ }
14
+
15
+ function requireTurnToken(provider: Awaited<ReturnType<typeof createManagerHarness>>["provider"]) {
16
+ const firstStart = provider.startListeningCalls[0];
17
+ if (!firstStart?.turnToken) {
18
+ throw new Error("expected closed-loop turn to capture a turn token");
19
+ }
20
+ return firstStart.turnToken;
21
+ }
22
+
4
23
  describe("CallManager closed-loop turns", () => {
5
24
  it("completes a closed-loop turn without live audio", async () => {
6
25
  const { manager, provider } = await createManagerHarness({
@@ -31,12 +50,12 @@ describe("CallManager closed-loop turns", () => {
31
50
  expect(provider.startListeningCalls).toHaveLength(1);
32
51
  expect(provider.stopListeningCalls).toHaveLength(1);
33
52
 
34
- const call = manager.getCall(started.callId);
35
- expect(call?.transcript.map((entry) => entry.text)).toEqual([
53
+ const call = requireCall(manager, started.callId);
54
+ expect(call.transcript.map((entry) => entry.text)).toEqual([
36
55
  "How can I help?",
37
56
  "Please check status",
38
57
  ]);
39
- const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
58
+ const metadata = call.metadata ?? {};
40
59
  expect(typeof metadata.lastTurnLatencyMs).toBe("number");
41
60
  expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
42
61
  expect(metadata.turnCount).toBe(1);
@@ -90,8 +109,7 @@ describe("CallManager closed-loop turns", () => {
90
109
  const turnPromise = manager.continueCall(started.callId, "Prompt");
91
110
  await new Promise((resolve) => setTimeout(resolve, 0));
92
111
 
93
- const expectedTurnToken = provider.startListeningCalls[0]?.turnToken;
94
- expect(typeof expectedTurnToken).toBe("string");
112
+ const expectedTurnToken = requireTurnToken(provider);
95
113
 
96
114
  manager.processEvent({
97
115
  id: "evt-turn-token-bad",
@@ -125,8 +143,8 @@ describe("CallManager closed-loop turns", () => {
125
143
  expect(turnResult.success).toBe(true);
126
144
  expect(turnResult.transcript).toBe("final answer");
127
145
 
128
- const call = manager.getCall(started.callId);
129
- expect(call?.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
146
+ const call = requireCall(manager, started.callId);
147
+ expect(call.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
130
148
  });
131
149
 
132
150
  it("tracks latency metadata across multiple closed-loop turns", async () => {
@@ -167,14 +185,14 @@ describe("CallManager closed-loop turns", () => {
167
185
 
168
186
  expect(secondResult.success).toBe(true);
169
187
 
170
- const call = manager.getCall(started.callId);
171
- expect(call?.transcript.map((entry) => entry.text)).toEqual([
188
+ const call = requireCall(manager, started.callId);
189
+ expect(call.transcript.map((entry) => entry.text)).toEqual([
172
190
  "First question",
173
191
  "First answer",
174
192
  "Second question",
175
193
  "Second answer",
176
194
  ]);
177
- const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
195
+ const metadata = call.metadata ?? {};
178
196
  expect(metadata.turnCount).toBe(2);
179
197
  expect(typeof metadata.lastTurnLatencyMs).toBe("number");
180
198
  expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
@@ -209,8 +227,8 @@ describe("CallManager closed-loop turns", () => {
209
227
  expect(result.transcript).toBe(`Answer ${i}`);
210
228
  }
211
229
 
212
- const call = manager.getCall(started.callId);
213
- const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
230
+ const call = requireCall(manager, started.callId);
231
+ const metadata = call.metadata ?? {};
214
232
  expect(metadata.turnCount).toBe(5);
215
233
  expect(provider.startListeningCalls).toHaveLength(5);
216
234
  expect(provider.stopListeningCalls).toHaveLength(5);