@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103):
  1. package/README.md +27 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +943 -0
  6. package/index.ts +379 -149
  7. package/openclaw.plugin.json +384 -157
  8. package/package.json +35 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +273 -12
  17. package/src/config.ts +355 -72
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +243 -19
  24. package/src/manager/events.ts +61 -31
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +528 -0
  28. package/src/manager/outbound.ts +163 -57
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +321 -0
  64. package/src/response-generator.ts +213 -53
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +429 -0
  68. package/src/runtime.ts +270 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +28 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +523 -102
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
@@ -1,5 +1,7 @@
1
1
  import crypto from "node:crypto";
2
- import type { TwilioConfig, WebhookSecurityConfig } from "../config.js";
2
+ import { setTimeout as sleep } from "node:timers/promises";
3
+ import { safeEqualSecret } from "openclaw/plugin-sdk/security-runtime";
4
+ import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
3
5
  import { getHeader } from "../http-headers.js";
4
6
  import type { MediaStreamHandler } from "../media-stream.js";
5
7
  import { chunkAudio } from "../telephony-audio.js";
@@ -13,6 +15,7 @@ import type {
13
15
  NormalizedEvent,
14
16
  PlayTtsInput,
15
17
  ProviderWebhookParseResult,
18
+ SendDtmfInput,
16
19
  StartListeningInput,
17
20
  StopListeningInput,
18
21
  WebhookContext,
@@ -27,9 +30,18 @@ import {
27
30
  normalizeProviderStatus,
28
31
  } from "./shared/call-status.js";
29
32
  import { guardedJsonApiRequest } from "./shared/guarded-json-api.js";
30
- import { twilioApiRequest } from "./twilio/api.js";
33
+ import type { TwilioProviderOptions } from "./twilio.types.js";
34
+ import { TwilioApiError, twilioApiRequest } from "./twilio/api.js";
31
35
  import { decideTwimlResponse, readTwimlRequestView } from "./twilio/twiml-policy.js";
32
36
  import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
37
+ export type { TwilioProviderOptions } from "./twilio.types.js";
38
+
39
+ const TWILIO_CALL_NOT_IN_PROGRESS_CODE = 21220;
40
+ const TWILIO_CALL_UPDATE_RETRY_DELAYS_MS = [250, 750] as const;
41
+
42
+ function isTwilioCallNotInProgressError(err: unknown): boolean {
43
+ return err instanceof TwilioApiError && err.twilioCode === TWILIO_CALL_NOT_IN_PROGRESS_CODE;
44
+ }
33
45
 
34
46
  function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?: string): string {
35
47
  if (verifiedRequestKey) {
@@ -41,9 +53,9 @@ function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?:
41
53
  const callSid = params.get("CallSid") ?? "";
42
54
  const callStatus = params.get("CallStatus") ?? "";
43
55
  const direction = params.get("Direction") ?? "";
44
- const callId = typeof ctx.query?.callId === "string" ? ctx.query.callId.trim() : "";
45
- const flow = typeof ctx.query?.flow === "string" ? ctx.query.flow.trim() : "";
46
- const turnToken = typeof ctx.query?.turnToken === "string" ? ctx.query.turnToken.trim() : "";
56
+ const callId = normalizeOptionalString(ctx.query?.callId) ?? "";
57
+ const flow = normalizeOptionalString(ctx.query?.flow) ?? "";
58
+ const turnToken = normalizeOptionalString(ctx.query?.turnToken) ?? "";
47
59
  return `twilio:fallback:${crypto
48
60
  .createHash("sha256")
49
61
  .update(
@@ -52,27 +64,14 @@ function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?:
52
64
  .digest("hex")}`;
53
65
  }
54
66
 
55
- /**
56
- * Twilio Voice API provider implementation.
57
- *
58
- * Uses Twilio Programmable Voice API with Media Streams for real-time
59
- * bidirectional audio streaming.
60
- *
61
- * @see https://www.twilio.com/docs/voice
62
- * @see https://www.twilio.com/docs/voice/media-streams
63
- */
64
- export interface TwilioProviderOptions {
65
- /** Allow ngrok free tier compatibility mode (loopback only, less secure) */
66
- allowNgrokFreeTierLoopbackBypass?: boolean;
67
- /** Override public URL for signature verification */
68
- publicUrl?: string;
69
- /** Path for media stream WebSocket (e.g., /voice/stream) */
70
- streamPath?: string;
71
- /** Skip webhook signature verification (development only) */
72
- skipVerification?: boolean;
73
- /** Webhook security options (forwarded headers/allowlist) */
74
- webhookSecurity?: WebhookSecurityConfig;
75
- }
67
+ type StreamSendResult = {
68
+ sent: boolean;
69
+ };
70
+
71
+ type TwilioProviderConfig = {
72
+ accountSid?: string;
73
+ authToken?: string;
74
+ };
76
75
 
77
76
  export class TwilioProvider implements VoiceCallProvider {
78
77
  readonly name = "twilio" as const;
@@ -134,7 +133,7 @@ export class TwilioProvider implements VoiceCallProvider {
134
133
  this.streamAuthTokens.delete(providerCallId);
135
134
  }
136
135
 
137
- constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) {
136
+ constructor(config: TwilioProviderConfig, options: TwilioProviderOptions = {}) {
138
137
  if (!config.accountSid) {
139
138
  throw new Error("Twilio Account SID is required");
140
139
  }
@@ -172,33 +171,47 @@ export class TwilioProvider implements VoiceCallProvider {
172
171
  this.callStreamMap.set(callSid, streamSid);
173
172
  }
174
173
 
175
- unregisterCallStream(callSid: string): void {
174
+ hasRegisteredStream(callSid: string): boolean {
175
+ return this.callStreamMap.has(callSid);
176
+ }
177
+
178
+ unregisterCallStream(callSid: string, streamSid?: string): void {
179
+ const currentStreamSid = this.callStreamMap.get(callSid);
180
+ if (!currentStreamSid) {
181
+ if (!streamSid) {
182
+ this.activeStreamCalls.delete(callSid);
183
+ }
184
+ return;
185
+ }
186
+ if (streamSid && currentStreamSid !== streamSid) {
187
+ return;
188
+ }
176
189
  this.callStreamMap.delete(callSid);
177
190
  this.activeStreamCalls.delete(callSid);
178
191
  }
179
192
 
193
+ isConversationStreamConnectEnabled(): boolean {
194
+ return Boolean(this.mediaStreamHandler && this.getStreamUrl());
195
+ }
196
+
180
197
  isValidStreamToken(callSid: string, token?: string): boolean {
181
198
  const expected = this.streamAuthTokens.get(callSid);
182
199
  if (!expected || !token) {
183
200
  return false;
184
201
  }
185
- if (expected.length !== token.length) {
186
- const dummy = Buffer.from(expected);
187
- crypto.timingSafeEqual(dummy, dummy);
188
- return false;
189
- }
190
- return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(token));
202
+ return safeEqualSecret(expected, token);
191
203
  }
192
204
 
193
205
  /**
194
206
  * Clear TTS queue for a call (barge-in).
195
207
  * Used when user starts speaking to interrupt current TTS playback.
196
208
  */
197
- clearTtsQueue(callSid: string): void {
209
+ clearTtsQueue(callSid: string, reason = "unspecified"): void {
198
210
  const streamSid = this.callStreamMap.get(callSid);
199
- if (streamSid && this.mediaStreamHandler) {
200
- this.mediaStreamHandler.clearTtsQueue(streamSid);
211
+ if (!streamSid || !this.mediaStreamHandler) {
212
+ return;
201
213
  }
214
+ this.mediaStreamHandler.clearTtsQueue(streamSid, reason);
202
215
  }
203
216
 
204
217
  /**
@@ -219,6 +232,30 @@ export class TwilioProvider implements VoiceCallProvider {
219
232
  });
220
233
  }
221
234
 
235
+ private async updateLiveCallTwiml(
236
+ providerCallId: string,
237
+ twiml: string,
238
+ operation: string,
239
+ ): Promise<void> {
240
+ let retryIndex = 0;
241
+ while (true) {
242
+ try {
243
+ await this.apiRequest(`/Calls/${providerCallId}.json`, { Twiml: twiml });
244
+ return;
245
+ } catch (err) {
246
+ const retryDelayMs = TWILIO_CALL_UPDATE_RETRY_DELAYS_MS[retryIndex];
247
+ if (retryDelayMs === undefined || !isTwilioCallNotInProgressError(err)) {
248
+ throw err;
249
+ }
250
+ retryIndex += 1;
251
+ console.warn(
252
+ `[voice-call] Twilio ${operation} update hit call state race (21220); retrying in ${retryDelayMs}ms`,
253
+ );
254
+ await sleep(retryDelayMs);
255
+ }
256
+ }
257
+ }
258
+
222
259
  /**
223
260
  * Verify Twilio webhook signature using HMAC-SHA1.
224
261
  *
@@ -245,14 +282,8 @@ export class TwilioProvider implements VoiceCallProvider {
245
282
  ): ProviderWebhookParseResult {
246
283
  try {
247
284
  const params = new URLSearchParams(ctx.rawBody);
248
- const callIdFromQuery =
249
- typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
250
- ? ctx.query.callId.trim()
251
- : undefined;
252
- const turnTokenFromQuery =
253
- typeof ctx.query?.turnToken === "string" && ctx.query.turnToken.trim()
254
- ? ctx.query.turnToken.trim()
255
- : undefined;
285
+ const callIdFromQuery = normalizeOptionalString(ctx.query?.callId);
286
+ const turnTokenFromQuery = normalizeOptionalString(ctx.query?.turnToken);
256
287
  const dedupeKey = createTwilioRequestDedupeKey(ctx, options?.verifiedRequestKey);
257
288
  const event = this.normalizeEvent(params, {
258
289
  callIdOverride: callIdFromQuery,
@@ -322,7 +353,7 @@ export class TwilioProvider implements VoiceCallProvider {
322
353
  type: "call.speech",
323
354
  transcript: speechResult,
324
355
  isFinal: true,
325
- confidence: parseFloat(params.get("Confidence") || "0.9"),
356
+ confidence: Number.parseFloat(params.get("Confidence") || "0.9"),
326
357
  };
327
358
  }
328
359
 
@@ -416,6 +447,23 @@ export class TwilioProvider implements VoiceCallProvider {
416
447
  }
417
448
  }
418
449
 
450
+ consumeInitialTwiML(ctx: WebhookContext): string | null {
451
+ const view = readTwimlRequestView(ctx);
452
+ if (!view.callIdFromQuery || view.isStatusCallback) {
453
+ return null;
454
+ }
455
+ const storedTwiml = this.twimlStorage.get(view.callIdFromQuery);
456
+ if (!storedTwiml) {
457
+ return null;
458
+ }
459
+ const kind = this.notifyCalls.has(view.callIdFromQuery) ? "notify" : "pre-connect";
460
+ this.deleteStoredTwiml(view.callIdFromQuery);
461
+ console.log(
462
+ `[voice-call] Twilio initial TwiML consumed for call ${view.callIdFromQuery} (kind=${kind}, callSid=${view.callSid ?? "unknown"})`,
463
+ );
464
+ return storedTwiml;
465
+ }
466
+
419
467
  /**
420
468
  * Get the WebSocket URL for media streaming.
421
469
  * Derives from the public URL origin + stream path.
@@ -489,8 +537,8 @@ export class TwilioProvider implements VoiceCallProvider {
489
537
 
490
538
  /**
491
539
  * Initiate an outbound call via Twilio API.
492
- * If inlineTwiml is provided, uses that directly (for notify mode).
493
- * Otherwise, uses webhook URL for dynamic TwiML.
540
+ * If preConnectTwiml is provided, the first webhook request receives that
541
+ * TwiML before normal dynamic TwiML resumes.
494
542
  */
495
543
  async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
496
544
  const url = new URL(input.webhookUrl);
@@ -501,24 +549,30 @@ export class TwilioProvider implements VoiceCallProvider {
501
549
  statusUrl.searchParams.set("callId", input.callId);
502
550
  statusUrl.searchParams.set("type", "status"); // Differentiate from TwiML requests
503
551
 
504
- // Store TwiML content if provided (for notify mode)
505
- // We now serve it from the webhook endpoint instead of sending inline
506
- if (input.inlineTwiml) {
507
- this.twimlStorage.set(input.callId, input.inlineTwiml);
508
- this.notifyCalls.add(input.callId);
552
+ if (!input.inlineTwiml && input.preConnectTwiml) {
553
+ this.twimlStorage.set(input.callId, input.preConnectTwiml);
554
+ console.log(
555
+ `[voice-call] Stored Twilio initial TwiML for call ${input.callId} (kind=pre-connect)`,
556
+ );
509
557
  }
510
558
 
511
- // Build request params - always use URL-based TwiML.
512
- // Twilio silently ignores `StatusCallback` when using the inline `Twiml` parameter.
513
559
  const params: Record<string, string | string[]> = {
514
560
  To: input.to,
515
561
  From: input.from,
516
- Url: url.toString(), // TwiML serving endpoint
517
- StatusCallback: statusUrl.toString(), // Separate status callback endpoint
562
+ StatusCallback: statusUrl.toString(),
518
563
  StatusCallbackEvent: ["initiated", "ringing", "answered", "completed"],
519
564
  Timeout: "30",
520
565
  };
521
566
 
567
+ if (input.inlineTwiml) {
568
+ params.Twiml = input.inlineTwiml;
569
+ console.log(
570
+ `[voice-call] Sending direct Twilio initial TwiML for call ${input.callId} (kind=notify)`,
571
+ );
572
+ } else {
573
+ params.Url = url.toString();
574
+ }
575
+
522
576
  const result = await this.apiRequest<TwilioCallResponse>("/Calls.json", params);
523
577
 
524
578
  this.callWebhookUrls.set(result.sid, url.toString());
@@ -550,28 +604,32 @@ export class TwilioProvider implements VoiceCallProvider {
550
604
  * Play TTS audio via Twilio.
551
605
  *
552
606
  * Two modes:
553
- * 1. Core TTS + Media Streams: If TTS provider and media stream are available,
554
- * generates audio via core TTS and streams it through WebSocket (preferred).
555
- * 2. TwiML <Say>: Falls back to Twilio's native TTS with Polly voices.
556
- * Note: This may not work on all Twilio accounts.
607
+ * 1. Core TTS + Media Streams: when an active stream exists, stream playback is required.
608
+ * If telephony TTS is unavailable in that state, playback fails rather than mixing paths.
609
+ * 2. TwiML <Say>: fallback only when there is no active stream for the call.
557
610
  */
558
611
  async playTts(input: PlayTtsInput): Promise<void> {
559
- // Try telephony TTS via media stream first (if configured)
560
612
  const streamSid = this.callStreamMap.get(input.providerCallId);
561
- if (this.ttsProvider && this.mediaStreamHandler && streamSid) {
613
+ if (streamSid) {
614
+ if (!this.ttsProvider || !this.mediaStreamHandler) {
615
+ throw new Error(
616
+ "Telephony TTS unavailable while media stream is active; refusing TwiML fallback",
617
+ );
618
+ }
619
+
562
620
  try {
563
621
  await this.playTtsViaStream(input.text, streamSid);
564
622
  return;
565
623
  } catch (err) {
566
624
  console.warn(
567
- `[voice-call] Telephony TTS failed, falling back to Twilio <Say>:`,
625
+ `[voice-call] Telephony TTS failed:`,
568
626
  err instanceof Error ? err.message : err,
569
627
  );
570
- // Fall through to TwiML <Say> fallback
628
+ throw err instanceof Error ? err : new Error(String(err));
571
629
  }
572
630
  }
573
631
 
574
- // Fall back to TwiML <Say> (may not work on all accounts)
632
+ // Fall back to TwiML <Say> only when no active stream exists.
575
633
  const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
576
634
  if (!webhookUrl) {
577
635
  throw new Error("Missing webhook URL for this call (provider state not initialized)");
@@ -590,9 +648,22 @@ export class TwilioProvider implements VoiceCallProvider {
590
648
  </Gather>
591
649
  </Response>`;
592
650
 
593
- await this.apiRequest(`/Calls/${input.providerCallId}.json`, {
594
- Twiml: twiml,
595
- });
651
+ await this.updateLiveCallTwiml(input.providerCallId, twiml, "playTts");
652
+ }
653
+
654
+ async sendDtmf(input: SendDtmfInput): Promise<void> {
655
+ const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
656
+ if (!webhookUrl) {
657
+ throw new Error("Missing webhook URL for this call (provider state not initialized)");
658
+ }
659
+
660
+ const twiml = `<?xml version="1.0" encoding="UTF-8"?>
661
+ <Response>
662
+ <Play digits="${escapeXml(input.digits)}" />
663
+ <Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>
664
+ </Response>`;
665
+
666
+ await this.updateLiveCallTwiml(input.providerCallId, twiml, "sendDtmf");
596
667
  }
597
668
 
598
669
  /**
@@ -608,28 +679,112 @@ export class TwilioProvider implements VoiceCallProvider {
608
679
  // Stream audio in 20ms chunks (160 bytes at 8kHz mu-law)
609
680
  const CHUNK_SIZE = 160;
610
681
  const CHUNK_DELAY_MS = 20;
682
+ const SILENCE_CHUNK = Buffer.alloc(CHUNK_SIZE, 0xff);
611
683
 
612
684
  const handler = this.mediaStreamHandler;
613
685
  const ttsProvider = this.ttsProvider;
686
+
687
+ const normalizeSendResult = (raw: unknown): StreamSendResult => {
688
+ if (!raw || typeof raw !== "object") {
689
+ return { sent: true };
690
+ }
691
+ const typed = raw as {
692
+ sent?: unknown;
693
+ };
694
+ return {
695
+ sent: typed.sent === undefined ? true : Boolean(typed.sent),
696
+ };
697
+ };
698
+
699
+ const sendAudioChunk = (audio: Buffer): StreamSendResult => {
700
+ const raw = (handler as { sendAudio: (sid: string, chunk: Buffer) => unknown }).sendAudio(
701
+ streamSid,
702
+ audio,
703
+ );
704
+ return normalizeSendResult(raw);
705
+ };
706
+
707
+ const sendPlaybackMark = (name: string): StreamSendResult => {
708
+ const raw = (handler as { sendMark: (sid: string, markName: string) => unknown }).sendMark(
709
+ streamSid,
710
+ name,
711
+ );
712
+ return normalizeSendResult(raw);
713
+ };
714
+
614
715
  await handler.queueTts(streamSid, async (signal) => {
716
+ const sendKeepAlive = () => {
717
+ sendAudioChunk(SILENCE_CHUNK);
718
+ };
719
+ sendKeepAlive();
720
+ const keepAlive = setInterval(() => {
721
+ if (!signal.aborted) {
722
+ sendKeepAlive();
723
+ }
724
+ }, CHUNK_DELAY_MS);
725
+
615
726
  // Generate audio with core TTS (returns mu-law at 8kHz)
616
- const muLawAudio = await ttsProvider.synthesizeForTelephony(text);
727
+ let muLawAudio: Buffer;
728
+ let synthTimeout: ReturnType<typeof setTimeout> | null = null;
729
+ const synthTimeoutMs = ttsProvider.synthesisTimeoutMs;
730
+ try {
731
+ const synthPromise = ttsProvider.synthesizeForTelephony(text);
732
+ const timeoutPromise = new Promise<Buffer>((_, reject) => {
733
+ synthTimeout = setTimeout(() => {
734
+ reject(new Error(`Telephony TTS synthesis timed out after ${synthTimeoutMs}ms`));
735
+ }, synthTimeoutMs);
736
+ });
737
+ muLawAudio = await Promise.race([synthPromise, timeoutPromise]);
738
+ } finally {
739
+ if (synthTimeout) {
740
+ clearTimeout(synthTimeout);
741
+ }
742
+ clearInterval(keepAlive);
743
+ }
744
+
745
+ if (muLawAudio.length === 0) {
746
+ throw new Error("Telephony TTS produced no audio");
747
+ }
748
+
749
+ let chunkAttempts = 0;
750
+ let chunkDelivered = 0;
751
+ let nextChunkDueAt = Date.now() + CHUNK_DELAY_MS;
617
752
  for (const chunk of chunkAudio(muLawAudio, CHUNK_SIZE)) {
618
753
  if (signal.aborted) {
619
754
  break;
620
755
  }
621
- handler.sendAudio(streamSid, chunk);
756
+ chunkAttempts += 1;
757
+ const chunkResult = sendAudioChunk(chunk);
758
+ if (chunkResult.sent) {
759
+ chunkDelivered += 1;
760
+ }
622
761
 
623
- // Pace the audio to match real-time playback
624
- await new Promise((resolve) => setTimeout(resolve, CHUNK_DELAY_MS));
762
+ // Drift-corrected pacing: schedule against an absolute clock to avoid cumulative delay.
763
+ const waitMs = nextChunkDueAt - Date.now();
764
+ if (waitMs > 0) {
765
+ await new Promise((resolve) => setTimeout(resolve, Math.ceil(waitMs)));
766
+ }
767
+ nextChunkDueAt += CHUNK_DELAY_MS;
625
768
  if (signal.aborted) {
626
769
  break;
627
770
  }
628
771
  }
629
772
 
773
+ let markSent = true;
630
774
  if (!signal.aborted) {
631
775
  // Send a mark to track when audio finishes
632
- handler.sendMark(streamSid, `tts-${Date.now()}`);
776
+ markSent = sendPlaybackMark(`tts-${Date.now()}`).sent;
777
+ }
778
+
779
+ if (!signal.aborted && chunkAttempts > 0 && (chunkDelivered === 0 || !markSent)) {
780
+ const failures: string[] = [];
781
+ if (chunkDelivered === 0) {
782
+ failures.push("no audio chunks delivered");
783
+ }
784
+ if (!markSent) {
785
+ failures.push("completion mark not delivered");
786
+ }
787
+ throw new Error(`Telephony stream playback failed: ${failures.join("; ")}`);
633
788
  }
634
789
  });
635
790
  }
@@ -654,9 +809,7 @@ export class TwilioProvider implements VoiceCallProvider {
654
809
  </Gather>
655
810
  </Response>`;
656
811
 
657
- await this.apiRequest(`/Calls/${input.providerCallId}.json`, {
658
- Twiml: twiml,
659
- });
812
+ await this.updateLiveCallTwiml(input.providerCallId, twiml, "startListening");
660
813
  }
661
814
 
662
815
  /**
@@ -0,0 +1,17 @@
1
+ import type { WebhookSecurityConfig } from "../config.js";
2
+
3
+ /**
4
+ * Twilio Voice API provider options.
5
+ */
6
+ export interface TwilioProviderOptions {
7
+ /** Allow ngrok free tier compatibility mode (loopback only, less secure) */
8
+ allowNgrokFreeTierLoopbackBypass?: boolean;
9
+ /** Override public URL for signature verification */
10
+ publicUrl?: string;
11
+ /** Path for media stream WebSocket (e.g., /voice/stream) */
12
+ streamPath?: string;
13
+ /** Skip webhook signature verification (development only) */
14
+ skipVerification?: boolean;
15
+ /** Webhook security options (forwarded headers/allowlist) */
16
+ webhookSecurity?: WebhookSecurityConfig;
17
+ }
@@ -0,0 +1,3 @@
1
+ import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
2
+
3
+ export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;
@@ -0,0 +1,88 @@
1
+ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
2
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
3
+ import type { VoiceCallRealtimeFastContextConfig } from "./config.js";
4
+
5
+ const mocks = vi.hoisted(() => ({
6
+ getActiveMemorySearchManager: vi.fn(),
7
+ }));
8
+
9
+ vi.mock("openclaw/plugin-sdk/memory-host-search", () => ({
10
+ getActiveMemorySearchManager: mocks.getActiveMemorySearchManager,
11
+ }));
12
+
13
+ import { resolveRealtimeFastContextConsult } from "./realtime-fast-context.js";
14
+
15
+ const cfg = {} as OpenClawConfig;
16
+
17
+ function createFastContextConfig(
18
+ overrides: Partial<VoiceCallRealtimeFastContextConfig> = {},
19
+ ): VoiceCallRealtimeFastContextConfig {
20
+ return {
21
+ enabled: true,
22
+ timeoutMs: 800,
23
+ maxResults: 3,
24
+ sources: ["memory", "sessions"],
25
+ fallbackToConsult: false,
26
+ ...overrides,
27
+ };
28
+ }
29
+
30
+ function createLogger() {
31
+ return {
32
+ debug: vi.fn(),
33
+ warn: vi.fn(),
34
+ };
35
+ }
36
+
37
+ describe("resolveRealtimeFastContextConsult", () => {
38
+ beforeEach(() => {
39
+ mocks.getActiveMemorySearchManager.mockReset();
40
+ });
41
+
42
+ afterEach(() => {
43
+ vi.useRealTimers();
44
+ });
45
+
46
+ it("falls back to the full consult when memory manager setup fails", async () => {
47
+ const logger = createLogger();
48
+ mocks.getActiveMemorySearchManager.mockRejectedValue(new Error("memory misconfigured"));
49
+
50
+ await expect(
51
+ resolveRealtimeFastContextConsult({
52
+ cfg,
53
+ agentId: "main",
54
+ sessionKey: "voice:15550001234",
55
+ config: createFastContextConfig({ fallbackToConsult: true }),
56
+ args: { question: "What do you remember?" },
57
+ logger,
58
+ }),
59
+ ).resolves.toEqual({ handled: false });
60
+
61
+ expect(logger.debug).toHaveBeenCalledWith(expect.stringContaining("memory misconfigured"));
62
+ });
63
+
64
+ it("returns a bounded miss when memory manager setup exceeds the fast context timeout", async () => {
65
+ vi.useFakeTimers();
66
+ const logger = createLogger();
67
+ mocks.getActiveMemorySearchManager.mockReturnValue(new Promise(() => {}));
68
+
69
+ const resultPromise = resolveRealtimeFastContextConsult({
70
+ cfg,
71
+ agentId: "main",
72
+ sessionKey: "voice:15550001234",
73
+ config: createFastContextConfig({ fallbackToConsult: false, timeoutMs: 25 }),
74
+ args: { question: "What do you remember?" },
75
+ logger,
76
+ });
77
+
78
+ await vi.advanceTimersByTimeAsync(25);
79
+
80
+ await expect(resultPromise).resolves.toEqual({
81
+ handled: true,
82
+ result: {
83
+ text: expect.stringContaining("No relevant OpenClaw memory or session context"),
84
+ },
85
+ });
86
+ expect(logger.debug).toHaveBeenCalledWith(expect.stringContaining("timed out after 25ms"));
87
+ });
88
+ });