@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +943 -0
- package/index.ts +379 -149
- package/openclaw.plugin.json +384 -157
- package/package.json +35 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +273 -12
- package/src/config.ts +355 -72
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +243 -19
- package/src/manager/events.ts +61 -31
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +528 -0
- package/src/manager/outbound.ts +163 -57
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +321 -0
- package/src/response-generator.ts +213 -53
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +429 -0
- package/src/runtime.ts +270 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +28 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +523 -102
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
package/src/providers/twilio.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
-
import
|
|
2
|
+
import { setTimeout as sleep } from "node:timers/promises";
|
|
3
|
+
import { safeEqualSecret } from "openclaw/plugin-sdk/security-runtime";
|
|
4
|
+
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
|
3
5
|
import { getHeader } from "../http-headers.js";
|
|
4
6
|
import type { MediaStreamHandler } from "../media-stream.js";
|
|
5
7
|
import { chunkAudio } from "../telephony-audio.js";
|
|
@@ -13,6 +15,7 @@ import type {
|
|
|
13
15
|
NormalizedEvent,
|
|
14
16
|
PlayTtsInput,
|
|
15
17
|
ProviderWebhookParseResult,
|
|
18
|
+
SendDtmfInput,
|
|
16
19
|
StartListeningInput,
|
|
17
20
|
StopListeningInput,
|
|
18
21
|
WebhookContext,
|
|
@@ -27,9 +30,18 @@ import {
|
|
|
27
30
|
normalizeProviderStatus,
|
|
28
31
|
} from "./shared/call-status.js";
|
|
29
32
|
import { guardedJsonApiRequest } from "./shared/guarded-json-api.js";
|
|
30
|
-
import {
|
|
33
|
+
import type { TwilioProviderOptions } from "./twilio.types.js";
|
|
34
|
+
import { TwilioApiError, twilioApiRequest } from "./twilio/api.js";
|
|
31
35
|
import { decideTwimlResponse, readTwimlRequestView } from "./twilio/twiml-policy.js";
|
|
32
36
|
import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
|
|
37
|
+
export type { TwilioProviderOptions } from "./twilio.types.js";
|
|
38
|
+
|
|
39
|
+
const TWILIO_CALL_NOT_IN_PROGRESS_CODE = 21220;
|
|
40
|
+
const TWILIO_CALL_UPDATE_RETRY_DELAYS_MS = [250, 750] as const;
|
|
41
|
+
|
|
42
|
+
function isTwilioCallNotInProgressError(err: unknown): boolean {
|
|
43
|
+
return err instanceof TwilioApiError && err.twilioCode === TWILIO_CALL_NOT_IN_PROGRESS_CODE;
|
|
44
|
+
}
|
|
33
45
|
|
|
34
46
|
function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?: string): string {
|
|
35
47
|
if (verifiedRequestKey) {
|
|
@@ -41,9 +53,9 @@ function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?:
|
|
|
41
53
|
const callSid = params.get("CallSid") ?? "";
|
|
42
54
|
const callStatus = params.get("CallStatus") ?? "";
|
|
43
55
|
const direction = params.get("Direction") ?? "";
|
|
44
|
-
const callId =
|
|
45
|
-
const flow =
|
|
46
|
-
const turnToken =
|
|
56
|
+
const callId = normalizeOptionalString(ctx.query?.callId) ?? "";
|
|
57
|
+
const flow = normalizeOptionalString(ctx.query?.flow) ?? "";
|
|
58
|
+
const turnToken = normalizeOptionalString(ctx.query?.turnToken) ?? "";
|
|
47
59
|
return `twilio:fallback:${crypto
|
|
48
60
|
.createHash("sha256")
|
|
49
61
|
.update(
|
|
@@ -52,27 +64,14 @@ function createTwilioRequestDedupeKey(ctx: WebhookContext, verifiedRequestKey?:
|
|
|
52
64
|
.digest("hex")}`;
|
|
53
65
|
}
|
|
54
66
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
*/
|
|
64
|
-
export interface TwilioProviderOptions {
|
|
65
|
-
/** Allow ngrok free tier compatibility mode (loopback only, less secure) */
|
|
66
|
-
allowNgrokFreeTierLoopbackBypass?: boolean;
|
|
67
|
-
/** Override public URL for signature verification */
|
|
68
|
-
publicUrl?: string;
|
|
69
|
-
/** Path for media stream WebSocket (e.g., /voice/stream) */
|
|
70
|
-
streamPath?: string;
|
|
71
|
-
/** Skip webhook signature verification (development only) */
|
|
72
|
-
skipVerification?: boolean;
|
|
73
|
-
/** Webhook security options (forwarded headers/allowlist) */
|
|
74
|
-
webhookSecurity?: WebhookSecurityConfig;
|
|
75
|
-
}
|
|
67
|
+
type StreamSendResult = {
|
|
68
|
+
sent: boolean;
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
type TwilioProviderConfig = {
|
|
72
|
+
accountSid?: string;
|
|
73
|
+
authToken?: string;
|
|
74
|
+
};
|
|
76
75
|
|
|
77
76
|
export class TwilioProvider implements VoiceCallProvider {
|
|
78
77
|
readonly name = "twilio" as const;
|
|
@@ -134,7 +133,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
134
133
|
this.streamAuthTokens.delete(providerCallId);
|
|
135
134
|
}
|
|
136
135
|
|
|
137
|
-
constructor(config:
|
|
136
|
+
constructor(config: TwilioProviderConfig, options: TwilioProviderOptions = {}) {
|
|
138
137
|
if (!config.accountSid) {
|
|
139
138
|
throw new Error("Twilio Account SID is required");
|
|
140
139
|
}
|
|
@@ -172,33 +171,47 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
172
171
|
this.callStreamMap.set(callSid, streamSid);
|
|
173
172
|
}
|
|
174
173
|
|
|
175
|
-
|
|
174
|
+
hasRegisteredStream(callSid: string): boolean {
|
|
175
|
+
return this.callStreamMap.has(callSid);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
unregisterCallStream(callSid: string, streamSid?: string): void {
|
|
179
|
+
const currentStreamSid = this.callStreamMap.get(callSid);
|
|
180
|
+
if (!currentStreamSid) {
|
|
181
|
+
if (!streamSid) {
|
|
182
|
+
this.activeStreamCalls.delete(callSid);
|
|
183
|
+
}
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
if (streamSid && currentStreamSid !== streamSid) {
|
|
187
|
+
return;
|
|
188
|
+
}
|
|
176
189
|
this.callStreamMap.delete(callSid);
|
|
177
190
|
this.activeStreamCalls.delete(callSid);
|
|
178
191
|
}
|
|
179
192
|
|
|
193
|
+
isConversationStreamConnectEnabled(): boolean {
|
|
194
|
+
return Boolean(this.mediaStreamHandler && this.getStreamUrl());
|
|
195
|
+
}
|
|
196
|
+
|
|
180
197
|
isValidStreamToken(callSid: string, token?: string): boolean {
|
|
181
198
|
const expected = this.streamAuthTokens.get(callSid);
|
|
182
199
|
if (!expected || !token) {
|
|
183
200
|
return false;
|
|
184
201
|
}
|
|
185
|
-
|
|
186
|
-
const dummy = Buffer.from(expected);
|
|
187
|
-
crypto.timingSafeEqual(dummy, dummy);
|
|
188
|
-
return false;
|
|
189
|
-
}
|
|
190
|
-
return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(token));
|
|
202
|
+
return safeEqualSecret(expected, token);
|
|
191
203
|
}
|
|
192
204
|
|
|
193
205
|
/**
|
|
194
206
|
* Clear TTS queue for a call (barge-in).
|
|
195
207
|
* Used when user starts speaking to interrupt current TTS playback.
|
|
196
208
|
*/
|
|
197
|
-
clearTtsQueue(callSid: string): void {
|
|
209
|
+
clearTtsQueue(callSid: string, reason = "unspecified"): void {
|
|
198
210
|
const streamSid = this.callStreamMap.get(callSid);
|
|
199
|
-
if (streamSid
|
|
200
|
-
|
|
211
|
+
if (!streamSid || !this.mediaStreamHandler) {
|
|
212
|
+
return;
|
|
201
213
|
}
|
|
214
|
+
this.mediaStreamHandler.clearTtsQueue(streamSid, reason);
|
|
202
215
|
}
|
|
203
216
|
|
|
204
217
|
/**
|
|
@@ -219,6 +232,30 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
219
232
|
});
|
|
220
233
|
}
|
|
221
234
|
|
|
235
|
+
private async updateLiveCallTwiml(
|
|
236
|
+
providerCallId: string,
|
|
237
|
+
twiml: string,
|
|
238
|
+
operation: string,
|
|
239
|
+
): Promise<void> {
|
|
240
|
+
let retryIndex = 0;
|
|
241
|
+
while (true) {
|
|
242
|
+
try {
|
|
243
|
+
await this.apiRequest(`/Calls/${providerCallId}.json`, { Twiml: twiml });
|
|
244
|
+
return;
|
|
245
|
+
} catch (err) {
|
|
246
|
+
const retryDelayMs = TWILIO_CALL_UPDATE_RETRY_DELAYS_MS[retryIndex];
|
|
247
|
+
if (retryDelayMs === undefined || !isTwilioCallNotInProgressError(err)) {
|
|
248
|
+
throw err;
|
|
249
|
+
}
|
|
250
|
+
retryIndex += 1;
|
|
251
|
+
console.warn(
|
|
252
|
+
`[voice-call] Twilio ${operation} update hit call state race (21220); retrying in ${retryDelayMs}ms`,
|
|
253
|
+
);
|
|
254
|
+
await sleep(retryDelayMs);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
222
259
|
/**
|
|
223
260
|
* Verify Twilio webhook signature using HMAC-SHA1.
|
|
224
261
|
*
|
|
@@ -245,14 +282,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
245
282
|
): ProviderWebhookParseResult {
|
|
246
283
|
try {
|
|
247
284
|
const params = new URLSearchParams(ctx.rawBody);
|
|
248
|
-
const callIdFromQuery =
|
|
249
|
-
|
|
250
|
-
? ctx.query.callId.trim()
|
|
251
|
-
: undefined;
|
|
252
|
-
const turnTokenFromQuery =
|
|
253
|
-
typeof ctx.query?.turnToken === "string" && ctx.query.turnToken.trim()
|
|
254
|
-
? ctx.query.turnToken.trim()
|
|
255
|
-
: undefined;
|
|
285
|
+
const callIdFromQuery = normalizeOptionalString(ctx.query?.callId);
|
|
286
|
+
const turnTokenFromQuery = normalizeOptionalString(ctx.query?.turnToken);
|
|
256
287
|
const dedupeKey = createTwilioRequestDedupeKey(ctx, options?.verifiedRequestKey);
|
|
257
288
|
const event = this.normalizeEvent(params, {
|
|
258
289
|
callIdOverride: callIdFromQuery,
|
|
@@ -322,7 +353,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
322
353
|
type: "call.speech",
|
|
323
354
|
transcript: speechResult,
|
|
324
355
|
isFinal: true,
|
|
325
|
-
confidence: parseFloat(params.get("Confidence") || "0.9"),
|
|
356
|
+
confidence: Number.parseFloat(params.get("Confidence") || "0.9"),
|
|
326
357
|
};
|
|
327
358
|
}
|
|
328
359
|
|
|
@@ -416,6 +447,23 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
416
447
|
}
|
|
417
448
|
}
|
|
418
449
|
|
|
450
|
+
consumeInitialTwiML(ctx: WebhookContext): string | null {
|
|
451
|
+
const view = readTwimlRequestView(ctx);
|
|
452
|
+
if (!view.callIdFromQuery || view.isStatusCallback) {
|
|
453
|
+
return null;
|
|
454
|
+
}
|
|
455
|
+
const storedTwiml = this.twimlStorage.get(view.callIdFromQuery);
|
|
456
|
+
if (!storedTwiml) {
|
|
457
|
+
return null;
|
|
458
|
+
}
|
|
459
|
+
const kind = this.notifyCalls.has(view.callIdFromQuery) ? "notify" : "pre-connect";
|
|
460
|
+
this.deleteStoredTwiml(view.callIdFromQuery);
|
|
461
|
+
console.log(
|
|
462
|
+
`[voice-call] Twilio initial TwiML consumed for call ${view.callIdFromQuery} (kind=${kind}, callSid=${view.callSid ?? "unknown"})`,
|
|
463
|
+
);
|
|
464
|
+
return storedTwiml;
|
|
465
|
+
}
|
|
466
|
+
|
|
419
467
|
/**
|
|
420
468
|
* Get the WebSocket URL for media streaming.
|
|
421
469
|
* Derives from the public URL origin + stream path.
|
|
@@ -489,8 +537,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
489
537
|
|
|
490
538
|
/**
|
|
491
539
|
* Initiate an outbound call via Twilio API.
|
|
492
|
-
* If
|
|
493
|
-
*
|
|
540
|
+
* If preConnectTwiml is provided, the first webhook request receives that
|
|
541
|
+
* TwiML before normal dynamic TwiML resumes.
|
|
494
542
|
*/
|
|
495
543
|
async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
|
|
496
544
|
const url = new URL(input.webhookUrl);
|
|
@@ -501,24 +549,30 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
501
549
|
statusUrl.searchParams.set("callId", input.callId);
|
|
502
550
|
statusUrl.searchParams.set("type", "status"); // Differentiate from TwiML requests
|
|
503
551
|
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
552
|
+
if (!input.inlineTwiml && input.preConnectTwiml) {
|
|
553
|
+
this.twimlStorage.set(input.callId, input.preConnectTwiml);
|
|
554
|
+
console.log(
|
|
555
|
+
`[voice-call] Stored Twilio initial TwiML for call ${input.callId} (kind=pre-connect)`,
|
|
556
|
+
);
|
|
509
557
|
}
|
|
510
558
|
|
|
511
|
-
// Build request params - always use URL-based TwiML.
|
|
512
|
-
// Twilio silently ignores `StatusCallback` when using the inline `Twiml` parameter.
|
|
513
559
|
const params: Record<string, string | string[]> = {
|
|
514
560
|
To: input.to,
|
|
515
561
|
From: input.from,
|
|
516
|
-
|
|
517
|
-
StatusCallback: statusUrl.toString(), // Separate status callback endpoint
|
|
562
|
+
StatusCallback: statusUrl.toString(),
|
|
518
563
|
StatusCallbackEvent: ["initiated", "ringing", "answered", "completed"],
|
|
519
564
|
Timeout: "30",
|
|
520
565
|
};
|
|
521
566
|
|
|
567
|
+
if (input.inlineTwiml) {
|
|
568
|
+
params.Twiml = input.inlineTwiml;
|
|
569
|
+
console.log(
|
|
570
|
+
`[voice-call] Sending direct Twilio initial TwiML for call ${input.callId} (kind=notify)`,
|
|
571
|
+
);
|
|
572
|
+
} else {
|
|
573
|
+
params.Url = url.toString();
|
|
574
|
+
}
|
|
575
|
+
|
|
522
576
|
const result = await this.apiRequest<TwilioCallResponse>("/Calls.json", params);
|
|
523
577
|
|
|
524
578
|
this.callWebhookUrls.set(result.sid, url.toString());
|
|
@@ -550,28 +604,32 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
550
604
|
* Play TTS audio via Twilio.
|
|
551
605
|
*
|
|
552
606
|
* Two modes:
|
|
553
|
-
* 1. Core TTS + Media Streams:
|
|
554
|
-
*
|
|
555
|
-
* 2. TwiML <Say>:
|
|
556
|
-
* Note: This may not work on all Twilio accounts.
|
|
607
|
+
* 1. Core TTS + Media Streams: when an active stream exists, stream playback is required.
|
|
608
|
+
* If telephony TTS is unavailable in that state, playback fails rather than mixing paths.
|
|
609
|
+
* 2. TwiML <Say>: fallback only when there is no active stream for the call.
|
|
557
610
|
*/
|
|
558
611
|
async playTts(input: PlayTtsInput): Promise<void> {
|
|
559
|
-
// Try telephony TTS via media stream first (if configured)
|
|
560
612
|
const streamSid = this.callStreamMap.get(input.providerCallId);
|
|
561
|
-
if (
|
|
613
|
+
if (streamSid) {
|
|
614
|
+
if (!this.ttsProvider || !this.mediaStreamHandler) {
|
|
615
|
+
throw new Error(
|
|
616
|
+
"Telephony TTS unavailable while media stream is active; refusing TwiML fallback",
|
|
617
|
+
);
|
|
618
|
+
}
|
|
619
|
+
|
|
562
620
|
try {
|
|
563
621
|
await this.playTtsViaStream(input.text, streamSid);
|
|
564
622
|
return;
|
|
565
623
|
} catch (err) {
|
|
566
624
|
console.warn(
|
|
567
|
-
`[voice-call] Telephony TTS failed
|
|
625
|
+
`[voice-call] Telephony TTS failed:`,
|
|
568
626
|
err instanceof Error ? err.message : err,
|
|
569
627
|
);
|
|
570
|
-
|
|
628
|
+
throw err instanceof Error ? err : new Error(String(err));
|
|
571
629
|
}
|
|
572
630
|
}
|
|
573
631
|
|
|
574
|
-
// Fall back to TwiML <Say>
|
|
632
|
+
// Fall back to TwiML <Say> only when no active stream exists.
|
|
575
633
|
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
|
|
576
634
|
if (!webhookUrl) {
|
|
577
635
|
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
|
@@ -590,9 +648,22 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
590
648
|
</Gather>
|
|
591
649
|
</Response>`;
|
|
592
650
|
|
|
593
|
-
await this.
|
|
594
|
-
|
|
595
|
-
|
|
651
|
+
await this.updateLiveCallTwiml(input.providerCallId, twiml, "playTts");
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
async sendDtmf(input: SendDtmfInput): Promise<void> {
|
|
655
|
+
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
|
|
656
|
+
if (!webhookUrl) {
|
|
657
|
+
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
|
|
661
|
+
<Response>
|
|
662
|
+
<Play digits="${escapeXml(input.digits)}" />
|
|
663
|
+
<Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>
|
|
664
|
+
</Response>`;
|
|
665
|
+
|
|
666
|
+
await this.updateLiveCallTwiml(input.providerCallId, twiml, "sendDtmf");
|
|
596
667
|
}
|
|
597
668
|
|
|
598
669
|
/**
|
|
@@ -608,28 +679,112 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
608
679
|
// Stream audio in 20ms chunks (160 bytes at 8kHz mu-law)
|
|
609
680
|
const CHUNK_SIZE = 160;
|
|
610
681
|
const CHUNK_DELAY_MS = 20;
|
|
682
|
+
const SILENCE_CHUNK = Buffer.alloc(CHUNK_SIZE, 0xff);
|
|
611
683
|
|
|
612
684
|
const handler = this.mediaStreamHandler;
|
|
613
685
|
const ttsProvider = this.ttsProvider;
|
|
686
|
+
|
|
687
|
+
const normalizeSendResult = (raw: unknown): StreamSendResult => {
|
|
688
|
+
if (!raw || typeof raw !== "object") {
|
|
689
|
+
return { sent: true };
|
|
690
|
+
}
|
|
691
|
+
const typed = raw as {
|
|
692
|
+
sent?: unknown;
|
|
693
|
+
};
|
|
694
|
+
return {
|
|
695
|
+
sent: typed.sent === undefined ? true : Boolean(typed.sent),
|
|
696
|
+
};
|
|
697
|
+
};
|
|
698
|
+
|
|
699
|
+
const sendAudioChunk = (audio: Buffer): StreamSendResult => {
|
|
700
|
+
const raw = (handler as { sendAudio: (sid: string, chunk: Buffer) => unknown }).sendAudio(
|
|
701
|
+
streamSid,
|
|
702
|
+
audio,
|
|
703
|
+
);
|
|
704
|
+
return normalizeSendResult(raw);
|
|
705
|
+
};
|
|
706
|
+
|
|
707
|
+
const sendPlaybackMark = (name: string): StreamSendResult => {
|
|
708
|
+
const raw = (handler as { sendMark: (sid: string, markName: string) => unknown }).sendMark(
|
|
709
|
+
streamSid,
|
|
710
|
+
name,
|
|
711
|
+
);
|
|
712
|
+
return normalizeSendResult(raw);
|
|
713
|
+
};
|
|
714
|
+
|
|
614
715
|
await handler.queueTts(streamSid, async (signal) => {
|
|
716
|
+
const sendKeepAlive = () => {
|
|
717
|
+
sendAudioChunk(SILENCE_CHUNK);
|
|
718
|
+
};
|
|
719
|
+
sendKeepAlive();
|
|
720
|
+
const keepAlive = setInterval(() => {
|
|
721
|
+
if (!signal.aborted) {
|
|
722
|
+
sendKeepAlive();
|
|
723
|
+
}
|
|
724
|
+
}, CHUNK_DELAY_MS);
|
|
725
|
+
|
|
615
726
|
// Generate audio with core TTS (returns mu-law at 8kHz)
|
|
616
|
-
|
|
727
|
+
let muLawAudio: Buffer;
|
|
728
|
+
let synthTimeout: ReturnType<typeof setTimeout> | null = null;
|
|
729
|
+
const synthTimeoutMs = ttsProvider.synthesisTimeoutMs;
|
|
730
|
+
try {
|
|
731
|
+
const synthPromise = ttsProvider.synthesizeForTelephony(text);
|
|
732
|
+
const timeoutPromise = new Promise<Buffer>((_, reject) => {
|
|
733
|
+
synthTimeout = setTimeout(() => {
|
|
734
|
+
reject(new Error(`Telephony TTS synthesis timed out after ${synthTimeoutMs}ms`));
|
|
735
|
+
}, synthTimeoutMs);
|
|
736
|
+
});
|
|
737
|
+
muLawAudio = await Promise.race([synthPromise, timeoutPromise]);
|
|
738
|
+
} finally {
|
|
739
|
+
if (synthTimeout) {
|
|
740
|
+
clearTimeout(synthTimeout);
|
|
741
|
+
}
|
|
742
|
+
clearInterval(keepAlive);
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
if (muLawAudio.length === 0) {
|
|
746
|
+
throw new Error("Telephony TTS produced no audio");
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
let chunkAttempts = 0;
|
|
750
|
+
let chunkDelivered = 0;
|
|
751
|
+
let nextChunkDueAt = Date.now() + CHUNK_DELAY_MS;
|
|
617
752
|
for (const chunk of chunkAudio(muLawAudio, CHUNK_SIZE)) {
|
|
618
753
|
if (signal.aborted) {
|
|
619
754
|
break;
|
|
620
755
|
}
|
|
621
|
-
|
|
756
|
+
chunkAttempts += 1;
|
|
757
|
+
const chunkResult = sendAudioChunk(chunk);
|
|
758
|
+
if (chunkResult.sent) {
|
|
759
|
+
chunkDelivered += 1;
|
|
760
|
+
}
|
|
622
761
|
|
|
623
|
-
//
|
|
624
|
-
|
|
762
|
+
// Drift-corrected pacing: schedule against an absolute clock to avoid cumulative delay.
|
|
763
|
+
const waitMs = nextChunkDueAt - Date.now();
|
|
764
|
+
if (waitMs > 0) {
|
|
765
|
+
await new Promise((resolve) => setTimeout(resolve, Math.ceil(waitMs)));
|
|
766
|
+
}
|
|
767
|
+
nextChunkDueAt += CHUNK_DELAY_MS;
|
|
625
768
|
if (signal.aborted) {
|
|
626
769
|
break;
|
|
627
770
|
}
|
|
628
771
|
}
|
|
629
772
|
|
|
773
|
+
let markSent = true;
|
|
630
774
|
if (!signal.aborted) {
|
|
631
775
|
// Send a mark to track when audio finishes
|
|
632
|
-
|
|
776
|
+
markSent = sendPlaybackMark(`tts-${Date.now()}`).sent;
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
if (!signal.aborted && chunkAttempts > 0 && (chunkDelivered === 0 || !markSent)) {
|
|
780
|
+
const failures: string[] = [];
|
|
781
|
+
if (chunkDelivered === 0) {
|
|
782
|
+
failures.push("no audio chunks delivered");
|
|
783
|
+
}
|
|
784
|
+
if (!markSent) {
|
|
785
|
+
failures.push("completion mark not delivered");
|
|
786
|
+
}
|
|
787
|
+
throw new Error(`Telephony stream playback failed: ${failures.join("; ")}`);
|
|
633
788
|
}
|
|
634
789
|
});
|
|
635
790
|
}
|
|
@@ -654,9 +809,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
654
809
|
</Gather>
|
|
655
810
|
</Response>`;
|
|
656
811
|
|
|
657
|
-
await this.
|
|
658
|
-
Twiml: twiml,
|
|
659
|
-
});
|
|
812
|
+
await this.updateLiveCallTwiml(input.providerCallId, twiml, "startListening");
|
|
660
813
|
}
|
|
661
814
|
|
|
662
815
|
/**
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { WebhookSecurityConfig } from "../config.js";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Twilio Voice API provider options.
|
|
5
|
+
*/
|
|
6
|
+
export interface TwilioProviderOptions {
|
|
7
|
+
/** Allow ngrok free tier compatibility mode (loopback only, less secure) */
|
|
8
|
+
allowNgrokFreeTierLoopbackBypass?: boolean;
|
|
9
|
+
/** Override public URL for signature verification */
|
|
10
|
+
publicUrl?: string;
|
|
11
|
+
/** Path for media stream WebSocket (e.g., /voice/stream) */
|
|
12
|
+
streamPath?: string;
|
|
13
|
+
/** Skip webhook signature verification (development only) */
|
|
14
|
+
skipVerification?: boolean;
|
|
15
|
+
/** Webhook security options (forwarded headers/allowlist) */
|
|
16
|
+
webhookSecurity?: WebhookSecurityConfig;
|
|
17
|
+
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
|
|
2
|
+
|
|
3
|
+
export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
|
2
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
3
|
+
import type { VoiceCallRealtimeFastContextConfig } from "./config.js";
|
|
4
|
+
|
|
5
|
+
const mocks = vi.hoisted(() => ({
|
|
6
|
+
getActiveMemorySearchManager: vi.fn(),
|
|
7
|
+
}));
|
|
8
|
+
|
|
9
|
+
vi.mock("openclaw/plugin-sdk/memory-host-search", () => ({
|
|
10
|
+
getActiveMemorySearchManager: mocks.getActiveMemorySearchManager,
|
|
11
|
+
}));
|
|
12
|
+
|
|
13
|
+
import { resolveRealtimeFastContextConsult } from "./realtime-fast-context.js";
|
|
14
|
+
|
|
15
|
+
const cfg = {} as OpenClawConfig;
|
|
16
|
+
|
|
17
|
+
function createFastContextConfig(
|
|
18
|
+
overrides: Partial<VoiceCallRealtimeFastContextConfig> = {},
|
|
19
|
+
): VoiceCallRealtimeFastContextConfig {
|
|
20
|
+
return {
|
|
21
|
+
enabled: true,
|
|
22
|
+
timeoutMs: 800,
|
|
23
|
+
maxResults: 3,
|
|
24
|
+
sources: ["memory", "sessions"],
|
|
25
|
+
fallbackToConsult: false,
|
|
26
|
+
...overrides,
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function createLogger() {
|
|
31
|
+
return {
|
|
32
|
+
debug: vi.fn(),
|
|
33
|
+
warn: vi.fn(),
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
describe("resolveRealtimeFastContextConsult", () => {
|
|
38
|
+
beforeEach(() => {
|
|
39
|
+
mocks.getActiveMemorySearchManager.mockReset();
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
afterEach(() => {
|
|
43
|
+
vi.useRealTimers();
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
it("falls back to the full consult when memory manager setup fails", async () => {
|
|
47
|
+
const logger = createLogger();
|
|
48
|
+
mocks.getActiveMemorySearchManager.mockRejectedValue(new Error("memory misconfigured"));
|
|
49
|
+
|
|
50
|
+
await expect(
|
|
51
|
+
resolveRealtimeFastContextConsult({
|
|
52
|
+
cfg,
|
|
53
|
+
agentId: "main",
|
|
54
|
+
sessionKey: "voice:15550001234",
|
|
55
|
+
config: createFastContextConfig({ fallbackToConsult: true }),
|
|
56
|
+
args: { question: "What do you remember?" },
|
|
57
|
+
logger,
|
|
58
|
+
}),
|
|
59
|
+
).resolves.toEqual({ handled: false });
|
|
60
|
+
|
|
61
|
+
expect(logger.debug).toHaveBeenCalledWith(expect.stringContaining("memory misconfigured"));
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it("returns a bounded miss when memory manager setup exceeds the fast context timeout", async () => {
|
|
65
|
+
vi.useFakeTimers();
|
|
66
|
+
const logger = createLogger();
|
|
67
|
+
mocks.getActiveMemorySearchManager.mockReturnValue(new Promise(() => {}));
|
|
68
|
+
|
|
69
|
+
const resultPromise = resolveRealtimeFastContextConsult({
|
|
70
|
+
cfg,
|
|
71
|
+
agentId: "main",
|
|
72
|
+
sessionKey: "voice:15550001234",
|
|
73
|
+
config: createFastContextConfig({ fallbackToConsult: false, timeoutMs: 25 }),
|
|
74
|
+
args: { question: "What do you remember?" },
|
|
75
|
+
logger,
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
await vi.advanceTimersByTimeAsync(25);
|
|
79
|
+
|
|
80
|
+
await expect(resultPromise).resolves.toEqual({
|
|
81
|
+
handled: true,
|
|
82
|
+
result: {
|
|
83
|
+
text: expect.stringContaining("No relevant OpenClaw memory or session context"),
|
|
84
|
+
},
|
|
85
|
+
});
|
|
86
|
+
expect(logger.debug).toHaveBeenCalledWith(expect.stringContaining("timed out after 25ms"));
|
|
87
|
+
});
|
|
88
|
+
});
|