@openclaw/voice-call 2026.1.29 → 2026.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +13 -9
- package/index.ts +45 -49
- package/openclaw.plugin.json +11 -53
- package/package.json +6 -3
- package/src/cli.ts +80 -113
- package/src/config.test.ts +1 -4
- package/src/config.ts +88 -110
- package/src/core-bridge.ts +14 -12
- package/src/manager/context.ts +1 -1
- package/src/manager/events.ts +18 -9
- package/src/manager/lookup.ts +3 -1
- package/src/manager/outbound.ts +46 -19
- package/src/manager/state.ts +4 -6
- package/src/manager/store.ts +6 -3
- package/src/manager/timers.ts +11 -8
- package/src/manager.test.ts +7 -10
- package/src/manager.ts +53 -75
- package/src/media-stream.test.ts +0 -1
- package/src/media-stream.ts +12 -26
- package/src/providers/mock.ts +13 -16
- package/src/providers/plivo.test.ts +0 -1
- package/src/providers/plivo.ts +27 -29
- package/src/providers/stt-openai-realtime.ts +8 -8
- package/src/providers/telnyx.ts +5 -11
- package/src/providers/tts-openai.ts +9 -14
- package/src/providers/twilio/api.ts +9 -12
- package/src/providers/twilio/webhook.ts +2 -4
- package/src/providers/twilio.test.ts +1 -5
- package/src/providers/twilio.ts +34 -46
- package/src/response-generator.ts +7 -20
- package/src/runtime.ts +12 -25
- package/src/telephony-audio.ts +14 -12
- package/src/telephony-tts.ts +21 -12
- package/src/tunnel.ts +7 -24
- package/src/types.ts +0 -1
- package/src/utils.ts +3 -1
- package/src/voice-mapping.ts +3 -1
- package/src/webhook-security.test.ts +12 -21
- package/src/webhook-security.ts +25 -29
- package/src/webhook.ts +22 -57
package/src/webhook-security.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
-
|
|
3
2
|
import type { WebhookContext } from "./types.js";
|
|
4
3
|
|
|
5
4
|
/**
|
|
@@ -24,7 +23,7 @@ export function validateTwilioSignature(
|
|
|
24
23
|
let dataToSign = url;
|
|
25
24
|
|
|
26
25
|
// Sort params alphabetically and append key+value
|
|
27
|
-
const sortedParams = Array.from(params.entries()).sort((a, b) =>
|
|
26
|
+
const sortedParams = Array.from(params.entries()).toSorted((a, b) =>
|
|
28
27
|
a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0,
|
|
29
28
|
);
|
|
30
29
|
|
|
@@ -98,10 +97,7 @@ export function reconstructWebhookUrl(ctx: WebhookContext): string {
|
|
|
98
97
|
return `${proto}://${host}${path}`;
|
|
99
98
|
}
|
|
100
99
|
|
|
101
|
-
function buildTwilioVerificationUrl(
|
|
102
|
-
ctx: WebhookContext,
|
|
103
|
-
publicUrl?: string,
|
|
104
|
-
): string {
|
|
100
|
+
function buildTwilioVerificationUrl(ctx: WebhookContext, publicUrl?: string): string {
|
|
105
101
|
if (!publicUrl) {
|
|
106
102
|
return reconstructWebhookUrl(ctx);
|
|
107
103
|
}
|
|
@@ -132,9 +128,15 @@ function getHeader(
|
|
|
132
128
|
}
|
|
133
129
|
|
|
134
130
|
function isLoopbackAddress(address?: string): boolean {
|
|
135
|
-
if (!address) return false;
|
|
136
|
-
|
|
137
|
-
|
|
131
|
+
if (!address) {
|
|
132
|
+
return false;
|
|
133
|
+
}
|
|
134
|
+
if (address === "127.0.0.1" || address === "::1") {
|
|
135
|
+
return true;
|
|
136
|
+
}
|
|
137
|
+
if (address.startsWith("::ffff:127.")) {
|
|
138
|
+
return true;
|
|
139
|
+
}
|
|
138
140
|
return false;
|
|
139
141
|
}
|
|
140
142
|
|
|
@@ -186,12 +188,7 @@ export function verifyTwilioWebhook(
|
|
|
186
188
|
const params = new URLSearchParams(ctx.rawBody);
|
|
187
189
|
|
|
188
190
|
// Validate signature
|
|
189
|
-
const isValid = validateTwilioSignature(
|
|
190
|
-
authToken,
|
|
191
|
-
signature,
|
|
192
|
-
verificationUrl,
|
|
193
|
-
params,
|
|
194
|
-
);
|
|
191
|
+
const isValid = validateTwilioSignature(authToken, signature, verificationUrl, params);
|
|
195
192
|
|
|
196
193
|
if (isValid) {
|
|
197
194
|
return { ok: true, verificationUrl };
|
|
@@ -199,8 +196,7 @@ export function verifyTwilioWebhook(
|
|
|
199
196
|
|
|
200
197
|
// Check if this is ngrok free tier - the URL might have different format
|
|
201
198
|
const isNgrokFreeTier =
|
|
202
|
-
verificationUrl.includes(".ngrok-free.app") ||
|
|
203
|
-
verificationUrl.includes(".ngrok.io");
|
|
199
|
+
verificationUrl.includes(".ngrok-free.app") || verificationUrl.includes(".ngrok.io");
|
|
204
200
|
|
|
205
201
|
if (
|
|
206
202
|
isNgrokFreeTier &&
|
|
@@ -281,7 +277,9 @@ type PlivoParamMap = Record<string, string[]>;
|
|
|
281
277
|
function toParamMapFromSearchParams(sp: URLSearchParams): PlivoParamMap {
|
|
282
278
|
const map: PlivoParamMap = {};
|
|
283
279
|
for (const [key, value] of sp.entries()) {
|
|
284
|
-
if (!map[key]) map[key] = [];
|
|
280
|
+
if (!map[key]) {
|
|
281
|
+
map[key] = [];
|
|
282
|
+
}
|
|
285
283
|
map[key].push(value);
|
|
286
284
|
}
|
|
287
285
|
return map;
|
|
@@ -289,8 +287,8 @@ function toParamMapFromSearchParams(sp: URLSearchParams): PlivoParamMap {
|
|
|
289
287
|
|
|
290
288
|
function sortedQueryString(params: PlivoParamMap): string {
|
|
291
289
|
const parts: string[] = [];
|
|
292
|
-
for (const key of Object.keys(params).sort()) {
|
|
293
|
-
const values = [...params[key]].sort();
|
|
290
|
+
for (const key of Object.keys(params).toSorted()) {
|
|
291
|
+
const values = [...params[key]].toSorted();
|
|
294
292
|
for (const value of values) {
|
|
295
293
|
parts.push(`${key}=${value}`);
|
|
296
294
|
}
|
|
@@ -300,8 +298,8 @@ function sortedQueryString(params: PlivoParamMap): string {
|
|
|
300
298
|
|
|
301
299
|
function sortedParamsString(params: PlivoParamMap): string {
|
|
302
300
|
const parts: string[] = [];
|
|
303
|
-
for (const key of Object.keys(params).sort()) {
|
|
304
|
-
const values = [...params[key]].sort();
|
|
301
|
+
for (const key of Object.keys(params).toSorted()) {
|
|
302
|
+
const values = [...params[key]].toSorted();
|
|
305
303
|
for (const value of values) {
|
|
306
304
|
parts.push(`${key}${value}`);
|
|
307
305
|
}
|
|
@@ -353,10 +351,7 @@ function validatePlivoV3Signature(params: {
|
|
|
353
351
|
});
|
|
354
352
|
|
|
355
353
|
const hmacBase = `${baseUrl}.${params.nonce}`;
|
|
356
|
-
const digest = crypto
|
|
357
|
-
.createHmac("sha256", params.authToken)
|
|
358
|
-
.update(hmacBase)
|
|
359
|
-
.digest("base64");
|
|
354
|
+
const digest = crypto.createHmac("sha256", params.authToken).update(hmacBase).digest("base64");
|
|
360
355
|
const expected = normalizeSignatureBase64(digest);
|
|
361
356
|
|
|
362
357
|
// Header can contain multiple signatures separated by commas.
|
|
@@ -367,7 +362,9 @@ function validatePlivoV3Signature(params: {
|
|
|
367
362
|
.map((s) => normalizeSignatureBase64(s));
|
|
368
363
|
|
|
369
364
|
for (const sig of provided) {
|
|
370
|
-
if (timingSafeEqualString(expected, sig)) return true;
|
|
365
|
+
if (timingSafeEqualString(expected, sig)) {
|
|
366
|
+
return true;
|
|
367
|
+
}
|
|
371
368
|
}
|
|
372
369
|
return false;
|
|
373
370
|
}
|
|
@@ -413,8 +410,7 @@ export function verifyPlivoWebhook(
|
|
|
413
410
|
}
|
|
414
411
|
|
|
415
412
|
if (signatureV3 && nonceV3) {
|
|
416
|
-
const method =
|
|
417
|
-
ctx.method === "GET" || ctx.method === "POST" ? ctx.method : null;
|
|
413
|
+
const method = ctx.method === "GET" || ctx.method === "POST" ? ctx.method : null;
|
|
418
414
|
|
|
419
415
|
if (!method) {
|
|
420
416
|
return {
|
package/src/webhook.ts
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
2
|
import http from "node:http";
|
|
3
3
|
import { URL } from "node:url";
|
|
4
|
-
|
|
5
4
|
import type { VoiceCallConfig } from "./config.js";
|
|
6
5
|
import type { CoreConfig } from "./core-bridge.js";
|
|
7
6
|
import type { CallManager } from "./manager.js";
|
|
8
7
|
import type { MediaStreamConfig } from "./media-stream.js";
|
|
9
|
-
import { MediaStreamHandler } from "./media-stream.js";
|
|
10
8
|
import type { VoiceCallProvider } from "./providers/base.js";
|
|
11
|
-
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
|
12
9
|
import type { TwilioProvider } from "./providers/twilio.js";
|
|
13
10
|
import type { NormalizedEvent, WebhookContext } from "./types.js";
|
|
11
|
+
import { MediaStreamHandler } from "./media-stream.js";
|
|
12
|
+
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
|
14
13
|
|
|
15
14
|
/**
|
|
16
15
|
* HTTP server for receiving voice call webhooks from providers.
|
|
@@ -54,13 +53,10 @@ export class VoiceCallWebhookServer {
|
|
|
54
53
|
* Initialize media streaming with OpenAI Realtime STT.
|
|
55
54
|
*/
|
|
56
55
|
private initializeMediaStreaming(): void {
|
|
57
|
-
const apiKey =
|
|
58
|
-
this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
|
|
56
|
+
const apiKey = this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;
|
|
59
57
|
|
|
60
58
|
if (!apiKey) {
|
|
61
|
-
console.warn(
|
|
62
|
-
"[voice-call] Streaming enabled but no OpenAI API key found",
|
|
63
|
-
);
|
|
59
|
+
console.warn("[voice-call] Streaming enabled but no OpenAI API key found");
|
|
64
60
|
return;
|
|
65
61
|
}
|
|
66
62
|
|
|
@@ -74,9 +70,7 @@ export class VoiceCallWebhookServer {
|
|
|
74
70
|
const streamConfig: MediaStreamConfig = {
|
|
75
71
|
sttProvider,
|
|
76
72
|
onTranscript: (providerCallId, transcript) => {
|
|
77
|
-
console.log(
|
|
78
|
-
`[voice-call] Transcript for ${providerCallId}: ${transcript}`,
|
|
79
|
-
);
|
|
73
|
+
console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`);
|
|
80
74
|
|
|
81
75
|
// Clear TTS queue on barge-in (user started speaking, interrupt current playback)
|
|
82
76
|
if (this.provider.name === "twilio") {
|
|
@@ -86,9 +80,7 @@ export class VoiceCallWebhookServer {
|
|
|
86
80
|
// Look up our internal call ID from the provider call ID
|
|
87
81
|
const call = this.manager.getCallByProviderCallId(providerCallId);
|
|
88
82
|
if (!call) {
|
|
89
|
-
console.warn(
|
|
90
|
-
`[voice-call] No active call found for provider ID: ${providerCallId}`,
|
|
91
|
-
);
|
|
83
|
+
console.warn(`[voice-call] No active call found for provider ID: ${providerCallId}`);
|
|
92
84
|
return;
|
|
93
85
|
}
|
|
94
86
|
|
|
@@ -106,8 +98,7 @@ export class VoiceCallWebhookServer {
|
|
|
106
98
|
|
|
107
99
|
// Auto-respond in conversation mode (inbound always, outbound if mode is conversation)
|
|
108
100
|
const callMode = call.metadata?.mode as string | undefined;
|
|
109
|
-
const shouldRespond =
|
|
110
|
-
call.direction === "inbound" || callMode === "conversation";
|
|
101
|
+
const shouldRespond = call.direction === "inbound" || callMode === "conversation";
|
|
111
102
|
if (shouldRespond) {
|
|
112
103
|
this.handleInboundResponse(call.callId, transcript).catch((err) => {
|
|
113
104
|
console.warn(`[voice-call] Failed to auto-respond:`, err);
|
|
@@ -123,15 +114,10 @@ export class VoiceCallWebhookServer {
|
|
|
123
114
|
console.log(`[voice-call] Partial for ${callId}: ${partial}`);
|
|
124
115
|
},
|
|
125
116
|
onConnect: (callId, streamSid) => {
|
|
126
|
-
console.log(
|
|
127
|
-
`[voice-call] Media stream connected: ${callId} -> ${streamSid}`,
|
|
128
|
-
);
|
|
117
|
+
console.log(`[voice-call] Media stream connected: ${callId} -> ${streamSid}`);
|
|
129
118
|
// Register stream with provider for TTS routing
|
|
130
119
|
if (this.provider.name === "twilio") {
|
|
131
|
-
(this.provider as TwilioProvider).registerCallStream(
|
|
132
|
-
callId,
|
|
133
|
-
streamSid,
|
|
134
|
-
);
|
|
120
|
+
(this.provider as TwilioProvider).registerCallStream(callId, streamSid);
|
|
135
121
|
}
|
|
136
122
|
|
|
137
123
|
// Speak initial message if one was provided when call was initiated
|
|
@@ -173,10 +159,7 @@ export class VoiceCallWebhookServer {
|
|
|
173
159
|
// Handle WebSocket upgrades for media streams
|
|
174
160
|
if (this.mediaStreamHandler) {
|
|
175
161
|
this.server.on("upgrade", (request, socket, head) => {
|
|
176
|
-
const url = new URL(
|
|
177
|
-
request.url || "/",
|
|
178
|
-
`http://${request.headers.host}`,
|
|
179
|
-
);
|
|
162
|
+
const url = new URL(request.url || "/", `http://${request.headers.host}`);
|
|
180
163
|
|
|
181
164
|
if (url.pathname === streamPath) {
|
|
182
165
|
console.log("[voice-call] WebSocket upgrade for media stream");
|
|
@@ -193,9 +176,7 @@ export class VoiceCallWebhookServer {
|
|
|
193
176
|
const url = `http://${bind}:${port}${webhookPath}`;
|
|
194
177
|
console.log(`[voice-call] Webhook server listening on ${url}`);
|
|
195
178
|
if (this.mediaStreamHandler) {
|
|
196
|
-
console.log(
|
|
197
|
-
`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`,
|
|
198
|
-
);
|
|
179
|
+
console.log(`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`);
|
|
199
180
|
}
|
|
200
181
|
resolve(url);
|
|
201
182
|
});
|
|
@@ -258,9 +239,7 @@ export class VoiceCallWebhookServer {
|
|
|
258
239
|
// Verify signature
|
|
259
240
|
const verification = this.provider.verifyWebhook(ctx);
|
|
260
241
|
if (!verification.ok) {
|
|
261
|
-
console.warn(
|
|
262
|
-
`[voice-call] Webhook verification failed: ${verification.reason}`,
|
|
263
|
-
);
|
|
242
|
+
console.warn(`[voice-call] Webhook verification failed: ${verification.reason}`);
|
|
264
243
|
res.statusCode = 401;
|
|
265
244
|
res.end("Unauthorized");
|
|
266
245
|
return;
|
|
@@ -274,10 +253,7 @@ export class VoiceCallWebhookServer {
|
|
|
274
253
|
try {
|
|
275
254
|
this.manager.processEvent(event);
|
|
276
255
|
} catch (err) {
|
|
277
|
-
console.error(
|
|
278
|
-
`[voice-call] Error processing event ${event.type}:`,
|
|
279
|
-
err,
|
|
280
|
-
);
|
|
256
|
+
console.error(`[voice-call] Error processing event ${event.type}:`, err);
|
|
281
257
|
}
|
|
282
258
|
}
|
|
283
259
|
|
|
@@ -285,9 +261,7 @@ export class VoiceCallWebhookServer {
|
|
|
285
261
|
res.statusCode = result.statusCode || 200;
|
|
286
262
|
|
|
287
263
|
if (result.providerResponseHeaders) {
|
|
288
|
-
for (const [key, value] of Object.entries(
|
|
289
|
-
result.providerResponseHeaders,
|
|
290
|
-
)) {
|
|
264
|
+
for (const [key, value] of Object.entries(result.providerResponseHeaders)) {
|
|
291
265
|
res.setHeader(key, value);
|
|
292
266
|
}
|
|
293
267
|
}
|
|
@@ -311,13 +285,8 @@ export class VoiceCallWebhookServer {
|
|
|
311
285
|
* Handle auto-response for inbound calls using the agent system.
|
|
312
286
|
* Supports tool calling for richer voice interactions.
|
|
313
287
|
*/
|
|
314
|
-
private async handleInboundResponse(
|
|
315
|
-
callId: string,
|
|
316
|
-
userMessage: string,
|
|
317
|
-
): Promise<void> {
|
|
318
|
-
console.log(
|
|
319
|
-
`[voice-call] Auto-responding to inbound call ${callId}: "${userMessage}"`,
|
|
320
|
-
);
|
|
288
|
+
private async handleInboundResponse(callId: string, userMessage: string): Promise<void> {
|
|
289
|
+
console.log(`[voice-call] Auto-responding to inbound call ${callId}: "${userMessage}"`);
|
|
321
290
|
|
|
322
291
|
// Get call context for conversation history
|
|
323
292
|
const call = this.manager.getCall(callId);
|
|
@@ -344,9 +313,7 @@ export class VoiceCallWebhookServer {
|
|
|
344
313
|
});
|
|
345
314
|
|
|
346
315
|
if (result.error) {
|
|
347
|
-
console.error(
|
|
348
|
-
`[voice-call] Response generation error: ${result.error}`,
|
|
349
|
-
);
|
|
316
|
+
console.error(`[voice-call] Response generation error: ${result.error}`);
|
|
350
317
|
return;
|
|
351
318
|
}
|
|
352
319
|
|
|
@@ -399,7 +366,9 @@ function runTailscaleCommand(
|
|
|
399
366
|
|
|
400
367
|
export async function getTailscaleSelfInfo(): Promise<TailscaleSelfInfo | null> {
|
|
401
368
|
const { code, stdout } = await runTailscaleCommand(["status", "--json"]);
|
|
402
|
-
if (code !== 0) return null;
|
|
369
|
+
if (code !== 0) {
|
|
370
|
+
return null;
|
|
371
|
+
}
|
|
403
372
|
|
|
404
373
|
try {
|
|
405
374
|
const status = JSON.parse(stdout);
|
|
@@ -458,9 +427,7 @@ export async function cleanupTailscaleExposureRoute(opts: {
|
|
|
458
427
|
* Setup Tailscale serve/funnel for the webhook server.
|
|
459
428
|
* This is a helper that shells out to `tailscale serve` or `tailscale funnel`.
|
|
460
429
|
*/
|
|
461
|
-
export async function setupTailscaleExposure(
|
|
462
|
-
config: VoiceCallConfig,
|
|
463
|
-
): Promise<string | null> {
|
|
430
|
+
export async function setupTailscaleExposure(config: VoiceCallConfig): Promise<string | null> {
|
|
464
431
|
if (config.tailscale.mode === "off") {
|
|
465
432
|
return null;
|
|
466
433
|
}
|
|
@@ -479,9 +446,7 @@ export async function setupTailscaleExposure(
|
|
|
479
446
|
/**
|
|
480
447
|
* Cleanup Tailscale serve/funnel.
|
|
481
448
|
*/
|
|
482
|
-
export async function cleanupTailscaleExposure(
|
|
483
|
-
config: VoiceCallConfig,
|
|
484
|
-
): Promise<void> {
|
|
449
|
+
export async function cleanupTailscaleExposure(config: VoiceCallConfig): Promise<void> {
|
|
485
450
|
if (config.tailscale.mode === "off") {
|
|
486
451
|
return;
|
|
487
452
|
}
|