@openclaw/voice-call 2026.1.29 → 2026.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +13 -9
- package/index.ts +45 -49
- package/openclaw.plugin.json +11 -53
- package/package.json +6 -3
- package/src/cli.ts +80 -113
- package/src/config.test.ts +1 -4
- package/src/config.ts +88 -110
- package/src/core-bridge.ts +14 -12
- package/src/manager/context.ts +1 -1
- package/src/manager/events.ts +18 -9
- package/src/manager/lookup.ts +3 -1
- package/src/manager/outbound.ts +46 -19
- package/src/manager/state.ts +4 -6
- package/src/manager/store.ts +6 -3
- package/src/manager/timers.ts +11 -8
- package/src/manager.test.ts +7 -10
- package/src/manager.ts +53 -75
- package/src/media-stream.test.ts +0 -1
- package/src/media-stream.ts +12 -26
- package/src/providers/mock.ts +13 -16
- package/src/providers/plivo.test.ts +0 -1
- package/src/providers/plivo.ts +27 -29
- package/src/providers/stt-openai-realtime.ts +8 -8
- package/src/providers/telnyx.ts +5 -11
- package/src/providers/tts-openai.ts +9 -14
- package/src/providers/twilio/api.ts +9 -12
- package/src/providers/twilio/webhook.ts +2 -4
- package/src/providers/twilio.test.ts +1 -5
- package/src/providers/twilio.ts +34 -46
- package/src/response-generator.ts +7 -20
- package/src/runtime.ts +12 -25
- package/src/telephony-audio.ts +14 -12
- package/src/telephony-tts.ts +21 -12
- package/src/tunnel.ts +7 -24
- package/src/types.ts +0 -1
- package/src/utils.ts +3 -1
- package/src/voice-mapping.ts +3 -1
- package/src/webhook-security.test.ts +12 -21
- package/src/webhook-security.ts +25 -29
- package/src/webhook.ts +22 -57
package/src/providers/twilio.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
-
|
|
3
2
|
import type { TwilioConfig } from "../config.js";
|
|
4
3
|
import type { MediaStreamHandler } from "../media-stream.js";
|
|
4
|
+
import type { TelephonyTtsProvider } from "../telephony-tts.js";
|
|
5
5
|
import type {
|
|
6
6
|
HangupCallInput,
|
|
7
7
|
InitiateCallInput,
|
|
@@ -14,10 +14,9 @@ import type {
|
|
|
14
14
|
WebhookContext,
|
|
15
15
|
WebhookVerificationResult,
|
|
16
16
|
} from "../types.js";
|
|
17
|
-
import { escapeXml, mapVoiceToPolly } from "../voice-mapping.js";
|
|
18
|
-
import { chunkAudio } from "../telephony-audio.js";
|
|
19
|
-
import type { TelephonyTtsProvider } from "../telephony-tts.js";
|
|
20
17
|
import type { VoiceCallProvider } from "./base.js";
|
|
18
|
+
import { chunkAudio } from "../telephony-audio.js";
|
|
19
|
+
import { escapeXml, mapVoiceToPolly } from "../voice-mapping.js";
|
|
21
20
|
import { twilioApiRequest } from "./twilio/api.js";
|
|
22
21
|
import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
|
|
23
22
|
|
|
@@ -85,10 +84,14 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
85
84
|
*/
|
|
86
85
|
private deleteStoredTwimlForProviderCall(providerCallId: string): void {
|
|
87
86
|
const webhookUrl = this.callWebhookUrls.get(providerCallId);
|
|
88
|
-
if (!webhookUrl)
|
|
87
|
+
if (!webhookUrl) {
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
89
90
|
|
|
90
91
|
const callIdMatch = webhookUrl.match(/callId=([^&]+)/);
|
|
91
|
-
if (!callIdMatch)
|
|
92
|
+
if (!callIdMatch) {
|
|
93
|
+
return;
|
|
94
|
+
}
|
|
92
95
|
|
|
93
96
|
this.deleteStoredTwiml(callIdMatch[1]);
|
|
94
97
|
}
|
|
@@ -211,22 +214,20 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
211
214
|
/**
|
|
212
215
|
* Parse Twilio direction to normalized format.
|
|
213
216
|
*/
|
|
214
|
-
private static parseDirection(
|
|
215
|
-
direction
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
if (direction === "outbound-api" || direction === "outbound-dial")
|
|
217
|
+
private static parseDirection(direction: string | null): "inbound" | "outbound" | undefined {
|
|
218
|
+
if (direction === "inbound") {
|
|
219
|
+
return "inbound";
|
|
220
|
+
}
|
|
221
|
+
if (direction === "outbound-api" || direction === "outbound-dial") {
|
|
219
222
|
return "outbound";
|
|
223
|
+
}
|
|
220
224
|
return undefined;
|
|
221
225
|
}
|
|
222
226
|
|
|
223
227
|
/**
|
|
224
228
|
* Convert Twilio webhook params to normalized event format.
|
|
225
229
|
*/
|
|
226
|
-
private normalizeEvent(
|
|
227
|
-
params: URLSearchParams,
|
|
228
|
-
callIdOverride?: string,
|
|
229
|
-
): NormalizedEvent | null {
|
|
230
|
+
private normalizeEvent(params: URLSearchParams, callIdOverride?: string): NormalizedEvent | null {
|
|
230
231
|
const callSid = params.get("CallSid") || "";
|
|
231
232
|
|
|
232
233
|
const baseEvent = {
|
|
@@ -297,11 +298,12 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
297
298
|
* When a call is answered, connects to media stream for bidirectional audio.
|
|
298
299
|
*/
|
|
299
300
|
private generateTwimlResponse(ctx?: WebhookContext): string {
|
|
300
|
-
if (!ctx)
|
|
301
|
+
if (!ctx) {
|
|
302
|
+
return TwilioProvider.EMPTY_TWIML;
|
|
303
|
+
}
|
|
301
304
|
|
|
302
305
|
const params = new URLSearchParams(ctx.rawBody);
|
|
303
|
-
const type =
|
|
304
|
-
typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
|
|
306
|
+
const type = typeof ctx.query?.type === "string" ? ctx.query.type.trim() : undefined;
|
|
305
307
|
const isStatusCallback = type === "status";
|
|
306
308
|
const callStatus = params.get("CallStatus");
|
|
307
309
|
const direction = params.get("Direction");
|
|
@@ -329,9 +331,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
329
331
|
// Conversation mode: return streaming TwiML immediately for outbound calls.
|
|
330
332
|
if (isOutbound) {
|
|
331
333
|
const streamUrl = this.getStreamUrl();
|
|
332
|
-
return streamUrl
|
|
333
|
-
? this.getStreamConnectXml(streamUrl)
|
|
334
|
-
: TwilioProvider.PAUSE_TWIML;
|
|
334
|
+
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
|
335
335
|
}
|
|
336
336
|
}
|
|
337
337
|
|
|
@@ -344,9 +344,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
344
344
|
// For inbound calls, answer immediately with stream
|
|
345
345
|
if (direction === "inbound") {
|
|
346
346
|
const streamUrl = this.getStreamUrl();
|
|
347
|
-
return streamUrl
|
|
348
|
-
? this.getStreamConnectXml(streamUrl)
|
|
349
|
-
: TwilioProvider.PAUSE_TWIML;
|
|
347
|
+
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
|
350
348
|
}
|
|
351
349
|
|
|
352
350
|
// For outbound calls, only connect to stream when call is in-progress
|
|
@@ -355,9 +353,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
355
353
|
}
|
|
356
354
|
|
|
357
355
|
const streamUrl = this.getStreamUrl();
|
|
358
|
-
return streamUrl
|
|
359
|
-
? this.getStreamConnectXml(streamUrl)
|
|
360
|
-
: TwilioProvider.PAUSE_TWIML;
|
|
356
|
+
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
|
361
357
|
}
|
|
362
358
|
|
|
363
359
|
/**
|
|
@@ -374,9 +370,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
374
370
|
const origin = url.origin;
|
|
375
371
|
|
|
376
372
|
// Convert https:// to wss:// for WebSocket
|
|
377
|
-
const wsOrigin = origin
|
|
378
|
-
.replace(/^https:\/\//, "wss://")
|
|
379
|
-
.replace(/^http:\/\//, "ws://");
|
|
373
|
+
const wsOrigin = origin.replace(/^https:\/\//, "wss://").replace(/^http:\/\//, "ws://");
|
|
380
374
|
|
|
381
375
|
// Append the stream path
|
|
382
376
|
const path = this.options.streamPath.startsWith("/")
|
|
@@ -433,10 +427,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
433
427
|
Timeout: "30",
|
|
434
428
|
};
|
|
435
429
|
|
|
436
|
-
const result = await this.apiRequest<TwilioCallResponse>(
|
|
437
|
-
"/Calls.json",
|
|
438
|
-
params,
|
|
439
|
-
);
|
|
430
|
+
const result = await this.apiRequest<TwilioCallResponse>("/Calls.json", params);
|
|
440
431
|
|
|
441
432
|
this.callWebhookUrls.set(result.sid, url.toString());
|
|
442
433
|
|
|
@@ -489,9 +480,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
489
480
|
// Fall back to TwiML <Say> (may not work on all accounts)
|
|
490
481
|
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
|
|
491
482
|
if (!webhookUrl) {
|
|
492
|
-
throw new Error(
|
|
493
|
-
"Missing webhook URL for this call (provider state not initialized)",
|
|
494
|
-
);
|
|
483
|
+
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
|
495
484
|
}
|
|
496
485
|
|
|
497
486
|
console.warn(
|
|
@@ -517,10 +506,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
517
506
|
* Generates audio with core TTS, converts to mu-law, and streams via WebSocket.
|
|
518
507
|
* Uses a queue to serialize playback and prevent overlapping audio.
|
|
519
508
|
*/
|
|
520
|
-
private async playTtsViaStream(
|
|
521
|
-
text: string,
|
|
522
|
-
streamSid: string,
|
|
523
|
-
): Promise<void> {
|
|
509
|
+
private async playTtsViaStream(text: string, streamSid: string): Promise<void> {
|
|
524
510
|
if (!this.ttsProvider || !this.mediaStreamHandler) {
|
|
525
511
|
throw new Error("TTS provider and media stream handler required");
|
|
526
512
|
}
|
|
@@ -535,12 +521,16 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
535
521
|
// Generate audio with core TTS (returns mu-law at 8kHz)
|
|
536
522
|
const muLawAudio = await ttsProvider.synthesizeForTelephony(text);
|
|
537
523
|
for (const chunk of chunkAudio(muLawAudio, CHUNK_SIZE)) {
|
|
538
|
-
if (signal.aborted)
|
|
524
|
+
if (signal.aborted) {
|
|
525
|
+
break;
|
|
526
|
+
}
|
|
539
527
|
handler.sendAudio(streamSid, chunk);
|
|
540
528
|
|
|
541
529
|
// Pace the audio to match real-time playback
|
|
542
530
|
await new Promise((resolve) => setTimeout(resolve, CHUNK_DELAY_MS));
|
|
543
|
-
if (signal.aborted)
|
|
531
|
+
if (signal.aborted) {
|
|
532
|
+
break;
|
|
533
|
+
}
|
|
544
534
|
}
|
|
545
535
|
|
|
546
536
|
if (!signal.aborted) {
|
|
@@ -556,9 +546,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
556
546
|
async startListening(input: StartListeningInput): Promise<void> {
|
|
557
547
|
const webhookUrl = this.callWebhookUrls.get(input.providerCallId);
|
|
558
548
|
if (!webhookUrl) {
|
|
559
|
-
throw new Error(
|
|
560
|
-
"Missing webhook URL for this call (provider state not initialized)",
|
|
561
|
-
);
|
|
549
|
+
throw new Error("Missing webhook URL for this call (provider state not initialized)");
|
|
562
550
|
}
|
|
563
551
|
|
|
564
552
|
const twiml = `<?xml version="1.0" encoding="UTF-8"?>
|
|
package/src/response-generator.ts
CHANGED
|
@@ -4,10 +4,8 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import crypto from "node:crypto";
|
|
7
|
-
|
|
8
|
-
import { loadCoreAgentDeps, type CoreConfig } from "./core-bridge.js";
|
|
9
|
-
|
|
10
7
|
import type { VoiceCallConfig } from "./config.js";
|
|
8
|
+
import { loadCoreAgentDeps, type CoreConfig } from "./core-bridge.js";
|
|
11
9
|
|
|
12
10
|
export type VoiceResponseParams = {
|
|
13
11
|
/** Voice call config */
|
|
@@ -41,8 +39,7 @@ type SessionEntry = {
|
|
|
41
39
|
export async function generateVoiceResponse(
|
|
42
40
|
params: VoiceResponseParams,
|
|
43
41
|
): Promise<VoiceResponseResult> {
|
|
44
|
-
const { voiceConfig, callId, from, transcript, userMessage, coreConfig } =
|
|
45
|
-
params;
|
|
42
|
+
const { voiceConfig, callId, from, transcript, userMessage, coreConfig } = params;
|
|
46
43
|
|
|
47
44
|
if (!coreConfig) {
|
|
48
45
|
return { text: null, error: "Core config unavailable for voice response" };
|
|
@@ -54,10 +51,7 @@ export async function generateVoiceResponse(
|
|
|
54
51
|
} catch (err) {
|
|
55
52
|
return {
|
|
56
53
|
text: null,
|
|
57
|
-
error:
|
|
58
|
-
err instanceof Error
|
|
59
|
-
? err.message
|
|
60
|
-
: "Unable to load core agent dependencies",
|
|
54
|
+
error: err instanceof Error ? err.message : "Unable to load core agent dependencies",
|
|
61
55
|
};
|
|
62
56
|
}
|
|
63
57
|
const cfg = coreConfig;
|
|
@@ -95,12 +89,9 @@ export async function generateVoiceResponse(
|
|
|
95
89
|
});
|
|
96
90
|
|
|
97
91
|
// Resolve model from config
|
|
98
|
-
const modelRef =
|
|
99
|
-
voiceConfig.responseModel ||
|
|
100
|
-
`${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
|
|
92
|
+
const modelRef = voiceConfig.responseModel || `${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
|
|
101
93
|
const slashIndex = modelRef.indexOf("/");
|
|
102
|
-
const provider =
|
|
103
|
-
slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
|
|
94
|
+
const provider = slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
|
|
104
95
|
const model = slashIndex === -1 ? modelRef : modelRef.slice(slashIndex + 1);
|
|
105
96
|
|
|
106
97
|
// Resolve thinking level
|
|
@@ -118,17 +109,13 @@ export async function generateVoiceResponse(
|
|
|
118
109
|
let extraSystemPrompt = basePrompt;
|
|
119
110
|
if (transcript.length > 0) {
|
|
120
111
|
const history = transcript
|
|
121
|
-
.map(
|
|
122
|
-
(entry) =>
|
|
123
|
-
`${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`,
|
|
124
|
-
)
|
|
112
|
+
.map((entry) => `${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`)
|
|
125
113
|
.join("\n");
|
|
126
114
|
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
|
|
127
115
|
}
|
|
128
116
|
|
|
129
117
|
// Resolve timeout
|
|
130
|
-
const timeoutMs =
|
|
131
|
-
voiceConfig.responseTimeoutMs ?? deps.resolveAgentTimeoutMs({ cfg });
|
|
118
|
+
const timeoutMs = voiceConfig.responseTimeoutMs ?? deps.resolveAgentTimeoutMs({ cfg });
|
|
132
119
|
const runId = `voice:${callId}:${Date.now()}`;
|
|
133
120
|
|
|
134
121
|
try {
|
package/src/runtime.ts
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import type { CoreConfig } from "./core-bridge.js";
|
|
2
1
|
import type { VoiceCallConfig } from "./config.js";
|
|
2
|
+
import type { CoreConfig } from "./core-bridge.js";
|
|
3
|
+
import type { VoiceCallProvider } from "./providers/base.js";
|
|
4
|
+
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
|
3
5
|
import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js";
|
|
4
6
|
import { CallManager } from "./manager.js";
|
|
5
|
-
import type { VoiceCallProvider } from "./providers/base.js";
|
|
6
7
|
import { MockProvider } from "./providers/mock.js";
|
|
7
8
|
import { PlivoProvider } from "./providers/plivo.js";
|
|
8
9
|
import { TelnyxProvider } from "./providers/telnyx.js";
|
|
9
10
|
import { TwilioProvider } from "./providers/twilio.js";
|
|
10
|
-
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
|
11
11
|
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
|
12
12
|
import { startTunnel, type TunnelResult } from "./tunnel.js";
|
|
13
13
|
import {
|
|
@@ -34,7 +34,9 @@ type Logger = {
|
|
|
34
34
|
};
|
|
35
35
|
|
|
36
36
|
function isLoopbackBind(bind: string | undefined): boolean {
|
|
37
|
-
if (!bind)
|
|
37
|
+
if (!bind) {
|
|
38
|
+
return false;
|
|
39
|
+
}
|
|
38
40
|
return bind === "127.0.0.1" || bind === "::1" || bind === "localhost";
|
|
39
41
|
}
|
|
40
42
|
|
|
@@ -42,9 +44,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
42
44
|
const allowNgrokFreeTierLoopbackBypass =
|
|
43
45
|
config.tunnel?.provider === "ngrok" &&
|
|
44
46
|
isLoopbackBind(config.serve?.bind) &&
|
|
45
|
-
(config.tunnel?.allowNgrokFreeTierLoopbackBypass ||
|
|
46
|
-
config.tunnel?.allowNgrokFreeTier ||
|
|
47
|
-
false);
|
|
47
|
+
(config.tunnel?.allowNgrokFreeTierLoopbackBypass || config.tunnel?.allowNgrokFreeTier || false);
|
|
48
48
|
|
|
49
49
|
switch (config.provider) {
|
|
50
50
|
case "telnyx":
|
|
@@ -63,9 +63,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
63
63
|
allowNgrokFreeTierLoopbackBypass,
|
|
64
64
|
publicUrl: config.publicUrl,
|
|
65
65
|
skipVerification: config.skipSignatureVerification,
|
|
66
|
-
streamPath: config.streaming?.enabled
|
|
67
|
-
? config.streaming.streamPath
|
|
68
|
-
: undefined,
|
|
66
|
+
streamPath: config.streaming?.enabled ? config.streaming.streamPath : undefined,
|
|
69
67
|
},
|
|
70
68
|
);
|
|
71
69
|
case "plivo":
|
|
@@ -83,9 +81,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
83
81
|
case "mock":
|
|
84
82
|
return new MockProvider();
|
|
85
83
|
default:
|
|
86
|
-
throw new Error(
|
|
87
|
-
`Unsupported voice-call provider: ${String(config.provider)}`,
|
|
88
|
-
);
|
|
84
|
+
throw new Error(`Unsupported voice-call provider: ${String(config.provider)}`);
|
|
89
85
|
}
|
|
90
86
|
}
|
|
91
87
|
|
|
@@ -106,9 +102,7 @@ export async function createVoiceCallRuntime(params: {
|
|
|
106
102
|
const config = resolveVoiceCallConfig(rawConfig);
|
|
107
103
|
|
|
108
104
|
if (!config.enabled) {
|
|
109
|
-
throw new Error(
|
|
110
|
-
"Voice call disabled. Enable the plugin entry in config.",
|
|
111
|
-
);
|
|
105
|
+
throw new Error("Voice call disabled. Enable the plugin entry in config.");
|
|
112
106
|
}
|
|
113
107
|
|
|
114
108
|
const validation = validateProviderConfig(config);
|
|
@@ -118,12 +112,7 @@ export async function createVoiceCallRuntime(params: {
|
|
|
118
112
|
|
|
119
113
|
const provider = resolveProvider(config);
|
|
120
114
|
const manager = new CallManager(config);
|
|
121
|
-
const webhookServer = new VoiceCallWebhookServer(
|
|
122
|
-
config,
|
|
123
|
-
manager,
|
|
124
|
-
provider,
|
|
125
|
-
coreConfig,
|
|
126
|
-
);
|
|
115
|
+
const webhookServer = new VoiceCallWebhookServer(config, manager, provider, coreConfig);
|
|
127
116
|
|
|
128
117
|
const localUrl = await webhookServer.start();
|
|
129
118
|
|
|
@@ -143,9 +132,7 @@ export async function createVoiceCallRuntime(params: {
|
|
|
143
132
|
publicUrl = tunnelResult?.publicUrl ?? null;
|
|
144
133
|
} catch (err) {
|
|
145
134
|
log.error(
|
|
146
|
-
`[voice-call] Tunnel setup failed: ${
|
|
147
|
-
err instanceof Error ? err.message : String(err)
|
|
148
|
-
}`,
|
|
135
|
+
`[voice-call] Tunnel setup failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
149
136
|
);
|
|
150
137
|
}
|
|
151
138
|
}
|
package/src/telephony-audio.ts
CHANGED
|
@@ -8,9 +8,13 @@ function clamp16(value: number): number {
|
|
|
8
8
|
* Resample 16-bit PCM (little-endian mono) to 8kHz using linear interpolation.
|
|
9
9
|
*/
|
|
10
10
|
export function resamplePcmTo8k(input: Buffer, inputSampleRate: number): Buffer {
|
|
11
|
-
if (inputSampleRate === TELEPHONY_SAMPLE_RATE)
|
|
11
|
+
if (inputSampleRate === TELEPHONY_SAMPLE_RATE) {
|
|
12
|
+
return input;
|
|
13
|
+
}
|
|
12
14
|
const inputSamples = Math.floor(input.length / 2);
|
|
13
|
-
if (inputSamples === 0)
|
|
15
|
+
if (inputSamples === 0) {
|
|
16
|
+
return Buffer.alloc(0);
|
|
17
|
+
}
|
|
14
18
|
|
|
15
19
|
const ratio = inputSampleRate / TELEPHONY_SAMPLE_RATE;
|
|
16
20
|
const outputSamples = Math.floor(inputSamples / ratio);
|
|
@@ -47,10 +51,7 @@ export function pcmToMulaw(pcm: Buffer): Buffer {
|
|
|
47
51
|
return mulaw;
|
|
48
52
|
}
|
|
49
53
|
|
|
50
|
-
export function convertPcmToMulaw8k(
|
|
51
|
-
pcm: Buffer,
|
|
52
|
-
inputSampleRate: number,
|
|
53
|
-
): Buffer {
|
|
54
|
+
export function convertPcmToMulaw8k(pcm: Buffer, inputSampleRate: number): Buffer {
|
|
54
55
|
const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
|
|
55
56
|
return pcmToMulaw(pcm8k);
|
|
56
57
|
}
|
|
@@ -58,10 +59,7 @@ export function convertPcmToMulaw8k(
|
|
|
58
59
|
/**
|
|
59
60
|
* Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
|
|
60
61
|
*/
|
|
61
|
-
export function chunkAudio(
|
|
62
|
-
audio: Buffer,
|
|
63
|
-
chunkSize = 160,
|
|
64
|
-
): Generator<Buffer, void, unknown> {
|
|
62
|
+
export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
|
|
65
63
|
return (function* () {
|
|
66
64
|
for (let i = 0; i < audio.length; i += chunkSize) {
|
|
67
65
|
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
|
|
@@ -74,8 +72,12 @@ function linearToMulaw(sample: number): number {
|
|
|
74
72
|
const CLIP = 32635;
|
|
75
73
|
|
|
76
74
|
const sign = sample < 0 ? 0x80 : 0;
|
|
77
|
-
if (sample < 0)
|
|
78
|
-
|
|
75
|
+
if (sample < 0) {
|
|
76
|
+
sample = -sample;
|
|
77
|
+
}
|
|
78
|
+
if (sample > CLIP) {
|
|
79
|
+
sample = CLIP;
|
|
80
|
+
}
|
|
79
81
|
|
|
80
82
|
sample += BIAS;
|
|
81
83
|
let exponent = 7;
|
package/src/telephony-tts.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { CoreConfig } from "./core-bridge.js";
|
|
2
1
|
import type { VoiceCallTtsConfig } from "./config.js";
|
|
2
|
+
import type { CoreConfig } from "./core-bridge.js";
|
|
3
3
|
import { convertPcmToMulaw8k } from "./telephony-audio.js";
|
|
4
4
|
|
|
5
5
|
export type TelephonyTtsRuntime = {
|
|
@@ -44,20 +44,21 @@ export function createTelephonyTtsProvider(params: {
|
|
|
44
44
|
};
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
-
function applyTtsOverride(
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if (!override) return coreConfig;
|
|
47
|
+
function applyTtsOverride(coreConfig: CoreConfig, override?: VoiceCallTtsConfig): CoreConfig {
|
|
48
|
+
if (!override) {
|
|
49
|
+
return coreConfig;
|
|
50
|
+
}
|
|
52
51
|
|
|
53
52
|
const base = coreConfig.messages?.tts;
|
|
54
53
|
const merged = mergeTtsConfig(base, override);
|
|
55
|
-
if (!merged)
|
|
54
|
+
if (!merged) {
|
|
55
|
+
return coreConfig;
|
|
56
|
+
}
|
|
56
57
|
|
|
57
58
|
return {
|
|
58
59
|
...coreConfig,
|
|
59
60
|
messages: {
|
|
60
|
-
...
|
|
61
|
+
...coreConfig.messages,
|
|
61
62
|
tts: merged,
|
|
62
63
|
},
|
|
63
64
|
};
|
|
@@ -67,9 +68,15 @@ function mergeTtsConfig(
|
|
|
67
68
|
base?: VoiceCallTtsConfig,
|
|
68
69
|
override?: VoiceCallTtsConfig,
|
|
69
70
|
): VoiceCallTtsConfig | undefined {
|
|
70
|
-
if (!base && !override)
|
|
71
|
-
|
|
72
|
-
|
|
71
|
+
if (!base && !override) {
|
|
72
|
+
return undefined;
|
|
73
|
+
}
|
|
74
|
+
if (!override) {
|
|
75
|
+
return base;
|
|
76
|
+
}
|
|
77
|
+
if (!base) {
|
|
78
|
+
return override;
|
|
79
|
+
}
|
|
73
80
|
return deepMerge(base, override);
|
|
74
81
|
}
|
|
75
82
|
|
|
@@ -79,7 +86,9 @@ function deepMerge<T>(base: T, override: T): T {
|
|
|
79
86
|
}
|
|
80
87
|
const result: Record<string, unknown> = { ...base };
|
|
81
88
|
for (const [key, value] of Object.entries(override)) {
|
|
82
|
-
if (value === undefined)
|
|
89
|
+
if (value === undefined) {
|
|
90
|
+
continue;
|
|
91
|
+
}
|
|
83
92
|
const existing = (base as Record<string, unknown>)[key];
|
|
84
93
|
if (isPlainObject(existing) && isPlainObject(value)) {
|
|
85
94
|
result[key] = deepMerge(existing, value);
|
package/src/tunnel.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
|
-
|
|
3
2
|
import { getTailscaleDnsName } from "./webhook.js";
|
|
4
3
|
|
|
5
4
|
/**
|
|
@@ -52,14 +51,7 @@ export async function startNgrokTunnel(config: {
|
|
|
52
51
|
}
|
|
53
52
|
|
|
54
53
|
// Build ngrok command args
|
|
55
|
-
const args = [
|
|
56
|
-
"http",
|
|
57
|
-
String(config.port),
|
|
58
|
-
"--log",
|
|
59
|
-
"stdout",
|
|
60
|
-
"--log-format",
|
|
61
|
-
"json",
|
|
62
|
-
];
|
|
54
|
+
const args = ["http", String(config.port), "--log", "stdout", "--log-format", "json"];
|
|
63
55
|
|
|
64
56
|
// Add custom domain if provided (paid ngrok feature)
|
|
65
57
|
if (config.domain) {
|
|
@@ -234,11 +226,9 @@ export async function startTailscaleTunnel(config: {
|
|
|
234
226
|
const localUrl = `http://127.0.0.1:${config.port}${path}`;
|
|
235
227
|
|
|
236
228
|
return new Promise((resolve, reject) => {
|
|
237
|
-
const proc = spawn(
|
|
238
|
-
"
|
|
239
|
-
|
|
240
|
-
{ stdio: ["ignore", "pipe", "pipe"] },
|
|
241
|
-
);
|
|
229
|
+
const proc = spawn("tailscale", [config.mode, "--bg", "--yes", "--set-path", path, localUrl], {
|
|
230
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
231
|
+
});
|
|
242
232
|
|
|
243
233
|
const timeout = setTimeout(() => {
|
|
244
234
|
proc.kill("SIGKILL");
|
|
@@ -249,9 +239,7 @@ export async function startTailscaleTunnel(config: {
|
|
|
249
239
|
clearTimeout(timeout);
|
|
250
240
|
if (code === 0) {
|
|
251
241
|
const publicUrl = `https://${dnsName}${path}`;
|
|
252
|
-
console.log(
|
|
253
|
-
`[voice-call] Tailscale ${config.mode} active: ${publicUrl}`,
|
|
254
|
-
);
|
|
242
|
+
console.log(`[voice-call] Tailscale ${config.mode} active: ${publicUrl}`);
|
|
255
243
|
|
|
256
244
|
resolve({
|
|
257
245
|
publicUrl,
|
|
@@ -275,10 +263,7 @@ export async function startTailscaleTunnel(config: {
|
|
|
275
263
|
/**
|
|
276
264
|
* Stop a Tailscale serve/funnel tunnel.
|
|
277
265
|
*/
|
|
278
|
-
async function stopTailscaleTunnel(
|
|
279
|
-
mode: "serve" | "funnel",
|
|
280
|
-
path: string,
|
|
281
|
-
): Promise<void> {
|
|
266
|
+
async function stopTailscaleTunnel(mode: "serve" | "funnel", path: string): Promise<void> {
|
|
282
267
|
return new Promise((resolve) => {
|
|
283
268
|
const proc = spawn("tailscale", [mode, "off", path], {
|
|
284
269
|
stdio: "ignore",
|
|
@@ -299,9 +284,7 @@ async function stopTailscaleTunnel(
|
|
|
299
284
|
/**
|
|
300
285
|
* Start a tunnel based on configuration.
|
|
301
286
|
*/
|
|
302
|
-
export async function startTunnel(
|
|
303
|
-
config: TunnelConfig,
|
|
304
|
-
): Promise<TunnelResult | null> {
|
|
287
|
+
export async function startTunnel(config: TunnelConfig): Promise<TunnelResult | null> {
|
|
305
288
|
switch (config.provider) {
|
|
306
289
|
case "ngrok":
|
|
307
290
|
return startNgrokTunnel({
|
package/src/types.ts
CHANGED
package/src/utils.ts
CHANGED
|
@@ -3,7 +3,9 @@ import path from "node:path";
|
|
|
3
3
|
|
|
4
4
|
export function resolveUserPath(input: string): string {
|
|
5
5
|
const trimmed = input.trim();
|
|
6
|
-
if (!trimmed)
|
|
6
|
+
if (!trimmed) {
|
|
7
|
+
return trimmed;
|
|
8
|
+
}
|
|
7
9
|
if (trimmed.startsWith("~")) {
|
|
8
10
|
const expanded = trimmed.replace(/^~(?=$|[\\/])/, os.homedir());
|
|
9
11
|
return path.resolve(expanded);
|
package/src/voice-mapping.ts
CHANGED
|
@@ -39,7 +39,9 @@ export const DEFAULT_POLLY_VOICE = "Polly.Joanna";
|
|
|
39
39
|
* @returns Polly voice name suitable for Twilio TwiML
|
|
40
40
|
*/
|
|
41
41
|
export function mapVoiceToPolly(voice: string | undefined): string {
|
|
42
|
-
if (!voice)
|
|
42
|
+
if (!voice) {
|
|
43
|
+
return DEFAULT_POLLY_VOICE;
|
|
44
|
+
}
|
|
43
45
|
|
|
44
46
|
// Already a Polly/Google voice - pass through
|
|
45
47
|
if (voice.startsWith("Polly.") || voice.startsWith("Google.")) {
|
|
package/src/webhook-security.test.ts
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
-
|
|
3
2
|
import { describe, expect, it } from "vitest";
|
|
4
|
-
|
|
5
3
|
import { verifyPlivoWebhook, verifyTwilioWebhook } from "./webhook-security.js";
|
|
6
4
|
|
|
7
5
|
function canonicalizeBase64(input: string): string {
|
|
@@ -29,7 +27,9 @@ function plivoV3Signature(params: {
|
|
|
29
27
|
const u = new URL(params.urlWithQuery);
|
|
30
28
|
const baseNoQuery = `${u.protocol}//${u.host}${u.pathname}`;
|
|
31
29
|
const queryPairs: Array<[string, string]> = [];
|
|
32
|
-
for (const [k, v] of u.searchParams.entries())
|
|
30
|
+
for (const [k, v] of u.searchParams.entries()) {
|
|
31
|
+
queryPairs.push([k, v]);
|
|
32
|
+
}
|
|
33
33
|
|
|
34
34
|
const queryMap = new Map<string, string[]>();
|
|
35
35
|
for (const [k, v] of queryPairs) {
|
|
@@ -37,10 +37,8 @@ function plivoV3Signature(params: {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
const sortedQuery = Array.from(queryMap.keys())
|
|
40
|
-
.
|
|
41
|
-
.flatMap((k) =>
|
|
42
|
-
[...(queryMap.get(k) ?? [])].sort().map((v) => `${k}=${v}`),
|
|
43
|
-
)
|
|
40
|
+
.toSorted()
|
|
41
|
+
.flatMap((k) => [...(queryMap.get(k) ?? [])].toSorted().map((v) => `${k}=${v}`))
|
|
44
42
|
.join("&");
|
|
45
43
|
|
|
46
44
|
const postParams = new URLSearchParams(params.postBody);
|
|
@@ -50,8 +48,8 @@ function plivoV3Signature(params: {
|
|
|
50
48
|
}
|
|
51
49
|
|
|
52
50
|
const sortedPost = Array.from(postMap.keys())
|
|
53
|
-
.
|
|
54
|
-
.flatMap((k) => [...(postMap.get(k) ?? [])].
|
|
51
|
+
.toSorted()
|
|
52
|
+
.flatMap((k) => [...(postMap.get(k) ?? [])].toSorted().map((v) => `${k}${v}`))
|
|
55
53
|
.join("");
|
|
56
54
|
|
|
57
55
|
const hasPost = sortedPost.length > 0;
|
|
@@ -71,24 +69,17 @@ function plivoV3Signature(params: {
|
|
|
71
69
|
return canonicalizeBase64(digest);
|
|
72
70
|
}
|
|
73
71
|
|
|
74
|
-
function twilioSignature(params: {
|
|
75
|
-
authToken: string;
|
|
76
|
-
url: string;
|
|
77
|
-
postBody: string;
|
|
78
|
-
}): string {
|
|
72
|
+
function twilioSignature(params: { authToken: string; url: string; postBody: string }): string {
|
|
79
73
|
let dataToSign = params.url;
|
|
80
|
-
const sortedParams = Array.from(
|
|
81
|
-
|
|
82
|
-
)
|
|
74
|
+
const sortedParams = Array.from(new URLSearchParams(params.postBody).entries()).toSorted((a, b) =>
|
|
75
|
+
a[0].localeCompare(b[0]),
|
|
76
|
+
);
|
|
83
77
|
|
|
84
78
|
for (const [key, value] of sortedParams) {
|
|
85
79
|
dataToSign += key + value;
|
|
86
80
|
}
|
|
87
81
|
|
|
88
|
-
return crypto
|
|
89
|
-
.createHmac("sha1", params.authToken)
|
|
90
|
-
.update(dataToSign)
|
|
91
|
-
.digest("base64");
|
|
82
|
+
return crypto.createHmac("sha1", params.authToken).update(dataToSign).digest("base64");
|
|
92
83
|
}
|
|
93
84
|
|
|
94
85
|
describe("verifyPlivoWebhook", () => {
|