@openclaw/voice-call 2026.3.13 → 2026.5.1-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +866 -0
- package/index.ts +353 -148
- package/openclaw.plugin.json +336 -157
- package/package.json +33 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +160 -12
- package/src/config.ts +243 -74
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +179 -19
- package/src/manager/events.ts +48 -30
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +464 -0
- package/src/manager/outbound.ts +148 -55
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +277 -0
- package/src/response-generator.ts +186 -40
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +351 -0
- package/src/runtime.ts +254 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +26 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +513 -100
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
package/src/runtime.ts
CHANGED
|
@@ -1,16 +1,34 @@
|
|
|
1
|
+
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
|
2
|
+
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
|
3
|
+
import {
|
|
4
|
+
consultRealtimeVoiceAgent,
|
|
5
|
+
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
|
6
|
+
resolveRealtimeVoiceAgentConsultTools,
|
|
7
|
+
resolveRealtimeVoiceAgentConsultToolsAllow,
|
|
8
|
+
type RealtimeVoiceAgentConsultTranscriptEntry,
|
|
9
|
+
type ResolvedRealtimeVoiceProvider,
|
|
10
|
+
} from "openclaw/plugin-sdk/realtime-voice";
|
|
1
11
|
import type { VoiceCallConfig } from "./config.js";
|
|
2
|
-
import {
|
|
3
|
-
|
|
12
|
+
import {
|
|
13
|
+
resolveTwilioAuthToken,
|
|
14
|
+
resolveVoiceCallConfig,
|
|
15
|
+
validateProviderConfig,
|
|
16
|
+
} from "./config.js";
|
|
17
|
+
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
|
4
18
|
import { CallManager } from "./manager.js";
|
|
5
19
|
import type { VoiceCallProvider } from "./providers/base.js";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { TwilioProvider } from "./providers/twilio.js";
|
|
20
|
+
import type { TwilioProvider } from "./providers/twilio.js";
|
|
21
|
+
import { resolveRealtimeFastContextConsult } from "./realtime-fast-context.js";
|
|
22
|
+
import { resolveVoiceResponseModel } from "./response-model.js";
|
|
10
23
|
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
|
11
24
|
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
|
12
25
|
import { startTunnel, type TunnelResult } from "./tunnel.js";
|
|
26
|
+
import {
|
|
27
|
+
isProviderUnreachableWebhookUrl,
|
|
28
|
+
providerRequiresPublicWebhook,
|
|
29
|
+
} from "./webhook-exposure.js";
|
|
13
30
|
import { VoiceCallWebhookServer } from "./webhook.js";
|
|
31
|
+
import type { ToolHandlerContext } from "./webhook/realtime-handler.js";
|
|
14
32
|
import { cleanupTailscaleExposure, setupTailscaleExposure } from "./webhook/tailscale.js";
|
|
15
33
|
|
|
16
34
|
export type VoiceCallRuntime = {
|
|
@@ -30,6 +48,94 @@ type Logger = {
|
|
|
30
48
|
debug?: (message: string) => void;
|
|
31
49
|
};
|
|
32
50
|
|
|
51
|
+
type ResolvedRealtimeProvider = ResolvedRealtimeVoiceProvider;
|
|
52
|
+
|
|
53
|
+
type TelnyxProviderModule = typeof import("./providers/telnyx.js");
|
|
54
|
+
type TwilioProviderModule = typeof import("./providers/twilio.js");
|
|
55
|
+
type PlivoProviderModule = typeof import("./providers/plivo.js");
|
|
56
|
+
type MockProviderModule = typeof import("./providers/mock.js");
|
|
57
|
+
type RealtimeVoiceRuntimeModule = typeof import("./realtime-voice.runtime.js");
|
|
58
|
+
type RealtimeHandlerModule = typeof import("./webhook/realtime-handler.js");
|
|
59
|
+
|
|
60
|
+
const REALTIME_VOICE_CONSULT_SYSTEM_PROMPT = [
|
|
61
|
+
"You are a behind-the-scenes consultant for a live phone voice agent.",
|
|
62
|
+
"Prioritize a fast, speakable answer over exhaustive investigation.",
|
|
63
|
+
"For tool-backed status checks, prefer one or two bounded read-only queries before answering.",
|
|
64
|
+
"Do not print secret values or dump environment variables; only check whether required configuration is present.",
|
|
65
|
+
"Be accurate, brief, and speakable.",
|
|
66
|
+
].join(" ");
|
|
67
|
+
|
|
68
|
+
let telnyxProviderPromise: Promise<TelnyxProviderModule> | undefined;
|
|
69
|
+
let twilioProviderPromise: Promise<TwilioProviderModule> | undefined;
|
|
70
|
+
let plivoProviderPromise: Promise<PlivoProviderModule> | undefined;
|
|
71
|
+
let mockProviderPromise: Promise<MockProviderModule> | undefined;
|
|
72
|
+
let realtimeVoiceRuntimePromise: Promise<RealtimeVoiceRuntimeModule> | undefined;
|
|
73
|
+
let realtimeHandlerPromise: Promise<RealtimeHandlerModule> | undefined;
|
|
74
|
+
|
|
75
|
+
function loadTelnyxProvider(): Promise<TelnyxProviderModule> {
|
|
76
|
+
telnyxProviderPromise ??= import("./providers/telnyx.js");
|
|
77
|
+
return telnyxProviderPromise;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
function loadTwilioProvider(): Promise<TwilioProviderModule> {
|
|
81
|
+
twilioProviderPromise ??= import("./providers/twilio.js");
|
|
82
|
+
return twilioProviderPromise;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
function loadPlivoProvider(): Promise<PlivoProviderModule> {
|
|
86
|
+
plivoProviderPromise ??= import("./providers/plivo.js");
|
|
87
|
+
return plivoProviderPromise;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
function loadMockProvider(): Promise<MockProviderModule> {
|
|
91
|
+
mockProviderPromise ??= import("./providers/mock.js");
|
|
92
|
+
return mockProviderPromise;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
function loadRealtimeVoiceRuntime(): Promise<RealtimeVoiceRuntimeModule> {
|
|
96
|
+
realtimeVoiceRuntimePromise ??= import("./realtime-voice.runtime.js");
|
|
97
|
+
return realtimeVoiceRuntimePromise;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function loadRealtimeHandler(): Promise<RealtimeHandlerModule> {
|
|
101
|
+
realtimeHandlerPromise ??= import("./webhook/realtime-handler.js");
|
|
102
|
+
return realtimeHandlerPromise;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
function resolveVoiceCallConsultSessionKey(call: {
|
|
106
|
+
sessionKey?: string;
|
|
107
|
+
from?: string;
|
|
108
|
+
to?: string;
|
|
109
|
+
direction?: "inbound" | "outbound";
|
|
110
|
+
callId: string;
|
|
111
|
+
}): string {
|
|
112
|
+
if (call.sessionKey) {
|
|
113
|
+
return call.sessionKey;
|
|
114
|
+
}
|
|
115
|
+
const phone = call.direction === "outbound" ? call.to : call.from;
|
|
116
|
+
const normalizedPhone = phone?.replace(/\D/g, "");
|
|
117
|
+
return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${call.callId}`;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
function mapVoiceCallConsultTranscript(
|
|
121
|
+
call: {
|
|
122
|
+
transcript?: Array<{ speaker: "user" | "bot"; text: string }>;
|
|
123
|
+
},
|
|
124
|
+
context?: ToolHandlerContext,
|
|
125
|
+
): RealtimeVoiceAgentConsultTranscriptEntry[] {
|
|
126
|
+
const transcript: RealtimeVoiceAgentConsultTranscriptEntry[] = (call.transcript ?? []).map(
|
|
127
|
+
(entry) => ({
|
|
128
|
+
role: entry.speaker === "bot" ? "assistant" : "user",
|
|
129
|
+
text: entry.text,
|
|
130
|
+
}),
|
|
131
|
+
);
|
|
132
|
+
const partial = context?.partialUserTranscript?.trim();
|
|
133
|
+
if (partial && transcript.at(-1)?.text !== partial) {
|
|
134
|
+
transcript.push({ role: "user", text: partial });
|
|
135
|
+
}
|
|
136
|
+
return transcript;
|
|
137
|
+
}
|
|
138
|
+
|
|
33
139
|
function createRuntimeResourceLifecycle(params: {
|
|
34
140
|
config: VoiceCallConfig;
|
|
35
141
|
webhookServer: VoiceCallWebhookServer;
|
|
@@ -80,14 +186,15 @@ function isLoopbackBind(bind: string | undefined): boolean {
|
|
|
80
186
|
return bind === "127.0.0.1" || bind === "::1" || bind === "localhost";
|
|
81
187
|
}
|
|
82
188
|
|
|
83
|
-
function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
189
|
+
async function resolveProvider(config: VoiceCallConfig): Promise<VoiceCallProvider> {
|
|
84
190
|
const allowNgrokFreeTierLoopbackBypass =
|
|
85
191
|
config.tunnel?.provider === "ngrok" &&
|
|
86
192
|
isLoopbackBind(config.serve?.bind) &&
|
|
87
193
|
(config.tunnel?.allowNgrokFreeTierLoopbackBypass ?? false);
|
|
88
194
|
|
|
89
195
|
switch (config.provider) {
|
|
90
|
-
case "telnyx":
|
|
196
|
+
case "telnyx": {
|
|
197
|
+
const { TelnyxProvider } = await loadTelnyxProvider();
|
|
91
198
|
return new TelnyxProvider(
|
|
92
199
|
{
|
|
93
200
|
apiKey: config.telnyx?.apiKey,
|
|
@@ -98,11 +205,13 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
98
205
|
skipVerification: config.skipSignatureVerification,
|
|
99
206
|
},
|
|
100
207
|
);
|
|
101
|
-
|
|
208
|
+
}
|
|
209
|
+
case "twilio": {
|
|
210
|
+
const { TwilioProvider } = await loadTwilioProvider();
|
|
102
211
|
return new TwilioProvider(
|
|
103
212
|
{
|
|
104
213
|
accountSid: config.twilio?.accountSid,
|
|
105
|
-
authToken: config
|
|
214
|
+
authToken: resolveTwilioAuthToken(config),
|
|
106
215
|
},
|
|
107
216
|
{
|
|
108
217
|
allowNgrokFreeTierLoopbackBypass,
|
|
@@ -112,7 +221,9 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
112
221
|
webhookSecurity: config.webhookSecurity,
|
|
113
222
|
},
|
|
114
223
|
);
|
|
115
|
-
|
|
224
|
+
}
|
|
225
|
+
case "plivo": {
|
|
226
|
+
const { PlivoProvider } = await loadPlivoProvider();
|
|
116
227
|
return new PlivoProvider(
|
|
117
228
|
{
|
|
118
229
|
authId: config.plivo?.authId,
|
|
@@ -125,20 +236,37 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
125
236
|
webhookSecurity: config.webhookSecurity,
|
|
126
237
|
},
|
|
127
238
|
);
|
|
128
|
-
|
|
239
|
+
}
|
|
240
|
+
case "mock": {
|
|
241
|
+
const { MockProvider } = await loadMockProvider();
|
|
129
242
|
return new MockProvider();
|
|
243
|
+
}
|
|
130
244
|
default:
|
|
131
245
|
throw new Error(`Unsupported voice-call provider: ${String(config.provider)}`);
|
|
132
246
|
}
|
|
133
247
|
}
|
|
134
248
|
|
|
249
|
+
async function resolveRealtimeProvider(params: {
|
|
250
|
+
config: VoiceCallConfig;
|
|
251
|
+
fullConfig: OpenClawConfig;
|
|
252
|
+
}): Promise<ResolvedRealtimeProvider> {
|
|
253
|
+
const { resolveConfiguredRealtimeVoiceProvider } = await loadRealtimeVoiceRuntime();
|
|
254
|
+
return resolveConfiguredRealtimeVoiceProvider({
|
|
255
|
+
configuredProviderId: params.config.realtime.provider,
|
|
256
|
+
providerConfigs: params.config.realtime.providers,
|
|
257
|
+
cfg: params.fullConfig,
|
|
258
|
+
});
|
|
259
|
+
}
|
|
260
|
+
|
|
135
261
|
export async function createVoiceCallRuntime(params: {
|
|
136
262
|
config: VoiceCallConfig;
|
|
137
263
|
coreConfig: CoreConfig;
|
|
264
|
+
fullConfig?: OpenClawConfig;
|
|
265
|
+
agentRuntime: CoreAgentDeps;
|
|
138
266
|
ttsRuntime?: TelephonyTtsRuntime;
|
|
139
267
|
logger?: Logger;
|
|
140
268
|
}): Promise<VoiceCallRuntime> {
|
|
141
|
-
const { config: rawConfig, coreConfig, ttsRuntime, logger } = params;
|
|
269
|
+
const { config: rawConfig, coreConfig, fullConfig, agentRuntime, ttsRuntime, logger } = params;
|
|
142
270
|
const log = logger ?? {
|
|
143
271
|
info: console.log,
|
|
144
272
|
warn: console.warn,
|
|
@@ -147,6 +275,7 @@ export async function createVoiceCallRuntime(params: {
|
|
|
147
275
|
};
|
|
148
276
|
|
|
149
277
|
const config = resolveVoiceCallConfig(rawConfig);
|
|
278
|
+
const cfg = fullConfig ?? (coreConfig as OpenClawConfig);
|
|
150
279
|
|
|
151
280
|
if (!config.enabled) {
|
|
152
281
|
throw new Error("Voice call disabled. Enable the plugin entry in config.");
|
|
@@ -163,9 +292,97 @@ export async function createVoiceCallRuntime(params: {
|
|
|
163
292
|
throw new Error(`Invalid voice-call config: ${validation.errors.join("; ")}`);
|
|
164
293
|
}
|
|
165
294
|
|
|
166
|
-
const provider = resolveProvider(config);
|
|
295
|
+
const provider = await resolveProvider(config);
|
|
167
296
|
const manager = new CallManager(config);
|
|
168
|
-
const
|
|
297
|
+
const realtimeProvider = config.realtime.enabled
|
|
298
|
+
? await resolveRealtimeProvider({
|
|
299
|
+
config,
|
|
300
|
+
fullConfig: cfg,
|
|
301
|
+
})
|
|
302
|
+
: null;
|
|
303
|
+
const webhookServer = new VoiceCallWebhookServer(
|
|
304
|
+
config,
|
|
305
|
+
manager,
|
|
306
|
+
provider,
|
|
307
|
+
coreConfig,
|
|
308
|
+
fullConfig ?? (coreConfig as OpenClawConfig),
|
|
309
|
+
agentRuntime,
|
|
310
|
+
log,
|
|
311
|
+
);
|
|
312
|
+
if (realtimeProvider) {
|
|
313
|
+
const { RealtimeCallHandler } = await loadRealtimeHandler();
|
|
314
|
+
const realtimeConfig = {
|
|
315
|
+
...config.realtime,
|
|
316
|
+
tools: resolveRealtimeVoiceAgentConsultTools(
|
|
317
|
+
config.realtime.toolPolicy,
|
|
318
|
+
config.realtime.tools,
|
|
319
|
+
),
|
|
320
|
+
};
|
|
321
|
+
const realtimeHandler = new RealtimeCallHandler(
|
|
322
|
+
realtimeConfig,
|
|
323
|
+
manager,
|
|
324
|
+
provider,
|
|
325
|
+
realtimeProvider.provider,
|
|
326
|
+
realtimeProvider.providerConfig,
|
|
327
|
+
config.serve.path,
|
|
328
|
+
);
|
|
329
|
+
if (config.realtime.toolPolicy !== "none") {
|
|
330
|
+
realtimeHandler.registerToolHandler(
|
|
331
|
+
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
|
332
|
+
async (args, callId, handlerContext) => {
|
|
333
|
+
const call = manager.getCall(callId);
|
|
334
|
+
if (!call) {
|
|
335
|
+
return { error: `Call "${callId}" not found` };
|
|
336
|
+
}
|
|
337
|
+
const agentId = config.agentId ?? "main";
|
|
338
|
+
const sessionKey = resolveVoiceCallConsultSessionKey(call);
|
|
339
|
+
const fastContext = await resolveRealtimeFastContextConsult({
|
|
340
|
+
cfg,
|
|
341
|
+
agentId,
|
|
342
|
+
sessionKey,
|
|
343
|
+
config: config.realtime.fastContext,
|
|
344
|
+
args,
|
|
345
|
+
logger: log,
|
|
346
|
+
});
|
|
347
|
+
if (fastContext.handled) {
|
|
348
|
+
return fastContext.result;
|
|
349
|
+
}
|
|
350
|
+
const { provider: agentProvider, model } = resolveVoiceResponseModel({
|
|
351
|
+
voiceConfig: config,
|
|
352
|
+
agentRuntime,
|
|
353
|
+
});
|
|
354
|
+
const thinkLevel = agentRuntime.resolveThinkingDefault({
|
|
355
|
+
cfg,
|
|
356
|
+
provider: agentProvider,
|
|
357
|
+
model,
|
|
358
|
+
});
|
|
359
|
+
return await consultRealtimeVoiceAgent({
|
|
360
|
+
cfg,
|
|
361
|
+
agentRuntime,
|
|
362
|
+
logger: log,
|
|
363
|
+
agentId,
|
|
364
|
+
sessionKey,
|
|
365
|
+
messageProvider: "voice",
|
|
366
|
+
lane: "voice",
|
|
367
|
+
runIdPrefix: `voice-realtime-consult:${callId}`,
|
|
368
|
+
args,
|
|
369
|
+
transcript: mapVoiceCallConsultTranscript(call, handlerContext),
|
|
370
|
+
surface: "a live phone call",
|
|
371
|
+
userLabel: "Caller",
|
|
372
|
+
assistantLabel: "Agent",
|
|
373
|
+
questionSourceLabel: "caller",
|
|
374
|
+
provider: agentProvider,
|
|
375
|
+
model,
|
|
376
|
+
thinkLevel,
|
|
377
|
+
timeoutMs: config.responseTimeoutMs,
|
|
378
|
+
toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(config.realtime.toolPolicy),
|
|
379
|
+
extraSystemPrompt: REALTIME_VOICE_CONSULT_SYSTEM_PROMPT,
|
|
380
|
+
});
|
|
381
|
+
},
|
|
382
|
+
);
|
|
383
|
+
}
|
|
384
|
+
webhookServer.setRealtimeHandler(realtimeHandler);
|
|
385
|
+
}
|
|
169
386
|
const lifecycle = createRuntimeResourceLifecycle({ config, webhookServer });
|
|
170
387
|
|
|
171
388
|
const localUrl = await webhookServer.start();
|
|
@@ -190,9 +407,7 @@ export async function createVoiceCallRuntime(params: {
|
|
|
190
407
|
lifecycle.setTunnelResult(nextTunnelResult);
|
|
191
408
|
publicUrl = nextTunnelResult?.publicUrl ?? null;
|
|
192
409
|
} catch (err) {
|
|
193
|
-
log.error(
|
|
194
|
-
`[voice-call] Tunnel setup failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
195
|
-
);
|
|
410
|
+
log.error(`[voice-call] Tunnel setup failed: ${formatErrorMessage(err)}`);
|
|
196
411
|
}
|
|
197
412
|
}
|
|
198
413
|
|
|
@@ -202,9 +417,23 @@ export async function createVoiceCallRuntime(params: {
|
|
|
202
417
|
|
|
203
418
|
const webhookUrl = publicUrl ?? localUrl;
|
|
204
419
|
|
|
420
|
+
if (
|
|
421
|
+
providerRequiresPublicWebhook(provider.name) &&
|
|
422
|
+
isProviderUnreachableWebhookUrl(webhookUrl)
|
|
423
|
+
) {
|
|
424
|
+
throw new Error(
|
|
425
|
+
`[voice-call] ${provider.name} requires a publicly reachable webhook URL. ` +
|
|
426
|
+
`Refusing to use local-only webhook ${webhookUrl}. ` +
|
|
427
|
+
"Set plugins.entries.voice-call.config.publicUrl or enable tunnel/tailscale exposure.",
|
|
428
|
+
);
|
|
429
|
+
}
|
|
430
|
+
|
|
205
431
|
if (publicUrl && provider.name === "twilio") {
|
|
206
432
|
(provider as TwilioProvider).setPublicUrl(publicUrl);
|
|
207
433
|
}
|
|
434
|
+
if (publicUrl && realtimeProvider) {
|
|
435
|
+
webhookServer.getRealtimeHandler()?.setPublicUrl(publicUrl);
|
|
436
|
+
}
|
|
208
437
|
|
|
209
438
|
if (provider.name === "twilio" && config.streaming?.enabled) {
|
|
210
439
|
const twilioProvider = provider as TwilioProvider;
|
|
@@ -214,15 +443,12 @@ export async function createVoiceCallRuntime(params: {
|
|
|
214
443
|
coreConfig,
|
|
215
444
|
ttsOverride: config.tts,
|
|
216
445
|
runtime: ttsRuntime,
|
|
446
|
+
logger: log,
|
|
217
447
|
});
|
|
218
448
|
twilioProvider.setTTSProvider(ttsProvider);
|
|
219
449
|
log.info("[voice-call] Telephony TTS provider configured");
|
|
220
450
|
} catch (err) {
|
|
221
|
-
log.warn(
|
|
222
|
-
`[voice-call] Failed to initialize telephony TTS: ${
|
|
223
|
-
err instanceof Error ? err.message : String(err)
|
|
224
|
-
}`,
|
|
225
|
-
);
|
|
451
|
+
log.warn(`[voice-call] Failed to initialize telephony TTS: ${formatErrorMessage(err)}`);
|
|
226
452
|
}
|
|
227
453
|
} else {
|
|
228
454
|
log.warn("[voice-call] Telephony TTS unavailable; streaming TTS disabled");
|
|
@@ -235,13 +461,17 @@ export async function createVoiceCallRuntime(params: {
|
|
|
235
461
|
}
|
|
236
462
|
}
|
|
237
463
|
|
|
464
|
+
if (realtimeProvider) {
|
|
465
|
+
log.info(`[voice-call] Realtime voice provider: ${realtimeProvider.provider.id}`);
|
|
466
|
+
}
|
|
467
|
+
|
|
238
468
|
await manager.initialize(provider, webhookUrl);
|
|
239
469
|
|
|
240
470
|
const stop = async () => await lifecycle.stop();
|
|
241
471
|
|
|
242
472
|
log.info("[voice-call] Runtime initialized");
|
|
243
473
|
log.info(`[voice-call] Webhook URL: ${webhookUrl}`);
|
|
244
|
-
if (publicUrl) {
|
|
474
|
+
if (publicUrl && publicUrl !== webhookUrl) {
|
|
245
475
|
log.info(`[voice-call] Public URL: ${publicUrl}`);
|
|
246
476
|
}
|
|
247
477
|
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { convertPcmToMulaw8k, resamplePcmTo8k } from "./telephony-audio.js";
|
|
3
|
+
|
|
4
|
+
function makeSinePcm(
|
|
5
|
+
sampleRate: number,
|
|
6
|
+
frequencyHz: number,
|
|
7
|
+
durationSeconds: number,
|
|
8
|
+
amplitude = 12_000,
|
|
9
|
+
): Buffer {
|
|
10
|
+
const samples = Math.floor(sampleRate * durationSeconds);
|
|
11
|
+
const output = Buffer.alloc(samples * 2);
|
|
12
|
+
for (let i = 0; i < samples; i++) {
|
|
13
|
+
const value = Math.round(Math.sin((2 * Math.PI * frequencyHz * i) / sampleRate) * amplitude);
|
|
14
|
+
output.writeInt16LE(value, i * 2);
|
|
15
|
+
}
|
|
16
|
+
return output;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function rmsPcm(buffer: Buffer): number {
|
|
20
|
+
const samples = Math.floor(buffer.length / 2);
|
|
21
|
+
if (samples === 0) {
|
|
22
|
+
return 0;
|
|
23
|
+
}
|
|
24
|
+
let sum = 0;
|
|
25
|
+
for (let i = 0; i < samples; i++) {
|
|
26
|
+
const sample = buffer.readInt16LE(i * 2);
|
|
27
|
+
sum += sample * sample;
|
|
28
|
+
}
|
|
29
|
+
return Math.sqrt(sum / samples);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
describe("telephony-audio resamplePcmTo8k", () => {
|
|
33
|
+
it("returns identical buffer for 8k input", () => {
|
|
34
|
+
const pcm8k = makeSinePcm(8_000, 1_000, 0.2);
|
|
35
|
+
const resampled = resamplePcmTo8k(pcm8k, 8_000);
|
|
36
|
+
expect(resampled).toBe(pcm8k);
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("preserves low-frequency speech-band energy when downsampling", () => {
|
|
40
|
+
const input = makeSinePcm(48_000, 1_000, 0.6);
|
|
41
|
+
const output = resamplePcmTo8k(input, 48_000);
|
|
42
|
+
expect(output.length).toBe(9_600);
|
|
43
|
+
expect(rmsPcm(output)).toBeGreaterThan(7_500);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
it("attenuates out-of-band high frequencies before 8k telephony conversion", () => {
|
|
47
|
+
const lowTone = resamplePcmTo8k(makeSinePcm(48_000, 1_000, 0.6), 48_000);
|
|
48
|
+
const highTone = resamplePcmTo8k(makeSinePcm(48_000, 6_000, 0.6), 48_000);
|
|
49
|
+
const ratio = rmsPcm(highTone) / rmsPcm(lowTone);
|
|
50
|
+
expect(ratio).toBeLessThan(0.1);
|
|
51
|
+
});
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
describe("telephony-audio convertPcmToMulaw8k", () => {
|
|
55
|
+
it("converts to 8k mu-law frame length", () => {
|
|
56
|
+
const input = makeSinePcm(24_000, 1_000, 0.5);
|
|
57
|
+
const mulaw = convertPcmToMulaw8k(input, 24_000);
|
|
58
|
+
// 0.5s @ 8kHz => 4000 8-bit samples
|
|
59
|
+
expect(mulaw.length).toBe(4_000);
|
|
60
|
+
});
|
|
61
|
+
});
|
package/src/telephony-audio.ts
CHANGED
|
@@ -1,60 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
function clamp16(value: number): number {
|
|
4
|
-
return Math.max(-32768, Math.min(32767, value));
|
|
5
|
-
}
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Resample 16-bit PCM (little-endian mono) to 8kHz using linear interpolation.
|
|
9
|
-
*/
|
|
10
|
-
export function resamplePcmTo8k(input: Buffer, inputSampleRate: number): Buffer {
|
|
11
|
-
if (inputSampleRate === TELEPHONY_SAMPLE_RATE) {
|
|
12
|
-
return input;
|
|
13
|
-
}
|
|
14
|
-
const inputSamples = Math.floor(input.length / 2);
|
|
15
|
-
if (inputSamples === 0) {
|
|
16
|
-
return Buffer.alloc(0);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
const ratio = inputSampleRate / TELEPHONY_SAMPLE_RATE;
|
|
20
|
-
const outputSamples = Math.floor(inputSamples / ratio);
|
|
21
|
-
const output = Buffer.alloc(outputSamples * 2);
|
|
22
|
-
|
|
23
|
-
for (let i = 0; i < outputSamples; i++) {
|
|
24
|
-
const srcPos = i * ratio;
|
|
25
|
-
const srcIndex = Math.floor(srcPos);
|
|
26
|
-
const frac = srcPos - srcIndex;
|
|
27
|
-
|
|
28
|
-
const s0 = input.readInt16LE(srcIndex * 2);
|
|
29
|
-
const s1Index = Math.min(srcIndex + 1, inputSamples - 1);
|
|
30
|
-
const s1 = input.readInt16LE(s1Index * 2);
|
|
31
|
-
|
|
32
|
-
const sample = Math.round(s0 + frac * (s1 - s0));
|
|
33
|
-
output.writeInt16LE(clamp16(sample), i * 2);
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
return output;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
/**
|
|
40
|
-
* Convert 16-bit PCM to 8-bit mu-law (G.711).
|
|
41
|
-
*/
|
|
42
|
-
export function pcmToMulaw(pcm: Buffer): Buffer {
|
|
43
|
-
const samples = Math.floor(pcm.length / 2);
|
|
44
|
-
const mulaw = Buffer.alloc(samples);
|
|
45
|
-
|
|
46
|
-
for (let i = 0; i < samples; i++) {
|
|
47
|
-
const sample = pcm.readInt16LE(i * 2);
|
|
48
|
-
mulaw[i] = linearToMulaw(sample);
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
return mulaw;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
export function convertPcmToMulaw8k(pcm: Buffer, inputSampleRate: number): Buffer {
|
|
55
|
-
const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
|
|
56
|
-
return pcmToMulaw(pcm8k);
|
|
57
|
-
}
|
|
1
|
+
export { convertPcmToMulaw8k, resamplePcmTo8k } from "openclaw/plugin-sdk/realtime-voice";
|
|
58
2
|
|
|
59
3
|
/**
|
|
60
4
|
* Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
|
|
@@ -66,25 +10,3 @@ export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, vo
|
|
|
66
10
|
}
|
|
67
11
|
})();
|
|
68
12
|
}
|
|
69
|
-
|
|
70
|
-
function linearToMulaw(sample: number): number {
|
|
71
|
-
const BIAS = 132;
|
|
72
|
-
const CLIP = 32635;
|
|
73
|
-
|
|
74
|
-
const sign = sample < 0 ? 0x80 : 0;
|
|
75
|
-
if (sample < 0) {
|
|
76
|
-
sample = -sample;
|
|
77
|
-
}
|
|
78
|
-
if (sample > CLIP) {
|
|
79
|
-
sample = CLIP;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
sample += BIAS;
|
|
83
|
-
let exponent = 7;
|
|
84
|
-
for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--) {
|
|
85
|
-
expMask >>= 1;
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
const mantissa = (sample >> (exponent + 3)) & 0x0f;
|
|
89
|
-
return ~(sign | (exponent << 4) | mantissa) & 0xff;
|
|
90
|
-
}
|