@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +943 -0
- package/index.ts +379 -149
- package/openclaw.plugin.json +384 -157
- package/package.json +35 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +273 -12
- package/src/config.ts +355 -72
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +243 -19
- package/src/manager/events.ts +61 -31
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +528 -0
- package/src/manager/outbound.ts +163 -57
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +321 -0
- package/src/response-generator.ts +213 -53
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +429 -0
- package/src/runtime.ts +270 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +28 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +523 -102
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
package/src/runtime.ts
CHANGED
|
@@ -1,16 +1,36 @@
|
|
|
1
|
+
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
|
2
|
+
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
|
3
|
+
import {
|
|
4
|
+
consultRealtimeVoiceAgent,
|
|
5
|
+
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
|
6
|
+
resolveRealtimeVoiceAgentConsultTools,
|
|
7
|
+
resolveRealtimeVoiceAgentConsultToolsAllow,
|
|
8
|
+
type RealtimeVoiceAgentConsultTranscriptEntry,
|
|
9
|
+
type ResolvedRealtimeVoiceProvider,
|
|
10
|
+
} from "openclaw/plugin-sdk/realtime-voice";
|
|
1
11
|
import type { VoiceCallConfig } from "./config.js";
|
|
2
|
-
import {
|
|
3
|
-
|
|
12
|
+
import {
|
|
13
|
+
resolveVoiceCallEffectiveConfig,
|
|
14
|
+
resolveVoiceCallSessionKey,
|
|
15
|
+
resolveTwilioAuthToken,
|
|
16
|
+
resolveVoiceCallConfig,
|
|
17
|
+
validateProviderConfig,
|
|
18
|
+
} from "./config.js";
|
|
19
|
+
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
|
4
20
|
import { CallManager } from "./manager.js";
|
|
5
21
|
import type { VoiceCallProvider } from "./providers/base.js";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { TwilioProvider } from "./providers/twilio.js";
|
|
22
|
+
import type { TwilioProvider } from "./providers/twilio.js";
|
|
23
|
+
import { resolveRealtimeFastContextConsult } from "./realtime-fast-context.js";
|
|
24
|
+
import { resolveVoiceResponseModel } from "./response-model.js";
|
|
10
25
|
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
|
11
26
|
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
|
12
27
|
import { startTunnel, type TunnelResult } from "./tunnel.js";
|
|
28
|
+
import {
|
|
29
|
+
isProviderUnreachableWebhookUrl,
|
|
30
|
+
providerRequiresPublicWebhook,
|
|
31
|
+
} from "./webhook-exposure.js";
|
|
13
32
|
import { VoiceCallWebhookServer } from "./webhook.js";
|
|
33
|
+
import type { ToolHandlerContext } from "./webhook/realtime-handler.js";
|
|
14
34
|
import { cleanupTailscaleExposure, setupTailscaleExposure } from "./webhook/tailscale.js";
|
|
15
35
|
|
|
16
36
|
export type VoiceCallRuntime = {
|
|
@@ -30,6 +50,98 @@ type Logger = {
|
|
|
30
50
|
debug?: (message: string) => void;
|
|
31
51
|
};
|
|
32
52
|
|
|
53
|
+
// Local alias for the ResolvedRealtimeVoiceProvider type imported from the realtime-voice SDK entry.
type ResolvedRealtimeProvider = ResolvedRealtimeVoiceProvider;
|
|
54
|
+
|
|
55
|
+
// Module namespace types of the dynamically imported modules; they type the cached import() promises.
type TelnyxProviderModule = typeof import("./providers/telnyx.js");
|
|
56
|
+
type TwilioProviderModule = typeof import("./providers/twilio.js");
|
|
57
|
+
type PlivoProviderModule = typeof import("./providers/plivo.js");
|
|
58
|
+
type MockProviderModule = typeof import("./providers/mock.js");
|
|
59
|
+
type RealtimeVoiceRuntimeModule = typeof import("./realtime-voice.runtime.js");
|
|
60
|
+
type RealtimeHandlerModule = typeof import("./webhook/realtime-handler.js");
|
|
61
|
+
|
|
62
|
+
/**
 * Extra system prompt handed to the consult agent that backs a live phone call.
 * The sentences form a single line, separated by one space each.
 */
const REALTIME_VOICE_CONSULT_SYSTEM_PROMPT =
  "You are a behind-the-scenes consultant for a live phone voice agent. " +
  "Prioritize a fast, speakable answer over exhaustive investigation. " +
  "For tool-backed status checks, prefer one or two bounded read-only queries before answering. " +
  "Do not print secret values or dump environment variables; only check whether required configuration is present. " +
  "Be accurate, brief, and speakable.";
|
|
69
|
+
|
|
70
|
+
// Cached dynamic-import promises, one per lazily loaded module.
// Each stays undefined until its load* function assigns it on first use.
let telnyxProviderPromise: Promise<TelnyxProviderModule> | undefined;
|
|
71
|
+
let twilioProviderPromise: Promise<TwilioProviderModule> | undefined;
|
|
72
|
+
let plivoProviderPromise: Promise<PlivoProviderModule> | undefined;
|
|
73
|
+
let mockProviderPromise: Promise<MockProviderModule> | undefined;
|
|
74
|
+
let realtimeVoiceRuntimePromise: Promise<RealtimeVoiceRuntimeModule> | undefined;
|
|
75
|
+
let realtimeHandlerPromise: Promise<RealtimeHandlerModule> | undefined;
|
|
76
|
+
|
|
77
|
+
/**
 * Imports the Telnyx provider module on first use and caches the import promise,
 * so every later call receives the same promise.
 */
function loadTelnyxProvider(): Promise<TelnyxProviderModule> {
  if (telnyxProviderPromise == null) {
    telnyxProviderPromise = import("./providers/telnyx.js");
  }
  return telnyxProviderPromise;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Imports the Twilio provider module on first use and caches the import promise,
 * so every later call receives the same promise.
 */
function loadTwilioProvider(): Promise<TwilioProviderModule> {
  if (twilioProviderPromise == null) {
    twilioProviderPromise = import("./providers/twilio.js");
  }
  return twilioProviderPromise;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Imports the Plivo provider module on first use and caches the import promise,
 * so every later call receives the same promise.
 */
function loadPlivoProvider(): Promise<PlivoProviderModule> {
  if (plivoProviderPromise == null) {
    plivoProviderPromise = import("./providers/plivo.js");
  }
  return plivoProviderPromise;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Imports the mock provider module on first use and caches the import promise,
 * so every later call receives the same promise.
 */
function loadMockProvider(): Promise<MockProviderModule> {
  if (mockProviderPromise == null) {
    mockProviderPromise = import("./providers/mock.js");
  }
  return mockProviderPromise;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Imports the realtime voice runtime module on first use and caches the import
 * promise, so every later call receives the same promise.
 */
function loadRealtimeVoiceRuntime(): Promise<RealtimeVoiceRuntimeModule> {
  if (realtimeVoiceRuntimePromise == null) {
    realtimeVoiceRuntimePromise = import("./realtime-voice.runtime.js");
  }
  return realtimeVoiceRuntimePromise;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Imports the realtime webhook handler module on first use and caches the import
 * promise, so every later call receives the same promise.
 */
function loadRealtimeHandler(): Promise<RealtimeHandlerModule> {
  if (realtimeHandlerPromise == null) {
    realtimeHandlerPromise = import("./webhook/realtime-handler.js");
  }
  return realtimeHandlerPromise;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Picks the session key used when consulting the agent for a call.
 *
 * A non-empty `sessionKey` already on the call is returned as-is. Otherwise the
 * key is derived via `resolveVoiceCallSessionKey` from the call id and the remote
 * phone number: `to` for outbound calls, `from` for everything else.
 *
 * @param call - Call fields plus the voice-call config to resolve against.
 * @returns The session key for the consult run.
 */
function resolveVoiceCallConsultSessionKey(call: {
  config: VoiceCallConfig;
  sessionKey?: string;
  from?: string;
  to?: string;
  direction?: "inbound" | "outbound";
  callId: string;
}): string {
  const { sessionKey, direction, callId, config } = call;
  if (!sessionKey) {
    // No explicit key: derive one from the number on the other end of the call.
    const phone = direction === "outbound" ? call.to : call.from;
    return resolveVoiceCallSessionKey({ config, callId, phone });
  }
  return sessionKey;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Converts a call's stored transcript into consult-agent transcript entries.
 *
 * Entries spoken by "bot" become role "assistant"; all others become role "user".
 * When the handler context carries a non-blank partial user transcript, it is
 * appended (trimmed) as a trailing user entry unless the transcript already ends
 * with a user entry holding that exact text.
 *
 * @param call - Call record; a missing transcript is treated as empty.
 * @param context - Optional tool-handler context with the in-progress user speech.
 * @returns A newly built array; `call.transcript` is not mutated.
 */
function mapVoiceCallConsultTranscript(
  call: {
    transcript?: Array<{ speaker: "user" | "bot"; text: string }>;
  },
  context?: ToolHandlerContext,
): RealtimeVoiceAgentConsultTranscriptEntry[] {
  const transcript: RealtimeVoiceAgentConsultTranscriptEntry[] = (call.transcript ?? []).map(
    (entry) => ({
      role: entry.speaker === "bot" ? "assistant" : "user",
      text: entry.text,
    }),
  );
  const partial = context?.partialUserTranscript?.trim();
  if (!partial) {
    return transcript;
  }
  const last = transcript[transcript.length - 1];
  // Only a trailing *user* entry can duplicate the partial. The same words spoken
  // by the assistant must not suppress what the caller is currently saying.
  const alreadyRecorded = last?.role === "user" && last.text === partial;
  if (!alreadyRecorded) {
    transcript.push({ role: "user", text: partial });
  }
  return transcript;
}
|
|
144
|
+
|
|
33
145
|
function createRuntimeResourceLifecycle(params: {
|
|
34
146
|
config: VoiceCallConfig;
|
|
35
147
|
webhookServer: VoiceCallWebhookServer;
|
|
@@ -80,14 +192,15 @@ function isLoopbackBind(bind: string | undefined): boolean {
|
|
|
80
192
|
return bind === "127.0.0.1" || bind === "::1" || bind === "localhost";
|
|
81
193
|
}
|
|
82
194
|
|
|
83
|
-
function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
195
|
+
async function resolveProvider(config: VoiceCallConfig): Promise<VoiceCallProvider> {
|
|
84
196
|
const allowNgrokFreeTierLoopbackBypass =
|
|
85
197
|
config.tunnel?.provider === "ngrok" &&
|
|
86
198
|
isLoopbackBind(config.serve?.bind) &&
|
|
87
199
|
(config.tunnel?.allowNgrokFreeTierLoopbackBypass ?? false);
|
|
88
200
|
|
|
89
201
|
switch (config.provider) {
|
|
90
|
-
case "telnyx":
|
|
202
|
+
case "telnyx": {
|
|
203
|
+
const { TelnyxProvider } = await loadTelnyxProvider();
|
|
91
204
|
return new TelnyxProvider(
|
|
92
205
|
{
|
|
93
206
|
apiKey: config.telnyx?.apiKey,
|
|
@@ -98,11 +211,13 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
98
211
|
skipVerification: config.skipSignatureVerification,
|
|
99
212
|
},
|
|
100
213
|
);
|
|
101
|
-
|
|
214
|
+
}
|
|
215
|
+
case "twilio": {
|
|
216
|
+
const { TwilioProvider } = await loadTwilioProvider();
|
|
102
217
|
return new TwilioProvider(
|
|
103
218
|
{
|
|
104
219
|
accountSid: config.twilio?.accountSid,
|
|
105
|
-
authToken: config
|
|
220
|
+
authToken: resolveTwilioAuthToken(config),
|
|
106
221
|
},
|
|
107
222
|
{
|
|
108
223
|
allowNgrokFreeTierLoopbackBypass,
|
|
@@ -112,7 +227,9 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
112
227
|
webhookSecurity: config.webhookSecurity,
|
|
113
228
|
},
|
|
114
229
|
);
|
|
115
|
-
|
|
230
|
+
}
|
|
231
|
+
case "plivo": {
|
|
232
|
+
const { PlivoProvider } = await loadPlivoProvider();
|
|
116
233
|
return new PlivoProvider(
|
|
117
234
|
{
|
|
118
235
|
authId: config.plivo?.authId,
|
|
@@ -125,20 +242,37 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
125
242
|
webhookSecurity: config.webhookSecurity,
|
|
126
243
|
},
|
|
127
244
|
);
|
|
128
|
-
|
|
245
|
+
}
|
|
246
|
+
case "mock": {
|
|
247
|
+
const { MockProvider } = await loadMockProvider();
|
|
129
248
|
return new MockProvider();
|
|
249
|
+
}
|
|
130
250
|
default:
|
|
131
251
|
throw new Error(`Unsupported voice-call provider: ${String(config.provider)}`);
|
|
132
252
|
}
|
|
133
253
|
}
|
|
134
254
|
|
|
255
|
+
/**
 * Resolves the realtime voice provider configured for the voice-call plugin.
 *
 * Loads the realtime voice runtime module lazily, then hands it the configured
 * provider id, the per-provider configs, and the full config.
 *
 * @param params.config - Resolved voice-call config; its `realtime` section is read.
 * @param params.fullConfig - Full config forwarded to the resolver as `cfg`.
 * @returns Whatever the runtime's `resolveConfiguredRealtimeVoiceProvider` resolves to.
 */
async function resolveRealtimeProvider(params: {
  config: VoiceCallConfig;
  fullConfig: OpenClawConfig;
}): Promise<ResolvedRealtimeProvider> {
  const realtimeVoiceRuntime = await loadRealtimeVoiceRuntime();
  const { resolveConfiguredRealtimeVoiceProvider: resolveConfigured } = realtimeVoiceRuntime;
  const { provider: configuredProviderId, providers: providerConfigs } = params.config.realtime;
  return resolveConfigured({
    configuredProviderId,
    providerConfigs,
    cfg: params.fullConfig,
  });
}
|
|
266
|
+
|
|
135
267
|
export async function createVoiceCallRuntime(params: {
|
|
136
268
|
config: VoiceCallConfig;
|
|
137
269
|
coreConfig: CoreConfig;
|
|
270
|
+
fullConfig?: OpenClawConfig;
|
|
271
|
+
agentRuntime: CoreAgentDeps;
|
|
138
272
|
ttsRuntime?: TelephonyTtsRuntime;
|
|
139
273
|
logger?: Logger;
|
|
140
274
|
}): Promise<VoiceCallRuntime> {
|
|
141
|
-
const { config: rawConfig, coreConfig, ttsRuntime, logger } = params;
|
|
275
|
+
const { config: rawConfig, coreConfig, fullConfig, agentRuntime, ttsRuntime, logger } = params;
|
|
142
276
|
const log = logger ?? {
|
|
143
277
|
info: console.log,
|
|
144
278
|
warn: console.warn,
|
|
@@ -147,6 +281,7 @@ export async function createVoiceCallRuntime(params: {
|
|
|
147
281
|
};
|
|
148
282
|
|
|
149
283
|
const config = resolveVoiceCallConfig(rawConfig);
|
|
284
|
+
const cfg = fullConfig ?? (coreConfig as OpenClawConfig);
|
|
150
285
|
|
|
151
286
|
if (!config.enabled) {
|
|
152
287
|
throw new Error("Voice call disabled. Enable the plugin entry in config.");
|
|
@@ -163,9 +298,107 @@ export async function createVoiceCallRuntime(params: {
|
|
|
163
298
|
throw new Error(`Invalid voice-call config: ${validation.errors.join("; ")}`);
|
|
164
299
|
}
|
|
165
300
|
|
|
166
|
-
const provider = resolveProvider(config);
|
|
301
|
+
const provider = await resolveProvider(config);
|
|
167
302
|
const manager = new CallManager(config);
|
|
168
|
-
const
|
|
303
|
+
const realtimeProvider = config.realtime.enabled
|
|
304
|
+
? await resolveRealtimeProvider({
|
|
305
|
+
config,
|
|
306
|
+
fullConfig: cfg,
|
|
307
|
+
})
|
|
308
|
+
: null;
|
|
309
|
+
const webhookServer = new VoiceCallWebhookServer(
|
|
310
|
+
config,
|
|
311
|
+
manager,
|
|
312
|
+
provider,
|
|
313
|
+
coreConfig,
|
|
314
|
+
fullConfig ?? (coreConfig as OpenClawConfig),
|
|
315
|
+
agentRuntime,
|
|
316
|
+
log,
|
|
317
|
+
);
|
|
318
|
+
if (realtimeProvider) {
|
|
319
|
+
const { RealtimeCallHandler } = await loadRealtimeHandler();
|
|
320
|
+
const realtimeConfig = {
|
|
321
|
+
...config.realtime,
|
|
322
|
+
tools: resolveRealtimeVoiceAgentConsultTools(
|
|
323
|
+
config.realtime.toolPolicy,
|
|
324
|
+
config.realtime.tools,
|
|
325
|
+
),
|
|
326
|
+
};
|
|
327
|
+
const realtimeHandler = new RealtimeCallHandler(
|
|
328
|
+
realtimeConfig,
|
|
329
|
+
manager,
|
|
330
|
+
provider,
|
|
331
|
+
realtimeProvider.provider,
|
|
332
|
+
realtimeProvider.providerConfig,
|
|
333
|
+
config.serve.path,
|
|
334
|
+
);
|
|
335
|
+
if (config.realtime.toolPolicy !== "none") {
|
|
336
|
+
realtimeHandler.registerToolHandler(
|
|
337
|
+
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
|
|
338
|
+
async (args, callId, handlerContext) => {
|
|
339
|
+
const call = manager.getCall(callId);
|
|
340
|
+
if (!call) {
|
|
341
|
+
return { error: `Call "${callId}" not found` };
|
|
342
|
+
}
|
|
343
|
+
const numberRouteKey =
|
|
344
|
+
typeof call.metadata?.numberRouteKey === "string"
|
|
345
|
+
? call.metadata.numberRouteKey
|
|
346
|
+
: call.to;
|
|
347
|
+
const effectiveConfig = resolveVoiceCallEffectiveConfig(config, numberRouteKey).config;
|
|
348
|
+
const agentId = effectiveConfig.agentId ?? "main";
|
|
349
|
+
const sessionKey = resolveVoiceCallConsultSessionKey({
|
|
350
|
+
...call,
|
|
351
|
+
config: effectiveConfig,
|
|
352
|
+
});
|
|
353
|
+
const fastContext = await resolveRealtimeFastContextConsult({
|
|
354
|
+
cfg,
|
|
355
|
+
agentId,
|
|
356
|
+
sessionKey,
|
|
357
|
+
config: effectiveConfig.realtime.fastContext,
|
|
358
|
+
args,
|
|
359
|
+
logger: log,
|
|
360
|
+
});
|
|
361
|
+
if (fastContext.handled) {
|
|
362
|
+
return fastContext.result;
|
|
363
|
+
}
|
|
364
|
+
const { provider: agentProvider, model } = resolveVoiceResponseModel({
|
|
365
|
+
voiceConfig: effectiveConfig,
|
|
366
|
+
agentRuntime,
|
|
367
|
+
});
|
|
368
|
+
const thinkLevel = agentRuntime.resolveThinkingDefault({
|
|
369
|
+
cfg,
|
|
370
|
+
provider: agentProvider,
|
|
371
|
+
model,
|
|
372
|
+
});
|
|
373
|
+
return await consultRealtimeVoiceAgent({
|
|
374
|
+
cfg,
|
|
375
|
+
agentRuntime,
|
|
376
|
+
logger: log,
|
|
377
|
+
agentId,
|
|
378
|
+
sessionKey,
|
|
379
|
+
messageProvider: "voice",
|
|
380
|
+
lane: "voice",
|
|
381
|
+
runIdPrefix: `voice-realtime-consult:${callId}`,
|
|
382
|
+
args,
|
|
383
|
+
transcript: mapVoiceCallConsultTranscript(call, handlerContext),
|
|
384
|
+
surface: "a live phone call",
|
|
385
|
+
userLabel: "Caller",
|
|
386
|
+
assistantLabel: "Agent",
|
|
387
|
+
questionSourceLabel: "caller",
|
|
388
|
+
provider: agentProvider,
|
|
389
|
+
model,
|
|
390
|
+
thinkLevel,
|
|
391
|
+
timeoutMs: effectiveConfig.responseTimeoutMs,
|
|
392
|
+
toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(
|
|
393
|
+
effectiveConfig.realtime.toolPolicy,
|
|
394
|
+
),
|
|
395
|
+
extraSystemPrompt: REALTIME_VOICE_CONSULT_SYSTEM_PROMPT,
|
|
396
|
+
});
|
|
397
|
+
},
|
|
398
|
+
);
|
|
399
|
+
}
|
|
400
|
+
webhookServer.setRealtimeHandler(realtimeHandler);
|
|
401
|
+
}
|
|
169
402
|
const lifecycle = createRuntimeResourceLifecycle({ config, webhookServer });
|
|
170
403
|
|
|
171
404
|
const localUrl = await webhookServer.start();
|
|
@@ -190,9 +423,7 @@ export async function createVoiceCallRuntime(params: {
|
|
|
190
423
|
lifecycle.setTunnelResult(nextTunnelResult);
|
|
191
424
|
publicUrl = nextTunnelResult?.publicUrl ?? null;
|
|
192
425
|
} catch (err) {
|
|
193
|
-
log.error(
|
|
194
|
-
`[voice-call] Tunnel setup failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
195
|
-
);
|
|
426
|
+
log.error(`[voice-call] Tunnel setup failed: ${formatErrorMessage(err)}`);
|
|
196
427
|
}
|
|
197
428
|
}
|
|
198
429
|
|
|
@@ -202,9 +433,23 @@ export async function createVoiceCallRuntime(params: {
|
|
|
202
433
|
|
|
203
434
|
const webhookUrl = publicUrl ?? localUrl;
|
|
204
435
|
|
|
436
|
+
if (
|
|
437
|
+
providerRequiresPublicWebhook(provider.name) &&
|
|
438
|
+
isProviderUnreachableWebhookUrl(webhookUrl)
|
|
439
|
+
) {
|
|
440
|
+
throw new Error(
|
|
441
|
+
`[voice-call] ${provider.name} requires a publicly reachable webhook URL. ` +
|
|
442
|
+
`Refusing to use local-only webhook ${webhookUrl}. ` +
|
|
443
|
+
"Set plugins.entries.voice-call.config.publicUrl or enable tunnel/tailscale exposure.",
|
|
444
|
+
);
|
|
445
|
+
}
|
|
446
|
+
|
|
205
447
|
if (publicUrl && provider.name === "twilio") {
|
|
206
448
|
(provider as TwilioProvider).setPublicUrl(publicUrl);
|
|
207
449
|
}
|
|
450
|
+
if (publicUrl && realtimeProvider) {
|
|
451
|
+
webhookServer.getRealtimeHandler()?.setPublicUrl(publicUrl);
|
|
452
|
+
}
|
|
208
453
|
|
|
209
454
|
if (provider.name === "twilio" && config.streaming?.enabled) {
|
|
210
455
|
const twilioProvider = provider as TwilioProvider;
|
|
@@ -214,15 +459,12 @@ export async function createVoiceCallRuntime(params: {
|
|
|
214
459
|
coreConfig,
|
|
215
460
|
ttsOverride: config.tts,
|
|
216
461
|
runtime: ttsRuntime,
|
|
462
|
+
logger: log,
|
|
217
463
|
});
|
|
218
464
|
twilioProvider.setTTSProvider(ttsProvider);
|
|
219
465
|
log.info("[voice-call] Telephony TTS provider configured");
|
|
220
466
|
} catch (err) {
|
|
221
|
-
log.warn(
|
|
222
|
-
`[voice-call] Failed to initialize telephony TTS: ${
|
|
223
|
-
err instanceof Error ? err.message : String(err)
|
|
224
|
-
}`,
|
|
225
|
-
);
|
|
467
|
+
log.warn(`[voice-call] Failed to initialize telephony TTS: ${formatErrorMessage(err)}`);
|
|
226
468
|
}
|
|
227
469
|
} else {
|
|
228
470
|
log.warn("[voice-call] Telephony TTS unavailable; streaming TTS disabled");
|
|
@@ -235,13 +477,17 @@ export async function createVoiceCallRuntime(params: {
|
|
|
235
477
|
}
|
|
236
478
|
}
|
|
237
479
|
|
|
480
|
+
if (realtimeProvider) {
|
|
481
|
+
log.info(`[voice-call] Realtime voice provider: ${realtimeProvider.provider.id}`);
|
|
482
|
+
}
|
|
483
|
+
|
|
238
484
|
await manager.initialize(provider, webhookUrl);
|
|
239
485
|
|
|
240
486
|
const stop = async () => await lifecycle.stop();
|
|
241
487
|
|
|
242
488
|
log.info("[voice-call] Runtime initialized");
|
|
243
489
|
log.info(`[voice-call] Webhook URL: ${webhookUrl}`);
|
|
244
|
-
if (publicUrl) {
|
|
490
|
+
if (publicUrl && publicUrl !== webhookUrl) {
|
|
245
491
|
log.info(`[voice-call] Public URL: ${publicUrl}`);
|
|
246
492
|
}
|
|
247
493
|
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { convertPcmToMulaw8k, resamplePcmTo8k } from "./telephony-audio.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Builds a mono sine tone as 16-bit signed little-endian PCM.
 *
 * @param sampleRate - Samples per second of the generated audio.
 * @param frequencyHz - Tone frequency in hertz.
 * @param durationSeconds - Tone length; the sample count is floored.
 * @param amplitude - Peak sample value (default 12,000). Peaks beyond the int16
 *   range are saturated instead of making `writeInt16LE` throw.
 * @returns A buffer holding two bytes per sample.
 */
function makeSinePcm(
  sampleRate: number,
  frequencyHz: number,
  durationSeconds: number,
  amplitude = 12_000,
): Buffer {
  const samples = Math.floor(sampleRate * durationSeconds);
  const output = Buffer.alloc(samples * 2);
  for (let i = 0; i < samples; i++) {
    const value = Math.round(Math.sin((2 * Math.PI * frequencyHz * i) / sampleRate) * amplitude);
    // Saturate to the int16 range; writeInt16LE rejects out-of-range values.
    const clamped = Math.max(-32_768, Math.min(32_767, value));
    output.writeInt16LE(clamped, i * 2);
  }
  return output;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Computes the root-mean-square level of 16-bit signed little-endian PCM.
 *
 * @param buffer - PCM bytes; a trailing odd byte is ignored.
 * @returns The RMS of the samples, or 0 when the buffer holds no complete sample.
 */
function rmsPcm(buffer: Buffer): number {
  const sampleCount = Math.floor(buffer.length / 2);
  let sumOfSquares = 0;
  for (let offset = 0; offset < sampleCount * 2; offset += 2) {
    const value = buffer.readInt16LE(offset);
    sumOfSquares += value * value;
  }
  return sampleCount === 0 ? 0 : Math.sqrt(sumOfSquares / sampleCount);
}
|
|
31
|
+
|
|
32
|
+
// Checks resamplePcmTo8k: pass-through at 8 kHz, speech-band level after
// downsampling, and attenuation of a tone above the 8 kHz Nyquist limit.
describe("telephony-audio resamplePcmTo8k", () => {
  it("returns identical buffer for 8k input", () => {
    const source = makeSinePcm(8_000, 1_000, 0.2);
    // toBe asserts the very same Buffer instance comes back.
    expect(resamplePcmTo8k(source, 8_000)).toBe(source);
  });

  it("preserves low-frequency speech-band energy when downsampling", () => {
    const downsampled = resamplePcmTo8k(makeSinePcm(48_000, 1_000, 0.6), 48_000);
    // 0.6 s at 8 kHz is 4,800 samples of 2 bytes each.
    expect(downsampled.length).toBe(9_600);
    expect(rmsPcm(downsampled)).toBeGreaterThan(7_500);
  });

  it("attenuates out-of-band high frequencies before 8k telephony conversion", () => {
    const resampledRms = (frequencyHz: number): number =>
      rmsPcm(resamplePcmTo8k(makeSinePcm(48_000, frequencyHz, 0.6), 48_000));
    const lowRms = resampledRms(1_000);
    const highRms = resampledRms(6_000);
    expect(highRms / lowRms).toBeLessThan(0.1);
  });
});
|
|
53
|
+
|
|
54
|
+
// Checks that convertPcmToMulaw8k emits one byte per 8 kHz sample.
describe("telephony-audio convertPcmToMulaw8k", () => {
  it("converts to 8k mu-law frame length", () => {
    const halfSecondAt24k = makeSinePcm(24_000, 1_000, 0.5);
    const encoded = convertPcmToMulaw8k(halfSecondAt24k, 24_000);
    // 0.5s @ 8kHz => 4000 8-bit samples
    expect(encoded.length).toBe(4_000);
  });
});
|
package/src/telephony-audio.ts
CHANGED
|
@@ -1,60 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
function clamp16(value: number): number {
|
|
4
|
-
return Math.max(-32768, Math.min(32767, value));
|
|
5
|
-
}
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Resample 16-bit PCM (little-endian mono) to 8kHz using linear interpolation.
|
|
9
|
-
*/
|
|
10
|
-
export function resamplePcmTo8k(input: Buffer, inputSampleRate: number): Buffer {
|
|
11
|
-
if (inputSampleRate === TELEPHONY_SAMPLE_RATE) {
|
|
12
|
-
return input;
|
|
13
|
-
}
|
|
14
|
-
const inputSamples = Math.floor(input.length / 2);
|
|
15
|
-
if (inputSamples === 0) {
|
|
16
|
-
return Buffer.alloc(0);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
const ratio = inputSampleRate / TELEPHONY_SAMPLE_RATE;
|
|
20
|
-
const outputSamples = Math.floor(inputSamples / ratio);
|
|
21
|
-
const output = Buffer.alloc(outputSamples * 2);
|
|
22
|
-
|
|
23
|
-
for (let i = 0; i < outputSamples; i++) {
|
|
24
|
-
const srcPos = i * ratio;
|
|
25
|
-
const srcIndex = Math.floor(srcPos);
|
|
26
|
-
const frac = srcPos - srcIndex;
|
|
27
|
-
|
|
28
|
-
const s0 = input.readInt16LE(srcIndex * 2);
|
|
29
|
-
const s1Index = Math.min(srcIndex + 1, inputSamples - 1);
|
|
30
|
-
const s1 = input.readInt16LE(s1Index * 2);
|
|
31
|
-
|
|
32
|
-
const sample = Math.round(s0 + frac * (s1 - s0));
|
|
33
|
-
output.writeInt16LE(clamp16(sample), i * 2);
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
return output;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
/**
|
|
40
|
-
* Convert 16-bit PCM to 8-bit mu-law (G.711).
|
|
41
|
-
*/
|
|
42
|
-
export function pcmToMulaw(pcm: Buffer): Buffer {
|
|
43
|
-
const samples = Math.floor(pcm.length / 2);
|
|
44
|
-
const mulaw = Buffer.alloc(samples);
|
|
45
|
-
|
|
46
|
-
for (let i = 0; i < samples; i++) {
|
|
47
|
-
const sample = pcm.readInt16LE(i * 2);
|
|
48
|
-
mulaw[i] = linearToMulaw(sample);
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
return mulaw;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
export function convertPcmToMulaw8k(pcm: Buffer, inputSampleRate: number): Buffer {
|
|
55
|
-
const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
|
|
56
|
-
return pcmToMulaw(pcm8k);
|
|
57
|
-
}
|
|
1
|
+
export { convertPcmToMulaw8k, resamplePcmTo8k } from "openclaw/plugin-sdk/realtime-voice";
|
|
58
2
|
|
|
59
3
|
/**
|
|
60
4
|
* Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
|
|
@@ -66,25 +10,3 @@ export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, vo
|
|
|
66
10
|
}
|
|
67
11
|
})();
|
|
68
12
|
}
|
|
69
|
-
|
|
70
|
-
function linearToMulaw(sample: number): number {
|
|
71
|
-
const BIAS = 132;
|
|
72
|
-
const CLIP = 32635;
|
|
73
|
-
|
|
74
|
-
const sign = sample < 0 ? 0x80 : 0;
|
|
75
|
-
if (sample < 0) {
|
|
76
|
-
sample = -sample;
|
|
77
|
-
}
|
|
78
|
-
if (sample > CLIP) {
|
|
79
|
-
sample = CLIP;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
sample += BIAS;
|
|
83
|
-
let exponent = 7;
|
|
84
|
-
for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--) {
|
|
85
|
-
expMask >>= 1;
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
const mantissa = (sample >> (exponent + 3)) & 0x0f;
|
|
89
|
-
return ~(sign | (exponent << 4) | mantissa) & 0xff;
|
|
90
|
-
}
|