@openclaw/voice-call 2026.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +78 -0
- package/README.md +135 -0
- package/index.ts +497 -0
- package/openclaw.plugin.json +601 -0
- package/package.json +16 -0
- package/src/cli.ts +312 -0
- package/src/config.test.ts +204 -0
- package/src/config.ts +502 -0
- package/src/core-bridge.ts +198 -0
- package/src/manager/context.ts +21 -0
- package/src/manager/events.ts +177 -0
- package/src/manager/lookup.ts +33 -0
- package/src/manager/outbound.ts +248 -0
- package/src/manager/state.ts +50 -0
- package/src/manager/store.ts +88 -0
- package/src/manager/timers.ts +86 -0
- package/src/manager/twiml.ts +9 -0
- package/src/manager.test.ts +108 -0
- package/src/manager.ts +888 -0
- package/src/media-stream.test.ts +97 -0
- package/src/media-stream.ts +393 -0
- package/src/providers/base.ts +67 -0
- package/src/providers/index.ts +10 -0
- package/src/providers/mock.ts +168 -0
- package/src/providers/plivo.test.ts +28 -0
- package/src/providers/plivo.ts +504 -0
- package/src/providers/stt-openai-realtime.ts +311 -0
- package/src/providers/telnyx.ts +364 -0
- package/src/providers/tts-openai.ts +264 -0
- package/src/providers/twilio/api.ts +45 -0
- package/src/providers/twilio/webhook.ts +30 -0
- package/src/providers/twilio.test.ts +64 -0
- package/src/providers/twilio.ts +595 -0
- package/src/response-generator.ts +171 -0
- package/src/runtime.ts +217 -0
- package/src/telephony-audio.ts +88 -0
- package/src/telephony-tts.ts +95 -0
- package/src/tunnel.ts +331 -0
- package/src/types.ts +273 -0
- package/src/utils.ts +12 -0
- package/src/voice-mapping.ts +65 -0
- package/src/webhook-security.test.ts +260 -0
- package/src/webhook-security.ts +469 -0
- package/src/webhook.ts +491 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice call response generator - uses the embedded Pi agent for tool support.
|
|
3
|
+
* Routes voice responses through the same agent infrastructure as messaging.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import crypto from "node:crypto";
|
|
7
|
+
|
|
8
|
+
import { loadCoreAgentDeps, type CoreConfig } from "./core-bridge.js";
|
|
9
|
+
|
|
10
|
+
import type { VoiceCallConfig } from "./config.js";
|
|
11
|
+
|
|
12
|
+
/** Inputs required to generate one spoken reply for an active call. */
export type VoiceResponseParams = {
  /** Voice call config */
  voiceConfig: VoiceCallConfig;
  /** Core OpenClaw config */
  coreConfig: CoreConfig;
  /** Call ID for session tracking */
  callId: string;
  /** Caller's phone number */
  from: string;
  /** Conversation transcript */
  transcript: Array<{ speaker: "user" | "bot"; text: string }>;
  /** Latest user message */
  userMessage: string;
};

/** Outcome of a response-generation attempt. */
export type VoiceResponseResult = {
  // Reply text to speak; null when generation produced no usable text.
  text: string | null;
  // Human-readable failure reason; only meaningful when text is null.
  error?: string;
};

/** Per-caller session record persisted in the session store. */
type SessionEntry = {
  // UUID identifying the agent session for this caller.
  sessionId: string;
  // Epoch-ms timestamp written when the entry is created.
  updatedAt: number;
};
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Generate a voice response using the embedded Pi agent with full tool support.
|
|
39
|
+
* Uses the same agent infrastructure as messaging for consistent behavior.
|
|
40
|
+
*/
|
|
41
|
+
export async function generateVoiceResponse(
|
|
42
|
+
params: VoiceResponseParams,
|
|
43
|
+
): Promise<VoiceResponseResult> {
|
|
44
|
+
const { voiceConfig, callId, from, transcript, userMessage, coreConfig } =
|
|
45
|
+
params;
|
|
46
|
+
|
|
47
|
+
if (!coreConfig) {
|
|
48
|
+
return { text: null, error: "Core config unavailable for voice response" };
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
let deps: Awaited<ReturnType<typeof loadCoreAgentDeps>>;
|
|
52
|
+
try {
|
|
53
|
+
deps = await loadCoreAgentDeps();
|
|
54
|
+
} catch (err) {
|
|
55
|
+
return {
|
|
56
|
+
text: null,
|
|
57
|
+
error:
|
|
58
|
+
err instanceof Error
|
|
59
|
+
? err.message
|
|
60
|
+
: "Unable to load core agent dependencies",
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
const cfg = coreConfig;
|
|
64
|
+
|
|
65
|
+
// Build voice-specific session key based on phone number
|
|
66
|
+
const normalizedPhone = from.replace(/\D/g, "");
|
|
67
|
+
const sessionKey = `voice:${normalizedPhone}`;
|
|
68
|
+
const agentId = "main";
|
|
69
|
+
|
|
70
|
+
// Resolve paths
|
|
71
|
+
const storePath = deps.resolveStorePath(cfg.session?.store, { agentId });
|
|
72
|
+
const agentDir = deps.resolveAgentDir(cfg, agentId);
|
|
73
|
+
const workspaceDir = deps.resolveAgentWorkspaceDir(cfg, agentId);
|
|
74
|
+
|
|
75
|
+
// Ensure workspace exists
|
|
76
|
+
await deps.ensureAgentWorkspace({ dir: workspaceDir });
|
|
77
|
+
|
|
78
|
+
// Load or create session entry
|
|
79
|
+
const sessionStore = deps.loadSessionStore(storePath);
|
|
80
|
+
const now = Date.now();
|
|
81
|
+
let sessionEntry = sessionStore[sessionKey] as SessionEntry | undefined;
|
|
82
|
+
|
|
83
|
+
if (!sessionEntry) {
|
|
84
|
+
sessionEntry = {
|
|
85
|
+
sessionId: crypto.randomUUID(),
|
|
86
|
+
updatedAt: now,
|
|
87
|
+
};
|
|
88
|
+
sessionStore[sessionKey] = sessionEntry;
|
|
89
|
+
await deps.saveSessionStore(storePath, sessionStore);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const sessionId = sessionEntry.sessionId;
|
|
93
|
+
const sessionFile = deps.resolveSessionFilePath(sessionId, sessionEntry, {
|
|
94
|
+
agentId,
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
// Resolve model from config
|
|
98
|
+
const modelRef =
|
|
99
|
+
voiceConfig.responseModel ||
|
|
100
|
+
`${deps.DEFAULT_PROVIDER}/${deps.DEFAULT_MODEL}`;
|
|
101
|
+
const slashIndex = modelRef.indexOf("/");
|
|
102
|
+
const provider =
|
|
103
|
+
slashIndex === -1 ? deps.DEFAULT_PROVIDER : modelRef.slice(0, slashIndex);
|
|
104
|
+
const model = slashIndex === -1 ? modelRef : modelRef.slice(slashIndex + 1);
|
|
105
|
+
|
|
106
|
+
// Resolve thinking level
|
|
107
|
+
const thinkLevel = deps.resolveThinkingDefault({ cfg, provider, model });
|
|
108
|
+
|
|
109
|
+
// Resolve agent identity for personalized prompt
|
|
110
|
+
const identity = deps.resolveAgentIdentity(cfg, agentId);
|
|
111
|
+
const agentName = identity?.name?.trim() || "assistant";
|
|
112
|
+
|
|
113
|
+
// Build system prompt with conversation history
|
|
114
|
+
const basePrompt =
|
|
115
|
+
voiceConfig.responseSystemPrompt ??
|
|
116
|
+
`You are ${agentName}, a helpful voice assistant on a phone call. Keep responses brief and conversational (1-2 sentences max). Be natural and friendly. The caller's phone number is ${from}. You have access to tools - use them when helpful.`;
|
|
117
|
+
|
|
118
|
+
let extraSystemPrompt = basePrompt;
|
|
119
|
+
if (transcript.length > 0) {
|
|
120
|
+
const history = transcript
|
|
121
|
+
.map(
|
|
122
|
+
(entry) =>
|
|
123
|
+
`${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`,
|
|
124
|
+
)
|
|
125
|
+
.join("\n");
|
|
126
|
+
extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// Resolve timeout
|
|
130
|
+
const timeoutMs =
|
|
131
|
+
voiceConfig.responseTimeoutMs ?? deps.resolveAgentTimeoutMs({ cfg });
|
|
132
|
+
const runId = `voice:${callId}:${Date.now()}`;
|
|
133
|
+
|
|
134
|
+
try {
|
|
135
|
+
const result = await deps.runEmbeddedPiAgent({
|
|
136
|
+
sessionId,
|
|
137
|
+
sessionKey,
|
|
138
|
+
messageProvider: "voice",
|
|
139
|
+
sessionFile,
|
|
140
|
+
workspaceDir,
|
|
141
|
+
config: cfg,
|
|
142
|
+
prompt: userMessage,
|
|
143
|
+
provider,
|
|
144
|
+
model,
|
|
145
|
+
thinkLevel,
|
|
146
|
+
verboseLevel: "off",
|
|
147
|
+
timeoutMs,
|
|
148
|
+
runId,
|
|
149
|
+
lane: "voice",
|
|
150
|
+
extraSystemPrompt,
|
|
151
|
+
agentDir,
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
// Extract text from payloads
|
|
155
|
+
const texts = (result.payloads ?? [])
|
|
156
|
+
.filter((p) => p.text && !p.isError)
|
|
157
|
+
.map((p) => p.text?.trim())
|
|
158
|
+
.filter(Boolean);
|
|
159
|
+
|
|
160
|
+
const text = texts.join(" ") || null;
|
|
161
|
+
|
|
162
|
+
if (!text && result.meta.aborted) {
|
|
163
|
+
return { text: null, error: "Response generation was aborted" };
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
return { text };
|
|
167
|
+
} catch (err) {
|
|
168
|
+
console.error(`[voice-call] Response generation failed:`, err);
|
|
169
|
+
return { text: null, error: String(err) };
|
|
170
|
+
}
|
|
171
|
+
}
|
package/src/runtime.ts
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import type { CoreConfig } from "./core-bridge.js";
|
|
2
|
+
import type { VoiceCallConfig } from "./config.js";
|
|
3
|
+
import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js";
|
|
4
|
+
import { CallManager } from "./manager.js";
|
|
5
|
+
import type { VoiceCallProvider } from "./providers/base.js";
|
|
6
|
+
import { MockProvider } from "./providers/mock.js";
|
|
7
|
+
import { PlivoProvider } from "./providers/plivo.js";
|
|
8
|
+
import { TelnyxProvider } from "./providers/telnyx.js";
|
|
9
|
+
import { TwilioProvider } from "./providers/twilio.js";
|
|
10
|
+
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
|
11
|
+
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
|
12
|
+
import { startTunnel, type TunnelResult } from "./tunnel.js";
|
|
13
|
+
import {
|
|
14
|
+
cleanupTailscaleExposure,
|
|
15
|
+
setupTailscaleExposure,
|
|
16
|
+
VoiceCallWebhookServer,
|
|
17
|
+
} from "./webhook.js";
|
|
18
|
+
|
|
19
|
+
/** Fully-initialized voice-call runtime returned by createVoiceCallRuntime. */
export type VoiceCallRuntime = {
  // Resolved (normalized) voice-call configuration.
  config: VoiceCallConfig;
  // Active telephony provider instance (telnyx/twilio/plivo/mock).
  provider: VoiceCallProvider;
  // Call state manager, initialized with the provider and webhook URL.
  manager: CallManager;
  // HTTP server receiving provider webhooks.
  webhookServer: VoiceCallWebhookServer;
  // URL providers deliver webhooks to: the public URL when one exists,
  // otherwise the local server URL.
  webhookUrl: string;
  // Externally reachable URL from config.publicUrl, a tunnel, or tailscale;
  // null when none was established.
  publicUrl: string | null;
  // Stops the tunnel (if any), cleans up tailscale exposure, and shuts down
  // the webhook server.
  stop: () => Promise<void>;
};

/** Minimal logging surface used by the runtime; defaults to console methods. */
type Logger = {
  info: (message: string) => void;
  warn: (message: string) => void;
  error: (message: string) => void;
  debug: (message: string) => void;
};
|
|
35
|
+
|
|
36
|
+
function isLoopbackBind(bind: string | undefined): boolean {
|
|
37
|
+
if (!bind) return false;
|
|
38
|
+
return bind === "127.0.0.1" || bind === "::1" || bind === "localhost";
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
42
|
+
const allowNgrokFreeTierLoopbackBypass =
|
|
43
|
+
config.tunnel?.provider === "ngrok" &&
|
|
44
|
+
isLoopbackBind(config.serve?.bind) &&
|
|
45
|
+
(config.tunnel?.allowNgrokFreeTierLoopbackBypass ||
|
|
46
|
+
config.tunnel?.allowNgrokFreeTier ||
|
|
47
|
+
false);
|
|
48
|
+
|
|
49
|
+
switch (config.provider) {
|
|
50
|
+
case "telnyx":
|
|
51
|
+
return new TelnyxProvider({
|
|
52
|
+
apiKey: config.telnyx?.apiKey,
|
|
53
|
+
connectionId: config.telnyx?.connectionId,
|
|
54
|
+
publicKey: config.telnyx?.publicKey,
|
|
55
|
+
});
|
|
56
|
+
case "twilio":
|
|
57
|
+
return new TwilioProvider(
|
|
58
|
+
{
|
|
59
|
+
accountSid: config.twilio?.accountSid,
|
|
60
|
+
authToken: config.twilio?.authToken,
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
allowNgrokFreeTierLoopbackBypass,
|
|
64
|
+
publicUrl: config.publicUrl,
|
|
65
|
+
skipVerification: config.skipSignatureVerification,
|
|
66
|
+
streamPath: config.streaming?.enabled
|
|
67
|
+
? config.streaming.streamPath
|
|
68
|
+
: undefined,
|
|
69
|
+
},
|
|
70
|
+
);
|
|
71
|
+
case "plivo":
|
|
72
|
+
return new PlivoProvider(
|
|
73
|
+
{
|
|
74
|
+
authId: config.plivo?.authId,
|
|
75
|
+
authToken: config.plivo?.authToken,
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
publicUrl: config.publicUrl,
|
|
79
|
+
skipVerification: config.skipSignatureVerification,
|
|
80
|
+
ringTimeoutSec: Math.max(1, Math.floor(config.ringTimeoutMs / 1000)),
|
|
81
|
+
},
|
|
82
|
+
);
|
|
83
|
+
case "mock":
|
|
84
|
+
return new MockProvider();
|
|
85
|
+
default:
|
|
86
|
+
throw new Error(
|
|
87
|
+
`Unsupported voice-call provider: ${String(config.provider)}`,
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
 * Build and start the full voice-call runtime: resolve config, instantiate
 * the provider, start the webhook server, establish a public URL (config >
 * tunnel > tailscale), wire Twilio streaming extras, and initialize the
 * call manager. Returns handles plus a stop() that tears everything down.
 *
 * Throws when the plugin is disabled or provider config validation fails.
 */
export async function createVoiceCallRuntime(params: {
  config: VoiceCallConfig;
  coreConfig: CoreConfig;
  ttsRuntime?: TelephonyTtsRuntime;
  logger?: Logger;
}): Promise<VoiceCallRuntime> {
  const { config: rawConfig, coreConfig, ttsRuntime, logger } = params;
  // Fall back to console when no logger is injected.
  const log = logger ?? {
    info: console.log,
    warn: console.warn,
    error: console.error,
    debug: console.debug,
  };

  const config = resolveVoiceCallConfig(rawConfig);

  if (!config.enabled) {
    throw new Error(
      "Voice call disabled. Enable the plugin entry in config.",
    );
  }

  const validation = validateProviderConfig(config);
  if (!validation.valid) {
    throw new Error(`Invalid voice-call config: ${validation.errors.join("; ")}`);
  }

  const provider = resolveProvider(config);
  const manager = new CallManager(config);
  const webhookServer = new VoiceCallWebhookServer(
    config,
    manager,
    provider,
    coreConfig,
  );

  // Start the local HTTP listener first; tunnels point at it.
  const localUrl = await webhookServer.start();

  // Determine public URL - priority: config.publicUrl > tunnel > legacy tailscale
  let publicUrl: string | null = config.publicUrl ?? null;
  let tunnelResult: TunnelResult | null = null;

  if (!publicUrl && config.tunnel?.provider && config.tunnel.provider !== "none") {
    try {
      tunnelResult = await startTunnel({
        provider: config.tunnel.provider,
        port: config.serve.port,
        path: config.serve.path,
        ngrokAuthToken: config.tunnel.ngrokAuthToken,
        ngrokDomain: config.tunnel.ngrokDomain,
      });
      publicUrl = tunnelResult?.publicUrl ?? null;
    } catch (err) {
      // Tunnel failure is non-fatal; we fall through to tailscale/local URL.
      log.error(
        `[voice-call] Tunnel setup failed: ${
          err instanceof Error ? err.message : String(err)
        }`,
      );
    }
  }

  if (!publicUrl && config.tailscale?.mode !== "off") {
    publicUrl = await setupTailscaleExposure(config);
  }

  // Providers receive the public URL when available, else the local one.
  const webhookUrl = publicUrl ?? localUrl;

  if (publicUrl && provider.name === "twilio") {
    (provider as TwilioProvider).setPublicUrl(publicUrl);
  }

  // Twilio media streaming: attach a telephony TTS provider (when the host
  // runtime offers one) and the media stream handler.
  if (provider.name === "twilio" && config.streaming?.enabled) {
    const twilioProvider = provider as TwilioProvider;
    if (ttsRuntime?.textToSpeechTelephony) {
      try {
        const ttsProvider = createTelephonyTtsProvider({
          coreConfig,
          ttsOverride: config.tts,
          runtime: ttsRuntime,
        });
        twilioProvider.setTTSProvider(ttsProvider);
        log.info("[voice-call] Telephony TTS provider configured");
      } catch (err) {
        // TTS init failure degrades gracefully; streaming continues without it.
        log.warn(
          `[voice-call] Failed to initialize telephony TTS: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
    } else {
      log.warn("[voice-call] Telephony TTS unavailable; streaming TTS disabled");
    }

    const mediaHandler = webhookServer.getMediaStreamHandler();
    if (mediaHandler) {
      twilioProvider.setMediaStreamHandler(mediaHandler);
      log.info("[voice-call] Media stream handler wired to provider");
    }
  }

  manager.initialize(provider, webhookUrl);

  // Teardown in reverse order of setup: tunnel, tailscale, then the server.
  const stop = async () => {
    if (tunnelResult) {
      await tunnelResult.stop();
    }
    await cleanupTailscaleExposure(config);
    await webhookServer.stop();
  };

  log.info("[voice-call] Runtime initialized");
  log.info(`[voice-call] Webhook URL: ${webhookUrl}`);
  if (publicUrl) {
    log.info(`[voice-call] Public URL: ${publicUrl}`);
  }

  return {
    config,
    provider,
    manager,
    webhookServer,
    webhookUrl,
    publicUrl,
    stop,
  };
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
const TELEPHONY_SAMPLE_RATE = 8000;
|
|
2
|
+
|
|
3
|
+
function clamp16(value: number): number {
|
|
4
|
+
return Math.max(-32768, Math.min(32767, value));
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Resample 16-bit PCM (little-endian mono) to 8kHz using linear interpolation.
|
|
9
|
+
*/
|
|
10
|
+
export function resamplePcmTo8k(input: Buffer, inputSampleRate: number): Buffer {
|
|
11
|
+
if (inputSampleRate === TELEPHONY_SAMPLE_RATE) return input;
|
|
12
|
+
const inputSamples = Math.floor(input.length / 2);
|
|
13
|
+
if (inputSamples === 0) return Buffer.alloc(0);
|
|
14
|
+
|
|
15
|
+
const ratio = inputSampleRate / TELEPHONY_SAMPLE_RATE;
|
|
16
|
+
const outputSamples = Math.floor(inputSamples / ratio);
|
|
17
|
+
const output = Buffer.alloc(outputSamples * 2);
|
|
18
|
+
|
|
19
|
+
for (let i = 0; i < outputSamples; i++) {
|
|
20
|
+
const srcPos = i * ratio;
|
|
21
|
+
const srcIndex = Math.floor(srcPos);
|
|
22
|
+
const frac = srcPos - srcIndex;
|
|
23
|
+
|
|
24
|
+
const s0 = input.readInt16LE(srcIndex * 2);
|
|
25
|
+
const s1Index = Math.min(srcIndex + 1, inputSamples - 1);
|
|
26
|
+
const s1 = input.readInt16LE(s1Index * 2);
|
|
27
|
+
|
|
28
|
+
const sample = Math.round(s0 + frac * (s1 - s0));
|
|
29
|
+
output.writeInt16LE(clamp16(sample), i * 2);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
return output;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Convert 16-bit PCM to 8-bit mu-law (G.711).
|
|
37
|
+
*/
|
|
38
|
+
export function pcmToMulaw(pcm: Buffer): Buffer {
|
|
39
|
+
const samples = Math.floor(pcm.length / 2);
|
|
40
|
+
const mulaw = Buffer.alloc(samples);
|
|
41
|
+
|
|
42
|
+
for (let i = 0; i < samples; i++) {
|
|
43
|
+
const sample = pcm.readInt16LE(i * 2);
|
|
44
|
+
mulaw[i] = linearToMulaw(sample);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
return mulaw;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
export function convertPcmToMulaw8k(
|
|
51
|
+
pcm: Buffer,
|
|
52
|
+
inputSampleRate: number,
|
|
53
|
+
): Buffer {
|
|
54
|
+
const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
|
|
55
|
+
return pcmToMulaw(pcm8k);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
|
|
60
|
+
*/
|
|
61
|
+
export function chunkAudio(
|
|
62
|
+
audio: Buffer,
|
|
63
|
+
chunkSize = 160,
|
|
64
|
+
): Generator<Buffer, void, unknown> {
|
|
65
|
+
return (function* () {
|
|
66
|
+
for (let i = 0; i < audio.length; i += chunkSize) {
|
|
67
|
+
yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
|
|
68
|
+
}
|
|
69
|
+
})();
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
function linearToMulaw(sample: number): number {
|
|
73
|
+
const BIAS = 132;
|
|
74
|
+
const CLIP = 32635;
|
|
75
|
+
|
|
76
|
+
const sign = sample < 0 ? 0x80 : 0;
|
|
77
|
+
if (sample < 0) sample = -sample;
|
|
78
|
+
if (sample > CLIP) sample = CLIP;
|
|
79
|
+
|
|
80
|
+
sample += BIAS;
|
|
81
|
+
let exponent = 7;
|
|
82
|
+
for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--) {
|
|
83
|
+
expMask >>= 1;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
const mantissa = (sample >> (exponent + 3)) & 0x0f;
|
|
87
|
+
return ~(sign | (exponent << 4) | mantissa) & 0xff;
|
|
88
|
+
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import type { CoreConfig } from "./core-bridge.js";
|
|
2
|
+
import type { VoiceCallTtsConfig } from "./config.js";
|
|
3
|
+
import { convertPcmToMulaw8k } from "./telephony-audio.js";
|
|
4
|
+
|
|
5
|
+
/**
 * TTS capability the host runtime supplies. Synthesizes text into PCM audio
 * plus its sample rate, or reports failure via `success`/`error`.
 */
export type TelephonyTtsRuntime = {
  textToSpeechTelephony: (params: {
    // Text to synthesize.
    text: string;
    // Core config (possibly with voice-call TTS overrides applied).
    cfg: CoreConfig;
    prefsPath?: string;
  }) => Promise<{
    success: boolean;
    // PCM audio; present on success. Assumed 16-bit mono — TODO confirm
    // against the host runtime's contract.
    audioBuffer?: Buffer;
    // Sample rate of audioBuffer in Hz; present on success.
    sampleRate?: number;
    provider?: string;
    // Failure description when success is false.
    error?: string;
  }>;
};

/** Telephony-ready TTS: text in, 8kHz mu-law audio bytes out. */
export type TelephonyTtsProvider = {
  synthesizeForTelephony: (text: string) => Promise<Buffer>;
};
|
|
22
|
+
|
|
23
|
+
export function createTelephonyTtsProvider(params: {
|
|
24
|
+
coreConfig: CoreConfig;
|
|
25
|
+
ttsOverride?: VoiceCallTtsConfig;
|
|
26
|
+
runtime: TelephonyTtsRuntime;
|
|
27
|
+
}): TelephonyTtsProvider {
|
|
28
|
+
const { coreConfig, ttsOverride, runtime } = params;
|
|
29
|
+
const mergedConfig = applyTtsOverride(coreConfig, ttsOverride);
|
|
30
|
+
|
|
31
|
+
return {
|
|
32
|
+
synthesizeForTelephony: async (text: string) => {
|
|
33
|
+
const result = await runtime.textToSpeechTelephony({
|
|
34
|
+
text,
|
|
35
|
+
cfg: mergedConfig,
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
if (!result.success || !result.audioBuffer || !result.sampleRate) {
|
|
39
|
+
throw new Error(result.error ?? "TTS conversion failed");
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
return convertPcmToMulaw8k(result.audioBuffer, result.sampleRate);
|
|
43
|
+
},
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
function applyTtsOverride(
|
|
48
|
+
coreConfig: CoreConfig,
|
|
49
|
+
override?: VoiceCallTtsConfig,
|
|
50
|
+
): CoreConfig {
|
|
51
|
+
if (!override) return coreConfig;
|
|
52
|
+
|
|
53
|
+
const base = coreConfig.messages?.tts;
|
|
54
|
+
const merged = mergeTtsConfig(base, override);
|
|
55
|
+
if (!merged) return coreConfig;
|
|
56
|
+
|
|
57
|
+
return {
|
|
58
|
+
...coreConfig,
|
|
59
|
+
messages: {
|
|
60
|
+
...(coreConfig.messages ?? {}),
|
|
61
|
+
tts: merged,
|
|
62
|
+
},
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
function mergeTtsConfig(
|
|
67
|
+
base?: VoiceCallTtsConfig,
|
|
68
|
+
override?: VoiceCallTtsConfig,
|
|
69
|
+
): VoiceCallTtsConfig | undefined {
|
|
70
|
+
if (!base && !override) return undefined;
|
|
71
|
+
if (!override) return base;
|
|
72
|
+
if (!base) return override;
|
|
73
|
+
return deepMerge(base, override);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
function deepMerge<T>(base: T, override: T): T {
|
|
77
|
+
if (!isPlainObject(base) || !isPlainObject(override)) {
|
|
78
|
+
return override;
|
|
79
|
+
}
|
|
80
|
+
const result: Record<string, unknown> = { ...base };
|
|
81
|
+
for (const [key, value] of Object.entries(override)) {
|
|
82
|
+
if (value === undefined) continue;
|
|
83
|
+
const existing = (base as Record<string, unknown>)[key];
|
|
84
|
+
if (isPlainObject(existing) && isPlainObject(value)) {
|
|
85
|
+
result[key] = deepMerge(existing, value);
|
|
86
|
+
} else {
|
|
87
|
+
result[key] = value;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
return result as T;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function isPlainObject(value: unknown): value is Record<string, unknown> {
|
|
94
|
+
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
95
|
+
}
|