@openclaw/voice-call 2026.3.13 → 2026.5.1-beta.1

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +25 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +866 -0
  6. package/index.ts +353 -148
  7. package/openclaw.plugin.json +336 -157
  8. package/package.json +33 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +160 -12
  17. package/src/config.ts +243 -74
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +179 -19
  24. package/src/manager/events.ts +48 -30
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +464 -0
  28. package/src/manager/outbound.ts +148 -55
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +277 -0
  64. package/src/response-generator.ts +186 -40
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +351 -0
  68. package/src/runtime.ts +254 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +26 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +513 -100
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
package/src/runtime.ts CHANGED
@@ -1,16 +1,34 @@
1
+ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
2
+ import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
3
+ import {
4
+ consultRealtimeVoiceAgent,
5
+ REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
6
+ resolveRealtimeVoiceAgentConsultTools,
7
+ resolveRealtimeVoiceAgentConsultToolsAllow,
8
+ type RealtimeVoiceAgentConsultTranscriptEntry,
9
+ type ResolvedRealtimeVoiceProvider,
10
+ } from "openclaw/plugin-sdk/realtime-voice";
1
11
  import type { VoiceCallConfig } from "./config.js";
2
- import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js";
3
- import type { CoreConfig } from "./core-bridge.js";
12
+ import {
13
+ resolveTwilioAuthToken,
14
+ resolveVoiceCallConfig,
15
+ validateProviderConfig,
16
+ } from "./config.js";
17
+ import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
4
18
  import { CallManager } from "./manager.js";
5
19
  import type { VoiceCallProvider } from "./providers/base.js";
6
- import { MockProvider } from "./providers/mock.js";
7
- import { PlivoProvider } from "./providers/plivo.js";
8
- import { TelnyxProvider } from "./providers/telnyx.js";
9
- import { TwilioProvider } from "./providers/twilio.js";
20
+ import type { TwilioProvider } from "./providers/twilio.js";
21
+ import { resolveRealtimeFastContextConsult } from "./realtime-fast-context.js";
22
+ import { resolveVoiceResponseModel } from "./response-model.js";
10
23
  import type { TelephonyTtsRuntime } from "./telephony-tts.js";
11
24
  import { createTelephonyTtsProvider } from "./telephony-tts.js";
12
25
  import { startTunnel, type TunnelResult } from "./tunnel.js";
26
+ import {
27
+ isProviderUnreachableWebhookUrl,
28
+ providerRequiresPublicWebhook,
29
+ } from "./webhook-exposure.js";
13
30
  import { VoiceCallWebhookServer } from "./webhook.js";
31
+ import type { ToolHandlerContext } from "./webhook/realtime-handler.js";
14
32
  import { cleanupTailscaleExposure, setupTailscaleExposure } from "./webhook/tailscale.js";
15
33
 
16
34
  export type VoiceCallRuntime = {
@@ -30,6 +48,94 @@ type Logger = {
30
48
  debug?: (message: string) => void;
31
49
  };
32
50
 
51
+ type ResolvedRealtimeProvider = ResolvedRealtimeVoiceProvider;
52
+
53
+ type TelnyxProviderModule = typeof import("./providers/telnyx.js");
54
+ type TwilioProviderModule = typeof import("./providers/twilio.js");
55
+ type PlivoProviderModule = typeof import("./providers/plivo.js");
56
+ type MockProviderModule = typeof import("./providers/mock.js");
57
+ type RealtimeVoiceRuntimeModule = typeof import("./realtime-voice.runtime.js");
58
+ type RealtimeHandlerModule = typeof import("./webhook/realtime-handler.js");
59
+
60
+ const REALTIME_VOICE_CONSULT_SYSTEM_PROMPT = [
61
+ "You are a behind-the-scenes consultant for a live phone voice agent.",
62
+ "Prioritize a fast, speakable answer over exhaustive investigation.",
63
+ "For tool-backed status checks, prefer one or two bounded read-only queries before answering.",
64
+ "Do not print secret values or dump environment variables; only check whether required configuration is present.",
65
+ "Be accurate, brief, and speakable.",
66
+ ].join(" ");
67
+
68
+ let telnyxProviderPromise: Promise<TelnyxProviderModule> | undefined;
69
+ let twilioProviderPromise: Promise<TwilioProviderModule> | undefined;
70
+ let plivoProviderPromise: Promise<PlivoProviderModule> | undefined;
71
+ let mockProviderPromise: Promise<MockProviderModule> | undefined;
72
+ let realtimeVoiceRuntimePromise: Promise<RealtimeVoiceRuntimeModule> | undefined;
73
+ let realtimeHandlerPromise: Promise<RealtimeHandlerModule> | undefined;
74
+
75
+ function loadTelnyxProvider(): Promise<TelnyxProviderModule> {
76
+ telnyxProviderPromise ??= import("./providers/telnyx.js");
77
+ return telnyxProviderPromise;
78
+ }
79
+
80
+ function loadTwilioProvider(): Promise<TwilioProviderModule> {
81
+ twilioProviderPromise ??= import("./providers/twilio.js");
82
+ return twilioProviderPromise;
83
+ }
84
+
85
+ function loadPlivoProvider(): Promise<PlivoProviderModule> {
86
+ plivoProviderPromise ??= import("./providers/plivo.js");
87
+ return plivoProviderPromise;
88
+ }
89
+
90
+ function loadMockProvider(): Promise<MockProviderModule> {
91
+ mockProviderPromise ??= import("./providers/mock.js");
92
+ return mockProviderPromise;
93
+ }
94
+
95
+ function loadRealtimeVoiceRuntime(): Promise<RealtimeVoiceRuntimeModule> {
96
+ realtimeVoiceRuntimePromise ??= import("./realtime-voice.runtime.js");
97
+ return realtimeVoiceRuntimePromise;
98
+ }
99
+
100
+ function loadRealtimeHandler(): Promise<RealtimeHandlerModule> {
101
+ realtimeHandlerPromise ??= import("./webhook/realtime-handler.js");
102
+ return realtimeHandlerPromise;
103
+ }
104
+
105
+ function resolveVoiceCallConsultSessionKey(call: {
106
+ sessionKey?: string;
107
+ from?: string;
108
+ to?: string;
109
+ direction?: "inbound" | "outbound";
110
+ callId: string;
111
+ }): string {
112
+ if (call.sessionKey) {
113
+ return call.sessionKey;
114
+ }
115
+ const phone = call.direction === "outbound" ? call.to : call.from;
116
+ const normalizedPhone = phone?.replace(/\D/g, "");
117
+ return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${call.callId}`;
118
+ }
119
+
120
+ function mapVoiceCallConsultTranscript(
121
+ call: {
122
+ transcript?: Array<{ speaker: "user" | "bot"; text: string }>;
123
+ },
124
+ context?: ToolHandlerContext,
125
+ ): RealtimeVoiceAgentConsultTranscriptEntry[] {
126
+ const transcript: RealtimeVoiceAgentConsultTranscriptEntry[] = (call.transcript ?? []).map(
127
+ (entry) => ({
128
+ role: entry.speaker === "bot" ? "assistant" : "user",
129
+ text: entry.text,
130
+ }),
131
+ );
132
+ const partial = context?.partialUserTranscript?.trim();
133
+ if (partial && transcript.at(-1)?.text !== partial) {
134
+ transcript.push({ role: "user", text: partial });
135
+ }
136
+ return transcript;
137
+ }
138
+
33
139
  function createRuntimeResourceLifecycle(params: {
34
140
  config: VoiceCallConfig;
35
141
  webhookServer: VoiceCallWebhookServer;
@@ -80,14 +186,15 @@ function isLoopbackBind(bind: string | undefined): boolean {
80
186
  return bind === "127.0.0.1" || bind === "::1" || bind === "localhost";
81
187
  }
82
188
 
83
- function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
189
+ async function resolveProvider(config: VoiceCallConfig): Promise<VoiceCallProvider> {
84
190
  const allowNgrokFreeTierLoopbackBypass =
85
191
  config.tunnel?.provider === "ngrok" &&
86
192
  isLoopbackBind(config.serve?.bind) &&
87
193
  (config.tunnel?.allowNgrokFreeTierLoopbackBypass ?? false);
88
194
 
89
195
  switch (config.provider) {
90
- case "telnyx":
196
+ case "telnyx": {
197
+ const { TelnyxProvider } = await loadTelnyxProvider();
91
198
  return new TelnyxProvider(
92
199
  {
93
200
  apiKey: config.telnyx?.apiKey,
@@ -98,11 +205,13 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
98
205
  skipVerification: config.skipSignatureVerification,
99
206
  },
100
207
  );
101
- case "twilio":
208
+ }
209
+ case "twilio": {
210
+ const { TwilioProvider } = await loadTwilioProvider();
102
211
  return new TwilioProvider(
103
212
  {
104
213
  accountSid: config.twilio?.accountSid,
105
- authToken: config.twilio?.authToken,
214
+ authToken: resolveTwilioAuthToken(config),
106
215
  },
107
216
  {
108
217
  allowNgrokFreeTierLoopbackBypass,
@@ -112,7 +221,9 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
112
221
  webhookSecurity: config.webhookSecurity,
113
222
  },
114
223
  );
115
- case "plivo":
224
+ }
225
+ case "plivo": {
226
+ const { PlivoProvider } = await loadPlivoProvider();
116
227
  return new PlivoProvider(
117
228
  {
118
229
  authId: config.plivo?.authId,
@@ -125,20 +236,37 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
125
236
  webhookSecurity: config.webhookSecurity,
126
237
  },
127
238
  );
128
- case "mock":
239
+ }
240
+ case "mock": {
241
+ const { MockProvider } = await loadMockProvider();
129
242
  return new MockProvider();
243
+ }
130
244
  default:
131
245
  throw new Error(`Unsupported voice-call provider: ${String(config.provider)}`);
132
246
  }
133
247
  }
134
248
 
249
+ async function resolveRealtimeProvider(params: {
250
+ config: VoiceCallConfig;
251
+ fullConfig: OpenClawConfig;
252
+ }): Promise<ResolvedRealtimeProvider> {
253
+ const { resolveConfiguredRealtimeVoiceProvider } = await loadRealtimeVoiceRuntime();
254
+ return resolveConfiguredRealtimeVoiceProvider({
255
+ configuredProviderId: params.config.realtime.provider,
256
+ providerConfigs: params.config.realtime.providers,
257
+ cfg: params.fullConfig,
258
+ });
259
+ }
260
+
135
261
  export async function createVoiceCallRuntime(params: {
136
262
  config: VoiceCallConfig;
137
263
  coreConfig: CoreConfig;
264
+ fullConfig?: OpenClawConfig;
265
+ agentRuntime: CoreAgentDeps;
138
266
  ttsRuntime?: TelephonyTtsRuntime;
139
267
  logger?: Logger;
140
268
  }): Promise<VoiceCallRuntime> {
141
- const { config: rawConfig, coreConfig, ttsRuntime, logger } = params;
269
+ const { config: rawConfig, coreConfig, fullConfig, agentRuntime, ttsRuntime, logger } = params;
142
270
  const log = logger ?? {
143
271
  info: console.log,
144
272
  warn: console.warn,
@@ -147,6 +275,7 @@ export async function createVoiceCallRuntime(params: {
147
275
  };
148
276
 
149
277
  const config = resolveVoiceCallConfig(rawConfig);
278
+ const cfg = fullConfig ?? (coreConfig as OpenClawConfig);
150
279
 
151
280
  if (!config.enabled) {
152
281
  throw new Error("Voice call disabled. Enable the plugin entry in config.");
@@ -163,9 +292,97 @@ export async function createVoiceCallRuntime(params: {
163
292
  throw new Error(`Invalid voice-call config: ${validation.errors.join("; ")}`);
164
293
  }
165
294
 
166
- const provider = resolveProvider(config);
295
+ const provider = await resolveProvider(config);
167
296
  const manager = new CallManager(config);
168
- const webhookServer = new VoiceCallWebhookServer(config, manager, provider, coreConfig);
297
+ const realtimeProvider = config.realtime.enabled
298
+ ? await resolveRealtimeProvider({
299
+ config,
300
+ fullConfig: cfg,
301
+ })
302
+ : null;
303
+ const webhookServer = new VoiceCallWebhookServer(
304
+ config,
305
+ manager,
306
+ provider,
307
+ coreConfig,
308
+ fullConfig ?? (coreConfig as OpenClawConfig),
309
+ agentRuntime,
310
+ log,
311
+ );
312
+ if (realtimeProvider) {
313
+ const { RealtimeCallHandler } = await loadRealtimeHandler();
314
+ const realtimeConfig = {
315
+ ...config.realtime,
316
+ tools: resolveRealtimeVoiceAgentConsultTools(
317
+ config.realtime.toolPolicy,
318
+ config.realtime.tools,
319
+ ),
320
+ };
321
+ const realtimeHandler = new RealtimeCallHandler(
322
+ realtimeConfig,
323
+ manager,
324
+ provider,
325
+ realtimeProvider.provider,
326
+ realtimeProvider.providerConfig,
327
+ config.serve.path,
328
+ );
329
+ if (config.realtime.toolPolicy !== "none") {
330
+ realtimeHandler.registerToolHandler(
331
+ REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
332
+ async (args, callId, handlerContext) => {
333
+ const call = manager.getCall(callId);
334
+ if (!call) {
335
+ return { error: `Call "${callId}" not found` };
336
+ }
337
+ const agentId = config.agentId ?? "main";
338
+ const sessionKey = resolveVoiceCallConsultSessionKey(call);
339
+ const fastContext = await resolveRealtimeFastContextConsult({
340
+ cfg,
341
+ agentId,
342
+ sessionKey,
343
+ config: config.realtime.fastContext,
344
+ args,
345
+ logger: log,
346
+ });
347
+ if (fastContext.handled) {
348
+ return fastContext.result;
349
+ }
350
+ const { provider: agentProvider, model } = resolveVoiceResponseModel({
351
+ voiceConfig: config,
352
+ agentRuntime,
353
+ });
354
+ const thinkLevel = agentRuntime.resolveThinkingDefault({
355
+ cfg,
356
+ provider: agentProvider,
357
+ model,
358
+ });
359
+ return await consultRealtimeVoiceAgent({
360
+ cfg,
361
+ agentRuntime,
362
+ logger: log,
363
+ agentId,
364
+ sessionKey,
365
+ messageProvider: "voice",
366
+ lane: "voice",
367
+ runIdPrefix: `voice-realtime-consult:${callId}`,
368
+ args,
369
+ transcript: mapVoiceCallConsultTranscript(call, handlerContext),
370
+ surface: "a live phone call",
371
+ userLabel: "Caller",
372
+ assistantLabel: "Agent",
373
+ questionSourceLabel: "caller",
374
+ provider: agentProvider,
375
+ model,
376
+ thinkLevel,
377
+ timeoutMs: config.responseTimeoutMs,
378
+ toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(config.realtime.toolPolicy),
379
+ extraSystemPrompt: REALTIME_VOICE_CONSULT_SYSTEM_PROMPT,
380
+ });
381
+ },
382
+ );
383
+ }
384
+ webhookServer.setRealtimeHandler(realtimeHandler);
385
+ }
169
386
  const lifecycle = createRuntimeResourceLifecycle({ config, webhookServer });
170
387
 
171
388
  const localUrl = await webhookServer.start();
@@ -190,9 +407,7 @@ export async function createVoiceCallRuntime(params: {
190
407
  lifecycle.setTunnelResult(nextTunnelResult);
191
408
  publicUrl = nextTunnelResult?.publicUrl ?? null;
192
409
  } catch (err) {
193
- log.error(
194
- `[voice-call] Tunnel setup failed: ${err instanceof Error ? err.message : String(err)}`,
195
- );
410
+ log.error(`[voice-call] Tunnel setup failed: ${formatErrorMessage(err)}`);
196
411
  }
197
412
  }
198
413
 
@@ -202,9 +417,23 @@ export async function createVoiceCallRuntime(params: {
202
417
 
203
418
  const webhookUrl = publicUrl ?? localUrl;
204
419
 
420
+ if (
421
+ providerRequiresPublicWebhook(provider.name) &&
422
+ isProviderUnreachableWebhookUrl(webhookUrl)
423
+ ) {
424
+ throw new Error(
425
+ `[voice-call] ${provider.name} requires a publicly reachable webhook URL. ` +
426
+ `Refusing to use local-only webhook ${webhookUrl}. ` +
427
+ "Set plugins.entries.voice-call.config.publicUrl or enable tunnel/tailscale exposure.",
428
+ );
429
+ }
430
+
205
431
  if (publicUrl && provider.name === "twilio") {
206
432
  (provider as TwilioProvider).setPublicUrl(publicUrl);
207
433
  }
434
+ if (publicUrl && realtimeProvider) {
435
+ webhookServer.getRealtimeHandler()?.setPublicUrl(publicUrl);
436
+ }
208
437
 
209
438
  if (provider.name === "twilio" && config.streaming?.enabled) {
210
439
  const twilioProvider = provider as TwilioProvider;
@@ -214,15 +443,12 @@ export async function createVoiceCallRuntime(params: {
214
443
  coreConfig,
215
444
  ttsOverride: config.tts,
216
445
  runtime: ttsRuntime,
446
+ logger: log,
217
447
  });
218
448
  twilioProvider.setTTSProvider(ttsProvider);
219
449
  log.info("[voice-call] Telephony TTS provider configured");
220
450
  } catch (err) {
221
- log.warn(
222
- `[voice-call] Failed to initialize telephony TTS: ${
223
- err instanceof Error ? err.message : String(err)
224
- }`,
225
- );
451
+ log.warn(`[voice-call] Failed to initialize telephony TTS: ${formatErrorMessage(err)}`);
226
452
  }
227
453
  } else {
228
454
  log.warn("[voice-call] Telephony TTS unavailable; streaming TTS disabled");
@@ -235,13 +461,17 @@ export async function createVoiceCallRuntime(params: {
235
461
  }
236
462
  }
237
463
 
464
+ if (realtimeProvider) {
465
+ log.info(`[voice-call] Realtime voice provider: ${realtimeProvider.provider.id}`);
466
+ }
467
+
238
468
  await manager.initialize(provider, webhookUrl);
239
469
 
240
470
  const stop = async () => await lifecycle.stop();
241
471
 
242
472
  log.info("[voice-call] Runtime initialized");
243
473
  log.info(`[voice-call] Webhook URL: ${webhookUrl}`);
244
- if (publicUrl) {
474
+ if (publicUrl && publicUrl !== webhookUrl) {
245
475
  log.info(`[voice-call] Public URL: ${publicUrl}`);
246
476
  }
247
477
 
package/src/telephony-audio.test.ts ADDED
@@ -0,0 +1,61 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { convertPcmToMulaw8k, resamplePcmTo8k } from "./telephony-audio.js";
3
+
4
+ function makeSinePcm(
5
+ sampleRate: number,
6
+ frequencyHz: number,
7
+ durationSeconds: number,
8
+ amplitude = 12_000,
9
+ ): Buffer {
10
+ const samples = Math.floor(sampleRate * durationSeconds);
11
+ const output = Buffer.alloc(samples * 2);
12
+ for (let i = 0; i < samples; i++) {
13
+ const value = Math.round(Math.sin((2 * Math.PI * frequencyHz * i) / sampleRate) * amplitude);
14
+ output.writeInt16LE(value, i * 2);
15
+ }
16
+ return output;
17
+ }
18
+
19
+ function rmsPcm(buffer: Buffer): number {
20
+ const samples = Math.floor(buffer.length / 2);
21
+ if (samples === 0) {
22
+ return 0;
23
+ }
24
+ let sum = 0;
25
+ for (let i = 0; i < samples; i++) {
26
+ const sample = buffer.readInt16LE(i * 2);
27
+ sum += sample * sample;
28
+ }
29
+ return Math.sqrt(sum / samples);
30
+ }
31
+
32
+ describe("telephony-audio resamplePcmTo8k", () => {
33
+ it("returns identical buffer for 8k input", () => {
34
+ const pcm8k = makeSinePcm(8_000, 1_000, 0.2);
35
+ const resampled = resamplePcmTo8k(pcm8k, 8_000);
36
+ expect(resampled).toBe(pcm8k);
37
+ });
38
+
39
+ it("preserves low-frequency speech-band energy when downsampling", () => {
40
+ const input = makeSinePcm(48_000, 1_000, 0.6);
41
+ const output = resamplePcmTo8k(input, 48_000);
42
+ expect(output.length).toBe(9_600);
43
+ expect(rmsPcm(output)).toBeGreaterThan(7_500);
44
+ });
45
+
46
+ it("attenuates out-of-band high frequencies before 8k telephony conversion", () => {
47
+ const lowTone = resamplePcmTo8k(makeSinePcm(48_000, 1_000, 0.6), 48_000);
48
+ const highTone = resamplePcmTo8k(makeSinePcm(48_000, 6_000, 0.6), 48_000);
49
+ const ratio = rmsPcm(highTone) / rmsPcm(lowTone);
50
+ expect(ratio).toBeLessThan(0.1);
51
+ });
52
+ });
53
+
54
+ describe("telephony-audio convertPcmToMulaw8k", () => {
55
+ it("converts to 8k mu-law frame length", () => {
56
+ const input = makeSinePcm(24_000, 1_000, 0.5);
57
+ const mulaw = convertPcmToMulaw8k(input, 24_000);
58
+ // 0.5s @ 8kHz => 4000 8-bit samples
59
+ expect(mulaw.length).toBe(4_000);
60
+ });
61
+ });
package/src/telephony-audio.ts CHANGED
@@ -1,60 +1,4 @@
1
- const TELEPHONY_SAMPLE_RATE = 8000;
2
-
3
- function clamp16(value: number): number {
4
- return Math.max(-32768, Math.min(32767, value));
5
- }
6
-
7
- /**
8
- * Resample 16-bit PCM (little-endian mono) to 8kHz using linear interpolation.
9
- */
10
- export function resamplePcmTo8k(input: Buffer, inputSampleRate: number): Buffer {
11
- if (inputSampleRate === TELEPHONY_SAMPLE_RATE) {
12
- return input;
13
- }
14
- const inputSamples = Math.floor(input.length / 2);
15
- if (inputSamples === 0) {
16
- return Buffer.alloc(0);
17
- }
18
-
19
- const ratio = inputSampleRate / TELEPHONY_SAMPLE_RATE;
20
- const outputSamples = Math.floor(inputSamples / ratio);
21
- const output = Buffer.alloc(outputSamples * 2);
22
-
23
- for (let i = 0; i < outputSamples; i++) {
24
- const srcPos = i * ratio;
25
- const srcIndex = Math.floor(srcPos);
26
- const frac = srcPos - srcIndex;
27
-
28
- const s0 = input.readInt16LE(srcIndex * 2);
29
- const s1Index = Math.min(srcIndex + 1, inputSamples - 1);
30
- const s1 = input.readInt16LE(s1Index * 2);
31
-
32
- const sample = Math.round(s0 + frac * (s1 - s0));
33
- output.writeInt16LE(clamp16(sample), i * 2);
34
- }
35
-
36
- return output;
37
- }
38
-
39
- /**
40
- * Convert 16-bit PCM to 8-bit mu-law (G.711).
41
- */
42
- export function pcmToMulaw(pcm: Buffer): Buffer {
43
- const samples = Math.floor(pcm.length / 2);
44
- const mulaw = Buffer.alloc(samples);
45
-
46
- for (let i = 0; i < samples; i++) {
47
- const sample = pcm.readInt16LE(i * 2);
48
- mulaw[i] = linearToMulaw(sample);
49
- }
50
-
51
- return mulaw;
52
- }
53
-
54
- export function convertPcmToMulaw8k(pcm: Buffer, inputSampleRate: number): Buffer {
55
- const pcm8k = resamplePcmTo8k(pcm, inputSampleRate);
56
- return pcmToMulaw(pcm8k);
57
- }
1
+ export { convertPcmToMulaw8k, resamplePcmTo8k } from "openclaw/plugin-sdk/realtime-voice";
58
2
 
59
3
  /**
60
4
  * Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
@@ -66,25 +10,3 @@ export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, vo
66
10
  }
67
11
  })();
68
12
  }
69
-
70
- function linearToMulaw(sample: number): number {
71
- const BIAS = 132;
72
- const CLIP = 32635;
73
-
74
- const sign = sample < 0 ? 0x80 : 0;
75
- if (sample < 0) {
76
- sample = -sample;
77
- }
78
- if (sample > CLIP) {
79
- sample = CLIP;
80
- }
81
-
82
- sample += BIAS;
83
- let exponent = 7;
84
- for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--) {
85
- expMask >>= 1;
86
- }
87
-
88
- const mantissa = (sample >> (exponent + 3)) & 0x0f;
89
- return ~(sign | (exponent << 4) | mantissa) & 0xff;
90
- }