@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +27 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +943 -0
  6. package/index.ts +379 -149
  7. package/openclaw.plugin.json +384 -157
  8. package/package.json +35 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +273 -12
  17. package/src/config.ts +355 -72
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +243 -19
  24. package/src/manager/events.ts +61 -31
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +528 -0
  28. package/src/manager/outbound.ts +163 -57
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +321 -0
  64. package/src/response-generator.ts +213 -53
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +429 -0
  68. package/src/runtime.ts +270 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +28 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +523 -102
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
@@ -1,4 +1,4 @@
1
- import { afterEach, describe, expect, it } from "vitest";
1
+ import { afterEach, describe, expect, it, vi } from "vitest";
2
2
  import type { VoiceCallTtsConfig } from "./config.js";
3
3
  import type { CoreConfig } from "./core-bridge.js";
4
4
  import { createTelephonyTtsProvider } from "./telephony-tts.js";
@@ -6,14 +6,36 @@ import { createTelephonyTtsProvider } from "./telephony-tts.js";
6
6
  function createCoreConfig(): CoreConfig {
7
7
  const tts: VoiceCallTtsConfig = {
8
8
  provider: "openai",
9
- openai: {
10
- model: "gpt-4o-mini-tts",
11
- voice: "alloy",
9
+ providers: {
10
+ openai: {
11
+ model: "gpt-4o-mini-tts",
12
+ voice: "alloy",
13
+ },
12
14
  },
13
15
  };
14
16
  return { messages: { tts } };
15
17
  }
16
18
 
19
+ function requireMergedTtsConfig(mergedConfig: CoreConfig | undefined) {
20
+ const tts = mergedConfig?.messages?.tts;
21
+ if (!tts) {
22
+ throw new Error("telephony TTS runtime did not receive merged TTS config");
23
+ }
24
+ return tts as Record<string, unknown>;
25
+ }
26
+
27
+ function requireOpenAIProviderConfig(tts: Record<string, unknown>): Record<string, unknown> {
28
+ const providers =
29
+ tts.providers && typeof tts.providers === "object"
30
+ ? (tts.providers as Record<string, unknown>)
31
+ : null;
32
+ const openai = providers?.openai;
33
+ if (!openai || typeof openai !== "object") {
34
+ throw new Error("merged TTS config did not preserve providers.openai");
35
+ }
36
+ return openai as Record<string, unknown>;
37
+ }
38
+
17
39
  async function mergeOverride(override: unknown): Promise<Record<string, unknown>> {
18
40
  let mergedConfig: CoreConfig | undefined;
19
41
  const provider = createTelephonyTtsProvider({
@@ -32,8 +54,7 @@ async function mergeOverride(override: unknown): Promise<Record<string, unknown>
32
54
  });
33
55
 
34
56
  await provider.synthesizeForTelephony("hello");
35
- expect(mergedConfig?.messages?.tts).toBeDefined();
36
- return mergedConfig?.messages?.tts as Record<string, unknown>;
57
+ return requireMergedTtsConfig(mergedConfig);
37
58
  }
38
59
 
39
60
  afterEach(() => {
@@ -43,9 +64,9 @@ afterEach(() => {
43
64
  describe("createTelephonyTtsProvider deepMerge hardening", () => {
44
65
  it("merges safe nested overrides", async () => {
45
66
  const tts = await mergeOverride({
46
- openai: { voice: "coral" },
67
+ providers: { openai: { voice: "coral" } },
47
68
  });
48
- const openai = tts.openai as Record<string, unknown>;
69
+ const openai = requireOpenAIProviderConfig(tts);
49
70
 
50
71
  expect(openai.voice).toBe("coral");
51
72
  expect(openai.model).toBe("gpt-4o-mini-tts");
@@ -53,9 +74,9 @@ describe("createTelephonyTtsProvider deepMerge hardening", () => {
53
74
 
54
75
  it("blocks top-level __proto__ keys", async () => {
55
76
  const tts = await mergeOverride(
56
- JSON.parse('{"__proto__":{"polluted":"top"},"openai":{"voice":"coral"}}'),
77
+ JSON.parse('{"__proto__":{"polluted":"top"},"providers":{"openai":{"voice":"coral"}}}'),
57
78
  );
58
- const openai = tts.openai as Record<string, unknown>;
79
+ const openai = requireOpenAIProviderConfig(tts);
59
80
 
60
81
  expect((Object.prototype as Record<string, unknown>).polluted).toBeUndefined();
61
82
  expect(tts.polluted).toBeUndefined();
@@ -64,12 +85,112 @@ describe("createTelephonyTtsProvider deepMerge hardening", () => {
64
85
 
65
86
  it("blocks nested __proto__ keys", async () => {
66
87
  const tts = await mergeOverride(
67
- JSON.parse('{"openai":{"model":"safe","__proto__":{"polluted":"nested"}}}'),
88
+ JSON.parse('{"providers":{"openai":{"model":"safe","__proto__":{"polluted":"nested"}}}}'),
68
89
  );
69
- const openai = tts.openai as Record<string, unknown>;
90
+ const openai = requireOpenAIProviderConfig(tts);
70
91
 
71
92
  expect((Object.prototype as Record<string, unknown>).polluted).toBeUndefined();
72
93
  expect(openai.polluted).toBeUndefined();
73
94
  expect(openai.model).toBe("safe");
74
95
  });
96
+
97
+ it("logs fallback metadata when telephony TTS uses a fallback provider", async () => {
98
+ const warn = vi.fn();
99
+ const provider = createTelephonyTtsProvider({
100
+ coreConfig: createCoreConfig(),
101
+ runtime: {
102
+ textToSpeechTelephony: async () => ({
103
+ success: true,
104
+ audioBuffer: Buffer.alloc(2),
105
+ sampleRate: 8000,
106
+ provider: "microsoft",
107
+ fallbackFrom: "elevenlabs",
108
+ attemptedProviders: ["elevenlabs", "microsoft"],
109
+ }),
110
+ },
111
+ logger: { warn },
112
+ });
113
+
114
+ await provider.synthesizeForTelephony("hello");
115
+ expect(warn).toHaveBeenCalledWith(
116
+ "[voice-call] Telephony TTS fallback used from=elevenlabs to=microsoft attempts=elevenlabs -> microsoft",
117
+ );
118
+ });
119
+
120
+ it("strips telephony TTS directive tags before synthesis", async () => {
121
+ let requestText: string | undefined;
122
+ const provider = createTelephonyTtsProvider({
123
+ coreConfig: createCoreConfig(),
124
+ runtime: {
125
+ textToSpeechTelephony: async ({ text }) => {
126
+ requestText = text;
127
+ return {
128
+ success: true,
129
+ audioBuffer: Buffer.alloc(2),
130
+ sampleRate: 8000,
131
+ };
132
+ },
133
+ },
134
+ });
135
+
136
+ await provider.synthesizeForTelephony("[[tts]]Hello caller[[/tts]]");
137
+
138
+ expect(requestText).toBe("Hello caller");
139
+ });
140
+
141
+ it("uses hidden telephony TTS directive text for synthesis", async () => {
142
+ let requestText: string | undefined;
143
+ let requestOverrides: unknown;
144
+ const provider = createTelephonyTtsProvider({
145
+ coreConfig: createCoreConfig(),
146
+ runtime: {
147
+ textToSpeechTelephony: async ({ text, overrides }) => {
148
+ requestText = text;
149
+ requestOverrides = overrides;
150
+ return {
151
+ success: true,
152
+ audioBuffer: Buffer.alloc(2),
153
+ sampleRate: 8000,
154
+ };
155
+ },
156
+ },
157
+ });
158
+
159
+ await provider.synthesizeForTelephony(
160
+ "Visible text [[tts:text]]Speak this instead[[/tts:text]]",
161
+ );
162
+
163
+ expect(requestText).toBe("Speak this instead");
164
+ expect(requestOverrides).toMatchObject({ ttsText: "Speak this instead" });
165
+ });
166
+
167
+ it("exposes configured timeoutMs as synthesisTimeoutMs", () => {
168
+ const provider = createTelephonyTtsProvider({
169
+ coreConfig: { messages: { tts: { provider: "openai", timeoutMs: 15000 } } },
170
+ runtime: {
171
+ textToSpeechTelephony: async () => ({
172
+ success: true,
173
+ audioBuffer: Buffer.alloc(2),
174
+ sampleRate: 8000,
175
+ }),
176
+ },
177
+ });
178
+
179
+ expect(provider.synthesisTimeoutMs).toBe(15000);
180
+ });
181
+
182
+ it("keeps the telephony timeout default when timeoutMs is not configured", () => {
183
+ const provider = createTelephonyTtsProvider({
184
+ coreConfig: createCoreConfig(),
185
+ runtime: {
186
+ textToSpeechTelephony: async () => ({
187
+ success: true,
188
+ audioBuffer: Buffer.alloc(2),
189
+ sampleRate: 8000,
190
+ }),
191
+ },
192
+ });
193
+
194
+ expect(provider.synthesisTimeoutMs).toBe(8000);
195
+ });
75
196
  });
@@ -1,3 +1,9 @@
1
+ import {
2
+ parseTtsDirectives,
3
+ type SpeechModelOverridePolicy,
4
+ type SpeechProviderConfig,
5
+ type TtsDirectiveOverrides,
6
+ } from "openclaw/plugin-sdk/speech";
1
7
  import type { VoiceCallTtsConfig } from "./config.js";
2
8
  import type { CoreConfig } from "./core-bridge.js";
3
9
  import { deepMergeDefined } from "./deep-merge.js";
@@ -8,38 +14,91 @@ export type TelephonyTtsRuntime = {
8
14
  text: string;
9
15
  cfg: CoreConfig;
10
16
  prefsPath?: string;
17
+ overrides?: TtsDirectiveOverrides;
11
18
  }) => Promise<{
12
19
  success: boolean;
13
20
  audioBuffer?: Buffer;
14
21
  sampleRate?: number;
15
22
  provider?: string;
23
+ fallbackFrom?: string;
24
+ attemptedProviders?: string[];
16
25
  error?: string;
17
26
  }>;
18
27
  };
19
28
 
20
29
  export type TelephonyTtsProvider = {
30
+ synthesisTimeoutMs: number;
21
31
  synthesizeForTelephony: (text: string) => Promise<Buffer>;
22
32
  };
23
33
 
34
+ export const TELEPHONY_DEFAULT_TTS_TIMEOUT_MS = 8000;
35
+
36
+ type TelephonyModelOverrideConfig = {
37
+ enabled?: boolean;
38
+ allowText?: boolean;
39
+ allowProvider?: boolean;
40
+ allowVoice?: boolean;
41
+ allowModelId?: boolean;
42
+ allowVoiceSettings?: boolean;
43
+ allowNormalization?: boolean;
44
+ allowSeed?: boolean;
45
+ };
46
+
24
47
  export function createTelephonyTtsProvider(params: {
25
48
  coreConfig: CoreConfig;
26
49
  ttsOverride?: VoiceCallTtsConfig;
27
50
  runtime: TelephonyTtsRuntime;
51
+ logger?: {
52
+ warn?: (message: string) => void;
53
+ };
28
54
  }): TelephonyTtsProvider {
29
- const { coreConfig, ttsOverride, runtime } = params;
55
+ const { coreConfig, ttsOverride, runtime, logger } = params;
30
56
  const mergedConfig = applyTtsOverride(coreConfig, ttsOverride);
57
+ const ttsConfig = mergedConfig.messages?.tts;
58
+ const modelOverrides = resolveTelephonyModelOverridePolicy(
59
+ readTelephonyModelOverrides(ttsConfig),
60
+ );
61
+ const providerConfigs = collectTelephonyProviderConfigs(ttsConfig);
62
+ const activeProvider = normalizeProviderId(ttsConfig?.provider);
63
+ const synthesisTimeoutMs =
64
+ mergedConfig.messages?.tts?.timeoutMs ?? TELEPHONY_DEFAULT_TTS_TIMEOUT_MS;
31
65
 
32
66
  return {
67
+ synthesisTimeoutMs,
33
68
  synthesizeForTelephony: async (text: string) => {
69
+ const directives = parseTtsDirectives(text, modelOverrides, {
70
+ cfg: mergedConfig,
71
+ providerConfigs,
72
+ preferredProviderId: activeProvider,
73
+ });
74
+ if (directives.warnings.length > 0) {
75
+ logger?.warn?.(
76
+ `[voice-call] Ignored telephony TTS directive overrides (${directives.warnings.join("; ")})`,
77
+ );
78
+ }
79
+ const cleanText = directives.hasDirective
80
+ ? directives.ttsText?.trim() || directives.cleanedText.trim()
81
+ : text;
34
82
  const result = await runtime.textToSpeechTelephony({
35
- text,
83
+ text: cleanText,
36
84
  cfg: mergedConfig,
85
+ overrides: directives.overrides,
37
86
  });
38
87
 
39
88
  if (!result.success || !result.audioBuffer || !result.sampleRate) {
40
89
  throw new Error(result.error ?? "TTS conversion failed");
41
90
  }
42
91
 
92
+ if (result.fallbackFrom && result.provider && result.fallbackFrom !== result.provider) {
93
+ const attemptedChain =
94
+ result.attemptedProviders && result.attemptedProviders.length > 0
95
+ ? result.attemptedProviders.join(" -> ")
96
+ : `${result.fallbackFrom} -> ${result.provider}`;
97
+ logger?.warn?.(
98
+ `[voice-call] Telephony TTS fallback used from=${result.fallbackFrom} to=${result.provider} attempts=${attemptedChain}`,
99
+ );
100
+ }
101
+
43
102
  return convertPcmToMulaw8k(result.audioBuffer, result.sampleRate);
44
103
  },
45
104
  };
@@ -80,3 +139,97 @@ function mergeTtsConfig(
80
139
  }
81
140
  return deepMergeDefined(base, override) as VoiceCallTtsConfig;
82
141
  }
142
+
143
+ function resolveTelephonyModelOverridePolicy(
144
+ overrides: TelephonyModelOverrideConfig | undefined,
145
+ ): SpeechModelOverridePolicy {
146
+ const enabled = overrides?.enabled ?? true;
147
+ if (!enabled) {
148
+ return {
149
+ enabled: false,
150
+ allowText: false,
151
+ allowProvider: false,
152
+ allowVoice: false,
153
+ allowModelId: false,
154
+ allowVoiceSettings: false,
155
+ allowNormalization: false,
156
+ allowSeed: false,
157
+ };
158
+ }
159
+ const allow = (value: boolean | undefined, defaultValue = true) => value ?? defaultValue;
160
+ return {
161
+ enabled: true,
162
+ allowText: allow(overrides?.allowText),
163
+ allowProvider: allow(overrides?.allowProvider, false),
164
+ allowVoice: allow(overrides?.allowVoice),
165
+ allowModelId: allow(overrides?.allowModelId),
166
+ allowVoiceSettings: allow(overrides?.allowVoiceSettings),
167
+ allowNormalization: allow(overrides?.allowNormalization),
168
+ allowSeed: allow(overrides?.allowSeed),
169
+ };
170
+ }
171
+
172
+ function readTelephonyModelOverrides(
173
+ ttsConfig: VoiceCallTtsConfig | undefined,
174
+ ): TelephonyModelOverrideConfig | undefined {
175
+ const value = (ttsConfig as Record<string, unknown> | undefined)?.modelOverrides;
176
+ return value && typeof value === "object" && !Array.isArray(value)
177
+ ? (value as TelephonyModelOverrideConfig)
178
+ : undefined;
179
+ }
180
+
181
+ function normalizeProviderId(value: unknown): string | undefined {
182
+ return typeof value === "string" ? value.trim().toLowerCase() || undefined : undefined;
183
+ }
184
+
185
+ function asProviderConfig(value: unknown): SpeechProviderConfig {
186
+ return value && typeof value === "object" && !Array.isArray(value)
187
+ ? (value as SpeechProviderConfig)
188
+ : {};
189
+ }
190
+
191
+ function collectTelephonyProviderConfigs(
192
+ ttsConfig: VoiceCallTtsConfig | undefined,
193
+ ): Record<string, SpeechProviderConfig> {
194
+ if (!ttsConfig) {
195
+ return {};
196
+ }
197
+ const entries: Record<string, SpeechProviderConfig> = {};
198
+ const rawProviders =
199
+ ttsConfig.providers &&
200
+ typeof ttsConfig.providers === "object" &&
201
+ !Array.isArray(ttsConfig.providers)
202
+ ? (ttsConfig.providers as Record<string, unknown>)
203
+ : {};
204
+ for (const [providerId, value] of Object.entries(rawProviders)) {
205
+ const normalized = normalizeProviderId(providerId) ?? providerId;
206
+ entries[normalized] = asProviderConfig(value);
207
+ }
208
+ const reservedKeys = new Set([
209
+ "auto",
210
+ "enabled",
211
+ "maxTextLength",
212
+ "mode",
213
+ "modelOverrides",
214
+ "persona",
215
+ "personas",
216
+ "prefsPath",
217
+ "provider",
218
+ "providers",
219
+ "summaryModel",
220
+ "timeoutMs",
221
+ ]);
222
+ for (const [key, value] of Object.entries(ttsConfig as Record<string, unknown>)) {
223
+ if (
224
+ reservedKeys.has(key) ||
225
+ typeof value !== "object" ||
226
+ value === null ||
227
+ Array.isArray(value)
228
+ ) {
229
+ continue;
230
+ }
231
+ const normalized = normalizeProviderId(key) ?? key;
232
+ entries[normalized] ??= asProviderConfig(value);
233
+ }
234
+ return entries;
235
+ }
@@ -1,4 +1,5 @@
1
1
  import type { VoiceCallConfig } from "./config.js";
2
+ import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js";
2
3
 
3
4
  export function createVoiceCallBaseConfig(params?: {
4
5
  provider?: "telnyx" | "twilio" | "plivo" | "mock";
@@ -10,6 +11,7 @@ export function createVoiceCallBaseConfig(params?: {
10
11
  fromNumber: "+15550001234",
11
12
  inboundPolicy: "disabled",
12
13
  allowFrom: [],
14
+ numbers: {},
13
15
  outbound: { defaultMode: "notify", notifyHangupDelaySec: 3 },
14
16
  maxDurationSeconds: 300,
15
17
  staleCallReaperSeconds: 600,
@@ -17,6 +19,7 @@ export function createVoiceCallBaseConfig(params?: {
17
19
  transcriptTimeoutMs: 180000,
18
20
  ringTimeoutMs: 30000,
19
21
  maxConcurrentCalls: 1,
22
+ sessionScope: "per-phone",
20
23
  serve: { port: 3334, bind: "127.0.0.1", path: "/voice/webhook" },
21
24
  tailscale: { mode: "off", path: "/voice/webhook" },
22
25
  tunnel: {
@@ -30,23 +33,41 @@ export function createVoiceCallBaseConfig(params?: {
30
33
  },
31
34
  streaming: {
32
35
  enabled: false,
33
- sttProvider: "openai-realtime",
34
- sttModel: "gpt-4o-transcribe",
35
- silenceDurationMs: 800,
36
- vadThreshold: 0.5,
36
+ providers: {
37
+ openai: {
38
+ model: "gpt-4o-transcribe",
39
+ silenceDurationMs: 800,
40
+ vadThreshold: 0.5,
41
+ },
42
+ },
37
43
  streamPath: "/voice/stream",
38
44
  preStartTimeoutMs: 5000,
39
45
  maxPendingConnections: 32,
40
46
  maxPendingConnectionsPerIp: 4,
41
47
  maxConnections: 128,
42
48
  },
49
+ realtime: {
50
+ enabled: false,
51
+ streamPath: "/voice/stream/realtime",
52
+ instructions: DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS,
53
+ toolPolicy: "safe-read-only",
54
+ tools: [],
55
+ fastContext: {
56
+ enabled: false,
57
+ timeoutMs: 800,
58
+ maxResults: 3,
59
+ sources: ["memory", "sessions"],
60
+ fallbackToConsult: false,
61
+ },
62
+ providers: {},
63
+ },
43
64
  skipSignatureVerification: false,
44
- stt: { provider: "openai", model: "whisper-1" },
45
65
  tts: {
46
66
  provider: "openai",
47
- openai: { model: "gpt-4o-mini-tts", voice: "coral" },
67
+ providers: {
68
+ openai: { model: "gpt-4o-mini-tts", voice: "coral" },
69
+ },
48
70
  },
49
- responseModel: "openai/gpt-4o-mini",
50
71
  responseTimeoutMs: 30000,
51
72
  };
52
73
  }
@@ -0,0 +1,34 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { resolvePreferredTtsVoice } from "./tts-provider-voice.js";
3
+
4
+ describe("resolvePreferredTtsVoice", () => {
5
+ it("returns provider voice when present", () => {
6
+ expect(
7
+ resolvePreferredTtsVoice({
8
+ tts: {
9
+ provider: "openai",
10
+ providers: {
11
+ openai: {
12
+ voice: "coral",
13
+ },
14
+ },
15
+ },
16
+ }),
17
+ ).toBe("coral");
18
+ });
19
+
20
+ it("falls back to voiceId for providers that use that field", () => {
21
+ expect(
22
+ resolvePreferredTtsVoice({
23
+ tts: {
24
+ provider: "elevenlabs",
25
+ providers: {
26
+ elevenlabs: {
27
+ voiceId: "voice-123",
28
+ },
29
+ },
30
+ },
31
+ }),
32
+ ).toBe("voice-123");
33
+ });
34
+ });
@@ -0,0 +1,21 @@
1
+ import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
2
+ import type { VoiceCallTtsConfig } from "./config.js";
3
+
4
+ function resolveProviderVoiceSetting(providerConfig: unknown): string | undefined {
5
+ if (!providerConfig || typeof providerConfig !== "object") {
6
+ return undefined;
7
+ }
8
+ const candidate = providerConfig as {
9
+ voice?: unknown;
10
+ voiceId?: unknown;
11
+ };
12
+ return normalizeOptionalString(candidate.voice) ?? normalizeOptionalString(candidate.voiceId);
13
+ }
14
+
15
+ export function resolvePreferredTtsVoice(config: { tts?: VoiceCallTtsConfig }): string | undefined {
16
+ const providerId = config.tts?.provider;
17
+ if (!providerId) {
18
+ return undefined;
19
+ }
20
+ return resolveProviderVoiceSetting(config.tts?.providers?.[providerId]);
21
+ }
@@ -0,0 +1,166 @@
1
+ import { EventEmitter } from "node:events";
2
+ import { beforeEach, describe, expect, it, vi } from "vitest";
3
+
4
+ class FakeChildProcess extends EventEmitter {
5
+ readonly stdout = new EventEmitter();
6
+ readonly stderr = new EventEmitter();
7
+ killedWith: NodeJS.Signals | null = null;
8
+
9
+ kill(signal: NodeJS.Signals = "SIGTERM"): boolean {
10
+ this.killedWith = signal;
11
+ queueMicrotask(() => this.emit("close", null));
12
+ return true;
13
+ }
14
+
15
+ close(code: number | null = 0): void {
16
+ this.emit("close", code);
17
+ }
18
+
19
+ fail(error: Error): void {
20
+ this.emit("error", error);
21
+ }
22
+ }
23
+
24
+ const mocks = vi.hoisted(() => ({
25
+ spawn: vi.fn(),
26
+ getTailscaleDnsName: vi.fn(),
27
+ }));
28
+
29
+ vi.mock("node:child_process", () => ({
30
+ spawn: mocks.spawn,
31
+ }));
32
+
33
+ vi.mock("./webhook/tailscale.js", () => ({
34
+ getTailscaleDnsName: mocks.getTailscaleDnsName,
35
+ }));
36
+
37
+ import { isNgrokAvailable, startNgrokTunnel, startTailscaleTunnel, startTunnel } from "./tunnel.js";
38
+
39
+ function nextProcess(): FakeChildProcess {
40
+ const proc = new FakeChildProcess();
41
+ mocks.spawn.mockReturnValueOnce(proc as never);
42
+ return proc;
43
+ }
44
+
45
+ function emitNgrokUrl(proc: FakeChildProcess, url: string): void {
46
+ proc.stdout.emit("data", Buffer.from(`${JSON.stringify({ msg: "started tunnel", url })}\n`));
47
+ }
48
+
49
+ describe("voice-call tunnels", () => {
50
+ beforeEach(() => {
51
+ vi.clearAllMocks();
52
+ mocks.getTailscaleDnsName.mockReset();
53
+ });
54
+
55
+ it("checks ngrok availability from the version command exit code", async () => {
56
+ const proc = nextProcess();
57
+ const result = isNgrokAvailable();
58
+ proc.close(0);
59
+
60
+ await expect(result).resolves.toBe(true);
61
+ expect(mocks.spawn).toHaveBeenCalledWith("ngrok", ["version"], expect.any(Object));
62
+ });
63
+
64
+ it("treats ngrok spawn failures as unavailable", async () => {
65
+ const proc = nextProcess();
66
+ const result = isNgrokAvailable();
67
+ proc.fail(new Error("spawn ngrok ENOENT"));
68
+
69
+ await expect(result).resolves.toBe(false);
70
+ });
71
+
72
+ it("starts ngrok and appends the webhook path to the public URL", async () => {
73
+ const proc = nextProcess();
74
+ const result = startNgrokTunnel({ port: 3334, path: "/voice/webhook" });
75
+
76
+ emitNgrokUrl(proc, "https://abc.ngrok.io");
77
+
78
+ await expect(result).resolves.toMatchObject({
79
+ publicUrl: "https://abc.ngrok.io/voice/webhook",
80
+ provider: "ngrok",
81
+ });
82
+ expect(mocks.spawn).toHaveBeenCalledWith(
83
+ "ngrok",
84
+ expect.arrayContaining(["http", "3334"]),
85
+ expect.any(Object),
86
+ );
87
+ });
88
+
89
+ it("sets ngrok auth token before starting the tunnel", async () => {
90
+ const authProc = nextProcess();
91
+ const tunnelProc = nextProcess();
92
+ const result = startNgrokTunnel({
93
+ port: 3334,
94
+ path: "/hook",
95
+ authToken: "token",
96
+ });
97
+
98
+ authProc.close(0);
99
+ await vi.waitFor(() => expect(mocks.spawn).toHaveBeenCalledTimes(2));
100
+ emitNgrokUrl(tunnelProc, "https://auth.ngrok.io");
101
+
102
+ await expect(result).resolves.toMatchObject({
103
+ publicUrl: "https://auth.ngrok.io/hook",
104
+ });
105
+ expect(mocks.spawn).toHaveBeenNthCalledWith(
106
+ 1,
107
+ "ngrok",
108
+ ["config", "add-authtoken", "token"],
109
+ expect.any(Object),
110
+ );
111
+ });
112
+
113
+ it("rejects ngrok startup errors from stderr", async () => {
114
+ const proc = nextProcess();
115
+ const result = startNgrokTunnel({ port: 3334, path: "/hook" });
116
+
117
+ proc.stderr.emit("data", Buffer.from("ERR_NGROK_3200: invalid auth token"));
118
+
119
+ await expect(result).rejects.toThrow("ngrok error:");
120
+ });
121
+
122
+ it("starts Tailscale serve using the resolved tailnet DNS name", async () => {
123
+ mocks.getTailscaleDnsName.mockResolvedValue("host.tailnet.ts.net");
124
+ const proc = nextProcess();
125
+ const result = startTailscaleTunnel({
126
+ mode: "serve",
127
+ port: 3334,
128
+ path: "voice/webhook",
129
+ });
130
+
131
+ await vi.waitFor(() => expect(mocks.spawn).toHaveBeenCalled());
132
+ proc.close(0);
133
+
134
+ await expect(result).resolves.toMatchObject({
135
+ publicUrl: "https://host.tailnet.ts.net/voice/webhook",
136
+ provider: "tailscale-serve",
137
+ });
138
+ expect(mocks.spawn).toHaveBeenCalledWith(
139
+ "tailscale",
140
+ expect.arrayContaining(["serve", "--set-path", "/voice/webhook"]),
141
+ expect.any(Object),
142
+ );
143
+ });
144
+
145
+ it("rejects Tailscale tunnel startup when the DNS name is unavailable", async () => {
146
+ mocks.getTailscaleDnsName.mockResolvedValue(null);
147
+
148
+ await expect(
149
+ startTailscaleTunnel({ mode: "funnel", port: 3334, path: "/hook" }),
150
+ ).rejects.toThrow("Could not get Tailscale DNS name");
151
+ expect(mocks.spawn).not.toHaveBeenCalled();
152
+ });
153
+
154
+ it("dispatches tunnel providers from config", async () => {
155
+ await expect(startTunnel({ provider: "none", port: 3334, path: "/hook" })).resolves.toBeNull();
156
+
157
+ const proc = nextProcess();
158
+ const result = startTunnel({ provider: "ngrok", port: 3334, path: "/hook" });
159
+ emitNgrokUrl(proc, "https://dispatch.ngrok.io");
160
+
161
+ await expect(result).resolves.toMatchObject({
162
+ publicUrl: "https://dispatch.ngrok.io/hook",
163
+ provider: "ngrok",
164
+ });
165
+ });
166
+ });
package/src/tunnel.ts CHANGED
@@ -4,7 +4,7 @@ import { getTailscaleDnsName } from "./webhook/tailscale.js";
4
4
  /**
5
5
  * Tunnel configuration for exposing the webhook server.
6
6
  */
7
- export interface TunnelConfig {
7
+ interface TunnelConfig {
8
8
  /** Tunnel provider: ngrok, tailscale-serve, or tailscale-funnel */
9
9
  provider: "ngrok" | "tailscale-serve" | "tailscale-funnel" | "none";
10
10
  /** Local port to tunnel */