@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +27 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +943 -0
  6. package/index.ts +379 -149
  7. package/openclaw.plugin.json +384 -157
  8. package/package.json +35 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +273 -12
  17. package/src/config.ts +355 -72
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +243 -19
  24. package/src/manager/events.ts +61 -31
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +528 -0
  28. package/src/manager/outbound.ts +163 -57
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +321 -0
  64. package/src/response-generator.ts +213 -53
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +429 -0
  68. package/src/runtime.ts +270 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +28 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +523 -102
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
@@ -0,0 +1,120 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ VOICE_CALL_LEGACY_CONFIG_REMOVAL_VERSION,
4
+ collectVoiceCallLegacyConfigIssues,
5
+ formatVoiceCallLegacyConfigWarnings,
6
+ migrateVoiceCallLegacyConfigInput,
7
+ normalizeVoiceCallLegacyConfigInput,
8
+ parseVoiceCallPluginConfig,
9
+ } from "./config-compat.js";
10
+
11
+ describe("voice-call config compatibility", () => {
12
// Parsing a legacy-shaped config must yield the canonical provider id and top-level fromNumber.
it("maps deprecated provider and twilio.from fields into canonical config", () => {
  const legacyInput = {
    enabled: true,
    provider: "log",
    twilio: { from: "+15550001234" },
  };

  const { provider, fromNumber } = parseVoiceCallPluginConfig(legacyInput);

  expect(provider).toBe("mock");
  expect(fromNumber).toBe("+15550001234");
});
24
+
25
// Legacy flat streaming keys must be relocated under streaming.providers.openai
// and removed from the streaming object itself.
it("moves legacy streaming OpenAI fields into streaming.providers.openai", () => {
  const normalized = normalizeVoiceCallLegacyConfigInput({
    streaming: {
      enabled: true,
      sttProvider: "openai",
      openaiApiKey: "sk-test", // pragma: allowlist secret
      sttModel: "gpt-4o-transcribe",
      silenceDurationMs: 700,
      vadThreshold: 0.4,
    },
  });

  expect(normalized).toMatchObject({
    streaming: {
      enabled: true,
      provider: "openai",
      providers: {
        openai: {
          apiKey: "sk-test",
          model: "gpt-4o-transcribe",
          silenceDurationMs: 700,
          vadThreshold: 0.4,
        },
      },
    },
  });

  // toMatchObject tolerates extra keys, so verify explicitly that EVERY legacy
  // key was stripped — not just the two string-valued ones.
  const streaming = normalized.streaming as Record<string, unknown>;
  const legacyKeys = [
    "sttProvider",
    "openaiApiKey",
    "sttModel",
    "silenceDurationMs",
    "vadThreshold",
  ];
  for (const legacyKey of legacyKeys) {
    expect(streaming).not.toHaveProperty(legacyKey);
  }
});
54
+
55
// Issue collection and warning formatting must agree on order, paths and wording.
it("reports doctor-oriented legacy issues and warnings", () => {
  const legacyInput = {
    provider: "log",
    twilio: { from: "+15550001234" },
    streaming: {
      sttProvider: "openai",
      openaiApiKey: "sk-test", // pragma: allowlist secret
    },
  };

  const expectedIssues = [
    {
      path: "provider",
      replacement: "provider",
      message: 'Replace provider "log" with "mock".',
    },
    {
      path: "twilio.from",
      replacement: "fromNumber",
      message: "Move twilio.from to fromNumber.",
    },
    {
      path: "streaming.sttProvider",
      replacement: "streaming.provider",
      message: "Move streaming.sttProvider to streaming.provider.",
    },
    {
      path: "streaming.openaiApiKey",
      replacement: "streaming.providers.openai.apiKey",
      message: "Move streaming.openaiApiKey to streaming.providers.openai.apiKey.",
    },
  ];
  expect(collectVoiceCallLegacyConfigIssues(legacyInput)).toEqual(expectedIssues);

  const warnings = formatVoiceCallLegacyConfigWarnings({
    value: legacyInput,
    configPathPrefix: "plugins.entries.voice-call.config",
    doctorFixCommand: "openclaw doctor --fix",
  });
  const expectedWarnings = [
    `[voice-call] legacy config keys detected under plugins.entries.voice-call.config; runtime loading will not rewrite them, and support for the legacy shape will be removed in ${VOICE_CALL_LEGACY_CONFIG_REMOVAL_VERSION}. Run "openclaw doctor --fix".`,
    '[voice-call] plugins.entries.voice-call.config.provider: Replace provider "log" with "mock".',
    "[voice-call] plugins.entries.voice-call.config.twilio.from: Move twilio.from to fromNumber.",
    "[voice-call] plugins.entries.voice-call.config.streaming.sttProvider: Move streaming.sttProvider to streaming.provider.",
    "[voice-call] plugins.entries.voice-call.config.streaming.openaiApiKey: Move streaming.openaiApiKey to streaming.providers.openai.apiKey.",
  ];
  expect(warnings).toEqual(expectedWarnings);
});
103
+
104
// The migration must describe each applied rewrite using fully-qualified config paths.
it("returns doctor migration change lines", () => {
  const legacyInput = {
    provider: "log",
    streaming: { sttProvider: "openai" },
  };

  const { changes } = migrateVoiceCallLegacyConfigInput({
    value: legacyInput,
    configPathPrefix: "plugins.entries.voice-call.config",
  });

  expect(changes).toEqual([
    'Moved plugins.entries.voice-call.config.provider "log" → "mock".',
    "Moved plugins.entries.voice-call.config.streaming.sttProvider → plugins.entries.voice-call.config.streaming.provider.",
  ]);
});
120
+ });
@@ -0,0 +1,227 @@
1
+ import { asOptionalRecord, readStringField } from "openclaw/plugin-sdk/text-runtime";
2
+ import type { VoiceCallConfig } from "./config.js";
3
+ import { VoiceCallConfigSchema } from "./config.js";
4
+
5
/** Version named in legacy-config warnings as the release that drops the legacy shape. */
export const VOICE_CALL_LEGACY_CONFIG_REMOVAL_VERSION = "2026.6.0";

/** One deprecated config key detected in user input, paired with its replacement. */
type VoiceCallLegacyConfigIssue = {
  /** Dotted path of the legacy key, relative to the plugin config root. */
  path: string;
  /** Dotted path of the key that supersedes it. */
  replacement: string;
  /** Human-readable remediation hint. */
  message: string;
};

// Short local aliases for the plugin-sdk helpers used throughout this module.
const asObject = asOptionalRecord;
const getString = readStringField;
15
+
16
/**
 * Reads `key` from `obj` and returns it only when the stored value is a number.
 *
 * @param obj - Record to read from; may be `undefined`.
 * @param key - Property name to look up.
 * @returns The numeric value, or `undefined` when `obj` is missing or the value
 *   is not of type `number`.
 */
function getNumber(obj: Record<string, unknown> | undefined, key: string): number | undefined {
  const candidate = obj?.[key];
  if (typeof candidate !== "number") {
    return undefined;
  }
  return candidate;
}
20
+
21
/**
 * Folds legacy flat provider settings into the `providers` map under `providerId`.
 *
 * Values already present under `providers[providerId]` take precedence over the
 * legacy `compatValues`; legacy values only fill keys that are missing or
 * nullish. This matches how the rest of the migration resolves conflicts
 * (`fromNumber` wins over `twilio.from`, `streaming.provider` wins over
 * `streaming.sttProvider`), so an explicitly configured canonical value is
 * never silently replaced by a deprecated one.
 *
 * @param providersValue - Raw `providers` value from user config (any shape).
 * @param providerId - Provider entry that receives the legacy values.
 * @param compatValues - Legacy settings, keyed by their canonical names.
 * @returns `asObject(providersValue)` unchanged when there is nothing to merge;
 *   otherwise a new record containing the merged provider entry.
 */
function mergeProviderConfig(
  providersValue: unknown,
  providerId: string,
  compatValues: Record<string, unknown>,
): Record<string, unknown> | undefined {
  if (Object.keys(compatValues).length === 0) {
    return asObject(providersValue);
  }

  const providers = asObject(providersValue) ?? {};
  const mergedEntry: Record<string, unknown> = { ...(asObject(providers[providerId]) ?? {}) };
  for (const [key, legacyValue] of Object.entries(compatValues)) {
    // Only backfill: a canonical value that is already set must survive.
    if (mergedEntry[key] === undefined || mergedEntry[key] === null) {
      mergedEntry[key] = legacyValue;
    }
  }

  return {
    ...providers,
    [providerId]: mergedEntry,
  };
}
40
+
41
/**
 * Scans raw plugin config for deprecated keys.
 *
 * Detection is type-sensitive: string-valued legacy keys are reported only when
 * they hold a string, numeric ones only when they hold a number. Issues are
 * returned in a fixed order (provider, twilio, then streaming keys).
 *
 * @param value - Raw config input; non-record input yields no issues.
 * @returns One issue per legacy key found.
 */
export function collectVoiceCallLegacyConfigIssues(value: unknown): VoiceCallLegacyConfigIssue[] {
  const root = asObject(value) ?? {};
  const twilioSection = asObject(root.twilio);
  const streamingSection = asObject(root.streaming);

  // Each entry pairs "was this legacy key detected?" with the issue to report.
  const candidates: Array<[boolean, VoiceCallLegacyConfigIssue]> = [
    [
      root.provider === "log",
      {
        path: "provider",
        replacement: "provider",
        message: 'Replace provider "log" with "mock".',
      },
    ],
    [
      typeof twilioSection?.from === "string",
      {
        path: "twilio.from",
        replacement: "fromNumber",
        message: "Move twilio.from to fromNumber.",
      },
    ],
    [
      typeof streamingSection?.sttProvider === "string",
      {
        path: "streaming.sttProvider",
        replacement: "streaming.provider",
        message: "Move streaming.sttProvider to streaming.provider.",
      },
    ],
    [
      typeof streamingSection?.openaiApiKey === "string",
      {
        path: "streaming.openaiApiKey",
        replacement: "streaming.providers.openai.apiKey",
        message: "Move streaming.openaiApiKey to streaming.providers.openai.apiKey.",
      },
    ],
    [
      typeof streamingSection?.sttModel === "string",
      {
        path: "streaming.sttModel",
        replacement: "streaming.providers.openai.model",
        message: "Move streaming.sttModel to streaming.providers.openai.model.",
      },
    ],
    [
      typeof streamingSection?.silenceDurationMs === "number",
      {
        path: "streaming.silenceDurationMs",
        replacement: "streaming.providers.openai.silenceDurationMs",
        message: "Move streaming.silenceDurationMs to streaming.providers.openai.silenceDurationMs.",
      },
    ],
    [
      typeof streamingSection?.vadThreshold === "number",
      {
        path: "streaming.vadThreshold",
        replacement: "streaming.providers.openai.vadThreshold",
        message: "Move streaming.vadThreshold to streaming.providers.openai.vadThreshold.",
      },
    ],
  ];

  return candidates.filter(([detected]) => detected).map(([, issue]) => issue);
}
99
+
100
/**
 * Renders legacy-config issues as log-ready warning lines.
 *
 * @param params.value - Raw plugin config to inspect.
 * @param params.configPathPrefix - Path prepended to each issue path.
 * @param params.doctorFixCommand - Command quoted in the summary line.
 * @returns An empty array when no legacy keys are found; otherwise a summary
 *   line followed by one line per issue.
 */
export function formatVoiceCallLegacyConfigWarnings(params: {
  value: unknown;
  configPathPrefix: string;
  doctorFixCommand: string;
}): string[] {
  const detected = collectVoiceCallLegacyConfigIssues(params.value);
  if (detected.length === 0) {
    return [];
  }

  const lines: string[] = [
    `[voice-call] legacy config keys detected under ${params.configPathPrefix}; runtime loading will not rewrite them, and support for the legacy shape will be removed in ${VOICE_CALL_LEGACY_CONFIG_REMOVAL_VERSION}. Run "${params.doctorFixCommand}".`,
  ];
  for (const { path, message } of detected) {
    lines.push(`[voice-call] ${params.configPathPrefix}.${path}: ${message}`);
  }
  return lines;
}
117
+
118
/**
 * Rewrites legacy voice-call config keys into their canonical locations.
 *
 * The input is not mutated: `twilio` and `streaming` are shallow-copied before
 * legacy keys are removed from them. When both a canonical and a legacy value
 * exist for `fromNumber` or `streaming.provider`, the canonical one is kept.
 *
 * @param params.value - Raw plugin config; non-record input is treated as `{}`.
 * @param params.configPathPrefix - Prefix used in change lines; defaults to
 *   `plugins.entries.voice-call.config`.
 * @returns The rewritten config, human-readable change lines, and the legacy
 *   issues detected in the input.
 */
export function migrateVoiceCallLegacyConfigInput(params: {
  value: unknown;
  configPathPrefix?: string;
}): {
  config: Record<string, unknown>;
  changes: string[];
  issues: VoiceCallLegacyConfigIssue[];
} {
  const root = asObject(params.value) ?? {};
  const legacyTwilio = asObject(root.twilio);
  const legacyStreaming = asObject(root.streaming);
  const prefix = params.configPathPrefix ?? "plugins.entries.voice-call.config";
  const issues = collectVoiceCallLegacyConfigIssues(root);

  // Gather the flat legacy OpenAI streaming settings under their canonical names.
  const openAICompat: Record<string, unknown> = {};
  const stringMoves = [
    ["openaiApiKey", "apiKey"],
    ["sttModel", "model"],
  ] as const;
  for (const [legacyKey, canonicalKey] of stringMoves) {
    const text = getString(legacyStreaming, legacyKey);
    if (text) {
      openAICompat[canonicalKey] = text;
    }
  }
  for (const numericKey of ["silenceDurationMs", "vadThreshold"] as const) {
    const amount = getNumber(legacyStreaming, numericKey);
    if (amount !== undefined) {
      openAICompat[numericKey] = amount;
    }
  }
  const canonicalProvider = getString(legacyStreaming, "provider");
  const deprecatedProvider = getString(legacyStreaming, "sttProvider");

  let streaming: Record<string, unknown> | undefined;
  if (legacyStreaming) {
    streaming = {
      ...legacyStreaming,
      provider: canonicalProvider ?? deprecatedProvider,
      providers: mergeProviderConfig(legacyStreaming.providers, "openai", openAICompat),
    };
    // Strip every legacy key from the copy, whether or not its value was carried over.
    const legacyStreamingKeys = [
      "sttProvider",
      "openaiApiKey",
      "sttModel",
      "silenceDurationMs",
      "vadThreshold",
    ];
    for (const legacyKey of legacyStreamingKeys) {
      delete streaming[legacyKey];
    }
  }

  let twilio: Record<string, unknown> | undefined;
  if (legacyTwilio) {
    twilio = { ...legacyTwilio };
    delete twilio.from;
  }

  const legacyFrom = typeof legacyTwilio?.from === "string" ? legacyTwilio.from : undefined;
  const config = {
    ...root,
    provider: root.provider === "log" ? "mock" : root.provider,
    fromNumber: root.fromNumber ?? legacyFrom,
    twilio,
    streaming,
  };

  // Each entry pairs "did this rewrite apply?" with the line that describes it.
  const changeCandidates: Array<[boolean, string]> = [
    [root.provider === "log", `Moved ${prefix}.provider "log" → "mock".`],
    [
      typeof legacyTwilio?.from === "string" && typeof root.fromNumber !== "string",
      `Moved ${prefix}.twilio.from → ${prefix}.fromNumber.`,
    ],
    [
      typeof legacyStreaming?.sttProvider === "string",
      `Moved ${prefix}.streaming.sttProvider → ${prefix}.streaming.provider.`,
    ],
    [
      typeof legacyStreaming?.openaiApiKey === "string",
      `Moved ${prefix}.streaming.openaiApiKey → ${prefix}.streaming.providers.openai.apiKey.`,
    ],
    [
      typeof legacyStreaming?.sttModel === "string",
      `Moved ${prefix}.streaming.sttModel → ${prefix}.streaming.providers.openai.model.`,
    ],
    [
      typeof legacyStreaming?.silenceDurationMs === "number",
      `Moved ${prefix}.streaming.silenceDurationMs → ${prefix}.streaming.providers.openai.silenceDurationMs.`,
    ],
    [
      typeof legacyStreaming?.vadThreshold === "number",
      `Moved ${prefix}.streaming.vadThreshold → ${prefix}.streaming.providers.openai.vadThreshold.`,
    ],
  ];
  const changes = changeCandidates.filter(([applies]) => applies).map(([, line]) => line);

  return { config, changes, issues };
}
220
+
221
/**
 * Convenience wrapper around `migrateVoiceCallLegacyConfigInput` that returns
 * only the rewritten config, discarding the change lines and issues.
 */
export function normalizeVoiceCallLegacyConfigInput(value: unknown): Record<string, unknown> {
  const { config } = migrateVoiceCallLegacyConfigInput({ value });
  return config;
}
224
+
225
/**
 * Normalizes legacy keys in `value`, then parses the result with
 * `VoiceCallConfigSchema`. Errors raised by the schema's `parse` propagate.
 */
export function parseVoiceCallPluginConfig(value: unknown): VoiceCallConfig {
  const canonicalInput = normalizeVoiceCallLegacyConfigInput(value);
  return VoiceCallConfigSchema.parse(canonicalInput);
}
@@ -1,5 +1,10 @@
1
1
  import { afterEach, beforeEach, describe, expect, it } from "vitest";
2
2
  import {
3
+ VoiceCallConfigSchema,
4
+ resolveTwilioAuthToken,
5
+ resolveVoiceCallEffectiveConfig,
6
+ resolveVoiceCallNumberRouteKey,
7
+ resolveVoiceCallSessionKey,
3
8
  validateProviderConfig,
4
9
  normalizeVoiceCallConfig,
5
10
  resolveVoiceCallConfig,
@@ -11,11 +16,25 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
11
16
  return createVoiceCallBaseConfig({ provider });
12
17
  }
13
18
 
19
/** Builds an env-sourced secret reference literal (provider "default") for the given id. */
function envRef(id: string) {
  const ref = { source: "env" as const, provider: "default", id };
  return ref;
}
22
+
23
/**
 * Returns `config.tts` together with its `providers.elevenlabs` entry.
 *
 * @throws Error when the elevenlabs entry is missing or is not an object.
 */
function requireElevenLabsTtsConfig(config: Pick<VoiceCallConfig, "tts">) {
  const { tts } = config;
  const elevenlabs = tts?.providers?.elevenlabs;
  if (elevenlabs && typeof elevenlabs === "object") {
    return { tts, elevenlabs };
  }
  throw new Error("voice-call config did not preserve nested elevenlabs TTS config");
}
31
+
14
32
  describe("validateProviderConfig", () => {
15
33
  const originalEnv = { ...process.env };
16
34
  const clearProviderEnv = () => {
17
35
  delete process.env.TWILIO_ACCOUNT_SID;
18
36
  delete process.env.TWILIO_AUTH_TOKEN;
37
+ delete process.env.TWILIO_FROM_NUMBER;
19
38
  delete process.env.TELNYX_API_KEY;
20
39
  delete process.env.TELNYX_CONNECTION_ID;
21
40
  delete process.env.TELNYX_PUBLIC_KEY;
@@ -54,6 +73,7 @@ describe("validateProviderConfig", () => {
54
73
  if (provider === "twilio") {
55
74
  process.env.TWILIO_ACCOUNT_SID = "AC123";
56
75
  process.env.TWILIO_AUTH_TOKEN = "secret";
76
+ process.env.TWILIO_FROM_NUMBER = "+15550001234";
57
77
  } else if (provider === "telnyx") {
58
78
  process.env.TELNYX_API_KEY = "KEY123";
59
79
  process.env.TELNYX_CONNECTION_ID = "CONN456";
@@ -69,6 +89,24 @@ describe("validateProviderConfig", () => {
69
89
  });
70
90
 
71
91
  describe("twilio provider", () => {
92
+ it("accepts SecretRef-backed auth tokens before runtime resolution", () => {
93
+ const config = VoiceCallConfigSchema.parse({
94
+ enabled: true,
95
+ provider: "twilio",
96
+ fromNumber: "+15550001234",
97
+ twilio: {
98
+ accountSid: "AC123",
99
+ authToken: envRef("TWILIO_AUTH_TOKEN"),
100
+ },
101
+ });
102
+
103
+ expect(config.twilio?.authToken).toEqual(envRef("TWILIO_AUTH_TOKEN"));
104
+ expect(validateProviderConfig(config)).toMatchObject({ valid: true, errors: [] });
105
+ expect(() => resolveTwilioAuthToken(config)).toThrow(
106
+ 'plugins.entries.voice-call.config.twilio.authToken: unresolved SecretRef "env:default:TWILIO_AUTH_TOKEN"',
107
+ );
108
+ });
109
+
72
110
  it("passes validation with mixed config and env vars", () => {
73
111
  process.env.TWILIO_AUTH_TOKEN = "secret";
74
112
  let config = createBaseConfig("twilio");
@@ -81,6 +119,20 @@ describe("validateProviderConfig", () => {
81
119
  expect(result.errors).toEqual([]);
82
120
  });
83
121
 
122
+ it("resolves the Twilio from number from environment", () => {
123
+ process.env.TWILIO_ACCOUNT_SID = "AC123";
124
+ process.env.TWILIO_AUTH_TOKEN = "secret";
125
+ process.env.TWILIO_FROM_NUMBER = "+15550001234";
126
+
127
+ const config = resolveVoiceCallConfig({
128
+ ...createBaseConfig("twilio"),
129
+ fromNumber: undefined,
130
+ });
131
+
132
+ expect(config.fromNumber).toBe("+15550001234");
133
+ expect(validateProviderConfig(config)).toMatchObject({ valid: true, errors: [] });
134
+ });
135
+
84
136
  it("fails validation when required twilio credentials are missing", () => {
85
137
  process.env.TWILIO_AUTH_TOKEN = "secret";
86
138
  const missingSid = validateProviderConfig(resolveVoiceCallConfig(createBaseConfig("twilio")));
@@ -170,6 +222,153 @@ describe("validateProviderConfig", () => {
170
222
  expect(result.errors).toEqual([]);
171
223
  });
172
224
  });
225
+
226
+ describe("realtime config", () => {
227
+ it("rejects disabled inbound policy for realtime mode", () => {
228
+ const config = createBaseConfig("twilio");
229
+ config.realtime.enabled = true;
230
+ config.inboundPolicy = "disabled";
231
+
232
+ const result = validateProviderConfig(config);
233
+
234
+ expect(result.valid).toBe(false);
235
+ expect(result.errors).toContain(
236
+ 'plugins.entries.voice-call.config.inboundPolicy must not be "disabled" when realtime.enabled is true',
237
+ );
238
+ });
239
+
240
+ it("rejects enabling realtime and streaming together", () => {
241
+ const config = createBaseConfig("twilio");
242
+ config.realtime.enabled = true;
243
+ config.streaming.enabled = true;
244
+ config.inboundPolicy = "allowlist";
245
+
246
+ const result = validateProviderConfig(config);
247
+
248
+ expect(result.valid).toBe(false);
249
+ expect(result.errors).toContain(
250
+ "plugins.entries.voice-call.config.realtime.enabled and plugins.entries.voice-call.config.streaming.enabled cannot both be true",
251
+ );
252
+ });
253
+ });
254
+ });
255
+
256
+ describe("resolveVoiceCallConfig", () => {
257
+ it("enables the pre-answer stale call reaper by default", () => {
258
+ const config = resolveVoiceCallConfig({ enabled: true, provider: "mock" });
259
+
260
+ expect(config.staleCallReaperSeconds).toBe(120);
261
+ });
262
+
263
+ it("keeps voice sessions scoped by phone by default", () => {
264
+ const config = resolveVoiceCallConfig({ enabled: true, provider: "mock" });
265
+
266
+ expect(config.sessionScope).toBe("per-phone");
267
+ expect(
268
+ resolveVoiceCallSessionKey({
269
+ config,
270
+ callId: "call-123",
271
+ phone: "+1 (555) 000-1111",
272
+ }),
273
+ ).toBe("voice:15550001111");
274
+ });
275
+
276
+ it("can scope voice sessions to each call", () => {
277
+ const config = resolveVoiceCallConfig({
278
+ enabled: true,
279
+ provider: "mock",
280
+ sessionScope: "per-call",
281
+ });
282
+
283
+ expect(config.sessionScope).toBe("per-call");
284
+ expect(
285
+ resolveVoiceCallSessionKey({
286
+ config,
287
+ callId: "call-123",
288
+ phone: "+1 (555) 000-1111",
289
+ }),
290
+ ).toBe("voice:call:call-123");
291
+ });
292
+
293
+ it("preserves explicit voice session keys", () => {
294
+ const config = resolveVoiceCallConfig({
295
+ enabled: true,
296
+ provider: "mock",
297
+ sessionScope: "per-call",
298
+ });
299
+
300
+ expect(
301
+ resolveVoiceCallSessionKey({
302
+ config,
303
+ callId: "call-123",
304
+ phone: "+1 (555) 000-1111",
305
+ explicitSessionKey: "meet-room-1",
306
+ }),
307
+ ).toBe("meet-room-1");
308
+ });
309
+
310
+ it("resolves per-number inbound route overrides over global voice settings", () => {
311
+ const config = resolveVoiceCallConfig({
312
+ enabled: true,
313
+ provider: "mock",
314
+ inboundGreeting: "Hello from global.",
315
+ agentId: "main",
316
+ responseModel: "openai/gpt-5.4-mini",
317
+ responseSystemPrompt: "Global voice assistant.",
318
+ responseTimeoutMs: 10000,
319
+ tts: {
320
+ provider: "openai",
321
+ providers: {
322
+ openai: { voice: "coral", speed: 1 },
323
+ },
324
+ },
325
+ numbers: {
326
+ "+15550001111": {
327
+ inboundGreeting: "Silver Fox Cards, how can I help?",
328
+ agentId: "cards",
329
+ responseModel: "openai/gpt-5.5",
330
+ responseSystemPrompt: "You are a baseball card expert.",
331
+ responseTimeoutMs: 20000,
332
+ tts: {
333
+ providers: {
334
+ openai: { voice: "alloy" },
335
+ },
336
+ },
337
+ },
338
+ },
339
+ });
340
+
341
+ expect(resolveVoiceCallNumberRouteKey(config, "+1 (555) 000-1111")).toBe("+15550001111");
342
+ const effective = resolveVoiceCallEffectiveConfig(config, "+1 (555) 000-1111");
343
+
344
+ expect(effective.numberRouteKey).toBe("+15550001111");
345
+ expect(effective.config.inboundGreeting).toBe("Silver Fox Cards, how can I help?");
346
+ expect(effective.config.agentId).toBe("cards");
347
+ expect(effective.config.responseModel).toBe("openai/gpt-5.5");
348
+ expect(effective.config.responseSystemPrompt).toBe("You are a baseball card expert.");
349
+ expect(effective.config.responseTimeoutMs).toBe(20000);
350
+ expect(effective.config.tts?.provider).toBe("openai");
351
+ expect(effective.config.tts?.providers?.openai).toEqual({ voice: "alloy", speed: 1 });
352
+ });
353
+
354
+ it("falls back to global voice settings when no per-number route matches", () => {
355
+ const config = resolveVoiceCallConfig({
356
+ enabled: true,
357
+ provider: "mock",
358
+ inboundGreeting: "Hello from global.",
359
+ numbers: {
360
+ "+15550001111": {
361
+ inboundGreeting: "Hello from route.",
362
+ },
363
+ },
364
+ });
365
+
366
+ const effective = resolveVoiceCallEffectiveConfig(config, "+15550002222");
367
+
368
+ expect(effective.numberRouteKey).toBeUndefined();
369
+ expect(effective.config).toBe(config);
370
+ expect(effective.config.inboundGreeting).toBe("Hello from global.");
371
+ });
173
372
  });
174
373
 
175
374
  describe("normalizeVoiceCallConfig", () => {
@@ -185,34 +384,96 @@ describe("normalizeVoiceCallConfig", () => {
185
384
 
186
385
  expect(normalized.serve.path).toBe("/voice/webhook");
187
386
  expect(normalized.streaming.streamPath).toBe("/custom-stream");
188
- expect(normalized.streaming.sttModel).toBe("gpt-4o-transcribe");
387
+ expect(normalized.streaming.provider).toBeUndefined();
388
+ expect(normalized.streaming.providers).toEqual({});
389
+ expect(normalized.realtime.streamPath).toBe("/voice/stream/realtime");
390
+ expect(normalized.realtime.toolPolicy).toBe("safe-read-only");
391
+ expect(normalized.realtime.fastContext).toEqual({
392
+ enabled: false,
393
+ timeoutMs: 800,
394
+ maxResults: 3,
395
+ sources: ["memory", "sessions"],
396
+ fallbackToConsult: false,
397
+ });
398
+ expect(normalized.realtime.instructions).toContain("openclaw_agent_consult");
189
399
  expect(normalized.tunnel.provider).toBe("none");
190
400
  expect(normalized.webhookSecurity.allowedHosts).toEqual([]);
191
401
  });
192
402
 
403
+ it("derives the realtime stream path from a custom webhook path", () => {
404
+ const normalized = normalizeVoiceCallConfig({
405
+ enabled: true,
406
+ provider: "twilio",
407
+ serve: {
408
+ path: "/custom/webhook",
409
+ },
410
+ });
411
+
412
+ expect(normalized.realtime.streamPath).toBe("/custom/stream/realtime");
413
+ });
414
+
193
415
  it("accepts partial nested TTS overrides and preserves nested objects", () => {
194
416
  const normalized = normalizeVoiceCallConfig({
195
417
  tts: {
196
418
  provider: "elevenlabs",
197
- elevenlabs: {
198
- apiKey: {
199
- source: "env",
200
- provider: "elevenlabs",
201
- id: "ELEVENLABS_API_KEY",
202
- },
203
- voiceSettings: {
204
- speed: 1.1,
419
+ providers: {
420
+ elevenlabs: {
421
+ apiKey: {
422
+ source: "env",
423
+ provider: "elevenlabs",
424
+ id: "ELEVENLABS_API_KEY",
425
+ },
426
+ voiceSettings: {
427
+ speed: 1.1,
428
+ },
205
429
  },
206
430
  },
207
431
  },
208
432
  });
209
433
 
210
- expect(normalized.tts?.provider).toBe("elevenlabs");
211
- expect(normalized.tts?.elevenlabs?.apiKey).toEqual({
434
+ const { tts, elevenlabs } = requireElevenLabsTtsConfig(normalized);
435
+ expect(tts.provider).toBe("elevenlabs");
436
+ expect(elevenlabs.apiKey).toEqual({
212
437
  source: "env",
213
438
  provider: "elevenlabs",
214
439
  id: "ELEVENLABS_API_KEY",
215
440
  });
216
- expect(normalized.tts?.elevenlabs?.voiceSettings).toEqual({ speed: 1.1 });
441
+ expect(elevenlabs.voiceSettings).toEqual({ speed: 1.1 });
442
+ });
443
+ });
444
+
445
+ describe("resolveVoiceCallConfig", () => {
446
+ it("preserves configured realtime instructions without env indirection", () => {
447
+ const resolved = resolveVoiceCallConfig({
448
+ enabled: true,
449
+ provider: "twilio",
450
+ realtime: {
451
+ enabled: true,
452
+ instructions: "Stay concise.",
453
+ },
454
+ });
455
+
456
+ expect(resolved.realtime.instructions).toBe("Stay concise.");
457
+ expect(resolved.realtime.toolPolicy).toBe("safe-read-only");
458
+ expect(resolved.realtime.provider).toBeUndefined();
459
+ });
460
+
461
+ it("leaves responseModel unset so voice responses can inherit runtime defaults", () => {
462
+ const resolved = resolveVoiceCallConfig({
463
+ enabled: true,
464
+ provider: "mock",
465
+ });
466
+
467
+ expect(resolved.responseModel).toBeUndefined();
468
+ });
469
+
470
+ it("preserves the configured voice response agent id", () => {
471
+ const resolved = resolveVoiceCallConfig({
472
+ enabled: true,
473
+ provider: "mock",
474
+ agentId: "voice",
475
+ });
476
+
477
+ expect(resolved.agentId).toBe("voice");
217
478
  });
218
479
  });