@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +943 -0
- package/index.ts +379 -149
- package/openclaw.plugin.json +384 -157
- package/package.json +35 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +273 -12
- package/src/config.ts +355 -72
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +243 -19
- package/src/manager/events.ts +61 -31
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +528 -0
- package/src/manager/outbound.ts +163 -57
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +321 -0
- package/src/response-generator.ts +213 -53
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +429 -0
- package/src/runtime.ts +270 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +28 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +523 -102
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
package/src/config.ts
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
|
+
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES } from "openclaw/plugin-sdk/realtime-voice";
|
|
1
2
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
} from "openclaw/plugin-sdk/
|
|
7
|
-
import { z } from "zod";
|
|
3
|
+
buildSecretInputSchema,
|
|
4
|
+
hasConfiguredSecretInput,
|
|
5
|
+
normalizeResolvedSecretInputString,
|
|
6
|
+
type SecretInput,
|
|
7
|
+
} from "openclaw/plugin-sdk/secret-input";
|
|
8
|
+
import { z } from "openclaw/plugin-sdk/zod";
|
|
9
|
+
import { TtsConfigSchema } from "../api.js";
|
|
8
10
|
import { deepMergeDefined } from "./deep-merge.js";
|
|
11
|
+
import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js";
|
|
9
12
|
|
|
10
13
|
// -----------------------------------------------------------------------------
|
|
11
14
|
// Phone Number Validation
|
|
@@ -15,7 +18,7 @@ import { deepMergeDefined } from "./deep-merge.js";
|
|
|
15
18
|
* E.164 phone number format: +[country code][number]
|
|
16
19
|
* Examples use 555 prefix (reserved for fictional numbers)
|
|
17
20
|
*/
|
|
18
|
-
|
|
21
|
+
const E164Schema = z
|
|
19
22
|
.string()
|
|
20
23
|
.regex(/^\+[1-9]\d{1,14}$/, "Expected E.164 format, e.g. +15550001234");
|
|
21
24
|
|
|
@@ -30,14 +33,15 @@ export const E164Schema = z
|
|
|
30
33
|
* - "pairing": Unknown callers can request pairing (future)
|
|
31
34
|
* - "open": Accept all inbound calls (dangerous!)
|
|
32
35
|
*/
|
|
33
|
-
|
|
34
|
-
export type InboundPolicy = z.infer<typeof InboundPolicySchema>;
|
|
36
|
+
const InboundPolicySchema = z.enum(["disabled", "allowlist", "pairing", "open"]);
|
|
35
37
|
|
|
36
38
|
// -----------------------------------------------------------------------------
|
|
37
39
|
// Provider-Specific Configuration
|
|
38
40
|
// -----------------------------------------------------------------------------
|
|
39
41
|
|
|
40
|
-
|
|
42
|
+
const SecretInputSchema = buildSecretInputSchema();
|
|
43
|
+
|
|
44
|
+
const TelnyxConfigSchema = z
|
|
41
45
|
.object({
|
|
42
46
|
/** Telnyx API v2 key */
|
|
43
47
|
apiKey: z.string().min(1).optional(),
|
|
@@ -49,17 +53,16 @@ export const TelnyxConfigSchema = z
|
|
|
49
53
|
.strict();
|
|
50
54
|
export type TelnyxConfig = z.infer<typeof TelnyxConfigSchema>;
|
|
51
55
|
|
|
52
|
-
|
|
56
|
+
const TwilioConfigSchema = z
|
|
53
57
|
.object({
|
|
54
58
|
/** Twilio Account SID */
|
|
55
59
|
accountSid: z.string().min(1).optional(),
|
|
56
60
|
/** Twilio Auth Token */
|
|
57
|
-
authToken:
|
|
61
|
+
authToken: SecretInputSchema.optional(),
|
|
58
62
|
})
|
|
59
63
|
.strict();
|
|
60
|
-
export type TwilioConfig = z.infer<typeof TwilioConfigSchema>;
|
|
61
64
|
|
|
62
|
-
|
|
65
|
+
const PlivoConfigSchema = z
|
|
63
66
|
.object({
|
|
64
67
|
/** Plivo Auth ID (starts with MA/SA) */
|
|
65
68
|
authId: z.string().min(1).optional(),
|
|
@@ -69,29 +72,31 @@ export const PlivoConfigSchema = z
|
|
|
69
72
|
.strict();
|
|
70
73
|
export type PlivoConfig = z.infer<typeof PlivoConfigSchema>;
|
|
71
74
|
|
|
72
|
-
|
|
73
|
-
// STT/TTS Configuration
|
|
74
|
-
// -----------------------------------------------------------------------------
|
|
75
|
+
export type VoiceCallTtsConfig = z.infer<typeof TtsConfigSchema>;
|
|
75
76
|
|
|
76
|
-
|
|
77
|
+
const VoiceCallNumberRouteConfigSchema = z
|
|
77
78
|
.object({
|
|
78
|
-
/**
|
|
79
|
-
|
|
80
|
-
/**
|
|
81
|
-
|
|
79
|
+
/** Greeting message for inbound calls to this number. */
|
|
80
|
+
inboundGreeting: z.string().optional(),
|
|
81
|
+
/** TTS override for inbound calls to this number. Deep-merges with global voice-call TTS. */
|
|
82
|
+
tts: TtsConfigSchema,
|
|
83
|
+
/** Agent ID to use for voice response generation for this number. */
|
|
84
|
+
agentId: z.string().min(1).optional(),
|
|
85
|
+
/** Optional model override for voice responses for this number. */
|
|
86
|
+
responseModel: z.string().optional(),
|
|
87
|
+
/** System prompt for voice responses for this number. */
|
|
88
|
+
responseSystemPrompt: z.string().optional(),
|
|
89
|
+
/** Timeout for response generation in ms for this number. */
|
|
90
|
+
responseTimeoutMs: z.number().int().positive().optional(),
|
|
82
91
|
})
|
|
83
|
-
.strict()
|
|
84
|
-
|
|
85
|
-
export type SttConfig = z.infer<typeof SttConfigSchema>;
|
|
86
|
-
|
|
87
|
-
export { TtsAutoSchema, TtsConfigSchema, TtsModeSchema, TtsProviderSchema };
|
|
88
|
-
export type VoiceCallTtsConfig = z.infer<typeof TtsConfigSchema>;
|
|
92
|
+
.strict();
|
|
93
|
+
export type VoiceCallNumberRouteConfig = z.infer<typeof VoiceCallNumberRouteConfigSchema>;
|
|
89
94
|
|
|
90
95
|
// -----------------------------------------------------------------------------
|
|
91
96
|
// Webhook Server Configuration
|
|
92
97
|
// -----------------------------------------------------------------------------
|
|
93
98
|
|
|
94
|
-
|
|
99
|
+
const VoiceCallServeConfigSchema = z
|
|
95
100
|
.object({
|
|
96
101
|
/** Port to listen on */
|
|
97
102
|
port: z.number().int().positive().default(3334),
|
|
@@ -102,9 +107,8 @@ export const VoiceCallServeConfigSchema = z
|
|
|
102
107
|
})
|
|
103
108
|
.strict()
|
|
104
109
|
.default({ port: 3334, bind: "127.0.0.1", path: "/voice/webhook" });
|
|
105
|
-
export type VoiceCallServeConfig = z.infer<typeof VoiceCallServeConfigSchema>;
|
|
106
110
|
|
|
107
|
-
|
|
111
|
+
const VoiceCallTailscaleConfigSchema = z
|
|
108
112
|
.object({
|
|
109
113
|
/**
|
|
110
114
|
* Tailscale exposure mode:
|
|
@@ -118,13 +122,12 @@ export const VoiceCallTailscaleConfigSchema = z
|
|
|
118
122
|
})
|
|
119
123
|
.strict()
|
|
120
124
|
.default({ mode: "off", path: "/voice/webhook" });
|
|
121
|
-
export type VoiceCallTailscaleConfig = z.infer<typeof VoiceCallTailscaleConfigSchema>;
|
|
122
125
|
|
|
123
126
|
// -----------------------------------------------------------------------------
|
|
124
127
|
// Tunnel Configuration (unified ngrok/tailscale)
|
|
125
128
|
// -----------------------------------------------------------------------------
|
|
126
129
|
|
|
127
|
-
|
|
130
|
+
const VoiceCallTunnelConfigSchema = z
|
|
128
131
|
.object({
|
|
129
132
|
/**
|
|
130
133
|
* Tunnel provider:
|
|
@@ -149,13 +152,12 @@ export const VoiceCallTunnelConfigSchema = z
|
|
|
149
152
|
})
|
|
150
153
|
.strict()
|
|
151
154
|
.default({ provider: "none", allowNgrokFreeTierLoopbackBypass: false });
|
|
152
|
-
export type VoiceCallTunnelConfig = z.infer<typeof VoiceCallTunnelConfigSchema>;
|
|
153
155
|
|
|
154
156
|
// -----------------------------------------------------------------------------
|
|
155
157
|
// Webhook Security Configuration
|
|
156
158
|
// -----------------------------------------------------------------------------
|
|
157
159
|
|
|
158
|
-
|
|
160
|
+
const VoiceCallWebhookSecurityConfigSchema = z
|
|
159
161
|
.object({
|
|
160
162
|
/**
|
|
161
163
|
* Allowed hostnames for webhook URL reconstruction.
|
|
@@ -186,10 +188,13 @@ export type WebhookSecurityConfig = z.infer<typeof VoiceCallWebhookSecurityConfi
|
|
|
186
188
|
* - "notify": Deliver message and auto-hangup after delay (one-way notification)
|
|
187
189
|
* - "conversation": Stay open for back-and-forth until explicit end or timeout
|
|
188
190
|
*/
|
|
189
|
-
|
|
191
|
+
const CallModeSchema = z.enum(["notify", "conversation"]);
|
|
190
192
|
export type CallMode = z.infer<typeof CallModeSchema>;
|
|
191
193
|
|
|
192
|
-
|
|
194
|
+
const VoiceCallSessionScopeSchema = z.enum(["per-phone", "per-call"]);
|
|
195
|
+
export type VoiceCallSessionScope = z.infer<typeof VoiceCallSessionScopeSchema>;
|
|
196
|
+
|
|
197
|
+
const OutboundConfigSchema = z
|
|
193
198
|
.object({
|
|
194
199
|
/** Default call mode for outbound calls */
|
|
195
200
|
defaultMode: CallModeSchema.default("notify"),
|
|
@@ -198,28 +203,115 @@ export const OutboundConfigSchema = z
|
|
|
198
203
|
})
|
|
199
204
|
.strict()
|
|
200
205
|
.default({ defaultMode: "notify", notifyHangupDelaySec: 3 });
|
|
201
|
-
export type OutboundConfig = z.infer<typeof OutboundConfigSchema>;
|
|
202
206
|
|
|
203
207
|
// -----------------------------------------------------------------------------
|
|
204
|
-
//
|
|
208
|
+
// Realtime Voice Configuration
|
|
209
|
+
// -----------------------------------------------------------------------------
|
|
210
|
+
|
|
211
|
+
const RealtimeToolSchema = z
|
|
212
|
+
.object({
|
|
213
|
+
type: z.literal("function"),
|
|
214
|
+
name: z.string().min(1),
|
|
215
|
+
description: z.string(),
|
|
216
|
+
parameters: z.object({
|
|
217
|
+
type: z.literal("object"),
|
|
218
|
+
properties: z.record(z.string(), z.unknown()),
|
|
219
|
+
required: z.array(z.string()).optional(),
|
|
220
|
+
}),
|
|
221
|
+
})
|
|
222
|
+
.strict();
|
|
223
|
+
type RealtimeToolConfig = z.infer<typeof RealtimeToolSchema>;
|
|
224
|
+
|
|
225
|
+
const VoiceCallRealtimeProvidersConfigSchema = z
|
|
226
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
227
|
+
.default({});
|
|
228
|
+
|
|
229
|
+
const VoiceCallRealtimeToolPolicySchema = z.enum(REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES);
|
|
230
|
+
|
|
231
|
+
const VoiceCallRealtimeFastContextSourceSchema = z.enum(["memory", "sessions"]);
|
|
232
|
+
|
|
233
|
+
const VoiceCallRealtimeFastContextConfigSchema = z
|
|
234
|
+
.object({
|
|
235
|
+
/** Enable bounded memory/session lookup before the full consult agent. */
|
|
236
|
+
enabled: z.boolean().default(false),
|
|
237
|
+
/** Hard deadline for the fast context lookup. */
|
|
238
|
+
timeoutMs: z.number().int().positive().default(800),
|
|
239
|
+
/** Maximum memory/session hits to inject into the realtime tool result. */
|
|
240
|
+
maxResults: z.number().int().positive().default(3),
|
|
241
|
+
/** Indexed sources used by the fast context lookup. */
|
|
242
|
+
sources: z
|
|
243
|
+
.array(VoiceCallRealtimeFastContextSourceSchema)
|
|
244
|
+
.min(1)
|
|
245
|
+
.default(["memory", "sessions"]),
|
|
246
|
+
/** Fall back to the full agent consult when fast context has no answer. */
|
|
247
|
+
fallbackToConsult: z.boolean().default(false),
|
|
248
|
+
})
|
|
249
|
+
.strict()
|
|
250
|
+
.default({
|
|
251
|
+
enabled: false,
|
|
252
|
+
timeoutMs: 800,
|
|
253
|
+
maxResults: 3,
|
|
254
|
+
sources: ["memory", "sessions"],
|
|
255
|
+
fallbackToConsult: false,
|
|
256
|
+
});
|
|
257
|
+
export type VoiceCallRealtimeFastContextConfig = z.infer<
|
|
258
|
+
typeof VoiceCallRealtimeFastContextConfigSchema
|
|
259
|
+
>;
|
|
260
|
+
|
|
261
|
+
const VoiceCallStreamingProvidersConfigSchema = z
|
|
262
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
263
|
+
.default({});
|
|
264
|
+
|
|
265
|
+
const VoiceCallRealtimeConfigSchema = z
|
|
266
|
+
.object({
|
|
267
|
+
/** Enable realtime voice-to-voice mode. */
|
|
268
|
+
enabled: z.boolean().default(false),
|
|
269
|
+
/** Provider id from registered realtime voice providers. */
|
|
270
|
+
provider: z.string().min(1).optional(),
|
|
271
|
+
/** Optional override for the local WebSocket route path. */
|
|
272
|
+
streamPath: z.string().min(1).optional(),
|
|
273
|
+
/** System instructions passed to the realtime provider. */
|
|
274
|
+
instructions: z.string().default(DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS),
|
|
275
|
+
/** Tool policy for the shared OpenClaw agent consult tool. */
|
|
276
|
+
toolPolicy: VoiceCallRealtimeToolPolicySchema.default("safe-read-only"),
|
|
277
|
+
/** Tool definitions exposed to the realtime provider. */
|
|
278
|
+
tools: z.array(RealtimeToolSchema).default([]),
|
|
279
|
+
/** Low-latency memory/session context for the consult tool. */
|
|
280
|
+
fastContext: VoiceCallRealtimeFastContextConfigSchema,
|
|
281
|
+
/** Provider-owned raw config blobs keyed by provider id. */
|
|
282
|
+
providers: VoiceCallRealtimeProvidersConfigSchema,
|
|
283
|
+
})
|
|
284
|
+
.strict()
|
|
285
|
+
.default({
|
|
286
|
+
enabled: false,
|
|
287
|
+
instructions: DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS,
|
|
288
|
+
toolPolicy: "safe-read-only",
|
|
289
|
+
tools: [],
|
|
290
|
+
fastContext: {
|
|
291
|
+
enabled: false,
|
|
292
|
+
timeoutMs: 800,
|
|
293
|
+
maxResults: 3,
|
|
294
|
+
sources: ["memory", "sessions"],
|
|
295
|
+
fallbackToConsult: false,
|
|
296
|
+
},
|
|
297
|
+
providers: {},
|
|
298
|
+
});
|
|
299
|
+
export type VoiceCallRealtimeConfig = z.infer<typeof VoiceCallRealtimeConfigSchema>;
|
|
300
|
+
|
|
301
|
+
// -----------------------------------------------------------------------------
|
|
302
|
+
// Streaming Configuration (Realtime Transcription)
|
|
205
303
|
// -----------------------------------------------------------------------------
|
|
206
304
|
|
|
207
|
-
|
|
305
|
+
const VoiceCallStreamingConfigSchema = z
|
|
208
306
|
.object({
|
|
209
307
|
/** Enable real-time audio streaming (requires WebSocket support) */
|
|
210
308
|
enabled: z.boolean().default(false),
|
|
211
|
-
/**
|
|
212
|
-
|
|
213
|
-
/** OpenAI API key for Realtime API (uses OPENAI_API_KEY env if not set) */
|
|
214
|
-
openaiApiKey: z.string().min(1).optional(),
|
|
215
|
-
/** OpenAI transcription model (default: gpt-4o-transcribe) */
|
|
216
|
-
sttModel: z.string().min(1).default("gpt-4o-transcribe"),
|
|
217
|
-
/** VAD silence duration in ms before considering speech ended */
|
|
218
|
-
silenceDurationMs: z.number().int().positive().default(800),
|
|
219
|
-
/** VAD threshold 0-1 (higher = less sensitive) */
|
|
220
|
-
vadThreshold: z.number().min(0).max(1).default(0.5),
|
|
309
|
+
/** Provider id from registered realtime transcription providers. */
|
|
310
|
+
provider: z.string().min(1).optional(),
|
|
221
311
|
/** WebSocket path for media stream connections */
|
|
222
312
|
streamPath: z.string().min(1).default("/voice/stream"),
|
|
313
|
+
/** Provider-owned raw config blobs keyed by provider id. */
|
|
314
|
+
providers: VoiceCallStreamingProvidersConfigSchema,
|
|
223
315
|
/**
|
|
224
316
|
* Close unauthenticated media stream sockets if no valid `start` frame arrives in time.
|
|
225
317
|
* Protects against pre-auth idle connection hold attacks.
|
|
@@ -235,17 +327,13 @@ export const VoiceCallStreamingConfigSchema = z
|
|
|
235
327
|
.strict()
|
|
236
328
|
.default({
|
|
237
329
|
enabled: false,
|
|
238
|
-
sttProvider: "openai-realtime",
|
|
239
|
-
sttModel: "gpt-4o-transcribe",
|
|
240
|
-
silenceDurationMs: 800,
|
|
241
|
-
vadThreshold: 0.5,
|
|
242
330
|
streamPath: "/voice/stream",
|
|
331
|
+
providers: {},
|
|
243
332
|
preStartTimeoutMs: 5000,
|
|
244
333
|
maxPendingConnections: 32,
|
|
245
334
|
maxPendingConnectionsPerIp: 4,
|
|
246
335
|
maxConnections: 128,
|
|
247
336
|
});
|
|
248
|
-
export type VoiceCallStreamingConfig = z.infer<typeof VoiceCallStreamingConfigSchema>;
|
|
249
337
|
|
|
250
338
|
// -----------------------------------------------------------------------------
|
|
251
339
|
// Main Voice Call Configuration
|
|
@@ -283,6 +371,9 @@ export const VoiceCallConfigSchema = z
|
|
|
283
371
|
/** Greeting message for inbound calls */
|
|
284
372
|
inboundGreeting: z.string().optional(),
|
|
285
373
|
|
|
374
|
+
/** Per-dialed-number overrides for inbound calls. Keys are E.164 numbers. */
|
|
375
|
+
numbers: z.record(E164Schema, VoiceCallNumberRouteConfigSchema).default({}),
|
|
376
|
+
|
|
286
377
|
/** Outbound call configuration */
|
|
287
378
|
outbound: OutboundConfigSchema,
|
|
288
379
|
|
|
@@ -291,11 +382,10 @@ export const VoiceCallConfigSchema = z
|
|
|
291
382
|
|
|
292
383
|
/**
|
|
293
384
|
* Maximum age of a call in seconds before it is automatically reaped.
|
|
294
|
-
* Catches calls stuck
|
|
295
|
-
* never receive
|
|
296
|
-
* Default: 0 (disabled). Recommended: 120-300 for production.
|
|
385
|
+
* Catches calls stuck before answer (for example, local mock calls that
|
|
386
|
+
* never receive provider webhooks). Set to 0 to disable.
|
|
297
387
|
*/
|
|
298
|
-
staleCallReaperSeconds: z.number().int().nonnegative().default(
|
|
388
|
+
staleCallReaperSeconds: z.number().int().nonnegative().default(120),
|
|
299
389
|
|
|
300
390
|
/** Silence timeout for end-of-speech detection (ms) */
|
|
301
391
|
silenceTimeoutMs: z.number().int().positive().default(800),
|
|
@@ -312,7 +402,7 @@ export const VoiceCallConfigSchema = z
|
|
|
312
402
|
/** Webhook server configuration */
|
|
313
403
|
serve: VoiceCallServeConfigSchema,
|
|
314
404
|
|
|
315
|
-
/**
|
|
405
|
+
/** @deprecated Prefer tunnel config. */
|
|
316
406
|
tailscale: VoiceCallTailscaleConfigSchema,
|
|
317
407
|
|
|
318
408
|
/** Tunnel configuration (unified ngrok/tailscale) */
|
|
@@ -324,23 +414,29 @@ export const VoiceCallConfigSchema = z
|
|
|
324
414
|
/** Real-time audio streaming configuration */
|
|
325
415
|
streaming: VoiceCallStreamingConfigSchema,
|
|
326
416
|
|
|
417
|
+
/** Realtime voice-to-voice configuration */
|
|
418
|
+
realtime: VoiceCallRealtimeConfigSchema,
|
|
419
|
+
|
|
420
|
+
/** Session memory scope for voice conversations. */
|
|
421
|
+
sessionScope: VoiceCallSessionScopeSchema.default("per-phone"),
|
|
422
|
+
|
|
327
423
|
/** Public webhook URL override (if set, bypasses tunnel auto-detection) */
|
|
328
424
|
publicUrl: z.string().url().optional(),
|
|
329
425
|
|
|
330
426
|
/** Skip webhook signature verification (development only, NOT for production) */
|
|
331
427
|
skipSignatureVerification: z.boolean().default(false),
|
|
332
428
|
|
|
333
|
-
/** STT configuration */
|
|
334
|
-
stt: SttConfigSchema,
|
|
335
|
-
|
|
336
429
|
/** TTS override (deep-merges with core messages.tts) */
|
|
337
430
|
tts: TtsConfigSchema,
|
|
338
431
|
|
|
339
432
|
/** Store path for call logs */
|
|
340
433
|
store: z.string().optional(),
|
|
341
434
|
|
|
342
|
-
/**
|
|
343
|
-
|
|
435
|
+
/** Agent ID to use for voice response generation. Defaults to "main". */
|
|
436
|
+
agentId: z.string().min(1).optional(),
|
|
437
|
+
|
|
438
|
+
/** Optional model override for generating voice responses. */
|
|
439
|
+
responseModel: z.string().optional(),
|
|
344
440
|
|
|
345
441
|
/** System prompt for voice responses */
|
|
346
442
|
responseSystemPrompt: z.string().optional(),
|
|
@@ -351,13 +447,19 @@ export const VoiceCallConfigSchema = z
|
|
|
351
447
|
.strict();
|
|
352
448
|
|
|
353
449
|
export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
|
|
354
|
-
type
|
|
355
|
-
|
|
450
|
+
export type VoiceCallEffectiveConfigResult = {
|
|
451
|
+
config: VoiceCallConfig;
|
|
452
|
+
numberRouteKey?: string;
|
|
453
|
+
};
|
|
454
|
+
type DeepPartial<T> = T extends SecretInput
|
|
455
|
+
? T
|
|
456
|
+
: T extends Array<infer U>
|
|
356
457
|
? DeepPartial<U>[]
|
|
357
458
|
: T extends object
|
|
358
459
|
? { [K in keyof T]?: DeepPartial<T[K]> }
|
|
359
460
|
: T;
|
|
360
461
|
export type VoiceCallConfigInput = DeepPartial<VoiceCallConfig>;
|
|
462
|
+
const TWILIO_AUTH_TOKEN_PATH = "plugins.entries.voice-call.config.twilio.authToken";
|
|
361
463
|
|
|
362
464
|
// -----------------------------------------------------------------------------
|
|
363
465
|
// Configuration Helpers
|
|
@@ -369,6 +471,29 @@ function cloneDefaultVoiceCallConfig(): VoiceCallConfig {
|
|
|
369
471
|
return structuredClone(DEFAULT_VOICE_CALL_CONFIG);
|
|
370
472
|
}
|
|
371
473
|
|
|
474
|
+
/**
 * Canonicalizes a webhook-style path string.
 *
 * - Surrounding whitespace is trimmed; an empty result becomes "/".
 * - A leading "/" is added when missing.
 * - Exactly one trailing "/" is dropped, except for the bare root "/".
 *
 * @param pathname Raw path as configured.
 * @returns The canonical path, always starting with "/".
 */
function normalizeWebhookLikePath(pathname: string): string {
  const candidate = pathname.trim();
  if (candidate.length === 0) {
    return "/";
  }

  const rooted = candidate[0] === "/" ? candidate : "/" + candidate;

  // The bare root keeps its slash; longer paths lose a single trailing slash.
  const hasTrailingSlash = rooted.length > 1 && rooted[rooted.length - 1] === "/";
  return hasTrailingSlash ? rooted.slice(0, rooted.length - 1) : rooted;
}
|
|
485
|
+
|
|
486
|
+
/**
 * Derives the default realtime stream path from the webhook serve path.
 *
 * The serve path is first canonicalized with `normalizeWebhookLikePath`.
 * A root path maps to "/voice/stream/realtime"; a path ending in "/webhook"
 * has that suffix replaced by "/stream/realtime"; any other path simply gets
 * "/stream/realtime" appended.
 *
 * @param servePath Configured webhook path.
 * @returns The derived realtime stream path.
 */
function defaultRealtimeStreamPathForServePath(servePath: string): string {
  const webhookSuffix = "/webhook";
  const basePath = normalizeWebhookLikePath(servePath);

  if (basePath === "/") {
    return "/voice/stream/realtime";
  }

  // Swap a trailing "/webhook" segment for the stream segment, otherwise append.
  const stem = basePath.endsWith(webhookSuffix)
    ? basePath.slice(0, -webhookSuffix.length)
    : basePath;
  return `${stem}/stream/realtime`;
}
|
|
496
|
+
|
|
372
497
|
function normalizeVoiceCallTtsConfig(
|
|
373
498
|
defaults: VoiceCallTtsConfig,
|
|
374
499
|
overrides: DeepPartial<NonNullable<VoiceCallTtsConfig>> | undefined,
|
|
@@ -380,14 +505,116 @@ function normalizeVoiceCallTtsConfig(
|
|
|
380
505
|
return TtsConfigSchema.parse(deepMergeDefined(defaults ?? {}, overrides ?? {}));
|
|
381
506
|
}
|
|
382
507
|
|
|
508
|
+
/**
 * Reduces a phone string to its digits so route keys can be compared
 * regardless of "+" prefixes, spaces or punctuation.
 *
 * @param phone Phone number in any format, or undefined.
 * @returns Digits only; an empty string when no phone is given.
 */
function normalizePhoneRouteKey(phone: string | undefined): string {
  if (phone == null) {
    return "";
  }
  return phone.replace(/\D/g, "");
}
|
|
511
|
+
|
|
512
|
+
/**
 * Finds the key in `config.numbers` that corresponds to a phone number.
 *
 * An exact own-property match on the raw `phone` string wins. Otherwise the
 * phone and every route key are compared by their digits only (via
 * `normalizePhoneRouteKey`) and the first matching key is returned.
 *
 * @param config Object carrying the per-number route table.
 * @param phone Dialed number (or an existing route key), possibly undefined.
 * @returns The matching route key, or undefined when there is no table,
 *   no digits in `phone`, or no match.
 */
export function resolveVoiceCallNumberRouteKey(
  config: Pick<VoiceCallConfig, "numbers">,
  phone: string | undefined,
): string | undefined {
  const { numbers } = config;
  if (!numbers) {
    return undefined;
  }

  // Fast path: the caller already passed an exact route key.
  if (phone && Object.prototype.hasOwnProperty.call(numbers, phone)) {
    return phone;
  }

  const wantedDigits = normalizePhoneRouteKey(phone);
  if (wantedDigits === "") {
    return undefined;
  }

  for (const candidateKey of Object.keys(numbers)) {
    if (normalizePhoneRouteKey(candidateKey) === wantedDigits) {
      return candidateKey;
    }
  }
  return undefined;
}
|
|
532
|
+
|
|
533
|
+
/**
 * Produces the configuration that applies to a specific dialed number.
 *
 * The route key is resolved with `resolveVoiceCallNumberRouteKey`. When no
 * route matches, the input config is returned as-is. Otherwise the route's
 * fields are overlaid on the global config:
 *
 * - `tts` is merged through `normalizeVoiceCallTtsConfig(config.tts, route.tts)`.
 * - `numbers` always stays the global route table.
 * - Only route fields that are actually defined override the global value.
 *   Object spread copies own properties even when their value is `undefined`,
 *   so spreading the route directly would let `{ agentId: undefined }` erase
 *   a globally configured `agentId` instead of inheriting it.
 *
 * @param config Normalized global voice-call config.
 * @param phoneOrRouteKey Dialed number or an already-resolved route key.
 * @returns The effective config, plus `numberRouteKey` when a route applied.
 */
export function resolveVoiceCallEffectiveConfig(
  config: VoiceCallConfig,
  phoneOrRouteKey: string | undefined,
): VoiceCallEffectiveConfigResult {
  const numberRouteKey = resolveVoiceCallNumberRouteKey(config, phoneOrRouteKey);
  if (!numberRouteKey) {
    return { config };
  }

  const route = config.numbers[numberRouteKey];
  if (!route) {
    return { config };
  }

  // TTS is deep-merged separately; every other field is a shallow override.
  const { tts: routeTts, ...routeFields } = route;
  const definedOverrides = Object.fromEntries(
    Object.entries(routeFields).filter(([, value]) => value !== undefined),
  ) as Partial<typeof routeFields>;

  return {
    numberRouteKey,
    config: {
      ...config,
      ...definedOverrides,
      tts: normalizeVoiceCallTtsConfig(config.tts, routeTts),
      numbers: config.numbers,
    },
  };
}
|
|
557
|
+
|
|
558
|
+
/**
 * Returns a copy of a provider-config map without entries whose value is
 * `undefined`.
 *
 * @param value Provider id → raw provider config (entries may be undefined).
 * @returns A new object with only the defined entries; `{}` when `value`
 *   itself is missing.
 */
function sanitizeVoiceCallProviderConfigs(
  value: Record<string, Record<string, unknown> | undefined> | undefined,
): Record<string, Record<string, unknown>> {
  if (!value) {
    return {};
  }

  const definedEntries: Array<[string, Record<string, unknown>]> = [];
  for (const [providerId, providerConfig] of Object.entries(value)) {
    if (providerConfig !== undefined) {
      definedEntries.push([providerId, providerConfig]);
    }
  }
  return Object.fromEntries(definedEntries);
}
|
|
570
|
+
|
|
571
|
+
/**
 * Validates the per-number route table.
 *
 * Entries whose value is `undefined` are skipped; every remaining entry is
 * run through `VoiceCallNumberRouteConfigSchema.parse`, so an invalid route
 * makes this function throw.
 *
 * @param value Raw route table keyed by phone number, or undefined.
 * @returns A new table of parsed routes; `{}` when `value` is missing.
 */
function sanitizeVoiceCallNumberRoutes(
  value: Record<string, unknown> | undefined,
): Record<string, VoiceCallNumberRouteConfig> {
  if (!value) {
    return {};
  }

  const parsedRoutes: Array<[string, VoiceCallNumberRouteConfig]> = [];
  for (const [routeKey, rawRoute] of Object.entries(value)) {
    if (rawRoute === undefined) {
      continue;
    }
    parsedRoutes.push([routeKey, VoiceCallNumberRouteConfigSchema.parse(rawRoute)]);
  }
  return Object.fromEntries(parsedRoutes);
}
|
|
583
|
+
|
|
584
|
+
/**
 * Reads `config.twilio.authToken` and hands it to
 * `normalizeResolvedSecretInputString` together with the token's config path
 * (`TWILIO_AUTH_TOKEN_PATH`).
 *
 * NOTE(review): the exact normalization / error behaviour is owned by the
 * plugin SDK helper and is not visible here — confirm against its docs.
 *
 * @param config Object carrying the optional Twilio provider section.
 * @returns Whatever string the SDK helper resolves, or undefined.
 */
export function resolveTwilioAuthToken(
  config: Pick<VoiceCallConfig, "twilio">,
): string | undefined {
  const configuredToken = config.twilio?.authToken;
  return normalizeResolvedSecretInputString({
    value: configuredToken,
    path: TWILIO_AUTH_TOKEN_PATH,
  });
}
|
|
592
|
+
|
|
383
593
|
export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
|
|
384
594
|
const defaults = cloneDefaultVoiceCallConfig();
|
|
595
|
+
const serve = { ...defaults.serve, ...config.serve };
|
|
596
|
+
const streamingProvider = config.streaming?.provider;
|
|
597
|
+
const streamingProviders = sanitizeVoiceCallProviderConfigs(
|
|
598
|
+
config.streaming?.providers ?? defaults.streaming.providers,
|
|
599
|
+
);
|
|
600
|
+
const realtimeProvider = config.realtime?.provider ?? defaults.realtime.provider;
|
|
601
|
+
const realtimeProviders = sanitizeVoiceCallProviderConfigs(
|
|
602
|
+
config.realtime?.providers ?? defaults.realtime.providers,
|
|
603
|
+
);
|
|
604
|
+
const realtimeFastContext = {
|
|
605
|
+
...defaults.realtime.fastContext,
|
|
606
|
+
...config.realtime?.fastContext,
|
|
607
|
+
sources: config.realtime?.fastContext?.sources ?? defaults.realtime.fastContext.sources,
|
|
608
|
+
};
|
|
385
609
|
return {
|
|
386
610
|
...defaults,
|
|
387
611
|
...config,
|
|
388
612
|
allowFrom: config.allowFrom ?? defaults.allowFrom,
|
|
613
|
+
numbers: sanitizeVoiceCallNumberRoutes(
|
|
614
|
+
(config.numbers ?? defaults.numbers) as Record<string, unknown>,
|
|
615
|
+
),
|
|
389
616
|
outbound: { ...defaults.outbound, ...config.outbound },
|
|
390
|
-
serve
|
|
617
|
+
serve,
|
|
391
618
|
tailscale: { ...defaults.tailscale, ...config.tailscale },
|
|
392
619
|
tunnel: { ...defaults.tunnel, ...config.tunnel },
|
|
393
620
|
webhookSecurity: {
|
|
@@ -397,12 +624,45 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
|
|
397
624
|
trustedProxyIPs:
|
|
398
625
|
config.webhookSecurity?.trustedProxyIPs ?? defaults.webhookSecurity.trustedProxyIPs,
|
|
399
626
|
},
|
|
400
|
-
streaming: {
|
|
401
|
-
|
|
627
|
+
streaming: {
|
|
628
|
+
...defaults.streaming,
|
|
629
|
+
...config.streaming,
|
|
630
|
+
provider: streamingProvider,
|
|
631
|
+
providers: streamingProviders,
|
|
632
|
+
},
|
|
633
|
+
realtime: {
|
|
634
|
+
...defaults.realtime,
|
|
635
|
+
...config.realtime,
|
|
636
|
+
provider: realtimeProvider,
|
|
637
|
+
streamPath:
|
|
638
|
+
config.realtime?.streamPath ??
|
|
639
|
+
defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path),
|
|
640
|
+
tools:
|
|
641
|
+
(config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
|
|
642
|
+
fastContext: realtimeFastContext,
|
|
643
|
+
providers: realtimeProviders,
|
|
644
|
+
},
|
|
402
645
|
tts: normalizeVoiceCallTtsConfig(defaults.tts, config.tts),
|
|
403
646
|
};
|
|
404
647
|
}
|
|
405
648
|
|
|
649
|
+
/**
 * Chooses the session key for a voice call.
 *
 * Precedence:
 * 1. A non-blank `explicitSessionKey` (trimmed) is used verbatim.
 * 2. With `sessionScope === "per-call"` the key is `voice:call:<callId>`.
 * 3. Otherwise the key is `voice:<digits of phone>`, falling back to
 *    `voice:<callId>` when the phone is missing or contains no digits.
 *
 * @param params.config Config slice providing `sessionScope`.
 * @param params.callId Identifier of the current call.
 * @param params.phone Optional phone number used for per-phone scoping.
 * @param params.explicitSessionKey Optional caller-supplied key.
 * @returns The resolved session key.
 */
export function resolveVoiceCallSessionKey(params: {
  config: Pick<VoiceCallConfig, "sessionScope">;
  callId: string;
  phone?: string;
  explicitSessionKey?: string;
}): string {
  const { config, callId, phone, explicitSessionKey } = params;

  const explicitKey = explicitSessionKey?.trim();
  if (explicitKey) {
    return explicitKey;
  }

  if (config.sessionScope === "per-call") {
    return `voice:call:${callId}`;
  }

  // Per-phone scope: key on the digits so formatting differences share a session.
  const phoneDigits = phone?.replace(/\D/g, "");
  if (phoneDigits) {
    return `voice:${phoneDigits}`;
  }
  return `voice:${callId}`;
}
|
|
665
|
+
|
|
406
666
|
/**
|
|
407
667
|
* Resolves the configuration by merging environment variables into missing fields.
|
|
408
668
|
* Returns a new configuration object with environment variables applied.
|
|
@@ -420,6 +680,7 @@ export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallC
|
|
|
420
680
|
|
|
421
681
|
// Twilio
|
|
422
682
|
if (resolved.provider === "twilio") {
|
|
683
|
+
resolved.fromNumber = resolved.fromNumber ?? process.env.TWILIO_FROM_NUMBER;
|
|
423
684
|
resolved.twilio = resolved.twilio ?? {};
|
|
424
685
|
resolved.twilio.accountSid = resolved.twilio.accountSid ?? process.env.TWILIO_ACCOUNT_SID;
|
|
425
686
|
resolved.twilio.authToken = resolved.twilio.authToken ?? process.env.TWILIO_AUTH_TOKEN;
|
|
@@ -474,7 +735,11 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
474
735
|
}
|
|
475
736
|
|
|
476
737
|
if (!config.fromNumber && config.provider !== "mock") {
|
|
477
|
-
errors.push(
|
|
738
|
+
errors.push(
|
|
739
|
+
config.provider === "twilio"
|
|
740
|
+
? "plugins.entries.voice-call.config.fromNumber is required (or set TWILIO_FROM_NUMBER env)"
|
|
741
|
+
: "plugins.entries.voice-call.config.fromNumber is required",
|
|
742
|
+
);
|
|
478
743
|
}
|
|
479
744
|
|
|
480
745
|
if (config.provider === "telnyx") {
|
|
@@ -501,7 +766,7 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
501
766
|
"plugins.entries.voice-call.config.twilio.accountSid is required (or set TWILIO_ACCOUNT_SID env)",
|
|
502
767
|
);
|
|
503
768
|
}
|
|
504
|
-
if (!config.twilio?.authToken) {
|
|
769
|
+
if (!hasConfiguredSecretInput(config.twilio?.authToken)) {
|
|
505
770
|
errors.push(
|
|
506
771
|
"plugins.entries.voice-call.config.twilio.authToken is required (or set TWILIO_AUTH_TOKEN env)",
|
|
507
772
|
);
|
|
@@ -521,5 +786,23 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
521
786
|
}
|
|
522
787
|
}
|
|
523
788
|
|
|
789
|
+
if (config.realtime.enabled && config.inboundPolicy === "disabled") {
|
|
790
|
+
errors.push(
|
|
791
|
+
'plugins.entries.voice-call.config.inboundPolicy must not be "disabled" when realtime.enabled is true',
|
|
792
|
+
);
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
if (config.realtime.enabled && config.streaming.enabled) {
|
|
796
|
+
errors.push(
|
|
797
|
+
"plugins.entries.voice-call.config.realtime.enabled and plugins.entries.voice-call.config.streaming.enabled cannot both be true",
|
|
798
|
+
);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
if (config.realtime.enabled && config.provider && config.provider !== "twilio") {
|
|
802
|
+
errors.push(
|
|
803
|
+
'plugins.entries.voice-call.config.provider must be "twilio" when realtime.enabled is true',
|
|
804
|
+
);
|
|
805
|
+
}
|
|
806
|
+
|
|
524
807
|
return { valid: errors.length === 0, errors };
|
|
525
808
|
}
|