@openclaw/voice-call 2026.3.13 → 2026.5.1-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +866 -0
- package/index.ts +353 -148
- package/openclaw.plugin.json +336 -157
- package/package.json +33 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +160 -12
- package/src/config.ts +243 -74
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +179 -19
- package/src/manager/events.ts +48 -30
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +464 -0
- package/src/manager/outbound.ts +148 -55
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +277 -0
- package/src/response-generator.ts +186 -40
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +351 -0
- package/src/runtime.ts +254 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +26 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +513 -100
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
package/src/config.ts
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
|
+
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES } from "openclaw/plugin-sdk/realtime-voice";
|
|
1
2
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
} from "openclaw/plugin-sdk/
|
|
7
|
-
import { z } from "zod";
|
|
3
|
+
buildSecretInputSchema,
|
|
4
|
+
hasConfiguredSecretInput,
|
|
5
|
+
normalizeResolvedSecretInputString,
|
|
6
|
+
type SecretInput,
|
|
7
|
+
} from "openclaw/plugin-sdk/secret-input";
|
|
8
|
+
import { z } from "openclaw/plugin-sdk/zod";
|
|
9
|
+
import { TtsConfigSchema } from "../api.js";
|
|
8
10
|
import { deepMergeDefined } from "./deep-merge.js";
|
|
11
|
+
import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js";
|
|
9
12
|
|
|
10
13
|
// -----------------------------------------------------------------------------
|
|
11
14
|
// Phone Number Validation
|
|
@@ -15,7 +18,7 @@ import { deepMergeDefined } from "./deep-merge.js";
|
|
|
15
18
|
* E.164 phone number format: +[country code][number]
|
|
16
19
|
* Examples use 555 prefix (reserved for fictional numbers)
|
|
17
20
|
*/
|
|
18
|
-
|
|
21
|
+
const E164Schema = z
|
|
19
22
|
.string()
|
|
20
23
|
.regex(/^\+[1-9]\d{1,14}$/, "Expected E.164 format, e.g. +15550001234");
|
|
21
24
|
|
|
@@ -30,14 +33,15 @@ export const E164Schema = z
|
|
|
30
33
|
* - "pairing": Unknown callers can request pairing (future)
|
|
31
34
|
* - "open": Accept all inbound calls (dangerous!)
|
|
32
35
|
*/
|
|
33
|
-
|
|
34
|
-
export type InboundPolicy = z.infer<typeof InboundPolicySchema>;
|
|
36
|
+
const InboundPolicySchema = z.enum(["disabled", "allowlist", "pairing", "open"]);
|
|
35
37
|
|
|
36
38
|
// -----------------------------------------------------------------------------
|
|
37
39
|
// Provider-Specific Configuration
|
|
38
40
|
// -----------------------------------------------------------------------------
|
|
39
41
|
|
|
40
|
-
|
|
42
|
+
const SecretInputSchema = buildSecretInputSchema();
|
|
43
|
+
|
|
44
|
+
const TelnyxConfigSchema = z
|
|
41
45
|
.object({
|
|
42
46
|
/** Telnyx API v2 key */
|
|
43
47
|
apiKey: z.string().min(1).optional(),
|
|
@@ -49,17 +53,16 @@ export const TelnyxConfigSchema = z
|
|
|
49
53
|
.strict();
|
|
50
54
|
export type TelnyxConfig = z.infer<typeof TelnyxConfigSchema>;
|
|
51
55
|
|
|
52
|
-
|
|
56
|
+
const TwilioConfigSchema = z
|
|
53
57
|
.object({
|
|
54
58
|
/** Twilio Account SID */
|
|
55
59
|
accountSid: z.string().min(1).optional(),
|
|
56
60
|
/** Twilio Auth Token */
|
|
57
|
-
authToken:
|
|
61
|
+
authToken: SecretInputSchema.optional(),
|
|
58
62
|
})
|
|
59
63
|
.strict();
|
|
60
|
-
export type TwilioConfig = z.infer<typeof TwilioConfigSchema>;
|
|
61
64
|
|
|
62
|
-
|
|
65
|
+
const PlivoConfigSchema = z
|
|
63
66
|
.object({
|
|
64
67
|
/** Plivo Auth ID (starts with MA/SA) */
|
|
65
68
|
authId: z.string().min(1).optional(),
|
|
@@ -69,29 +72,13 @@ export const PlivoConfigSchema = z
|
|
|
69
72
|
.strict();
|
|
70
73
|
export type PlivoConfig = z.infer<typeof PlivoConfigSchema>;
|
|
71
74
|
|
|
72
|
-
// -----------------------------------------------------------------------------
|
|
73
|
-
// STT/TTS Configuration
|
|
74
|
-
// -----------------------------------------------------------------------------
|
|
75
|
-
|
|
76
|
-
export const SttConfigSchema = z
|
|
77
|
-
.object({
|
|
78
|
-
/** STT provider (currently only OpenAI supported) */
|
|
79
|
-
provider: z.literal("openai").default("openai"),
|
|
80
|
-
/** Whisper model to use */
|
|
81
|
-
model: z.string().min(1).default("whisper-1"),
|
|
82
|
-
})
|
|
83
|
-
.strict()
|
|
84
|
-
.default({ provider: "openai", model: "whisper-1" });
|
|
85
|
-
export type SttConfig = z.infer<typeof SttConfigSchema>;
|
|
86
|
-
|
|
87
|
-
export { TtsAutoSchema, TtsConfigSchema, TtsModeSchema, TtsProviderSchema };
|
|
88
75
|
export type VoiceCallTtsConfig = z.infer<typeof TtsConfigSchema>;
|
|
89
76
|
|
|
90
77
|
// -----------------------------------------------------------------------------
|
|
91
78
|
// Webhook Server Configuration
|
|
92
79
|
// -----------------------------------------------------------------------------
|
|
93
80
|
|
|
94
|
-
|
|
81
|
+
const VoiceCallServeConfigSchema = z
|
|
95
82
|
.object({
|
|
96
83
|
/** Port to listen on */
|
|
97
84
|
port: z.number().int().positive().default(3334),
|
|
@@ -102,9 +89,8 @@ export const VoiceCallServeConfigSchema = z
|
|
|
102
89
|
})
|
|
103
90
|
.strict()
|
|
104
91
|
.default({ port: 3334, bind: "127.0.0.1", path: "/voice/webhook" });
|
|
105
|
-
export type VoiceCallServeConfig = z.infer<typeof VoiceCallServeConfigSchema>;
|
|
106
92
|
|
|
107
|
-
|
|
93
|
+
const VoiceCallTailscaleConfigSchema = z
|
|
108
94
|
.object({
|
|
109
95
|
/**
|
|
110
96
|
* Tailscale exposure mode:
|
|
@@ -118,13 +104,12 @@ export const VoiceCallTailscaleConfigSchema = z
|
|
|
118
104
|
})
|
|
119
105
|
.strict()
|
|
120
106
|
.default({ mode: "off", path: "/voice/webhook" });
|
|
121
|
-
export type VoiceCallTailscaleConfig = z.infer<typeof VoiceCallTailscaleConfigSchema>;
|
|
122
107
|
|
|
123
108
|
// -----------------------------------------------------------------------------
|
|
124
109
|
// Tunnel Configuration (unified ngrok/tailscale)
|
|
125
110
|
// -----------------------------------------------------------------------------
|
|
126
111
|
|
|
127
|
-
|
|
112
|
+
const VoiceCallTunnelConfigSchema = z
|
|
128
113
|
.object({
|
|
129
114
|
/**
|
|
130
115
|
* Tunnel provider:
|
|
@@ -149,13 +134,12 @@ export const VoiceCallTunnelConfigSchema = z
|
|
|
149
134
|
})
|
|
150
135
|
.strict()
|
|
151
136
|
.default({ provider: "none", allowNgrokFreeTierLoopbackBypass: false });
|
|
152
|
-
export type VoiceCallTunnelConfig = z.infer<typeof VoiceCallTunnelConfigSchema>;
|
|
153
137
|
|
|
154
138
|
// -----------------------------------------------------------------------------
|
|
155
139
|
// Webhook Security Configuration
|
|
156
140
|
// -----------------------------------------------------------------------------
|
|
157
141
|
|
|
158
|
-
|
|
142
|
+
const VoiceCallWebhookSecurityConfigSchema = z
|
|
159
143
|
.object({
|
|
160
144
|
/**
|
|
161
145
|
* Allowed hostnames for webhook URL reconstruction.
|
|
@@ -186,10 +170,10 @@ export type WebhookSecurityConfig = z.infer<typeof VoiceCallWebhookSecurityConfi
|
|
|
186
170
|
* - "notify": Deliver message and auto-hangup after delay (one-way notification)
|
|
187
171
|
* - "conversation": Stay open for back-and-forth until explicit end or timeout
|
|
188
172
|
*/
|
|
189
|
-
|
|
173
|
+
const CallModeSchema = z.enum(["notify", "conversation"]);
|
|
190
174
|
export type CallMode = z.infer<typeof CallModeSchema>;
|
|
191
175
|
|
|
192
|
-
|
|
176
|
+
const OutboundConfigSchema = z
|
|
193
177
|
.object({
|
|
194
178
|
/** Default call mode for outbound calls */
|
|
195
179
|
defaultMode: CallModeSchema.default("notify"),
|
|
@@ -198,28 +182,115 @@ export const OutboundConfigSchema = z
|
|
|
198
182
|
})
|
|
199
183
|
.strict()
|
|
200
184
|
.default({ defaultMode: "notify", notifyHangupDelaySec: 3 });
|
|
201
|
-
export type OutboundConfig = z.infer<typeof OutboundConfigSchema>;
|
|
202
185
|
|
|
203
186
|
// -----------------------------------------------------------------------------
|
|
204
|
-
//
|
|
187
|
+
// Realtime Voice Configuration
|
|
205
188
|
// -----------------------------------------------------------------------------
|
|
206
189
|
|
|
207
|
-
|
|
190
|
+
const RealtimeToolSchema = z
|
|
191
|
+
.object({
|
|
192
|
+
type: z.literal("function"),
|
|
193
|
+
name: z.string().min(1),
|
|
194
|
+
description: z.string(),
|
|
195
|
+
parameters: z.object({
|
|
196
|
+
type: z.literal("object"),
|
|
197
|
+
properties: z.record(z.string(), z.unknown()),
|
|
198
|
+
required: z.array(z.string()).optional(),
|
|
199
|
+
}),
|
|
200
|
+
})
|
|
201
|
+
.strict();
|
|
202
|
+
type RealtimeToolConfig = z.infer<typeof RealtimeToolSchema>;
|
|
203
|
+
|
|
204
|
+
const VoiceCallRealtimeProvidersConfigSchema = z
|
|
205
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
206
|
+
.default({});
|
|
207
|
+
|
|
208
|
+
const VoiceCallRealtimeToolPolicySchema = z.enum(REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES);
|
|
209
|
+
|
|
210
|
+
const VoiceCallRealtimeFastContextSourceSchema = z.enum(["memory", "sessions"]);
|
|
211
|
+
|
|
212
|
+
const VoiceCallRealtimeFastContextConfigSchema = z
|
|
213
|
+
.object({
|
|
214
|
+
/** Enable bounded memory/session lookup before the full consult agent. */
|
|
215
|
+
enabled: z.boolean().default(false),
|
|
216
|
+
/** Hard deadline for the fast context lookup. */
|
|
217
|
+
timeoutMs: z.number().int().positive().default(800),
|
|
218
|
+
/** Maximum memory/session hits to inject into the realtime tool result. */
|
|
219
|
+
maxResults: z.number().int().positive().default(3),
|
|
220
|
+
/** Indexed sources used by the fast context lookup. */
|
|
221
|
+
sources: z
|
|
222
|
+
.array(VoiceCallRealtimeFastContextSourceSchema)
|
|
223
|
+
.min(1)
|
|
224
|
+
.default(["memory", "sessions"]),
|
|
225
|
+
/** Fall back to the full agent consult when fast context has no answer. */
|
|
226
|
+
fallbackToConsult: z.boolean().default(false),
|
|
227
|
+
})
|
|
228
|
+
.strict()
|
|
229
|
+
.default({
|
|
230
|
+
enabled: false,
|
|
231
|
+
timeoutMs: 800,
|
|
232
|
+
maxResults: 3,
|
|
233
|
+
sources: ["memory", "sessions"],
|
|
234
|
+
fallbackToConsult: false,
|
|
235
|
+
});
|
|
236
|
+
export type VoiceCallRealtimeFastContextConfig = z.infer<
|
|
237
|
+
typeof VoiceCallRealtimeFastContextConfigSchema
|
|
238
|
+
>;
|
|
239
|
+
|
|
240
|
+
const VoiceCallStreamingProvidersConfigSchema = z
|
|
241
|
+
.record(z.string(), z.record(z.string(), z.unknown()))
|
|
242
|
+
.default({});
|
|
243
|
+
|
|
244
|
+
const VoiceCallRealtimeConfigSchema = z
|
|
245
|
+
.object({
|
|
246
|
+
/** Enable realtime voice-to-voice mode. */
|
|
247
|
+
enabled: z.boolean().default(false),
|
|
248
|
+
/** Provider id from registered realtime voice providers. */
|
|
249
|
+
provider: z.string().min(1).optional(),
|
|
250
|
+
/** Optional override for the local WebSocket route path. */
|
|
251
|
+
streamPath: z.string().min(1).optional(),
|
|
252
|
+
/** System instructions passed to the realtime provider. */
|
|
253
|
+
instructions: z.string().default(DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS),
|
|
254
|
+
/** Tool policy for the shared OpenClaw agent consult tool. */
|
|
255
|
+
toolPolicy: VoiceCallRealtimeToolPolicySchema.default("safe-read-only"),
|
|
256
|
+
/** Tool definitions exposed to the realtime provider. */
|
|
257
|
+
tools: z.array(RealtimeToolSchema).default([]),
|
|
258
|
+
/** Low-latency memory/session context for the consult tool. */
|
|
259
|
+
fastContext: VoiceCallRealtimeFastContextConfigSchema,
|
|
260
|
+
/** Provider-owned raw config blobs keyed by provider id. */
|
|
261
|
+
providers: VoiceCallRealtimeProvidersConfigSchema,
|
|
262
|
+
})
|
|
263
|
+
.strict()
|
|
264
|
+
.default({
|
|
265
|
+
enabled: false,
|
|
266
|
+
instructions: DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS,
|
|
267
|
+
toolPolicy: "safe-read-only",
|
|
268
|
+
tools: [],
|
|
269
|
+
fastContext: {
|
|
270
|
+
enabled: false,
|
|
271
|
+
timeoutMs: 800,
|
|
272
|
+
maxResults: 3,
|
|
273
|
+
sources: ["memory", "sessions"],
|
|
274
|
+
fallbackToConsult: false,
|
|
275
|
+
},
|
|
276
|
+
providers: {},
|
|
277
|
+
});
|
|
278
|
+
export type VoiceCallRealtimeConfig = z.infer<typeof VoiceCallRealtimeConfigSchema>;
|
|
279
|
+
|
|
280
|
+
// -----------------------------------------------------------------------------
|
|
281
|
+
// Streaming Configuration (Realtime Transcription)
|
|
282
|
+
// -----------------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
const VoiceCallStreamingConfigSchema = z
|
|
208
285
|
.object({
|
|
209
286
|
/** Enable real-time audio streaming (requires WebSocket support) */
|
|
210
287
|
enabled: z.boolean().default(false),
|
|
211
|
-
/**
|
|
212
|
-
|
|
213
|
-
/** OpenAI API key for Realtime API (uses OPENAI_API_KEY env if not set) */
|
|
214
|
-
openaiApiKey: z.string().min(1).optional(),
|
|
215
|
-
/** OpenAI transcription model (default: gpt-4o-transcribe) */
|
|
216
|
-
sttModel: z.string().min(1).default("gpt-4o-transcribe"),
|
|
217
|
-
/** VAD silence duration in ms before considering speech ended */
|
|
218
|
-
silenceDurationMs: z.number().int().positive().default(800),
|
|
219
|
-
/** VAD threshold 0-1 (higher = less sensitive) */
|
|
220
|
-
vadThreshold: z.number().min(0).max(1).default(0.5),
|
|
288
|
+
/** Provider id from registered realtime transcription providers. */
|
|
289
|
+
provider: z.string().min(1).optional(),
|
|
221
290
|
/** WebSocket path for media stream connections */
|
|
222
291
|
streamPath: z.string().min(1).default("/voice/stream"),
|
|
292
|
+
/** Provider-owned raw config blobs keyed by provider id. */
|
|
293
|
+
providers: VoiceCallStreamingProvidersConfigSchema,
|
|
223
294
|
/**
|
|
224
295
|
* Close unauthenticated media stream sockets if no valid `start` frame arrives in time.
|
|
225
296
|
* Protects against pre-auth idle connection hold attacks.
|
|
@@ -235,17 +306,13 @@ export const VoiceCallStreamingConfigSchema = z
|
|
|
235
306
|
.strict()
|
|
236
307
|
.default({
|
|
237
308
|
enabled: false,
|
|
238
|
-
sttProvider: "openai-realtime",
|
|
239
|
-
sttModel: "gpt-4o-transcribe",
|
|
240
|
-
silenceDurationMs: 800,
|
|
241
|
-
vadThreshold: 0.5,
|
|
242
309
|
streamPath: "/voice/stream",
|
|
310
|
+
providers: {},
|
|
243
311
|
preStartTimeoutMs: 5000,
|
|
244
312
|
maxPendingConnections: 32,
|
|
245
313
|
maxPendingConnectionsPerIp: 4,
|
|
246
314
|
maxConnections: 128,
|
|
247
315
|
});
|
|
248
|
-
export type VoiceCallStreamingConfig = z.infer<typeof VoiceCallStreamingConfigSchema>;
|
|
249
316
|
|
|
250
317
|
// -----------------------------------------------------------------------------
|
|
251
318
|
// Main Voice Call Configuration
|
|
@@ -291,11 +358,10 @@ export const VoiceCallConfigSchema = z
|
|
|
291
358
|
|
|
292
359
|
/**
|
|
293
360
|
* Maximum age of a call in seconds before it is automatically reaped.
|
|
294
|
-
* Catches calls stuck
|
|
295
|
-
* never receive
|
|
296
|
-
* Default: 0 (disabled). Recommended: 120-300 for production.
|
|
361
|
+
* Catches calls stuck before answer (for example, local mock calls that
|
|
362
|
+
* never receive provider webhooks). Set to 0 to disable.
|
|
297
363
|
*/
|
|
298
|
-
staleCallReaperSeconds: z.number().int().nonnegative().default(
|
|
364
|
+
staleCallReaperSeconds: z.number().int().nonnegative().default(120),
|
|
299
365
|
|
|
300
366
|
/** Silence timeout for end-of-speech detection (ms) */
|
|
301
367
|
silenceTimeoutMs: z.number().int().positive().default(800),
|
|
@@ -312,7 +378,7 @@ export const VoiceCallConfigSchema = z
|
|
|
312
378
|
/** Webhook server configuration */
|
|
313
379
|
serve: VoiceCallServeConfigSchema,
|
|
314
380
|
|
|
315
|
-
/**
|
|
381
|
+
/** @deprecated Prefer tunnel config. */
|
|
316
382
|
tailscale: VoiceCallTailscaleConfigSchema,
|
|
317
383
|
|
|
318
384
|
/** Tunnel configuration (unified ngrok/tailscale) */
|
|
@@ -324,23 +390,26 @@ export const VoiceCallConfigSchema = z
|
|
|
324
390
|
/** Real-time audio streaming configuration */
|
|
325
391
|
streaming: VoiceCallStreamingConfigSchema,
|
|
326
392
|
|
|
393
|
+
/** Realtime voice-to-voice configuration */
|
|
394
|
+
realtime: VoiceCallRealtimeConfigSchema,
|
|
395
|
+
|
|
327
396
|
/** Public webhook URL override (if set, bypasses tunnel auto-detection) */
|
|
328
397
|
publicUrl: z.string().url().optional(),
|
|
329
398
|
|
|
330
399
|
/** Skip webhook signature verification (development only, NOT for production) */
|
|
331
400
|
skipSignatureVerification: z.boolean().default(false),
|
|
332
401
|
|
|
333
|
-
/** STT configuration */
|
|
334
|
-
stt: SttConfigSchema,
|
|
335
|
-
|
|
336
402
|
/** TTS override (deep-merges with core messages.tts) */
|
|
337
403
|
tts: TtsConfigSchema,
|
|
338
404
|
|
|
339
405
|
/** Store path for call logs */
|
|
340
406
|
store: z.string().optional(),
|
|
341
407
|
|
|
342
|
-
/**
|
|
343
|
-
|
|
408
|
+
/** Agent ID to use for voice response generation. Defaults to "main". */
|
|
409
|
+
agentId: z.string().min(1).optional(),
|
|
410
|
+
|
|
411
|
+
/** Optional model override for generating voice responses. */
|
|
412
|
+
responseModel: z.string().optional(),
|
|
344
413
|
|
|
345
414
|
/** System prompt for voice responses */
|
|
346
415
|
responseSystemPrompt: z.string().optional(),
|
|
@@ -351,13 +420,15 @@ export const VoiceCallConfigSchema = z
|
|
|
351
420
|
.strict();
|
|
352
421
|
|
|
353
422
|
export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
|
|
354
|
-
type DeepPartial<T> =
|
|
355
|
-
T
|
|
423
|
+
type DeepPartial<T> = T extends SecretInput
|
|
424
|
+
? T
|
|
425
|
+
: T extends Array<infer U>
|
|
356
426
|
? DeepPartial<U>[]
|
|
357
427
|
: T extends object
|
|
358
428
|
? { [K in keyof T]?: DeepPartial<T[K]> }
|
|
359
429
|
: T;
|
|
360
430
|
export type VoiceCallConfigInput = DeepPartial<VoiceCallConfig>;
|
|
431
|
+
const TWILIO_AUTH_TOKEN_PATH = "plugins.entries.voice-call.config.twilio.authToken";
|
|
361
432
|
|
|
362
433
|
// -----------------------------------------------------------------------------
|
|
363
434
|
// Configuration Helpers
|
|
@@ -369,6 +440,29 @@ function cloneDefaultVoiceCallConfig(): VoiceCallConfig {
|
|
|
369
440
|
return structuredClone(DEFAULT_VOICE_CALL_CONFIG);
|
|
370
441
|
}
|
|
371
442
|
|
|
443
|
+
function normalizeWebhookLikePath(pathname: string): string {
|
|
444
|
+
const trimmed = pathname.trim();
|
|
445
|
+
if (!trimmed) {
|
|
446
|
+
return "/";
|
|
447
|
+
}
|
|
448
|
+
const prefixed = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
|
|
449
|
+
if (prefixed === "/") {
|
|
450
|
+
return prefixed;
|
|
451
|
+
}
|
|
452
|
+
return prefixed.endsWith("/") ? prefixed.slice(0, -1) : prefixed;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
function defaultRealtimeStreamPathForServePath(servePath: string): string {
|
|
456
|
+
const normalized = normalizeWebhookLikePath(servePath);
|
|
457
|
+
if (normalized.endsWith("/webhook")) {
|
|
458
|
+
return `${normalized.slice(0, -"/webhook".length)}/stream/realtime`;
|
|
459
|
+
}
|
|
460
|
+
if (normalized === "/") {
|
|
461
|
+
return "/voice/stream/realtime";
|
|
462
|
+
}
|
|
463
|
+
return `${normalized}/stream/realtime`;
|
|
464
|
+
}
|
|
465
|
+
|
|
372
466
|
function normalizeVoiceCallTtsConfig(
|
|
373
467
|
defaults: VoiceCallTtsConfig,
|
|
374
468
|
overrides: DeepPartial<NonNullable<VoiceCallTtsConfig>> | undefined,
|
|
@@ -380,14 +474,50 @@ function normalizeVoiceCallTtsConfig(
|
|
|
380
474
|
return TtsConfigSchema.parse(deepMergeDefined(defaults ?? {}, overrides ?? {}));
|
|
381
475
|
}
|
|
382
476
|
|
|
477
|
+
function sanitizeVoiceCallProviderConfigs(
|
|
478
|
+
value: Record<string, Record<string, unknown> | undefined> | undefined,
|
|
479
|
+
): Record<string, Record<string, unknown>> {
|
|
480
|
+
if (!value) {
|
|
481
|
+
return {};
|
|
482
|
+
}
|
|
483
|
+
return Object.fromEntries(
|
|
484
|
+
Object.entries(value).filter(
|
|
485
|
+
(entry): entry is [string, Record<string, unknown>] => entry[1] !== undefined,
|
|
486
|
+
),
|
|
487
|
+
);
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
export function resolveTwilioAuthToken(
|
|
491
|
+
config: Pick<VoiceCallConfig, "twilio">,
|
|
492
|
+
): string | undefined {
|
|
493
|
+
return normalizeResolvedSecretInputString({
|
|
494
|
+
value: config.twilio?.authToken,
|
|
495
|
+
path: TWILIO_AUTH_TOKEN_PATH,
|
|
496
|
+
});
|
|
497
|
+
}
|
|
498
|
+
|
|
383
499
|
export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallConfig {
|
|
384
500
|
const defaults = cloneDefaultVoiceCallConfig();
|
|
501
|
+
const serve = { ...defaults.serve, ...config.serve };
|
|
502
|
+
const streamingProvider = config.streaming?.provider;
|
|
503
|
+
const streamingProviders = sanitizeVoiceCallProviderConfigs(
|
|
504
|
+
config.streaming?.providers ?? defaults.streaming.providers,
|
|
505
|
+
);
|
|
506
|
+
const realtimeProvider = config.realtime?.provider ?? defaults.realtime.provider;
|
|
507
|
+
const realtimeProviders = sanitizeVoiceCallProviderConfigs(
|
|
508
|
+
config.realtime?.providers ?? defaults.realtime.providers,
|
|
509
|
+
);
|
|
510
|
+
const realtimeFastContext = {
|
|
511
|
+
...defaults.realtime.fastContext,
|
|
512
|
+
...config.realtime?.fastContext,
|
|
513
|
+
sources: config.realtime?.fastContext?.sources ?? defaults.realtime.fastContext.sources,
|
|
514
|
+
};
|
|
385
515
|
return {
|
|
386
516
|
...defaults,
|
|
387
517
|
...config,
|
|
388
518
|
allowFrom: config.allowFrom ?? defaults.allowFrom,
|
|
389
519
|
outbound: { ...defaults.outbound, ...config.outbound },
|
|
390
|
-
serve
|
|
520
|
+
serve,
|
|
391
521
|
tailscale: { ...defaults.tailscale, ...config.tailscale },
|
|
392
522
|
tunnel: { ...defaults.tunnel, ...config.tunnel },
|
|
393
523
|
webhookSecurity: {
|
|
@@ -397,8 +527,24 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
|
|
397
527
|
trustedProxyIPs:
|
|
398
528
|
config.webhookSecurity?.trustedProxyIPs ?? defaults.webhookSecurity.trustedProxyIPs,
|
|
399
529
|
},
|
|
400
|
-
streaming: {
|
|
401
|
-
|
|
530
|
+
streaming: {
|
|
531
|
+
...defaults.streaming,
|
|
532
|
+
...config.streaming,
|
|
533
|
+
provider: streamingProvider,
|
|
534
|
+
providers: streamingProviders,
|
|
535
|
+
},
|
|
536
|
+
realtime: {
|
|
537
|
+
...defaults.realtime,
|
|
538
|
+
...config.realtime,
|
|
539
|
+
provider: realtimeProvider,
|
|
540
|
+
streamPath:
|
|
541
|
+
config.realtime?.streamPath ??
|
|
542
|
+
defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path),
|
|
543
|
+
tools:
|
|
544
|
+
(config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
|
|
545
|
+
fastContext: realtimeFastContext,
|
|
546
|
+
providers: realtimeProviders,
|
|
547
|
+
},
|
|
402
548
|
tts: normalizeVoiceCallTtsConfig(defaults.tts, config.tts),
|
|
403
549
|
};
|
|
404
550
|
}
|
|
@@ -420,6 +566,7 @@ export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallC
|
|
|
420
566
|
|
|
421
567
|
// Twilio
|
|
422
568
|
if (resolved.provider === "twilio") {
|
|
569
|
+
resolved.fromNumber = resolved.fromNumber ?? process.env.TWILIO_FROM_NUMBER;
|
|
423
570
|
resolved.twilio = resolved.twilio ?? {};
|
|
424
571
|
resolved.twilio.accountSid = resolved.twilio.accountSid ?? process.env.TWILIO_ACCOUNT_SID;
|
|
425
572
|
resolved.twilio.authToken = resolved.twilio.authToken ?? process.env.TWILIO_AUTH_TOKEN;
|
|
@@ -474,7 +621,11 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
474
621
|
}
|
|
475
622
|
|
|
476
623
|
if (!config.fromNumber && config.provider !== "mock") {
|
|
477
|
-
errors.push(
|
|
624
|
+
errors.push(
|
|
625
|
+
config.provider === "twilio"
|
|
626
|
+
? "plugins.entries.voice-call.config.fromNumber is required (or set TWILIO_FROM_NUMBER env)"
|
|
627
|
+
: "plugins.entries.voice-call.config.fromNumber is required",
|
|
628
|
+
);
|
|
478
629
|
}
|
|
479
630
|
|
|
480
631
|
if (config.provider === "telnyx") {
|
|
@@ -501,7 +652,7 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
501
652
|
"plugins.entries.voice-call.config.twilio.accountSid is required (or set TWILIO_ACCOUNT_SID env)",
|
|
502
653
|
);
|
|
503
654
|
}
|
|
504
|
-
if (!config.twilio?.authToken) {
|
|
655
|
+
if (!hasConfiguredSecretInput(config.twilio?.authToken)) {
|
|
505
656
|
errors.push(
|
|
506
657
|
"plugins.entries.voice-call.config.twilio.authToken is required (or set TWILIO_AUTH_TOKEN env)",
|
|
507
658
|
);
|
|
@@ -521,5 +672,23 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
521
672
|
}
|
|
522
673
|
}
|
|
523
674
|
|
|
675
|
+
if (config.realtime.enabled && config.inboundPolicy === "disabled") {
|
|
676
|
+
errors.push(
|
|
677
|
+
'plugins.entries.voice-call.config.inboundPolicy must not be "disabled" when realtime.enabled is true',
|
|
678
|
+
);
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
if (config.realtime.enabled && config.streaming.enabled) {
|
|
682
|
+
errors.push(
|
|
683
|
+
"plugins.entries.voice-call.config.realtime.enabled and plugins.entries.voice-call.config.streaming.enabled cannot both be true",
|
|
684
|
+
);
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
if (config.realtime.enabled && config.provider && config.provider !== "twilio") {
|
|
688
|
+
errors.push(
|
|
689
|
+
'plugins.entries.voice-call.config.provider must be "twilio" when realtime.enabled is true',
|
|
690
|
+
);
|
|
691
|
+
}
|
|
692
|
+
|
|
524
693
|
return { valid: errors.length === 0, errors };
|
|
525
694
|
}
|
package/src/core-bridge.ts
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
1
|
+
import type { OpenClawPluginApi } from "../api.js";
|
|
4
2
|
import type { VoiceCallTtsConfig } from "./config.js";
|
|
5
3
|
|
|
6
4
|
export type CoreConfig = {
|
|
@@ -13,147 +11,4 @@ export type CoreConfig = {
|
|
|
13
11
|
[key: string]: unknown;
|
|
14
12
|
};
|
|
15
13
|
|
|
16
|
-
type CoreAgentDeps =
|
|
17
|
-
resolveAgentDir: (cfg: CoreConfig, agentId: string) => string;
|
|
18
|
-
resolveAgentWorkspaceDir: (cfg: CoreConfig, agentId: string) => string;
|
|
19
|
-
resolveAgentIdentity: (
|
|
20
|
-
cfg: CoreConfig,
|
|
21
|
-
agentId: string,
|
|
22
|
-
) => { name?: string | null } | null | undefined;
|
|
23
|
-
resolveThinkingDefault: (params: {
|
|
24
|
-
cfg: CoreConfig;
|
|
25
|
-
provider?: string;
|
|
26
|
-
model?: string;
|
|
27
|
-
}) => string;
|
|
28
|
-
runEmbeddedPiAgent: (params: {
|
|
29
|
-
sessionId: string;
|
|
30
|
-
sessionKey?: string;
|
|
31
|
-
messageProvider?: string;
|
|
32
|
-
sessionFile: string;
|
|
33
|
-
workspaceDir: string;
|
|
34
|
-
config?: CoreConfig;
|
|
35
|
-
prompt: string;
|
|
36
|
-
provider?: string;
|
|
37
|
-
model?: string;
|
|
38
|
-
thinkLevel?: string;
|
|
39
|
-
verboseLevel?: string;
|
|
40
|
-
timeoutMs: number;
|
|
41
|
-
runId: string;
|
|
42
|
-
lane?: string;
|
|
43
|
-
extraSystemPrompt?: string;
|
|
44
|
-
agentDir?: string;
|
|
45
|
-
}) => Promise<{
|
|
46
|
-
payloads?: Array<{ text?: string; isError?: boolean }>;
|
|
47
|
-
meta?: { aborted?: boolean };
|
|
48
|
-
}>;
|
|
49
|
-
resolveAgentTimeoutMs: (opts: { cfg: CoreConfig }) => number;
|
|
50
|
-
ensureAgentWorkspace: (params?: { dir: string }) => Promise<void>;
|
|
51
|
-
resolveStorePath: (store?: string, opts?: { agentId?: string }) => string;
|
|
52
|
-
loadSessionStore: (storePath: string) => Record<string, unknown>;
|
|
53
|
-
saveSessionStore: (storePath: string, store: Record<string, unknown>) => Promise<void>;
|
|
54
|
-
resolveSessionFilePath: (
|
|
55
|
-
sessionId: string,
|
|
56
|
-
entry: unknown,
|
|
57
|
-
opts?: { agentId?: string },
|
|
58
|
-
) => string;
|
|
59
|
-
DEFAULT_MODEL: string;
|
|
60
|
-
DEFAULT_PROVIDER: string;
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
let coreRootCache: string | null = null;
|
|
64
|
-
let coreDepsPromise: Promise<CoreAgentDeps> | null = null;
|
|
65
|
-
|
|
66
|
-
function findPackageRoot(startDir: string, name: string): string | null {
|
|
67
|
-
let dir = startDir;
|
|
68
|
-
for (;;) {
|
|
69
|
-
const pkgPath = path.join(dir, "package.json");
|
|
70
|
-
try {
|
|
71
|
-
if (fs.existsSync(pkgPath)) {
|
|
72
|
-
const raw = fs.readFileSync(pkgPath, "utf8");
|
|
73
|
-
const pkg = JSON.parse(raw) as { name?: string };
|
|
74
|
-
if (pkg.name === name) {
|
|
75
|
-
return dir;
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
} catch {
|
|
79
|
-
// ignore parse errors and keep walking
|
|
80
|
-
}
|
|
81
|
-
const parent = path.dirname(dir);
|
|
82
|
-
if (parent === dir) {
|
|
83
|
-
return null;
|
|
84
|
-
}
|
|
85
|
-
dir = parent;
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function resolveOpenClawRoot(): string {
|
|
90
|
-
if (coreRootCache) {
|
|
91
|
-
return coreRootCache;
|
|
92
|
-
}
|
|
93
|
-
const override = process.env.OPENCLAW_ROOT?.trim();
|
|
94
|
-
if (override) {
|
|
95
|
-
coreRootCache = override;
|
|
96
|
-
return override;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
const candidates = new Set<string>();
|
|
100
|
-
if (process.argv[1]) {
|
|
101
|
-
candidates.add(path.dirname(process.argv[1]));
|
|
102
|
-
}
|
|
103
|
-
candidates.add(process.cwd());
|
|
104
|
-
try {
|
|
105
|
-
const urlPath = fileURLToPath(import.meta.url);
|
|
106
|
-
candidates.add(path.dirname(urlPath));
|
|
107
|
-
} catch {
|
|
108
|
-
// ignore
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
for (const start of candidates) {
|
|
112
|
-
for (const name of ["openclaw"]) {
|
|
113
|
-
const found = findPackageRoot(start, name);
|
|
114
|
-
if (found) {
|
|
115
|
-
coreRootCache = found;
|
|
116
|
-
return found;
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
throw new Error("Unable to resolve core root. Set OPENCLAW_ROOT to the package root.");
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
async function importCoreExtensionAPI(): Promise<{
|
|
125
|
-
resolveAgentDir: CoreAgentDeps["resolveAgentDir"];
|
|
126
|
-
resolveAgentWorkspaceDir: CoreAgentDeps["resolveAgentWorkspaceDir"];
|
|
127
|
-
DEFAULT_MODEL: string;
|
|
128
|
-
DEFAULT_PROVIDER: string;
|
|
129
|
-
resolveAgentIdentity: CoreAgentDeps["resolveAgentIdentity"];
|
|
130
|
-
resolveThinkingDefault: CoreAgentDeps["resolveThinkingDefault"];
|
|
131
|
-
runEmbeddedPiAgent: CoreAgentDeps["runEmbeddedPiAgent"];
|
|
132
|
-
resolveAgentTimeoutMs: CoreAgentDeps["resolveAgentTimeoutMs"];
|
|
133
|
-
ensureAgentWorkspace: CoreAgentDeps["ensureAgentWorkspace"];
|
|
134
|
-
resolveStorePath: CoreAgentDeps["resolveStorePath"];
|
|
135
|
-
loadSessionStore: CoreAgentDeps["loadSessionStore"];
|
|
136
|
-
saveSessionStore: CoreAgentDeps["saveSessionStore"];
|
|
137
|
-
resolveSessionFilePath: CoreAgentDeps["resolveSessionFilePath"];
|
|
138
|
-
}> {
|
|
139
|
-
// Do not import any other module. You can't touch this or you will be fired.
|
|
140
|
-
const distPath = path.join(resolveOpenClawRoot(), "dist", "extensionAPI.js");
|
|
141
|
-
if (!fs.existsSync(distPath)) {
|
|
142
|
-
throw new Error(
|
|
143
|
-
`Missing core module at ${distPath}. Run \`pnpm build\` or install the official package.`,
|
|
144
|
-
);
|
|
145
|
-
}
|
|
146
|
-
return await import(pathToFileURL(distPath).href);
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
export async function loadCoreAgentDeps(): Promise<CoreAgentDeps> {
|
|
150
|
-
if (coreDepsPromise) {
|
|
151
|
-
return coreDepsPromise;
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
coreDepsPromise = (async () => {
|
|
155
|
-
return await importCoreExtensionAPI();
|
|
156
|
-
})();
|
|
157
|
-
|
|
158
|
-
return coreDepsPromise;
|
|
159
|
-
}
|
|
14
|
+
export type CoreAgentDeps = OpenClawPluginApi["runtime"]["agent"];
|