@clawdbot/voice-call 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/README.md +107 -0
- package/index.ts +477 -0
- package/package.json +14 -0
- package/src/cli.ts +297 -0
- package/src/config.ts +355 -0
- package/src/core-bridge.ts +190 -0
- package/src/manager.ts +846 -0
- package/src/media-stream.ts +279 -0
- package/src/providers/base.ts +67 -0
- package/src/providers/index.ts +9 -0
- package/src/providers/mock.ts +168 -0
- package/src/providers/stt-openai-realtime.ts +303 -0
- package/src/providers/telnyx.ts +364 -0
- package/src/providers/tts-openai.ts +264 -0
- package/src/providers/twilio.ts +537 -0
- package/src/response-generator.ts +171 -0
- package/src/runtime.ts +194 -0
- package/src/tunnel.ts +330 -0
- package/src/types.ts +272 -0
- package/src/utils.ts +12 -0
- package/src/voice-mapping.ts +65 -0
- package/src/webhook-security.ts +197 -0
- package/src/webhook.ts +480 -0
package/src/cli.ts
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
|
|
5
|
+
import type { Command } from "commander";
|
|
6
|
+
|
|
7
|
+
import type { VoiceCallConfig } from "./config.js";
|
|
8
|
+
import type { VoiceCallRuntime } from "./runtime.js";
|
|
9
|
+
import { resolveUserPath } from "./utils.js";
|
|
10
|
+
import {
|
|
11
|
+
cleanupTailscaleExposureRoute,
|
|
12
|
+
getTailscaleSelfInfo,
|
|
13
|
+
setupTailscaleExposureRoute,
|
|
14
|
+
} from "./webhook.js";
|
|
15
|
+
|
|
16
|
+
/** Signature shared by every log-level method: takes one preformatted message. */
type LogMethod = (message: string) => void;

/**
 * Minimal logging surface used by the voice-call CLI.
 * Exposes exactly three levels: `info`, `warn`, and `error`.
 */
type Logger = Record<"info" | "warn" | "error", LogMethod>;
|
|
21
|
+
|
|
22
|
+
/**
 * Normalize a user-supplied Tailscale exposure mode.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Blank input
 * keeps the historical default of "funnel". Any other unrecognized value is
 * rejected instead of silently becoming "funnel", so a typo such as "of" can
 * no longer turn a request to disable exposure into public exposure.
 *
 * @param input Raw mode string, e.g. from the `--mode` CLI option.
 * @returns One of "off", "serve", or "funnel".
 * @throws Error when `input` is non-blank and not a known mode.
 */
function resolveMode(input: string): "off" | "serve" | "funnel" {
  const raw = input.trim().toLowerCase();
  if (raw === "off" || raw === "serve" || raw === "funnel") return raw;
  // Blank input: preserve the previous default.
  if (raw === "") return "funnel";
  throw new Error(`Invalid mode "${input}": expected off, serve, or funnel`);
}
|
|
27
|
+
|
|
28
|
+
/**
 * Compute the default path of the call log file.
 *
 * Uses the trimmed `config.store` directory when it is non-empty; otherwise
 * falls back to `<home>/clawd/voice-calls`. The directory is passed through
 * `resolveUserPath` and `calls.jsonl` is appended.
 */
function resolveDefaultStorePath(config: VoiceCallConfig): string {
  const configured = config.store?.trim();
  const directory = configured
    ? configured
    : path.join(os.homedir(), "clawd", "voice-calls");
  const resolved = resolveUserPath(directory);
  return path.join(resolved, "calls.jsonl");
}
|
|
33
|
+
|
|
34
|
+
/**
 * Return a promise that resolves after `ms` milliseconds (via `setTimeout`).
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
37
|
+
|
|
38
|
+
/**
 * Parse a numeric CLI option value.
 *
 * @param name Option name used in the error message (e.g. "--poll").
 * @param raw Raw string from the command line, or undefined when absent.
 * @param fallback Value returned when `raw` is undefined.
 * @throws Error when `raw` is blank or does not parse to a finite number.
 */
function parseNumberOption(
  name: string,
  raw: string | undefined,
  fallback: number,
): number {
  if (raw === undefined) return fallback;
  const parsed = Number(raw);
  if (raw.trim() === "" || !Number.isFinite(parsed)) {
    throw new Error(`Invalid ${name} value "${raw}": expected a number`);
  }
  return parsed;
}

/**
 * Split a byte chunk into complete (newline-terminated) lines plus the
 * unterminated tail.
 *
 * Splitting on the newline byte keeps partially written lines and multi-byte
 * UTF-8 characters that straddle two reads out of the output until the rest
 * arrives (0x0a never occurs inside a multi-byte UTF-8 sequence).
 */
function splitCompleteLines(chunk: Buffer): { lines: string[]; rest: Buffer } {
  const lastNewline = chunk.lastIndexOf(0x0a);
  if (lastNewline === -1) return { lines: [], rest: chunk };
  const text = chunk.subarray(0, lastNewline).toString("utf8");
  return {
    lines: text.split("\n").filter(Boolean),
    rest: chunk.subarray(lastNewline + 1),
  };
}

/**
 * Narrow a raw `--mode` value to a known call mode.
 * Unknown values are dropped (as before) but now produce a warning.
 */
function resolveCallMode(
  mode: string | undefined,
  logger: Logger,
): "notify" | "conversation" | undefined {
  if (mode === undefined || mode === "notify" || mode === "conversation") {
    return mode;
  }
  logger.warn(
    `Ignoring unknown call mode "${mode}" (expected notify or conversation)`,
  );
  return undefined;
}

/**
 * Register the `voicecall` command group on the given commander program.
 *
 * Subcommands: `call`, `start`, `continue`, `speak`, `end`, `status`, `tail`,
 * and `expose`. Commands that talk to the call manager obtain the runtime
 * lazily through `ensureRuntime`. Results are printed to stdout as JSON;
 * failures are raised as `Error`s.
 *
 * @param params.program Commander program to attach the command group to.
 * @param params.config Voice-call configuration (store path, serve/tailscale defaults).
 * @param params.ensureRuntime Factory returning the runtime on demand.
 * @param params.logger Logger used for warnings and errors.
 */
export function registerVoiceCallCli(params: {
  program: Command;
  config: VoiceCallConfig;
  ensureRuntime: () => Promise<VoiceCallRuntime>;
  logger: Logger;
}) {
  const { program, config, ensureRuntime, logger } = params;
  const root = program.command("voicecall").description("Voice call utilities");

  /** Pretty-print a value to stdout as JSON. */
  const printJson = (value: unknown): void => {
    // eslint-disable-next-line no-console
    console.log(JSON.stringify(value, null, 2));
  };

  /** Shared implementation of `call` and `start`: place an outbound call. */
  const placeCall = async (options: {
    to?: string;
    message?: string;
    mode?: string;
  }): Promise<void> => {
    const rt = await ensureRuntime();
    const to = options.to ?? rt.config.toNumber;
    if (!to) {
      throw new Error("Missing --to and no toNumber configured");
    }
    const result = await rt.manager.initiateCall(to, undefined, {
      message: options.message,
      mode: resolveCallMode(options.mode, logger),
    });
    if (!result.success) {
      throw new Error(result.error || "initiate failed");
    }
    printJson({ callId: result.callId });
  };

  root
    .command("call")
    .description("Initiate an outbound voice call")
    .requiredOption(
      "-m, --message <text>",
      "Message to speak when call connects",
    )
    .option(
      "-t, --to <phone>",
      "Phone number to call (E.164 format, uses config toNumber if not set)",
    )
    .option(
      "--mode <mode>",
      "Call mode: notify (hangup after message) or conversation (stay open)",
      "conversation",
    )
    .action(
      async (options: { message: string; to?: string; mode?: string }) => {
        await placeCall(options);
      },
    );

  root
    .command("start")
    .description("Alias for voicecall call")
    .requiredOption("--to <phone>", "Phone number to call")
    .option("--message <text>", "Message to speak when call connects")
    .option(
      "--mode <mode>",
      "Call mode: notify (hangup after message) or conversation (stay open)",
      "conversation",
    )
    .action(
      async (options: { to: string; message?: string; mode?: string }) => {
        await placeCall(options);
      },
    );

  root
    .command("continue")
    .description("Speak a message and wait for a response")
    .requiredOption("--call-id <id>", "Call ID")
    .requiredOption("--message <text>", "Message to speak")
    .action(async (options: { callId: string; message: string }) => {
      const rt = await ensureRuntime();
      const result = await rt.manager.continueCall(
        options.callId,
        options.message,
      );
      if (!result.success) {
        throw new Error(result.error || "continue failed");
      }
      printJson(result);
    });

  root
    .command("speak")
    .description("Speak a message without waiting for response")
    .requiredOption("--call-id <id>", "Call ID")
    .requiredOption("--message <text>", "Message to speak")
    .action(async (options: { callId: string; message: string }) => {
      const rt = await ensureRuntime();
      const result = await rt.manager.speak(options.callId, options.message);
      if (!result.success) {
        throw new Error(result.error || "speak failed");
      }
      printJson(result);
    });

  root
    .command("end")
    .description("Hang up an active call")
    .requiredOption("--call-id <id>", "Call ID")
    .action(async (options: { callId: string }) => {
      const rt = await ensureRuntime();
      const result = await rt.manager.endCall(options.callId);
      if (!result.success) {
        throw new Error(result.error || "end failed");
      }
      printJson(result);
    });

  root
    .command("status")
    .description("Show call status")
    .requiredOption("--call-id <id>", "Call ID")
    .action(async (options: { callId: string }) => {
      const rt = await ensureRuntime();
      const call = rt.manager.getCall(options.callId);
      printJson(call ?? { found: false });
    });

  root
    .command("tail")
    .description(
      "Tail voice-call JSONL logs (prints new lines; useful during provider tests)",
    )
    .option("--file <path>", "Path to calls.jsonl", resolveDefaultStorePath(config))
    .option("--since <n>", "Print last N lines first", "25")
    .option("--poll <ms>", "Poll interval in ms", "250")
    .action(
      async (options: { file: string; since?: string; poll?: string }) => {
        const file = options.file;
        // Reject non-numeric input: NaN would print the whole file (--since)
        // or turn the poll loop into a near-busy loop (--poll).
        const since = Math.max(
          0,
          Math.floor(parseNumberOption("--since", options.since, 0)),
        );
        const pollMs = Math.max(
          50,
          parseNumberOption("--poll", options.poll, 250),
        );

        if (!fs.existsSync(file)) {
          logger.error(`No log file at ${file}`);
          process.exit(1);
        }

        // Read raw bytes so the resume offset is an exact byte position.
        const initial = fs.readFileSync(file);
        const head = splitCompleteLines(initial);
        for (const line of head.lines.slice(
          Math.max(0, head.lines.length - since),
        )) {
          // eslint-disable-next-line no-console
          console.log(line);
        }

        // `offset` is the file position already read; `carry` holds bytes read
        // but not yet terminated by a newline.
        let offset = initial.length;
        let carry: Buffer = head.rest;

        for (;;) {
          try {
            const stat = fs.statSync(file);
            if (stat.size < offset) {
              // File shrank (truncated/rotated): restart from the beginning
              // and discard the stale partial line.
              offset = 0;
              carry = Buffer.alloc(0);
            }
            if (stat.size > offset) {
              const fd = fs.openSync(file, "r");
              try {
                const buf = Buffer.alloc(stat.size - offset);
                const bytesRead = fs.readSync(fd, buf, 0, buf.length, offset);
                // Advance by what was actually read; a short read is picked up
                // on the next poll instead of being skipped.
                offset += bytesRead;
                const next = splitCompleteLines(
                  Buffer.concat([carry, buf.subarray(0, bytesRead)]),
                );
                carry = next.rest;
                for (const line of next.lines) {
                  // eslint-disable-next-line no-console
                  console.log(line);
                }
              } finally {
                fs.closeSync(fd);
              }
            }
          } catch {
            // Best-effort tail: ignore transient fs errors (e.g. the file is
            // briefly missing) and retry on the next poll.
          }
          await sleep(pollMs);
        }
      },
    );

  root
    .command("expose")
    .description("Enable/disable Tailscale serve/funnel for the webhook")
    .option("--mode <mode>", "off | serve (tailnet) | funnel (public)", "funnel")
    .option(
      "--path <path>",
      "Tailscale path to expose (recommend matching serve.path)",
    )
    .option("--port <port>", "Local webhook port")
    .option("--serve-path <path>", "Local webhook path")
    .action(
      async (options: {
        mode?: string;
        port?: string;
        path?: string;
        servePath?: string;
      }) => {
        const mode = resolveMode(options.mode ?? "funnel");
        const rawPort = options.port ?? config.serve.port ?? 3334;
        const servePort = Number(rawPort);
        // Fail early instead of building a URL such as http://127.0.0.1:NaN.
        if (
          !Number.isInteger(servePort) ||
          servePort < 1 ||
          servePort > 65535
        ) {
          throw new Error(
            `Invalid port "${String(rawPort)}": expected an integer between 1 and 65535`,
          );
        }
        const servePath = String(
          options.servePath ?? config.serve.path ?? "/voice/webhook",
        );
        const tsPath = String(
          options.path ?? config.tailscale?.path ?? servePath,
        );

        const localUrl = `http://127.0.0.1:${servePort}`;

        if (mode === "off") {
          // Remove both route kinds; which one was active is not tracked here.
          await cleanupTailscaleExposureRoute({ mode: "serve", path: tsPath });
          await cleanupTailscaleExposureRoute({ mode: "funnel", path: tsPath });
          printJson({ ok: true, mode: "off", path: tsPath });
          return;
        }

        const publicUrl = await setupTailscaleExposureRoute({
          mode,
          path: tsPath,
          localUrl,
        });

        // Only when setup yielded no URL: look up the node to build an
        // admin "enable" link for the hint.
        const tsInfo = publicUrl ? null : await getTailscaleSelfInfo();
        const enableUrl = tsInfo?.nodeId
          ? `https://login.tailscale.com/f/${mode}?node=${tsInfo.nodeId}`
          : null;

        printJson({
          ok: Boolean(publicUrl),
          mode,
          path: tsPath,
          localUrl,
          publicUrl,
          hint: publicUrl
            ? undefined
            : {
                note: "Tailscale serve/funnel may be disabled on this tailnet (or require admin enable).",
                enableUrl,
              },
        });
      },
    );
}
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
// -----------------------------------------------------------------------------
|
|
4
|
+
// Phone Number Validation
|
|
5
|
+
// -----------------------------------------------------------------------------
|
|
6
|
+
|
|
7
|
+
/**
 * E.164 pattern: a leading "+", a non-zero first digit, then 1 to 14 more digits.
 */
const E164_PATTERN = /^\+[1-9]\d{1,14}$/;

/**
 * E.164 phone number format: +[country code][number].
 * The example in the error message uses the 555 prefix (reserved for fictional numbers).
 */
export const E164Schema = z
  .string()
  .regex(E164_PATTERN, "Expected E.164 format, e.g. +15550001234");
|
|
14
|
+
|
|
15
|
+
// -----------------------------------------------------------------------------
|
|
16
|
+
// Inbound Policy
|
|
17
|
+
// -----------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
/** Allowed inbound policy values, in declaration order. */
const INBOUND_POLICIES = ["disabled", "allowlist", "pairing", "open"] as const;

/**
 * How inbound calls are handled:
 * - "disabled": block all inbound calls (outbound only)
 * - "allowlist": only accept calls from numbers in allowFrom
 * - "pairing": unknown callers can request pairing (future)
 * - "open": accept all inbound calls (dangerous!)
 */
export const InboundPolicySchema = z.enum(INBOUND_POLICIES);
export type InboundPolicy = z.infer<typeof InboundPolicySchema>;
|
|
33
|
+
|
|
34
|
+
// -----------------------------------------------------------------------------
|
|
35
|
+
// Provider-Specific Configuration
|
|
36
|
+
// -----------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/** Optional Telnyx setting that must be a non-empty string when present. */
const telnyxCredential = () => z.string().min(1).optional();

/** Telnyx provider settings; every field is optional at the schema level. */
export const TelnyxConfigSchema = z.object({
  /** Telnyx API v2 key */
  apiKey: telnyxCredential(),
  /** Telnyx connection ID (from Call Control app) */
  connectionId: telnyxCredential(),
  /** Public key for webhook signature verification */
  publicKey: telnyxCredential(),
});
export type TelnyxConfig = z.infer<typeof TelnyxConfigSchema>;
|
|
47
|
+
|
|
48
|
+
/** Optional Twilio setting that must be a non-empty string when present. */
const twilioCredential = () => z.string().min(1).optional();

/** Twilio provider settings; every field is optional at the schema level. */
export const TwilioConfigSchema = z.object({
  /** Twilio Account SID */
  accountSid: twilioCredential(),
  /** Twilio Auth Token */
  authToken: twilioCredential(),
});
export type TwilioConfig = z.infer<typeof TwilioConfigSchema>;
|
|
55
|
+
|
|
56
|
+
// -----------------------------------------------------------------------------
|
|
57
|
+
// STT/TTS Configuration
|
|
58
|
+
// -----------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
/** Value used when the whole `stt` section is omitted. */
const STT_DEFAULTS = { provider: "openai", model: "whisper-1" } as const;

/** Speech-to-text settings. */
export const SttConfigSchema = z
  .object({
    /** STT provider (currently only OpenAI supported) */
    provider: z.literal("openai").default("openai"),
    /** Whisper model to use */
    model: z.string().min(1).default("whisper-1"),
  })
  .default(STT_DEFAULTS);
export type SttConfig = z.infer<typeof SttConfigSchema>;
|
|
69
|
+
|
|
70
|
+
/** Value used when the whole `tts` section is omitted. */
const TTS_DEFAULTS = {
  provider: "openai",
  model: "gpt-4o-mini-tts",
  voice: "coral",
} as const;

/** Text-to-speech settings. */
export const TtsConfigSchema = z
  .object({
    /** TTS provider (currently only OpenAI supported) */
    provider: z.literal("openai").default("openai"),
    /**
     * TTS model to use:
     * - gpt-4o-mini-tts: newest, supports instructions for tone/style control (recommended)
     * - tts-1: lower latency
     * - tts-1-hd: higher quality
     */
    model: z.string().min(1).default("gpt-4o-mini-tts"),
    /**
     * Voice ID. For best quality, use marin or cedar.
     * All voices: alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, shimmer, verse, marin, cedar
     */
    voice: z.string().min(1).default("coral"),
    /**
     * Instructions for speech style (only works with gpt-4o-mini-tts).
     * Examples: "Speak in a cheerful tone", "Talk like a sympathetic customer service agent"
     */
    instructions: z.string().optional(),
  })
  .default(TTS_DEFAULTS);
export type TtsConfig = z.infer<typeof TtsConfigSchema>;
|
|
94
|
+
|
|
95
|
+
// -----------------------------------------------------------------------------
|
|
96
|
+
// Webhook Server Configuration
|
|
97
|
+
// -----------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
/**
 * Local webhook server settings.
 *
 * The port is limited to the valid TCP range (1-65535), so an out-of-range
 * value is rejected when the config is validated.
 */
export const VoiceCallServeConfigSchema = z
  .object({
    /** Port to listen on (1-65535) */
    port: z.number().int().positive().max(65535).default(3334),
    /** Bind address */
    bind: z.string().default("127.0.0.1"),
    /** Webhook path */
    path: z.string().min(1).default("/voice/webhook"),
  })
  .default({ port: 3334, bind: "127.0.0.1", path: "/voice/webhook" });
export type VoiceCallServeConfig = z.infer<typeof VoiceCallServeConfigSchema>;
|
|
110
|
+
|
|
111
|
+
/** Value used when the whole `tailscale` section is omitted. */
const TAILSCALE_DEFAULTS = { mode: "off", path: "/voice/webhook" } as const;

/** Tailscale exposure settings for the webhook. */
export const VoiceCallTailscaleConfigSchema = z
  .object({
    /**
     * Tailscale exposure mode:
     * - "off": No Tailscale exposure
     * - "serve": Tailscale serve (private to tailnet)
     * - "funnel": Tailscale funnel (public HTTPS)
     */
    mode: z.enum(["off", "serve", "funnel"]).default("off"),
    /** Path for Tailscale serve/funnel (should usually match serve.path) */
    path: z.string().min(1).default("/voice/webhook"),
  })
  .default(TAILSCALE_DEFAULTS);
export type VoiceCallTailscaleConfig = z.infer<
  typeof VoiceCallTailscaleConfigSchema
>;
|
|
127
|
+
|
|
128
|
+
// -----------------------------------------------------------------------------
|
|
129
|
+
// Tunnel Configuration (unified ngrok/tailscale)
|
|
130
|
+
// -----------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
/** Value used when the whole `tunnel` section is omitted. */
const TUNNEL_DEFAULTS = { provider: "none", allowNgrokFreeTier: true } as const;

/** Unified tunnel settings (ngrok or Tailscale). */
export const VoiceCallTunnelConfigSchema = z
  .object({
    /**
     * Tunnel provider:
     * - "none": No tunnel (use publicUrl if set, or manual setup)
     * - "ngrok": Use ngrok for public HTTPS tunnel
     * - "tailscale-serve": Tailscale serve (private to tailnet)
     * - "tailscale-funnel": Tailscale funnel (public HTTPS)
     */
    provider: z
      .enum(["none", "ngrok", "tailscale-serve", "tailscale-funnel"])
      .default("none"),
    /** ngrok auth token (optional, enables longer sessions and more features) */
    ngrokAuthToken: z.string().min(1).optional(),
    /** ngrok custom domain (paid feature, e.g., "myapp.ngrok.io") */
    ngrokDomain: z.string().min(1).optional(),
    /**
     * Allow ngrok free tier compatibility mode.
     * When true, signature verification failures on ngrok-free.app URLs
     * will be logged but allowed through. Less secure, but necessary
     * for ngrok free tier which may modify URLs.
     */
    allowNgrokFreeTier: z.boolean().default(true),
  })
  .default(TUNNEL_DEFAULTS);
export type VoiceCallTunnelConfig = z.infer<typeof VoiceCallTunnelConfigSchema>;
|
|
158
|
+
|
|
159
|
+
// -----------------------------------------------------------------------------
|
|
160
|
+
// Outbound Call Configuration
|
|
161
|
+
// -----------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
/** Allowed outbound call modes, in declaration order. */
const CALL_MODES = ["notify", "conversation"] as const;

/**
 * Call mode determines how outbound calls behave:
 * - "notify": deliver message and auto-hangup after delay (one-way notification)
 * - "conversation": stay open for back-and-forth until explicit end or timeout
 */
export const CallModeSchema = z.enum(CALL_MODES);
export type CallMode = z.infer<typeof CallModeSchema>;
|
|
170
|
+
|
|
171
|
+
/** Value used when the whole `outbound` section is omitted. */
const OUTBOUND_DEFAULTS = {
  defaultMode: "notify",
  notifyHangupDelaySec: 3,
} as const;

/** Outbound call settings. */
export const OutboundConfigSchema = z
  .object({
    /** Default call mode for outbound calls */
    defaultMode: CallModeSchema.default("notify"),
    /** Seconds to wait after TTS before auto-hangup in notify mode */
    notifyHangupDelaySec: z.number().int().nonnegative().default(3),
  })
  .default(OUTBOUND_DEFAULTS);
export type OutboundConfig = z.infer<typeof OutboundConfigSchema>;
|
|
180
|
+
|
|
181
|
+
// -----------------------------------------------------------------------------
|
|
182
|
+
// Streaming Configuration (OpenAI Realtime STT)
|
|
183
|
+
// -----------------------------------------------------------------------------
|
|
184
|
+
|
|
185
|
+
/** Value used when the whole `streaming` section is omitted. */
const STREAMING_DEFAULTS = {
  enabled: false,
  sttProvider: "openai-realtime",
  sttModel: "gpt-4o-transcribe",
  silenceDurationMs: 800,
  vadThreshold: 0.5,
  streamPath: "/voice/stream",
} as const;

/** Real-time audio streaming settings (OpenAI Realtime STT). */
export const VoiceCallStreamingConfigSchema = z
  .object({
    /** Enable real-time audio streaming (requires WebSocket support) */
    enabled: z.boolean().default(false),
    /** STT provider for real-time transcription */
    sttProvider: z.enum(["openai-realtime"]).default("openai-realtime"),
    /** OpenAI API key for Realtime API (uses OPENAI_API_KEY env if not set) */
    openaiApiKey: z.string().min(1).optional(),
    /** OpenAI transcription model (default: gpt-4o-transcribe) */
    sttModel: z.string().min(1).default("gpt-4o-transcribe"),
    /** VAD silence duration in ms before considering speech ended */
    silenceDurationMs: z.number().int().positive().default(800),
    /** VAD threshold 0-1 (higher = less sensitive) */
    vadThreshold: z.number().min(0).max(1).default(0.5),
    /** WebSocket path for media stream connections */
    streamPath: z.string().min(1).default("/voice/stream"),
  })
  .default(STREAMING_DEFAULTS);
export type VoiceCallStreamingConfig = z.infer<
  typeof VoiceCallStreamingConfigSchema
>;
|
|
213
|
+
|
|
214
|
+
// -----------------------------------------------------------------------------
|
|
215
|
+
// Main Voice Call Configuration
|
|
216
|
+
// -----------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
/** Fresh schema for a strictly positive integer (timeouts, limits). */
const positiveInt = () => z.number().int().positive();

/**
 * Top-level voice-call plugin configuration.
 * Nested sections (`outbound`, `serve`, `tailscale`, `tunnel`, `streaming`,
 * `stt`, `tts`) carry their own defaults via their schemas.
 */
export const VoiceCallConfigSchema = z.object({
  // --- Activation & provider ---------------------------------------------

  /** Enable voice call functionality */
  enabled: z.boolean().default(false),
  /** Active provider (telnyx, twilio, or mock) */
  provider: z.enum(["telnyx", "twilio", "mock"]).optional(),
  /** Telnyx-specific configuration */
  telnyx: TelnyxConfigSchema.optional(),
  /** Twilio-specific configuration */
  twilio: TwilioConfigSchema.optional(),

  // --- Numbers & inbound handling ----------------------------------------

  /** Phone number to call from (E.164) */
  fromNumber: E164Schema.optional(),
  /** Default phone number to call (E.164) */
  toNumber: E164Schema.optional(),
  /** Inbound call policy */
  inboundPolicy: InboundPolicySchema.default("disabled"),
  /** Allowlist of phone numbers for inbound calls (E.164) */
  allowFrom: z.array(E164Schema).default([]),
  /** Greeting message for inbound calls */
  inboundGreeting: z.string().optional(),

  // --- Outbound behavior, limits, and timeouts ---------------------------

  /** Outbound call configuration */
  outbound: OutboundConfigSchema,
  /** Maximum call duration in seconds */
  maxDurationSeconds: positiveInt().default(300),
  /** Silence timeout for end-of-speech detection (ms) */
  silenceTimeoutMs: positiveInt().default(800),
  /** Timeout for user transcript (ms) */
  transcriptTimeoutMs: positiveInt().default(180000),
  /** Ring timeout for outbound calls (ms) */
  ringTimeoutMs: positiveInt().default(30000),
  /** Maximum concurrent calls */
  maxConcurrentCalls: positiveInt().default(1),

  // --- Webhook exposure --------------------------------------------------

  /** Webhook server configuration */
  serve: VoiceCallServeConfigSchema,
  /** Tailscale exposure configuration (legacy, prefer tunnel config) */
  tailscale: VoiceCallTailscaleConfigSchema,
  /** Tunnel configuration (unified ngrok/tailscale) */
  tunnel: VoiceCallTunnelConfigSchema,
  /** Real-time audio streaming configuration */
  streaming: VoiceCallStreamingConfigSchema,
  /** Public webhook URL override (if set, bypasses tunnel auto-detection) */
  publicUrl: z.string().url().optional(),
  /** Skip webhook signature verification (development only, NOT for production) */
  skipSignatureVerification: z.boolean().default(false),

  // --- Speech, storage, and response generation --------------------------

  /** STT configuration */
  stt: SttConfigSchema,
  /** TTS configuration */
  tts: TtsConfigSchema,
  /** Store path for call logs */
  store: z.string().optional(),
  /** Model for generating voice responses (e.g., "anthropic/claude-sonnet-4", "openai/gpt-4o") */
  responseModel: z.string().default("openai/gpt-4o-mini"),
  /** System prompt for voice responses */
  responseSystemPrompt: z.string().optional(),
  /** Timeout for response generation in ms (default 30s) */
  responseTimeoutMs: positiveInt().default(30000),
});

export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
|
|
302
|
+
|
|
303
|
+
// -----------------------------------------------------------------------------
|
|
304
|
+
// Configuration Helpers
|
|
305
|
+
// -----------------------------------------------------------------------------
|
|
306
|
+
|
|
307
|
+
/**
 * Check that the configuration carries the fields the selected provider needs.
 *
 * A disabled configuration is always valid. Otherwise a provider is required,
 * `fromNumber` is required unless the provider is "mock", and the "telnyx" and
 * "twilio" providers each require their two credentials.
 *
 * @param config Parsed voice-call configuration.
 * @returns `valid` is true when `errors` is empty; `errors` lists one message
 *          per missing field, in a fixed order.
 */
export function validateProviderConfig(config: VoiceCallConfig): {
  valid: boolean;
  errors: string[];
} {
  // Nothing to validate while the plugin is switched off.
  if (!config.enabled) {
    return { valid: true, errors: [] };
  }

  const usesTelnyx = config.provider === "telnyx";
  const usesTwilio = config.provider === "twilio";

  // Each entry pairs "is this requirement violated?" with its message.
  const checks: Array<[boolean, string]> = [
    [
      !config.provider,
      "plugins.entries.voice-call.config.provider is required",
    ],
    [
      !config.fromNumber && config.provider !== "mock",
      "plugins.entries.voice-call.config.fromNumber is required",
    ],
    [
      usesTelnyx && !config.telnyx?.apiKey,
      "plugins.entries.voice-call.config.telnyx.apiKey is required (or set TELNYX_API_KEY env)",
    ],
    [
      usesTelnyx && !config.telnyx?.connectionId,
      "plugins.entries.voice-call.config.telnyx.connectionId is required (or set TELNYX_CONNECTION_ID env)",
    ],
    [
      usesTwilio && !config.twilio?.accountSid,
      "plugins.entries.voice-call.config.twilio.accountSid is required (or set TWILIO_ACCOUNT_SID env)",
    ],
    [
      usesTwilio && !config.twilio?.authToken,
      "plugins.entries.voice-call.config.twilio.authToken is required (or set TWILIO_AUTH_TOKEN env)",
    ],
  ];

  const errors = checks
    .filter(([violated]) => violated)
    .map(([, message]) => message);

  return { valid: errors.length === 0, errors };
}
|