@clawdbot/voice-call 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,20 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0
4
+
5
+ ### Highlights
6
+ - First public release of the @clawdbot/voice-call plugin.
7
+
8
+ ### Features
9
+ - Providers: Twilio (Programmable Voice + Media Streams), Telnyx (Call Control v2), and mock provider for local dev.
10
+ - Call flows: outbound notify vs. conversation modes, configurable auto‑hangup, and multi‑turn continuation.
11
+ - Inbound handling: policy controls (disabled/allowlist/open), allowlist matching, and inbound greeting.
12
+ - Webhooks: built‑in server with configurable bind/port/path plus `publicUrl` override.
13
+ - Exposure helpers: ngrok + Tailscale serve/funnel; dev‑only signature bypass for ngrok free tier.
14
+ - Streaming: OpenAI Realtime STT over media WebSocket with partial + final transcripts.
15
+ - Speech: OpenAI TTS (model/voice/instructions) with Twilio `<Say>` fallback.
16
+ - Tooling: `voice_call` tool actions for initiate/continue/speak/end/status.
17
+ - Gateway RPC: `voicecall.initiate|continue|speak|end|status` (+ legacy `voicecall.start`).
18
+ - CLI: `clawdbot voicecall` commands (call/start/continue/speak/end/status/tail/expose).
19
+ - Observability: JSONL call logs and `voicecall tail` for live inspection.
20
+ - Response controls: `responseModel`, `responseSystemPrompt`, and `responseTimeoutMs` for auto‑responses.
package/README.md ADDED
@@ -0,0 +1,107 @@
1
+ # @clawdbot/voice-call
2
+
3
+ Official Voice Call plugin for **Clawdbot**.
4
+
5
+ Providers:
6
+ - **Twilio** (Programmable Voice + Media Streams)
7
+ - **Telnyx** (Call Control v2)
8
+ - **Mock** (dev/no network)
9
+
10
+ Docs: `https://docs.clawd.bot/plugins/voice-call`
11
+ Plugin system: `https://docs.clawd.bot/plugin`
12
+
13
+ ## Install (local dev)
14
+
15
+ ### Option A: install via Clawdbot (recommended)
16
+
17
+ ```bash
18
+ clawdbot plugins install @clawdbot/voice-call
19
+ ```
20
+
21
+ Restart the Gateway afterwards.
22
+
23
+ ### Option B: copy into your global extensions folder (dev)
24
+
25
+ ```bash
26
+ mkdir -p ~/.clawdbot/extensions
27
+ cp -R extensions/voice-call ~/.clawdbot/extensions/voice-call
28
+ cd ~/.clawdbot/extensions/voice-call && pnpm install
29
+ ```
30
+
31
+ ## Config
32
+
33
+ Put under `plugins.entries.voice-call.config`:
34
+
35
+ ```json5
36
+ {
37
+ provider: "twilio", // or "telnyx" | "mock"
38
+ fromNumber: "+15550001234",
39
+ toNumber: "+15550005678",
40
+
41
+ twilio: {
42
+ accountSid: "ACxxxxxxxx",
43
+ authToken: "your_token"
44
+ },
45
+
46
+ // Webhook server
47
+ serve: {
48
+ port: 3334,
49
+ path: "/voice/webhook"
50
+ },
51
+
52
+ // Public exposure (pick one):
53
+ // publicUrl: "https://example.ngrok.app/voice/webhook",
54
+ // tunnel: { provider: "ngrok" },
55
+ // tailscale: { mode: "funnel", path: "/voice/webhook" }
56
+
57
+ outbound: {
58
+ defaultMode: "notify" // or "conversation"
59
+ },
60
+
61
+ streaming: {
62
+ enabled: true,
63
+ streamPath: "/voice/stream"
64
+ }
65
+ }
66
+ ```
67
+
68
+ Notes:
69
+ - Twilio/Telnyx require a **publicly reachable** webhook URL.
70
+ - `mock` is a local dev provider (no network calls).
71
+
72
+ ## CLI
73
+
74
+ ```bash
75
+ clawdbot voicecall call --to "+15555550123" --message "Hello from Clawdbot"
76
+ clawdbot voicecall continue --call-id <id> --message "Any questions?"
77
+ clawdbot voicecall speak --call-id <id> --message "One moment"
78
+ clawdbot voicecall end --call-id <id>
79
+ clawdbot voicecall status --call-id <id>
80
+ clawdbot voicecall tail
81
+ clawdbot voicecall expose --mode funnel
82
+ ```
83
+
84
+ ## Tool
85
+
86
+ Tool name: `voice_call`
87
+
88
+ Actions:
89
+ - `initiate_call` (message, to?, mode?)
90
+ - `continue_call` (callId, message)
91
+ - `speak_to_user` (callId, message)
92
+ - `end_call` (callId)
93
+ - `get_status` (callId)
94
+
95
+ ## Gateway RPC
96
+
97
+ - `voicecall.initiate` (to?, message, mode?)
98
+ - `voicecall.continue` (callId, message)
99
+ - `voicecall.speak` (callId, message)
100
+ - `voicecall.end` (callId)
101
+ - `voicecall.status` (callId)
102
+
103
+ ## Notes
104
+
105
+ - Uses webhook signature verification for Twilio/Telnyx.
106
+ - `responseModel` / `responseSystemPrompt` control AI auto-responses.
107
+ - Media streaming requires the `ws` package and an OpenAI Realtime API key.
package/index.ts ADDED
@@ -0,0 +1,477 @@
1
+ import { Type } from "@sinclair/typebox";
2
+
3
+ import type { CoreConfig } from "./src/core-bridge.js";
4
+ import {
5
+ VoiceCallConfigSchema,
6
+ validateProviderConfig,
7
+ type VoiceCallConfig,
8
+ } from "./src/config.js";
9
+ import { registerVoiceCallCli } from "./src/cli.js";
10
+ import { createVoiceCallRuntime, type VoiceCallRuntime } from "./src/runtime.js";
11
+
12
+ const voiceCallConfigSchema = { // Config adapter: normalizes legacy keys, then delegates to VoiceCallConfigSchema; also carries UI hints.
13
+ parse(value: unknown): VoiceCallConfig {
14
+ const raw =
15
+ value && typeof value === "object" && !Array.isArray(value)
16
+ ? (value as Record<string, unknown>)
17
+ : {}; // anything that is not a non-array object is treated as an empty config
18
+
19
+ const twilio = raw.twilio as Record<string, unknown> | undefined;
20
+ const legacyFrom = typeof twilio?.from === "string" ? twilio.from : undefined; // twilio.from is only used as a fallback for fromNumber below
21
+
22
+ const enabled = typeof raw.enabled === "boolean" ? raw.enabled : true; // defaults to true when not given as a boolean
23
+ const providerRaw = raw.provider === "log" ? "mock" : raw.provider; // the "log" provider name is mapped to "mock"
24
+ const provider = providerRaw ?? (enabled ? "mock" : undefined); // no provider given: "mock" while enabled, otherwise left undefined
25
+
26
+ return VoiceCallConfigSchema.parse({
27
+ ...raw,
28
+ enabled,
29
+ provider,
30
+ fromNumber: raw.fromNumber ?? legacyFrom, // an explicit fromNumber wins over twilio.from
31
+ });
32
+ },
33
+ uiHints: { // per-field label/help/placeholder/sensitive/advanced hints; nested fields use dotted keys
34
+ provider: {
35
+ label: "Provider",
36
+ help: "Use twilio, telnyx, or mock for dev/no-network.",
37
+ },
38
+ fromNumber: { label: "From Number", placeholder: "+15550001234" },
39
+ toNumber: { label: "Default To Number", placeholder: "+15550001234" },
40
+ inboundPolicy: { label: "Inbound Policy" },
41
+ allowFrom: { label: "Inbound Allowlist" },
42
+ inboundGreeting: { label: "Inbound Greeting", advanced: true },
43
+ "telnyx.apiKey": { label: "Telnyx API Key", sensitive: true },
44
+ "telnyx.connectionId": { label: "Telnyx Connection ID" },
45
+ "telnyx.publicKey": { label: "Telnyx Public Key", sensitive: true },
46
+ "twilio.accountSid": { label: "Twilio Account SID" },
47
+ "twilio.authToken": { label: "Twilio Auth Token", sensitive: true },
48
+ "outbound.defaultMode": { label: "Default Call Mode" },
49
+ "outbound.notifyHangupDelaySec": {
50
+ label: "Notify Hangup Delay (sec)",
51
+ advanced: true,
52
+ },
53
+ "serve.port": { label: "Webhook Port" },
54
+ "serve.bind": { label: "Webhook Bind" },
55
+ "serve.path": { label: "Webhook Path" },
56
+ "tailscale.mode": { label: "Tailscale Mode", advanced: true },
57
+ "tailscale.path": { label: "Tailscale Path", advanced: true },
58
+ "tunnel.provider": { label: "Tunnel Provider", advanced: true },
59
+ "tunnel.ngrokAuthToken": {
60
+ label: "ngrok Auth Token",
61
+ sensitive: true,
62
+ advanced: true,
63
+ },
64
+ "tunnel.ngrokDomain": { label: "ngrok Domain", advanced: true },
65
+ "tunnel.allowNgrokFreeTier": {
66
+ label: "Allow ngrok Free Tier",
67
+ advanced: true,
68
+ },
69
+ "streaming.enabled": { label: "Enable Streaming", advanced: true },
70
+ "streaming.openaiApiKey": {
71
+ label: "OpenAI Realtime API Key",
72
+ sensitive: true,
73
+ advanced: true,
74
+ },
75
+ "streaming.sttModel": { label: "Realtime STT Model", advanced: true },
76
+ "streaming.streamPath": { label: "Media Stream Path", advanced: true },
77
+ "tts.model": { label: "TTS Model", advanced: true },
78
+ "tts.voice": { label: "TTS Voice", advanced: true },
79
+ "tts.instructions": { label: "TTS Instructions", advanced: true },
80
+ publicUrl: { label: "Public Webhook URL", advanced: true },
81
+ skipSignatureVerification: {
82
+ label: "Skip Signature Verification",
83
+ advanced: true,
84
+ },
85
+ store: { label: "Call Log Store Path", advanced: true },
86
+ responseModel: { label: "Response Model", advanced: true },
87
+ responseSystemPrompt: { label: "Response System Prompt", advanced: true },
88
+ responseTimeoutMs: { label: "Response Timeout (ms)", advanced: true },
89
+ },
90
+ };
91
+
92
+ const VoiceCallToolSchema = Type.Union([ // Parameter schema for the voice_call tool: one variant per action, plus an action-less variant.
93
+ Type.Object({
94
+ action: Type.Literal("initiate_call"),
95
+ to: Type.Optional(Type.String({ description: "Call target" })),
96
+ message: Type.String({ description: "Intro message" }),
97
+ mode: Type.Optional(Type.Union([Type.Literal("notify"), Type.Literal("conversation")])),
98
+ }),
99
+ Type.Object({
100
+ action: Type.Literal("continue_call"),
101
+ callId: Type.String({ description: "Call ID" }),
102
+ message: Type.String({ description: "Follow-up message" }),
103
+ }),
104
+ Type.Object({
105
+ action: Type.Literal("speak_to_user"),
106
+ callId: Type.String({ description: "Call ID" }),
107
+ message: Type.String({ description: "Message to speak" }),
108
+ }),
109
+ Type.Object({
110
+ action: Type.Literal("end_call"),
111
+ callId: Type.String({ description: "Call ID" }),
112
+ }),
113
+ Type.Object({
114
+ action: Type.Literal("get_status"),
115
+ callId: Type.String({ description: "Call ID" }),
116
+ }),
117
+ Type.Object({ // no `action` key: the tool's execute() handles this shape through its mode-based ("call" | "status") path
118
+ mode: Type.Optional(Type.Union([Type.Literal("call"), Type.Literal("status")])),
119
+ to: Type.Optional(Type.String({ description: "Call target" })),
120
+ sid: Type.Optional(Type.String({ description: "Call SID" })),
121
+ message: Type.Optional(Type.String({ description: "Optional intro message" })),
122
+ }),
123
+ ]);
124
+
125
+ const voiceCallPlugin = {
126
+ id: "voice-call",
127
+ name: "Voice Call",
128
+ description: "Voice-call plugin with Telnyx/Twilio providers",
129
+ configSchema: voiceCallConfigSchema,
130
+ register(api) {
131
+ const cfg = voiceCallConfigSchema.parse(api.pluginConfig); // normalized plugin config used by everything registered below
132
+ const validation = validateProviderConfig(cfg); // computed once; ensureRuntime throws its errors when it is not valid
133
+
134
+ if (api.pluginConfig && typeof api.pluginConfig === "object") { // warn about deprecated keys present in the raw (un-normalized) config
135
+ const raw = api.pluginConfig as Record<string, unknown>;
136
+ const twilio = raw.twilio as Record<string, unknown> | undefined;
137
+ if (raw.provider === "log") {
138
+ api.logger.warn(
139
+ "[voice-call] provider \"log\" is deprecated; use \"mock\" instead",
140
+ );
141
+ }
142
+ if (typeof twilio?.from === "string") {
143
+ api.logger.warn(
144
+ "[voice-call] twilio.from is deprecated; use fromNumber instead",
145
+ );
146
+ }
147
+ }
148
+
149
+ let runtimePromise: Promise<VoiceCallRuntime> | null = null; // in-flight or completed startup; reset to null after a failed startup or stop()
150
+ let runtime: VoiceCallRuntime | null = null; // resolved runtime, cached once startup succeeds
151
+
152
+ const ensureRuntime = async () => { // Returns the runtime, starting it on first use; throws when disabled, misconfigured, or startup fails.
153
+ if (!cfg.enabled) throw new Error("Voice call disabled in plugin config");
154
+ if (!validation.valid) throw new Error(validation.errors.join("; "));
155
+ if (runtime) return runtime;
156
+ const pending = runtimePromise ?? createVoiceCallRuntime({ // concurrent callers share a single in-flight startup
157
+ config: cfg,
158
+ coreConfig: api.config as CoreConfig,
159
+ logger: api.logger,
160
+ });
161
+ runtimePromise = pending;
162
+ try {
163
+ return (runtime = await pending);
164
+ } catch (err) {
165
+ // Drop the failed startup so the next call retries instead of replaying the cached rejection.
166
+ if (runtimePromise === pending) runtimePromise = null;
167
+ throw err;
168
+ }
169
+ };
170
+
171
+ const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => { // Replies with ok=false and the error's message (or its string form).
172
+ respond(false, { error: err instanceof Error ? err.message : String(err) });
173
+ };
174
+
175
+ api.registerGatewayMethod("voicecall.initiate", async ({ params, respond }) => { // Places a call; requires a non-empty message.
176
+ try {
177
+ const message =
178
+ typeof params?.message === "string" ? params.message.trim() : "";
179
+ if (!message) {
180
+ respond(false, { error: "message required" });
181
+ return;
182
+ }
183
+ const rt = await ensureRuntime(); // runs after message validation, so a missing message never starts the runtime
184
+ const to =
185
+ typeof params?.to === "string" && params.to.trim()
186
+ ? params.to.trim()
187
+ : rt.config.toNumber; // fall back to the configured default target
188
+ if (!to) {
189
+ respond(false, { error: "to required" });
190
+ return;
191
+ }
192
+ const mode =
193
+ params?.mode === "notify" || params?.mode === "conversation"
194
+ ? params.mode
195
+ : undefined; // any other value is passed on as undefined
196
+ const result = await rt.manager.initiateCall(to, undefined, {
197
+ message,
198
+ mode,
199
+ });
200
+ if (!result.success) {
201
+ respond(false, { error: result.error || "initiate failed" });
202
+ return;
203
+ }
204
+ respond(true, { callId: result.callId, initiated: true });
205
+ } catch (err) {
206
+ sendError(respond, err);
207
+ }
208
+ });
209
+
210
+ api.registerGatewayMethod("voicecall.continue", async ({ params, respond }) => { // Calls manager.continueCall and replies with the transcript it returns.
211
+ try {
212
+ const callId =
213
+ typeof params?.callId === "string" ? params.callId.trim() : "";
214
+ const message =
215
+ typeof params?.message === "string" ? params.message.trim() : "";
216
+ if (!callId || !message) { // both must be non-empty after trimming
217
+ respond(false, { error: "callId and message required" });
218
+ return;
219
+ }
220
+ const rt = await ensureRuntime();
221
+ const result = await rt.manager.continueCall(callId, message);
222
+ if (!result.success) {
223
+ respond(false, { error: result.error || "continue failed" });
224
+ return;
225
+ }
226
+ respond(true, { success: true, transcript: result.transcript });
227
+ } catch (err) {
228
+ sendError(respond, err);
229
+ }
230
+ });
231
+
232
+ api.registerGatewayMethod("voicecall.speak", async ({ params, respond }) => { // Calls manager.speak for the given call; replies with success only.
233
+ try {
234
+ const callId =
235
+ typeof params?.callId === "string" ? params.callId.trim() : "";
236
+ const message =
237
+ typeof params?.message === "string" ? params.message.trim() : "";
238
+ if (!callId || !message) { // both must be non-empty after trimming
239
+ respond(false, { error: "callId and message required" });
240
+ return;
241
+ }
242
+ const rt = await ensureRuntime();
243
+ const result = await rt.manager.speak(callId, message);
244
+ if (!result.success) {
245
+ respond(false, { error: result.error || "speak failed" });
246
+ return;
247
+ }
248
+ respond(true, { success: true });
249
+ } catch (err) {
250
+ sendError(respond, err);
251
+ }
252
+ });
253
+
254
+ api.registerGatewayMethod("voicecall.end", async ({ params, respond }) => { // Calls manager.endCall for the given callId.
255
+ try {
256
+ const callId =
257
+ typeof params?.callId === "string" ? params.callId.trim() : "";
258
+ if (!callId) {
259
+ respond(false, { error: "callId required" });
260
+ return;
261
+ }
262
+ const rt = await ensureRuntime();
263
+ const result = await rt.manager.endCall(callId);
264
+ if (!result.success) {
265
+ respond(false, { error: result.error || "end failed" });
266
+ return;
267
+ }
268
+ respond(true, { success: true });
269
+ } catch (err) {
270
+ sendError(respond, err);
271
+ }
272
+ });
273
+
274
+ api.registerGatewayMethod("voicecall.status", async ({ params, respond }) => { // Looks a call up by callId (or `sid`); an unknown id replies ok with found: false.
275
+ try {
276
+ const callId =
277
+ typeof params?.callId === "string" ? params.callId.trim() : "";
278
+ const sid =
279
+ typeof params?.sid === "string" ? params.sid.trim() : "";
280
+ // Use `sid` whenever callId is missing or blank, not only when callId is a non-string.
281
+ const raw = callId || sid;
282
+ if (!raw) {
283
+ respond(false, { error: "callId required" });
284
+ return;
285
+ }
286
+ const rt = await ensureRuntime();
287
+ const call =
288
+ rt.manager.getCall(raw) || rt.manager.getCallByProviderCallId(raw); // try the internal id first, then the provider's call id
289
+ if (!call) {
290
+ respond(true, { found: false });
291
+ return;
292
+ }
293
+ respond(true, { found: true, call });
294
+ } catch (err) {
295
+ sendError(respond, err);
296
+ }
297
+ });
298
+
299
+ api.registerGatewayMethod("voicecall.start", async ({ params, respond }) => { // Places a call; unlike voicecall.initiate, `to` is required here and message is optional.
300
+ try {
301
+ const to = typeof params?.to === "string" ? params.to.trim() : "";
302
+ const message =
303
+ typeof params?.message === "string" ? params.message.trim() : "";
304
+ if (!to) { // no fallback to the configured default number in this method
305
+ respond(false, { error: "to required" });
306
+ return;
307
+ }
308
+ const rt = await ensureRuntime();
309
+ const result = await rt.manager.initiateCall(to, undefined, {
310
+ message: message || undefined, // an empty message is passed as undefined
311
+ });
312
+ if (!result.success) {
313
+ respond(false, { error: result.error || "initiate failed" });
314
+ return;
315
+ }
316
+ respond(true, { callId: result.callId, initiated: true });
317
+ } catch (err) {
318
+ sendError(respond, err);
319
+ }
320
+ });
321
+
322
+ api.registerTool({ // voice_call tool: dispatches on `action`, or on `mode` when no action is given; failures come back as an { error } payload.
323
+ name: "voice_call",
324
+ label: "Voice Call",
325
+ description:
326
+ "Make phone calls and have voice conversations via the voice-call plugin.",
327
+ parameters: VoiceCallToolSchema,
328
+ async execute(_toolCallId, params) {
329
+ const json = (payload: unknown) => ({ // wraps a payload as pretty-printed text content plus the raw value in `details`
330
+ content: [
331
+ { type: "text", text: JSON.stringify(payload, null, 2) },
332
+ ],
333
+ details: payload,
334
+ });
335
+
336
+ try {
337
+ const rt = await ensureRuntime();
338
+
339
+ if (typeof params?.action === "string") {
340
+ switch (params.action) {
341
+ case "initiate_call": {
342
+ const message = String(params.message || "").trim();
343
+ if (!message) throw new Error("message required");
344
+ const to =
345
+ typeof params.to === "string" && params.to.trim()
346
+ ? params.to.trim()
347
+ : rt.config.toNumber; // fall back to the configured default target
348
+ if (!to) throw new Error("to required");
349
+ const result = await rt.manager.initiateCall(to, undefined, {
350
+ message,
351
+ mode:
352
+ params.mode === "notify" || params.mode === "conversation"
353
+ ? params.mode
354
+ : undefined,
355
+ });
356
+ if (!result.success) {
357
+ throw new Error(result.error || "initiate failed");
358
+ }
359
+ return json({ callId: result.callId, initiated: true });
360
+ }
361
+ case "continue_call": {
362
+ const callId = String(params.callId || "").trim();
363
+ const message = String(params.message || "").trim();
364
+ if (!callId || !message) {
365
+ throw new Error("callId and message required");
366
+ }
367
+ const result = await rt.manager.continueCall(callId, message);
368
+ if (!result.success) {
369
+ throw new Error(result.error || "continue failed");
370
+ }
371
+ return json({ success: true, transcript: result.transcript });
372
+ }
373
+ case "speak_to_user": {
374
+ const callId = String(params.callId || "").trim();
375
+ const message = String(params.message || "").trim();
376
+ if (!callId || !message) {
377
+ throw new Error("callId and message required");
378
+ }
379
+ const result = await rt.manager.speak(callId, message);
380
+ if (!result.success) {
381
+ throw new Error(result.error || "speak failed");
382
+ }
383
+ return json({ success: true });
384
+ }
385
+ case "end_call": {
386
+ const callId = String(params.callId || "").trim();
387
+ if (!callId) throw new Error("callId required");
388
+ const result = await rt.manager.endCall(callId);
389
+ if (!result.success) throw new Error(result.error || "end failed");
390
+ return json({ success: true });
391
+ }
392
+ case "get_status": {
393
+ const callId = String(params.callId || "").trim();
394
+ if (!callId) throw new Error("callId required");
395
+ const call =
396
+ rt.manager.getCall(callId) ||
397
+ rt.manager.getCallByProviderCallId(callId);
398
+ return json(call ? { found: true, call } : { found: false });
399
+ }
400
+ default: // an unrecognized action must not fall through to the action-less path below, which would place a call
401
+ throw new Error(`unknown action: ${String(params.action)}`);
402
+ }
403
+ }
404
+
405
+ const mode = params?.mode ?? "call"; // action-less path: "call" unless mode says otherwise
406
+ if (mode === "status") {
407
+ const sid =
408
+ typeof params.sid === "string" ? params.sid.trim() : "";
409
+ if (!sid) throw new Error("sid required for status");
410
+ const call =
411
+ rt.manager.getCall(sid) || rt.manager.getCallByProviderCallId(sid);
412
+ return json(call ? { found: true, call } : { found: false });
413
+ }
414
+
415
+ const to =
416
+ typeof params.to === "string" && params.to.trim()
417
+ ? params.to.trim()
418
+ : rt.config.toNumber;
419
+ if (!to) throw new Error("to required for call");
420
+ const result = await rt.manager.initiateCall(to, undefined, {
421
+ message:
422
+ typeof params.message === "string" && params.message.trim()
423
+ ? params.message.trim()
424
+ : undefined,
425
+ });
426
+ if (!result.success) {
427
+ throw new Error(result.error || "initiate failed");
428
+ }
429
+ return json({ callId: result.callId, initiated: true });
430
+ } catch (err) {
431
+ return json({ // every failure above, including runtime startup, is reported as a normal tool result
432
+ error: err instanceof Error ? err.message : String(err),
433
+ });
434
+ }
435
+ },
436
+ });
437
+
438
+ api.registerCli( // Registers the `voicecall` CLI command; wiring is delegated to registerVoiceCallCli.
439
+ ({ program }) =>
440
+ registerVoiceCallCli({
441
+ program,
442
+ config: cfg,
443
+ ensureRuntime, // the function itself is passed, not a started runtime
444
+ logger: api.logger,
445
+ }),
446
+ { commands: ["voicecall"] },
447
+ );
448
+
449
+ api.registerService({ // Service hooks: start() brings the runtime up when enabled; stop() shuts it down and clears the cached state.
450
+ id: "voicecall",
451
+ start: async () => {
452
+ if (!cfg.enabled) return;
453
+ try {
454
+ await ensureRuntime();
455
+ } catch (err) {
456
+ api.logger.error(
457
+ `[voice-call] Failed to start runtime: ${
458
+ err instanceof Error ? err.message : String(err)
459
+ }`,
460
+ );
461
+ }
462
+ },
463
+ stop: async () => {
464
+ if (!runtimePromise) return;
465
+ try {
466
+ const rt = await runtimePromise.catch(() => null); // a startup that failed has nothing to stop; do not rethrow its old error during shutdown
467
+ await rt?.stop();
468
+ } finally {
469
+ runtimePromise = null;
470
+ runtime = null;
471
+ }
472
+ },
473
+ });
474
+ },
475
+ };
476
+
477
+ export default voiceCallPlugin;
package/package.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "@clawdbot/voice-call",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Clawdbot voice-call plugin",
6
+ "dependencies": {
7
+ "@sinclair/typebox": "0.34.47",
8
+ "ws": "^8.19.0",
9
+ "zod": "^4.3.5"
10
+ },
11
+ "clawdbot": {
12
+ "extensions": ["./index.ts"]
13
+ }
14
+ }