@openclaw/voice-call 2026.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CHANGELOG.md +78 -0
  2. package/README.md +135 -0
  3. package/index.ts +497 -0
  4. package/openclaw.plugin.json +601 -0
  5. package/package.json +16 -0
  6. package/src/cli.ts +312 -0
  7. package/src/config.test.ts +204 -0
  8. package/src/config.ts +502 -0
  9. package/src/core-bridge.ts +198 -0
  10. package/src/manager/context.ts +21 -0
  11. package/src/manager/events.ts +177 -0
  12. package/src/manager/lookup.ts +33 -0
  13. package/src/manager/outbound.ts +248 -0
  14. package/src/manager/state.ts +50 -0
  15. package/src/manager/store.ts +88 -0
  16. package/src/manager/timers.ts +86 -0
  17. package/src/manager/twiml.ts +9 -0
  18. package/src/manager.test.ts +108 -0
  19. package/src/manager.ts +888 -0
  20. package/src/media-stream.test.ts +97 -0
  21. package/src/media-stream.ts +393 -0
  22. package/src/providers/base.ts +67 -0
  23. package/src/providers/index.ts +10 -0
  24. package/src/providers/mock.ts +168 -0
  25. package/src/providers/plivo.test.ts +28 -0
  26. package/src/providers/plivo.ts +504 -0
  27. package/src/providers/stt-openai-realtime.ts +311 -0
  28. package/src/providers/telnyx.ts +364 -0
  29. package/src/providers/tts-openai.ts +264 -0
  30. package/src/providers/twilio/api.ts +45 -0
  31. package/src/providers/twilio/webhook.ts +30 -0
  32. package/src/providers/twilio.test.ts +64 -0
  33. package/src/providers/twilio.ts +595 -0
  34. package/src/response-generator.ts +171 -0
  35. package/src/runtime.ts +217 -0
  36. package/src/telephony-audio.ts +88 -0
  37. package/src/telephony-tts.ts +95 -0
  38. package/src/tunnel.ts +331 -0
  39. package/src/types.ts +273 -0
  40. package/src/utils.ts +12 -0
  41. package/src/voice-mapping.ts +65 -0
  42. package/src/webhook-security.test.ts +260 -0
  43. package/src/webhook-security.ts +469 -0
  44. package/src/webhook.ts +491 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,78 @@
1
+ # Changelog
2
+
3
+ ## 2026.1.29
4
+
5
+ ### Changes
6
+ - Version alignment with core OpenClaw release numbers.
7
+
8
+ ## 2026.1.26
9
+
10
+ ### Changes
11
+ - Breaking: voice-call TTS now uses core `messages.tts` (plugin TTS config deep‑merges with core).
12
+ - Telephony TTS supports OpenAI + ElevenLabs; Edge TTS is ignored for calls.
13
+ - Removed legacy `tts.model`/`tts.voice`/`tts.instructions` plugin fields.
14
+ - Ngrok free-tier bypass renamed to `tunnel.allowNgrokFreeTierLoopbackBypass` and gated to loopback + `tunnel.provider="ngrok"`.
15
+
16
+ ## 2026.1.23
17
+
18
+ ### Changes
19
+ - Version alignment with core OpenClaw release numbers.
20
+
21
+ ## 2026.1.22
22
+
23
+ ### Changes
24
+ - Version alignment with core OpenClaw release numbers.
25
+
26
+ ## 2026.1.21
27
+
28
+ ### Changes
29
+ - Version alignment with core OpenClaw release numbers.
30
+
31
+ ## 2026.1.20
32
+
33
+ ### Changes
34
+ - Version alignment with core OpenClaw release numbers.
35
+
36
+ ## 2026.1.17-1
37
+
38
+ ### Changes
39
+ - Version alignment with core OpenClaw release numbers.
40
+
41
+ ## 2026.1.17
42
+
43
+ ### Changes
44
+ - Version alignment with core OpenClaw release numbers.
45
+
46
+ ## 2026.1.16
47
+
48
+ ### Changes
49
+ - Version alignment with core OpenClaw release numbers.
50
+
51
+ ## 2026.1.15
52
+
53
+ ### Changes
54
+ - Version alignment with core OpenClaw release numbers.
55
+
56
+ ## 2026.1.14
57
+
58
+ ### Changes
59
+ - Version alignment with core OpenClaw release numbers.
60
+
61
+ ## 0.1.0
62
+
63
+ ### Highlights
64
+ - First public release of the @openclaw/voice-call plugin.
65
+
66
+ ### Features
67
+ - Providers: Twilio (Programmable Voice + Media Streams), Telnyx (Call Control v2), and mock provider for local dev.
68
+ - Call flows: outbound notify vs. conversation modes, configurable auto‑hangup, and multi‑turn continuation.
69
+ - Inbound handling: policy controls (disabled/allowlist/open), allowlist matching, and inbound greeting.
70
+ - Webhooks: built‑in server with configurable bind/port/path plus `publicUrl` override.
71
+ - Exposure helpers: ngrok + Tailscale serve/funnel; dev‑only signature bypass for ngrok free tier.
72
+ - Streaming: OpenAI Realtime STT over media WebSocket with partial + final transcripts.
73
+ - Speech: OpenAI TTS (model/voice/instructions) with Twilio `<Say>` fallback.
74
+ - Tooling: `voice_call` tool actions for initiate/continue/speak/end/status.
75
+ - Gateway RPC: `voicecall.initiate|continue|speak|end|status` (+ legacy `voicecall.start`).
76
+ - CLI: `openclaw voicecall` commands (call/start/continue/speak/end/status/tail/expose).
77
+ - Observability: JSONL call logs and `voicecall tail` for live inspection.
78
+ - Response controls: `responseModel`, `responseSystemPrompt`, and `responseTimeoutMs` for auto‑responses.
package/README.md ADDED
@@ -0,0 +1,135 @@
1
+ # @openclaw/voice-call
2
+
3
+ Official Voice Call plugin for **OpenClaw**.
4
+
5
+ Providers:
6
+ - **Twilio** (Programmable Voice + Media Streams)
7
+ - **Telnyx** (Call Control v2)
8
+ - **Plivo** (Voice API + XML transfer + GetInput speech)
9
+ - **Mock** (dev/no network)
10
+
11
+ Docs: `https://docs.openclaw.ai/plugins/voice-call`
12
+ Plugin system: `https://docs.openclaw.ai/plugin`
13
+
14
+ ## Install (local dev)
15
+
16
+ ### Option A: install via OpenClaw (recommended)
17
+
18
+ ```bash
19
+ openclaw plugins install @openclaw/voice-call
20
+ ```
21
+
22
+ Restart the Gateway afterwards.
23
+
24
+ ### Option B: copy into your global extensions folder (dev)
25
+
26
+ ```bash
27
+ mkdir -p ~/.openclaw/extensions
28
+ cp -R extensions/voice-call ~/.openclaw/extensions/voice-call
29
+ cd ~/.openclaw/extensions/voice-call && pnpm install
30
+ ```
31
+
32
+ ## Config
33
+
34
+ Put under `plugins.entries.voice-call.config`:
35
+
36
+ ```json5
37
+ {
38
+ provider: "twilio", // or "telnyx" | "plivo" | "mock"
39
+ fromNumber: "+15550001234",
40
+ toNumber: "+15550005678",
41
+
42
+ twilio: {
43
+ accountSid: "ACxxxxxxxx",
44
+ authToken: "your_token"
45
+ },
46
+
47
+ plivo: {
48
+ authId: "MAxxxxxxxxxxxxxxxxxxxx",
49
+ authToken: "your_token"
50
+ },
51
+
52
+ // Webhook server
53
+ serve: {
54
+ port: 3334,
55
+ path: "/voice/webhook"
56
+ },
57
+
58
+ // Public exposure (pick one):
59
+ // publicUrl: "https://example.ngrok.app/voice/webhook",
60
+ // tunnel: { provider: "ngrok" },
61
+ // tailscale: { mode: "funnel", path: "/voice/webhook" }
62
+
63
+ outbound: {
64
+ defaultMode: "notify" // or "conversation"
65
+ },
66
+
67
+ streaming: {
68
+ enabled: true,
69
+ streamPath: "/voice/stream"
70
+ }
71
+ }
72
+ ```
73
+
74
+ Notes:
75
+ - Twilio/Telnyx/Plivo require a **publicly reachable** webhook URL.
76
+ - `mock` is a local dev provider (no network calls).
77
+ - `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only.
78
+
79
+ ## TTS for calls
80
+
81
+ Voice Call uses the core `messages.tts` configuration (OpenAI or ElevenLabs) for
82
+ streaming speech on calls. You can override it under the plugin config with the
83
+ same shape — overrides deep-merge with `messages.tts`.
84
+
85
+ ```json5
86
+ {
87
+ tts: {
88
+ provider: "openai",
89
+ openai: {
90
+ voice: "alloy"
91
+ }
92
+ }
93
+ }
94
+ ```
95
+
96
+ Notes:
97
+ - Edge TTS is ignored for voice calls (telephony audio needs PCM; Edge output is unreliable).
98
+ - Core TTS is used when Twilio media streaming is enabled; otherwise calls fall back to the provider's native voices.
99
+
100
+ ## CLI
101
+
102
+ ```bash
103
+ openclaw voicecall call --to "+15555550123" --message "Hello from OpenClaw"
104
+ openclaw voicecall continue --call-id <id> --message "Any questions?"
105
+ openclaw voicecall speak --call-id <id> --message "One moment"
106
+ openclaw voicecall end --call-id <id>
107
+ openclaw voicecall status --call-id <id>
108
+ openclaw voicecall tail
109
+ openclaw voicecall expose --mode funnel
110
+ ```
111
+
112
+ ## Tool
113
+
114
+ Tool name: `voice_call`
115
+
116
+ Actions:
117
+ - `initiate_call` (message, to?, mode?)
118
+ - `continue_call` (callId, message)
119
+ - `speak_to_user` (callId, message)
120
+ - `end_call` (callId)
121
+ - `get_status` (callId)
122
+
123
+ ## Gateway RPC
124
+
125
+ - `voicecall.initiate` (to?, message, mode?)
126
+ - `voicecall.continue` (callId, message)
127
+ - `voicecall.speak` (callId, message)
128
+ - `voicecall.end` (callId)
129
+ - `voicecall.status` (callId)
130
+
131
+ ## Notes
132
+
133
+ - Uses webhook signature verification for Twilio/Telnyx/Plivo.
134
+ - `responseModel` / `responseSystemPrompt` control AI auto-responses.
135
+ - Media streaming requires the `ws` package and an OpenAI Realtime API key.
package/index.ts ADDED
@@ -0,0 +1,497 @@
1
+ import { Type } from "@sinclair/typebox";
2
+ import type { CoreConfig } from "./src/core-bridge.js";
3
+ import {
4
+ VoiceCallConfigSchema,
5
+ resolveVoiceCallConfig,
6
+ validateProviderConfig,
7
+ type VoiceCallConfig,
8
+ } from "./src/config.js";
9
+ import { registerVoiceCallCli } from "./src/cli.js";
10
+ import { createVoiceCallRuntime, type VoiceCallRuntime } from "./src/runtime.js";
11
+
12
/**
 * Config schema adapter exposed to the plugin host.
 *
 * `parse` normalizes the raw plugin config (including legacy keys) before
 * handing it to `VoiceCallConfigSchema`; `uiHints` supplies labels and flags
 * for config fields, keyed by dotted config path.
 */
const voiceCallConfigSchema = {
  /**
   * Normalizes and validates raw plugin config.
   *
   * - Anything that is not a plain object (null, arrays, primitives) is
   *   treated as an empty config.
   * - `enabled` defaults to `true` unless an explicit boolean is given.
   * - The deprecated provider name "log" is mapped to "mock".
   * - When no provider is set and the plugin is enabled, "mock" is used.
   * - The legacy `twilio.from` string is used as `fromNumber` when
   *   `fromNumber` itself is null/undefined.
   *
   * @param value Raw plugin config as supplied by the host.
   * @returns The result of `VoiceCallConfigSchema.parse` on the normalized input.
   */
  parse(value: unknown): VoiceCallConfig {
    const raw =
      value && typeof value === "object" && !Array.isArray(value)
        ? (value as Record<string, unknown>)
        : {};

    // Legacy configs kept the caller ID under `twilio.from`.
    const twilio = raw.twilio as Record<string, unknown> | undefined;
    const legacyFrom = typeof twilio?.from === "string" ? twilio.from : undefined;

    const enabled = typeof raw.enabled === "boolean" ? raw.enabled : true;
    // "log" is the deprecated spelling of the "mock" provider.
    const providerRaw = raw.provider === "log" ? "mock" : raw.provider;
    const provider = providerRaw ?? (enabled ? "mock" : undefined);

    return VoiceCallConfigSchema.parse({
      ...raw,
      enabled,
      provider,
      fromNumber: raw.fromNumber ?? legacyFrom,
    });
  },
  uiHints: {
    provider: {
      label: "Provider",
      help: "Use twilio, telnyx, plivo, or mock for dev/no-network.",
    },
    fromNumber: { label: "From Number", placeholder: "+15550001234" },
    toNumber: { label: "Default To Number", placeholder: "+15550001234" },
    inboundPolicy: { label: "Inbound Policy" },
    allowFrom: { label: "Inbound Allowlist" },
    inboundGreeting: { label: "Inbound Greeting", advanced: true },
    "telnyx.apiKey": { label: "Telnyx API Key", sensitive: true },
    "telnyx.connectionId": { label: "Telnyx Connection ID" },
    "telnyx.publicKey": { label: "Telnyx Public Key", sensitive: true },
    "twilio.accountSid": { label: "Twilio Account SID" },
    "twilio.authToken": { label: "Twilio Auth Token", sensitive: true },
    "plivo.authId": { label: "Plivo Auth ID" },
    "plivo.authToken": { label: "Plivo Auth Token", sensitive: true },
    "outbound.defaultMode": { label: "Default Call Mode" },
    "outbound.notifyHangupDelaySec": {
      label: "Notify Hangup Delay (sec)",
      advanced: true,
    },
    "serve.port": { label: "Webhook Port" },
    "serve.bind": { label: "Webhook Bind" },
    "serve.path": { label: "Webhook Path" },
    "tailscale.mode": { label: "Tailscale Mode", advanced: true },
    "tailscale.path": { label: "Tailscale Path", advanced: true },
    "tunnel.provider": { label: "Tunnel Provider", advanced: true },
    "tunnel.ngrokAuthToken": {
      label: "ngrok Auth Token",
      sensitive: true,
      advanced: true,
    },
    "tunnel.ngrokDomain": { label: "ngrok Domain", advanced: true },
    "tunnel.allowNgrokFreeTierLoopbackBypass": {
      label: "Allow ngrok Free Tier (Loopback Bypass)",
      advanced: true,
    },
    "streaming.enabled": { label: "Enable Streaming", advanced: true },
    "streaming.openaiApiKey": {
      label: "OpenAI Realtime API Key",
      sensitive: true,
      advanced: true,
    },
    "streaming.sttModel": { label: "Realtime STT Model", advanced: true },
    "streaming.streamPath": { label: "Media Stream Path", advanced: true },
    "tts.provider": {
      label: "TTS Provider Override",
      help: "Deep-merges with messages.tts (Edge is ignored for calls).",
      advanced: true,
    },
    "tts.openai.model": { label: "OpenAI TTS Model", advanced: true },
    "tts.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
    "tts.openai.apiKey": {
      label: "OpenAI API Key",
      sensitive: true,
      advanced: true,
    },
    "tts.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
    "tts.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
    "tts.elevenlabs.apiKey": {
      label: "ElevenLabs API Key",
      sensitive: true,
      advanced: true,
    },
    "tts.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
    publicUrl: { label: "Public Webhook URL", advanced: true },
    skipSignatureVerification: {
      label: "Skip Signature Verification",
      advanced: true,
    },
    store: { label: "Call Log Store Path", advanced: true },
    responseModel: { label: "Response Model", advanced: true },
    responseSystemPrompt: { label: "Response System Prompt", advanced: true },
    responseTimeoutMs: { label: "Response Timeout (ms)", advanced: true },
  },
};
108
+
109
// Field builders shared by several tool-schema variants. Each call returns a
// fresh schema node so no node is shared between variants.
const callIdField = () => Type.String({ description: "Call ID" });
const optionalCallTargetField = () =>
  Type.Optional(Type.String({ description: "Call target" }));

// action: "initiate_call" — place a new outbound call.
const InitiateCallParams = Type.Object({
  action: Type.Literal("initiate_call"),
  to: optionalCallTargetField(),
  message: Type.String({ description: "Intro message" }),
  mode: Type.Optional(
    Type.Union([Type.Literal("notify"), Type.Literal("conversation")]),
  ),
});

// action: "continue_call" — send a follow-up message on an existing call.
const ContinueCallParams = Type.Object({
  action: Type.Literal("continue_call"),
  callId: callIdField(),
  message: Type.String({ description: "Follow-up message" }),
});

// action: "speak_to_user" — speak a message on an existing call.
const SpeakToUserParams = Type.Object({
  action: Type.Literal("speak_to_user"),
  callId: callIdField(),
  message: Type.String({ description: "Message to speak" }),
});

// action: "end_call" — hang up an existing call.
const EndCallParams = Type.Object({
  action: Type.Literal("end_call"),
  callId: callIdField(),
});

// action: "get_status" — look up a call.
const GetStatusParams = Type.Object({
  action: Type.Literal("get_status"),
  callId: callIdField(),
});

// Action-less form keyed on `mode` ("call" | "status") with an optional `sid`.
const LegacyModeParams = Type.Object({
  mode: Type.Optional(Type.Union([Type.Literal("call"), Type.Literal("status")])),
  to: optionalCallTargetField(),
  sid: Type.Optional(Type.String({ description: "Call SID" })),
  message: Type.Optional(Type.String({ description: "Optional intro message" })),
});

/** Parameter schema for the `voice_call` tool: one variant per action plus the action-less form. */
const VoiceCallToolSchema = Type.Union([
  InitiateCallParams,
  ContinueCallParams,
  SpeakToUserParams,
  EndCallParams,
  GetStatusParams,
  LegacyModeParams,
]);
141
+
142
/**
 * Plugin definition for OpenClaw voice calls.
 *
 * `register` wires up the `voicecall.*` gateway methods, the `voice_call`
 * tool, the `voicecall` CLI command group, and a `voicecall` service. All of
 * them share a single runtime that is created lazily on first use.
 */
const voiceCallPlugin = {
  id: "voice-call",
  name: "Voice Call",
  description: "Voice-call plugin with Telnyx/Twilio/Plivo providers",
  configSchema: voiceCallConfigSchema,
  register(api) {
    const config = resolveVoiceCallConfig(
      voiceCallConfigSchema.parse(api.pluginConfig),
    );
    const validation = validateProviderConfig(config);

    // Warn about legacy config keys that `voiceCallConfigSchema.parse` still accepts.
    if (api.pluginConfig && typeof api.pluginConfig === "object") {
      const raw = api.pluginConfig as Record<string, unknown>;
      const twilio = raw.twilio as Record<string, unknown> | undefined;
      if (raw.provider === "log") {
        api.logger.warn(
          "[voice-call] provider \"log\" is deprecated; use \"mock\" instead",
        );
      }
      if (typeof twilio?.from === "string") {
        api.logger.warn(
          "[voice-call] twilio.from is deprecated; use fromNumber instead",
        );
      }
    }

    let runtimePromise: Promise<VoiceCallRuntime> | null = null;
    let runtime: VoiceCallRuntime | null = null;

    /**
     * Returns the shared runtime, creating it on first use.
     *
     * Concurrent callers share one in-flight creation. If creation fails, the
     * failed attempt is discarded so that a later call retries instead of
     * replaying the same rejection forever.
     *
     * @throws Error when the plugin is disabled, the provider config is
     *   invalid, or runtime creation fails.
     */
    const ensureRuntime = async () => {
      if (!config.enabled) {
        throw new Error("Voice call disabled in plugin config");
      }
      if (!validation.valid) {
        throw new Error(validation.errors.join("; "));
      }
      if (runtime) return runtime;
      if (!runtimePromise) {
        const pending: Promise<VoiceCallRuntime> = createVoiceCallRuntime({
          config,
          coreConfig: api.config as CoreConfig,
          ttsRuntime: api.runtime.tts,
          logger: api.logger,
        }).catch((err: unknown) => {
          // Only clear the slot if it still holds this attempt; a newer
          // attempt may already have replaced it.
          if (runtimePromise === pending) runtimePromise = null;
          throw err;
        });
        runtimePromise = pending;
      }
      runtime = await runtimePromise;
      return runtime;
    };

    /** Reports a failure to a gateway caller as `{ error: <message> }`. */
    const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => {
      respond(false, { error: err instanceof Error ? err.message : String(err) });
    };

    /** Returns the trimmed value when it is a string, otherwise "". */
    const trimmedString = (value: unknown): string =>
      typeof value === "string" ? value.trim() : "";

    // voicecall.initiate (to?, message, mode?) — `to` falls back to config.toNumber.
    api.registerGatewayMethod("voicecall.initiate", async ({ params, respond }) => {
      try {
        const message = trimmedString(params?.message);
        if (!message) {
          respond(false, { error: "message required" });
          return;
        }
        const rt = await ensureRuntime();
        const to = trimmedString(params?.to) || rt.config.toNumber;
        if (!to) {
          respond(false, { error: "to required" });
          return;
        }
        const mode =
          params?.mode === "notify" || params?.mode === "conversation"
            ? params.mode
            : undefined;
        const result = await rt.manager.initiateCall(to, undefined, {
          message,
          mode,
        });
        if (!result.success) {
          respond(false, { error: result.error || "initiate failed" });
          return;
        }
        respond(true, { callId: result.callId, initiated: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.continue (callId, message)
    api.registerGatewayMethod("voicecall.continue", async ({ params, respond }) => {
      try {
        const callId = trimmedString(params?.callId);
        const message = trimmedString(params?.message);
        if (!callId || !message) {
          respond(false, { error: "callId and message required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.continueCall(callId, message);
        if (!result.success) {
          respond(false, { error: result.error || "continue failed" });
          return;
        }
        respond(true, { success: true, transcript: result.transcript });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.speak (callId, message)
    api.registerGatewayMethod("voicecall.speak", async ({ params, respond }) => {
      try {
        const callId = trimmedString(params?.callId);
        const message = trimmedString(params?.message);
        if (!callId || !message) {
          respond(false, { error: "callId and message required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.speak(callId, message);
        if (!result.success) {
          respond(false, { error: result.error || "speak failed" });
          return;
        }
        respond(true, { success: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.end (callId)
    api.registerGatewayMethod("voicecall.end", async ({ params, respond }) => {
      try {
        const callId = trimmedString(params?.callId);
        if (!callId) {
          respond(false, { error: "callId required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.endCall(callId);
        if (!result.success) {
          respond(false, { error: result.error || "end failed" });
          return;
        }
        respond(true, { success: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.status (callId | sid) — the id is tried as an internal call id
    // first, then as a provider call id.
    api.registerGatewayMethod("voicecall.status", async ({ params, respond }) => {
      try {
        // A string `callId` takes precedence over `sid`, even when it is blank.
        const raw =
          typeof params?.callId === "string"
            ? params.callId.trim()
            : typeof params?.sid === "string"
              ? params.sid.trim()
              : "";
        if (!raw) {
          respond(false, { error: "callId required" });
          return;
        }
        const rt = await ensureRuntime();
        const call =
          rt.manager.getCall(raw) || rt.manager.getCallByProviderCallId(raw);
        if (!call) {
          respond(true, { found: false });
          return;
        }
        respond(true, { found: true, call });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.start (to, message?) — requires an explicit `to` and never
    // passes a mode.
    api.registerGatewayMethod("voicecall.start", async ({ params, respond }) => {
      try {
        const to = trimmedString(params?.to);
        const message = trimmedString(params?.message);
        if (!to) {
          respond(false, { error: "to required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.initiateCall(to, undefined, {
          message: message || undefined,
        });
        if (!result.success) {
          respond(false, { error: result.error || "initiate failed" });
          return;
        }
        respond(true, { callId: result.callId, initiated: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    api.registerTool({
      name: "voice_call",
      label: "Voice Call",
      description:
        "Make phone calls and have voice conversations via the voice-call plugin.",
      parameters: VoiceCallToolSchema,
      /**
       * Runs one tool invocation. Failures are returned as an `{ error }`
       * payload rather than thrown.
       */
      async execute(_toolCallId, params) {
        // Wraps a payload as pretty-printed JSON text plus the raw `details`.
        const json = (payload: unknown) => ({
          content: [
            { type: "text", text: JSON.stringify(payload, null, 2) },
          ],
          details: payload,
        });

        try {
          const rt = await ensureRuntime();

          if (typeof params?.action === "string") {
            switch (params.action) {
              case "initiate_call": {
                const message = String(params.message || "").trim();
                if (!message) throw new Error("message required");
                const to =
                  typeof params.to === "string" && params.to.trim()
                    ? params.to.trim()
                    : rt.config.toNumber;
                if (!to) throw new Error("to required");
                const result = await rt.manager.initiateCall(to, undefined, {
                  message,
                  mode:
                    params.mode === "notify" || params.mode === "conversation"
                      ? params.mode
                      : undefined,
                });
                if (!result.success) {
                  throw new Error(result.error || "initiate failed");
                }
                return json({ callId: result.callId, initiated: true });
              }
              case "continue_call": {
                const callId = String(params.callId || "").trim();
                const message = String(params.message || "").trim();
                if (!callId || !message) {
                  throw new Error("callId and message required");
                }
                const result = await rt.manager.continueCall(callId, message);
                if (!result.success) {
                  throw new Error(result.error || "continue failed");
                }
                return json({ success: true, transcript: result.transcript });
              }
              case "speak_to_user": {
                const callId = String(params.callId || "").trim();
                const message = String(params.message || "").trim();
                if (!callId || !message) {
                  throw new Error("callId and message required");
                }
                const result = await rt.manager.speak(callId, message);
                if (!result.success) {
                  throw new Error(result.error || "speak failed");
                }
                return json({ success: true });
              }
              case "end_call": {
                const callId = String(params.callId || "").trim();
                if (!callId) throw new Error("callId required");
                const result = await rt.manager.endCall(callId);
                if (!result.success) {
                  throw new Error(result.error || "end failed");
                }
                return json({ success: true });
              }
              case "get_status": {
                const callId = String(params.callId || "").trim();
                if (!callId) throw new Error("callId required");
                const call =
                  rt.manager.getCall(callId) ||
                  rt.manager.getCallByProviderCallId(callId);
                return json(call ? { found: true, call } : { found: false });
              }
              default:
                // Without this, a misspelled action would fall through to the
                // action-less path below and place an outbound call.
                throw new Error(`unknown action: ${params.action}`);
            }
          }

          // Action-less form: `mode` selects "call" (the default) or "status".
          const mode = params?.mode ?? "call";
          if (mode === "status") {
            const sid =
              typeof params.sid === "string" ? params.sid.trim() : "";
            if (!sid) throw new Error("sid required for status");
            const call =
              rt.manager.getCall(sid) || rt.manager.getCallByProviderCallId(sid);
            return json(call ? { found: true, call } : { found: false });
          }

          const to =
            typeof params.to === "string" && params.to.trim()
              ? params.to.trim()
              : rt.config.toNumber;
          if (!to) throw new Error("to required for call");
          const result = await rt.manager.initiateCall(to, undefined, {
            message:
              typeof params.message === "string" && params.message.trim()
                ? params.message.trim()
                : undefined,
          });
          if (!result.success) {
            throw new Error(result.error || "initiate failed");
          }
          return json({ callId: result.callId, initiated: true });
        } catch (err) {
          return json({
            error: err instanceof Error ? err.message : String(err),
          });
        }
      },
    });

    api.registerCli(
      ({ program }) =>
        registerVoiceCallCli({
          program,
          config,
          ensureRuntime,
          logger: api.logger,
        }),
      { commands: ["voicecall"] },
    );

    api.registerService({
      id: "voicecall",
      // Creates the runtime at service start when the plugin is enabled; a
      // failure is logged rather than thrown.
      start: async () => {
        if (!config.enabled) return;
        try {
          await ensureRuntime();
        } catch (err) {
          api.logger.error(
            `[voice-call] Failed to start runtime: ${
              err instanceof Error ? err.message : String(err)
            }`,
          );
        }
      },
      // Stops the runtime if one was ever requested, and always clears the
      // cached handles so a later ensureRuntime() builds a fresh one.
      stop: async () => {
        if (!runtimePromise) return;
        try {
          const rt = await runtimePromise;
          await rt.stop();
        } finally {
          runtimePromise = null;
          runtime = null;
        }
      },
    });
  },
};

export default voiceCallPlugin;