@openclaw/voice-call 2026.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +78 -0
- package/README.md +135 -0
- package/index.ts +497 -0
- package/openclaw.plugin.json +601 -0
- package/package.json +16 -0
- package/src/cli.ts +312 -0
- package/src/config.test.ts +204 -0
- package/src/config.ts +502 -0
- package/src/core-bridge.ts +198 -0
- package/src/manager/context.ts +21 -0
- package/src/manager/events.ts +177 -0
- package/src/manager/lookup.ts +33 -0
- package/src/manager/outbound.ts +248 -0
- package/src/manager/state.ts +50 -0
- package/src/manager/store.ts +88 -0
- package/src/manager/timers.ts +86 -0
- package/src/manager/twiml.ts +9 -0
- package/src/manager.test.ts +108 -0
- package/src/manager.ts +888 -0
- package/src/media-stream.test.ts +97 -0
- package/src/media-stream.ts +393 -0
- package/src/providers/base.ts +67 -0
- package/src/providers/index.ts +10 -0
- package/src/providers/mock.ts +168 -0
- package/src/providers/plivo.test.ts +28 -0
- package/src/providers/plivo.ts +504 -0
- package/src/providers/stt-openai-realtime.ts +311 -0
- package/src/providers/telnyx.ts +364 -0
- package/src/providers/tts-openai.ts +264 -0
- package/src/providers/twilio/api.ts +45 -0
- package/src/providers/twilio/webhook.ts +30 -0
- package/src/providers/twilio.test.ts +64 -0
- package/src/providers/twilio.ts +595 -0
- package/src/response-generator.ts +171 -0
- package/src/runtime.ts +217 -0
- package/src/telephony-audio.ts +88 -0
- package/src/telephony-tts.ts +95 -0
- package/src/tunnel.ts +331 -0
- package/src/types.ts +273 -0
- package/src/utils.ts +12 -0
- package/src/voice-mapping.ts +65 -0
- package/src/webhook-security.test.ts +260 -0
- package/src/webhook-security.ts +469 -0
- package/src/webhook.ts +491 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 2026.1.29
|
|
4
|
+
|
|
5
|
+
### Changes
|
|
6
|
+
- Version alignment with core OpenClaw release numbers.
|
|
7
|
+
|
|
8
|
+
## 2026.1.26
|
|
9
|
+
|
|
10
|
+
### Changes
|
|
11
|
+
- Breaking: voice-call TTS now uses core `messages.tts` (plugin TTS config deep‑merges with core).
|
|
12
|
+
- Telephony TTS supports OpenAI + ElevenLabs; Edge TTS is ignored for calls.
|
|
13
|
+
- Removed legacy `tts.model`/`tts.voice`/`tts.instructions` plugin fields.
|
|
14
|
+
- Ngrok free-tier bypass renamed to `tunnel.allowNgrokFreeTierLoopbackBypass` and gated to loopback + `tunnel.provider="ngrok"`.
|
|
15
|
+
|
|
16
|
+
## 2026.1.23
|
|
17
|
+
|
|
18
|
+
### Changes
|
|
19
|
+
- Version alignment with core OpenClaw release numbers.
|
|
20
|
+
|
|
21
|
+
## 2026.1.22
|
|
22
|
+
|
|
23
|
+
### Changes
|
|
24
|
+
- Version alignment with core OpenClaw release numbers.
|
|
25
|
+
|
|
26
|
+
## 2026.1.21
|
|
27
|
+
|
|
28
|
+
### Changes
|
|
29
|
+
- Version alignment with core OpenClaw release numbers.
|
|
30
|
+
|
|
31
|
+
## 2026.1.20
|
|
32
|
+
|
|
33
|
+
### Changes
|
|
34
|
+
- Version alignment with core OpenClaw release numbers.
|
|
35
|
+
|
|
36
|
+
## 2026.1.17-1
|
|
37
|
+
|
|
38
|
+
### Changes
|
|
39
|
+
- Version alignment with core OpenClaw release numbers.
|
|
40
|
+
|
|
41
|
+
## 2026.1.17
|
|
42
|
+
|
|
43
|
+
### Changes
|
|
44
|
+
- Version alignment with core OpenClaw release numbers.
|
|
45
|
+
|
|
46
|
+
## 2026.1.16
|
|
47
|
+
|
|
48
|
+
### Changes
|
|
49
|
+
- Version alignment with core OpenClaw release numbers.
|
|
50
|
+
|
|
51
|
+
## 2026.1.15
|
|
52
|
+
|
|
53
|
+
### Changes
|
|
54
|
+
- Version alignment with core OpenClaw release numbers.
|
|
55
|
+
|
|
56
|
+
## 2026.1.14
|
|
57
|
+
|
|
58
|
+
### Changes
|
|
59
|
+
- Version alignment with core OpenClaw release numbers.
|
|
60
|
+
|
|
61
|
+
## 0.1.0
|
|
62
|
+
|
|
63
|
+
### Highlights
|
|
64
|
+
- First public release of the @openclaw/voice-call plugin.
|
|
65
|
+
|
|
66
|
+
### Features
|
|
67
|
+
- Providers: Twilio (Programmable Voice + Media Streams), Telnyx (Call Control v2), and mock provider for local dev.
|
|
68
|
+
- Call flows: outbound notify vs. conversation modes, configurable auto‑hangup, and multi‑turn continuation.
|
|
69
|
+
- Inbound handling: policy controls (disabled/allowlist/open), allowlist matching, and inbound greeting.
|
|
70
|
+
- Webhooks: built‑in server with configurable bind/port/path plus `publicUrl` override.
|
|
71
|
+
- Exposure helpers: ngrok + Tailscale serve/funnel; dev‑only signature bypass for ngrok free tier.
|
|
72
|
+
- Streaming: OpenAI Realtime STT over media WebSocket with partial + final transcripts.
|
|
73
|
+
- Speech: OpenAI TTS (model/voice/instructions) with Twilio `<Say>` fallback.
|
|
74
|
+
- Tooling: `voice_call` tool actions for initiate/continue/speak/end/status.
|
|
75
|
+
- Gateway RPC: `voicecall.initiate|continue|speak|end|status` (+ legacy `voicecall.start`).
|
|
76
|
+
- CLI: `openclaw voicecall` commands (call/start/continue/speak/end/status/tail/expose).
|
|
77
|
+
- Observability: JSONL call logs and `voicecall tail` for live inspection.
|
|
78
|
+
- Response controls: `responseModel`, `responseSystemPrompt`, and `responseTimeoutMs` for auto‑responses.
|
package/README.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# @openclaw/voice-call
|
|
2
|
+
|
|
3
|
+
Official Voice Call plugin for **OpenClaw**.
|
|
4
|
+
|
|
5
|
+
Providers:
|
|
6
|
+
- **Twilio** (Programmable Voice + Media Streams)
|
|
7
|
+
- **Telnyx** (Call Control v2)
|
|
8
|
+
- **Plivo** (Voice API + XML transfer + GetInput speech)
|
|
9
|
+
- **Mock** (dev/no network)
|
|
10
|
+
|
|
11
|
+
Docs: `https://docs.openclaw.ai/plugins/voice-call`
|
|
12
|
+
Plugin system: `https://docs.openclaw.ai/plugin`
|
|
13
|
+
|
|
14
|
+
## Install (local dev)
|
|
15
|
+
|
|
16
|
+
### Option A: install via OpenClaw (recommended)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
openclaw plugins install @openclaw/voice-call
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Restart the Gateway afterwards.
|
|
23
|
+
|
|
24
|
+
### Option B: copy into your global extensions folder (dev)
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
mkdir -p ~/.openclaw/extensions
|
|
28
|
+
cp -R extensions/voice-call ~/.openclaw/extensions/voice-call
|
|
29
|
+
cd ~/.openclaw/extensions/voice-call && pnpm install
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Config
|
|
33
|
+
|
|
34
|
+
Put under `plugins.entries.voice-call.config`:
|
|
35
|
+
|
|
36
|
+
```json5
|
|
37
|
+
{
|
|
38
|
+
provider: "twilio", // or "telnyx" | "plivo" | "mock"
|
|
39
|
+
fromNumber: "+15550001234",
|
|
40
|
+
toNumber: "+15550005678",
|
|
41
|
+
|
|
42
|
+
twilio: {
|
|
43
|
+
accountSid: "ACxxxxxxxx",
|
|
44
|
+
authToken: "your_token"
|
|
45
|
+
},
|
|
46
|
+
|
|
47
|
+
plivo: {
|
|
48
|
+
authId: "MAxxxxxxxxxxxxxxxxxxxx",
|
|
49
|
+
authToken: "your_token"
|
|
50
|
+
},
|
|
51
|
+
|
|
52
|
+
// Webhook server
|
|
53
|
+
serve: {
|
|
54
|
+
port: 3334,
|
|
55
|
+
path: "/voice/webhook"
|
|
56
|
+
},
|
|
57
|
+
|
|
58
|
+
// Public exposure (pick one):
|
|
59
|
+
// publicUrl: "https://example.ngrok.app/voice/webhook",
|
|
60
|
+
// tunnel: { provider: "ngrok" },
|
|
61
|
+
// tailscale: { mode: "funnel", path: "/voice/webhook" }
|
|
62
|
+
|
|
63
|
+
outbound: {
|
|
64
|
+
defaultMode: "notify" // or "conversation"
|
|
65
|
+
},
|
|
66
|
+
|
|
67
|
+
streaming: {
|
|
68
|
+
enabled: true,
|
|
69
|
+
streamPath: "/voice/stream"
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Notes:
|
|
75
|
+
- Twilio/Telnyx/Plivo require a **publicly reachable** webhook URL.
|
|
76
|
+
- `mock` is a local dev provider (no network calls).
|
|
77
|
+
- `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only.
|
|
78
|
+
|
|
79
|
+
## TTS for calls
|
|
80
|
+
|
|
81
|
+
Voice Call uses the core `messages.tts` configuration (OpenAI or ElevenLabs) for
|
|
82
|
+
streaming speech on calls. You can override it under the plugin config with the
|
|
83
|
+
same shape — overrides deep-merge with `messages.tts`.
|
|
84
|
+
|
|
85
|
+
```json5
|
|
86
|
+
{
|
|
87
|
+
tts: {
|
|
88
|
+
provider: "openai",
|
|
89
|
+
openai: {
|
|
90
|
+
voice: "alloy"
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Notes:
|
|
97
|
+
- Edge TTS is ignored for voice calls (telephony audio needs PCM; Edge output is unreliable).
|
|
98
|
+
- Core TTS is used when Twilio media streaming is enabled; otherwise calls fall back to provider-native voices.
|
|
99
|
+
|
|
100
|
+
## CLI
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
openclaw voicecall call --to "+15555550123" --message "Hello from OpenClaw"
|
|
104
|
+
openclaw voicecall continue --call-id <id> --message "Any questions?"
|
|
105
|
+
openclaw voicecall speak --call-id <id> --message "One moment"
|
|
106
|
+
openclaw voicecall end --call-id <id>
|
|
107
|
+
openclaw voicecall status --call-id <id>
|
|
108
|
+
openclaw voicecall tail
|
|
109
|
+
openclaw voicecall expose --mode funnel
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Tool
|
|
113
|
+
|
|
114
|
+
Tool name: `voice_call`
|
|
115
|
+
|
|
116
|
+
Actions:
|
|
117
|
+
- `initiate_call` (message, to?, mode?)
|
|
118
|
+
- `continue_call` (callId, message)
|
|
119
|
+
- `speak_to_user` (callId, message)
|
|
120
|
+
- `end_call` (callId)
|
|
121
|
+
- `get_status` (callId)
|
|
122
|
+
|
|
123
|
+
## Gateway RPC
|
|
124
|
+
|
|
125
|
+
- `voicecall.initiate` (to?, message, mode?)
|
|
126
|
+
- `voicecall.continue` (callId, message)
|
|
127
|
+
- `voicecall.speak` (callId, message)
|
|
128
|
+
- `voicecall.end` (callId)
|
|
129
|
+
- `voicecall.status` (callId)
|
|
130
|
+
|
|
131
|
+
## Notes
|
|
132
|
+
|
|
133
|
+
- Uses webhook signature verification for Twilio/Telnyx/Plivo.
|
|
134
|
+
- `responseModel` / `responseSystemPrompt` control AI auto-responses.
|
|
135
|
+
- Media streaming requires `ws` and an OpenAI Realtime API key.
|
package/index.ts
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
import { Type } from "@sinclair/typebox";
|
|
2
|
+
import type { CoreConfig } from "./src/core-bridge.js";
|
|
3
|
+
import {
|
|
4
|
+
VoiceCallConfigSchema,
|
|
5
|
+
resolveVoiceCallConfig,
|
|
6
|
+
validateProviderConfig,
|
|
7
|
+
type VoiceCallConfig,
|
|
8
|
+
} from "./src/config.js";
|
|
9
|
+
import { registerVoiceCallCli } from "./src/cli.js";
|
|
10
|
+
import { createVoiceCallRuntime, type VoiceCallRuntime } from "./src/runtime.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Plugin-facing config schema for the voice-call plugin.
 *
 * `parse` normalizes the raw plugin config (legacy aliases, defaults) before
 * delegating validation to `VoiceCallConfigSchema`; `uiHints` carries display
 * metadata (labels, help text, `sensitive`/`advanced` flags) keyed by dotted
 * config path.
 */
const voiceCallConfigSchema = {
  /**
   * Normalize and validate a raw plugin config value.
   *
   * - Anything that is not a plain (non-array) object is treated as `{}`.
   * - The deprecated provider name `"log"` is mapped to `"mock"`.
   * - When no provider is given, `"mock"` is used if the plugin is enabled
   *   (`enabled` defaults to `true` unless it is explicitly a boolean).
   * - The deprecated `twilio.from` string is used as a fallback for `fromNumber`.
   *
   * @param value Raw plugin config of unknown shape.
   * @returns The result of `VoiceCallConfigSchema.parse` on the normalized input.
   */
  parse(value: unknown): VoiceCallConfig {
    const raw =
      value && typeof value === "object" && !Array.isArray(value)
        ? (value as Record<string, unknown>)
        : {};

    // Legacy configs stored the caller number under `twilio.from`.
    const twilio = raw.twilio as Record<string, unknown> | undefined;
    const legacyFrom = typeof twilio?.from === "string" ? twilio.from : undefined;

    const enabled = typeof raw.enabled === "boolean" ? raw.enabled : true;
    // "log" is the deprecated name of the "mock" provider.
    const providerRaw = raw.provider === "log" ? "mock" : raw.provider;
    const provider = providerRaw ?? (enabled ? "mock" : undefined);

    return VoiceCallConfigSchema.parse({
      ...raw,
      enabled,
      provider,
      fromNumber: raw.fromNumber ?? legacyFrom,
    });
  },
  uiHints: {
    provider: {
      label: "Provider",
      help: "Use twilio, telnyx, plivo, or mock for dev/no-network.",
    },
    fromNumber: { label: "From Number", placeholder: "+15550001234" },
    toNumber: { label: "Default To Number", placeholder: "+15550001234" },
    inboundPolicy: { label: "Inbound Policy" },
    allowFrom: { label: "Inbound Allowlist" },
    inboundGreeting: { label: "Inbound Greeting", advanced: true },
    "telnyx.apiKey": { label: "Telnyx API Key", sensitive: true },
    "telnyx.connectionId": { label: "Telnyx Connection ID" },
    "telnyx.publicKey": { label: "Telnyx Public Key", sensitive: true },
    "twilio.accountSid": { label: "Twilio Account SID" },
    "twilio.authToken": { label: "Twilio Auth Token", sensitive: true },
    // Plivo credentials get the same treatment as the other providers so the
    // auth token is flagged as sensitive.
    "plivo.authId": { label: "Plivo Auth ID" },
    "plivo.authToken": { label: "Plivo Auth Token", sensitive: true },
    "outbound.defaultMode": { label: "Default Call Mode" },
    "outbound.notifyHangupDelaySec": {
      label: "Notify Hangup Delay (sec)",
      advanced: true,
    },
    "serve.port": { label: "Webhook Port" },
    "serve.bind": { label: "Webhook Bind" },
    "serve.path": { label: "Webhook Path" },
    "tailscale.mode": { label: "Tailscale Mode", advanced: true },
    "tailscale.path": { label: "Tailscale Path", advanced: true },
    "tunnel.provider": { label: "Tunnel Provider", advanced: true },
    "tunnel.ngrokAuthToken": {
      label: "ngrok Auth Token",
      sensitive: true,
      advanced: true,
    },
    "tunnel.ngrokDomain": { label: "ngrok Domain", advanced: true },
    "tunnel.allowNgrokFreeTierLoopbackBypass": {
      label: "Allow ngrok Free Tier (Loopback Bypass)",
      advanced: true,
    },
    "streaming.enabled": { label: "Enable Streaming", advanced: true },
    "streaming.openaiApiKey": {
      label: "OpenAI Realtime API Key",
      sensitive: true,
      advanced: true,
    },
    "streaming.sttModel": { label: "Realtime STT Model", advanced: true },
    "streaming.streamPath": { label: "Media Stream Path", advanced: true },
    "tts.provider": {
      label: "TTS Provider Override",
      help: "Deep-merges with messages.tts (Edge is ignored for calls).",
      advanced: true,
    },
    "tts.openai.model": { label: "OpenAI TTS Model", advanced: true },
    "tts.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
    "tts.openai.apiKey": {
      label: "OpenAI API Key",
      sensitive: true,
      advanced: true,
    },
    "tts.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
    "tts.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
    "tts.elevenlabs.apiKey": {
      label: "ElevenLabs API Key",
      sensitive: true,
      advanced: true,
    },
    "tts.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
    publicUrl: { label: "Public Webhook URL", advanced: true },
    skipSignatureVerification: {
      label: "Skip Signature Verification",
      advanced: true,
    },
    store: { label: "Call Log Store Path", advanced: true },
    responseModel: { label: "Response Model", advanced: true },
    responseSystemPrompt: { label: "Response System Prompt", advanced: true },
    responseTimeoutMs: { label: "Response Timeout (ms)", advanced: true },
  },
};
|
|
108
|
+
|
|
109
|
+
/**
 * Parameter schema for the `voice_call` tool.
 *
 * A union of the five action-tagged request shapes (`initiate_call`,
 * `continue_call`, `speak_to_user`, `end_call`, `get_status`) followed by the
 * legacy action-less shape that selects behavior through `mode`
 * (`"call"` | `"status"`).
 */
const VoiceCallToolSchema = (() => {
  // All string fields in this schema carry only a description.
  const describedString = (description: string) => Type.String({ description });
  // Fresh schema object per use, matching one `Type.String` call per field.
  const callIdField = () => describedString("Call ID");

  const initiateCall = Type.Object({
    action: Type.Literal("initiate_call"),
    to: Type.Optional(describedString("Call target")),
    message: describedString("Intro message"),
    mode: Type.Optional(Type.Union([Type.Literal("notify"), Type.Literal("conversation")])),
  });

  const continueCall = Type.Object({
    action: Type.Literal("continue_call"),
    callId: callIdField(),
    message: describedString("Follow-up message"),
  });

  const speakToUser = Type.Object({
    action: Type.Literal("speak_to_user"),
    callId: callIdField(),
    message: describedString("Message to speak"),
  });

  const endCall = Type.Object({
    action: Type.Literal("end_call"),
    callId: callIdField(),
  });

  const getStatus = Type.Object({
    action: Type.Literal("get_status"),
    callId: callIdField(),
  });

  // Legacy request shape: no `action`, everything optional.
  const legacyRequest = Type.Object({
    mode: Type.Optional(Type.Union([Type.Literal("call"), Type.Literal("status")])),
    to: Type.Optional(describedString("Call target")),
    sid: Type.Optional(describedString("Call SID")),
    message: Type.Optional(describedString("Optional intro message")),
  });

  return Type.Union([
    initiateCall,
    continueCall,
    speakToUser,
    endCall,
    getStatus,
    legacyRequest,
  ]);
})();
|
|
141
|
+
|
|
142
|
+
/**
 * Voice Call plugin definition.
 *
 * `register` wires the plugin into the host `api`: it resolves and validates
 * the plugin config, then registers the `voicecall.*` gateway methods, the
 * `voice_call` tool, the `voicecall` CLI commands, and a `voicecall` service
 * that starts/stops the runtime. The runtime itself is created lazily on first
 * use via `ensureRuntime`.
 */
const voiceCallPlugin = {
  id: "voice-call",
  name: "Voice Call",
  description: "Voice-call plugin with Telnyx/Twilio/Plivo providers",
  configSchema: voiceCallConfigSchema,
  /**
   * Register gateway methods, tool, CLI and service with the host.
   *
   * @param api Host plugin API; this block uses `pluginConfig`, `config`,
   *   `runtime.tts`, `logger`, `registerGatewayMethod`, `registerTool`,
   *   `registerCli` and `registerService`.
   */
  register(api) {
    const config = resolveVoiceCallConfig(
      voiceCallConfigSchema.parse(api.pluginConfig),
    );
    // Validation errors are surfaced lazily by ensureRuntime, not at register time.
    const validation = validateProviderConfig(config);

    // Warn about deprecated config spellings that parse() silently accepts.
    if (api.pluginConfig && typeof api.pluginConfig === "object") {
      const raw = api.pluginConfig as Record<string, unknown>;
      const twilio = raw.twilio as Record<string, unknown> | undefined;
      if (raw.provider === "log") {
        api.logger.warn(
          "[voice-call] provider \"log\" is deprecated; use \"mock\" instead",
        );
      }
      if (typeof twilio?.from === "string") {
        api.logger.warn(
          "[voice-call] twilio.from is deprecated; use fromNumber instead",
        );
      }
    }

    let runtimePromise: Promise<VoiceCallRuntime> | null = null;
    let runtime: VoiceCallRuntime | null = null;

    /**
     * Return the shared runtime, creating it on first use.
     *
     * Concurrent callers share one in-flight creation promise. If creation
     * fails, the failed promise is discarded so a later call can retry instead
     * of re-throwing the same stale error forever.
     *
     * @throws Error when the plugin is disabled, the provider config is
     *   invalid, or runtime creation fails.
     */
    const ensureRuntime = async () => {
      if (!config.enabled) {
        throw new Error("Voice call disabled in plugin config");
      }
      if (!validation.valid) {
        throw new Error(validation.errors.join("; "));
      }
      if (runtime) return runtime;
      if (!runtimePromise) {
        runtimePromise = createVoiceCallRuntime({
          config,
          coreConfig: api.config as CoreConfig,
          ttsRuntime: api.runtime.tts,
          logger: api.logger,
        });
      }
      const pending = runtimePromise;
      try {
        const created = await pending;
        runtime = created;
        return created;
      } catch (err) {
        // Only clear the promise we awaited; another caller may already have
        // replaced it with a fresh attempt.
        if (runtimePromise === pending) {
          runtimePromise = null;
        }
        throw err;
      }
    };

    /** Trimmed string value, or "" when the value is not a string. */
    const trimmed = (value: unknown): string =>
      typeof value === "string" ? value.trim() : "";

    /** Report a thrown value to a gateway caller as `{ error: message }`. */
    const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => {
      respond(false, { error: err instanceof Error ? err.message : String(err) });
    };

    // voicecall.initiate (to?, message, mode?) — start an outbound call.
    api.registerGatewayMethod("voicecall.initiate", async ({ params, respond }) => {
      try {
        const message = trimmed(params?.message);
        if (!message) {
          respond(false, { error: "message required" });
          return;
        }
        const rt = await ensureRuntime();
        // Fall back to the configured default destination.
        const to = trimmed(params?.to) || rt.config.toNumber;
        if (!to) {
          respond(false, { error: "to required" });
          return;
        }
        const mode =
          params?.mode === "notify" || params?.mode === "conversation"
            ? params.mode
            : undefined;
        const result = await rt.manager.initiateCall(to, undefined, {
          message,
          mode,
        });
        if (!result.success) {
          respond(false, { error: result.error || "initiate failed" });
          return;
        }
        respond(true, { callId: result.callId, initiated: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.continue (callId, message) — responds with the resulting transcript.
    api.registerGatewayMethod("voicecall.continue", async ({ params, respond }) => {
      try {
        const callId = trimmed(params?.callId);
        const message = trimmed(params?.message);
        if (!callId || !message) {
          respond(false, { error: "callId and message required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.continueCall(callId, message);
        if (!result.success) {
          respond(false, { error: result.error || "continue failed" });
          return;
        }
        respond(true, { success: true, transcript: result.transcript });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.speak (callId, message)
    api.registerGatewayMethod("voicecall.speak", async ({ params, respond }) => {
      try {
        const callId = trimmed(params?.callId);
        const message = trimmed(params?.message);
        if (!callId || !message) {
          respond(false, { error: "callId and message required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.speak(callId, message);
        if (!result.success) {
          respond(false, { error: result.error || "speak failed" });
          return;
        }
        respond(true, { success: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.end (callId)
    api.registerGatewayMethod("voicecall.end", async ({ params, respond }) => {
      try {
        const callId = trimmed(params?.callId);
        if (!callId) {
          respond(false, { error: "callId required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.endCall(callId);
        if (!result.success) {
          respond(false, { error: result.error || "end failed" });
          return;
        }
        respond(true, { success: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.status (callId | sid) — looked up first as an internal call id,
    // then as a provider call id.
    api.registerGatewayMethod("voicecall.status", async ({ params, respond }) => {
      try {
        // Prefer callId, but fall back to sid when callId is missing or blank.
        const raw = trimmed(params?.callId) || trimmed(params?.sid);
        if (!raw) {
          respond(false, { error: "callId required" });
          return;
        }
        const rt = await ensureRuntime();
        const call =
          rt.manager.getCall(raw) || rt.manager.getCallByProviderCallId(raw);
        if (!call) {
          respond(true, { found: false });
          return;
        }
        respond(true, { found: true, call });
      } catch (err) {
        sendError(respond, err);
      }
    });

    // voicecall.start — legacy alias of initiate: `to` is mandatory (no default
    // destination) and `message` is optional.
    api.registerGatewayMethod("voicecall.start", async ({ params, respond }) => {
      try {
        const to = trimmed(params?.to);
        const message = trimmed(params?.message);
        if (!to) {
          respond(false, { error: "to required" });
          return;
        }
        const rt = await ensureRuntime();
        const result = await rt.manager.initiateCall(to, undefined, {
          message: message || undefined,
        });
        if (!result.success) {
          respond(false, { error: result.error || "initiate failed" });
          return;
        }
        respond(true, { callId: result.callId, initiated: true });
      } catch (err) {
        sendError(respond, err);
      }
    });

    api.registerTool({
      name: "voice_call",
      label: "Voice Call",
      description:
        "Make phone calls and have voice conversations via the voice-call plugin.",
      parameters: VoiceCallToolSchema,
      /**
       * Execute a `voice_call` tool request.
       *
       * Requests with a string `action` are dispatched to the matching
       * operation; requests without one use the legacy `mode` shape
       * (`"status"` looks a call up by `sid`, anything else places a call).
       * Failures are never thrown: they are returned as an `{ error }` payload.
       */
      async execute(_toolCallId, params) {
        // Tool results carry the payload both as pretty-printed text and raw details.
        const json = (payload: unknown) => ({
          content: [
            { type: "text", text: JSON.stringify(payload, null, 2) },
          ],
          details: payload,
        });

        try {
          const rt = await ensureRuntime();

          if (typeof params?.action === "string") {
            switch (params.action) {
              case "initiate_call": {
                const message = String(params.message || "").trim();
                if (!message) throw new Error("message required");
                const to = trimmed(params.to) || rt.config.toNumber;
                if (!to) throw new Error("to required");
                const result = await rt.manager.initiateCall(to, undefined, {
                  message,
                  mode:
                    params.mode === "notify" || params.mode === "conversation"
                      ? params.mode
                      : undefined,
                });
                if (!result.success) {
                  throw new Error(result.error || "initiate failed");
                }
                return json({ callId: result.callId, initiated: true });
              }
              case "continue_call": {
                const callId = String(params.callId || "").trim();
                const message = String(params.message || "").trim();
                if (!callId || !message) {
                  throw new Error("callId and message required");
                }
                const result = await rt.manager.continueCall(callId, message);
                if (!result.success) {
                  throw new Error(result.error || "continue failed");
                }
                return json({ success: true, transcript: result.transcript });
              }
              case "speak_to_user": {
                const callId = String(params.callId || "").trim();
                const message = String(params.message || "").trim();
                if (!callId || !message) {
                  throw new Error("callId and message required");
                }
                const result = await rt.manager.speak(callId, message);
                if (!result.success) {
                  throw new Error(result.error || "speak failed");
                }
                return json({ success: true });
              }
              case "end_call": {
                const callId = String(params.callId || "").trim();
                if (!callId) throw new Error("callId required");
                const result = await rt.manager.endCall(callId);
                if (!result.success) {
                  throw new Error(result.error || "end failed");
                }
                return json({ success: true });
              }
              case "get_status": {
                const callId = String(params.callId || "").trim();
                if (!callId) throw new Error("callId required");
                const call =
                  rt.manager.getCall(callId) ||
                  rt.manager.getCallByProviderCallId(callId);
                return json(call ? { found: true, call } : { found: false });
              }
              default:
                // Without this, an unrecognized action would fall through to
                // the legacy path below and could place a call to the default
                // destination.
                throw new Error(`unknown action: ${String(params.action)}`);
            }
          }

          // Legacy request shape (no `action`): behavior is selected by `mode`.
          const mode = params?.mode ?? "call";
          if (mode === "status") {
            const sid = trimmed(params?.sid);
            if (!sid) throw new Error("sid required for status");
            const call =
              rt.manager.getCall(sid) || rt.manager.getCallByProviderCallId(sid);
            return json(call ? { found: true, call } : { found: false });
          }

          const to = trimmed(params?.to) || rt.config.toNumber;
          if (!to) throw new Error("to required for call");
          const result = await rt.manager.initiateCall(to, undefined, {
            message: trimmed(params?.message) || undefined,
          });
          if (!result.success) {
            throw new Error(result.error || "initiate failed");
          }
          return json({ callId: result.callId, initiated: true });
        } catch (err) {
          return json({
            error: err instanceof Error ? err.message : String(err),
          });
        }
      },
    });

    api.registerCli(
      ({ program }) =>
        registerVoiceCallCli({
          program,
          config,
          ensureRuntime,
          logger: api.logger,
        }),
      { commands: ["voicecall"] },
    );

    api.registerService({
      id: "voicecall",
      // Start is best-effort: a failing runtime is logged rather than thrown.
      start: async () => {
        if (!config.enabled) return;
        try {
          await ensureRuntime();
        } catch (err) {
          api.logger.error(
            `[voice-call] Failed to start runtime: ${
              err instanceof Error ? err.message : String(err)
            }`,
          );
        }
      },
      // Stop waits for any in-flight creation, stops the runtime, and always
      // resets the cached state so a later ensureRuntime() starts fresh.
      stop: async () => {
        if (!runtimePromise) return;
        try {
          const rt = await runtimePromise;
          await rt.stop();
        } finally {
          runtimePromise = null;
          runtime = null;
        }
      },
    });
  },
};
|
|
496
|
+
|
|
497
|
+
export default voiceCallPlugin;
|