@clawvoice/voice-assistant 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +125 -0
- package/CHANGELOG.md +112 -0
- package/LICENSE +21 -0
- package/README.md +215 -0
- package/dist/cli.d.ts +10 -0
- package/dist/cli.js +272 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.js +182 -0
- package/dist/diagnostics/health.d.ts +14 -0
- package/dist/diagnostics/health.js +182 -0
- package/dist/hooks.d.ts +16 -0
- package/dist/hooks.js +113 -0
- package/dist/inbound/classifier.d.ts +5 -0
- package/dist/inbound/classifier.js +72 -0
- package/dist/inbound/types.d.ts +30 -0
- package/dist/inbound/types.js +2 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +52 -0
- package/dist/routes.d.ts +6 -0
- package/dist/routes.js +89 -0
- package/dist/services/memory-extraction.d.ts +42 -0
- package/dist/services/memory-extraction.js +117 -0
- package/dist/services/post-call.d.ts +56 -0
- package/dist/services/post-call.js +112 -0
- package/dist/services/relay.d.ts +9 -0
- package/dist/services/relay.js +19 -0
- package/dist/services/voice-call.d.ts +61 -0
- package/dist/services/voice-call.js +189 -0
- package/dist/telephony/telnyx.d.ts +12 -0
- package/dist/telephony/telnyx.js +60 -0
- package/dist/telephony/twilio.d.ts +12 -0
- package/dist/telephony/twilio.js +63 -0
- package/dist/telephony/types.d.ts +15 -0
- package/dist/telephony/types.js +2 -0
- package/dist/telephony/util.d.ts +2 -0
- package/dist/telephony/util.js +25 -0
- package/dist/tools.d.ts +5 -0
- package/dist/tools.js +167 -0
- package/dist/voice/bridge.d.ts +47 -0
- package/dist/voice/bridge.js +411 -0
- package/dist/voice/types.d.ts +168 -0
- package/dist/voice/types.js +42 -0
- package/dist/webhooks/verify.d.ts +30 -0
- package/dist/webhooks/verify.js +95 -0
- package/docs/FEATURES.md +36 -0
- package/docs/OPENCLAW_PLUGIN_GUIDE.md +1202 -0
- package/docs/SETUP.md +303 -0
- package/openclaw.plugin.json +137 -0
- package/package.json +37 -0
- package/skills/voice-assistant/SKILL.md +15 -0
|
@@ -0,0 +1,1202 @@
|
|
|
1
|
+
# ClawVoice - OpenClaw Plugin Implementation Guide
|
|
2
|
+
|
|
3
|
+
Technical reference for implementing ClawVoice as an OpenClaw plugin. This document covers the plugin SDK, registration patterns, and how each ClawVoice component maps to OpenClaw's extension points.
|
|
4
|
+
|
|
5
|
+
## OpenClaw Plugin System Overview
|
|
6
|
+
|
|
7
|
+
OpenClaw plugins are TypeScript modules loaded at runtime via [jiti](https://github.com/unjs/jiti). A plugin can register:
|
|
8
|
+
|
|
9
|
+
- **Tools** (actions the agent can invoke)
|
|
10
|
+
- **CLI commands** (user-facing terminal commands)
|
|
11
|
+
- **RPC methods** (machine-to-machine API)
|
|
12
|
+
- **HTTP routes** (webhook endpoints)
|
|
13
|
+
- **Background services** (long-running processes)
|
|
14
|
+
- **Hooks** (lifecycle event listeners)
|
|
15
|
+
- **Skills** (agent knowledge documents)
|
|
16
|
+
- **Channels** (communication interfaces like voice, SMS)
|
|
17
|
+
|
|
18
|
+
## Plugin Manifest
|
|
19
|
+
|
|
20
|
+
Every plugin needs an `openclaw.plugin.json` at the package root.
|
|
21
|
+
|
|
22
|
+
### ClawVoice Manifest
|
|
23
|
+
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"id": "voice-assistant",
|
|
27
|
+
"name": "ClawVoice",
|
|
28
|
+
"description": "Voice calling for OpenClaw agents. Inbound and outbound phone calls with Deepgram Voice Agent or ElevenLabs Conversational AI.",
|
|
29
|
+
"version": "0.1.0",
|
|
30
|
+
"kind": "channel",
|
|
31
|
+
"channels": ["voice"],
|
|
32
|
+
"skills": ["voice-assistant"],
|
|
33
|
+
"entryPoint": "dist/index.js",
|
|
34
|
+
"configSchema": {
|
|
35
|
+
"type": "object",
|
|
36
|
+
"properties": {
|
|
37
|
+
"mode": {
|
|
38
|
+
"type": "string",
|
|
39
|
+
"enum": ["self-hosted", "managed"],
|
|
40
|
+
"default": "self-hosted",
|
|
41
|
+
"description": "Operating mode. self-hosted=BYOK, managed=ClawVoice service."
|
|
42
|
+
},
|
|
43
|
+
"serviceToken": {
|
|
44
|
+
"type": "string",
|
|
45
|
+
"description": "Managed service authentication token."
|
|
46
|
+
},
|
|
47
|
+
"telephonyProvider": {
|
|
48
|
+
"type": "string",
|
|
49
|
+
"enum": ["telnyx", "twilio"],
|
|
50
|
+
"default": "twilio",
|
|
51
|
+
"description": "PSTN telephony provider."
|
|
52
|
+
},
|
|
53
|
+
"voiceProvider": {
|
|
54
|
+
"type": "string",
|
|
55
|
+
"enum": ["deepgram-agent", "elevenlabs-conversational"],
|
|
56
|
+
"default": "deepgram-agent",
|
|
57
|
+
"description": "Voice pipeline provider."
|
|
58
|
+
},
|
|
59
|
+
"telnyxApiKey": { "type": "string" },
|
|
60
|
+
"telnyxConnectionId": { "type": "string" },
|
|
61
|
+
"telnyxPhoneNumber": { "type": "string" },
|
|
62
|
+
"telnyxWebhookSecret": { "type": "string" },
|
|
63
|
+
"twilioAccountSid": { "type": "string" },
|
|
64
|
+
"twilioAuthToken": { "type": "string" },
|
|
65
|
+
"twilioPhoneNumber": { "type": "string" },
|
|
66
|
+
"deepgramApiKey": { "type": "string" },
|
|
67
|
+
"deepgramVoice": {
|
|
68
|
+
"type": "string",
|
|
69
|
+
"default": "aura-asteria-en",
|
|
70
|
+
"description": "Default Deepgram Aura voice ID."
|
|
71
|
+
},
|
|
72
|
+
"elevenlabsApiKey": { "type": "string" },
|
|
73
|
+
"elevenlabsAgentId": { "type": "string" },
|
|
74
|
+
"elevenlabsVoiceId": { "type": "string" },
|
|
75
|
+
"openaiApiKey": {
|
|
76
|
+
"type": "string",
|
|
77
|
+
"description": "Optional. For dedicated post-call analysis model."
|
|
78
|
+
},
|
|
79
|
+
"analysisModel": {
|
|
80
|
+
"type": "string",
|
|
81
|
+
"default": "gpt-4o-mini"
|
|
82
|
+
},
|
|
83
|
+
"mainMemoryAccess": {
|
|
84
|
+
"type": "string",
|
|
85
|
+
"enum": ["read", "none"],
|
|
86
|
+
"default": "read",
|
|
87
|
+
"description": "Can voice sessions read main MEMORY.md?"
|
|
88
|
+
},
|
|
89
|
+
"autoExtractMemories": {
|
|
90
|
+
"type": "boolean",
|
|
91
|
+
"default": true
|
|
92
|
+
},
|
|
93
|
+
"restrictTools": {
|
|
94
|
+
"type": "boolean",
|
|
95
|
+
"default": true
|
|
96
|
+
},
|
|
97
|
+
"deniedTools": {
|
|
98
|
+
"type": "array",
|
|
99
|
+
"items": { "type": "string" },
|
|
100
|
+
"default": ["exec", "browser", "web_fetch", "gateway", "cron", "sessions_spawn"]
|
|
101
|
+
},
|
|
102
|
+
"amdEnabled": {
|
|
103
|
+
"type": "boolean",
|
|
104
|
+
"default": true,
|
|
105
|
+
"description": "Answering machine detection for outbound calls."
|
|
106
|
+
},
|
|
107
|
+
"maxCallDuration": {
|
|
108
|
+
"type": "number",
|
|
109
|
+
"default": 1800,
|
|
110
|
+
"description": "Maximum call duration in seconds."
|
|
111
|
+
},
|
|
112
|
+
"recordCalls": {
|
|
113
|
+
"type": "boolean",
|
|
114
|
+
"default": false
|
|
115
|
+
},
|
|
116
|
+
"relayUrl": {
|
|
117
|
+
"type": "string",
|
|
118
|
+
"default": "wss://relay.clawvoice.dev"
|
|
119
|
+
}
|
|
120
|
+
},
|
|
121
|
+
"required": []
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Key Manifest Fields
|
|
127
|
+
|
|
128
|
+
| Field | Purpose |
|
|
129
|
+
|-------|---------|
|
|
130
|
+
| `id` | Unique plugin identifier. Convention: `org/name` (note that the ClawVoice manifest above uses the unscoped id `voice-assistant`). |
|
|
131
|
+
| `kind` | Plugin category. `"channel"` for communication plugins. |
|
|
132
|
+
| `channels` | Array of channel types this plugin provides. `["voice"]` |
|
|
133
|
+
| `skills` | Agent skills this plugin ships. References `skills/voice-assistant/SKILL.md` |
|
|
134
|
+
| `entryPoint` | Compiled JS entry point (TypeScript compiled to dist/) |
|
|
135
|
+
| `configSchema` | JSON Schema for plugin configuration. Users set values via `openclaw config set clawvoice.*` or environment variables. |
|
|
136
|
+
|
|
137
|
+
## Plugin Entry Point
|
|
138
|
+
|
|
139
|
+
`src/index.ts` is the main registration file. It default-exports a plugin object, and OpenClaw calls that object's `init` method with a plugin API object.
|
|
140
|
+
|
|
141
|
+
### Structure
|
|
142
|
+
|
|
143
|
+
```typescript
|
|
144
|
+
import { Plugin, PluginAPI } from "@openclaw/plugin-sdk";
|
|
145
|
+
|
|
146
|
+
// Import our components
|
|
147
|
+
import { registerTools } from "./tools";
|
|
148
|
+
import { registerCLI } from "./cli";
|
|
149
|
+
import { registerRoutes } from "./routes";
|
|
150
|
+
import { registerHooks } from "./hooks";
|
|
151
|
+
import { VoiceCallService } from "./services/voice-call";
|
|
152
|
+
import { WebSocketRelayService } from "./services/relay";
|
|
153
|
+
import { resolveConfig, ClawVoiceConfig } from "./config";
|
|
154
|
+
|
|
155
|
+
const plugin: Plugin = {
|
|
156
|
+
name: "clawvoice",
|
|
157
|
+
|
|
158
|
+
async init(api: PluginAPI) {
|
|
159
|
+
// Resolve configuration from configSchema + env vars
|
|
160
|
+
const config = resolveConfig(api.config);
|
|
161
|
+
|
|
162
|
+
// Register tools (agent actions)
|
|
163
|
+
registerTools(api, config);
|
|
164
|
+
|
|
165
|
+
// Register CLI commands
|
|
166
|
+
registerCLI(api, config);
|
|
167
|
+
|
|
168
|
+
// Register HTTP routes (webhooks from Telnyx/Twilio)
|
|
169
|
+
registerRoutes(api, config);
|
|
170
|
+
|
|
171
|
+
// Register lifecycle hooks
|
|
172
|
+
registerHooks(api, config);
|
|
173
|
+
|
|
174
|
+
// Start background services
|
|
175
|
+
if (config.mode === "managed") {
|
|
176
|
+
api.services.register("clawvoice-relay", new WebSocketRelayService(config));
|
|
177
|
+
}
|
|
178
|
+
api.services.register("clawvoice-calls", new VoiceCallService(config));
|
|
179
|
+
|
|
180
|
+
api.log.info("ClawVoice initialized", {
|
|
181
|
+
mode: config.mode,
|
|
182
|
+
telephony: config.telephonyProvider,
|
|
183
|
+
voice: config.voiceProvider,
|
|
184
|
+
});
|
|
185
|
+
},
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
export default plugin;
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Tool Registration
|
|
192
|
+
|
|
193
|
+
Tools are actions the OpenClaw agent can invoke during conversation. ClawVoice registers four tools.
|
|
194
|
+
|
|
195
|
+
### Registration Pattern
|
|
196
|
+
|
|
197
|
+
```typescript
|
|
198
|
+
// src/tools.ts
|
|
199
|
+
import { PluginAPI } from "@openclaw/plugin-sdk";
|
|
200
|
+
import { ClawVoiceConfig } from "./config";
|
|
201
|
+
|
|
202
|
+
export function registerTools(api: PluginAPI, config: ClawVoiceConfig) {
|
|
203
|
+
// voice_assistant.call - Initiate outbound phone call
|
|
204
|
+
api.tools.register({
|
|
205
|
+
name: "voice_assistant.call",
|
|
206
|
+
description: "Call a phone number. The agent will have a voice conversation with the person who answers.",
|
|
207
|
+
parameters: {
|
|
208
|
+
type: "object",
|
|
209
|
+
properties: {
|
|
210
|
+
phoneNumber: {
|
|
211
|
+
type: "string",
|
|
212
|
+
description: "Phone number to call in E.164 format (e.g., +15551234567)",
|
|
213
|
+
},
|
|
214
|
+
purpose: {
|
|
215
|
+
type: "string",
|
|
216
|
+
description: "Brief description of why you're calling (used as context for the voice agent)",
|
|
217
|
+
},
|
|
218
|
+
voice: {
|
|
219
|
+
type: "string",
|
|
220
|
+
description: "Voice to use. Options: aura-asteria-en, aura-luna-en, aura-orion-en, aura-arcas-en",
|
|
221
|
+
default: config.deepgramVoice,
|
|
222
|
+
},
|
|
223
|
+
},
|
|
224
|
+
required: ["phoneNumber"],
|
|
225
|
+
},
|
|
226
|
+
handler: async (params, ctx) => {
|
|
227
|
+
// ctx provides: session, memory, agent context
|
|
228
|
+
const result = await initiateOutboundCall(params, config, ctx);
|
|
229
|
+
return {
|
|
230
|
+
content: result.summary,
|
|
231
|
+
data: result,
|
|
232
|
+
};
|
|
233
|
+
},
|
|
234
|
+
});
|
|
235
|
+
|
|
236
|
+
// voice_assistant.hangup - End an active call
|
|
237
|
+
api.tools.register({
|
|
238
|
+
name: "voice_assistant.hangup",
|
|
239
|
+
description: "End an active phone call.",
|
|
240
|
+
parameters: {
|
|
241
|
+
type: "object",
|
|
242
|
+
properties: {
|
|
243
|
+
callId: {
|
|
244
|
+
type: "string",
|
|
245
|
+
description: "The call ID to hang up. If omitted, hangs up the most recent active call.",
|
|
246
|
+
},
|
|
247
|
+
},
|
|
248
|
+
},
|
|
249
|
+
handler: async (params, ctx) => {
|
|
250
|
+
const result = await hangupCall(params.callId, config);
|
|
251
|
+
return { content: result.message };
|
|
252
|
+
},
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
// voice_assistant.status - Get call status
|
|
256
|
+
api.tools.register({
|
|
257
|
+
name: "voice_assistant.status",
|
|
258
|
+
description: "Get the status of active and recent phone calls.",
|
|
259
|
+
parameters: {
|
|
260
|
+
type: "object",
|
|
261
|
+
properties: {},
|
|
262
|
+
},
|
|
263
|
+
handler: async (_params, ctx) => {
|
|
264
|
+
const status = await getCallStatus(config);
|
|
265
|
+
return { content: formatCallStatus(status) };
|
|
266
|
+
},
|
|
267
|
+
});
|
|
268
|
+
|
|
269
|
+
// voice_assistant.promote_memory - Promote voice memory to main
|
|
270
|
+
api.tools.register({
|
|
271
|
+
name: "voice_assistant.promote_memory",
|
|
272
|
+
description: "Review and promote a voice memory to main MEMORY.md. Requires operator confirmation.",
|
|
273
|
+
parameters: {
|
|
274
|
+
type: "object",
|
|
275
|
+
properties: {
|
|
276
|
+
memoryId: {
|
|
277
|
+
type: "string",
|
|
278
|
+
description: "ID of the voice memory entry to promote.",
|
|
279
|
+
},
|
|
280
|
+
},
|
|
281
|
+
required: ["memoryId"],
|
|
282
|
+
},
|
|
283
|
+
handler: async (params, ctx) => {
|
|
284
|
+
const result = await promoteVoiceMemory(params.memoryId, ctx);
|
|
285
|
+
return { content: result.message };
|
|
286
|
+
},
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Tool Handler Context
|
|
292
|
+
|
|
293
|
+
The `ctx` parameter in tool handlers provides:
|
|
294
|
+
|
|
295
|
+
```typescript
|
|
296
|
+
interface ToolContext {
|
|
297
|
+
session: {
|
|
298
|
+
id: string;
|
|
299
|
+
channel: string; // "voice", "text", "discord", etc.
|
|
300
|
+
peerId: string; // Phone number for voice
|
|
301
|
+
};
|
|
302
|
+
memory: {
|
|
303
|
+
read(path: string): Promise<string>;
|
|
304
|
+
write(path: string, content: string): Promise<void>;
|
|
305
|
+
append(path: string, content: string): Promise<void>;
|
|
306
|
+
};
|
|
307
|
+
agent: {
|
|
308
|
+
model: string;
|
|
309
|
+
systemPrompt: string;
|
|
310
|
+
};
|
|
311
|
+
log: Logger;
|
|
312
|
+
}
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## CLI Command Registration
|
|
316
|
+
|
|
317
|
+
CLI commands let users interact with ClawVoice from the terminal.
|
|
318
|
+
|
|
319
|
+
```typescript
|
|
320
|
+
// src/cli.ts
|
|
321
|
+
import { PluginAPI } from "@openclaw/plugin-sdk";
|
|
322
|
+
import { ClawVoiceConfig } from "./config";
|
|
323
|
+
|
|
324
|
+
export function registerCLI(api: PluginAPI, config: ClawVoiceConfig) {
|
|
325
|
+
const clawvoice = api.cli.register("clawvoice", {
|
|
326
|
+
description: "Voice calling for OpenClaw",
|
|
327
|
+
});
|
|
328
|
+
|
|
329
|
+
// openclaw clawvoice setup
|
|
330
|
+
clawvoice.command("setup", {
|
|
331
|
+
description: "Set up ClawVoice (configure providers or connect to managed service)",
|
|
332
|
+
options: {
|
|
333
|
+
token: { type: "string", description: "Managed service token" },
|
|
334
|
+
},
|
|
335
|
+
handler: async (args) => {
|
|
336
|
+
if (args.token) {
|
|
337
|
+
await setupManagedService(args.token, config);
|
|
338
|
+
} else {
|
|
339
|
+
await interactiveSetup(config);
|
|
340
|
+
}
|
|
341
|
+
},
|
|
342
|
+
});
|
|
343
|
+
|
|
344
|
+
// openclaw clawvoice call <number>
|
|
345
|
+
clawvoice.command("call", {
|
|
346
|
+
description: "Initiate an outbound phone call",
|
|
347
|
+
args: [{ name: "number", required: true, description: "Phone number to call" }],
|
|
348
|
+
handler: async (args) => {
|
|
349
|
+
const result = await initiateOutboundCall(
|
|
350
|
+
{ phoneNumber: args.number },
|
|
351
|
+
config,
|
|
352
|
+
null // no agent context for CLI calls
|
|
353
|
+
);
|
|
354
|
+
console.log(`Call initiated: ${result.callId}`);
|
|
355
|
+
},
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
// openclaw clawvoice status
|
|
359
|
+
clawvoice.command("status", {
|
|
360
|
+
description: "Show active calls and configuration status",
|
|
361
|
+
handler: async () => {
|
|
362
|
+
const status = await getCallStatus(config);
|
|
363
|
+
printCallStatus(status);
|
|
364
|
+
},
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
// openclaw clawvoice promote
|
|
368
|
+
clawvoice.command("promote", {
|
|
369
|
+
description: "Review and promote voice memories to main MEMORY.md",
|
|
370
|
+
handler: async () => {
|
|
371
|
+
await interactiveMemoryPromotion(config);
|
|
372
|
+
},
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
// openclaw clawvoice history
|
|
376
|
+
clawvoice.command("history", {
|
|
377
|
+
description: "Show recent call history",
|
|
378
|
+
options: {
|
|
379
|
+
limit: { type: "number", default: 10, description: "Number of calls to show" },
|
|
380
|
+
},
|
|
381
|
+
handler: async (args) => {
|
|
382
|
+
const history = await getCallHistory(config, args.limit);
|
|
383
|
+
printCallHistory(history);
|
|
384
|
+
},
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
// openclaw clawvoice test
|
|
388
|
+
clawvoice.command("test", {
|
|
389
|
+
description: "Test voice pipeline connectivity",
|
|
390
|
+
handler: async () => {
|
|
391
|
+
await testConnectivity(config);
|
|
392
|
+
},
|
|
393
|
+
});
|
|
394
|
+
}
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
## HTTP Route Registration
|
|
398
|
+
|
|
399
|
+
The plugin needs HTTP endpoints for telephony provider webhooks (Telnyx/Twilio call events).
|
|
400
|
+
|
|
401
|
+
```typescript
|
|
402
|
+
// src/routes.ts
|
|
403
|
+
import { PluginAPI } from "@openclaw/plugin-sdk";
|
|
404
|
+
import { ClawVoiceConfig } from "./config";
|
|
405
|
+
import { handleTelnyxWebhook, verifyTelnyxSignature } from "./telephony/telnyx";
|
|
406
|
+
import { handleTwilioWebhook, verifyTwilioSignature } from "./telephony/twilio";
|
|
407
|
+
|
|
408
|
+
export function registerRoutes(api: PluginAPI, config: ClawVoiceConfig) {
|
|
409
|
+
const router = api.http.router("/clawvoice");
|
|
410
|
+
|
|
411
|
+
// Telnyx call event webhooks
|
|
412
|
+
router.post("/webhooks/telnyx", async (req, res) => {
|
|
413
|
+
// Verify webhook signature
|
|
414
|
+
if (config.telnyxWebhookSecret) {
|
|
415
|
+
const valid = verifyTelnyxSignature(req, config.telnyxWebhookSecret);
|
|
416
|
+
if (!valid) {
|
|
417
|
+
res.status(401).json({ error: "Invalid signature" });
|
|
418
|
+
return;
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
const event = req.body;
|
|
423
|
+
await handleTelnyxWebhook(event, config);
|
|
424
|
+
res.status(200).json({ ok: true });
|
|
425
|
+
});
|
|
426
|
+
|
|
427
|
+
// Twilio voice webhooks (responds with TwiML)
|
|
428
|
+
router.post("/webhooks/twilio/voice", async (req, res) => {
|
|
429
|
+
if (!verifyTwilioSignature(req, config.twilioAuthToken)) {
|
|
430
|
+
res.status(401).send("Invalid signature");
|
|
431
|
+
return;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
const twiml = await handleTwilioWebhook(req.body, config);
|
|
435
|
+
res.type("text/xml").send(twiml);
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
// Twilio status callback
|
|
439
|
+
router.post("/webhooks/twilio/status", async (req, res) => {
|
|
440
|
+
await handleTwilioStatusCallback(req.body, config);
|
|
441
|
+
res.status(200).send();
|
|
442
|
+
});
|
|
443
|
+
|
|
444
|
+
// Call status API (for dashboard / external integrations)
|
|
445
|
+
router.get("/calls", async (req, res) => {
|
|
446
|
+
const calls = await getRecentCalls(config);
|
|
447
|
+
res.json({ calls });
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
// Voice memory review API (for dashboard)
|
|
451
|
+
router.get("/voice-memories", async (req, res) => {
|
|
452
|
+
const memories = await getPendingVoiceMemories(config);
|
|
453
|
+
res.json({ memories });
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
router.post("/voice-memories/:id/promote", async (req, res) => {
|
|
457
|
+
const result = await promoteVoiceMemory(req.params.id);
|
|
458
|
+
res.json(result);
|
|
459
|
+
});
|
|
460
|
+
}
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
### Webhook URL Configuration
|
|
464
|
+
|
|
465
|
+
For self-hosted mode, users need to expose their OpenClaw gateway publicly (or use a tunnel) so Telnyx/Twilio can reach the webhook endpoints:
|
|
466
|
+
|
|
467
|
+
```
|
|
468
|
+
https://your-openclaw-gateway.com/clawvoice/webhooks/telnyx
|
|
469
|
+
https://your-openclaw-gateway.com/clawvoice/webhooks/twilio/voice
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
For managed mode, the relay server handles webhooks and forwards events through the outbound WebSocket — no public endpoint needed.
|
|
473
|
+
|
|
474
|
+
## Hook Registration
|
|
475
|
+
|
|
476
|
+
Hooks let the plugin intercept OpenClaw lifecycle events. They are critical for voice memory isolation.
|
|
477
|
+
|
|
478
|
+
```typescript
|
|
479
|
+
// src/hooks.ts
|
|
480
|
+
import { PluginAPI } from "@openclaw/plugin-sdk";
|
|
481
|
+
import { ClawVoiceConfig } from "./config";
|
|
482
|
+
import { buildVoiceMemoryContext } from "./memory/voice-namespace";
|
|
483
|
+
import { buildVoiceSecurityPolicy } from "./security";
|
|
484
|
+
|
|
485
|
+
export function registerHooks(api: PluginAPI, config: ClawVoiceConfig) {
|
|
486
|
+
|
|
487
|
+
// Inject voice memory context before the agent prompt is built
|
|
488
|
+
api.hooks.on("before_prompt_build", async (event, ctx) => {
|
|
489
|
+
if (!isVoiceSession(ctx)) return;
|
|
490
|
+
|
|
491
|
+
const voiceMemory = await buildVoiceMemoryContext(ctx, config);
|
|
492
|
+
const mainMemorySnippet = config.mainMemoryAccess === "read"
|
|
493
|
+
? await ctx.memory.read("MEMORY.md")
|
|
494
|
+
: null;
|
|
495
|
+
|
|
496
|
+
return {
|
|
497
|
+
appendSystemContext: [
|
|
498
|
+
"## Voice Memory",
|
|
499
|
+
voiceMemory,
|
|
500
|
+
...(mainMemorySnippet
|
|
501
|
+
? ["## Main Memory (read-only)", mainMemorySnippet]
|
|
502
|
+
: []),
|
|
503
|
+
"",
|
|
504
|
+
"IMPORTANT: You are in a voice call. Keep responses concise and conversational.",
|
|
505
|
+
"You can only write memories to voice-memory/. You cannot modify main MEMORY.md.",
|
|
506
|
+
].join("\n"),
|
|
507
|
+
};
|
|
508
|
+
});
|
|
509
|
+
|
|
510
|
+
// Restrict tool access for voice sessions
|
|
511
|
+
api.hooks.on("before_tool_execute", async (event, ctx) => {
|
|
512
|
+
if (!isVoiceSession(ctx)) return;
|
|
513
|
+
if (!config.restrictTools) return;
|
|
514
|
+
|
|
515
|
+
const deniedTools = config.deniedTools || [
|
|
516
|
+
"exec", "browser", "web_fetch", "gateway", "cron", "sessions_spawn"
|
|
517
|
+
];
|
|
518
|
+
|
|
519
|
+
if (deniedTools.includes(event.toolName)) {
|
|
520
|
+
return {
|
|
521
|
+
blocked: true,
|
|
522
|
+
reason: `Tool "${event.toolName}" is not available during voice calls for security.`,
|
|
523
|
+
};
|
|
524
|
+
}
|
|
525
|
+
});
|
|
526
|
+
|
|
527
|
+
// Intercept memory writes from voice sessions
|
|
528
|
+
api.hooks.on("before_memory_write", async (event, ctx) => {
|
|
529
|
+
if (!isVoiceSession(ctx)) return;
|
|
530
|
+
|
|
531
|
+
// Redirect writes to voice-memory/ namespace
|
|
532
|
+
if (!event.path.startsWith("voice-memory/")) {
|
|
533
|
+
return {
|
|
534
|
+
redirectPath: `voice-memory/${event.path}`,
|
|
535
|
+
};
|
|
536
|
+
}
|
|
537
|
+
});
|
|
538
|
+
|
|
539
|
+
// After a voice call ends, run post-call analysis
|
|
540
|
+
api.hooks.on("after_session_end", async (event, ctx) => {
|
|
541
|
+
if (!isVoiceSession(ctx)) return;
|
|
542
|
+
|
|
543
|
+
await runPostCallAnalysis(ctx, config);
|
|
544
|
+
});
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
function isVoiceSession(ctx: any): boolean {
|
|
548
|
+
return ctx.session?.channel === "voice";
|
|
549
|
+
}
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
### Available Hooks
|
|
553
|
+
|
|
554
|
+
| Hook | When | Use in ClawVoice |
|
|
555
|
+
|------|------|-----------------|
|
|
556
|
+
| `before_prompt_build` | Before system prompt is assembled | Inject voice memory, voice-specific instructions |
|
|
557
|
+
| `before_tool_execute` | Before any tool runs | Block restricted tools in voice sessions |
|
|
558
|
+
| `before_memory_write` | Before memory file is written | Redirect voice writes to voice-memory/ namespace |
|
|
559
|
+
| `after_session_end` | After a conversation session closes | Run post-call analysis, extract memories |
|
|
560
|
+
| `after_response` | After agent sends a response | Log voice interactions for transcript |
|
|
561
|
+
| `on_error` | When an error occurs | Handle voice pipeline errors gracefully |
|
|
562
|
+
|
|
563
|
+
## Background Services
|
|
564
|
+
|
|
565
|
+
Background services are long-running processes that operate independently of request/response cycles.
|
|
566
|
+
|
|
567
|
+
### Voice Call Service
|
|
568
|
+
|
|
569
|
+
The voice call service manages active calls and maintains WebSocket connections to voice providers.
|
|
570
|
+
|
|
571
|
+
```typescript
|
|
572
|
+
// src/services/voice-call.ts
|
|
573
|
+
import { Service } from "@openclaw/plugin-sdk";
|
|
574
|
+
import { ClawVoiceConfig } from "../config";
|
|
575
|
+
|
|
576
|
+
export class VoiceCallService implements Service {
|
|
577
|
+
private activeCalls = new Map<string, ActiveCall>();
|
|
578
|
+
private config: ClawVoiceConfig;
|
|
579
|
+
|
|
580
|
+
constructor(config: ClawVoiceConfig) {
|
|
581
|
+
this.config = config;
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
async start(): Promise<void> {
|
|
585
|
+
// Initialize telephony provider connection
|
|
586
|
+
// Set up event listeners for incoming calls
|
|
587
|
+
// Start health check interval
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
async stop(): Promise<void> {
|
|
591
|
+
// Gracefully end all active calls
|
|
592
|
+
// Close WebSocket connections
|
|
593
|
+
// Clean up resources
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
async initiateCall(params: CallParams): Promise<CallResult> {
|
|
597
|
+
// 1. Validate phone number
|
|
598
|
+
// 2. Create call via telephony provider (Telnyx/Twilio)
|
|
599
|
+
// 3. Set up voice provider WebSocket (Deepgram/ElevenLabs)
|
|
600
|
+
// 4. Bridge telephony audio <-> voice provider
|
|
601
|
+
// 5. Track in activeCalls map
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
async hangup(callId: string): Promise<void> {
|
|
605
|
+
const call = this.activeCalls.get(callId);
|
|
606
|
+
if (!call) throw new Error(`No active call: ${callId}`);
|
|
607
|
+
// End call via telephony provider
|
|
608
|
+
// Close voice provider WebSocket
|
|
609
|
+
// Run post-call analysis
|
|
610
|
+
// Clean up
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
```
|
|
614
|
+
|
|
615
|
+
### WebSocket Relay Service (Managed Mode)
|
|
616
|
+
|
|
617
|
+
For managed service users, this service connects outbound to ClawVoice relay servers.
|
|
618
|
+
|
|
619
|
+
```typescript
|
|
620
|
+
// src/services/relay.ts
|
|
621
|
+
import { Service } from "@openclaw/plugin-sdk";
|
|
622
|
+
import { ClawVoiceConfig } from "../config";
|
|
623
|
+
import WebSocket from "ws";
|
|
624
|
+
|
|
625
|
+
export class WebSocketRelayService implements Service {
|
|
626
|
+
private ws: WebSocket | null = null;
|
|
627
|
+
private config: ClawVoiceConfig;
|
|
628
|
+
private reconnectTimer: NodeJS.Timeout | null = null;
|
|
629
|
+
|
|
630
|
+
constructor(config: ClawVoiceConfig) {
|
|
631
|
+
this.config = config;
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
async start(): Promise<void> {
|
|
635
|
+
await this.connect();
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
private async connect(): Promise<void> {
|
|
639
|
+
const url = `${this.config.relayUrl}?token=${this.config.serviceToken}`;
|
|
640
|
+
|
|
641
|
+
this.ws = new WebSocket(url);
|
|
642
|
+
|
|
643
|
+
this.ws.on("open", () => {
|
|
644
|
+
// Authenticate with relay server
|
|
645
|
+
// Report capabilities (voice provider, available tools)
|
|
646
|
+
// Ready to receive call events
|
|
647
|
+
});
|
|
648
|
+
|
|
649
|
+
this.ws.on("message", async (data) => {
|
|
650
|
+
const event = JSON.parse(data.toString());
|
|
651
|
+
// Handle relay events:
|
|
652
|
+
// - "incoming_call": New inbound call to process
|
|
653
|
+
// - "call_audio": Audio chunk from active call
|
|
654
|
+
// - "call_ended": Call ended, run post-processing
|
|
655
|
+
// - "config_update": Service config changed
|
|
656
|
+
});
|
|
657
|
+
|
|
658
|
+
this.ws.on("close", () => {
|
|
659
|
+
// Reconnect with exponential backoff
|
|
660
|
+
this.scheduleReconnect();
|
|
661
|
+
});
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
private scheduleReconnect(): void {
|
|
665
|
+
// Exponential backoff: 1s, 2s, 4s, 8s, max 30s
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
async stop(): Promise<void> {
|
|
669
|
+
if (this.reconnectTimer) clearTimeout(this.reconnectTimer);
|
|
670
|
+
this.ws?.close();
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
```
|
|
674
|
+
|
|
675
|
+
## Voice Memory Namespace Implementation
|
|
676
|
+
|
|
677
|
+
This is the core differentiator: voice calls write to an isolated namespace instead of the main memory files.
|
|
678
|
+
|
|
679
|
+
### File Structure
|
|
680
|
+
|
|
681
|
+
```
|
|
682
|
+
~/.openclaw/workspace/
|
|
683
|
+
MEMORY.md # Main memory (text channels write here)
|
|
684
|
+
memory/
|
|
685
|
+
2026-03-11.md # Main daily log
|
|
686
|
+
voice-memory/ # Voice-only namespace (created by plugin)
|
|
687
|
+
VOICE-MEMORY.md # Curated voice long-term memory
|
|
688
|
+
2026-03-11.md # Voice daily log
|
|
689
|
+
calls/
|
|
690
|
+
call-abc123.md # Individual call transcript + analysis
|
|
691
|
+
```
|
|
692
|
+
|
|
693
|
+
### Implementation
|
|
694
|
+
|
|
695
|
+
```typescript
|
|
696
|
+
// src/memory/voice-namespace.ts
|
|
697
|
+
import { PluginAPI } from "@openclaw/plugin-sdk";
|
|
698
|
+
import { ClawVoiceConfig } from "../config";
|
|
699
|
+
import path from "path";
|
|
700
|
+
import fs from "fs/promises";
|
|
701
|
+
|
|
702
|
+
const VOICE_MEMORY_DIR = "voice-memory";
|
|
703
|
+
const VOICE_MEMORY_FILE = "voice-memory/VOICE-MEMORY.md";
|
|
704
|
+
|
|
705
|
+
export async function ensureVoiceMemoryDir(workspacePath: string): Promise<void> {
|
|
706
|
+
const voiceDir = path.join(workspacePath, VOICE_MEMORY_DIR);
|
|
707
|
+
await fs.mkdir(voiceDir, { recursive: true });
|
|
708
|
+
await fs.mkdir(path.join(voiceDir, "calls"), { recursive: true });
|
|
709
|
+
|
|
710
|
+
// Create VOICE-MEMORY.md if it doesn't exist
|
|
711
|
+
const memFile = path.join(workspacePath, VOICE_MEMORY_FILE);
|
|
712
|
+
try {
|
|
713
|
+
await fs.access(memFile);
|
|
714
|
+
} catch {
|
|
715
|
+
await fs.writeFile(memFile, "# Voice Memory\n\nCurated memories from voice calls.\n");
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
export async function buildVoiceMemoryContext(
|
|
720
|
+
ctx: any,
|
|
721
|
+
config: ClawVoiceConfig
|
|
722
|
+
): Promise<string> {
|
|
723
|
+
const workspace = ctx.workspace.path;
|
|
724
|
+
await ensureVoiceMemoryDir(workspace);
|
|
725
|
+
|
|
726
|
+
// Read voice long-term memory
|
|
727
|
+
const voiceMemory = await safeRead(path.join(workspace, VOICE_MEMORY_FILE));
|
|
728
|
+
|
|
729
|
+
// Read today's voice log
|
|
730
|
+
const today = new Date().toISOString().split("T")[0];
|
|
731
|
+
const todayLog = await safeRead(
|
|
732
|
+
path.join(workspace, VOICE_MEMORY_DIR, `${today}.md`)
|
|
733
|
+
);
|
|
734
|
+
|
|
735
|
+
return [voiceMemory, todayLog ? `\n## Today's Voice Log\n${todayLog}` : ""].join("\n");
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
/**
 * Persists a finished call: a full record under `calls/<callId>.md` and a
 * short summary appended to the daily log (`<UTC date>.md`).
 *
 * @param workspacePath - Root directory of the workspace.
 * @param callId - Call identifier; used verbatim as the record's file name.
 * @param transcript - Full call transcript text.
 * @param analysis - Post-call analysis; `duration`, `summary`, `mood`,
 *   `topics` and `actionItems` are written out.
 * @throws Error when `callId` is empty or contains a path separator.
 * @throws Any filesystem error raised while creating or writing the files.
 */
export async function writeCallTranscript(
  workspacePath: string,
  callId: string,
  transcript: string,
  analysis: CallAnalysis
): Promise<void> {
  // callId becomes part of a file path, so it must not be able to point
  // outside the calls/ directory.
  if (!callId || /[\\/\0]/.test(callId)) {
    throw new Error(`Invalid call id for transcript file: ${JSON.stringify(callId)}`);
  }

  // One timestamp for the whole record so the file date, the header and the
  // daily-log entry cannot disagree when a call ends around midnight.
  const now = new Date();
  const callsDir = path.join(workspacePath, VOICE_MEMORY_DIR, "calls");
  const callFile = path.join(callsDir, `${callId}.md`);
  const today = now.toISOString().split("T")[0];
  const dailyLog = path.join(workspacePath, VOICE_MEMORY_DIR, `${today}.md`);

  // Do not rely on a previous setup call having created the directories.
  await fs.mkdir(callsDir, { recursive: true });

  // Write full call record
  const callContent = [
    `# Call ${callId}`,
    `Date: ${now.toISOString()}`,
    `Duration: ${analysis.duration}s`,
    "",
    "## Summary",
    analysis.summary,
    "",
    "## Mood",
    analysis.mood,
    "",
    "## Topics",
    analysis.topics.map((t: string) => `- ${t}`).join("\n"),
    "",
    "## Action Items",
    analysis.actionItems.map((a: string) => `- [ ] ${a}`).join("\n"),
    "",
    "## Transcript",
    transcript,
  ].join("\n");

  await fs.writeFile(callFile, callContent);

  // Append summary to daily log.
  // NOTE(review): the heading uses local time while the log file name uses
  // the UTC date; kept as-is to preserve the existing log format.
  const logEntry = [
    `### ${now.toLocaleTimeString()} - Call ${callId}`,
    analysis.summary,
    analysis.actionItems.length > 0
      ? `Action items: ${analysis.actionItems.join(", ")}`
      : "",
    "",
  ].join("\n");

  await fs.appendFile(dailyLog, logEntry);
}
|
|
784
|
+
|
|
785
|
+
/**
 * Best-effort UTF-8 file read.
 *
 * @param filePath - File to read.
 * @returns The file contents, or `null` when the read fails for any reason.
 */
async function safeRead(filePath: string): Promise<string | null> {
  let contents: string | null = null;
  try {
    contents = await fs.readFile(filePath, "utf-8");
  } catch {
    // Any read failure is reported as "no content".
  }
  return contents;
}
|
|
792
|
+
```
|
|
793
|
+
|
|
794
|
+
### Memory Promotion
|
|
795
|
+
|
|
796
|
+
```typescript
|
|
797
|
+
// src/memory/promotion.ts
|
|
798
|
+
import fs from "fs/promises";
|
|
799
|
+
import path from "path";
|
|
800
|
+
|
|
801
|
+
/** A memory extracted from a voice call, tracked through the promotion gate. */
interface VoiceMemoryEntry {
  /** Identifier of this memory entry. */
  id: string;
  /** Date associated with the entry (string format not fixed here — TODO confirm). */
  date: string;
  /** Identifier of the call the memory came from. */
  callId: string;
  /** The memory text itself. */
  content: string;
  /** Review state of the entry. */
  status: "pending" | "promoted" | "rejected";
}
|
|
808
|
+
|
|
809
|
+
/**
 * Lists voice memories that have not been promoted or rejected yet.
 *
 * Intended behavior: scan `voice-memory/calls/` for entries not yet promoted
 * and return them with their metadata. The scan is not implemented yet, so
 * the function fails loudly instead of silently resolving to `undefined`.
 *
 * @param workspacePath - Root directory of the workspace.
 * @throws Error always, until the scan is implemented.
 */
export async function getPendingMemories(workspacePath: string): Promise<VoiceMemoryEntry[]> {
  // TODO: Scan voice-memory/calls/ for entries not yet promoted
  // TODO: Return list with metadata
  throw new Error(`getPendingMemories is not implemented yet (workspace: ${workspacePath})`);
}
|
|
813
|
+
|
|
814
|
+
/**
 * Promotes a voice memory into the workspace's main `MEMORY.md`.
 *
 * The memory is appended after an HTML comment recording where it came from
 * and when it was promoted. `MEMORY.md` is created if it does not exist.
 *
 * @param workspacePath - Root directory of the workspace.
 * @param memoryId - Identifier recorded in the provenance comment.
 * @param content - Memory text to append.
 * @throws Any filesystem error raised by the append.
 */
export async function promoteMemory(
  workspacePath: string,
  memoryId: string,
  content: string
): Promise<void> {
  const mainMemory = path.join(workspacePath, "MEMORY.md");

  // Append promoted memory with provenance tag. The leading empty element
  // starts the entry on a fresh line regardless of how the file ends.
  // (The previous full read of MEMORY.md was unused and only made promotion
  // fail when the file did not exist yet, so it has been dropped.)
  const entry = [
    "",
    `<!-- Promoted from voice-memory, call ${memoryId}, ${new Date().toISOString()} -->`,
    content,
  ].join("\n");

  await fs.appendFile(mainMemory, entry);

  // TODO: Mark as promoted in voice-memory
}
|
|
834
|
+
|
|
835
|
+
/**
 * Rejects a pending voice memory.
 *
 * Placeholder: currently performs no work and resolves immediately.
 *
 * @param workspacePath - Root directory of the workspace.
 * @param memoryId - Identifier of the memory to reject.
 */
export async function rejectMemory(
  workspacePath: string,
  memoryId: string
): Promise<void> {
  // Planned steps:
  //   1. Move to voice-memory/archived/
  //   2. Mark as rejected
}
|
|
842
|
+
```
|
|
843
|
+
|
|
844
|
+
## Config Resolution
|
|
845
|
+
|
|
846
|
+
Plugin config can come from multiple sources. Resolution order:
|
|
847
|
+
|
|
848
|
+
1. Environment variables (highest priority)
|
|
849
|
+
2. `openclaw config set clawvoice.*` values
|
|
850
|
+
3. `configSchema` defaults (lowest priority)
|
|
851
|
+
|
|
852
|
+
```typescript
|
|
853
|
+
// src/config.ts
|
|
854
|
+
|
|
855
|
+
/** Fully resolved ClawVoice plugin configuration. */
export interface ClawVoiceConfig {
  // --- Deployment -----------------------------------------------------------
  /** Whether the plugin runs self-hosted or against the managed service. */
  mode: "self-hosted" | "managed";
  /** Service token, if one is configured. */
  serviceToken?: string;
  /** WebSocket relay URL. */
  relayUrl: string;

  // --- Provider selection ---------------------------------------------------
  /** Telephony backend used for calls. */
  telephonyProvider: "telnyx" | "twilio";
  /** Voice pipeline used for the conversation. */
  voiceProvider: "deepgram-agent" | "elevenlabs-conversational";

  // --- Telnyx ---------------------------------------------------------------
  telnyxApiKey?: string;
  telnyxConnectionId?: string;
  telnyxPhoneNumber?: string;
  telnyxWebhookSecret?: string;

  // --- Twilio ---------------------------------------------------------------
  twilioAccountSid?: string;
  twilioAuthToken?: string;
  twilioPhoneNumber?: string;

  // --- Deepgram -------------------------------------------------------------
  deepgramApiKey?: string;
  /** Deepgram voice name. */
  deepgramVoice: string;

  // --- ElevenLabs -----------------------------------------------------------
  elevenlabsApiKey?: string;
  elevenlabsAgentId?: string;
  elevenlabsVoiceId?: string;

  // --- Post-call analysis ---------------------------------------------------
  openaiApiKey?: string;
  /** Model name used for call analysis. */
  analysisModel: string;

  // --- Memory ---------------------------------------------------------------
  /** Access level voice sessions get to the main memory. */
  mainMemoryAccess: "read" | "none";
  /** Whether memories are extracted automatically. */
  autoExtractMemories: boolean;

  // --- Tool policy ----------------------------------------------------------
  /** Whether tool restrictions are applied. */
  restrictTools: boolean;
  /** Names of the tools that are denied. */
  deniedTools: string[];

  // --- Call behavior --------------------------------------------------------
  /** Whether answering machine detection is enabled. */
  amdEnabled: boolean;
  /** Maximum call duration (unit not stated here — presumably seconds, TODO confirm). */
  maxCallDuration: number;
  /** Whether calls are recorded. */
  recordCalls: boolean;
}
|
|
883
|
+
|
|
884
|
+
/**
 * Resolves the effective plugin configuration.
 *
 * For every setting the first available source wins:
 *   1. the environment variable,
 *   2. the value in `pluginConfig`,
 *   3. the built-in default (where one exists).
 *
 * @param pluginConfig - Raw plugin configuration values keyed by setting name.
 * @returns The resolved configuration.
 */
export function resolveConfig(pluginConfig: Record<string, any>): ClawVoiceConfig {
  // Parse base-10 explicitly and ignore non-positive or unparsable overrides,
  // so e.g. "-1" or "abc" cannot produce a negative or NaN call limit.
  const envMaxCallDuration = Number.parseInt(env("CLAWVOICE_MAX_CALL_DURATION") ?? "", 10);

  // NOTE(review): the provider/mode strings are taken as given and are not
  // checked against the unions declared on ClawVoiceConfig.
  return {
    mode: env("CLAWVOICE_MODE") || pluginConfig.mode || "self-hosted",
    serviceToken: env("CLAWVOICE_SERVICE_TOKEN") || pluginConfig.serviceToken,
    telephonyProvider: env("CLAWVOICE_TELEPHONY_PROVIDER") || pluginConfig.telephonyProvider || "telnyx",
    voiceProvider: env("CLAWVOICE_VOICE_PROVIDER") || pluginConfig.voiceProvider || "deepgram-agent",
    telnyxApiKey: env("TELNYX_API_KEY") || pluginConfig.telnyxApiKey,
    telnyxConnectionId: env("TELNYX_CONNECTION_ID") || pluginConfig.telnyxConnectionId,
    telnyxPhoneNumber: env("TELNYX_PHONE_NUMBER") || pluginConfig.telnyxPhoneNumber,
    telnyxWebhookSecret: env("TELNYX_WEBHOOK_SECRET") || pluginConfig.telnyxWebhookSecret,
    twilioAccountSid: env("TWILIO_ACCOUNT_SID") || pluginConfig.twilioAccountSid,
    twilioAuthToken: env("TWILIO_AUTH_TOKEN") || pluginConfig.twilioAuthToken,
    twilioPhoneNumber: env("TWILIO_PHONE_NUMBER") || pluginConfig.twilioPhoneNumber,
    deepgramApiKey: env("DEEPGRAM_API_KEY") || pluginConfig.deepgramApiKey,
    deepgramVoice: env("CLAWVOICE_DEEPGRAM_VOICE") || pluginConfig.deepgramVoice || "aura-asteria-en",
    elevenlabsApiKey: env("ELEVENLABS_API_KEY") || pluginConfig.elevenlabsApiKey,
    elevenlabsAgentId: env("ELEVENLABS_AGENT_ID") || pluginConfig.elevenlabsAgentId,
    elevenlabsVoiceId: env("ELEVENLABS_VOICE_ID") || pluginConfig.elevenlabsVoiceId,
    openaiApiKey: env("OPENAI_API_KEY") || pluginConfig.openaiApiKey,
    analysisModel: env("CLAWVOICE_ANALYSIS_MODEL") || pluginConfig.analysisModel || "gpt-4o-mini",
    mainMemoryAccess: (env("CLAWVOICE_MAIN_MEMORY_ACCESS") || pluginConfig.mainMemoryAccess || "read") as "read" | "none",
    autoExtractMemories: parseBool(env("CLAWVOICE_AUTO_EXTRACT_MEMORIES"), pluginConfig.autoExtractMemories ?? true),
    restrictTools: parseBool(env("CLAWVOICE_RESTRICT_TOOLS"), pluginConfig.restrictTools ?? true),
    deniedTools: parseArray(env("CLAWVOICE_DENIED_TOOLS")) || pluginConfig.deniedTools || ["exec", "browser", "web_fetch", "gateway", "cron", "sessions_spawn"],
    amdEnabled: parseBool(env("CLAWVOICE_AMD_ENABLED"), pluginConfig.amdEnabled ?? true),
    maxCallDuration: envMaxCallDuration > 0 ? envMaxCallDuration : pluginConfig.maxCallDuration || 1800,
    recordCalls: parseBool(env("CLAWVOICE_RECORD_CALLS"), pluginConfig.recordCalls ?? false),
    relayUrl: env("CLAWVOICE_RELAY_URL") || pluginConfig.relayUrl || "wss://relay.clawvoice.dev",
  };
}
|
|
914
|
+
|
|
915
|
+
/**
 * Looks up an environment variable.
 *
 * @param key - Variable name.
 * @returns The value, or `undefined` when the variable is unset or empty.
 */
function env(key: string): string | undefined {
  const raw = process.env[key];
  // An empty string is treated the same as an unset variable.
  return raw ? raw : undefined;
}
|
|
918
|
+
|
|
919
|
+
/**
 * Interprets an environment value as a boolean.
 *
 * Matching ignores case and surrounding whitespace. `true`, `1`, `yes` and
 * `on` mean true; `false`, `0`, `no` and `off` mean false.
 *
 * @param envVal - Raw environment value, if any.
 * @param fallback - Result when the value is missing or not recognised.
 * @returns The parsed boolean, or `fallback`.
 */
function parseBool(envVal: string | undefined, fallback: boolean): boolean {
  if (envVal === undefined) return fallback;

  switch (envVal.trim().toLowerCase()) {
    case "true":
    case "1":
    case "yes":
    case "on":
      return true;
    case "false":
    case "0":
    case "no":
    case "off":
      return false;
    default:
      // Unrecognised text keeps the configured/default value.
      return fallback;
  }
}
|
|
924
|
+
|
|
925
|
+
/**
 * Splits a comma-separated environment value into a list of trimmed entries.
 *
 * Empty entries (from doubled, leading or trailing commas) are dropped.
 *
 * @param envVal - Raw environment value, if any.
 * @returns The entries, or `null` when the value is missing or holds no
 *   usable entry, so callers fall through to their next source.
 */
function parseArray(envVal: string | undefined): string[] | null {
  if (!envVal) return null;

  const items = envVal
    .split(",")
    .map((s) => s.trim())
    .filter((s) => s.length > 0);

  // A value made only of separators/whitespace counts as "not provided".
  return items.length > 0 ? items : null;
}
|
|
929
|
+
```
|
|
930
|
+
|
|
931
|
+
## Agent Skill Document
|
|
932
|
+
|
|
933
|
+
The plugin ships a skill that teaches the OpenClaw agent how to use voice calling.
|
|
934
|
+
|
|
935
|
+
```markdown
|
|
936
|
+
<!-- skills/voice-assistant/SKILL.md -->
|
|
937
|
+
|
|
938
|
+
# Voice Assistant
|
|
939
|
+
|
|
940
|
+
You can make and receive phone calls using ClawVoice.
|
|
941
|
+
|
|
942
|
+
## Making a Call
|
|
943
|
+
|
|
944
|
+
Use the `voice_assistant.call` tool to call someone:
|
|
945
|
+
- Provide the phone number in E.164 format (+15551234567)
|
|
946
|
+
- Optionally describe the purpose of the call
|
|
947
|
+
|
|
948
|
+
During the call, you'll have a real-time voice conversation. Keep your responses
|
|
949
|
+
concise and natural -- you're speaking, not typing.
|
|
950
|
+
|
|
951
|
+
## During Voice Calls
|
|
952
|
+
|
|
953
|
+
When you're in a voice call session:
|
|
954
|
+
- Keep responses SHORT (1-3 sentences). Long responses feel unnatural in voice.
|
|
955
|
+
- Be conversational. Use contractions, casual language.
|
|
956
|
+
- If you need to convey complex information, break it into small chunks.
|
|
957
|
+
- You can use the `voice_assistant.hangup` tool to end the call.
|
|
958
|
+
|
|
959
|
+
## Voice Memory
|
|
960
|
+
|
|
961
|
+
Voice calls write to a separate memory namespace (voice-memory/).
|
|
962
|
+
- You can reference your main memory but cannot write to it during calls.
|
|
963
|
+
- After a call ends, a summary and extracted memories are saved.
|
|
964
|
+
- An operator can later promote voice memories to your main MEMORY.md.
|
|
965
|
+
|
|
966
|
+
## Security
|
|
967
|
+
|
|
968
|
+
During voice calls, some tools are restricted for safety:
|
|
969
|
+
- No command execution, browser automation, or web fetching
|
|
970
|
+
- No spawning new sessions or modifying cron jobs
|
|
971
|
+
- Workspace file access only
|
|
972
|
+
```
|
|
973
|
+
|
|
974
|
+
## Telnyx Integration Reference
|
|
975
|
+
|
|
976
|
+
### Key Differences from Twilio
|
|
977
|
+
|
|
978
|
+
The reference code uses Twilio. When porting to Telnyx:
|
|
979
|
+
|
|
980
|
+
| Twilio Concept | Telnyx Equivalent |
|
|
981
|
+
|---------------|-------------------|
|
|
982
|
+
| TwiML (XML responses) | TeXML (compatible format) or Call Control API (REST) |
|
|
983
|
+
| Media Streams (WebSocket) | Stream API (WebSocket, different message format) |
|
|
984
|
+
| Status Callbacks | Webhooks (different event names/payloads) |
|
|
985
|
+
| Programmable Voice API | Call Control v2 API |
|
|
986
|
+
| AMD (Answering Machine Detection) | AMD via Call Control API |
|
|
987
|
+
| `TWILIO_ACCOUNT_SID` + `TWILIO_AUTH_TOKEN` | `TELNYX_API_KEY` |
|
|
988
|
+
|
|
989
|
+
### Telnyx Call Control v2
|
|
990
|
+
|
|
991
|
+
```typescript
|
|
992
|
+
// Initiate an outbound call: POST the dial request to the Telnyx calls
// endpoint, authenticating with the configured API key as a bearer token.
const dialPayload = {
  connection_id: config.telnyxConnectionId,
  to: phoneNumber,
  from: config.telnyxPhoneNumber,
  // Answering machine detection follows the amdEnabled setting.
  answering_machine_detection: config.amdEnabled ? "detect" : "disabled",
  // Call events are delivered to the plugin's Telnyx webhook route.
  webhook_url: `${gatewayUrl}/clawvoice/webhooks/telnyx`,
  // Call audio is streamed to the plugin's media-stream WebSocket route.
  stream_url: `wss://${gatewayHost}/clawvoice/media-stream`,
  stream_track: "both_tracks",
};

const response = await fetch("https://api.telnyx.com/v2/calls", {
  method: "POST",
  headers: {
    "Authorization": `Bearer ${config.telnyxApiKey}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify(dialPayload),
});
|
|
1009
|
+
```
|
|
1010
|
+
|
|
1011
|
+
### Telnyx WebSocket Media Streaming
|
|
1012
|
+
|
|
1013
|
+
Telnyx sends audio over WebSocket in a different format than Twilio:
|
|
1014
|
+
|
|
1015
|
+
```typescript
|
|
1016
|
+
// Telnyx media message format
|
|
1017
|
+
/**
 * Shape of a media frame received on the Telnyx media-stream WebSocket.
 *
 * NOTE(review): Telnyx's published examples appear to nest the audio under a
 * `media` object and to send `sequence_number` as a string — confirm this
 * shape against the current Telnyx media streaming docs before relying on it.
 */
interface TelnyxMediaMessage {
  /** Message discriminator; always "media" for audio frames. */
  event: "media";
  /** Identifier of the stream the frame belongs to. */
  stream_id: string;
  /** Base64-encoded audio. */
  payload: string;
  /** Sequence number of the message. */
  sequence_number: number;
}
|
|
1023
|
+
|
|
1024
|
+
// Audio format: 8kHz mulaw by default, can request 16kHz linear16
|
|
1025
|
+
// Configure via stream_url parameters or Call Control API
|
|
1026
|
+
```
|
|
1027
|
+
|
|
1028
|
+
### Key Telnyx Webhook Events
|
|
1029
|
+
|
|
1030
|
+
| Event | Description |
|
|
1031
|
+
|-------|-------------|
|
|
1032
|
+
| `call.initiated` | Outbound call started |
|
|
1033
|
+
| `call.answered` | Call was answered |
|
|
1034
|
+
| `call.hangup` | Call ended |
|
|
1035
|
+
| `call.machine.detection.ended` | AMD result |
|
|
1036
|
+
| `call.machine.greeting.ended` | Voicemail greeting finished (if leaving message) |
|
|
1037
|
+
| `streaming.started` | WebSocket media stream established |
|
|
1038
|
+
| `streaming.stopped` | WebSocket media stream ended |
|
|
1039
|
+
|
|
1040
|
+
## ElevenLabs Conversational AI Integration
|
|
1041
|
+
|
|
1042
|
+
This is a new integration (no reference code exists). ElevenLabs Agents handle the full voice pipeline.
|
|
1043
|
+
|
|
1044
|
+
### Architecture
|
|
1045
|
+
|
|
1046
|
+
```
|
|
1047
|
+
Phone -> Telnyx -> Audio WebSocket -> ElevenLabs Agent
                                            |
                                            v
                                   EL calls OpenClaw's
                                   /v1/chat/completions
                                            |
                                            v
                                  OpenClaw Agent (brain)
                                            |
                                            v
                                      Response text
                                            |
                                            v
                              ElevenLabs TTS -> Audio -> Phone
|
|
1057
|
+
```
|
|
1058
|
+
|
|
1059
|
+
### Setup
|
|
1060
|
+
|
|
1061
|
+
1. Create an ElevenLabs Conversational AI agent in their dashboard
|
|
1062
|
+
2. Configure it to use a "Custom LLM" pointing at your OpenClaw gateway's chat completions endpoint
|
|
1063
|
+
3. Set the agent ID in ClawVoice config
|
|
1064
|
+
|
|
1065
|
+
### Implementation Notes
|
|
1066
|
+
|
|
1067
|
+
- ElevenLabs handles STT, turn-taking, interruption detection, and TTS
|
|
1068
|
+
- OpenClaw's `/v1/chat/completions` endpoint (provided by the gateway) serves as the LLM
|
|
1069
|
+
- The plugin needs to:
|
|
1070
|
+
1. Bridge Telnyx audio WebSocket <-> ElevenLabs Agent WebSocket
|
|
1071
|
+
2. Ensure the OpenClaw chat completions endpoint includes voice memory context
|
|
1072
|
+
3. Apply tool restrictions when the request comes from an ElevenLabs agent session
|
|
1073
|
+
|
|
1074
|
+
## Development Workflow
|
|
1075
|
+
|
|
1076
|
+
### Local Development
|
|
1077
|
+
|
|
1078
|
+
```bash
|
|
1079
|
+
# 1. Clone and install
|
|
1080
|
+
git clone https://github.com/your-org/clawvoice.git
|
|
1081
|
+
cd clawvoice
|
|
1082
|
+
npm install
|
|
1083
|
+
|
|
1084
|
+
# 2. Build
|
|
1085
|
+
npm run build
|
|
1086
|
+
|
|
1087
|
+
# 3. Link to local OpenClaw
|
|
1088
|
+
npm link
|
|
1089
|
+
cd /path/to/your/openclaw/workspace
|
|
1090
|
+
openclaw plugins install --link @clawvoice/voice-assistant
|
|
1091
|
+
|
|
1092
|
+
# 4. Configure (minimal for testing)
|
|
1093
|
+
openclaw config set clawvoice.telephonyProvider telnyx
|
|
1094
|
+
openclaw config set clawvoice.telnyxApiKey YOUR_KEY
|
|
1095
|
+
openclaw config set clawvoice.deepgramApiKey YOUR_KEY
|
|
1096
|
+
|
|
1097
|
+
# 5. Start OpenClaw
|
|
1098
|
+
openclaw start
|
|
1099
|
+
|
|
1100
|
+
# 6. For webhook testing, use ngrok or similar tunnel
|
|
1101
|
+
ngrok http 3000
|
|
1102
|
+
# Then set webhook URL in Telnyx dashboard to ngrok URL + /clawvoice/webhooks/telnyx
|
|
1103
|
+
```
|
|
1104
|
+
|
|
1105
|
+
### Testing
|
|
1106
|
+
|
|
1107
|
+
```bash
|
|
1108
|
+
# Run tests
|
|
1109
|
+
npm test
|
|
1110
|
+
|
|
1111
|
+
# Test telephony connectivity
|
|
1112
|
+
openclaw clawvoice test
|
|
1113
|
+
|
|
1114
|
+
# Make a test call
|
|
1115
|
+
openclaw clawvoice call +15551234567
|
|
1116
|
+
```
|
|
1117
|
+
|
|
1118
|
+
### Package Structure
|
|
1119
|
+
|
|
1120
|
+
```
|
|
1121
|
+
clawvoice/
|
|
1122
|
+
openclaw.plugin.json # Plugin manifest
|
|
1123
|
+
package.json
|
|
1124
|
+
tsconfig.json
|
|
1125
|
+
src/
|
|
1126
|
+
index.ts # Plugin entry point
|
|
1127
|
+
config.ts # Config resolution
|
|
1128
|
+
tools.ts # Tool registration
|
|
1129
|
+
cli.ts # CLI command registration
|
|
1130
|
+
routes.ts # HTTP route registration
|
|
1131
|
+
hooks.ts # Hook registration
|
|
1132
|
+
providers/
|
|
1133
|
+
deepgram-agent.ts # Deepgram Voice Agent provider
|
|
1134
|
+
elevenlabs-conversational.ts # ElevenLabs Agents provider
|
|
1135
|
+
voice-mapping.ts # Voice ID mapping
|
|
1136
|
+
types.ts # VoiceProvider interface
|
|
1137
|
+
telephony/
|
|
1138
|
+
telnyx.ts # Telnyx Call Control v2
|
|
1139
|
+
twilio.ts # Twilio fallback
|
|
1140
|
+
phone-utils.ts # Phone number formatting
|
|
1141
|
+
types.ts # TelephonyProvider interface
|
|
1142
|
+
memory/
|
|
1143
|
+
voice-namespace.ts # Voice memory isolation
|
|
1144
|
+
extractor.ts # Post-call memory extraction
|
|
1145
|
+
promotion.ts # Memory promotion gate
|
|
1146
|
+
analysis/
|
|
1147
|
+
post-call.ts # Call summary, mood, topics
|
|
1148
|
+
services/
|
|
1149
|
+
voice-call.ts # Active call management
|
|
1150
|
+
relay.ts # Managed service WebSocket relay
|
|
1151
|
+
security/
|
|
1152
|
+
tool-policy.ts # Voice session tool restrictions
|
|
1153
|
+
prompt-guards.ts # Voice-specific injection guards
|
|
1154
|
+
skills/
|
|
1155
|
+
voice-assistant/
|
|
1156
|
+
SKILL.md # Agent skill document
|
|
1157
|
+
dist/ # Compiled output
|
|
1158
|
+
tests/
|
|
1159
|
+
providers/
|
|
1160
|
+
telephony/
|
|
1161
|
+
memory/
|
|
1162
|
+
integration/
|
|
1163
|
+
```
|
|
1164
|
+
|
|
1165
|
+
## Porting Priority
|
|
1166
|
+
|
|
1167
|
+
When implementing, work in this order:
|
|
1168
|
+
|
|
1169
|
+
### Phase 1: Core Plugin Skeleton
|
|
1170
|
+
1. `openclaw.plugin.json` manifest
|
|
1171
|
+
2. `src/index.ts` entry point
|
|
1172
|
+
3. `src/config.ts` config resolution
|
|
1173
|
+
4. `src/tools.ts` tool stubs (return "not yet implemented")
|
|
1174
|
+
5. `src/cli.ts` CLI stubs
|
|
1175
|
+
|
|
1176
|
+
### Phase 2: Telephony + Voice Provider
|
|
1177
|
+
6. `src/telephony/telnyx.ts` - Telnyx Call Control (port from twilioService.ts)
|
|
1178
|
+
7. `src/providers/deepgram-agent.ts` - Port from deepgramAgentService.ts
|
|
1179
|
+
8. `src/services/voice-call.ts` - Active call management
|
|
1180
|
+
9. `src/routes.ts` - Webhook handlers
|
|
1181
|
+
|
|
1182
|
+
### Phase 3: Memory + Analysis
|
|
1183
|
+
10. `src/memory/voice-namespace.ts` - Voice memory isolation
|
|
1184
|
+
11. `src/memory/extractor.ts` - Port from memoryExtractionService.ts
|
|
1185
|
+
12. `src/analysis/post-call.ts` - Port from openaiService.ts
|
|
1186
|
+
13. `src/hooks.ts` - Lifecycle hooks
|
|
1187
|
+
|
|
1188
|
+
### Phase 4: Security + Polish
|
|
1189
|
+
14. `src/security/tool-policy.ts` - Tool restrictions
|
|
1190
|
+
15. `src/security/prompt-guards.ts` - Voice injection guards
|
|
1191
|
+
16. `src/memory/promotion.ts` - Memory promotion
|
|
1192
|
+
17. `skills/voice-assistant/SKILL.md` - Agent skill doc
|
|
1193
|
+
|
|
1194
|
+
### Phase 5: Managed Service
|
|
1195
|
+
18. `src/services/relay.ts` - Outbound WebSocket relay
|
|
1196
|
+
19. `src/telephony/twilio.ts` - Twilio fallback provider
|
|
1197
|
+
20. `src/providers/elevenlabs-conversational.ts` - ElevenLabs Agents
|
|
1198
|
+
|
|
1199
|
+
### Phase 6: Testing + Docs
|
|
1200
|
+
21. Integration tests
|
|
1201
|
+
22. Unit tests for each module
|
|
1202
|
+
23. README, CHANGELOG
|