@openclaw/voice-call 2026.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CHANGELOG.md +78 -0
  2. package/README.md +135 -0
  3. package/index.ts +497 -0
  4. package/openclaw.plugin.json +601 -0
  5. package/package.json +16 -0
  6. package/src/cli.ts +312 -0
  7. package/src/config.test.ts +204 -0
  8. package/src/config.ts +502 -0
  9. package/src/core-bridge.ts +198 -0
  10. package/src/manager/context.ts +21 -0
  11. package/src/manager/events.ts +177 -0
  12. package/src/manager/lookup.ts +33 -0
  13. package/src/manager/outbound.ts +248 -0
  14. package/src/manager/state.ts +50 -0
  15. package/src/manager/store.ts +88 -0
  16. package/src/manager/timers.ts +86 -0
  17. package/src/manager/twiml.ts +9 -0
  18. package/src/manager.test.ts +108 -0
  19. package/src/manager.ts +888 -0
  20. package/src/media-stream.test.ts +97 -0
  21. package/src/media-stream.ts +393 -0
  22. package/src/providers/base.ts +67 -0
  23. package/src/providers/index.ts +10 -0
  24. package/src/providers/mock.ts +168 -0
  25. package/src/providers/plivo.test.ts +28 -0
  26. package/src/providers/plivo.ts +504 -0
  27. package/src/providers/stt-openai-realtime.ts +311 -0
  28. package/src/providers/telnyx.ts +364 -0
  29. package/src/providers/tts-openai.ts +264 -0
  30. package/src/providers/twilio/api.ts +45 -0
  31. package/src/providers/twilio/webhook.ts +30 -0
  32. package/src/providers/twilio.test.ts +64 -0
  33. package/src/providers/twilio.ts +595 -0
  34. package/src/response-generator.ts +171 -0
  35. package/src/runtime.ts +217 -0
  36. package/src/telephony-audio.ts +88 -0
  37. package/src/telephony-tts.ts +95 -0
  38. package/src/tunnel.ts +331 -0
  39. package/src/types.ts +273 -0
  40. package/src/utils.ts +12 -0
  41. package/src/voice-mapping.ts +65 -0
  42. package/src/webhook-security.test.ts +260 -0
  43. package/src/webhook-security.ts +469 -0
  44. package/src/webhook.ts +491 -0
package/src/webhook.ts ADDED
@@ -0,0 +1,491 @@
1
+ import { spawn } from "node:child_process";
2
+ import http from "node:http";
3
+ import { URL } from "node:url";
4
+
5
+ import type { VoiceCallConfig } from "./config.js";
6
+ import type { CoreConfig } from "./core-bridge.js";
7
+ import type { CallManager } from "./manager.js";
8
+ import type { MediaStreamConfig } from "./media-stream.js";
9
+ import { MediaStreamHandler } from "./media-stream.js";
10
+ import type { VoiceCallProvider } from "./providers/base.js";
11
+ import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
12
+ import type { TwilioProvider } from "./providers/twilio.js";
13
+ import type { NormalizedEvent, WebhookContext } from "./types.js";
14
+
15
/**
 * HTTP server for receiving voice call webhooks from providers.
 * Supports WebSocket upgrades for media streams when streaming is enabled.
 *
 * A webhook request is processed only when it is a POST to the configured
 * webhook path, its body fits within {@link VoiceCallWebhookServer.MAX_BODY_BYTES},
 * and the provider's `verifyWebhook` check accepts it.
 */
export class VoiceCallWebhookServer {
  /**
   * Upper bound on an accepted webhook body. The body is buffered *before*
   * signature verification, so without a cap any unauthenticated client could
   * make the process buffer an arbitrarily large payload.
   */
  private static readonly MAX_BODY_BYTES = 1024 * 1024;

  /** Error code used to tag the rejection raised when a body exceeds the cap. */
  private static readonly BODY_TOO_LARGE_CODE = "ERR_VOICE_CALL_BODY_TOO_LARGE";

  /**
   * Fixed base used to parse request targets. Only the pathname and query are
   * read from the parsed URL, so the client-supplied Host header (which may be
   * missing or malformed) is deliberately not used for parsing.
   */
  private static readonly URL_PARSE_BASE = "http://localhost";

  /** Delay before speaking the initial message, to let stream setup complete. */
  private static readonly INITIAL_MESSAGE_DELAY_MS = 500;

  private server: http.Server | null = null;
  private config: VoiceCallConfig;
  private manager: CallManager;
  private provider: VoiceCallProvider;
  private coreConfig: CoreConfig | null;

  /** Media stream handler for bidirectional audio (when streaming enabled) */
  private mediaStreamHandler: MediaStreamHandler | null = null;

  /**
   * @param config Voice-call plugin configuration (serve/streaming settings are read here).
   * @param manager Call manager that receives normalized events and speaks responses.
   * @param provider Telephony provider used to verify and parse webhooks.
   * @param coreConfig Core configuration needed for auto-responses; when omitted,
   *   auto-responses are skipped.
   */
  constructor(
    config: VoiceCallConfig,
    manager: CallManager,
    provider: VoiceCallProvider,
    coreConfig?: CoreConfig,
  ) {
    this.config = config;
    this.manager = manager;
    this.provider = provider;
    this.coreConfig = coreConfig ?? null;

    // Initialize media stream handler if streaming is enabled
    if (config.streaming?.enabled) {
      this.initializeMediaStreaming();
    }
  }

  /**
   * Get the media stream handler (for wiring to provider).
   *
   * @returns The handler, or null when streaming is disabled or no API key was found.
   */
  getMediaStreamHandler(): MediaStreamHandler | null {
    return this.mediaStreamHandler;
  }

  /**
   * Return the provider narrowed to Twilio when its name is "twilio", else null.
   * Centralizes the name check that guards every Twilio-only call below.
   */
  private getTwilioProvider(): TwilioProvider | null {
    return this.provider.name === "twilio"
      ? (this.provider as TwilioProvider)
      : null;
  }

  /**
   * Initialize media streaming with OpenAI Realtime STT.
   * Leaves the handler unset (with a warning) when no API key is available.
   */
  private initializeMediaStreaming(): void {
    const apiKey =
      this.config.streaming?.openaiApiKey || process.env.OPENAI_API_KEY;

    if (!apiKey) {
      console.warn(
        "[voice-call] Streaming enabled but no OpenAI API key found",
      );
      return;
    }

    const sttProvider = new OpenAIRealtimeSTTProvider({
      apiKey,
      model: this.config.streaming?.sttModel,
      silenceDurationMs: this.config.streaming?.silenceDurationMs,
      vadThreshold: this.config.streaming?.vadThreshold,
    });

    const streamConfig: MediaStreamConfig = {
      sttProvider,
      onTranscript: (providerCallId, transcript) => {
        console.log(
          `[voice-call] Transcript for ${providerCallId}: ${transcript}`,
        );

        // Clear TTS queue on barge-in (user started speaking, interrupt current playback)
        this.getTwilioProvider()?.clearTtsQueue(providerCallId);

        // Look up our internal call ID from the provider call ID
        const call = this.manager.getCallByProviderCallId(providerCallId);
        if (!call) {
          console.warn(
            `[voice-call] No active call found for provider ID: ${providerCallId}`,
          );
          return;
        }

        // Create a speech event and process it through the manager
        const event: NormalizedEvent = {
          id: `stream-transcript-${Date.now()}`,
          type: "call.speech",
          callId: call.callId,
          providerCallId,
          timestamp: Date.now(),
          transcript,
          isFinal: true,
        };
        this.manager.processEvent(event);

        // Auto-respond in conversation mode (inbound always, outbound if mode is conversation)
        const callMode = call.metadata?.mode as string | undefined;
        const shouldRespond =
          call.direction === "inbound" || callMode === "conversation";
        if (shouldRespond) {
          this.handleInboundResponse(call.callId, transcript).catch((err) => {
            console.warn(`[voice-call] Failed to auto-respond:`, err);
          });
        }
      },
      onSpeechStart: (providerCallId) => {
        this.getTwilioProvider()?.clearTtsQueue(providerCallId);
      },
      onPartialTranscript: (callId, partial) => {
        console.log(`[voice-call] Partial for ${callId}: ${partial}`);
      },
      onConnect: (callId, streamSid) => {
        console.log(
          `[voice-call] Media stream connected: ${callId} -> ${streamSid}`,
        );
        // Register stream with provider for TTS routing
        this.getTwilioProvider()?.registerCallStream(callId, streamSid);

        // Speak initial message if one was provided when call was initiated
        // Use setTimeout to allow stream setup to complete
        setTimeout(() => {
          this.manager.speakInitialMessage(callId).catch((err) => {
            console.warn(`[voice-call] Failed to speak initial message:`, err);
          });
        }, VoiceCallWebhookServer.INITIAL_MESSAGE_DELAY_MS);
      },
      onDisconnect: (callId) => {
        console.log(`[voice-call] Media stream disconnected: ${callId}`);
        this.getTwilioProvider()?.unregisterCallStream(callId);
      },
    };

    this.mediaStreamHandler = new MediaStreamHandler(streamConfig);
    console.log("[voice-call] Media streaming initialized");
  }

  /**
   * Start the webhook server.
   *
   * @returns The local webhook URL once the server is listening. The promise
   *   rejects if the server emits an error (for example, the port is in use).
   */
  async start(): Promise<string> {
    const { port, bind, path: webhookPath } = this.config.serve;
    const streamPath = this.config.streaming?.streamPath || "/voice/stream";

    return new Promise((resolve, reject) => {
      const server = http.createServer((req, res) => {
        this.handleRequest(req, res, webhookPath).catch((err) => {
          console.error("[voice-call] Webhook error:", err);
          if (res.headersSent) {
            // A response is already underway; just make sure it terminates.
            if (!res.writableEnded) {
              res.end();
            }
            return;
          }
          res.statusCode = 500;
          res.end("Internal Server Error");
        });
      });
      this.server = server;

      // Handle WebSocket upgrades for media streams
      const mediaStreamHandler = this.mediaStreamHandler;
      if (mediaStreamHandler) {
        server.on("upgrade", (request, socket, head) => {
          // This listener runs synchronously, so a throw here would be an
          // uncaught exception; parseRequestUrl never throws.
          const url = this.parseRequestUrl(request.url);

          if (url?.pathname === streamPath) {
            console.log("[voice-call] WebSocket upgrade for media stream");
            mediaStreamHandler.handleUpgrade(request, socket, head);
          } else {
            socket.destroy();
          }
        });
      }

      server.on("error", reject);

      server.listen(port, bind, () => {
        const url = `http://${bind}:${port}${webhookPath}`;
        console.log(`[voice-call] Webhook server listening on ${url}`);
        if (this.mediaStreamHandler) {
          console.log(
            `[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`,
          );
        }
        resolve(url);
      });
    });
  }

  /**
   * Stop the webhook server. Resolves immediately when it was never started.
   */
  async stop(): Promise<void> {
    return new Promise((resolve) => {
      if (this.server) {
        this.server.close(() => {
          this.server = null;
          resolve();
        });
      } else {
        resolve();
      }
    });
  }

  /**
   * Parse a raw request target into a URL without ever throwing.
   *
   * @returns The parsed URL, or null when the target cannot be parsed.
   */
  private parseRequestUrl(rawUrl: string | undefined): URL | null {
    try {
      return new URL(rawUrl || "/", VoiceCallWebhookServer.URL_PARSE_BASE);
    } catch {
      return null;
    }
  }

  /**
   * Check whether `pathname` is the webhook path or a sub-path of it.
   * Matching is done on a path-segment boundary so that a sibling such as
   * `/voice/webhook-other` does not match `/voice/webhook`.
   */
  private matchesWebhookPath(pathname: string, webhookPath: string): boolean {
    const base = webhookPath.endsWith("/")
      ? webhookPath.slice(0, -1)
      : webhookPath;
    return pathname === base || pathname.startsWith(`${base}/`);
  }

  /**
   * Handle incoming HTTP request.
   *
   * Responds 400 for an unparseable target, 404 for a non-webhook path, 405 for
   * non-POST methods, 413 for an oversized body and 401 when the provider
   * rejects the signature. Otherwise every parsed event is handed to the
   * manager and the provider-supplied response is returned.
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    webhookPath: string,
  ): Promise<void> {
    const url = this.parseRequestUrl(req.url);
    if (!url) {
      res.statusCode = 400;
      res.end("Bad Request");
      return;
    }

    // Check path
    if (!this.matchesWebhookPath(url.pathname, webhookPath)) {
      res.statusCode = 404;
      res.end("Not Found");
      return;
    }

    // Only accept POST
    if (req.method !== "POST") {
      res.statusCode = 405;
      res.end("Method Not Allowed");
      return;
    }

    // Read body
    let body: string;
    try {
      body = await this.readBody(req);
    } catch (err) {
      const tooLarge =
        err instanceof Error &&
        (err as NodeJS.ErrnoException).code ===
          VoiceCallWebhookServer.BODY_TOO_LARGE_CODE;
      if (!tooLarge) {
        throw err;
      }
      console.warn("[voice-call] Webhook body too large; rejecting request");
      res.statusCode = 413;
      res.setHeader("Connection", "close");
      // Stop receiving the rest of the upload once the response is flushed.
      res.once("finish", () => req.destroy());
      res.end("Payload Too Large");
      return;
    }

    // Build webhook context. The URL is rebuilt from the Host header exactly as
    // received because it is handed to the provider's verification step.
    const ctx: WebhookContext = {
      headers: req.headers as Record<string, string | string[] | undefined>,
      rawBody: body,
      url: `http://${req.headers.host}${req.url}`,
      method: "POST",
      query: Object.fromEntries(url.searchParams),
      remoteAddress: req.socket.remoteAddress ?? undefined,
    };

    // Verify signature
    const verification = this.provider.verifyWebhook(ctx);
    if (!verification.ok) {
      console.warn(
        `[voice-call] Webhook verification failed: ${verification.reason}`,
      );
      res.statusCode = 401;
      res.end("Unauthorized");
      return;
    }

    // Parse events
    const result = this.provider.parseWebhookEvent(ctx);

    // Process each event; one failing event must not block the others
    for (const event of result.events) {
      try {
        this.manager.processEvent(event);
      } catch (err) {
        console.error(
          `[voice-call] Error processing event ${event.type}:`,
          err,
        );
      }
    }

    // Send response
    res.statusCode = result.statusCode || 200;

    if (result.providerResponseHeaders) {
      for (const [key, value] of Object.entries(
        result.providerResponseHeaders,
      )) {
        res.setHeader(key, value);
      }
    }

    res.end(result.providerResponseBody || "OK");
  }

  /**
   * Read request body as a UTF-8 string.
   *
   * Rejects with an error whose `code` is
   * {@link VoiceCallWebhookServer.BODY_TOO_LARGE_CODE} as soon as more than
   * {@link VoiceCallWebhookServer.MAX_BODY_BYTES} bytes have been received.
   */
  private readBody(req: http.IncomingMessage): Promise<string> {
    const maxBytes = VoiceCallWebhookServer.MAX_BODY_BYTES;

    return new Promise((resolve, reject) => {
      const chunks: Buffer[] = [];
      let received = 0;
      let overflowed = false;

      req.on("data", (chunk: Buffer) => {
        if (overflowed) {
          return; // Discard the remainder; the caller is already answering 413.
        }
        received += chunk.length;
        if (received > maxBytes) {
          overflowed = true;
          chunks.length = 0; // Release what was buffered so far.
          const err: NodeJS.ErrnoException = new Error(
            `Webhook body exceeds ${maxBytes} bytes`,
          );
          err.code = VoiceCallWebhookServer.BODY_TOO_LARGE_CODE;
          reject(err);
          return;
        }
        chunks.push(chunk);
      });
      req.on("end", () => {
        if (!overflowed) {
          resolve(Buffer.concat(chunks).toString("utf-8"));
        }
      });
      req.on("error", reject);
    });
  }

  /**
   * Generate a reply to the caller's latest utterance and speak it on the call.
   *
   * Skips silently (with a warning) when the call is unknown or no core config
   * was supplied. Generation and playback failures are logged, never thrown.
   *
   * @param callId Internal call ID.
   * @param userMessage Transcript of what the caller just said.
   */
  private async handleInboundResponse(
    callId: string,
    userMessage: string,
  ): Promise<void> {
    console.log(
      `[voice-call] Auto-responding to inbound call ${callId}: "${userMessage}"`,
    );

    // Get call context for conversation history
    const call = this.manager.getCall(callId);
    if (!call) {
      console.warn(`[voice-call] Call ${callId} not found for auto-response`);
      return;
    }

    if (!this.coreConfig) {
      console.warn("[voice-call] Core config missing; skipping auto-response");
      return;
    }

    try {
      // Loaded lazily so the generator is only imported when a reply is needed.
      const { generateVoiceResponse } = await import("./response-generator.js");

      const result = await generateVoiceResponse({
        voiceConfig: this.config,
        coreConfig: this.coreConfig,
        callId,
        from: call.from,
        transcript: call.transcript,
        userMessage,
      });

      if (result.error) {
        console.error(
          `[voice-call] Response generation error: ${result.error}`,
        );
        return;
      }

      if (result.text) {
        console.log(`[voice-call] AI response: "${result.text}"`);
        await this.manager.speak(callId, result.text);
      }
    } catch (err) {
      console.error(`[voice-call] Auto-response error:`, err);
    }
  }
}
362
+
363
/**
 * Identity of the local Tailscale node, as extracted from the `Self` entry of
 * `tailscale status --json`.
 */
export type TailscaleSelfInfo = {
  /** The node's DNS name with any trailing dot removed, or null when unavailable. */
  dnsName: string | null;
  /** The node's ID, or null when unavailable. */
  nodeId: string | null;
};
370
+
371
/**
 * Run a tailscale command with timeout, collecting stdout.
 *
 * The returned promise never rejects because of the child process: every
 * failure mode is folded into the result so callers only need to check `code`.
 *
 * @param args Arguments passed to the `tailscale` binary.
 * @param timeoutMs Maximum time to wait before the process is killed.
 * @returns The exit code and captured stdout. `code` is -1 (with empty stdout)
 *   when the binary could not be started or the timeout elapsed, and -1 with
 *   whatever was captured when the process ended without an exit code.
 */
function runTailscaleCommand(
  args: string[],
  timeoutMs = 2500,
): Promise<{ code: number; stdout: string }> {
  return new Promise((resolve) => {
    let stdout = "";
    let settled = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Settle exactly once, whichever of close/error/timeout happens first.
    const finish = (code: number, output: string): void => {
      if (settled) {
        return;
      }
      settled = true;
      if (timer !== undefined) {
        clearTimeout(timer);
      }
      resolve({ code, stdout: output });
    };

    const proc = spawn("tailscale", args, {
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Decode as UTF-8 on the stream so multi-byte characters that straddle a
    // chunk boundary are not corrupted.
    proc.stdout.setEncoding("utf8");
    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });

    // stderr is piped but unused; drain it so a chatty child cannot block on a
    // full pipe.
    proc.stderr.resume();

    timer = setTimeout(() => {
      proc.kill("SIGKILL");
      finish(-1, "");
    }, timeoutMs);

    // Without this listener a missing `tailscale` binary (ENOENT) would surface
    // as an unhandled 'error' event and crash the process.
    proc.on("error", () => {
      finish(-1, "");
    });

    proc.on("close", (code) => {
      finish(code ?? -1, stdout);
    });
  });
}
399
+
400
/**
 * Query `tailscale status --json` and extract this node's DNS name and ID.
 *
 * @returns The node info, or null when the command exits non-zero or its
 *   output cannot be read as the expected JSON.
 */
export async function getTailscaleSelfInfo(): Promise<TailscaleSelfInfo | null> {
  const result = await runTailscaleCommand(["status", "--json"]);

  if (result.code === 0) {
    try {
      const self = JSON.parse(result.stdout).Self;
      // Tailscale reports a fully-qualified name; drop the trailing dot.
      const trimmedDnsName = self?.DNSName?.replace(/\.$/, "");
      return {
        dnsName: trimmedDnsName || null,
        nodeId: self?.ID || null,
      };
    } catch {
      // Unusable output is treated the same as a failed command.
    }
  }

  return null;
}
414
+
415
/**
 * Convenience wrapper returning only the DNS name from getTailscaleSelfInfo.
 *
 * @returns The DNS name, or null when the node info or the name is unavailable.
 */
export async function getTailscaleDnsName(): Promise<string | null> {
  const self = await getTailscaleSelfInfo();
  if (self == null) {
    return null;
  }
  return self.dnsName ?? null;
}
419
+
420
/**
 * Expose a local URL through `tailscale serve` or `tailscale funnel` at a path.
 *
 * @param opts.mode Which tailscale subcommand to run.
 * @param opts.path Mount path passed via `--set-path` and appended to the public URL.
 * @param opts.localUrl Local target the route forwards to.
 * @returns The public `https://` URL on success, or null when the DNS name
 *   cannot be determined or the tailscale command exits non-zero.
 */
export async function setupTailscaleExposureRoute(opts: {
  mode: "serve" | "funnel";
  path: string;
  localUrl: string;
}): Promise<string | null> {
  const hostName = await getTailscaleDnsName();
  if (!hostName) {
    console.warn("[voice-call] Could not get Tailscale DNS name");
    return null;
  }

  const commandArgs = [
    opts.mode,
    "--bg",
    "--yes",
    "--set-path",
    opts.path,
    opts.localUrl,
  ];
  const outcome = await runTailscaleCommand(commandArgs);

  if (outcome.code !== 0) {
    console.warn(`[voice-call] Tailscale ${opts.mode} failed`);
    return null;
  }

  const publicUrl = `https://${hostName}${opts.path}`;
  console.log(`[voice-call] Tailscale ${opts.mode} active: ${publicUrl}`);
  return publicUrl;
}
449
+
450
/**
 * Turn off a previously configured tailscale serve/funnel route.
 * The command's exit code is not inspected.
 *
 * @param opts.mode Which tailscale subcommand the route was created with.
 * @param opts.path Mount path of the route to remove.
 */
export async function cleanupTailscaleExposureRoute(opts: {
  mode: "serve" | "funnel";
  path: string;
}): Promise<void> {
  const commandArgs = [opts.mode, "off", opts.path];
  await runTailscaleCommand(commandArgs);
}
456
+
457
/**
 * Expose the webhook server via Tailscale according to `config.tailscale`.
 * Delegates to setupTailscaleExposureRoute, which shells out to
 * `tailscale serve` or `tailscale funnel`.
 *
 * @returns The public URL, or null when Tailscale exposure is off or setup fails.
 */
export async function setupTailscaleExposure(
  config: VoiceCallConfig,
): Promise<string | null> {
  const requestedMode = config.tailscale.mode;
  if (requestedMode === "off") {
    return null;
  }

  // The local target carries the webhook path suffix so that tailscale forwards
  // to the correct endpoint (it strips the mount path prefix when proxying).
  const target = `http://127.0.0.1:${config.serve.port}${config.serve.path}`;

  return setupTailscaleExposureRoute({
    mode: requestedMode === "funnel" ? "funnel" : "serve",
    path: config.tailscale.path,
    localUrl: target,
  });
}
478
+
479
/**
 * Remove the Tailscale serve/funnel route described by `config.tailscale`.
 * Does nothing when Tailscale exposure is off.
 */
export async function cleanupTailscaleExposure(
  config: VoiceCallConfig,
): Promise<void> {
  const requestedMode = config.tailscale.mode;
  if (requestedMode === "off") {
    return;
  }

  await cleanupTailscaleExposureRoute({
    mode: requestedMode === "funnel" ? "funnel" : "serve",
    path: config.tailscale.path,
  });
}