@openclaw/voice-call 2026.5.12-beta.8 → 2026.5.14-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -611,7 +611,7 @@ function validateProviderConfig(config) {
611
611
  }
612
612
  if (config.realtime.enabled && config.inboundPolicy === "disabled") errors.push("plugins.entries.voice-call.config.inboundPolicy must not be \"disabled\" when realtime.enabled is true");
613
613
  if (config.realtime.enabled && config.streaming.enabled) errors.push("plugins.entries.voice-call.config.realtime.enabled and plugins.entries.voice-call.config.streaming.enabled cannot both be true");
614
- if (config.realtime.enabled && config.provider && config.provider !== "twilio") errors.push("plugins.entries.voice-call.config.provider must be \"twilio\" when realtime.enabled is true");
614
+ if (config.realtime.enabled && config.provider && config.provider !== "twilio" && config.provider !== "telnyx") errors.push("plugins.entries.voice-call.config.provider must be \"twilio\" or \"telnyx\" when realtime.enabled is true");
615
615
  return {
616
616
  valid: errors.length === 0,
617
617
  errors
@@ -1,4 +1,4 @@
1
- import { t as VoiceCallConfigSchema } from "./config-Beumk4ED.js";
1
+ import { t as VoiceCallConfigSchema } from "./config-C8gX5Cik.js";
2
2
  import { asOptionalRecord, readStringField } from "openclaw/plugin-sdk/string-coerce-runtime";
3
3
  //#region extensions/voice-call/src/config-compat.ts
4
4
  const VOICE_CALL_LEGACY_CONFIG_REMOVAL_VERSION = "2026.6.0";
@@ -577,7 +577,12 @@ async function guardedJsonApiRequest(params) {
577
577
  throw new Error(`${params.errorPrefix}: ${response.status} ${errorText}`);
578
578
  }
579
579
  const text = await response.text();
580
- return text ? JSON.parse(text) : void 0;
580
+ if (!text) return;
581
+ try {
582
+ return JSON.parse(text);
583
+ } catch {
584
+ throw new Error(`${params.errorPrefix}: malformed JSON response`);
585
+ }
581
586
  } finally {
582
587
  await release();
583
588
  }
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import { definePluginEntry, sleep } from "./runtime-api.js";
2
2
  import "./api.js";
3
- import { i as resolveVoiceCallConfig, s as validateProviderConfig } from "./config-Beumk4ED.js";
4
- import { a as setupTailscaleExposureRoute, i as getTailscaleSelfInfo, n as resolveWebhookExposureStatus, r as cleanupTailscaleExposureRoute, s as resolveUserPath, t as createVoiceCallRuntime } from "./runtime-entry-CQfEI6TJ.js";
5
- import { i as parseVoiceCallPluginConfig, r as normalizeVoiceCallLegacyConfigInput, t as formatVoiceCallLegacyConfigWarnings } from "./config-compat-BSV2aOY6.js";
3
+ import { i as resolveVoiceCallConfig, s as validateProviderConfig } from "./config-C8gX5Cik.js";
4
+ import { a as setupTailscaleExposureRoute, i as getTailscaleSelfInfo, n as resolveWebhookExposureStatus, r as cleanupTailscaleExposureRoute, s as resolveUserPath, t as createVoiceCallRuntime } from "./runtime-entry-CmYLshgT.js";
5
+ import { i as parseVoiceCallPluginConfig, r as normalizeVoiceCallLegacyConfigInput, t as formatVoiceCallLegacyConfigWarnings } from "./config-compat-Monzho4R.js";
6
6
  import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
7
7
  import { ErrorCodes, callGatewayFromCli, errorShape } from "openclaw/plugin-sdk/gateway-runtime";
8
8
  import { normalizeOptionalLowercaseString, normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
@@ -1,6 +1,6 @@
1
1
  import { t as escapeXml } from "./voice-mapping-DMm-YvxM.js";
2
2
  import { t as getHeader } from "./http-headers-B5L5gMpK.js";
3
- import { n as reconstructWebhookUrl, r as verifyPlivoWebhook, t as guardedJsonApiRequest } from "./guarded-json-api-C5TNOqhE.js";
3
+ import { n as reconstructWebhookUrl, r as verifyPlivoWebhook, t as guardedJsonApiRequest } from "./guarded-json-api-D26auOvl.js";
4
4
  import { normalizeLowercaseStringOrEmpty, normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
5
5
  import crypto from "node:crypto";
6
6
  //#region extensions/voice-call/src/providers/plivo.ts
@@ -13,7 +13,7 @@ const DEFAULT_MAX_QUEUED_AUDIO_BYTES = TELEPHONY_SAMPLE_RATE * 120;
13
13
  const PCM16_MAX_AMPLITUDE = 32768;
14
14
  const MULAW_LINEAR_SAMPLES = new Int16Array(256);
15
15
  for (let i = 0; i < MULAW_LINEAR_SAMPLES.length; i += 1) MULAW_LINEAR_SAMPLES[i] = decodeMulawSample(i);
16
- var RealtimeTwilioAudioPacer = class {
16
+ var RealtimeAudioPacer = class {
17
17
  constructor(params) {
18
18
  this.params = params;
19
19
  this.queue = [];
@@ -53,10 +53,7 @@ var RealtimeTwilioAudioPacer = class {
53
53
  this.clearTimer();
54
54
  this.queue = [];
55
55
  this.queuedAudioBytes = 0;
56
- this.params.sendJson({
57
- event: "clear",
58
- streamSid: this.params.streamSid
59
- });
56
+ this.params.send(this.params.serializer.clear());
60
57
  return clearedAudioBytes;
61
58
  }
62
59
  close() {
@@ -86,17 +83,9 @@ var RealtimeTwilioAudioPacer = class {
86
83
  let sent = true;
87
84
  if (item.type === "audio") {
88
85
  this.queuedAudioBytes = Math.max(0, this.queuedAudioBytes - item.chunk.length);
89
- sent = this.params.sendJson({
90
- event: "media",
91
- streamSid: this.params.streamSid,
92
- media: { payload: item.chunk.toString("base64") }
93
- });
86
+ sent = this.params.send(this.params.serializer.media(item.chunk.toString("base64")));
94
87
  delayMs = item.durationMs || TELEPHONY_CHUNK_MS;
95
- } else sent = this.params.sendJson({
96
- event: "mark",
97
- streamSid: this.params.streamSid,
98
- mark: { name: item.name }
99
- });
88
+ } else sent = this.params.send(this.params.serializer.mark(item.name));
100
89
  if (!sent) {
101
90
  this.queue = [];
102
91
  this.queuedAudioBytes = 0;
@@ -148,6 +137,156 @@ function decodeMulawSample(value) {
148
137
  return sign ? -sample : sample;
149
138
  }
150
139
  //#endregion
140
+ //#region extensions/voice-call/src/webhook/stream-frame-adapter.ts
141
+ function parseTimestampMs(value) {
142
+ if (typeof value === "number" && Number.isFinite(value)) return value;
143
+ if (typeof value === "string") {
144
+ const parsed = Number.parseInt(value, 10);
145
+ return Number.isFinite(parsed) ? parsed : void 0;
146
+ }
147
+ }
148
+ function tryParseJson(rawMessage) {
149
+ try {
150
+ const parsed = JSON.parse(rawMessage);
151
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) return parsed;
152
+ } catch {}
153
+ return null;
154
+ }
155
+ function normalizeBase64ForCompare(value) {
156
+ return value.replace(/=+$/u, "").replace(/-/gu, "+").replace(/_/gu, "/");
157
+ }
158
+ function isValidBase64Payload(value) {
159
+ return normalizeBase64ForCompare(Buffer.from(value, "base64").toString("base64")) === normalizeBase64ForCompare(value);
160
+ }
161
+ var TwilioStreamFrameAdapter = class {
162
+ constructor() {
163
+ this.providerName = "twilio";
164
+ this.streamSid = "";
165
+ }
166
+ parseInbound(rawMessage) {
167
+ const msg = tryParseJson(rawMessage);
168
+ if (!msg) return { kind: "ignored" };
169
+ const event = msg.event;
170
+ if (event === "start") {
171
+ const startData = typeof msg.start === "object" && msg.start !== null ? msg.start : void 0;
172
+ const streamSid = typeof startData?.streamSid === "string" ? startData.streamSid : "";
173
+ const callSid = typeof startData?.callSid === "string" ? startData.callSid : "";
174
+ if (!streamSid || !callSid) return { kind: "ignored" };
175
+ this.streamSid = streamSid;
176
+ return {
177
+ kind: "start",
178
+ streamId: streamSid,
179
+ providerCallId: callSid
180
+ };
181
+ }
182
+ if (event === "media") {
183
+ const mediaData = typeof msg.media === "object" && msg.media !== null ? msg.media : void 0;
184
+ const payload = typeof mediaData?.payload === "string" ? mediaData.payload : void 0;
185
+ if (!payload || !isValidBase64Payload(payload)) return { kind: "ignored" };
186
+ return {
187
+ kind: "media",
188
+ payloadBase64: payload,
189
+ timestampMs: parseTimestampMs(mediaData?.timestamp),
190
+ track: typeof mediaData?.track === "string" ? mediaData.track : void 0
191
+ };
192
+ }
193
+ if (event === "mark") {
194
+ const markData = typeof msg.mark === "object" && msg.mark !== null ? msg.mark : void 0;
195
+ return {
196
+ kind: "mark",
197
+ name: typeof markData?.name === "string" ? markData.name : void 0
198
+ };
199
+ }
200
+ if (event === "stop") return { kind: "stop" };
201
+ return { kind: "ignored" };
202
+ }
203
+ serializeMedia(payloadBase64) {
204
+ return JSON.stringify({
205
+ event: "media",
206
+ streamSid: this.streamSid,
207
+ media: { payload: payloadBase64 }
208
+ });
209
+ }
210
+ serializeClear() {
211
+ return JSON.stringify({
212
+ event: "clear",
213
+ streamSid: this.streamSid
214
+ });
215
+ }
216
+ serializeMark(name) {
217
+ return JSON.stringify({
218
+ event: "mark",
219
+ streamSid: this.streamSid,
220
+ mark: { name }
221
+ });
222
+ }
223
+ };
224
+ var TelnyxStreamFrameAdapter = class {
225
+ constructor() {
226
+ this.providerName = "telnyx";
227
+ }
228
+ parseInbound(rawMessage) {
229
+ const msg = tryParseJson(rawMessage);
230
+ if (!msg) return { kind: "ignored" };
231
+ const event = msg.event;
232
+ const topLevelStreamId = typeof msg.stream_id === "string" && msg.stream_id ? msg.stream_id : void 0;
233
+ if (event === "start") {
234
+ const startData = typeof msg.start === "object" && msg.start !== null ? msg.start : void 0;
235
+ const providerCallId = typeof startData?.call_control_id === "string" && startData.call_control_id ? startData.call_control_id : void 0;
236
+ if (!topLevelStreamId || !providerCallId) return { kind: "ignored" };
237
+ return {
238
+ kind: "start",
239
+ streamId: topLevelStreamId,
240
+ providerCallId
241
+ };
242
+ }
243
+ if (event === "media") {
244
+ const mediaData = typeof msg.media === "object" && msg.media !== null ? msg.media : void 0;
245
+ const payload = typeof mediaData?.payload === "string" ? mediaData.payload : void 0;
246
+ if (!payload || !isValidBase64Payload(payload)) return { kind: "ignored" };
247
+ return {
248
+ kind: "media",
249
+ payloadBase64: payload,
250
+ timestampMs: parseTimestampMs(mediaData?.timestamp),
251
+ track: typeof mediaData?.track === "string" ? mediaData.track : void 0
252
+ };
253
+ }
254
+ if (event === "mark") {
255
+ const markData = typeof msg.mark === "object" && msg.mark !== null ? msg.mark : void 0;
256
+ return {
257
+ kind: "mark",
258
+ name: typeof markData?.name === "string" ? markData.name : void 0
259
+ };
260
+ }
261
+ if (event === "stop") return { kind: "stop" };
262
+ if (event === "error") {
263
+ const errorData = typeof msg.payload === "object" && msg.payload !== null ? msg.payload : void 0;
264
+ return {
265
+ kind: "error",
266
+ code: typeof errorData?.code === "string" || typeof errorData?.code === "number" ? String(errorData.code) : void 0,
267
+ title: typeof errorData?.title === "string" ? errorData.title : void 0,
268
+ detail: typeof errorData?.detail === "string" ? errorData.detail : void 0
269
+ };
270
+ }
271
+ return { kind: "ignored" };
272
+ }
273
+ serializeMedia(payloadBase64) {
274
+ return JSON.stringify({
275
+ event: "media",
276
+ media: { payload: payloadBase64 }
277
+ });
278
+ }
279
+ serializeClear() {
280
+ return JSON.stringify({ event: "clear" });
281
+ }
282
+ serializeMark(name) {
283
+ return JSON.stringify({
284
+ event: "mark",
285
+ mark: { name }
286
+ });
287
+ }
288
+ };
289
+ //#endregion
151
290
  //#region extensions/voice-call/src/webhook/realtime-handler.ts
152
291
  const STREAM_TOKEN_TTL_MS = 3e4;
153
292
  const DEFAULT_HOST = "localhost:8443";
@@ -318,23 +457,29 @@ var RealtimeCallHandler = class {
318
457
  return `${this.publicPathPrefix}${normalizePath(this.config.streamPath ?? "/voice/stream/realtime")}`;
319
458
  }
320
459
  buildTwiMLPayload(req, params) {
321
- const host = this.publicOrigin || req.headers.host || DEFAULT_HOST;
322
460
  const rawDirection = params?.get("Direction");
323
- const token = this.issueStreamToken({
324
- from: params?.get("From") ?? void 0,
325
- to: params?.get("To") ?? void 0,
326
- direction: rawDirection?.startsWith("outbound") ? "outbound" : "inbound"
327
- });
328
- return {
329
- statusCode: 200,
330
- headers: { "Content-Type": "text/xml" },
331
- body: `<?xml version="1.0" encoding="UTF-8"?>
461
+ const previousOrigin = this.publicOrigin;
462
+ if (!previousOrigin) this.publicOrigin = req.headers.host ?? DEFAULT_HOST;
463
+ try {
464
+ const { streamUrl } = this.issueStreamSession({
465
+ providerName: "twilio",
466
+ from: params?.get("From") ?? void 0,
467
+ to: params?.get("To") ?? void 0,
468
+ direction: rawDirection?.startsWith("outbound") ? "outbound" : "inbound"
469
+ });
470
+ return {
471
+ statusCode: 200,
472
+ headers: { "Content-Type": "text/xml" },
473
+ body: `<?xml version="1.0" encoding="UTF-8"?>
332
474
  <Response>
333
475
  <Connect>
334
- <Stream url="${`wss://${host}${this.getStreamPathPattern()}/${token}`}" />
476
+ <Stream url="${streamUrl}" />
335
477
  </Connect>
336
478
  </Response>`
337
- };
479
+ };
480
+ } finally {
481
+ this.publicOrigin = previousOrigin;
482
+ }
338
483
  }
339
484
  handleWebSocketUpgrade(request, socket, head) {
340
485
  const token = new URL(request.url ?? "/", "wss://localhost").pathname.split("/").pop() ?? null;
@@ -344,6 +489,7 @@ var RealtimeCallHandler = class {
344
489
  socket.destroy();
345
490
  return;
346
491
  }
492
+ const adapter = (callerMeta.providerName ?? "twilio") === "telnyx" ? new TelnyxStreamFrameAdapter() : new TwilioStreamFrameAdapter();
347
493
  new WebSocketServer({
348
494
  noServer: true,
349
495
  maxPayload: MAX_REALTIME_MESSAGE_BYTES
@@ -356,43 +502,44 @@ var RealtimeCallHandler = class {
356
502
  let lastMediaGapWarnAt = 0;
357
503
  ws.on("message", (data) => {
358
504
  try {
359
- const msg = JSON.parse(data.toString());
360
- if (!initialized && msg.event === "start") {
505
+ const frame = adapter.parseInbound(data.toString());
506
+ if (frame.kind === "ignored") return;
507
+ if (frame.kind === "start") {
508
+ if (initialized) return;
361
509
  initialized = true;
362
- const startData = typeof msg.start === "object" && msg.start !== null ? msg.start : void 0;
363
- const streamSid = typeof startData?.streamSid === "string" ? startData.streamSid : "unknown";
364
- const callSid = typeof startData?.callSid === "string" ? startData.callSid : "unknown";
365
- activeCallSid = callSid;
366
- const nextBridge = this.handleCall(streamSid, callSid, ws, callerMeta);
510
+ activeCallSid = frame.providerCallId;
511
+ const nextBridge = this.handleCall(frame.streamId, frame.providerCallId, ws, callerMeta, adapter);
367
512
  if (!nextBridge) return;
368
513
  bridge = nextBridge;
369
514
  return;
370
515
  }
371
516
  if (!bridge) return;
372
- const mediaData = typeof msg.media === "object" && msg.media !== null ? msg.media : void 0;
373
- if (msg.event === "media" && typeof mediaData?.payload === "string") {
374
- const audio = Buffer.from(mediaData.payload, "base64");
517
+ if (frame.kind === "media") {
518
+ const audio = Buffer.from(frame.payloadBase64, "base64");
375
519
  bridge.sendAudio(audio);
376
- const mediaTimestamp = typeof mediaData.timestamp === "number" ? mediaData.timestamp : typeof mediaData.timestamp === "string" ? Number.parseInt(mediaData.timestamp, 10) : NaN;
377
- if (Number.isFinite(mediaTimestamp)) {
520
+ if (frame.timestampMs !== void 0) {
378
521
  if (lastMediaTimestamp !== void 0) {
379
- const gapMs = mediaTimestamp - lastMediaTimestamp;
522
+ const gapMs = frame.timestampMs - lastMediaTimestamp;
380
523
  const now = Date.now();
381
524
  if ((gapMs > 120 || gapMs < 0) && now - lastMediaGapWarnAt > 5e3) {
382
525
  lastMediaGapWarnAt = now;
383
- console.warn(`[voice-call] realtime media timestamp gap providerCallId=${activeCallSid} gapMs=${gapMs} timestamp=${mediaTimestamp}`);
526
+ console.warn(`[voice-call] realtime media timestamp gap providerCallId=${activeCallSid} gapMs=${gapMs} timestamp=${frame.timestampMs}`);
384
527
  }
385
528
  }
386
- lastMediaTimestamp = mediaTimestamp;
387
- bridge.setMediaTimestamp(mediaTimestamp);
529
+ lastMediaTimestamp = frame.timestampMs;
530
+ bridge.setMediaTimestamp(frame.timestampMs);
388
531
  }
389
532
  return;
390
533
  }
391
- if (msg.event === "mark") {
534
+ if (frame.kind === "mark") {
392
535
  bridge.acknowledgeMark();
393
536
  return;
394
537
  }
395
- if (msg.event === "stop") {
538
+ if (frame.kind === "error") {
539
+ console.error(`[voice-call] realtime WS error frame providerCallId=${activeCallSid} code=${frame.code ?? "?"} title=${frame.title ?? ""} detail=${frame.detail ?? ""}`);
540
+ return;
541
+ }
542
+ if (frame.kind === "stop") {
396
543
  stopReceived = true;
397
544
  this.closeTelephonyBridge(activeCallSid, bridge, "completed");
398
545
  }
@@ -428,6 +575,19 @@ var RealtimeCallHandler = class {
428
575
  };
429
576
  }
430
577
  }
578
+ issueStreamSession(request = {}) {
579
+ const token = this.issueStreamToken({
580
+ providerName: request.providerName ?? "twilio",
581
+ callId: request.callId,
582
+ from: request.from,
583
+ to: request.to,
584
+ direction: request.direction
585
+ });
586
+ return {
587
+ token,
588
+ streamUrl: `wss://${this.publicOrigin || DEFAULT_HOST}${this.getStreamPathPattern()}/${token}`
589
+ };
590
+ }
431
591
  issueStreamToken(meta = {}) {
432
592
  const token = randomUUID();
433
593
  this.pendingStreamTokens.set(token, {
@@ -445,10 +605,12 @@ var RealtimeCallHandler = class {
445
605
  return {
446
606
  from: entry.from,
447
607
  to: entry.to,
448
- direction: entry.direction
608
+ direction: entry.direction,
609
+ providerName: entry.providerName,
610
+ callId: entry.callId
449
611
  };
450
612
  }
451
- handleCall(streamSid, callSid, ws, callerMeta) {
613
+ handleCall(streamSid, callSid, ws, callerMeta, adapter) {
452
614
  const registration = this.registerCallInManager(callSid, callerMeta);
453
615
  if (!registration) {
454
616
  ws.close(1008, "Caller rejected by policy");
@@ -508,14 +670,14 @@ var RealtimeCallHandler = class {
508
670
  callEndEmitted = true;
509
671
  this.endCallInManager(callSid, callId, reason);
510
672
  };
511
- const sendJson = (message) => {
673
+ const sendString = (message) => {
512
674
  if (ws.readyState !== WebSocket.OPEN) return false;
513
675
  if (ws.bufferedAmount > MAX_REALTIME_WS_BUFFERED_BYTES) {
514
676
  console.warn(`[voice-call] realtime outbound websocket backpressure before send callId=${callId} providerCallId=${callSid} bufferedBytes=${ws.bufferedAmount}`);
515
677
  ws.close(1013, "Backpressure: send buffer exceeded");
516
678
  return false;
517
679
  }
518
- ws.send(JSON.stringify(message));
680
+ ws.send(message);
519
681
  if (ws.bufferedAmount > MAX_REALTIME_WS_BUFFERED_BYTES) {
520
682
  console.warn(`[voice-call] realtime outbound websocket backpressure after send callId=${callId} providerCallId=${callSid} bufferedBytes=${ws.bufferedAmount}`);
521
683
  ws.close(1013, "Backpressure: send buffer exceeded");
@@ -523,9 +685,13 @@ var RealtimeCallHandler = class {
523
685
  }
524
686
  return true;
525
687
  };
526
- const audioPacer = new RealtimeTwilioAudioPacer({
527
- streamSid,
528
- sendJson,
688
+ const audioPacer = new RealtimeAudioPacer({
689
+ send: sendString,
690
+ serializer: {
691
+ media: (payload) => adapter.serializeMedia(payload),
692
+ clear: () => adapter.serializeClear(),
693
+ mark: (name) => adapter.serializeMark(name)
694
+ },
529
695
  onBackpressure: () => {
530
696
  console.warn(`[voice-call] realtime paced audio backpressure callId=${callId} providerCallId=${callSid}`);
531
697
  if (ws.readyState === WebSocket.OPEN) ws.close(1013, "Backpressure: paced audio queue exceeded");
@@ -915,20 +1081,14 @@ var RealtimeCallHandler = class {
915
1081
  ...callerMeta.from ? { from: callerMeta.from } : {},
916
1082
  ...callerMeta.to ? { to: callerMeta.to } : {}
917
1083
  };
918
- this.manager.processEvent({
919
- id: `realtime-initiated-${callSid}`,
920
- callId: callSid,
921
- type: "call.initiated",
922
- ...baseFields
923
- });
924
- const callRecord = this.manager.getCallByProviderCallId(callSid);
1084
+ const callRecord = this.resolveRealtimeCall(callSid, callerMeta, baseFields);
925
1085
  if (!callRecord) return null;
926
1086
  const initialGreeting = this.extractInitialGreeting(callRecord);
927
1087
  console.log(`[voice-call] Realtime call ${callRecord.callId} initial greeting ${initialGreeting ? "queued" : "absent"}`);
928
1088
  if (callRecord.metadata) delete callRecord.metadata.initialMessage;
929
1089
  this.manager.processEvent({
930
1090
  id: `realtime-answered-${callSid}`,
931
- callId: callSid,
1091
+ callId: callRecord.callId,
932
1092
  type: "call.answered",
933
1093
  ...baseFields
934
1094
  });
@@ -937,6 +1097,19 @@ var RealtimeCallHandler = class {
937
1097
  initialGreetingInstructions: buildGreetingInstructions(this.config.instructions, initialGreeting)
938
1098
  };
939
1099
  }
1100
+ resolveRealtimeCall(callSid, callerMeta, baseFields) {
1101
+ if (callerMeta.callId) {
1102
+ const call = this.manager.getCall(callerMeta.callId);
1103
+ return call?.providerCallId === callSid ? call : null;
1104
+ }
1105
+ this.manager.processEvent({
1106
+ id: `realtime-initiated-${callSid}`,
1107
+ callId: callSid,
1108
+ type: "call.initiated",
1109
+ ...baseFields
1110
+ });
1111
+ return this.manager.getCallByProviderCallId(callSid) ?? null;
1112
+ }
940
1113
  extractInitialGreeting(call) {
941
1114
  return typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage : void 0;
942
1115
  }
@@ -1,4 +1,4 @@
1
- import { o as resolveVoiceCallSessionKey } from "./config-Beumk4ED.js";
1
+ import { o as resolveVoiceCallSessionKey } from "./config-C8gX5Cik.js";
2
2
  import { t as resolveVoiceResponseModel } from "./response-model-CyF5K80p.js";
3
3
  import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
4
4
  import crypto from "node:crypto";
@@ -1,6 +1,6 @@
1
1
  import { isBlockedHostnameOrIp, isRequestBodyLimitError, readRequestBodyWithLimit, requestBodyErrorToText } from "./runtime-api.js";
2
2
  import "./api.js";
3
- import { a as resolveVoiceCallEffectiveConfig, c as deepMergeDefined, i as resolveVoiceCallConfig, n as normalizeVoiceCallConfig, o as resolveVoiceCallSessionKey, r as resolveTwilioAuthToken, s as validateProviderConfig } from "./config-Beumk4ED.js";
3
+ import { a as resolveVoiceCallEffectiveConfig, c as deepMergeDefined, i as resolveVoiceCallConfig, n as normalizeVoiceCallConfig, o as resolveVoiceCallSessionKey, r as resolveTwilioAuthToken, s as validateProviderConfig } from "./config-C8gX5Cik.js";
4
4
  import { n as mapVoiceToPolly, t as escapeXml } from "./voice-mapping-DMm-YvxM.js";
5
5
  import { t as resolveVoiceResponseModel } from "./response-model-CyF5K80p.js";
6
6
  import { a as convertPcmToMulaw8k, t as isProviderStatusTerminal } from "./call-status-CuIeqfac.js";
@@ -485,13 +485,24 @@ async function initiateCall(ctx, to, sessionKey, options) {
485
485
  preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
486
486
  console.log(`[voice-call] Using pre-connect DTMF TwiML for call ${callId} (digits=${dtmfSequence.length}, initialMessage=${initialMessage ? "yes" : "no"})`);
487
487
  }
488
+ const streamSession = ctx.config.realtime?.enabled && ctx.provider.name === "telnyx" && ctx.streamSessionIssuer ? ctx.streamSessionIssuer({
489
+ providerName: "telnyx",
490
+ callId,
491
+ from,
492
+ to,
493
+ direction: "outbound"
494
+ }) : void 0;
488
495
  const result = await ctx.provider.initiateCall({
489
496
  callId,
490
497
  from,
491
498
  to,
492
499
  webhookUrl: ctx.webhookUrl,
493
500
  inlineTwiml,
494
- preConnectTwiml
501
+ preConnectTwiml,
502
+ ...streamSession ? {
503
+ streamUrl: streamSession.streamUrl,
504
+ streamAuthToken: streamSession.token
505
+ } : {}
495
506
  });
496
507
  callRecord.providerCallId = result.providerCallId;
497
508
  ctx.providerCallIdMap.set(result.providerCallId, callId);
@@ -830,13 +841,26 @@ function processEvent(ctx, event) {
830
841
  switch (event.type) {
831
842
  case "call.initiated":
832
843
  transitionState(call, "initiated");
833
- if (call.direction === "inbound" && call.providerCallId && ctx.provider?.answerCall) ctx.provider.answerCall({
834
- callId: call.callId,
835
- providerCallId: call.providerCallId
836
- }).catch((err) => {
837
- const message = formatErrorMessage(err);
838
- console.warn(`[voice-call] Failed to answer inbound call ${call.providerCallId}:`, message);
839
- });
844
+ if (call.direction === "inbound" && call.providerCallId && ctx.provider?.answerCall) {
845
+ const inboundStreamSession = ctx.config.realtime?.enabled && ctx.provider.name === "telnyx" && ctx.streamSessionIssuer ? ctx.streamSessionIssuer({
846
+ providerName: "telnyx",
847
+ callId: call.callId,
848
+ from: call.from,
849
+ to: call.to,
850
+ direction: "inbound"
851
+ }) : void 0;
852
+ ctx.provider.answerCall({
853
+ callId: call.callId,
854
+ providerCallId: call.providerCallId,
855
+ ...inboundStreamSession ? {
856
+ streamUrl: inboundStreamSession.streamUrl,
857
+ streamAuthToken: inboundStreamSession.token
858
+ } : {}
859
+ }).catch((err) => {
860
+ const message = formatErrorMessage(err);
861
+ console.warn(`[voice-call] Failed to answer inbound call ${call.providerCallId}:`, message);
862
+ });
863
+ }
840
864
  break;
841
865
  case "call.ringing":
842
866
  transitionState(call, "ringing");
@@ -1110,7 +1134,8 @@ var CallManager = class {
1110
1134
  initialMessageInFlight: this.initialMessageInFlight,
1111
1135
  onCallAnswered: (call) => {
1112
1136
  this.maybeSpeakInitialMessageOnAnswered(call);
1113
- }
1137
+ },
1138
+ streamSessionIssuer: this.streamSessionIssuer
1114
1139
  };
1115
1140
  }
1116
1141
  /**
@@ -2394,7 +2419,7 @@ function loadRealtimeTranscriptionRuntime() {
2394
2419
  return realtimeTranscriptionRuntimePromise;
2395
2420
  }
2396
2421
  function loadResponseGeneratorModule() {
2397
- responseGeneratorModulePromise ??= import("./response-generator-splfS1mU.js");
2422
+ responseGeneratorModulePromise ??= import("./response-generator-4wAlt0EM.js");
2398
2423
  return responseGeneratorModulePromise;
2399
2424
  }
2400
2425
  function sanitizeTranscriptForLog(value) {
@@ -2418,8 +2443,8 @@ function appendRecentTalkEventMetadata(call, event) {
2418
2443
  recentTalkEvents: recent.slice(-10)
2419
2444
  };
2420
2445
  }
2421
- function buildRequestUrl(requestUrl, requestHost, fallbackHost = "localhost") {
2422
- return new URL$1(requestUrl ?? "/", `http://${requestHost ?? fallbackHost}`);
2446
+ function buildRequestUrl(requestUrl) {
2447
+ return new URL$1(requestUrl ?? "/", "http://localhost");
2423
2448
  }
2424
2449
  function normalizeProxyIp(value) {
2425
2450
  const trimmed = value?.trim();
@@ -2745,7 +2770,7 @@ var VoiceCallWebhookServer = class {
2745
2770
  }
2746
2771
  getUpgradePathname(request) {
2747
2772
  try {
2748
- return buildRequestUrl(request.url, request.headers.host).pathname;
2773
+ return buildRequestUrl(request.url).pathname;
2749
2774
  } catch {
2750
2775
  return null;
2751
2776
  }
@@ -2768,7 +2793,7 @@ var VoiceCallWebhookServer = class {
2768
2793
  this.writeWebhookResponse(res, payload);
2769
2794
  }
2770
2795
  async runWebhookPipeline(req, webhookPath) {
2771
- const url = buildRequestUrl(req.url, req.headers.host);
2796
+ const url = buildRequestUrl(req.url);
2772
2797
  if (url.pathname === "/voice/hold-music") return {
2773
2798
  statusCode: 200,
2774
2799
  headers: { "Content-Type": "text/xml" },
@@ -2902,7 +2927,7 @@ var VoiceCallWebhookServer = class {
2902
2927
  }
2903
2928
  isRealtimeWebSocketUpgrade(req) {
2904
2929
  try {
2905
- const pathname = buildRequestUrl(req.url, req.headers.host).pathname;
2930
+ const pathname = buildRequestUrl(req.url).pathname;
2906
2931
  const pattern = this.realtimeHandler?.getStreamPathPattern();
2907
2932
  return Boolean(pattern && pathname.startsWith(pattern));
2908
2933
  } catch {
@@ -3011,15 +3036,15 @@ let mockProviderPromise;
3011
3036
  let realtimeVoiceRuntimePromise;
3012
3037
  let realtimeHandlerPromise;
3013
3038
  function loadTelnyxProvider() {
3014
- telnyxProviderPromise ??= import("./telnyx-DPwKir7b.js");
3039
+ telnyxProviderPromise ??= import("./telnyx-3TbRULg_.js");
3015
3040
  return telnyxProviderPromise;
3016
3041
  }
3017
3042
  function loadTwilioProvider() {
3018
- twilioProviderPromise ??= import("./twilio-D-QbSGSk.js");
3043
+ twilioProviderPromise ??= import("./twilio-DjcNE_Rb.js");
3019
3044
  return twilioProviderPromise;
3020
3045
  }
3021
3046
  function loadPlivoProvider() {
3022
- plivoProviderPromise ??= import("./plivo-AW4Op9oA.js");
3047
+ plivoProviderPromise ??= import("./plivo-P0n73IxS.js");
3023
3048
  return plivoProviderPromise;
3024
3049
  }
3025
3050
  function loadMockProvider() {
@@ -3031,7 +3056,7 @@ function loadRealtimeVoiceRuntime() {
3031
3056
  return realtimeVoiceRuntimePromise;
3032
3057
  }
3033
3058
  function loadRealtimeHandler() {
3034
- realtimeHandlerPromise ??= import("./realtime-handler-B_aqJXZj.js");
3059
+ realtimeHandlerPromise ??= import("./realtime-handler-DQPIcvi-.js");
3035
3060
  return realtimeHandlerPromise;
3036
3061
  }
3037
3062
  function resolveVoiceCallConsultSessionKey(call) {
@@ -3249,8 +3274,10 @@ async function createVoiceCallRuntime(params) {
3249
3274
  if (!publicUrl && config.tailscale?.mode !== "off") publicUrl = await setupTailscaleExposure(config);
3250
3275
  const webhookUrl = publicUrl ?? localUrl;
3251
3276
  if (providerRequiresPublicWebhook(provider.name) && isProviderUnreachableWebhookUrl(webhookUrl)) throw new Error(`[voice-call] ${provider.name} requires a publicly reachable webhook URL. Refusing to use local-only webhook ${webhookUrl}. Set plugins.entries.voice-call.config.publicUrl or enable tunnel/tailscale exposure.`);
3252
- if (publicUrl && provider.name === "twilio") provider.setPublicUrl(publicUrl);
3277
+ if (publicUrl) provider.setPublicUrl?.(publicUrl);
3253
3278
  if (publicUrl && realtimeProvider) webhookServer.getRealtimeHandler()?.setPublicUrl(publicUrl);
3279
+ const realtimeHandler = webhookServer.getRealtimeHandler();
3280
+ if (realtimeHandler) manager.streamSessionIssuer = (request) => realtimeHandler.issueStreamSession(request);
3254
3281
  if (provider.name === "twilio" && config.streaming?.enabled) {
3255
3282
  const twilioProvider = provider;
3256
3283
  if (ttsRuntime?.textToSpeechTelephony) try {
@@ -1,2 +1,2 @@
1
- import { t as createVoiceCallRuntime } from "./runtime-entry-CQfEI6TJ.js";
1
+ import { t as createVoiceCallRuntime } from "./runtime-entry-CmYLshgT.js";
2
2
  export { createVoiceCallRuntime };
package/dist/setup-api.js CHANGED
@@ -1,4 +1,4 @@
1
- import { n as migrateVoiceCallLegacyConfigInput } from "./config-compat-BSV2aOY6.js";
1
+ import { n as migrateVoiceCallLegacyConfigInput } from "./config-compat-Monzho4R.js";
2
2
  import { isRecord } from "openclaw/plugin-sdk/string-coerce-runtime";
3
3
  import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
4
4
  //#region extensions/voice-call/setup-api.ts
@@ -1,4 +1,4 @@
1
- import { i as verifyTelnyxWebhook, t as guardedJsonApiRequest } from "./guarded-json-api-C5TNOqhE.js";
1
+ import { i as verifyTelnyxWebhook, t as guardedJsonApiRequest } from "./guarded-json-api-D26auOvl.js";
2
2
  import crypto from "node:crypto";
3
3
  //#region extensions/voice-call/src/providers/telnyx.ts
4
4
  function normalizeTelnyxDirection(direction) {
@@ -10,6 +10,14 @@ function normalizeTelnyxDirection(direction) {
10
10
  default: return;
11
11
  }
12
12
  }
13
+ function normalizeBase64ForCompare(value) {
14
+ return value.replace(/=+$/u, "").replace(/-/gu, "+").replace(/_/gu, "/");
15
+ }
16
+ function decodeClientStateBase64(value) {
17
+ const buffer = Buffer.from(value, "base64");
18
+ if (normalizeBase64ForCompare(buffer.toString("base64")) !== normalizeBase64ForCompare(value)) return null;
19
+ return buffer.toString("utf8");
20
+ }
13
21
  var TelnyxProvider = class {
14
22
  constructor(config, options = {}) {
15
23
  this.name = "telnyx";
@@ -79,11 +87,7 @@ var TelnyxProvider = class {
79
87
  */
80
88
  normalizeEvent(data, dedupeKey) {
81
89
  let callId = "";
82
- if (data.payload?.client_state) try {
83
- callId = Buffer.from(data.payload.client_state, "base64").toString("utf8");
84
- } catch {
85
- callId = data.payload.client_state;
86
- }
90
+ if (data.payload?.client_state) callId = decodeClientStateBase64(data.payload.client_state) ?? data.payload.client_state;
87
91
  if (!callId) callId = data.payload?.call_control_id || "";
88
92
  const baseEvent = {
89
93
  id: data.id || crypto.randomUUID(),
@@ -134,6 +138,8 @@ var TelnyxProvider = class {
134
138
  type: "call.dtmf",
135
139
  digits: data.payload?.digit || ""
136
140
  };
141
+ case "streaming.started":
142
+ case "streaming.stopped": return null;
137
143
  default: return null;
138
144
  }
139
145
  }
@@ -163,20 +169,19 @@ var TelnyxProvider = class {
163
169
  return "completed";
164
170
  }
165
171
  }
166
- /**
167
- * Initiate an outbound call via Telnyx API.
168
- */
169
172
  async initiateCall(input) {
173
+ const body = {
174
+ connection_id: this.connectionId,
175
+ to: input.to,
176
+ from: input.from,
177
+ webhook_url: input.webhookUrl,
178
+ webhook_url_method: "POST",
179
+ client_state: Buffer.from(input.callId).toString("base64"),
180
+ timeout_secs: 30,
181
+ ...input.streamUrl ? buildTelnyxStreamingFields(input.streamUrl, input.streamAuthToken) : {}
182
+ };
170
183
  return {
171
- providerCallId: (await this.apiRequest("/calls", {
172
- connection_id: this.connectionId,
173
- to: input.to,
174
- from: input.from,
175
- webhook_url: input.webhookUrl,
176
- webhook_url_method: "POST",
177
- client_state: Buffer.from(input.callId).toString("base64"),
178
- timeout_secs: 30
179
- })).data.call_control_id,
184
+ providerCallId: (await this.apiRequest("/calls", body)).data.call_control_id,
180
185
  status: "initiated"
181
186
  };
182
187
  }
@@ -186,11 +191,12 @@ var TelnyxProvider = class {
186
191
  async hangupCall(input) {
187
192
  await this.apiRequest(`/calls/${input.providerCallId}/actions/hangup`, { command_id: crypto.randomUUID() }, { allowNotFound: true });
188
193
  }
189
- /**
190
- * Answer an inbound Telnyx Call Control leg.
191
- */
192
194
  async answerCall(input) {
193
- await this.apiRequest(`/calls/${input.providerCallId}/actions/answer`, { command_id: `openclaw-answer-${input.callId}` });
195
+ const body = {
196
+ command_id: `openclaw-answer-${input.callId}`,
197
+ ...input.streamUrl ? buildTelnyxStreamingFields(input.streamUrl, input.streamAuthToken) : {}
198
+ };
199
+ await this.apiRequest(`/calls/${input.providerCallId}/actions/answer`, body);
194
200
  }
195
201
  /**
196
202
  * Play TTS audio via Telnyx speak action.
@@ -256,5 +262,17 @@ var TelnyxProvider = class {
256
262
  }
257
263
  }
258
264
  };
265
+ function buildTelnyxStreamingFields(streamUrl, streamAuthToken) {
266
+ return {
267
+ stream_url: streamUrl,
268
+ stream_track: "inbound_track",
269
+ stream_codec: "PCMU",
270
+ stream_bidirectional_mode: "rtp",
271
+ stream_bidirectional_codec: "PCMU",
272
+ stream_bidirectional_sampling_rate: 8e3,
273
+ stream_bidirectional_target_legs: "self",
274
+ ...streamAuthToken ? { stream_auth_token: streamAuthToken } : {}
275
+ };
276
+ }
259
277
  //#endregion
260
278
  export { TelnyxProvider };
@@ -3,7 +3,7 @@ import "./api.js";
3
3
  import { n as mapVoiceToPolly, t as escapeXml } from "./voice-mapping-DMm-YvxM.js";
4
4
  import { i as chunkAudio, n as mapProviderStatusToEndReason, r as normalizeProviderStatus, t as isProviderStatusTerminal } from "./call-status-CuIeqfac.js";
5
5
  import { t as getHeader } from "./http-headers-B5L5gMpK.js";
6
- import { a as verifyTwilioWebhook, t as guardedJsonApiRequest } from "./guarded-json-api-C5TNOqhE.js";
6
+ import { a as verifyTwilioWebhook, t as guardedJsonApiRequest } from "./guarded-json-api-D26auOvl.js";
7
7
  import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
8
8
  import crypto from "node:crypto";
9
9
  import { safeEqualSecret } from "openclaw/plugin-sdk/security-runtime";
@@ -61,7 +61,12 @@ async function twilioApiRequest(params) {
61
61
  throw new TwilioApiError(response.status, errorText);
62
62
  }
63
63
  const text = await response.text();
64
- return text ? JSON.parse(text) : void 0;
64
+ if (!text) return;
65
+ try {
66
+ return JSON.parse(text);
67
+ } catch {
68
+ throw new Error("Twilio API returned malformed JSON.");
69
+ }
65
70
  } finally {
66
71
  await release();
67
72
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openclaw/voice-call",
3
- "version": "2026.5.12-beta.8",
3
+ "version": "2026.5.14-beta.1",
4
4
  "description": "OpenClaw voice-call plugin",
5
5
  "repository": {
6
6
  "type": "git",
@@ -18,7 +18,7 @@
18
18
  "openclaw": "workspace:*"
19
19
  },
20
20
  "peerDependencies": {
21
- "openclaw": ">=2026.5.12-beta.8"
21
+ "openclaw": ">=2026.5.14-beta.1"
22
22
  },
23
23
  "peerDependenciesMeta": {
24
24
  "openclaw": {
@@ -35,10 +35,10 @@
35
35
  "minHostVersion": ">=2026.4.10"
36
36
  },
37
37
  "compat": {
38
- "pluginApi": ">=2026.5.12-beta.8"
38
+ "pluginApi": ">=2026.5.14-beta.1"
39
39
  },
40
40
  "build": {
41
- "openclawVersion": "2026.5.12-beta.8"
41
+ "openclawVersion": "2026.5.14-beta.1"
42
42
  },
43
43
  "release": {
44
44
  "publishToClawHub": true,