@openclaw/voice-call 2026.3.13 → 2026.5.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +25 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +866 -0
  6. package/index.ts +353 -148
  7. package/openclaw.plugin.json +336 -157
  8. package/package.json +33 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +160 -12
  17. package/src/config.ts +243 -74
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +179 -19
  24. package/src/manager/events.ts +48 -30
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +464 -0
  28. package/src/manager/outbound.ts +148 -55
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +277 -0
  64. package/src/response-generator.ts +186 -40
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +351 -0
  68. package/src/runtime.ts +254 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +26 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +513 -100
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
package/index.ts CHANGED
@@ -1,37 +1,35 @@
1
- import { Type } from "@sinclair/typebox";
2
- import type {
3
- GatewayRequestHandlerOptions,
4
- OpenClawPluginApi,
5
- } from "openclaw/plugin-sdk/voice-call";
1
+ import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
2
+ import { ErrorCodes, errorShape } from "openclaw/plugin-sdk/gateway-runtime";
3
+ import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
4
+ import { Type } from "typebox";
5
+ import {
6
+ definePluginEntry,
7
+ type GatewayRequestHandlerOptions,
8
+ type OpenClawPluginApi,
9
+ } from "./api.js";
10
+ import { createVoiceCallRuntime, type VoiceCallRuntime } from "./runtime-entry.js";
6
11
  import { registerVoiceCallCli } from "./src/cli.js";
7
12
  import {
8
- VoiceCallConfigSchema,
13
+ formatVoiceCallLegacyConfigWarnings,
14
+ normalizeVoiceCallLegacyConfigInput,
15
+ parseVoiceCallPluginConfig,
16
+ } from "./src/config-compat.js";
17
+ import {
9
18
  resolveVoiceCallConfig,
10
19
  validateProviderConfig,
11
20
  type VoiceCallConfig,
12
21
  } from "./src/config.js";
13
22
  import type { CoreConfig } from "./src/core-bridge.js";
14
- import { createVoiceCallRuntime, type VoiceCallRuntime } from "./src/runtime.js";
23
+ import { createVoiceCallContinueOperationStore } from "./src/gateway-continue-operation.js";
15
24
 
16
25
  const voiceCallConfigSchema = {
17
26
  parse(value: unknown): VoiceCallConfig {
18
- const raw =
19
- value && typeof value === "object" && !Array.isArray(value)
20
- ? (value as Record<string, unknown>)
21
- : {};
22
-
23
- const twilio = raw.twilio as Record<string, unknown> | undefined;
24
- const legacyFrom = typeof twilio?.from === "string" ? twilio.from : undefined;
25
-
26
- const enabled = typeof raw.enabled === "boolean" ? raw.enabled : true;
27
- const providerRaw = raw.provider === "log" ? "mock" : raw.provider;
28
- const provider = providerRaw ?? (enabled ? "mock" : undefined);
29
-
30
- return VoiceCallConfigSchema.parse({
31
- ...raw,
27
+ const normalized = normalizeVoiceCallLegacyConfigInput(value);
28
+ const enabled = typeof normalized.enabled === "boolean" ? normalized.enabled : true;
29
+ return parseVoiceCallPluginConfig({
30
+ ...normalized,
32
31
  enabled,
33
- provider,
34
- fromNumber: raw.fromNumber ?? legacyFrom,
32
+ provider: normalized.provider ?? (enabled ? "mock" : undefined),
35
33
  });
36
34
  },
37
35
  uiHints: {
@@ -71,40 +69,70 @@ const voiceCallConfigSchema = {
71
69
  advanced: true,
72
70
  },
73
71
  "streaming.enabled": { label: "Enable Streaming", advanced: true },
74
- "streaming.openaiApiKey": {
75
- label: "OpenAI Realtime API Key",
76
- sensitive: true,
72
+ "streaming.provider": {
73
+ label: "Streaming Provider",
74
+ help: "Uses the first registered realtime transcription provider when unset.",
77
75
  advanced: true,
78
76
  },
79
- "streaming.sttModel": { label: "Realtime STT Model", advanced: true },
77
+ "streaming.providers": { label: "Streaming Provider Config", advanced: true },
80
78
  "streaming.streamPath": { label: "Media Stream Path", advanced: true },
81
- "tts.provider": {
82
- label: "TTS Provider Override",
83
- help: "Deep-merges with messages.tts (Edge is ignored for calls).",
79
+ "realtime.enabled": { label: "Enable Realtime Voice", advanced: true },
80
+ "realtime.provider": {
81
+ label: "Realtime Voice Provider",
82
+ help: "Uses the first registered realtime voice provider when unset.",
84
83
  advanced: true,
85
84
  },
86
- "tts.openai.model": { label: "OpenAI TTS Model", advanced: true },
87
- "tts.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
88
- "tts.openai.apiKey": {
89
- label: "OpenAI API Key",
90
- sensitive: true,
85
+ "realtime.streamPath": { label: "Realtime Stream Path", advanced: true },
86
+ "realtime.instructions": { label: "Realtime Instructions", advanced: true },
87
+ "realtime.toolPolicy": {
88
+ label: "Realtime Tool Policy",
89
+ help: "Controls the shared openclaw_agent_consult tool.",
91
90
  advanced: true,
92
91
  },
93
- "tts.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
94
- "tts.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
95
- "tts.elevenlabs.apiKey": {
96
- label: "ElevenLabs API Key",
97
- sensitive: true,
92
+ "realtime.fastContext.enabled": {
93
+ label: "Enable Fast Realtime Context",
94
+ help: "Searches memory/session context before the full consult agent.",
95
+ advanced: true,
96
+ },
97
+ "realtime.fastContext.timeoutMs": {
98
+ label: "Fast Context Timeout",
99
+ advanced: true,
100
+ },
101
+ "realtime.fastContext.maxResults": {
102
+ label: "Fast Context Result Limit",
103
+ advanced: true,
104
+ },
105
+ "realtime.fastContext.sources": {
106
+ label: "Fast Context Sources",
107
+ advanced: true,
108
+ },
109
+ "realtime.fastContext.fallbackToConsult": {
110
+ label: "Fallback To Full Consult",
111
+ advanced: true,
112
+ },
113
+ "realtime.providers": { label: "Realtime Provider Config", advanced: true },
114
+ "tts.provider": {
115
+ label: "TTS Provider Override",
116
+ help: "Deep-merges with messages.tts (Microsoft is ignored for calls).",
98
117
  advanced: true,
99
118
  },
100
- "tts.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
119
+ "tts.providers": { label: "TTS Provider Config", advanced: true },
101
120
  publicUrl: { label: "Public Webhook URL", advanced: true },
102
121
  skipSignatureVerification: {
103
122
  label: "Skip Signature Verification",
104
123
  advanced: true,
105
124
  },
106
125
  store: { label: "Call Log Store Path", advanced: true },
107
- responseModel: { label: "Response Model", advanced: true },
126
+ agentId: {
127
+ label: "Response Agent ID",
128
+ help: 'Agent workspace used for voice response generation. Defaults to "main".',
129
+ advanced: true,
130
+ },
131
+ responseModel: {
132
+ label: "Response Model",
133
+ help: "Optional override. Falls back to the runtime default model when unset.",
134
+ advanced: true,
135
+ },
108
136
  responseSystemPrompt: { label: "Response System Prompt", advanced: true },
109
137
  responseTimeoutMs: { label: "Response Timeout (ms)", advanced: true },
110
138
  },
@@ -116,6 +144,7 @@ const VoiceCallToolSchema = Type.Union([
116
144
  to: Type.Optional(Type.String({ description: "Call target" })),
117
145
  message: Type.String({ description: "Intro message" }),
118
146
  mode: Type.Optional(Type.Union([Type.Literal("notify"), Type.Literal("conversation")])),
147
+ dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
119
148
  }),
120
149
  Type.Object({
121
150
  action: Type.Literal("continue_call"),
@@ -127,6 +156,11 @@ const VoiceCallToolSchema = Type.Union([
127
156
  callId: Type.String({ description: "Call ID" }),
128
157
  message: Type.String({ description: "Message to speak" }),
129
158
  }),
159
+ Type.Object({
160
+ action: Type.Literal("send_dtmf"),
161
+ callId: Type.String({ description: "Call ID" }),
162
+ digits: Type.String({ description: "DTMF digits to send" }),
163
+ }),
130
164
  Type.Object({
131
165
  action: Type.Literal("end_call"),
132
166
  callId: Type.String({ description: "Call ID" }),
@@ -140,10 +174,39 @@ const VoiceCallToolSchema = Type.Union([
140
174
  to: Type.Optional(Type.String({ description: "Call target" })),
141
175
  sid: Type.Optional(Type.String({ description: "Call SID" })),
142
176
  message: Type.Optional(Type.String({ description: "Optional intro message" })),
177
+ dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
143
178
  }),
144
179
  ]);
145
180
 
146
- const voiceCallPlugin = {
181
+ function asParamRecord(params: unknown): Record<string, unknown> {
182
+ return params && typeof params === "object" && !Array.isArray(params)
183
+ ? (params as Record<string, unknown>)
184
+ : {};
185
+ }
186
+
187
+ function isCliOnlyProcess(): boolean {
188
+ return process.env.OPENCLAW_CLI === "1" && !process.argv.slice(2).includes("gateway");
189
+ }
190
+
191
+ const VOICE_CALL_RUNTIME_KEY = Symbol.for("openclaw.voice-call.runtime");
192
+ const VOICE_CALL_RUNTIME_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimePromise");
193
+ const VOICE_CALL_RUNTIME_STOP_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimeStopPromise");
194
+
195
+ type VoiceCallRuntimeGlobalState = typeof globalThis & {
196
+ [VOICE_CALL_RUNTIME_KEY]?: VoiceCallRuntime | null;
197
+ [VOICE_CALL_RUNTIME_PROMISE_KEY]?: Promise<VoiceCallRuntime> | null;
198
+ [VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]?: Promise<void> | null;
199
+ };
200
+
201
+ function getVoiceCallRuntimeGlobalState(): VoiceCallRuntimeGlobalState {
202
+ const state = globalThis as VoiceCallRuntimeGlobalState;
203
+ state[VOICE_CALL_RUNTIME_KEY] ??= null;
204
+ state[VOICE_CALL_RUNTIME_PROMISE_KEY] ??= null;
205
+ state[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] ??= null;
206
+ return state;
207
+ }
208
+
209
+ export default definePluginEntry({
147
210
  id: "voice-call",
148
211
  name: "Voice Call",
149
212
  description: "Voice-call plugin with Telnyx/Twilio/Plivo providers",
@@ -153,75 +216,112 @@ const voiceCallPlugin = {
153
216
  const validation = validateProviderConfig(config);
154
217
 
155
218
  if (api.pluginConfig && typeof api.pluginConfig === "object") {
156
- const raw = api.pluginConfig as Record<string, unknown>;
157
- const twilio = raw.twilio as Record<string, unknown> | undefined;
158
- if (raw.provider === "log") {
159
- api.logger.warn('[voice-call] provider "log" is deprecated; use "mock" instead');
160
- }
161
- if (typeof twilio?.from === "string") {
162
- api.logger.warn("[voice-call] twilio.from is deprecated; use fromNumber instead");
219
+ for (const warning of formatVoiceCallLegacyConfigWarnings({
220
+ value: api.pluginConfig,
221
+ configPathPrefix: "plugins.entries.voice-call.config",
222
+ doctorFixCommand: "openclaw doctor --fix",
223
+ })) {
224
+ api.logger.warn(warning);
163
225
  }
164
226
  }
165
227
 
166
- let runtimePromise: Promise<VoiceCallRuntime> | null = null;
167
- let runtime: VoiceCallRuntime | null = null;
228
+ const runtimeState = getVoiceCallRuntimeGlobalState();
229
+ const continueOperationStore = createVoiceCallContinueOperationStore({
230
+ config,
231
+ coreConfig: api.config as CoreConfig,
232
+ });
168
233
 
169
- const ensureRuntime = async () => {
234
+ const ensureRuntime = async (): Promise<VoiceCallRuntime> => {
170
235
  if (!config.enabled) {
171
236
  throw new Error("Voice call disabled in plugin config");
172
237
  }
173
238
  if (!validation.valid) {
174
239
  throw new Error(validation.errors.join("; "));
175
240
  }
176
- if (runtime) {
177
- return runtime;
178
- }
179
- if (!runtimePromise) {
180
- runtimePromise = createVoiceCallRuntime({
181
- config,
182
- coreConfig: api.config as CoreConfig,
183
- ttsRuntime: api.runtime.tts,
184
- logger: api.logger,
185
- });
186
- }
187
- try {
188
- runtime = await runtimePromise;
189
- } catch (err) {
190
- // Reset so the next call can retry instead of caching the
191
- // rejected promise forever (which also leaves the port orphaned
192
- // if the server started before the failure). See: #32387
193
- runtimePromise = null;
194
- throw err;
241
+
242
+ while (true) {
243
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
244
+ await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
245
+ continue;
246
+ }
247
+
248
+ const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
249
+ if (runtime) {
250
+ return runtime;
251
+ }
252
+
253
+ let runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
254
+ if (!runtimePromise) {
255
+ runtimePromise = createVoiceCallRuntime({
256
+ config,
257
+ coreConfig: api.config as CoreConfig,
258
+ fullConfig: api.config,
259
+ agentRuntime: api.runtime.agent,
260
+ ttsRuntime: api.runtime.tts,
261
+ logger: api.logger,
262
+ });
263
+ runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = runtimePromise;
264
+ }
265
+
266
+ try {
267
+ const createdRuntime = await runtimePromise;
268
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
269
+ continue;
270
+ }
271
+ if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] !== runtimePromise) {
272
+ continue;
273
+ }
274
+ runtimeState[VOICE_CALL_RUNTIME_KEY] = createdRuntime;
275
+ return createdRuntime;
276
+ } catch (err) {
277
+ if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] === runtimePromise) {
278
+ // Reset shared state so the next call can retry instead of caching
279
+ // a rejected promise across plugin contexts. See: #32387, #58115.
280
+ runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
281
+ runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
282
+ }
283
+ throw err;
284
+ }
195
285
  }
196
- return runtime;
197
286
  };
198
287
 
199
- const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => {
200
- respond(false, { error: err instanceof Error ? err.message : String(err) });
288
+ const respondError = (
289
+ respond: GatewayRequestHandlerOptions["respond"],
290
+ message: string,
291
+ code: (typeof ErrorCodes)[keyof typeof ErrorCodes] = ErrorCodes.UNAVAILABLE,
292
+ ) => {
293
+ respond(false, undefined, errorShape(code, message));
294
+ };
295
+
296
+ const sendError = (respond: GatewayRequestHandlerOptions["respond"], err: unknown) => {
297
+ respondError(respond, formatErrorMessage(err));
201
298
  };
202
299
 
203
300
  const resolveCallMessageRequest = async (params: GatewayRequestHandlerOptions["params"]) => {
204
- const callId = typeof params?.callId === "string" ? params.callId.trim() : "";
205
- const message = typeof params?.message === "string" ? params.message.trim() : "";
301
+ const callId = normalizeOptionalString(params?.callId) ?? "";
302
+ const message = normalizeOptionalString(params?.message) ?? "";
206
303
  if (!callId || !message) {
207
304
  return { error: "callId and message required" } as const;
208
305
  }
209
306
  const rt = await ensureRuntime();
210
307
  return { rt, callId, message } as const;
211
308
  };
309
+
212
310
  const initiateCallAndRespond = async (params: {
213
311
  rt: VoiceCallRuntime;
214
312
  respond: GatewayRequestHandlerOptions["respond"];
215
313
  to: string;
216
314
  message?: string;
217
315
  mode?: "notify" | "conversation";
316
+ dtmfSequence?: string;
218
317
  }) => {
219
318
  const result = await params.rt.manager.initiateCall(params.to, undefined, {
220
319
  message: params.message,
221
320
  mode: params.mode,
321
+ dtmfSequence: params.dtmfSequence,
222
322
  });
223
323
  if (!result.success) {
224
- params.respond(false, { error: result.error || "initiate failed" });
324
+ respondError(params.respond, result.error || "initiate failed");
225
325
  return;
226
326
  }
227
327
  params.respond(true, { callId: result.callId, initiated: true });
@@ -242,12 +342,16 @@ const voiceCallPlugin = {
242
342
  }) => {
243
343
  const request = await resolveCallMessageRequest(params.requestParams);
244
344
  if ("error" in request) {
245
- params.respond(false, { error: request.error });
345
+ respondError(
346
+ params.respond,
347
+ request.error ?? "callId and message required",
348
+ ErrorCodes.INVALID_REQUEST,
349
+ );
246
350
  return;
247
351
  }
248
352
  const result = await params.action(request);
249
353
  if (!result.success) {
250
- params.respond(false, { error: result.error || params.failure });
354
+ respondError(params.respond, result.error || params.failure);
251
355
  return;
252
356
  }
253
357
  params.respond(
@@ -262,18 +366,15 @@ const voiceCallPlugin = {
262
366
  "voicecall.initiate",
263
367
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
264
368
  try {
265
- const message = typeof params?.message === "string" ? params.message.trim() : "";
369
+ const message = normalizeOptionalString(params?.message) ?? "";
266
370
  if (!message) {
267
- respond(false, { error: "message required" });
371
+ respondError(respond, "message required", ErrorCodes.INVALID_REQUEST);
268
372
  return;
269
373
  }
270
374
  const rt = await ensureRuntime();
271
- const to =
272
- typeof params?.to === "string" && params.to.trim()
273
- ? params.to.trim()
274
- : rt.config.toNumber;
375
+ const to = normalizeOptionalString(params?.to) ?? rt.config.toNumber;
275
376
  if (!to) {
276
- respond(false, { error: "to required" });
377
+ respondError(respond, "to required", ErrorCodes.INVALID_REQUEST);
277
378
  return;
278
379
  }
279
380
  const mode =
@@ -308,16 +409,99 @@ const voiceCallPlugin = {
308
409
  },
309
410
  );
310
411
 
412
+ api.registerGatewayMethod(
413
+ "voicecall.continue.start",
414
+ async ({ params, respond }: GatewayRequestHandlerOptions) => {
415
+ try {
416
+ const request = await resolveCallMessageRequest(params);
417
+ if ("error" in request) {
418
+ respondError(
419
+ respond,
420
+ request.error ?? "callId and message required",
421
+ ErrorCodes.INVALID_REQUEST,
422
+ );
423
+ return;
424
+ }
425
+ respond(true, continueOperationStore.start(request));
426
+ } catch (err) {
427
+ sendError(respond, err);
428
+ }
429
+ },
430
+ );
431
+
432
+ api.registerGatewayMethod(
433
+ "voicecall.continue.result",
434
+ async ({ params, respond }: GatewayRequestHandlerOptions) => {
435
+ try {
436
+ const operationId = normalizeOptionalString(params?.operationId) ?? "";
437
+ if (!operationId) {
438
+ respondError(respond, "operationId required", ErrorCodes.INVALID_REQUEST);
439
+ return;
440
+ }
441
+ const operation = continueOperationStore.read(operationId);
442
+ if (!operation.ok) {
443
+ respondError(respond, operation.error, ErrorCodes.INVALID_REQUEST);
444
+ return;
445
+ }
446
+ respond(true, operation.payload);
447
+ } catch (err) {
448
+ sendError(respond, err);
449
+ }
450
+ },
451
+ );
452
+
311
453
  api.registerGatewayMethod(
312
454
  "voicecall.speak",
313
455
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
314
456
  try {
315
- await respondToCallMessageAction({
316
- requestParams: params,
317
- respond,
318
- action: (request) => request.rt.manager.speak(request.callId, request.message),
319
- failure: "speak failed",
320
- });
457
+ const request = await resolveCallMessageRequest(params);
458
+ if ("error" in request) {
459
+ respondError(
460
+ respond,
461
+ request.error ?? "callId and message required",
462
+ ErrorCodes.INVALID_REQUEST,
463
+ );
464
+ return;
465
+ }
466
+ if (request.rt.config.realtime.enabled) {
467
+ const realtimeResult = request.rt.webhookServer.speakRealtime(
468
+ request.callId,
469
+ request.message,
470
+ );
471
+ if (realtimeResult.success) {
472
+ respond(true, { success: true });
473
+ return;
474
+ }
475
+ }
476
+ const result = await request.rt.manager.speak(request.callId, request.message);
477
+ if (!result.success) {
478
+ respondError(respond, result.error || "speak failed");
479
+ return;
480
+ }
481
+ respond(true, { success: true });
482
+ } catch (err) {
483
+ sendError(respond, err);
484
+ }
485
+ },
486
+ );
487
+
488
+ api.registerGatewayMethod(
489
+ "voicecall.dtmf",
490
+ async ({ params, respond }: GatewayRequestHandlerOptions) => {
491
+ try {
492
+ const callId = normalizeOptionalString(params?.callId) ?? "";
493
+ const digits = normalizeOptionalString(params?.digits) ?? "";
494
+ if (!callId || !digits) {
495
+ respondError(respond, "callId and digits required", ErrorCodes.INVALID_REQUEST);
496
+ return;
497
+ }
498
+ const rt = await ensureRuntime();
499
+ const result = await rt.manager.sendDtmf(callId, digits);
500
+ if (!result.success) {
501
+ respondError(respond, result.error || "dtmf failed");
502
+ return;
503
+ }
504
+ respond(true, { success: true });
321
505
  } catch (err) {
322
506
  sendError(respond, err);
323
507
  }
@@ -328,15 +512,15 @@ const voiceCallPlugin = {
328
512
  "voicecall.end",
329
513
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
330
514
  try {
331
- const callId = typeof params?.callId === "string" ? params.callId.trim() : "";
515
+ const callId = normalizeOptionalString(params?.callId) ?? "";
332
516
  if (!callId) {
333
- respond(false, { error: "callId required" });
517
+ respondError(respond, "callId required", ErrorCodes.INVALID_REQUEST);
334
518
  return;
335
519
  }
336
520
  const rt = await ensureRuntime();
337
521
  const result = await rt.manager.endCall(callId);
338
522
  if (!result.success) {
339
- respond(false, { error: result.error || "end failed" });
523
+ respondError(respond, result.error || "end failed");
340
524
  return;
341
525
  }
342
526
  respond(true, { success: true });
@@ -351,16 +535,12 @@ const voiceCallPlugin = {
351
535
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
352
536
  try {
353
537
  const raw =
354
- typeof params?.callId === "string"
355
- ? params.callId.trim()
356
- : typeof params?.sid === "string"
357
- ? params.sid.trim()
358
- : "";
538
+ normalizeOptionalString(params?.callId) ?? normalizeOptionalString(params?.sid) ?? "";
539
+ const rt = await ensureRuntime();
359
540
  if (!raw) {
360
- respond(false, { error: "callId required" });
541
+ respond(true, { found: true, calls: rt.manager.getActiveCalls() });
361
542
  return;
362
543
  }
363
- const rt = await ensureRuntime();
364
544
  const call = rt.manager.getCall(raw) || rt.manager.getCallByProviderCallId(raw);
365
545
  if (!call) {
366
546
  respond(true, { found: false });
@@ -377,18 +557,23 @@ const voiceCallPlugin = {
377
557
  "voicecall.start",
378
558
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
379
559
  try {
380
- const to = typeof params?.to === "string" ? params.to.trim() : "";
381
- const message = typeof params?.message === "string" ? params.message.trim() : "";
560
+ const to = normalizeOptionalString(params?.to) ?? "";
561
+ const message = normalizeOptionalString(params?.message) ?? "";
562
+ const dtmfSequence = normalizeOptionalString(params?.dtmfSequence);
382
563
  if (!to) {
383
- respond(false, { error: "to required" });
564
+ respondError(respond, "to required", ErrorCodes.INVALID_REQUEST);
384
565
  return;
385
566
  }
386
567
  const rt = await ensureRuntime();
568
+ const mode =
569
+ params?.mode === "notify" || params?.mode === "conversation" ? params.mode : undefined;
387
570
  await initiateCallAndRespond({
388
571
  rt,
389
572
  respond,
390
573
  to,
391
574
  message: message || undefined,
575
+ mode,
576
+ dtmfSequence,
392
577
  });
393
578
  } catch (err) {
394
579
  sendError(respond, err);
@@ -402,6 +587,7 @@ const voiceCallPlugin = {
402
587
  description: "Make phone calls and have voice conversations via the voice-call plugin.",
403
588
  parameters: VoiceCallToolSchema,
404
589
  async execute(_toolCallId, params) {
590
+ const rawParams = asParamRecord(params);
405
591
  const json = (payload: unknown) => ({
406
592
  content: [{ type: "text" as const, text: JSON.stringify(payload, null, 2) }],
407
593
  details: payload,
@@ -410,25 +596,23 @@ const voiceCallPlugin = {
410
596
  try {
411
597
  const rt = await ensureRuntime();
412
598
 
413
- if (typeof params?.action === "string") {
414
- switch (params.action) {
599
+ if (typeof rawParams.action === "string") {
600
+ switch (rawParams.action) {
415
601
  case "initiate_call": {
416
- const message = String(params.message || "").trim();
602
+ const message = normalizeOptionalString(rawParams.message) ?? "";
417
603
  if (!message) {
418
604
  throw new Error("message required");
419
605
  }
420
- const to =
421
- typeof params.to === "string" && params.to.trim()
422
- ? params.to.trim()
423
- : rt.config.toNumber;
606
+ const to = normalizeOptionalString(rawParams.to) ?? rt.config.toNumber;
424
607
  if (!to) {
425
608
  throw new Error("to required");
426
609
  }
427
610
  const result = await rt.manager.initiateCall(to, undefined, {
428
611
  message,
612
+ dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
429
613
  mode:
430
- params.mode === "notify" || params.mode === "conversation"
431
- ? params.mode
614
+ rawParams.mode === "notify" || rawParams.mode === "conversation"
615
+ ? rawParams.mode
432
616
  : undefined,
433
617
  });
434
618
  if (!result.success) {
@@ -437,8 +621,8 @@ const voiceCallPlugin = {
437
621
  return json({ callId: result.callId, initiated: true });
438
622
  }
439
623
  case "continue_call": {
440
- const callId = String(params.callId || "").trim();
441
- const message = String(params.message || "").trim();
624
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
625
+ const message = normalizeOptionalString(rawParams.message) ?? "";
442
626
  if (!callId || !message) {
443
627
  throw new Error("callId and message required");
444
628
  }
@@ -449,8 +633,8 @@ const voiceCallPlugin = {
449
633
  return json({ success: true, transcript: result.transcript });
450
634
  }
451
635
  case "speak_to_user": {
452
- const callId = String(params.callId || "").trim();
453
- const message = String(params.message || "").trim();
636
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
637
+ const message = normalizeOptionalString(rawParams.message) ?? "";
454
638
  if (!callId || !message) {
455
639
  throw new Error("callId and message required");
456
640
  }
@@ -460,8 +644,20 @@ const voiceCallPlugin = {
460
644
  }
461
645
  return json({ success: true });
462
646
  }
647
+ case "send_dtmf": {
648
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
649
+ const digits = normalizeOptionalString(rawParams.digits) ?? "";
650
+ if (!callId || !digits) {
651
+ throw new Error("callId and digits required");
652
+ }
653
+ const result = await rt.manager.sendDtmf(callId, digits);
654
+ if (!result.success) {
655
+ throw new Error(result.error || "dtmf failed");
656
+ }
657
+ return json({ success: true });
658
+ }
463
659
  case "end_call": {
464
- const callId = String(params.callId || "").trim();
660
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
465
661
  if (!callId) {
466
662
  throw new Error("callId required");
467
663
  }
@@ -472,7 +668,7 @@ const voiceCallPlugin = {
472
668
  return json({ success: true });
473
669
  }
474
670
  case "get_status": {
475
- const callId = String(params.callId || "").trim();
671
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
476
672
  if (!callId) {
477
673
  throw new Error("callId required");
478
674
  }
@@ -483,9 +679,9 @@ const voiceCallPlugin = {
483
679
  }
484
680
  }
485
681
 
486
- const mode = params?.mode ?? "call";
682
+ const mode = rawParams.mode ?? "call";
487
683
  if (mode === "status") {
488
- const sid = typeof params.sid === "string" ? params.sid.trim() : "";
684
+ const sid = normalizeOptionalString(rawParams.sid) ?? "";
489
685
  if (!sid) {
490
686
  throw new Error("sid required for status");
491
687
  }
@@ -493,18 +689,13 @@ const voiceCallPlugin = {
493
689
  return json(call ? { found: true, call } : { found: false });
494
690
  }
495
691
 
496
- const to =
497
- typeof params.to === "string" && params.to.trim()
498
- ? params.to.trim()
499
- : rt.config.toNumber;
692
+ const to = normalizeOptionalString(rawParams.to) ?? rt.config.toNumber;
500
693
  if (!to) {
501
694
  throw new Error("to required for call");
502
695
  }
503
696
  const result = await rt.manager.initiateCall(to, undefined, {
504
- message:
505
- typeof params.message === "string" && params.message.trim()
506
- ? params.message.trim()
507
- : undefined,
697
+ dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
698
+ message: normalizeOptionalString(rawParams.message),
508
699
  });
509
700
  if (!result.success) {
510
701
  throw new Error(result.error || "initiate failed");
@@ -512,7 +703,7 @@ const voiceCallPlugin = {
512
703
  return json({ callId: result.callId, initiated: true });
513
704
  } catch (err) {
514
705
  return json({
515
- error: err instanceof Error ? err.message : String(err),
706
+ error: formatErrorMessage(err),
516
707
  });
517
708
  }
518
709
  },
@@ -531,34 +722,48 @@ const voiceCallPlugin = {
531
722
 
532
723
  api.registerService({
533
724
  id: "voicecall",
534
- start: async () => {
725
+ start: () => {
726
+ if (isCliOnlyProcess()) {
727
+ return;
728
+ }
535
729
  if (!config.enabled) {
536
730
  return;
537
731
  }
538
- try {
539
- await ensureRuntime();
540
- } catch (err) {
541
- api.logger.error(
542
- `[voice-call] Failed to start runtime: ${
543
- err instanceof Error ? err.message : String(err)
544
- }`,
732
+ if (!validation.valid) {
733
+ api.logger.warn(
734
+ `[voice-call] Runtime not started; setup incomplete: ${validation.errors.join("; ")}`,
545
735
  );
736
+ return;
546
737
  }
738
+ void ensureRuntime().catch((err) => {
739
+ api.logger.error(`[voice-call] Failed to start runtime: ${formatErrorMessage(err)}`);
740
+ });
547
741
  },
548
742
  stop: async () => {
549
- if (!runtimePromise) {
743
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
744
+ await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
550
745
  return;
551
746
  }
552
- try {
553
- const rt = await runtimePromise;
747
+ const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
748
+ const runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
749
+ if (!runtime && !runtimePromise) {
750
+ return;
751
+ }
752
+ runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
753
+ runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
754
+ const stopPromise = (async () => {
755
+ const rt = runtime ?? (await runtimePromise!);
554
756
  await rt.stop();
757
+ })();
758
+ runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = stopPromise;
759
+ try {
760
+ await stopPromise;
555
761
  } finally {
556
- runtimePromise = null;
557
- runtime = null;
762
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] === stopPromise) {
763
+ runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = null;
764
+ }
558
765
  }
559
766
  },
560
767
  });
561
768
  },
562
- };
563
-
564
- export default voiceCallPlugin;
769
+ });