@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103):
  1. package/README.md +27 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +943 -0
  6. package/index.ts +379 -149
  7. package/openclaw.plugin.json +384 -157
  8. package/package.json +35 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +273 -12
  17. package/src/config.ts +355 -72
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +243 -19
  24. package/src/manager/events.ts +61 -31
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +528 -0
  28. package/src/manager/outbound.ts +163 -57
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +321 -0
  64. package/src/response-generator.ts +213 -53
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +429 -0
  68. package/src/runtime.ts +270 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +28 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +523 -102
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
package/index.ts CHANGED
@@ -1,37 +1,35 @@
1
- import { Type } from "@sinclair/typebox";
2
- import type {
3
- GatewayRequestHandlerOptions,
4
- OpenClawPluginApi,
5
- } from "openclaw/plugin-sdk/voice-call";
1
+ import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
2
+ import { ErrorCodes, errorShape } from "openclaw/plugin-sdk/gateway-runtime";
3
+ import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
4
+ import { Type } from "typebox";
5
+ import {
6
+ definePluginEntry,
7
+ type GatewayRequestHandlerOptions,
8
+ type OpenClawPluginApi,
9
+ } from "./api.js";
10
+ import { createVoiceCallRuntime, type VoiceCallRuntime } from "./runtime-entry.js";
6
11
  import { registerVoiceCallCli } from "./src/cli.js";
7
12
  import {
8
- VoiceCallConfigSchema,
13
+ formatVoiceCallLegacyConfigWarnings,
14
+ normalizeVoiceCallLegacyConfigInput,
15
+ parseVoiceCallPluginConfig,
16
+ } from "./src/config-compat.js";
17
+ import {
9
18
  resolveVoiceCallConfig,
10
19
  validateProviderConfig,
11
20
  type VoiceCallConfig,
12
21
  } from "./src/config.js";
13
22
  import type { CoreConfig } from "./src/core-bridge.js";
14
- import { createVoiceCallRuntime, type VoiceCallRuntime } from "./src/runtime.js";
23
+ import { createVoiceCallContinueOperationStore } from "./src/gateway-continue-operation.js";
15
24
 
16
25
  const voiceCallConfigSchema = {
17
26
  parse(value: unknown): VoiceCallConfig {
18
- const raw =
19
- value && typeof value === "object" && !Array.isArray(value)
20
- ? (value as Record<string, unknown>)
21
- : {};
22
-
23
- const twilio = raw.twilio as Record<string, unknown> | undefined;
24
- const legacyFrom = typeof twilio?.from === "string" ? twilio.from : undefined;
25
-
26
- const enabled = typeof raw.enabled === "boolean" ? raw.enabled : true;
27
- const providerRaw = raw.provider === "log" ? "mock" : raw.provider;
28
- const provider = providerRaw ?? (enabled ? "mock" : undefined);
29
-
30
- return VoiceCallConfigSchema.parse({
31
- ...raw,
27
+ const normalized = normalizeVoiceCallLegacyConfigInput(value);
28
+ const enabled = typeof normalized.enabled === "boolean" ? normalized.enabled : true;
29
+ return parseVoiceCallPluginConfig({
30
+ ...normalized,
32
31
  enabled,
33
- provider,
34
- fromNumber: raw.fromNumber ?? legacyFrom,
32
+ provider: normalized.provider ?? (enabled ? "mock" : undefined),
35
33
  });
36
34
  },
37
35
  uiHints: {
@@ -44,6 +42,11 @@ const voiceCallConfigSchema = {
44
42
  inboundPolicy: { label: "Inbound Policy" },
45
43
  allowFrom: { label: "Inbound Allowlist" },
46
44
  inboundGreeting: { label: "Inbound Greeting", advanced: true },
45
+ numbers: {
46
+ label: "Per-number Routing",
47
+ help: "Inbound overrides keyed by dialed E.164 number.",
48
+ advanced: true,
49
+ },
47
50
  "telnyx.apiKey": { label: "Telnyx API Key", sensitive: true },
48
51
  "telnyx.connectionId": { label: "Telnyx Connection ID" },
49
52
  "telnyx.publicKey": { label: "Telnyx Public Key", sensitive: true },
@@ -71,40 +74,70 @@ const voiceCallConfigSchema = {
71
74
  advanced: true,
72
75
  },
73
76
  "streaming.enabled": { label: "Enable Streaming", advanced: true },
74
- "streaming.openaiApiKey": {
75
- label: "OpenAI Realtime API Key",
76
- sensitive: true,
77
+ "streaming.provider": {
78
+ label: "Streaming Provider",
79
+ help: "Uses the first registered realtime transcription provider when unset.",
77
80
  advanced: true,
78
81
  },
79
- "streaming.sttModel": { label: "Realtime STT Model", advanced: true },
82
+ "streaming.providers": { label: "Streaming Provider Config", advanced: true },
80
83
  "streaming.streamPath": { label: "Media Stream Path", advanced: true },
81
- "tts.provider": {
82
- label: "TTS Provider Override",
83
- help: "Deep-merges with messages.tts (Edge is ignored for calls).",
84
+ "realtime.enabled": { label: "Enable Realtime Voice", advanced: true },
85
+ "realtime.provider": {
86
+ label: "Realtime Voice Provider",
87
+ help: "Uses the first registered realtime voice provider when unset.",
84
88
  advanced: true,
85
89
  },
86
- "tts.openai.model": { label: "OpenAI TTS Model", advanced: true },
87
- "tts.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
88
- "tts.openai.apiKey": {
89
- label: "OpenAI API Key",
90
- sensitive: true,
90
+ "realtime.streamPath": { label: "Realtime Stream Path", advanced: true },
91
+ "realtime.instructions": { label: "Realtime Instructions", advanced: true },
92
+ "realtime.toolPolicy": {
93
+ label: "Realtime Tool Policy",
94
+ help: "Controls the shared openclaw_agent_consult tool.",
91
95
  advanced: true,
92
96
  },
93
- "tts.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
94
- "tts.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
95
- "tts.elevenlabs.apiKey": {
96
- label: "ElevenLabs API Key",
97
- sensitive: true,
97
+ "realtime.fastContext.enabled": {
98
+ label: "Enable Fast Realtime Context",
99
+ help: "Searches memory/session context before the full consult agent.",
100
+ advanced: true,
101
+ },
102
+ "realtime.fastContext.timeoutMs": {
103
+ label: "Fast Context Timeout",
104
+ advanced: true,
105
+ },
106
+ "realtime.fastContext.maxResults": {
107
+ label: "Fast Context Result Limit",
108
+ advanced: true,
109
+ },
110
+ "realtime.fastContext.sources": {
111
+ label: "Fast Context Sources",
98
112
  advanced: true,
99
113
  },
100
- "tts.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
114
+ "realtime.fastContext.fallbackToConsult": {
115
+ label: "Fallback To Full Consult",
116
+ advanced: true,
117
+ },
118
+ "realtime.providers": { label: "Realtime Provider Config", advanced: true },
119
+ "tts.provider": {
120
+ label: "TTS Provider Override",
121
+ help: "Deep-merges with messages.tts (Microsoft is ignored for calls).",
122
+ advanced: true,
123
+ },
124
+ "tts.providers": { label: "TTS Provider Config", advanced: true },
101
125
  publicUrl: { label: "Public Webhook URL", advanced: true },
102
126
  skipSignatureVerification: {
103
127
  label: "Skip Signature Verification",
104
128
  advanced: true,
105
129
  },
106
130
  store: { label: "Call Log Store Path", advanced: true },
107
- responseModel: { label: "Response Model", advanced: true },
131
+ agentId: {
132
+ label: "Response Agent ID",
133
+ help: 'Agent workspace used for voice response generation. Defaults to "main".',
134
+ advanced: true,
135
+ },
136
+ responseModel: {
137
+ label: "Response Model",
138
+ help: "Optional override. Falls back to the runtime default model when unset.",
139
+ advanced: true,
140
+ },
108
141
  responseSystemPrompt: { label: "Response System Prompt", advanced: true },
109
142
  responseTimeoutMs: { label: "Response Timeout (ms)", advanced: true },
110
143
  },
@@ -116,6 +149,7 @@ const VoiceCallToolSchema = Type.Union([
116
149
  to: Type.Optional(Type.String({ description: "Call target" })),
117
150
  message: Type.String({ description: "Intro message" }),
118
151
  mode: Type.Optional(Type.Union([Type.Literal("notify"), Type.Literal("conversation")])),
152
+ dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
119
153
  }),
120
154
  Type.Object({
121
155
  action: Type.Literal("continue_call"),
@@ -127,6 +161,11 @@ const VoiceCallToolSchema = Type.Union([
127
161
  callId: Type.String({ description: "Call ID" }),
128
162
  message: Type.String({ description: "Message to speak" }),
129
163
  }),
164
+ Type.Object({
165
+ action: Type.Literal("send_dtmf"),
166
+ callId: Type.String({ description: "Call ID" }),
167
+ digits: Type.String({ description: "DTMF digits to send" }),
168
+ }),
130
169
  Type.Object({
131
170
  action: Type.Literal("end_call"),
132
171
  callId: Type.String({ description: "Call ID" }),
@@ -140,10 +179,39 @@ const VoiceCallToolSchema = Type.Union([
140
179
  to: Type.Optional(Type.String({ description: "Call target" })),
141
180
  sid: Type.Optional(Type.String({ description: "Call SID" })),
142
181
  message: Type.Optional(Type.String({ description: "Optional intro message" })),
182
+ dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
143
183
  }),
144
184
  ]);
145
185
 
146
- const voiceCallPlugin = {
186
+ function asParamRecord(params: unknown): Record<string, unknown> {
187
+ return params && typeof params === "object" && !Array.isArray(params)
188
+ ? (params as Record<string, unknown>)
189
+ : {};
190
+ }
191
+
192
+ function isCliOnlyProcess(): boolean {
193
+ return process.env.OPENCLAW_CLI === "1" && !process.argv.slice(2).includes("gateway");
194
+ }
195
+
196
+ const VOICE_CALL_RUNTIME_KEY = Symbol.for("openclaw.voice-call.runtime");
197
+ const VOICE_CALL_RUNTIME_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimePromise");
198
+ const VOICE_CALL_RUNTIME_STOP_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimeStopPromise");
199
+
200
+ type VoiceCallRuntimeGlobalState = typeof globalThis & {
201
+ [VOICE_CALL_RUNTIME_KEY]?: VoiceCallRuntime | null;
202
+ [VOICE_CALL_RUNTIME_PROMISE_KEY]?: Promise<VoiceCallRuntime> | null;
203
+ [VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]?: Promise<void> | null;
204
+ };
205
+
206
+ function getVoiceCallRuntimeGlobalState(): VoiceCallRuntimeGlobalState {
207
+ const state = globalThis as VoiceCallRuntimeGlobalState;
208
+ state[VOICE_CALL_RUNTIME_KEY] ??= null;
209
+ state[VOICE_CALL_RUNTIME_PROMISE_KEY] ??= null;
210
+ state[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] ??= null;
211
+ return state;
212
+ }
213
+
214
+ export default definePluginEntry({
147
215
  id: "voice-call",
148
216
  name: "Voice Call",
149
217
  description: "Voice-call plugin with Telnyx/Twilio/Plivo providers",
@@ -153,75 +221,132 @@ const voiceCallPlugin = {
153
221
  const validation = validateProviderConfig(config);
154
222
 
155
223
  if (api.pluginConfig && typeof api.pluginConfig === "object") {
156
- const raw = api.pluginConfig as Record<string, unknown>;
157
- const twilio = raw.twilio as Record<string, unknown> | undefined;
158
- if (raw.provider === "log") {
159
- api.logger.warn('[voice-call] provider "log" is deprecated; use "mock" instead');
160
- }
161
- if (typeof twilio?.from === "string") {
162
- api.logger.warn("[voice-call] twilio.from is deprecated; use fromNumber instead");
224
+ for (const warning of formatVoiceCallLegacyConfigWarnings({
225
+ value: api.pluginConfig,
226
+ configPathPrefix: "plugins.entries.voice-call.config",
227
+ doctorFixCommand: "openclaw doctor --fix",
228
+ })) {
229
+ api.logger.warn(warning);
163
230
  }
164
231
  }
165
232
 
166
- let runtimePromise: Promise<VoiceCallRuntime> | null = null;
167
- let runtime: VoiceCallRuntime | null = null;
233
+ const runtimeState = getVoiceCallRuntimeGlobalState();
234
+ const continueOperationStore = createVoiceCallContinueOperationStore({
235
+ config,
236
+ coreConfig: api.config as CoreConfig,
237
+ });
168
238
 
169
- const ensureRuntime = async () => {
239
+ const ensureRuntime = async (): Promise<VoiceCallRuntime> => {
170
240
  if (!config.enabled) {
171
241
  throw new Error("Voice call disabled in plugin config");
172
242
  }
173
243
  if (!validation.valid) {
174
244
  throw new Error(validation.errors.join("; "));
175
245
  }
176
- if (runtime) {
177
- return runtime;
178
- }
179
- if (!runtimePromise) {
180
- runtimePromise = createVoiceCallRuntime({
181
- config,
182
- coreConfig: api.config as CoreConfig,
183
- ttsRuntime: api.runtime.tts,
184
- logger: api.logger,
185
- });
186
- }
187
- try {
188
- runtime = await runtimePromise;
189
- } catch (err) {
190
- // Reset so the next call can retry instead of caching the
191
- // rejected promise forever (which also leaves the port orphaned
192
- // if the server started before the failure). See: #32387
193
- runtimePromise = null;
194
- throw err;
246
+
247
+ while (true) {
248
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
249
+ await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
250
+ continue;
251
+ }
252
+
253
+ const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
254
+ if (runtime) {
255
+ return runtime;
256
+ }
257
+
258
+ let runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
259
+ if (!runtimePromise) {
260
+ runtimePromise = createVoiceCallRuntime({
261
+ config,
262
+ coreConfig: api.config as CoreConfig,
263
+ fullConfig: api.config,
264
+ agentRuntime: api.runtime.agent,
265
+ ttsRuntime: api.runtime.tts,
266
+ logger: api.logger,
267
+ });
268
+ runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = runtimePromise;
269
+ }
270
+
271
+ try {
272
+ const createdRuntime = await runtimePromise;
273
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
274
+ continue;
275
+ }
276
+ if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] !== runtimePromise) {
277
+ continue;
278
+ }
279
+ runtimeState[VOICE_CALL_RUNTIME_KEY] = createdRuntime;
280
+ return createdRuntime;
281
+ } catch (err) {
282
+ if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] === runtimePromise) {
283
+ // Reset shared state so the next call can retry instead of caching
284
+ // a rejected promise across plugin contexts. See: #32387, #58115.
285
+ runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
286
+ runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
287
+ }
288
+ throw err;
289
+ }
195
290
  }
196
- return runtime;
197
291
  };
198
292
 
199
- const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => {
200
- respond(false, { error: err instanceof Error ? err.message : String(err) });
293
+ const respondError = (
294
+ respond: GatewayRequestHandlerOptions["respond"],
295
+ message: string,
296
+ code: (typeof ErrorCodes)[keyof typeof ErrorCodes] = ErrorCodes.UNAVAILABLE,
297
+ ) => {
298
+ respond(false, undefined, errorShape(code, message));
299
+ };
300
+
301
+ const sendError = (respond: GatewayRequestHandlerOptions["respond"], err: unknown) => {
302
+ respondError(respond, formatErrorMessage(err));
303
+ };
304
+
305
+ const describeHistoricalCall = async (rt: VoiceCallRuntime, callId: string) => {
306
+ const history = await rt.manager.getCallHistory(100);
307
+ const call = history
308
+ .toReversed()
309
+ .find((candidate) => candidate.callId === callId || candidate.providerCallId === callId);
310
+ if (!call) {
311
+ return undefined;
312
+ }
313
+ const details = [
314
+ `last state=${call.state}`,
315
+ call.endReason ? `endReason=${call.endReason}` : undefined,
316
+ call.endedAt ? `endedAt=${new Date(call.endedAt).toISOString()}` : undefined,
317
+ ].filter(Boolean);
318
+ return `call is not active (${details.join(", ")})`;
201
319
  };
202
320
 
203
321
  const resolveCallMessageRequest = async (params: GatewayRequestHandlerOptions["params"]) => {
204
- const callId = typeof params?.callId === "string" ? params.callId.trim() : "";
205
- const message = typeof params?.message === "string" ? params.message.trim() : "";
322
+ const callId = normalizeOptionalString(params?.callId) ?? "";
323
+ const message = normalizeOptionalString(params?.message) ?? "";
206
324
  if (!callId || !message) {
207
325
  return { error: "callId and message required" } as const;
208
326
  }
209
327
  const rt = await ensureRuntime();
210
- return { rt, callId, message } as const;
328
+ const activeCall = rt.manager.getCall(callId) ?? rt.manager.getCallByProviderCallId(callId);
329
+ if (activeCall) {
330
+ return { rt, callId: activeCall.callId, message } as const;
331
+ }
332
+ return { error: (await describeHistoricalCall(rt, callId)) ?? "Call not found" } as const;
211
333
  };
334
+
212
335
  const initiateCallAndRespond = async (params: {
213
336
  rt: VoiceCallRuntime;
214
337
  respond: GatewayRequestHandlerOptions["respond"];
215
338
  to: string;
216
339
  message?: string;
217
340
  mode?: "notify" | "conversation";
341
+ dtmfSequence?: string;
218
342
  }) => {
219
343
  const result = await params.rt.manager.initiateCall(params.to, undefined, {
220
344
  message: params.message,
221
345
  mode: params.mode,
346
+ dtmfSequence: params.dtmfSequence,
222
347
  });
223
348
  if (!result.success) {
224
- params.respond(false, { error: result.error || "initiate failed" });
349
+ respondError(params.respond, result.error || "initiate failed");
225
350
  return;
226
351
  }
227
352
  params.respond(true, { callId: result.callId, initiated: true });
@@ -242,12 +367,16 @@ const voiceCallPlugin = {
242
367
  }) => {
243
368
  const request = await resolveCallMessageRequest(params.requestParams);
244
369
  if ("error" in request) {
245
- params.respond(false, { error: request.error });
370
+ respondError(
371
+ params.respond,
372
+ request.error ?? "callId and message required",
373
+ ErrorCodes.INVALID_REQUEST,
374
+ );
246
375
  return;
247
376
  }
248
377
  const result = await params.action(request);
249
378
  if (!result.success) {
250
- params.respond(false, { error: result.error || params.failure });
379
+ respondError(params.respond, result.error || params.failure);
251
380
  return;
252
381
  }
253
382
  params.respond(
@@ -262,18 +391,15 @@ const voiceCallPlugin = {
262
391
  "voicecall.initiate",
263
392
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
264
393
  try {
265
- const message = typeof params?.message === "string" ? params.message.trim() : "";
394
+ const message = normalizeOptionalString(params?.message) ?? "";
266
395
  if (!message) {
267
- respond(false, { error: "message required" });
396
+ respondError(respond, "message required", ErrorCodes.INVALID_REQUEST);
268
397
  return;
269
398
  }
270
399
  const rt = await ensureRuntime();
271
- const to =
272
- typeof params?.to === "string" && params.to.trim()
273
- ? params.to.trim()
274
- : rt.config.toNumber;
400
+ const to = normalizeOptionalString(params?.to) ?? rt.config.toNumber;
275
401
  if (!to) {
276
- respond(false, { error: "to required" });
402
+ respondError(respond, "to required", ErrorCodes.INVALID_REQUEST);
277
403
  return;
278
404
  }
279
405
  const mode =
@@ -308,16 +434,99 @@ const voiceCallPlugin = {
308
434
  },
309
435
  );
310
436
 
437
+ api.registerGatewayMethod(
438
+ "voicecall.continue.start",
439
+ async ({ params, respond }: GatewayRequestHandlerOptions) => {
440
+ try {
441
+ const request = await resolveCallMessageRequest(params);
442
+ if ("error" in request) {
443
+ respondError(
444
+ respond,
445
+ request.error ?? "callId and message required",
446
+ ErrorCodes.INVALID_REQUEST,
447
+ );
448
+ return;
449
+ }
450
+ respond(true, continueOperationStore.start(request));
451
+ } catch (err) {
452
+ sendError(respond, err);
453
+ }
454
+ },
455
+ );
456
+
457
+ api.registerGatewayMethod(
458
+ "voicecall.continue.result",
459
+ async ({ params, respond }: GatewayRequestHandlerOptions) => {
460
+ try {
461
+ const operationId = normalizeOptionalString(params?.operationId) ?? "";
462
+ if (!operationId) {
463
+ respondError(respond, "operationId required", ErrorCodes.INVALID_REQUEST);
464
+ return;
465
+ }
466
+ const operation = continueOperationStore.read(operationId);
467
+ if (!operation.ok) {
468
+ respondError(respond, operation.error, ErrorCodes.INVALID_REQUEST);
469
+ return;
470
+ }
471
+ respond(true, operation.payload);
472
+ } catch (err) {
473
+ sendError(respond, err);
474
+ }
475
+ },
476
+ );
477
+
311
478
  api.registerGatewayMethod(
312
479
  "voicecall.speak",
313
480
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
314
481
  try {
315
- await respondToCallMessageAction({
316
- requestParams: params,
317
- respond,
318
- action: (request) => request.rt.manager.speak(request.callId, request.message),
319
- failure: "speak failed",
320
- });
482
+ const request = await resolveCallMessageRequest(params);
483
+ if ("error" in request) {
484
+ respondError(
485
+ respond,
486
+ request.error ?? "callId and message required",
487
+ ErrorCodes.INVALID_REQUEST,
488
+ );
489
+ return;
490
+ }
491
+ if (request.rt.config.realtime.enabled) {
492
+ const realtimeResult = request.rt.webhookServer.speakRealtime(
493
+ request.callId,
494
+ request.message,
495
+ );
496
+ if (realtimeResult.success) {
497
+ respond(true, { success: true });
498
+ return;
499
+ }
500
+ }
501
+ const result = await request.rt.manager.speak(request.callId, request.message);
502
+ if (!result.success) {
503
+ respondError(respond, result.error || "speak failed");
504
+ return;
505
+ }
506
+ respond(true, { success: true });
507
+ } catch (err) {
508
+ sendError(respond, err);
509
+ }
510
+ },
511
+ );
512
+
513
+ api.registerGatewayMethod(
514
+ "voicecall.dtmf",
515
+ async ({ params, respond }: GatewayRequestHandlerOptions) => {
516
+ try {
517
+ const callId = normalizeOptionalString(params?.callId) ?? "";
518
+ const digits = normalizeOptionalString(params?.digits) ?? "";
519
+ if (!callId || !digits) {
520
+ respondError(respond, "callId and digits required", ErrorCodes.INVALID_REQUEST);
521
+ return;
522
+ }
523
+ const rt = await ensureRuntime();
524
+ const result = await rt.manager.sendDtmf(callId, digits);
525
+ if (!result.success) {
526
+ respondError(respond, result.error || "dtmf failed");
527
+ return;
528
+ }
529
+ respond(true, { success: true });
321
530
  } catch (err) {
322
531
  sendError(respond, err);
323
532
  }
@@ -328,15 +537,15 @@ const voiceCallPlugin = {
328
537
  "voicecall.end",
329
538
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
330
539
  try {
331
- const callId = typeof params?.callId === "string" ? params.callId.trim() : "";
540
+ const callId = normalizeOptionalString(params?.callId) ?? "";
332
541
  if (!callId) {
333
- respond(false, { error: "callId required" });
542
+ respondError(respond, "callId required", ErrorCodes.INVALID_REQUEST);
334
543
  return;
335
544
  }
336
545
  const rt = await ensureRuntime();
337
546
  const result = await rt.manager.endCall(callId);
338
547
  if (!result.success) {
339
- respond(false, { error: result.error || "end failed" });
548
+ respondError(respond, result.error || "end failed");
340
549
  return;
341
550
  }
342
551
  respond(true, { success: true });
@@ -351,16 +560,12 @@ const voiceCallPlugin = {
351
560
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
352
561
  try {
353
562
  const raw =
354
- typeof params?.callId === "string"
355
- ? params.callId.trim()
356
- : typeof params?.sid === "string"
357
- ? params.sid.trim()
358
- : "";
563
+ normalizeOptionalString(params?.callId) ?? normalizeOptionalString(params?.sid) ?? "";
564
+ const rt = await ensureRuntime();
359
565
  if (!raw) {
360
- respond(false, { error: "callId required" });
566
+ respond(true, { found: true, calls: rt.manager.getActiveCalls() });
361
567
  return;
362
568
  }
363
- const rt = await ensureRuntime();
364
569
  const call = rt.manager.getCall(raw) || rt.manager.getCallByProviderCallId(raw);
365
570
  if (!call) {
366
571
  respond(true, { found: false });
@@ -377,18 +582,23 @@ const voiceCallPlugin = {
377
582
  "voicecall.start",
378
583
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
379
584
  try {
380
- const to = typeof params?.to === "string" ? params.to.trim() : "";
381
- const message = typeof params?.message === "string" ? params.message.trim() : "";
585
+ const to = normalizeOptionalString(params?.to) ?? "";
586
+ const message = normalizeOptionalString(params?.message) ?? "";
587
+ const dtmfSequence = normalizeOptionalString(params?.dtmfSequence);
382
588
  if (!to) {
383
- respond(false, { error: "to required" });
589
+ respondError(respond, "to required", ErrorCodes.INVALID_REQUEST);
384
590
  return;
385
591
  }
386
592
  const rt = await ensureRuntime();
593
+ const mode =
594
+ params?.mode === "notify" || params?.mode === "conversation" ? params.mode : undefined;
387
595
  await initiateCallAndRespond({
388
596
  rt,
389
597
  respond,
390
598
  to,
391
599
  message: message || undefined,
600
+ mode,
601
+ dtmfSequence,
392
602
  });
393
603
  } catch (err) {
394
604
  sendError(respond, err);
@@ -402,6 +612,7 @@ const voiceCallPlugin = {
402
612
  description: "Make phone calls and have voice conversations via the voice-call plugin.",
403
613
  parameters: VoiceCallToolSchema,
404
614
  async execute(_toolCallId, params) {
615
+ const rawParams = asParamRecord(params);
405
616
  const json = (payload: unknown) => ({
406
617
  content: [{ type: "text" as const, text: JSON.stringify(payload, null, 2) }],
407
618
  details: payload,
@@ -410,25 +621,23 @@ const voiceCallPlugin = {
410
621
  try {
411
622
  const rt = await ensureRuntime();
412
623
 
413
- if (typeof params?.action === "string") {
414
- switch (params.action) {
624
+ if (typeof rawParams.action === "string") {
625
+ switch (rawParams.action) {
415
626
  case "initiate_call": {
416
- const message = String(params.message || "").trim();
627
+ const message = normalizeOptionalString(rawParams.message) ?? "";
417
628
  if (!message) {
418
629
  throw new Error("message required");
419
630
  }
420
- const to =
421
- typeof params.to === "string" && params.to.trim()
422
- ? params.to.trim()
423
- : rt.config.toNumber;
631
+ const to = normalizeOptionalString(rawParams.to) ?? rt.config.toNumber;
424
632
  if (!to) {
425
633
  throw new Error("to required");
426
634
  }
427
635
  const result = await rt.manager.initiateCall(to, undefined, {
428
636
  message,
637
+ dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
429
638
  mode:
430
- params.mode === "notify" || params.mode === "conversation"
431
- ? params.mode
639
+ rawParams.mode === "notify" || rawParams.mode === "conversation"
640
+ ? rawParams.mode
432
641
  : undefined,
433
642
  });
434
643
  if (!result.success) {
@@ -437,8 +646,8 @@ const voiceCallPlugin = {
437
646
  return json({ callId: result.callId, initiated: true });
438
647
  }
439
648
  case "continue_call": {
440
- const callId = String(params.callId || "").trim();
441
- const message = String(params.message || "").trim();
649
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
650
+ const message = normalizeOptionalString(rawParams.message) ?? "";
442
651
  if (!callId || !message) {
443
652
  throw new Error("callId and message required");
444
653
  }
@@ -449,8 +658,8 @@ const voiceCallPlugin = {
449
658
  return json({ success: true, transcript: result.transcript });
450
659
  }
451
660
  case "speak_to_user": {
452
- const callId = String(params.callId || "").trim();
453
- const message = String(params.message || "").trim();
661
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
662
+ const message = normalizeOptionalString(rawParams.message) ?? "";
454
663
  if (!callId || !message) {
455
664
  throw new Error("callId and message required");
456
665
  }
@@ -460,8 +669,20 @@ const voiceCallPlugin = {
460
669
  }
461
670
  return json({ success: true });
462
671
  }
672
+ case "send_dtmf": {
673
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
674
+ const digits = normalizeOptionalString(rawParams.digits) ?? "";
675
+ if (!callId || !digits) {
676
+ throw new Error("callId and digits required");
677
+ }
678
+ const result = await rt.manager.sendDtmf(callId, digits);
679
+ if (!result.success) {
680
+ throw new Error(result.error || "dtmf failed");
681
+ }
682
+ return json({ success: true });
683
+ }
463
684
  case "end_call": {
464
- const callId = String(params.callId || "").trim();
685
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
465
686
  if (!callId) {
466
687
  throw new Error("callId required");
467
688
  }
@@ -472,7 +693,7 @@ const voiceCallPlugin = {
472
693
  return json({ success: true });
473
694
  }
474
695
  case "get_status": {
475
- const callId = String(params.callId || "").trim();
696
+ const callId = normalizeOptionalString(rawParams.callId) ?? "";
476
697
  if (!callId) {
477
698
  throw new Error("callId required");
478
699
  }
@@ -483,9 +704,9 @@ const voiceCallPlugin = {
483
704
  }
484
705
  }
485
706
 
486
- const mode = params?.mode ?? "call";
707
+ const mode = rawParams.mode ?? "call";
487
708
  if (mode === "status") {
488
- const sid = typeof params.sid === "string" ? params.sid.trim() : "";
709
+ const sid = normalizeOptionalString(rawParams.sid) ?? "";
489
710
  if (!sid) {
490
711
  throw new Error("sid required for status");
491
712
  }
@@ -493,18 +714,13 @@ const voiceCallPlugin = {
493
714
  return json(call ? { found: true, call } : { found: false });
494
715
  }
495
716
 
496
- const to =
497
- typeof params.to === "string" && params.to.trim()
498
- ? params.to.trim()
499
- : rt.config.toNumber;
717
+ const to = normalizeOptionalString(rawParams.to) ?? rt.config.toNumber;
500
718
  if (!to) {
501
719
  throw new Error("to required for call");
502
720
  }
503
721
  const result = await rt.manager.initiateCall(to, undefined, {
504
- message:
505
- typeof params.message === "string" && params.message.trim()
506
- ? params.message.trim()
507
- : undefined,
722
+ dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
723
+ message: normalizeOptionalString(rawParams.message),
508
724
  });
509
725
  if (!result.success) {
510
726
  throw new Error(result.error || "initiate failed");
@@ -512,7 +728,7 @@ const voiceCallPlugin = {
512
728
  return json({ callId: result.callId, initiated: true });
513
729
  } catch (err) {
514
730
  return json({
515
- error: err instanceof Error ? err.message : String(err),
731
+ error: formatErrorMessage(err),
516
732
  });
517
733
  }
518
734
  },
@@ -531,34 +747,48 @@ const voiceCallPlugin = {
531
747
 
532
748
  api.registerService({
533
749
  id: "voicecall",
534
- start: async () => {
750
+ start: () => {
751
+ if (isCliOnlyProcess()) {
752
+ return;
753
+ }
535
754
  if (!config.enabled) {
536
755
  return;
537
756
  }
538
- try {
539
- await ensureRuntime();
540
- } catch (err) {
541
- api.logger.error(
542
- `[voice-call] Failed to start runtime: ${
543
- err instanceof Error ? err.message : String(err)
544
- }`,
757
+ if (!validation.valid) {
758
+ api.logger.warn(
759
+ `[voice-call] Runtime not started; setup incomplete: ${validation.errors.join("; ")}`,
545
760
  );
761
+ return;
546
762
  }
763
+ void ensureRuntime().catch((err) => {
764
+ api.logger.error(`[voice-call] Failed to start runtime: ${formatErrorMessage(err)}`);
765
+ });
547
766
  },
548
767
  stop: async () => {
549
- if (!runtimePromise) {
768
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
769
+ await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
550
770
  return;
551
771
  }
552
- try {
553
- const rt = await runtimePromise;
772
+ const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
773
+ const runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
774
+ if (!runtime && !runtimePromise) {
775
+ return;
776
+ }
777
+ runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
778
+ runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
779
+ const stopPromise = (async () => {
780
+ const rt = runtime ?? (await runtimePromise!);
554
781
  await rt.stop();
782
+ })();
783
+ runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = stopPromise;
784
+ try {
785
+ await stopPromise;
555
786
  } finally {
556
- runtimePromise = null;
557
- runtime = null;
787
+ if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] === stopPromise) {
788
+ runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = null;
789
+ }
558
790
  }
559
791
  },
560
792
  });
561
793
  },
562
- };
563
-
564
- export default voiceCallPlugin;
794
+ });