@openclaw/voice-call 2026.5.2 → 2026.5.3-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/dist/api.js +2 -0
  2. package/dist/call-status-CXldV5o8.js +32 -0
  3. package/dist/cli-metadata.js +12 -0
  4. package/dist/config-7w04YpHh.js +548 -0
  5. package/dist/config-compat-B0me39_4.js +129 -0
  6. package/dist/guarded-json-api-Btx5EE4w.js +591 -0
  7. package/dist/http-headers-BrnxBasF.js +10 -0
  8. package/dist/index.js +1284 -0
  9. package/dist/mock-CeKvfVEd.js +135 -0
  10. package/dist/plivo-B-a7KFoT.js +393 -0
  11. package/dist/realtime-handler-B63CIDP2.js +325 -0
  12. package/dist/realtime-transcription.runtime-B2h70y2W.js +2 -0
  13. package/dist/realtime-voice.runtime-Bkh4nvLn.js +2 -0
  14. package/dist/response-generator-BrcmwDZU.js +182 -0
  15. package/dist/response-model-CyF5K80p.js +12 -0
  16. package/dist/runtime-api.js +6 -0
  17. package/dist/runtime-entry-88ytYAQa.js +3119 -0
  18. package/dist/runtime-entry.js +2 -0
  19. package/dist/setup-api.js +37 -0
  20. package/dist/telnyx-jjBE8boz.js +260 -0
  21. package/dist/twilio-1OqbcXLL.js +676 -0
  22. package/dist/voice-mapping-BYDGdWGx.js +40 -0
  23. package/package.json +14 -6
  24. package/api.ts +0 -16
  25. package/cli-metadata.ts +0 -10
  26. package/config-api.ts +0 -12
  27. package/index.test.ts +0 -943
  28. package/index.ts +0 -794
  29. package/runtime-api.ts +0 -20
  30. package/runtime-entry.ts +0 -1
  31. package/setup-api.ts +0 -47
  32. package/src/allowlist.test.ts +0 -18
  33. package/src/allowlist.ts +0 -19
  34. package/src/cli.ts +0 -845
  35. package/src/config-compat.test.ts +0 -120
  36. package/src/config-compat.ts +0 -227
  37. package/src/config.test.ts +0 -479
  38. package/src/config.ts +0 -808
  39. package/src/core-bridge.ts +0 -14
  40. package/src/deep-merge.test.ts +0 -40
  41. package/src/deep-merge.ts +0 -23
  42. package/src/gateway-continue-operation.ts +0 -200
  43. package/src/http-headers.test.ts +0 -16
  44. package/src/http-headers.ts +0 -15
  45. package/src/manager/context.ts +0 -42
  46. package/src/manager/events.test.ts +0 -581
  47. package/src/manager/events.ts +0 -288
  48. package/src/manager/lifecycle.ts +0 -53
  49. package/src/manager/lookup.test.ts +0 -52
  50. package/src/manager/lookup.ts +0 -35
  51. package/src/manager/outbound.test.ts +0 -528
  52. package/src/manager/outbound.ts +0 -486
  53. package/src/manager/state.ts +0 -48
  54. package/src/manager/store.ts +0 -106
  55. package/src/manager/timers.test.ts +0 -129
  56. package/src/manager/timers.ts +0 -113
  57. package/src/manager/twiml.test.ts +0 -13
  58. package/src/manager/twiml.ts +0 -17
  59. package/src/manager.closed-loop.test.ts +0 -236
  60. package/src/manager.inbound-allowlist.test.ts +0 -188
  61. package/src/manager.notify.test.ts +0 -377
  62. package/src/manager.restore.test.ts +0 -183
  63. package/src/manager.test-harness.ts +0 -127
  64. package/src/manager.ts +0 -392
  65. package/src/media-stream.test.ts +0 -768
  66. package/src/media-stream.ts +0 -708
  67. package/src/providers/base.ts +0 -97
  68. package/src/providers/mock.test.ts +0 -78
  69. package/src/providers/mock.ts +0 -185
  70. package/src/providers/plivo.test.ts +0 -93
  71. package/src/providers/plivo.ts +0 -601
  72. package/src/providers/shared/call-status.test.ts +0 -24
  73. package/src/providers/shared/call-status.ts +0 -24
  74. package/src/providers/shared/guarded-json-api.test.ts +0 -106
  75. package/src/providers/shared/guarded-json-api.ts +0 -42
  76. package/src/providers/telnyx.test.ts +0 -340
  77. package/src/providers/telnyx.ts +0 -394
  78. package/src/providers/twilio/api.test.ts +0 -145
  79. package/src/providers/twilio/api.ts +0 -93
  80. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  81. package/src/providers/twilio/twiml-policy.ts +0 -87
  82. package/src/providers/twilio/webhook.ts +0 -34
  83. package/src/providers/twilio.test.ts +0 -591
  84. package/src/providers/twilio.ts +0 -861
  85. package/src/providers/twilio.types.ts +0 -17
  86. package/src/realtime-defaults.ts +0 -3
  87. package/src/realtime-fast-context.test.ts +0 -88
  88. package/src/realtime-fast-context.ts +0 -165
  89. package/src/realtime-transcription.runtime.ts +0 -4
  90. package/src/realtime-voice.runtime.ts +0 -5
  91. package/src/response-generator.test.ts +0 -321
  92. package/src/response-generator.ts +0 -318
  93. package/src/response-model.test.ts +0 -71
  94. package/src/response-model.ts +0 -23
  95. package/src/runtime.test.ts +0 -536
  96. package/src/runtime.ts +0 -510
  97. package/src/telephony-audio.test.ts +0 -61
  98. package/src/telephony-audio.ts +0 -12
  99. package/src/telephony-tts.test.ts +0 -196
  100. package/src/telephony-tts.ts +0 -235
  101. package/src/test-fixtures.ts +0 -73
  102. package/src/tts-provider-voice.test.ts +0 -34
  103. package/src/tts-provider-voice.ts +0 -21
  104. package/src/tunnel.test.ts +0 -166
  105. package/src/tunnel.ts +0 -314
  106. package/src/types.ts +0 -291
  107. package/src/utils.test.ts +0 -17
  108. package/src/utils.ts +0 -14
  109. package/src/voice-mapping.test.ts +0 -34
  110. package/src/voice-mapping.ts +0 -68
  111. package/src/webhook/realtime-handler.test.ts +0 -598
  112. package/src/webhook/realtime-handler.ts +0 -485
  113. package/src/webhook/stale-call-reaper.test.ts +0 -88
  114. package/src/webhook/stale-call-reaper.ts +0 -38
  115. package/src/webhook/tailscale.test.ts +0 -214
  116. package/src/webhook/tailscale.ts +0 -129
  117. package/src/webhook-exposure.test.ts +0 -33
  118. package/src/webhook-exposure.ts +0 -84
  119. package/src/webhook-security.test.ts +0 -770
  120. package/src/webhook-security.ts +0 -994
  121. package/src/webhook.hangup-once.lifecycle.test.ts +0 -135
  122. package/src/webhook.test.ts +0 -1470
  123. package/src/webhook.ts +0 -908
  124. package/src/webhook.types.ts +0 -5
  125. package/src/websocket-test-support.ts +0 -72
  126. package/tsconfig.json +0 -16
@@ -1,318 +0,0 @@
1
- /**
2
- * Voice call response generator - uses the embedded Pi agent for tool support.
3
- * Routes voice responses through the same agent infrastructure as messaging.
4
- */
5
-
6
- import crypto from "node:crypto";
7
- import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/model-session-runtime";
8
- import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
9
- import type { SessionEntry } from "../api.js";
10
- import { resolveVoiceCallSessionKey, type VoiceCallConfig } from "./config.js";
11
- import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
12
- import { resolveVoiceResponseModel } from "./response-model.js";
13
-
14
/**
 * Everything `generateVoiceResponse` needs to produce one spoken reply.
 */
export type VoiceResponseParams = {
  /** Voice-call plugin configuration. */
  voiceConfig: VoiceCallConfig;
  /** Core OpenClaw configuration. */
  coreConfig: CoreConfig;
  /** Host agent runtime injected by the caller. */
  agentRuntime: CoreAgentDeps;
  /** Identifier of the call; used for session tracking and run ids. */
  callId: string;
  /** Persisted session key for the call, when one already exists. */
  sessionKey?: string;
  /** Phone number of the caller. */
  from: string;
  /** Conversation so far, one entry per utterance. */
  transcript: { speaker: "user" | "bot"; text: string }[];
  /** Most recent message from the caller. */
  userMessage: string;
};
32
-
33
/**
 * Outcome of a single response-generation attempt.
 */
export type VoiceResponseResult = {
  /** Text to speak to the caller, or `null` when no speakable text was produced. */
  text: string | null;
  /** Human-readable failure description, present only on failure paths. */
  error?: string;
};
37
-
38
/**
 * Subset of an agent result payload that the spoken-text extraction reads.
 */
type VoiceResponsePayload = {
  /** Raw text emitted by the agent for this payload. */
  text?: string;
  /** Marks an error payload; such payloads are never spoken. */
  isError?: boolean;
  /** Marks a reasoning payload; such payloads are never spoken. */
  isReasoning?: boolean;
};
43
-
44
/**
 * Instruction block appended to the system prompt so the model answers with a
 * single JSON object of the form {"spoken":"..."}.
 */
const VOICE_SPOKEN_OUTPUT_CONTRACT =
  "Output format requirements:" +
  "\n" +
  '- Return only valid JSON in this exact shape: {"spoken":"..."}' +
  "\n" +
  "- Do not include markdown, code fences, planning text, or extra keys." +
  "\n" +
  '- Put exactly what should be spoken to the caller into "spoken".' +
  "\n" +
  '- If there is nothing to say, return {"spoken":""}.';
51
-
52
/**
 * Collapse every whitespace run to a single space and trim the ends.
 *
 * @returns The cleaned text, or `null` when nothing but whitespace remains.
 */
function normalizeSpokenText(value: string): string | null {
  const collapsed = value.replace(/\s+/g, " ").trim();
  if (collapsed.length === 0) {
    return null;
  }
  return collapsed;
}
56
-
57
/**
 * Extract the `spoken` value from a model reply that should follow the
 * `{"spoken":"..."}` output contract.
 *
 * Strategy, in order:
 *  1. Parse the whole reply, the contents of a surrounding code fence, and the
 *     outermost `{ ... }` slice as JSON, taking the first one whose `spoken`
 *     property is a string.
 *  2. Otherwise locate an inline `"spoken": "..."` pair and decode just that
 *     string literal.
 *
 * @param text Raw payload text from the agent.
 * @returns The normalized spoken text; `""` when the reply is structured but
 *   has nothing to say; `null` when no `spoken` value could be recovered.
 */
function tryParseSpokenJson(text: string): string | null {
  const trimmed = text.trim();
  if (!trimmed) {
    return null;
  }

  const candidates: string[] = [trimmed];

  const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
  if (fenced?.[1]) {
    candidates.push(fenced[1]);
  }

  const firstBrace = trimmed.indexOf("{");
  const lastBrace = trimmed.lastIndexOf("}");
  if (firstBrace >= 0 && lastBrace > firstBrace) {
    candidates.push(trimmed.slice(firstBrace, lastBrace + 1));
  }

  for (const candidate of candidates) {
    try {
      const parsed: unknown = JSON.parse(candidate);
      const spoken = (parsed as { spoken?: unknown } | null)?.spoken;
      if (typeof spoken !== "string") {
        continue;
      }
      return normalizeSpokenText(spoken) ?? "";
    } catch {
      // Not valid JSON; fall through to the next candidate.
    }
  }

  const inlineSpokenMatch = trimmed.match(/"spoken"\s*:\s*"((?:[^"\\]|\\.)*)"/i);
  if (!inlineSpokenMatch) {
    return null;
  }

  // Models frequently emit raw newlines/tabs inside the string, which JSON
  // forbids. Escape bare control characters (leaving existing backslash
  // escapes untouched) so the literal still decodes instead of the caller
  // falling back to speaking the raw JSON text.
  const repaired = (inlineSpokenMatch[1] ?? "").replace(
    /\\[\s\S]|[\u0000-\u001f]/g,
    (token) =>
      token.length === 1 ? `\\u${token.charCodeAt(0).toString(16).padStart(4, "0")}` : token,
  );

  try {
    const decoded = JSON.parse(`"${repaired}"`) as string;
    return normalizeSpokenText(decoded) ?? "";
  } catch {
    return null;
  }
}
100
-
101
/**
 * Heuristic check for paragraphs that look like the model's own planning
 * rather than something to say to the caller.
 *
 * A paragraph is flagged when, after lowercasing, it
 *  - starts with "thinking process", "reasoning:" or "analysis:", or
 *  - starts with "the user " and also contains "i should", "i need to" or
 *    "i will", or
 *  - contains one of two known filler phrases.
 */
function isLikelyMetaReasoningParagraph(paragraph: string): boolean {
  const text = normalizeLowercaseStringOrEmpty(paragraph);
  if (!text) {
    return false;
  }

  const hasReasoningHeader = ["thinking process", "reasoning:", "analysis:"].some((prefix) =>
    text.startsWith(prefix),
  );
  if (hasReasoningHeader) {
    return true;
  }

  const narratesAboutUser =
    text.startsWith("the user ") &&
    ["i should", "i need to", "i will"].some((phrase) => text.includes(phrase));
  if (narratesAboutUser) {
    return true;
  }

  return [
    "this is a natural continuation of the conversation",
    "keep the conversation flowing",
  ].some((phrase) => text.includes(phrase));
}
128
-
129
/**
 * Turn an unstructured reply into speakable text.
 *
 * Fenced code blocks are removed, the remainder is split into paragraphs on
 * blank lines, and leading paragraphs that look like meta reasoning are
 * dropped (the last remaining paragraph is always kept). What is left is
 * joined with spaces and whitespace-normalized.
 *
 * @returns The cleaned text, or `null` when nothing speakable remains.
 */
function sanitizePlainSpokenText(text: string): string | null {
  const stripped = text.replace(/```[\s\S]*?```/g, " ").trim();
  if (!stripped) {
    return null;
  }

  const paragraphs = stripped
    .split(/\n\s*\n+/)
    .map((chunk) => chunk.trim())
    .filter(Boolean);

  // Advance past leading reasoning paragraphs, but never past the final one.
  let firstKept = 0;
  while (
    firstKept < paragraphs.length - 1 &&
    isLikelyMetaReasoningParagraph(paragraphs[firstKept])
  ) {
    firstKept += 1;
  }

  return normalizeSpokenText(paragraphs.slice(firstKept).join(" "));
}
146
-
147
/**
 * Combine the speakable parts of all agent payloads into one string.
 *
 * Error and reasoning payloads are ignored, as are payloads with no text.
 * Each remaining payload is first read as structured `{"spoken": ...}` output;
 * only when that fails is it treated as plain text.
 *
 * @returns The space-joined segments, or `null` when no payload yielded text.
 */
function extractSpokenTextFromPayloads(payloads: VoiceResponsePayload[]): string | null {
  const segments = payloads.flatMap((payload): string[] => {
    if (payload.isError || payload.isReasoning) {
      return [];
    }

    const raw = payload.text?.trim() ?? "";
    if (!raw) {
      return [];
    }

    const fromJson = tryParseSpokenJson(raw);
    if (fromJson !== null) {
      // A structured but empty reply means "say nothing" for this payload.
      return fromJson.length > 0 ? [fromJson] : [];
    }

    const fromPlain = sanitizePlainSpokenText(raw);
    return fromPlain ? [fromPlain] : [];
  });

  if (segments.length === 0) {
    return null;
  }
  return segments.join(" ").trim();
}
176
-
177
/**
 * Build the sandbox session key for a voice run.
 *
 * A key that already begins with `agent:` (case-insensitive) is returned
 * trimmed and otherwise unchanged; any other key is prefixed with
 * `agent:<agentId>:`.
 */
function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): string {
  const key = sessionKey.trim();
  const alreadyScoped = key.toLowerCase().startsWith("agent:");
  return alreadyScoped ? key : `agent:${agentId}:${key}`;
}
184
-
185
/**
 * Produce the next spoken reply for a call by running the embedded Pi agent.
 *
 * Steps: resolve the session key, agent paths and workspace; load (and, when
 * needed, create or re-model) the session entry; assemble the system prompt
 * from the base prompt, the transcript and the spoken-output contract; run the
 * agent; then extract the speakable text from its payloads.
 *
 * @param params Call context, configuration and the injected agent runtime.
 * @returns `{ text }` on success (`text` may be `null` when the agent produced
 *   nothing speakable), or `{ text: null, error }` when core config is
 *   missing, the run was aborted without output, or the agent threw.
 */
export async function generateVoiceResponse(
  params: VoiceResponseParams,
): Promise<VoiceResponseResult> {
  const {
    voiceConfig,
    coreConfig,
    agentRuntime,
    callId,
    sessionKey,
    from,
    transcript,
    userMessage,
  } = params;

  if (!coreConfig) {
    return { text: null, error: "Core config unavailable for voice response" };
  }
  const cfg = coreConfig;

  const resolvedSessionKey = resolveVoiceCallSessionKey({
    config: voiceConfig,
    callId,
    phone: from,
    explicitSessionKey: sessionKey,
  });
  const agentId = voiceConfig.agentId ?? "main";

  // Filesystem locations for this agent.
  const storePath = agentRuntime.session.resolveStorePath(cfg.session?.store, { agentId });
  const agentDir = agentRuntime.resolveAgentDir(cfg, agentId);
  const workspaceDir = agentRuntime.resolveAgentWorkspaceDir(cfg, agentId);

  // The workspace must exist before the agent runs.
  await agentRuntime.ensureAgentWorkspace({ dir: workspaceDir });

  // Look up any existing session entry for this call.
  const sessionStore = agentRuntime.session.loadSessionStore(storePath);
  const now = Date.now();
  let sessionEntry = sessionStore[resolvedSessionKey] as SessionEntry | undefined;

  // Provider/model come from the voice config, falling back to runtime defaults.
  const { provider, model } = resolveVoiceResponseModel({ voiceConfig, agentRuntime });

  // Write to the store when the session is new or a response model override applies.
  if (!sessionEntry?.sessionId || voiceConfig.responseModel) {
    sessionEntry = await agentRuntime.session.updateSessionStore(storePath, (store) => {
      let current = store[resolvedSessionKey] as SessionEntry | undefined;
      if (!current?.sessionId) {
        current = {
          ...current,
          sessionId: crypto.randomUUID(),
          updatedAt: now,
        };
        store[resolvedSessionKey] = current;
      }
      if (voiceConfig.responseModel) {
        applyModelOverrideToSessionEntry({
          entry: current,
          selection: { provider, model },
          selectionSource: "auto",
        });
      }
      return current;
    });
  }
  const sessionId = sessionEntry.sessionId;

  const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, {
    agentId,
  });

  const thinkLevel = agentRuntime.resolveThinkingDefault({ cfg, provider, model });

  // Agent name personalizes the default prompt.
  const identity = agentRuntime.resolveAgentIdentity(cfg, agentId);
  const agentName = identity?.name?.trim() || "assistant";

  const basePrompt =
    voiceConfig.responseSystemPrompt ??
    `You are ${agentName}, a helpful voice assistant on a phone call. Keep responses brief and conversational (1-2 sentences max). Be natural and friendly. The caller's phone number is ${from}. You have access to tools - use them when helpful.`;

  // Prompt sections are separated by a blank line: base, optional history, contract.
  const promptSections: string[] = [basePrompt];
  if (transcript.length > 0) {
    const history = transcript
      .map((turn) => `${turn.speaker === "bot" ? "You" : "Caller"}: ${turn.text}`)
      .join("\n");
    promptSections.push(`Conversation so far:\n${history}`);
  }
  promptSections.push(VOICE_SPOKEN_OUTPUT_CONTRACT);
  const extraSystemPrompt = promptSections.join("\n\n");

  const timeoutMs = voiceConfig.responseTimeoutMs ?? agentRuntime.resolveAgentTimeoutMs({ cfg });
  const runId = `voice:${callId}:${Date.now()}`;

  try {
    const result = await agentRuntime.runEmbeddedPiAgent({
      sessionId,
      sessionKey: resolvedSessionKey,
      sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, resolvedSessionKey),
      agentId,
      messageProvider: "voice",
      sessionFile,
      workspaceDir,
      config: cfg,
      prompt: userMessage,
      provider,
      model,
      thinkLevel,
      verboseLevel: "off",
      timeoutMs,
      runId,
      lane: "voice",
      extraSystemPrompt,
      agentDir,
    });

    const text = extractSpokenTextFromPayloads((result.payloads ?? []) as VoiceResponsePayload[]);

    // An abort only counts as a failure when it left us with nothing to say.
    if (!text && result.meta?.aborted) {
      return { text: null, error: "Response generation was aborted" };
    }

    return { text };
  } catch (err) {
    console.error(`[voice-call] Response generation failed:`, err);
    return { text: null, error: String(err) };
  }
}
@@ -1,71 +0,0 @@
1
- import { describe, expect, it } from "vitest";
2
- import { VoiceCallConfigSchema } from "./config.js";
3
- import type { CoreAgentDeps } from "./core-bridge.js";
4
- import { resolveVoiceResponseModel } from "./response-model.js";
5
-
6
// Minimal runtime stub: only `defaults` is populated, hence the double cast.
const agentRuntime = {
  defaults: { provider: "together", model: "Qwen/Qwen2.5-7B-Instruct-Turbo" },
} as unknown as CoreAgentDeps;
12
-
13
// Covers how a configured `responseModel` is split into provider and model,
// and how the runtime defaults fill in whatever the ref does not specify.
describe("resolveVoiceResponseModel", () => {
  it("falls back to the runtime default model", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({}),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "together/Qwen/Qwen2.5-7B-Instruct-Turbo",
      provider: "together",
      model: "Qwen/Qwen2.5-7B-Instruct-Turbo",
    });
  });

  it("uses an explicit provider/model ref", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({
          responseModel: "openai/gpt-5.4-mini",
        }),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "openai/gpt-5.4-mini",
      provider: "openai",
      model: "gpt-5.4-mini",
    });
  });

  // The segment before the first slash is always taken as the provider, even
  // when it looks like a model-vendor namespace rather than a provider id.
  it("treats the segment before the first slash as the provider", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({
          responseModel: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        }),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
      provider: "meta-llama",
      model: "Llama-4-Scout-17B-16E-Instruct",
    });
  });

  it("keeps legacy single-segment overrides on the runtime default provider", () => {
    expect(
      resolveVoiceResponseModel({
        voiceConfig: VoiceCallConfigSchema.parse({
          responseModel: "gpt-5.4-mini",
        }),
        agentRuntime,
      }),
    ).toEqual({
      modelRef: "gpt-5.4-mini",
      provider: "together",
      model: "gpt-5.4-mini",
    });
  });
});
@@ -1,23 +0,0 @@
1
- import type { VoiceCallConfig } from "./config.js";
2
- import type { CoreAgentDeps } from "./core-bridge.js";
3
-
4
/**
 * Resolve which provider/model should generate voice responses.
 *
 * The configured `responseModel` is trimmed; when it is missing or blank the
 * runtime default `provider/model` is used instead, so an empty string in
 * config can never yield an empty model id. The resulting ref is split at its
 * first `/`: the part before is the provider, everything after is the model.
 * A ref with no `/` is a bare model name on the runtime default provider.
 *
 * @param params.voiceConfig Voice-call config; only `responseModel` is read.
 * @param params.agentRuntime Host runtime; only `defaults.provider` and
 *   `defaults.model` are read.
 * @returns The effective ref plus its provider and model parts.
 */
export function resolveVoiceResponseModel(params: {
  voiceConfig: VoiceCallConfig;
  agentRuntime: CoreAgentDeps;
}): {
  modelRef: string;
  provider: string;
  model: string;
} {
  const { provider: defaultProvider, model: defaultModel } = params.agentRuntime.defaults;

  // Treat blank/whitespace-only overrides the same as "not configured".
  const override = params.voiceConfig.responseModel?.trim();
  const modelRef = override ? override : `${defaultProvider}/${defaultModel}`;

  const slashIndex = modelRef.indexOf("/");
  if (slashIndex === -1) {
    return { modelRef, provider: defaultProvider, model: modelRef };
  }

  return {
    modelRef,
    provider: modelRef.slice(0, slashIndex),
    model: modelRef.slice(slashIndex + 1),
  };
}