@openclaw/voice-call 2026.5.2 → 2026.5.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.js +2 -0
- package/dist/call-status-CXldV5o8.js +32 -0
- package/dist/cli-metadata.js +12 -0
- package/dist/config-7w04YpHh.js +548 -0
- package/dist/config-compat-B0me39_4.js +129 -0
- package/dist/guarded-json-api-Btx5EE4w.js +591 -0
- package/dist/http-headers-BrnxBasF.js +10 -0
- package/dist/index.js +1284 -0
- package/dist/mock-CeKvfVEd.js +135 -0
- package/dist/plivo-B-a7KFoT.js +393 -0
- package/dist/realtime-handler-B63CIDP2.js +325 -0
- package/dist/realtime-transcription.runtime-B2h70y2W.js +2 -0
- package/dist/realtime-voice.runtime-Bkh4nvLn.js +2 -0
- package/dist/response-generator-BrcmwDZU.js +182 -0
- package/dist/response-model-CyF5K80p.js +12 -0
- package/dist/runtime-api.js +6 -0
- package/dist/runtime-entry-88ytYAQa.js +3119 -0
- package/dist/runtime-entry.js +2 -0
- package/dist/setup-api.js +37 -0
- package/dist/telnyx-jjBE8boz.js +260 -0
- package/dist/twilio-1OqbcXLL.js +676 -0
- package/dist/voice-mapping-BYDGdWGx.js +40 -0
- package/package.json +14 -6
- package/api.ts +0 -16
- package/cli-metadata.ts +0 -10
- package/config-api.ts +0 -12
- package/index.test.ts +0 -943
- package/index.ts +0 -794
- package/runtime-api.ts +0 -20
- package/runtime-entry.ts +0 -1
- package/setup-api.ts +0 -47
- package/src/allowlist.test.ts +0 -18
- package/src/allowlist.ts +0 -19
- package/src/cli.ts +0 -845
- package/src/config-compat.test.ts +0 -120
- package/src/config-compat.ts +0 -227
- package/src/config.test.ts +0 -479
- package/src/config.ts +0 -808
- package/src/core-bridge.ts +0 -14
- package/src/deep-merge.test.ts +0 -40
- package/src/deep-merge.ts +0 -23
- package/src/gateway-continue-operation.ts +0 -200
- package/src/http-headers.test.ts +0 -16
- package/src/http-headers.ts +0 -15
- package/src/manager/context.ts +0 -42
- package/src/manager/events.test.ts +0 -581
- package/src/manager/events.ts +0 -288
- package/src/manager/lifecycle.ts +0 -53
- package/src/manager/lookup.test.ts +0 -52
- package/src/manager/lookup.ts +0 -35
- package/src/manager/outbound.test.ts +0 -528
- package/src/manager/outbound.ts +0 -486
- package/src/manager/state.ts +0 -48
- package/src/manager/store.ts +0 -106
- package/src/manager/timers.test.ts +0 -129
- package/src/manager/timers.ts +0 -113
- package/src/manager/twiml.test.ts +0 -13
- package/src/manager/twiml.ts +0 -17
- package/src/manager.closed-loop.test.ts +0 -236
- package/src/manager.inbound-allowlist.test.ts +0 -188
- package/src/manager.notify.test.ts +0 -377
- package/src/manager.restore.test.ts +0 -183
- package/src/manager.test-harness.ts +0 -127
- package/src/manager.ts +0 -392
- package/src/media-stream.test.ts +0 -768
- package/src/media-stream.ts +0 -708
- package/src/providers/base.ts +0 -97
- package/src/providers/mock.test.ts +0 -78
- package/src/providers/mock.ts +0 -185
- package/src/providers/plivo.test.ts +0 -93
- package/src/providers/plivo.ts +0 -601
- package/src/providers/shared/call-status.test.ts +0 -24
- package/src/providers/shared/call-status.ts +0 -24
- package/src/providers/shared/guarded-json-api.test.ts +0 -106
- package/src/providers/shared/guarded-json-api.ts +0 -42
- package/src/providers/telnyx.test.ts +0 -340
- package/src/providers/telnyx.ts +0 -394
- package/src/providers/twilio/api.test.ts +0 -145
- package/src/providers/twilio/api.ts +0 -93
- package/src/providers/twilio/twiml-policy.test.ts +0 -84
- package/src/providers/twilio/twiml-policy.ts +0 -87
- package/src/providers/twilio/webhook.ts +0 -34
- package/src/providers/twilio.test.ts +0 -591
- package/src/providers/twilio.ts +0 -861
- package/src/providers/twilio.types.ts +0 -17
- package/src/realtime-defaults.ts +0 -3
- package/src/realtime-fast-context.test.ts +0 -88
- package/src/realtime-fast-context.ts +0 -165
- package/src/realtime-transcription.runtime.ts +0 -4
- package/src/realtime-voice.runtime.ts +0 -5
- package/src/response-generator.test.ts +0 -321
- package/src/response-generator.ts +0 -318
- package/src/response-model.test.ts +0 -71
- package/src/response-model.ts +0 -23
- package/src/runtime.test.ts +0 -536
- package/src/runtime.ts +0 -510
- package/src/telephony-audio.test.ts +0 -61
- package/src/telephony-audio.ts +0 -12
- package/src/telephony-tts.test.ts +0 -196
- package/src/telephony-tts.ts +0 -235
- package/src/test-fixtures.ts +0 -73
- package/src/tts-provider-voice.test.ts +0 -34
- package/src/tts-provider-voice.ts +0 -21
- package/src/tunnel.test.ts +0 -166
- package/src/tunnel.ts +0 -314
- package/src/types.ts +0 -291
- package/src/utils.test.ts +0 -17
- package/src/utils.ts +0 -14
- package/src/voice-mapping.test.ts +0 -34
- package/src/voice-mapping.ts +0 -68
- package/src/webhook/realtime-handler.test.ts +0 -598
- package/src/webhook/realtime-handler.ts +0 -485
- package/src/webhook/stale-call-reaper.test.ts +0 -88
- package/src/webhook/stale-call-reaper.ts +0 -38
- package/src/webhook/tailscale.test.ts +0 -214
- package/src/webhook/tailscale.ts +0 -129
- package/src/webhook-exposure.test.ts +0 -33
- package/src/webhook-exposure.ts +0 -84
- package/src/webhook-security.test.ts +0 -770
- package/src/webhook-security.ts +0 -994
- package/src/webhook.hangup-once.lifecycle.test.ts +0 -135
- package/src/webhook.test.ts +0 -1470
- package/src/webhook.ts +0 -908
- package/src/webhook.types.ts +0 -5
- package/src/websocket-test-support.ts +0 -72
- package/tsconfig.json +0 -16
|
@@ -0,0 +1,3119 @@
|
|
|
1
|
+
import { isBlockedHostnameOrIp, isRequestBodyLimitError, readRequestBodyWithLimit, requestBodyErrorToText } from "./runtime-api.js";
|
|
2
|
+
import "./api.js";
|
|
3
|
+
import { a as resolveVoiceCallEffectiveConfig, c as deepMergeDefined, i as resolveVoiceCallConfig, n as normalizeVoiceCallConfig, o as resolveVoiceCallSessionKey, r as resolveTwilioAuthToken, s as validateProviderConfig } from "./config-7w04YpHh.js";
|
|
4
|
+
import { n as mapVoiceToPolly, t as escapeXml } from "./voice-mapping-BYDGdWGx.js";
|
|
5
|
+
import { t as resolveVoiceResponseModel } from "./response-model-CyF5K80p.js";
|
|
6
|
+
import { a as convertPcmToMulaw8k, t as isProviderStatusTerminal } from "./call-status-CXldV5o8.js";
|
|
7
|
+
import { t as getHeader } from "./http-headers-BrnxBasF.js";
|
|
8
|
+
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
|
9
|
+
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
|
10
|
+
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, consultRealtimeVoiceAgent, parseRealtimeVoiceAgentConsultArgs, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow } from "openclaw/plugin-sdk/realtime-voice";
|
|
11
|
+
import { z } from "openclaw/plugin-sdk/zod";
|
|
12
|
+
import fs from "node:fs";
|
|
13
|
+
import os from "node:os";
|
|
14
|
+
import path from "node:path";
|
|
15
|
+
import crypto from "node:crypto";
|
|
16
|
+
import fsp from "node:fs/promises";
|
|
17
|
+
import { getActiveMemorySearchManager } from "openclaw/plugin-sdk/memory-host-search";
|
|
18
|
+
import { parseTtsDirectives } from "openclaw/plugin-sdk/speech";
|
|
19
|
+
import { spawn } from "node:child_process";
|
|
20
|
+
import http from "node:http";
|
|
21
|
+
import { URL as URL$1 } from "node:url";
|
|
22
|
+
import { resolveConfiguredCapabilityProvider } from "openclaw/plugin-sdk/provider-selection-runtime";
|
|
23
|
+
import { WEBHOOK_BODY_READ_DEFAULTS, createWebhookInFlightLimiter } from "openclaw/plugin-sdk/webhook-ingress";
|
|
24
|
+
import { WebSocket as WebSocket$1, WebSocketServer } from "ws";
|
|
25
|
+
//#region extensions/voice-call/src/allowlist.ts
|
|
26
|
+
/**
 * Reduce a phone number to its digits only.
 *
 * @param {string | null | undefined} input - Raw phone number text.
 * @returns {string} The input with every non-digit character removed, or ""
 *   when the input is falsy.
 */
function normalizePhoneNumber(input) {
	return input ? input.replace(/\D/g, "") : "";
}
|
|
30
|
+
/**
 * Check whether an already-normalized caller number appears in an allowlist.
 *
 * @param {string} normalizedFrom - Caller number, already reduced to digits.
 * @param {string[] | null | undefined} allowFrom - Allowlist entries; each is
 *   normalized with normalizePhoneNumber before comparison.
 * @returns {boolean} true when some non-empty normalized entry equals the caller.
 */
function isAllowlistedCaller(normalizedFrom, allowFrom) {
	if (!normalizedFrom) return false;
	for (const entry of allowFrom ?? []) {
		const candidate = normalizePhoneNumber(entry);
		// Entries that normalize to "" can never match.
		if (candidate !== "" && candidate === normalizedFrom) return true;
	}
	return false;
}
|
|
37
|
+
//#endregion
|
|
38
|
+
//#region extensions/voice-call/src/types.ts
|
|
39
|
+
// Provider identifiers accepted in a call record.
const ProviderNameSchema = z.enum(["telnyx", "twilio", "plivo", "mock"]);
// Every call state: six in-progress states followed by the nine end states.
const CallStateSchema = z.enum([
	"initiated", "ringing", "answered", "active", "speaking", "listening",
	"completed", "hangup-user", "hangup-bot", "timeout", "error",
	"failed", "no-answer", "busy", "voicemail"
]);
// Membership set for the nine end states (same values as EndReasonSchema).
const TerminalStates = new Set([
	"completed", "hangup-user", "hangup-bot", "timeout", "error",
	"failed", "no-answer", "busy", "voicemail"
]);
// Reasons a call can end with.
const EndReasonSchema = z.enum([
	"completed", "hangup-user", "hangup-bot", "timeout", "error",
	"failed", "no-answer", "busy", "voicemail"
]);
|
|
84
|
+
// Fields shared by every call event variant.
const BaseEventSchema = z.object({
	id: z.string(),
	dedupeKey: z.string().optional(),
	callId: z.string(),
	providerCallId: z.string().optional(),
	timestamp: z.number(),
	turnToken: z.string().optional(),
	direction: z.enum(["inbound", "outbound"]).optional(),
	from: z.string().optional(),
	to: z.string().optional()
});
// Event union discriminated on `type`. The constructed schema is not bound to
// a name in this file.
z.discriminatedUnion("type", [
	// Variants that add nothing beyond the base fields.
	...["call.initiated", "call.ringing", "call.answered", "call.active"].map((type) => BaseEventSchema.extend({ type: z.literal(type) })),
	BaseEventSchema.extend({ type: z.literal("call.speaking"), text: z.string() }),
	BaseEventSchema.extend({
		type: z.literal("call.speech"),
		transcript: z.string(),
		isFinal: z.boolean(),
		confidence: z.number().min(0).max(1).optional()
	}),
	BaseEventSchema.extend({ type: z.literal("call.silence"), durationMs: z.number() }),
	BaseEventSchema.extend({ type: z.literal("call.dtmf"), digits: z.string() }),
	BaseEventSchema.extend({ type: z.literal("call.ended"), reason: EndReasonSchema }),
	BaseEventSchema.extend({
		type: z.literal("call.error"),
		error: z.string(),
		retryable: z.boolean().optional()
	})
]);
|
|
128
|
+
// Direction of a call relative to this service.
const CallDirectionSchema = z.enum(["outbound", "inbound"]);
// One transcript line; `isFinal` defaults to true when absent.
const TranscriptEntrySchema = z.object({
	timestamp: z.number(),
	speaker: z.enum(["bot", "user"]),
	text: z.string(),
	isFinal: z.boolean().default(true)
});
// Shape of a call record as parsed back from the calls.jsonl store.
const CallRecordSchema = z.object({
	// Identity
	callId: z.string(),
	providerCallId: z.string().optional(),
	provider: ProviderNameSchema,
	// Routing and state
	direction: CallDirectionSchema,
	state: CallStateSchema,
	from: z.string(),
	to: z.string(),
	sessionKey: z.string().optional(),
	// Timing
	startedAt: z.number(),
	answeredAt: z.number().optional(),
	endedAt: z.number().optional(),
	endReason: EndReasonSchema.optional(),
	// Accumulated data; arrays default to empty when absent.
	transcript: z.array(TranscriptEntrySchema).default([]),
	processedEventIds: z.array(z.string()).default([]),
	metadata: z.record(z.string(), z.unknown()).optional()
});
|
|
152
|
+
//#endregion
|
|
153
|
+
//#region extensions/voice-call/src/manager/state.ts
|
|
154
|
+
// States a call may move between freely in either direction.
const ConversationStates = new Set(["speaking", "listening"]);
// Forward ordering of the non-terminal states.
const StateOrder = ["initiated", "ringing", "answered", "active", "speaking", "listening"];
/**
 * Move `call.state` to `newState` when the transition is permitted; otherwise
 * leave the call untouched.
 *
 * Permitted transitions: any non-terminal state to a terminal state, between
 * the two conversation states, or forward along StateOrder. A call already in
 * a terminal state never changes.
 *
 * @param {{ state: string }} call - Call record, mutated in place.
 * @param {string} newState - Requested state.
 * @returns {void}
 */
function transitionState(call, newState) {
	const current = call.state;
	if (current === newState) return;
	if (TerminalStates.has(current)) return;
	const endsCall = TerminalStates.has(newState);
	const swapsConversationState = ConversationStates.has(current) && ConversationStates.has(newState);
	const movesForward = StateOrder.indexOf(newState) > StateOrder.indexOf(current);
	if (endsCall || swapsConversationState || movesForward) call.state = newState;
}
|
|
176
|
+
/**
 * Append a final transcript entry, stamped with the current time, to a call.
 *
 * @param {{ transcript: object[] }} call - Call record, mutated in place.
 * @param {string} speaker - Speaker label stored on the entry.
 * @param {string} text - Text stored on the entry.
 * @returns {void}
 */
function addTranscriptEntry(call, speaker, text) {
	call.transcript.push({
		timestamp: Date.now(),
		speaker,
		text,
		isFinal: true
	});
}
|
|
185
|
+
//#endregion
|
|
186
|
+
//#region extensions/voice-call/src/manager/store.ts
|
|
187
|
+
// In-flight append promises; each removes itself once it settles.
const pendingPersistWrites = /* @__PURE__ */ new Set();
/**
 * Append a JSON snapshot of `call` as one line to `<storePath>/calls.jsonl`.
 *
 * The write is started but not awaited. A failed append is logged with
 * console.error and does not reject.
 *
 * @param {string} storePath - Directory holding calls.jsonl.
 * @param {object} call - Call record to serialize.
 * @returns {void}
 */
function persistCallRecord(storePath, call) {
	const logPath = path.join(storePath, "calls.jsonl");
	const serialized = `${JSON.stringify(call)}\n`;
	const reportFailure = (err) => {
		console.error("[voice-call] Failed to persist call record:", err);
	};
	const pending = fsp.appendFile(logPath, serialized).catch(reportFailure).finally(() => {
		pendingPersistWrites.delete(pending);
	});
	pendingPersistWrites.add(pending);
}
|
|
198
|
+
/**
 * Rebuild in-memory call state from `<storePath>/calls.jsonl`.
 *
 * The log is read synchronously. For each callId the last parseable line wins;
 * lines that are blank, not valid JSON, or rejected by CallRecordSchema are
 * skipped. Processed event ids are collected from every call, while only calls
 * in a non-terminal state are returned as active.
 *
 * @param {string} storePath - Directory holding calls.jsonl.
 * @returns {{ activeCalls: Map, providerCallIdMap: Map, processedEventIds: Set, rejectedProviderCallIds: Set }}
 *   Restored collections; all empty when the log file does not exist.
 *   `rejectedProviderCallIds` is always returned empty by this function.
 */
function loadActiveCallsFromStore(storePath) {
	const logPath = path.join(storePath, "calls.jsonl");
	const restored = {
		activeCalls: /* @__PURE__ */ new Map(),
		providerCallIdMap: /* @__PURE__ */ new Map(),
		processedEventIds: /* @__PURE__ */ new Set(),
		rejectedProviderCallIds: /* @__PURE__ */ new Set()
	};
	if (!fs.existsSync(logPath)) return restored;
	// Later snapshots of the same call overwrite earlier ones.
	const latestByCallId = /* @__PURE__ */ new Map();
	for (const rawLine of fs.readFileSync(logPath, "utf-8").split("\n")) {
		if (rawLine.trim() === "") continue;
		let record;
		try {
			record = CallRecordSchema.parse(JSON.parse(rawLine));
		} catch {
			// Unparseable line: skip it and keep loading the rest.
			continue;
		}
		latestByCallId.set(record.callId, record);
	}
	latestByCallId.forEach((record, callId) => {
		record.processedEventIds.forEach((eventId) => restored.processedEventIds.add(eventId));
		if (TerminalStates.has(record.state)) return;
		restored.activeCalls.set(callId, record);
		if (record.providerCallId) restored.providerCallIdMap.set(record.providerCallId, callId);
	});
	return restored;
}
|
|
232
|
+
/**
 * Read the most recent records from `<storePath>/calls.jsonl`.
 *
 * Each log line is one persisted snapshot, so the result holds the last
 * `limit` parseable lines, oldest first.
 *
 * @param {string} storePath - Directory holding calls.jsonl.
 * @param {number} [limit=50] - Maximum number of trailing lines to consider.
 *   A limit that is not greater than zero yields an empty result.
 * @returns {Promise<object[]>} Parsed records; empty when the log is
 *   inaccessible.
 */
async function getCallHistoryFromStore(storePath, limit = 50) {
	// `slice(-0)` is `slice(0)` and would return the whole log, so a
	// non-positive (or NaN) limit is answered with an empty history.
	if (!(limit > 0)) return [];
	const logPath = path.join(storePath, "calls.jsonl");
	try {
		await fsp.access(logPath);
	} catch {
		return [];
	}
	const lines = (await fsp.readFile(logPath, "utf-8")).trim().split("\n").filter(Boolean);
	const calls = [];
	for (const line of lines.slice(-limit)) {
		try {
			calls.push(CallRecordSchema.parse(JSON.parse(line)));
		} catch {
			// Best effort: a malformed or schema-invalid line is skipped.
		}
	}
	return calls;
}
|
|
247
|
+
//#endregion
|
|
248
|
+
//#region extensions/voice-call/src/manager/timers.ts
|
|
249
|
+
/**
 * Cancel and forget the max-duration timer registered for a call, if any.
 *
 * @param {{ maxDurationTimers: Map }} ctx - Holder of the timer map.
 * @param {string} callId - Call whose timer should be cleared.
 * @returns {void}
 */
function clearMaxDurationTimer(ctx, callId) {
	const handle = ctx.maxDurationTimers.get(callId);
	if (!handle) return;
	clearTimeout(handle);
	ctx.maxDurationTimers.delete(callId);
}
|
|
256
|
+
/**
 * (Re)start the max-duration timer for a call.
 *
 * Any existing timer for the call is cleared first. When the timer fires and
 * the call is still active and non-terminal, the call's endReason is set to
 * "timeout", the record is persisted, and `params.onTimeout(callId)` is
 * awaited.
 *
 * @param {object} params
 * @param {object} params.ctx - Provides maxDurationTimers, activeCalls,
 *   storePath and config.maxDurationSeconds.
 * @param {string} params.callId - Call to guard.
 * @param {number} [params.timeoutMs] - Overrides config.maxDurationSeconds.
 * @param {(callId: string) => unknown} params.onTimeout - Invoked on expiry.
 * @returns {void}
 */
function startMaxDurationTimer(params) {
	clearMaxDurationTimer(params.ctx, params.callId);
	const maxDurationMs = params.timeoutMs ?? params.ctx.config.maxDurationSeconds * 1e3;
	console.log(`[voice-call] Starting max duration timer (${Math.ceil(maxDurationMs / 1e3)}s) for call ${params.callId}`);
	const timer = setTimeout(async () => {
		params.ctx.maxDurationTimers.delete(params.callId);
		const call = params.ctx.activeCalls.get(params.callId);
		if (!call || TerminalStates.has(call.state)) return;
		console.log(`[voice-call] Max duration reached (${Math.ceil(maxDurationMs / 1e3)}s), ending call ${params.callId}`);
		try {
			call.endReason = "timeout";
			persistCallRecord(params.ctx.storePath, call);
			await params.onTimeout(params.callId);
		} catch (err) {
			// Nothing awaits this timer callback, so a failure here would
			// otherwise surface as an unhandled promise rejection.
			console.error(`[voice-call] Max duration timeout handler failed for call ${params.callId}:`, err);
		}
	}, maxDurationMs);
	params.ctx.maxDurationTimers.set(params.callId, timer);
}
|
|
272
|
+
/**
 * Drop the pending transcript waiter for a call, cancelling its timeout.
 * The waiter's promise is neither resolved nor rejected here.
 *
 * @param {{ transcriptWaiters: Map }} ctx - Holder of the waiter map.
 * @param {string} callId - Call whose waiter should be removed.
 * @returns {void}
 */
function clearTranscriptWaiter(ctx, callId) {
	const pending = ctx.transcriptWaiters.get(callId);
	if (pending) {
		clearTimeout(pending.timeout);
		ctx.transcriptWaiters.delete(callId);
	}
}
|
|
278
|
+
/**
 * Remove the pending transcript waiter for a call and reject it.
 *
 * @param {{ transcriptWaiters: Map }} ctx - Holder of the waiter map.
 * @param {string} callId - Call whose waiter should be rejected.
 * @param {string} reason - Message of the Error passed to the waiter's reject.
 * @returns {void} No-op when no waiter is registered.
 */
function rejectTranscriptWaiter(ctx, callId, reason) {
	const pending = ctx.transcriptWaiters.get(callId);
	if (pending) {
		// Unregister first so the waiter is gone before its promise settles.
		clearTranscriptWaiter(ctx, callId);
		pending.reject(new Error(reason));
	}
}
|
|
284
|
+
/**
 * Deliver a transcript to the pending waiter for a call.
 *
 * @param {{ transcriptWaiters: Map }} ctx - Holder of the waiter map.
 * @param {string} callId - Call whose waiter should be resolved.
 * @param {string} transcript - Value passed to the waiter's resolve.
 * @param {string} [turnToken] - Token of the turn that produced the transcript.
 * @returns {boolean} true when a waiter was resolved; false when none is
 *   registered or the waiter carries a turn token that differs from `turnToken`.
 */
function resolveTranscriptWaiter(ctx, callId, transcript, turnToken) {
	const pending = ctx.transcriptWaiters.get(callId);
	const tokenMismatch = Boolean(pending) && Boolean(pending.turnToken) && pending.turnToken !== turnToken;
	if (!pending || tokenMismatch) return false;
	clearTranscriptWaiter(ctx, callId);
	pending.resolve(transcript);
	return true;
}
|
|
292
|
+
/**
 * Register a waiter for a call's next final transcript.
 *
 * @param {object} ctx - Provides transcriptWaiters and
 *   config.transcriptTimeoutMs.
 * @param {string} callId - Call to wait on.
 * @param {string} [turnToken] - Stored on the waiter entry.
 * @returns {Promise<string>} Settles when the registered waiter's resolve or
 *   reject is invoked. Rejects immediately when a waiter already exists for
 *   the call, and rejects after config.transcriptTimeoutMs otherwise.
 */
function waitForFinalTranscript(ctx, callId, turnToken) {
	if (ctx.transcriptWaiters.has(callId)) return Promise.reject(/* @__PURE__ */ new Error("Already waiting for transcript"));
	const timeoutMs = ctx.config.transcriptTimeoutMs;
	return new Promise((resolve, reject) => {
		const onExpire = () => {
			ctx.transcriptWaiters.delete(callId);
			reject(/* @__PURE__ */ new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
		};
		const timeout = setTimeout(onExpire, timeoutMs);
		ctx.transcriptWaiters.set(callId, { resolve, reject, timeout, turnToken });
	});
}
|
|
308
|
+
//#endregion
|
|
309
|
+
//#region extensions/voice-call/src/manager/lifecycle.ts
|
|
310
|
+
/**
 * Remove a call's providerCallId mapping, but only when the map still points
 * at this call's callId.
 *
 * @param {Map<string, string>} providerCallIdMap - providerCallId to callId.
 * @param {{ callId: string, providerCallId?: string }} call
 * @returns {void}
 */
function removeProviderCallMapping(providerCallIdMap, call) {
	const { providerCallId } = call;
	const ownsMapping = Boolean(providerCallId) && providerCallIdMap.get(providerCallId) === call.callId;
	if (ownsMapping) providerCallIdMap.delete(providerCallId);
}
|
|
314
|
+
/**
 * Mark a call as ended and remove it from the in-memory registries.
 *
 * Steps, in order: stamp endedAt/endReason, transition the state to the end
 * reason, persist the record, clear the max-duration timer and reject any
 * transcript waiter (each only when the corresponding map exists on ctx), then
 * drop the call from activeCalls and from the providerCallId map.
 *
 * @param {object} params
 * @param {object} params.ctx - Manager context.
 * @param {object} params.call - Call record, mutated in place.
 * @param {string} params.endReason - Terminal reason; also the target state.
 * @param {number} [params.endedAt] - End timestamp; defaults to Date.now().
 * @param {string} [params.transcriptRejectReason] - Overrides the default
 *   `Call ended: <endReason>` waiter rejection message.
 * @returns {void}
 */
function finalizeCall(params) {
	const { ctx, call, endReason } = params;
	const { callId } = call;
	call.endedAt = params.endedAt ?? Date.now();
	call.endReason = endReason;
	transitionState(call, endReason);
	persistCallRecord(ctx.storePath, call);
	const { maxDurationTimers, transcriptWaiters } = ctx;
	if (maxDurationTimers) clearMaxDurationTimer({ maxDurationTimers }, callId);
	if (transcriptWaiters) {
		const rejectReason = params.transcriptRejectReason ?? `Call ended: ${endReason}`;
		rejectTranscriptWaiter({ transcriptWaiters }, callId, rejectReason);
	}
	ctx.activeCalls.delete(callId);
	removeProviderCallMapping(ctx.providerCallIdMap, call);
}
|
|
325
|
+
//#endregion
|
|
326
|
+
//#region extensions/voice-call/src/manager/lookup.ts
|
|
327
|
+
/**
 * Find an active call by the provider's call id.
 *
 * The providerCallId map is consulted first; when it holds an entry the
 * lookup result from activeCalls is returned as-is. Otherwise the active
 * calls are scanned for a matching providerCallId.
 *
 * @param {{ activeCalls: Map, providerCallIdMap: Map, providerCallId: string }} params
 * @returns {object | undefined} The matching call, if any.
 */
function getCallByProviderCallId(params) {
	const { activeCalls, providerCallIdMap, providerCallId } = params;
	const mappedCallId = providerCallIdMap.get(providerCallId);
	if (mappedCallId) return activeCalls.get(mappedCallId);
	return [...activeCalls.values()].find((candidate) => candidate.providerCallId === providerCallId);
}
|
|
332
|
+
/**
 * Look up an active call by an id that may be either the internal callId or
 * the provider's call id. The internal id is tried first.
 *
 * @param {{ activeCalls: Map, providerCallIdMap: Map, callIdOrProviderCallId: string }} params
 * @returns {object | undefined} The matching call, if any.
 */
function findCall(params) {
	const { activeCalls, providerCallIdMap, callIdOrProviderCallId } = params;
	return activeCalls.get(callIdOrProviderCallId) || getCallByProviderCallId({
		activeCalls,
		providerCallIdMap,
		providerCallId: callIdOrProviderCallId
	});
}
|
|
341
|
+
//#endregion
|
|
342
|
+
//#region extensions/voice-call/src/tts-provider-voice.ts
|
|
343
|
+
/**
 * Read the voice setting from a TTS provider config object.
 *
 * @param {unknown} providerConfig - Provider-specific config; ignored unless
 *   it is a non-null object.
 * @returns {string | undefined} The normalized `voice`, falling back to the
 *   normalized `voiceId` when `voice` normalizes to a nullish value.
 */
function resolveProviderVoiceSetting(providerConfig) {
	const usable = Boolean(providerConfig) && typeof providerConfig === "object";
	if (!usable) return;
	const { voice, voiceId } = providerConfig;
	return normalizeOptionalString(voice) ?? normalizeOptionalString(voiceId);
}
|
|
348
|
+
/**
 * Resolve the voice configured for the selected TTS provider.
 *
 * @param {{ tts?: { provider?: string, providers?: Record<string, unknown> } }} config
 * @returns {string | undefined} The voice setting of `tts.providers[tts.provider]`,
 *   or undefined when no provider is selected.
 */
function resolvePreferredTtsVoice(config) {
	const tts = config.tts;
	const providerId = tts?.provider;
	return providerId ? resolveProviderVoiceSetting(tts?.providers?.[providerId]) : void 0;
}
|
|
353
|
+
//#endregion
|
|
354
|
+
//#region extensions/voice-call/src/manager/twiml.ts
|
|
355
|
+
/**
 * Build the TwiML document that speaks a message and then hangs up.
 *
 * Both the message and the voice name are XML-escaped. The voice sits inside
 * a double-quoted attribute, so an unescaped quote or angle bracket in it
 * would break the document or inject extra attributes.
 *
 * @param {string} message - Text placed inside the <Say> element.
 * @param {string} voice - Value of the <Say> `voice` attribute.
 * @returns {string} The TwiML document.
 */
function generateNotifyTwiml(message, voice) {
	// String() keeps the previous template-literal stringification for
	// non-string values before escaping.
	const safeVoice = escapeXml(String(voice));
	return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say voice="${safeVoice}">${escapeXml(message)}</Say>
<Hangup/>
</Response>`;
}
|
|
362
|
+
/**
 * Build the TwiML document that plays DTMF digits and then redirects the call
 * to the webhook with a POST.
 *
 * @param {string} digits - DTMF sequence for the <Play digits> attribute.
 * @param {string} webhookUrl - Target of the <Redirect>.
 * @returns {string} The TwiML document, with both values XML-escaped.
 */
function generateDtmfRedirectTwiml(digits, webhookUrl) {
	const playElement = `<Play digits="${escapeXml(digits)}" />`;
	const redirectElement = `<Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>`;
	return [
		`<?xml version="1.0" encoding="UTF-8"?>`,
		`<Response>`,
		playElement,
		redirectElement,
		`</Response>`
	].join("\n");
}
|
|
369
|
+
//#endregion
|
|
370
|
+
//#region extensions/voice-call/src/manager/outbound.ts
|
|
371
|
+
/**
 * Classify a call id for operations that need a live provider connection.
 *
 * @param {object} ctx - Provides activeCalls and provider.
 * @param {string} callId - Internal call id.
 * @returns {{ kind: "error", error: string } | { kind: "ended", call: object } | { kind: "ok", call: object, providerCallId: string, provider: object }}
 *   "error" when the call is unknown or has no provider/providerCallId,
 *   "ended" when its state is terminal, "ok" otherwise.
 */
function lookupConnectedCall(ctx, callId) {
	const call = ctx.activeCalls.get(callId);
	if (!call) return { kind: "error", error: "Call not found" };
	const provider = ctx.provider;
	const providerCallId = call.providerCallId;
	if (!provider || !providerCallId) return { kind: "error", error: "Call not connected" };
	if (TerminalStates.has(call.state)) return { kind: "ended", call };
	return { kind: "ok", call, providerCallId, provider };
}
|
|
392
|
+
/**
 * Like lookupConnectedCall, but collapses every non-usable outcome into an
 * `{ ok: false, error }` result.
 *
 * @param {object} ctx - Manager context.
 * @param {string} callId - Internal call id.
 * @returns {{ ok: false, error: string } | { ok: true, call: object, providerCallId: string, provider: object }}
 */
function requireConnectedCall(ctx, callId) {
	const lookup = lookupConnectedCall(ctx, callId);
	switch (lookup.kind) {
		case "error": return { ok: false, error: lookup.error };
		case "ended": return { ok: false, error: "Call has ended" };
		default: return {
			ok: true,
			call: lookup.call,
			providerCallId: lookup.providerCallId,
			provider: lookup.provider
		};
	}
}
|
|
409
|
+
/**
 * Validate a DTMF sequence.
 *
 * @param {string} digits - Candidate sequence.
 * @returns {string | null} null when the sequence is non-empty and contains
 *   only 0-9, *, #, comma, w/W or p/P; otherwise an error message.
 */
function validateDtmfDigits(digits) {
	if (/^[0-9*#wWpP,]+$/.test(digits)) return null;
	return "digits may only contain digits, *, #, comma, w, p";
}
|
|
412
|
+
/**
 * Start an outbound call through the configured provider.
 *
 * Preconditions are checked in order: DTMF sequence validity (and that it is
 * only used in "conversation" mode), provider presence, webhook URL presence,
 * the concurrent-call limit, and a usable from-number (the mock provider gets
 * a placeholder number). Precondition failures return an empty callId.
 *
 * Once the record is registered and persisted, a provider failure finalizes
 * the call with end reason "failed" and returns the generated callId together
 * with the formatted error.
 *
 * @param {object} ctx - Manager context.
 * @param {string} to - Destination number.
 * @param {string} [sessionKey] - Explicit session key passed to
 *   resolveVoiceCallSessionKey.
 * @param {string | { message?: string, mode?: string, dtmfSequence?: string }} [options]
 *   A bare string is treated as `{ message }`.
 * @returns {Promise<{ callId: string, success: boolean, error?: string }>}
 */
async function initiateCall(ctx, to, sessionKey, options) {
	const refuse = (error) => ({
		callId: "",
		success: false,
		error
	});
	let opts;
	if (typeof options === "string") opts = { message: options };
	else opts = options ?? {};
	const initialMessage = opts.message;
	const mode = opts.mode ?? ctx.config.outbound.defaultMode;
	const dtmfSequence = opts.dtmfSequence;
	if (dtmfSequence) {
		const digitsProblem = validateDtmfDigits(dtmfSequence);
		if (digitsProblem) return refuse(digitsProblem);
		if (mode !== "conversation") return refuse("dtmfSequence requires conversation mode");
	}
	if (!ctx.provider) return refuse("Provider not initialized");
	if (!ctx.webhookUrl) return refuse("Webhook URL not configured");
	if (ctx.activeCalls.size >= ctx.config.maxConcurrentCalls) return refuse(`Maximum concurrent calls (${ctx.config.maxConcurrentCalls}) reached`);
	const callId = crypto.randomUUID();
	const from = ctx.config.fromNumber || (ctx.provider?.name === "mock" ? "+15550000000" : void 0);
	if (!from) return refuse("fromNumber not configured");
	const resolvedSessionKey = resolveVoiceCallSessionKey({
		config: ctx.config,
		callId,
		phone: to,
		explicitSessionKey: sessionKey
	});
	const callRecord = {
		callId,
		provider: ctx.provider.name,
		direction: "outbound",
		state: "initiated",
		from,
		to,
		sessionKey: resolvedSessionKey,
		startedAt: Date.now(),
		transcript: [],
		processedEventIds: [],
		// initialMessage is recorded only when one was supplied.
		metadata: initialMessage ? { initialMessage, mode } : { mode }
	};
	ctx.activeCalls.set(callId, callRecord);
	persistCallRecord(ctx.storePath, callRecord);
	const hasInitialMessage = initialMessage ? "yes" : "no";
	try {
		let inlineTwiml;
		let preConnectTwiml;
		if (mode === "notify" && initialMessage) {
			const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
			inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
			console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
		} else if (dtmfSequence) {
			preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
			console.log(`[voice-call] Using pre-connect DTMF TwiML for call ${callId} (digits=${dtmfSequence.length}, initialMessage=${hasInitialMessage})`);
		}
		const { providerCallId } = await ctx.provider.initiateCall({
			callId,
			from,
			to,
			webhookUrl: ctx.webhookUrl,
			inlineTwiml,
			preConnectTwiml
		});
		callRecord.providerCallId = providerCallId;
		ctx.providerCallIdMap.set(providerCallId, callId);
		persistCallRecord(ctx.storePath, callRecord);
		console.log(`[voice-call] Outbound call initiated: callId=${callId} providerCallId=${providerCallId} mode=${mode} preConnectDtmf=${preConnectTwiml ? "yes" : "no"} initialMessage=${hasInitialMessage}`);
		return { callId, success: true };
	} catch (err) {
		finalizeCall({
			ctx,
			call: callRecord,
			endReason: "failed"
		});
		return {
			callId,
			success: false,
			error: formatErrorMessage(err)
		};
	}
}
|
|
515
|
+
/**
 * Speak text on a connected call via the provider's TTS.
 *
 * The call is moved to "speaking" and persisted before playback. On success
 * the text is appended to the transcript as a bot entry and persisted again.
 * On failure the call is moved to "listening", persisted, and the formatted
 * error is returned.
 *
 * The voice comes from the effective config for the call's number route:
 * `metadata.numberRouteKey` when it is a string, otherwise `call.to`.
 *
 * @param {object} ctx - Manager context.
 * @param {string} callId - Internal call id.
 * @param {string} text - Text to speak.
 * @returns {Promise<{ success: true } | { success: false, error: string }>}
 */
async function speak(ctx, callId, text) {
	const lookup = requireConnectedCall(ctx, callId);
	if (!lookup.ok) return { success: false, error: lookup.error };
	const { call, providerCallId, provider } = lookup;
	const saveCall = () => persistCallRecord(ctx.storePath, call);
	try {
		transitionState(call, "speaking");
		saveCall();
		const routeOverride = call.metadata?.numberRouteKey;
		const numberRouteKey = typeof routeOverride === "string" ? routeOverride : call.to;
		const { config: effectiveConfig } = resolveVoiceCallEffectiveConfig(ctx.config, numberRouteKey);
		const voice = resolvePreferredTtsVoice(effectiveConfig);
		await provider.playTts({ callId, providerCallId, text, voice });
		addTranscriptEntry(call, "bot", text);
		saveCall();
		return { success: true };
	} catch (err) {
		transitionState(call, "listening");
		saveCall();
		return { success: false, error: formatErrorMessage(err) };
	}
}
|
|
545
|
+
/**
 * Decide whether listening should be started explicitly after the initial
 * message has been spoken.
 *
 * @param {object} ctx - Provides provider and config.streaming.enabled.
 * @returns {boolean} false only when the provider is "twilio", streaming is
 *   enabled, and the provider's optional
 *   `isConversationStreamConnectEnabled()` returns exactly true.
 */
function shouldStartListeningAfterInitialMessage(ctx) {
	const isTwilio = ctx.provider?.name === "twilio";
	if (!isTwilio || !ctx.config.streaming.enabled) return true;
	const streamConnects = ctx.provider.isConversationStreamConnectEnabled?.();
	return streamConnects !== true;
}
|
|
550
|
+
/**
 * Send DTMF digits on a connected call.
 *
 * @param {object} ctx - Manager context.
 * @param {string} callId - Internal call id.
 * @param {string} digits - Sequence checked by validateDtmfDigits.
 * @returns {Promise<{ success: true } | { success: false, error: string }>}
 *   Fails when the digits are invalid, the call is not connected, the provider
 *   has no `sendDtmf` method, or the provider call throws.
 */
async function sendDtmf(ctx, callId, digits) {
	const fail = (error) => ({ success: false, error });
	const digitsProblem = validateDtmfDigits(digits);
	if (digitsProblem) return fail(digitsProblem);
	const lookup = requireConnectedCall(ctx, callId);
	if (!lookup.ok) return fail(lookup.error);
	const { provider, providerCallId } = lookup;
	if (!provider.sendDtmf) return fail(`${provider.name} does not support outbound DTMF`);
	try {
		await provider.sendDtmf({ callId, providerCallId, digits });
	} catch (err) {
		return fail(formatErrorMessage(err));
	}
	return { success: true };
}
|
|
579
|
+
/**
 * Speak a call's stored initial message once the call is up.
 *
 * The call is located by provider call id. Nothing happens when the call is
 * unknown, has no `metadata.initialMessage`, or already has an initial message
 * in flight. After the message is spoken successfully it is removed from the
 * call's metadata and the record is persisted. Then, depending on
 * `metadata.mode` (default "conversation"):
 *  - "notify": an auto-hangup is scheduled after
 *    `config.outbound.notifyHangupDelaySec` seconds;
 *  - "conversation": the call moves to "listening" and the provider is asked
 *    to start listening, when shouldStartListeningAfterInitialMessage allows.
 *
 * @param {object} ctx - Manager context.
 * @param {string} providerCallId - Provider's id for the call.
 * @returns {Promise<void>}
 */
async function speakInitialMessage(ctx, providerCallId) {
	const call = getCallByProviderCallId({
		activeCalls: ctx.activeCalls,
		providerCallIdMap: ctx.providerCallIdMap,
		providerCallId
	});
	if (!call) {
		console.warn(`[voice-call] speakInitialMessage: no call found for ${providerCallId}`);
		return;
	}
	const initialMessage = call.metadata?.initialMessage;
	const mode = call.metadata?.mode ?? "conversation";
	if (!initialMessage) {
		console.log(`[voice-call] speakInitialMessage: no initial message for ${call.callId}`);
		return;
	}
	if (ctx.initialMessageInFlight.has(call.callId)) {
		console.log(`[voice-call] speakInitialMessage: initial message already in flight for ${call.callId}`);
		return;
	}
	ctx.initialMessageInFlight.add(call.callId);
	try {
		console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
		const result = await speak(ctx, call.callId, initialMessage);
		if (!result.success) {
			console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
			return;
		}
		if (call.metadata) {
			// Drop the message so it is not spoken a second time.
			delete call.metadata.initialMessage;
			persistCallRecord(ctx.storePath, call);
		}
		if (mode === "notify") {
			const delaySec = ctx.config.outbound.notifyHangupDelaySec;
			console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
			setTimeout(async () => {
				try {
					const currentCall = ctx.activeCalls.get(call.callId);
					if (currentCall && !TerminalStates.has(currentCall.state)) {
						console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
						await endCall(ctx, call.callId);
					}
				} catch (err) {
					// Nothing awaits this timer callback, so a failure here
					// would otherwise surface as an unhandled promise rejection.
					console.error(`[voice-call] Notify mode: auto-hangup failed for call ${call.callId}:`, err);
				}
			}, delaySec * 1e3);
		} else if (mode === "conversation" && ctx.provider && shouldStartListeningAfterInitialMessage(ctx)) {
			transitionState(call, "listening");
			persistCallRecord(ctx.storePath, call);
			await ctx.provider.startListening({
				callId: call.callId,
				providerCallId
			});
		}
	} finally {
		ctx.initialMessageInFlight.delete(call.callId);
	}
}
|
|
633
|
+
/**
 * Run one conversational turn: speak `prompt`, listen, and wait for the
 * caller's final transcript.
 *
 * Only one turn per call may be in progress; a second request while a turn or
 * a transcript waiter is active is rejected with "Already waiting for
 * transcript". On success the call metadata is updated with `turnCount`,
 * `lastTurnLatencyMs`, `lastTurnListenWaitMs` and `lastTurnCompletedAt`, and
 * the record is persisted.
 *
 * @param ctx Manager context.
 * @param callId Internal call identifier.
 * @param prompt Text to speak before listening.
 * @returns {Promise<{success: boolean, transcript?: string, error?: string}>}
 *   Never rejects for errors raised inside the turn; they are reported as
 *   `{ success: false, error }`.
 */
async function continueCall(ctx, callId, prompt) {
  const connected = requireConnectedCall(ctx, callId);
  if (!connected.ok) {
    return {
      success: false,
      error: connected.error,
    };
  }
  const { call, providerCallId, provider } = connected;

  if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
    return {
      success: false,
      error: "Already waiting for transcript",
    };
  }
  ctx.activeTurnCalls.add(callId);

  const turnStartedAt = Date.now();
  // A per-turn token is only generated for the "twilio" provider.
  const turnToken = provider.name === "twilio" ? crypto.randomUUID() : void 0;

  try {
    // `speak` reports failure through its result object (see speakInitialMessage)
    // rather than by throwing. Without this check a failed prompt would still
    // move the call to "listening" and block on a transcript for a question
    // the caller never heard.
    const spoken = await speak(ctx, callId, prompt);
    if (spoken?.success === false) {
      return {
        success: false,
        error: spoken.error,
      };
    }

    transitionState(call, "listening");
    persistCallRecord(ctx.storePath, call);

    const listenStartedAt = Date.now();
    await provider.startListening({
      callId,
      providerCallId,
      turnToken,
    });
    const transcript = await waitForFinalTranscript(ctx, callId, turnToken);
    const transcriptReceivedAt = Date.now();
    await provider.stopListening({
      callId,
      providerCallId,
    });

    const lastTurnLatencyMs = transcriptReceivedAt - turnStartedAt;
    const lastTurnListenWaitMs = transcriptReceivedAt - listenStartedAt;
    const turnCount =
      call.metadata && typeof call.metadata.turnCount === "number"
        ? call.metadata.turnCount + 1
        : 1;
    call.metadata = {
      ...call.metadata,
      turnCount,
      lastTurnLatencyMs,
      lastTurnListenWaitMs,
      lastTurnCompletedAt: transcriptReceivedAt,
    };
    persistCallRecord(ctx.storePath, call);

    console.log(
      `[voice-call] continueCall latency call=${call.callId} totalMs=${lastTurnLatencyMs} listenWaitMs=${lastTurnListenWaitMs}`,
    );
    return {
      success: true,
      transcript,
    };
  } catch (err) {
    return {
      success: false,
      error: formatErrorMessage(err),
    };
  } finally {
    // Always release the turn lock and any pending waiter, even on early return.
    ctx.activeTurnCalls.delete(callId);
    clearTranscriptWaiter(ctx, callId);
  }
}
|
|
689
|
+
/**
 * Hang up a call through its provider and finalize the local record.
 *
 * @param ctx Manager context.
 * @param callId Internal call identifier.
 * @param options Optional settings; `options.reason` becomes both the hangup
 *   reason sent to the provider and the recorded end reason (default
 *   "hangup-bot").
 * @returns {Promise<{success: boolean, error?: string}>} Success is also
 *   reported when the lookup says the call has already ended. A provider
 *   failure is returned as `{ success: false, error }` and the call is not
 *   finalized.
 */
async function endCall(ctx, callId, options) {
  const lookup = lookupConnectedCall(ctx, callId);
  switch (lookup.kind) {
    case "error":
      return {
        success: false,
        error: lookup.error,
      };
    case "ended":
      return { success: true };
  }

  const { call, providerCallId, provider } = lookup;
  const endReason = options?.reason ?? "hangup-bot";
  try {
    await provider.hangupCall({
      callId,
      providerCallId,
      reason: endReason,
    });
    finalizeCall({
      ctx,
      call,
      endReason,
    });
  } catch (err) {
    return {
      success: false,
      error: formatErrorMessage(err),
    };
  }
  return { success: true };
}
|
|
717
|
+
//#endregion
|
|
718
|
+
//#region extensions/voice-call/src/manager/events.ts
|
|
719
|
+
/**
 * Decide whether an inbound call may be accepted under the configured policy.
 *
 * - "disabled": always rejected.
 * - "open": always accepted.
 * - "allowlist" / "pairing": accepted only when the normalized caller number
 *   passes `isAllowlistedCaller` against `config.allowFrom`; a caller id that
 *   normalizes to an empty value is rejected.
 * - anything else: rejected.
 *
 * @param config Voice-call config; reads `inboundPolicy` and `allowFrom`.
 * @param from Caller number as reported by the provider.
 * @returns Whether the call should be accepted.
 */
function shouldAcceptInbound(config, from) {
  const { inboundPolicy, allowFrom } = config;

  if (inboundPolicy === "disabled") {
    console.log("[voice-call] Inbound call rejected: policy is disabled");
    return false;
  }
  if (inboundPolicy === "open") {
    console.log("[voice-call] Inbound call accepted: policy is open");
    return true;
  }
  if (inboundPolicy !== "allowlist" && inboundPolicy !== "pairing") {
    return false;
  }

  const caller = normalizePhoneNumber(from);
  if (!caller) {
    console.log("[voice-call] Inbound call rejected: missing caller ID");
    return false;
  }

  const allowed = isAllowlistedCaller(caller, allowFrom);
  const verdict = allowed ? "accepted" : "rejected";
  const membership = allowed ? "is in" : "not in";
  console.log(`[voice-call] Inbound call ${verdict}: ${from} ${membership} allowlist`);
  return allowed;
}
|
|
742
|
+
/**
 * Create, register and persist a call record for a call first seen through a
 * provider webhook.
 *
 * The record starts in state "ringing". Inbound calls get an initial message
 * (the effective `inboundGreeting`, or a built-in default greeting); outbound
 * calls get none. When the effective config resolution yields a
 * `numberRouteKey`, it is stored in the record metadata.
 *
 * @param params `{ ctx, providerCallId, direction, from, to }`.
 * @returns The newly created call record.
 */
function createWebhookCall(params) {
  const { ctx, providerCallId, direction, from, to } = params;
  const isInbound = direction === "inbound";

  const callId = crypto.randomUUID();
  // The dialed number is only passed to config resolution for inbound calls.
  const effective = resolveVoiceCallEffectiveConfig(ctx.config, isInbound ? to : void 0);
  const effectiveConfig = effective.config;

  const sessionKey = resolveVoiceCallSessionKey({
    config: effectiveConfig,
    callId,
    // Session identity follows the remote party: callee when outbound, caller otherwise.
    phone: direction === "outbound" ? to : from,
  });

  const metadata = {
    initialMessage: isInbound
      ? effectiveConfig.inboundGreeting || "Hello! How can I help you today?"
      : void 0,
  };
  if (effective.numberRouteKey) {
    metadata.numberRouteKey = effective.numberRouteKey;
  }

  const callRecord = {
    callId,
    providerCallId,
    provider: ctx.provider?.name || "twilio",
    direction,
    state: "ringing",
    from,
    to,
    sessionKey,
    startedAt: Date.now(),
    transcript: [],
    processedEventIds: [],
    metadata,
  };

  ctx.activeCalls.set(callId, callRecord);
  ctx.providerCallIdMap.set(providerCallId, callId);
  persistCallRecord(ctx.storePath, callRecord);
  console.log(`[voice-call] Created ${direction} call record: ${callId} from ${from}`);
  return callRecord;
}
|
|
773
|
+
/**
 * Apply one provider webhook event to the call state held in `ctx`.
 *
 * Steps, in order:
 *   1. Drop events whose dedupe key (`event.dedupeKey || event.id`) was
 *      already processed.
 *   2. Find the call. If none exists and the event carries a provider call id
 *      and a direction, either reject it (inbound calls refused by policy are
 *      hung up through the provider) or create a new call record.
 *   3. Re-key the call when the provider call id changed.
 *   4. Record the dedupe key, except for retryable `call.error` events.
 *   5. Apply the per-type state transition and persist the record.
 *      `call.ended` and non-retryable `call.error` finalize the call and
 *      return without the trailing persist.
 *
 * @param ctx Manager context.
 * @param event Normalized provider event; `event.callId` is overwritten with
 *   the internal call id when a record is created here.
 */
function processEvent(ctx, event) {
  const eventKey = event.dedupeKey || event.id;
  if (ctx.processedEventIds.has(eventKey)) return;

  let call = findCall({
    activeCalls: ctx.activeCalls,
    providerCallIdMap: ctx.providerCallIdMap,
    callIdOrProviderCallId: event.callId,
  });
  const providerCallId = event.providerCallId;
  const direction =
    event.direction === "inbound" || event.direction === "outbound" ? event.direction : void 0;

  if (!call && providerCallId && direction) {
    const refusedByPolicy = direction === "inbound" && !shouldAcceptInbound(ctx.config, event.from);
    if (refusedByPolicy) {
      if (!ctx.provider) {
        console.warn(`[voice-call] Inbound call rejected by policy but no provider to hang up (providerCallId: ${providerCallId}, from: ${event.from}); call will time out on provider side.`);
        return;
      }
      ctx.processedEventIds.add(eventKey);
      // Only issue one hangup per rejected provider call.
      if (ctx.rejectedProviderCallIds.has(providerCallId)) return;
      ctx.rejectedProviderCallIds.add(providerCallId);

      const hangupCallId = event.callId ?? providerCallId;
      console.log(`[voice-call] Rejecting inbound call by policy: ${providerCallId}`);
      ctx.provider
        .hangupCall({
          callId: hangupCallId,
          providerCallId,
          reason: "hangup-bot",
        })
        .catch((err) => {
          // Forget the rejection so a later event can retry the hangup.
          ctx.rejectedProviderCallIds.delete(providerCallId);
          const message = formatErrorMessage(err);
          console.warn(`[voice-call] Failed to reject inbound call ${providerCallId}:`, message);
        });
      return;
    }

    call = createWebhookCall({
      ctx,
      providerCallId,
      direction: direction === "outbound" ? "outbound" : "inbound",
      from: event.from || "unknown",
      to: event.to || ctx.config.fromNumber || "unknown",
    });
    event.callId = call.callId;
  }

  if (!call) return;

  // Keep the provider-id index in sync when the provider reports a new id.
  if (event.providerCallId && event.providerCallId !== call.providerCallId) {
    const staleProviderCallId = call.providerCallId;
    call.providerCallId = event.providerCallId;
    ctx.providerCallIdMap.set(event.providerCallId, call.callId);
    if (staleProviderCallId && ctx.providerCallIdMap.get(staleProviderCallId) === call.callId) {
      ctx.providerCallIdMap.delete(staleProviderCallId);
    }
  }

  // Retryable errors are not marked as processed.
  const isRetryableError = event.type === "call.error" && event.retryable;
  if (!isRetryableError) {
    ctx.processedEventIds.add(eventKey);
    call.processedEventIds.push(eventKey);
  }

  switch (event.type) {
    case "call.initiated": {
      transitionState(call, "initiated");
      const canAnswer = call.direction === "inbound" && call.providerCallId && ctx.provider?.answerCall;
      if (canAnswer) {
        ctx.provider
          .answerCall({
            callId: call.callId,
            providerCallId: call.providerCallId,
          })
          .catch((err) => {
            const message = formatErrorMessage(err);
            console.warn(`[voice-call] Failed to answer inbound call ${call.providerCallId}:`, message);
          });
      }
      break;
    }
    case "call.ringing":
      transitionState(call, "ringing");
      break;
    case "call.answered":
      call.answeredAt = event.timestamp;
      transitionState(call, "answered");
      startMaxDurationTimer({
        ctx,
        callId: call.callId,
        onTimeout: async (callId) => {
          await endCall(ctx, callId, { reason: "timeout" });
        },
      });
      ctx.onCallAnswered?.(call);
      break;
    case "call.active":
      transitionState(call, "active");
      break;
    case "call.speaking":
      transitionState(call, "speaking");
      break;
    case "call.speech": {
      if (event.isFinal) {
        const hadWaiter = ctx.transcriptWaiters.has(call.callId);
        const resolved = resolveTranscriptWaiter(ctx, call.callId, event.transcript, event.turnToken);
        if (hadWaiter && !resolved) {
          console.warn(`[voice-call] Ignoring speech event with mismatched turn token for ${call.callId}`);
          break;
        }
        addTranscriptEntry(call, "user", event.transcript);
      }
      transitionState(call, "listening");
      break;
    }
    case "call.silence":
    case "call.dtmf":
      break;
    case "call.ended":
      finalizeCall({
        ctx,
        call,
        endReason: event.reason,
        endedAt: event.timestamp,
      });
      return;
    case "call.error":
      if (!event.retryable) {
        finalizeCall({
          ctx,
          call,
          endReason: "error",
          endedAt: event.timestamp,
          transcriptRejectReason: `Call error: ${event.error}`,
        });
        return;
      }
      break;
  }

  persistCallRecord(ctx.storePath, call);
}
|
|
897
|
+
//#endregion
|
|
898
|
+
//#region extensions/voice-call/src/utils.ts
|
|
899
|
+
/**
 * Resolve a user-supplied path to an absolute path, expanding a leading `~`.
 *
 * Only a bare `~` or a `~` directly followed by a path separator is expanded;
 * forms such as `~user/...` are left as-is and resolved relative to the
 * current working directory.
 *
 * @param {string} input Raw path; surrounding whitespace is ignored.
 * @returns {string} The absolute path, or the empty string when `input` is
 *   blank.
 */
function resolveUserPath(input) {
  const trimmed = input.trim();
  if (!trimmed) return trimmed;
  // Use a replacer function: a replacement *string* would have `$&`, `$1`,
  // `$$` etc. interpreted as patterns, corrupting home directories that
  // contain a `$`.
  const expanded = trimmed.replace(/^~(?=$|[\\/])/, () => os.homedir());
  return path.resolve(expanded);
}
|
|
908
|
+
//#endregion
|
|
909
|
+
//#region extensions/voice-call/src/manager.ts
|
|
910
|
+
/**
 * Mark a persisted call as ended in place, because it will not be restored.
 *
 * Sets `endedAt` to the current time and uses `endReason` for both the
 * `endReason` and `state` fields.
 *
 * @param call Call record to mutate.
 * @param endReason Reason the call is being closed; also written to `state`.
 */
function markRestoredCallSkipped(call, endReason) {
  Object.assign(call, {
    endedAt: Date.now(),
    endReason,
    state: endReason,
  });
}
|
|
915
|
+
/**
 * Increment the tally for a provider status in a status -> count map.
 *
 * The key is `normalizeOptionalString(status)`, falling back to "terminal"
 * when that yields null/undefined.
 *
 * @param {Map<string, number>} counts Tally map, mutated in place.
 * @param status Provider-reported status.
 */
function incrementRestoreStatusCount(counts, status) {
  const bucket = normalizeOptionalString(status) ?? "terminal";
  const previous = counts.get(bucket) ?? 0;
  counts.set(bucket, previous + 1);
}
|
|
919
|
+
/**
 * Resolve the directory used to store call records.
 *
 * Precedence: the explicit `storePath` argument, then `config.store`, then
 * `~/.openclaw/voice-calls`. Blank/whitespace-only overrides are ignored.
 *
 * The previous implementation probed the filesystem for the single default
 * candidate and fell back to that very same candidate, so the result never
 * depended on the probe; the synchronous existence checks were dropped.
 *
 * @param config Voice-call config; reads the optional `store` string.
 * @param {string} [storePath] Optional explicit override.
 * @returns {string} Absolute store directory path.
 */
function resolveDefaultStoreBase(config, storePath) {
  const rawOverride = storePath?.trim() || config.store?.trim();
  if (rawOverride) return resolveUserPath(rawOverride);
  return resolveUserPath(path.join(os.homedir(), ".openclaw", "voice-calls"));
}
|
|
931
|
+
/**
 * Manages voice calls: owns the mutable call state (active calls, provider-id
 * index, dedupe sets, timers) and delegates the actual work to the manager
 * helper functions, handing them a context object built by `getContext()`.
 */
var CallManager = class {
  /**
   * @param config Voice-call configuration.
   * @param storePath Optional store directory override (see resolveDefaultStoreBase).
   */
  constructor(config, storePath) {
    this.activeCalls = new Map();
    this.providerCallIdMap = new Map();
    this.processedEventIds = new Set();
    this.rejectedProviderCallIds = new Set();
    this.provider = null;
    this.webhookUrl = null;
    this.activeTurnCalls = new Set();
    this.transcriptWaiters = new Map();
    this.maxDurationTimers = new Map();
    this.initialMessageInFlight = new Set();
    this.config = config;
    this.storePath = resolveDefaultStoreBase(config, storePath);
  }

  /**
   * Initialize the call manager with a provider.
   * Loads persisted calls, verifies them with the provider, rebuilds the
   * provider-id index and restarts max-duration timers for answered calls.
   */
  async initialize(provider, webhookUrl) {
    this.provider = provider;
    this.webhookUrl = webhookUrl;
    fs.mkdirSync(this.storePath, { recursive: true });

    const persisted = loadActiveCallsFromStore(this.storePath);
    this.processedEventIds = persisted.processedEventIds;
    this.rejectedProviderCallIds = persisted.rejectedProviderCallIds;

    const restored = await this.verifyRestoredCalls(provider, persisted.activeCalls);
    this.activeCalls = restored;
    this.providerCallIdMap = new Map();
    for (const [callId, call] of restored) {
      if (call.providerCallId) this.providerCallIdMap.set(call.providerCallId, callId);
    }

    let skippedAlreadyElapsedTimers = 0;
    for (const [callId, call] of restored) {
      // Only answered, non-terminal calls carry a max-duration timer.
      if (!call.answeredAt || TerminalStates.has(call.state)) continue;

      const elapsed = Date.now() - call.answeredAt;
      const maxDurationMs = this.config.maxDurationSeconds * 1e3;
      if (elapsed >= maxDurationMs) {
        // Budget already spent: drop the call instead of arming a timer.
        restored.delete(callId);
        if (call.providerCallId) this.providerCallIdMap.delete(call.providerCallId);
        skippedAlreadyElapsedTimers += 1;
        continue;
      }

      startMaxDurationTimer({
        ctx: this.getContext(),
        callId,
        timeoutMs: maxDurationMs - elapsed,
        onTimeout: async (id) => {
          await endCall(this.getContext(), id, { reason: "timeout" });
        },
      });
      console.log(`[voice-call] Restarted max-duration timer for restored call ${callId}`);
    }

    if (skippedAlreadyElapsedTimers > 0) {
      console.log(`[voice-call] Skipped ${skippedAlreadyElapsedTimers} restored call(s) whose max-duration timer already elapsed`);
    }
    if (restored.size > 0) {
      console.log(`[voice-call] Restored ${restored.size} active call(s) from store`);
    }
  }

  /**
   * Verify persisted calls with the provider before restoring.
   * Calls without providerCallId or older than maxDurationSeconds are skipped
   * (the latter are also marked as timed out, persisted and hung up).
   * Calls the provider reports as terminal are marked completed and skipped.
   * Unknown statuses and verification errors keep the call (rely on timer fallback).
   *
   * @returns {Promise<Map>} Map of callId -> call for the calls to restore.
   */
  async verifyRestoredCalls(provider, candidates) {
    const verified = new Map();
    if (candidates.size === 0) return verified;

    const maxAgeMs = this.config.maxDurationSeconds * 1e3;
    const now = Date.now();
    const statusChecks = [];
    const skippedTerminalStatuses = new Map();
    const tally = {
      noProviderCallId: 0,
      olderThanMaxDuration: 0,
      verifiedActive: 0,
      unknownStatus: 0,
      verificationFailures: 0,
    };

    for (const [callId, call] of candidates) {
      if (!call.providerCallId) {
        tally.noProviderCallId += 1;
        continue;
      }

      if (now - call.startedAt > maxAgeMs) {
        tally.olderThanMaxDuration += 1;
        markRestoredCallSkipped(call, "timeout");
        persistCallRecord(this.storePath, call);
        await provider
          .hangupCall({
            callId,
            providerCallId: call.providerCallId,
            reason: "timeout",
          })
          .catch((err) => {
            console.warn(`[voice-call] Failed to hang up expired restored call ${callId}:`, err instanceof Error ? err.message : String(err));
          });
        continue;
      }

      // Status lookups are started here and awaited together below.
      const check = provider
        .getCallStatus({ providerCallId: call.providerCallId })
        .then((result) => {
          if (result.isTerminal) {
            incrementRestoreStatusCount(skippedTerminalStatuses, result.status);
            markRestoredCallSkipped(call, "completed");
            persistCallRecord(this.storePath, call);
            return;
          }
          if (result.isUnknown) tally.unknownStatus += 1;
          else tally.verifiedActive += 1;
          verified.set(callId, call);
        })
        .catch(() => {
          tally.verificationFailures += 1;
          verified.set(callId, call);
        });
      statusChecks.push(check);
    }

    await Promise.allSettled(statusChecks);

    if (tally.noProviderCallId > 0) {
      console.log(`[voice-call] Skipped ${tally.noProviderCallId} restored call(s) with no providerCallId`);
    }
    if (tally.olderThanMaxDuration > 0) {
      console.log(`[voice-call] Skipped ${tally.olderThanMaxDuration} restored call(s) older than maxDurationSeconds`);
    }
    const sortedStatuses = [...skippedTerminalStatuses].toSorted(([a], [b]) => a.localeCompare(b));
    for (const [status, count] of sortedStatuses) {
      console.log(`[voice-call] Skipped ${count} restored call(s) with provider status: ${status}`);
    }
    if (tally.verifiedActive > 0) {
      console.log(`[voice-call] Kept ${tally.verifiedActive} restored call(s) confirmed active by provider`);
    }
    if (tally.unknownStatus > 0) {
      console.log(`[voice-call] Kept ${tally.unknownStatus} restored call(s) with unknown provider status (relying on timer)`);
    }
    if (tally.verificationFailures > 0) {
      console.log(`[voice-call] Kept ${tally.verificationFailures} restored call(s) after verification failure (relying on timer)`);
    }
    return verified;
  }

  /**
   * Get the current provider.
   */
  getProvider() {
    return this.provider;
  }

  /**
   * Initiate an outbound call.
   */
  async initiateCall(to, sessionKey, options) {
    return initiateCall(this.getContext(), to, sessionKey, options);
  }

  /**
   * Speak to user in an active call.
   */
  async speak(callId, text) {
    return speak(this.getContext(), callId, text);
  }

  /**
   * Send DTMF digits to an active call.
   */
  async sendDtmf(callId, digits) {
    return sendDtmf(this.getContext(), callId, digits);
  }

  /**
   * Speak the initial message for a call (called when media stream connects).
   */
  async speakInitialMessage(providerCallId) {
    return speakInitialMessage(this.getContext(), providerCallId);
  }

  /**
   * Continue call: speak prompt, then wait for user's final transcript.
   */
  async continueCall(callId, prompt) {
    return continueCall(this.getContext(), callId, prompt);
  }

  /**
   * End an active call.
   */
  async endCall(callId) {
    return endCall(this.getContext(), callId);
  }

  /**
   * Build the context object handed to the manager helper functions.
   * A new object is created on every call; it references the manager's
   * current collections, provider, config and paths.
   */
  getContext() {
    const onCallAnswered = (call) => {
      this.maybeSpeakInitialMessageOnAnswered(call);
    };
    return {
      activeCalls: this.activeCalls,
      providerCallIdMap: this.providerCallIdMap,
      processedEventIds: this.processedEventIds,
      rejectedProviderCallIds: this.rejectedProviderCallIds,
      provider: this.provider,
      config: this.config,
      storePath: this.storePath,
      webhookUrl: this.webhookUrl,
      activeTurnCalls: this.activeTurnCalls,
      transcriptWaiters: this.transcriptWaiters,
      maxDurationTimers: this.maxDurationTimers,
      initialMessageInFlight: this.initialMessageInFlight,
      onCallAnswered,
    };
  }

  /**
   * Process a webhook event.
   */
  processEvent(event) {
    processEvent(this.getContext(), event);
  }

  /**
   * Whether a conversation-mode initial message should wait for the media
   * stream instead of being spoken on answer. True only for the "twilio"
   * provider with streaming enabled, when the provider exposes and returns
   * true from `isConversationStreamConnectEnabled()`.
   */
  shouldDeferConversationInitialMessageUntilStreamConnect() {
    const provider = this.provider;
    if (!provider || provider.name !== "twilio" || !this.config.streaming.enabled) return false;
    if (typeof provider.isConversationStreamConnectEnabled !== "function") return false;
    return provider.isConversationStreamConnectEnabled();
  }

  /**
   * Speak the call's initial message right after it is answered, when
   * applicable. Skipped when there is no non-empty initial message, when the
   * mode is neither "conversation" nor "notify", when conversation mode is
   * handled elsewhere (realtime enabled or deferred until stream connect), or
   * when no provider / provider call id is available.
   */
  maybeSpeakInitialMessageOnAnswered(call) {
    const initialMessage = normalizeOptionalString(call.metadata?.initialMessage) ?? "";
    if (!initialMessage) return;

    const mode = call.metadata?.mode ?? "conversation";
    if (mode === "conversation") {
      if (this.config.realtime.enabled) return;
      if (this.shouldDeferConversationInitialMessageUntilStreamConnect()) return;
    } else if (mode !== "notify") {
      return;
    }

    if (!this.provider || !call.providerCallId) return;
    this.speakInitialMessage(call.providerCallId).catch((err) => {
      console.warn(`[voice-call] Failed to speak initial message for call ${call.callId}: ${formatErrorMessage(err)}`);
    });
  }

  /**
   * Get an active call by ID.
   */
  getCall(callId) {
    return this.activeCalls.get(callId);
  }

  /**
   * Get an active call by provider call ID (e.g., Twilio CallSid).
   */
  getCallByProviderCallId(providerCallId) {
    return getCallByProviderCallId({
      activeCalls: this.activeCalls,
      providerCallIdMap: this.providerCallIdMap,
      providerCallId,
    });
  }

  /**
   * Get all active calls.
   */
  getActiveCalls() {
    return [...this.activeCalls.values()];
  }

  /**
   * Get call history (from persisted logs).
   */
  async getCallHistory(limit = 50) {
    return getCallHistoryFromStore(this.storePath, limit);
  }
};
|
|
1167
|
+
//#endregion
|
|
1168
|
+
//#region extensions/voice-call/src/realtime-fast-context.ts
|
|
1169
|
+
/** Upper bound, in characters, for a snippet returned by normalizeSnippet. */
const MAX_SNIPPET_CHARS = 700;

/** Error raised when a fast context lookup exceeds its time budget. */
var RealtimeFastContextTimeoutError = class extends Error {
  /**
   * @param timeoutMs Time budget that was exceeded, in milliseconds.
   */
  constructor(timeoutMs) {
    super(`fast context lookup timed out after ${timeoutMs}ms`);
    this.name = "RealtimeFastContextTimeoutError";
  }
};

/**
 * Collapse all whitespace runs to single spaces, trim, and cap the length.
 *
 * Text longer than MAX_SNIPPET_CHARS is cut and suffixed with "...". The cut
 * point reserves room for the whole suffix so the result never exceeds
 * MAX_SNIPPET_CHARS (previously only one character was reserved for the
 * three-character suffix, allowing results up to two characters over).
 *
 * @param {string} text Raw snippet text.
 * @returns {string} Normalized snippet of at most MAX_SNIPPET_CHARS characters.
 */
function normalizeSnippet(text) {
  const ellipsis = "...";
  const normalized = text.replace(/\s+/g, " ").trim();
  if (normalized.length <= MAX_SNIPPET_CHARS) return normalized;
  const head = normalized.slice(0, MAX_SNIPPET_CHARS - ellipsis.length).trimEnd();
  return `${head}${ellipsis}`;
}
|
|
1181
|
+
/**
 * Build the memory search query from realtime consult tool arguments.
 *
 * The parsed `question` and `context` are joined with a blank line; whichever
 * of the two is falsy is left out.
 *
 * @param args Raw consult arguments, parsed by parseRealtimeVoiceAgentConsultArgs.
 * @returns {string} Query text.
 */
function buildSearchQuery(args) {
  const { question, context } = parseRealtimeVoiceAgentConsultArgs(args);
  const parts = [question, context].filter(Boolean);
  return parts.join("\n\n");
}
|
|
1185
|
+
/**
 * Render memory search hits into the instruction text returned to the
 * realtime model.
 *
 * Each hit becomes a numbered entry of the form
 * `N. [source] path:startLine-endLine` followed by its normalized snippet.
 *
 * @param params `{ query, hits }`; every hit provides `source`, `path`,
 *   `startLine`, `endLine` and `snippet`.
 * @returns {string} Instruction text containing the question and the hits.
 */
function buildContextText(params) {
  const renderHit = (hit, index) => {
    const location = `${hit.path}:${hit.startLine}-${hit.endLine}`;
    const snippet = normalizeSnippet(hit.snippet);
    return `${index + 1}. [${hit.source}] ${location}\n${snippet}`;
  };
  const renderedHits = params.hits.map(renderHit).join("\n\n");

  const sections = [
    "Fast OpenClaw memory context found for the live caller.",
    "Use this context only if it answers the caller's question. If it is not relevant, say briefly that you do not have that context handy.",
    `Question:\n${params.query}`,
    `Context:\n${renderedHits}`,
  ];
  return sections.join("\n\n");
}
|
|
1197
|
+
/**
 * Build the instruction text used when the fast lookup produced nothing
 * useful for the caller's question.
 *
 * @param query The question that was searched for.
 * @returns {string} Instruction text telling the model to answer briefly
 *   that the context is not available.
 */
function buildMissText(query) {
  const notFound = "No relevant OpenClaw memory or session context was found quickly for the live caller.";
  const guidance = "Answer briefly that you do not have that context handy. Do not keep checking unless the caller asks you to.";
  const question = `Question:\n${query}`;
  return `${notFound}\n\n${guidance}\n\n${question}`;
}
|
|
1204
|
+
/**
 * Await `promise`, but reject with RealtimeFastContextTimeoutError if it has
 * not settled within `timeoutMs` milliseconds.
 *
 * The timer is always cleared once the race settles. The underlying promise
 * is not cancelled when the timeout wins.
 *
 * @param {Promise} promise Work to wait for.
 * @param {number} timeoutMs Time budget in milliseconds.
 * @returns {Promise} The value `promise` resolves to.
 */
async function withTimeout(promise, timeoutMs) {
  let timeoutHandle;
  const deadline = new Promise((_resolve, reject) => {
    timeoutHandle = setTimeout(() => {
      reject(new RealtimeFastContextTimeoutError(timeoutMs));
    }, timeoutMs);
  });
  try {
    return await Promise.race([promise, deadline]);
  } finally {
    if (timeoutHandle) clearTimeout(timeoutHandle);
  }
}
|
|
1214
|
+
/**
 * Run a memory search for the realtime fast-context path.
 *
 * @param params `{ cfg, agentId, sessionKey, config, query }`; `config`
 *   supplies `maxResults` and `sources` for the search.
 * @returns {Promise<object>} `{ status: "unavailable", error }` when no
 *   memory manager is active, otherwise `{ status: "hits", hits }` with the
 *   search results.
 */
async function lookupFastContext(params) {
  const { cfg, agentId, sessionKey, query, config } = params;
  const memory = await getActiveMemorySearchManager({ cfg, agentId });

  if (!memory.manager) {
    return {
      status: "unavailable",
      error: memory.error ?? "no active memory manager",
    };
  }

  const hits = await memory.manager.search(query, {
    maxResults: config.maxResults,
    sessionKey,
    sources: config.sources,
  });
  return {
    status: "hits",
    hits,
  };
}
|
|
1232
|
+
/**
 * Try to answer a realtime consult request from fast memory context.
 *
 * Returns `{ handled: false }` when the feature is disabled. When the lookup
 * is unavailable, returns no hits, fails, or times out, the outcome depends
 * on `config.fallbackToConsult`: `{ handled: false }` if set, otherwise a
 * handled result carrying the "miss" text. With hits, returns a handled
 * result carrying the rendered context text.
 *
 * @param params `{ config, args, cfg, agentId, sessionKey, logger }`.
 * @returns {Promise<{handled: boolean, result?: {text: string}}>}
 */
async function resolveRealtimeFastContextConsult(params) {
  const { config, logger } = params;
  if (!config.enabled) return { handled: false };

  const query = buildSearchQuery(params.args);

  // Shared outcome for every "nothing usable found" path.
  const missOrFallback = () =>
    config.fallbackToConsult
      ? { handled: false }
      : {
          handled: true,
          result: { text: buildMissText(query) },
        };

  try {
    const pendingLookup = lookupFastContext({
      cfg: params.cfg,
      agentId: params.agentId,
      sessionKey: params.sessionKey,
      config,
      query,
    });
    const lookup = await withTimeout(pendingLookup, config.timeoutMs);

    if (lookup.status === "unavailable") {
      logger.debug?.(`[voice-call] realtime fast context unavailable: ${lookup.error}`);
      return missOrFallback();
    }

    const { hits } = lookup;
    if (hits.length === 0) return missOrFallback();

    const text = buildContextText({ query, hits });
    return {
      handled: true,
      result: { text },
    };
  } catch (error) {
    const message = formatErrorMessage(error);
    logger.debug?.(`[voice-call] realtime fast context lookup failed: ${message}`);
    return missOrFallback();
  }
}
|
|
1271
|
+
//#endregion
|
|
1272
|
+
//#region extensions/voice-call/src/telephony-tts.ts
|
|
1273
|
+
/** Synthesis timeout used when the TTS config does not set `timeoutMs`. */
const TELEPHONY_DEFAULT_TTS_TIMEOUT_MS = 8e3;

/**
 * Create the TTS provider used for telephony playback.
 *
 * The core config is merged with the optional voice-call TTS override; the
 * merged TTS section drives the directive-override policy, the provider
 * configs and the preferred provider id.
 *
 * @param params `{ coreConfig, ttsOverride, runtime, logger }`.
 * @returns `{ synthesisTimeoutMs, synthesizeForTelephony }`, where
 *   `synthesizeForTelephony(text)` resolves to the value produced by
 *   `convertPcmToMulaw8k` and rejects when synthesis fails.
 */
function createTelephonyTtsProvider(params) {
  const { coreConfig, ttsOverride, runtime, logger } = params;
  const mergedConfig = applyTtsOverride(coreConfig, ttsOverride);
  const ttsConfig = mergedConfig.messages?.tts;
  const modelOverrides = resolveTelephonyModelOverridePolicy(readTelephonyModelOverrides(ttsConfig));
  const providerConfigs = collectTelephonyProviderConfigs(ttsConfig);
  const activeProvider = normalizeProviderId(ttsConfig?.provider);

  // Warn when synthesis succeeded on a different provider than first attempted.
  const reportFallback = (result) => {
    const { fallbackFrom, provider, attemptedProviders } = result;
    if (!fallbackFrom || !provider || fallbackFrom === provider) return;
    const attemptedChain =
      attemptedProviders && attemptedProviders.length > 0
        ? attemptedProviders.join(" -> ")
        : `${fallbackFrom} -> ${provider}`;
    logger?.warn?.(`[voice-call] Telephony TTS fallback used from=${fallbackFrom} to=${provider} attempts=${attemptedChain}`);
  };

  async function synthesizeForTelephony(text) {
    const directives = parseTtsDirectives(text, modelOverrides, {
      cfg: mergedConfig,
      providerConfigs,
      preferredProviderId: activeProvider,
    });
    if (directives.warnings.length > 0) {
      logger?.warn?.(`[voice-call] Ignored telephony TTS directive overrides (${directives.warnings.join("; ")})`);
    }

    // With directives present, speak the directive text (or the cleaned text
    // when that is empty); otherwise speak the input unchanged.
    let cleanText = text;
    if (directives.hasDirective) {
      cleanText = directives.ttsText?.trim() || directives.cleanedText.trim();
    }

    const result = await runtime.textToSpeechTelephony({
      text: cleanText,
      cfg: mergedConfig,
      overrides: directives.overrides,
    });
    if (!result.success || !result.audioBuffer || !result.sampleRate) {
      throw new Error(result.error ?? "TTS conversion failed");
    }

    reportFallback(result);
    return convertPcmToMulaw8k(result.audioBuffer, result.sampleRate);
  }

  return {
    synthesisTimeoutMs: ttsConfig?.timeoutMs ?? TELEPHONY_DEFAULT_TTS_TIMEOUT_MS,
    synthesizeForTelephony,
  };
}
|
|
1305
|
+
/**
 * Return a config whose `messages.tts` section has the override merged in.
 *
 * The input config is never mutated. It is returned unchanged (same
 * reference) when there is no override or when the merge yields nothing.
 *
 * @param coreConfig Core configuration object.
 * @param override Optional TTS override.
 * @returns The original config or a shallow copy with `messages.tts` replaced.
 */
function applyTtsOverride(coreConfig, override) {
  if (!override) return coreConfig;

  const mergedTts = mergeTtsConfig(coreConfig.messages?.tts, override);
  if (!mergedTts) return coreConfig;

  const messages = { ...coreConfig.messages, tts: mergedTts };
  return { ...coreConfig, messages };
}
|
|
1318
|
+
/**
 * Combine a base TTS config with an override.
 *
 * @param base Existing TTS config, possibly missing.
 * @param override Override TTS config, possibly missing.
 * @returns `undefined` when both are missing, the present one when only one is
 *   given, otherwise the result of `deepMergeDefined(base, override)`.
 */
function mergeTtsConfig(base, override) {
  if (base && override) return deepMergeDefined(base, override);
  if (override) return override;
  if (base) return base;
  return undefined;
}
|
|
1324
|
+
/**
 * Expand a partial model-override config into a complete policy object.
 *
 * When `overrides.enabled` resolves to a falsy value every permission is
 * `false`. Otherwise each permission falls back to its default when it is
 * null/undefined: everything defaults to `true` except `allowProvider`,
 * which defaults to `false`.
 *
 * @param overrides Optional partial policy.
 * @returns Policy with `enabled` plus the seven `allow*` flags.
 */
function resolveTelephonyModelOverridePolicy(overrides) {
  const flagDefaults = {
    allowText: true,
    allowProvider: false,
    allowVoice: true,
    allowModelId: true,
    allowVoiceSettings: true,
    allowNormalization: true,
    allowSeed: true,
  };
  const enabled = Boolean(overrides?.enabled ?? true);
  const policy = { enabled };
  for (const [flag, fallback] of Object.entries(flagDefaults)) {
    policy[flag] = enabled ? overrides?.[flag] ?? fallback : false;
  }
  return policy;
}
|
|
1347
|
+
/**
 * Read `modelOverrides` from a TTS config, accepting only non-array objects.
 *
 * @param ttsConfig TTS config, possibly missing.
 * @returns The `modelOverrides` object, or `undefined` for any other shape.
 */
function readTelephonyModelOverrides(ttsConfig) {
  const candidate = ttsConfig?.modelOverrides;
  if (candidate === null || typeof candidate !== "object") return undefined;
  return Array.isArray(candidate) ? undefined : candidate;
}
|
|
1351
|
+
/**
 * Canonicalize a provider id by trimming and lower-casing it.
 *
 * @param value Candidate id of any type.
 * @returns The canonical id, or `undefined` for non-strings and blank strings.
 */
function normalizeProviderId(value) {
  if (typeof value !== "string") return undefined;
  const canonical = value.trim().toLowerCase();
  return canonical === "" ? undefined : canonical;
}
|
|
1354
|
+
/**
 * Coerce a value into a provider config record.
 *
 * @param value Candidate config of any type.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a
 *   fresh empty object.
 */
function asProviderConfig(value) {
  const isPlainRecord = value !== null && typeof value === "object" && !Array.isArray(value);
  if (isPlainRecord) return value;
  return {};
}
|
|
1357
|
+
/**
 * Gather per-provider TTS settings keyed by normalized provider id.
 *
 * Entries under `ttsConfig.providers` are collected first. Afterwards any other
 * top-level key of `ttsConfig` that holds a non-array object and is not one of
 * the reserved option names is added, but only when that id is not already set,
 * so `providers` entries take precedence.
 *
 * @param ttsConfig TTS config, possibly missing.
 * @returns Map-like object of provider id to provider config (empty when
 *   `ttsConfig` is falsy).
 */
function collectTelephonyProviderConfigs(ttsConfig) {
  if (!ttsConfig) return {};
  const isRecord = (candidate) =>
    typeof candidate === "object" && candidate !== null && !Array.isArray(candidate);
  const collected = {};

  const declaredProviders = isRecord(ttsConfig.providers) ? ttsConfig.providers : {};
  Object.entries(declaredProviders).forEach(([providerId, providerValue]) => {
    // Fall back to the raw key when normalization yields nothing (blank key).
    const id = normalizeProviderId(providerId) ?? providerId;
    collected[id] = asProviderConfig(providerValue);
  });

  // Top-level keys that are TTS options rather than provider sections.
  const nonProviderKeys = new Set([
    "auto",
    "enabled",
    "maxTextLength",
    "mode",
    "modelOverrides",
    "persona",
    "personas",
    "prefsPath",
    "provider",
    "providers",
    "summaryModel",
    "timeoutMs",
  ]);
  for (const [key, value] of Object.entries(ttsConfig)) {
    if (nonProviderKeys.has(key) || !isRecord(value)) continue;
    const id = normalizeProviderId(key) ?? key;
    collected[id] ??= asProviderConfig(value);
  }
  return collected;
}
|
|
1386
|
+
//#endregion
|
|
1387
|
+
//#region extensions/voice-call/src/webhook/tailscale.ts
|
|
1388
|
+
/**
 * Run the `tailscale` CLI and collect its stdout.
 *
 * The returned promise never rejects: a spawn error or a timeout resolves with
 * `{ code: -1, stdout: "" }`. On timeout the child is sent SIGKILL.
 *
 * @param args CLI arguments passed to `tailscale`.
 * @param timeoutMs Maximum run time in milliseconds (default 2500).
 * @returns Promise of `{ code, stdout }`; `code` is -1 when no exit code is available.
 */
function runTailscaleCommand(args, timeoutMs = 2500) {
  return new Promise((resolve) => {
    const child = spawn("tailscale", args, { stdio: ["ignore", "pipe", "pipe"] });
    let captured = "";
    let done = false;
    let killTimer;

    // Only the first outcome (close, error or timeout) is reported.
    const settle = (code, stdout) => {
      if (done) return;
      done = true;
      clearTimeout(killTimer);
      resolve({ code, stdout });
    };

    child.stdout.on("data", (chunk) => {
      captured += chunk;
    });
    killTimer = setTimeout(() => {
      child.kill("SIGKILL");
      settle(-1, "");
    }, timeoutMs);
    child.on("error", () => settle(-1, ""));
    child.on("close", (exitCode) => settle(exitCode ?? -1, captured));
  });
}
|
|
1428
|
+
/**
 * Read this node's DNS name and id from `tailscale status --json`.
 *
 * @returns `{ dnsName, nodeId }` (each `null` when absent; a trailing dot is
 *   removed from the DNS name), or `null` when the command exits non-zero or
 *   its output cannot be processed.
 */
async function getTailscaleSelfInfo() {
  const result = await runTailscaleCommand(["status", "--json"]);
  if (result.code !== 0) return null;
  try {
    const self = JSON.parse(result.stdout).Self;
    const trimmedDns = self?.DNSName?.replace(/\.$/, "");
    return {
      dnsName: trimmedDns || null,
      nodeId: self?.ID || null,
    };
  } catch {
    // Malformed or unexpected status output is treated as "no info".
    return null;
  }
}
|
|
1441
|
+
/**
 * Resolve this node's Tailscale DNS name.
 *
 * @returns The DNS name, or `null` when it cannot be determined.
 */
async function getTailscaleDnsName() {
  const selfInfo = await getTailscaleSelfInfo();
  if (selfInfo == null) return null;
  return selfInfo.dnsName ?? null;
}
|
|
1444
|
+
/**
 * Configure a Tailscale serve/funnel route for a local URL.
 *
 * @param opts `{ mode, path, localUrl }`: the tailscale subcommand, the path to
 *   expose and the local target URL.
 * @returns The public `https://<dns-name><path>` URL, or `null` when the DNS
 *   name is unavailable or the CLI command exits non-zero (a warning is logged).
 */
async function setupTailscaleExposureRoute(opts) {
  const dnsName = await getTailscaleDnsName();
  if (!dnsName) {
    console.warn("[voice-call] Could not get Tailscale DNS name");
    return null;
  }
  const cliArgs = [opts.mode, "--bg", "--yes", "--set-path", opts.path, opts.localUrl];
  const outcome = await runTailscaleCommand(cliArgs);
  if (outcome.code !== 0) {
    console.warn(`[voice-call] Tailscale ${opts.mode} failed`);
    return null;
  }
  const publicUrl = `https://${dnsName}${opts.path}`;
  console.log(`[voice-call] Tailscale ${opts.mode} active: ${publicUrl}`);
  return publicUrl;
}
|
|
1466
|
+
/**
 * Turn off a Tailscale serve/funnel route by running `tailscale <mode> off <path>`.
 *
 * @param opts `{ mode, path }` of the route to remove.
 * @returns Promise that resolves once the command has finished; its result is ignored.
 */
async function cleanupTailscaleExposureRoute(opts) {
  const offArgs = [opts.mode, "off", opts.path];
  await runTailscaleCommand(offArgs);
}
|
|
1473
|
+
/**
 * Expose the local webhook server through Tailscale according to the config.
 *
 * @param config Voice-call config; reads `tailscale.mode`, `tailscale.path`,
 *   `serve.port` and `serve.path`.
 * @returns `null` when Tailscale mode is "off", otherwise the result of
 *   `setupTailscaleExposureRoute`. Any mode other than "funnel" is treated as "serve".
 */
async function setupTailscaleExposure(config) {
  const requestedMode = config.tailscale.mode;
  if (requestedMode === "off") return null;
  const { port, path: servePath } = config.serve;
  return setupTailscaleExposureRoute({
    mode: requestedMode === "funnel" ? "funnel" : "serve",
    path: config.tailscale.path,
    localUrl: `http://127.0.0.1:${port}${servePath}`,
  });
}
|
|
1483
|
+
/**
 * Remove the Tailscale route created for the webhook server, if any.
 *
 * @param config Voice-call config; reads `tailscale.mode` and `tailscale.path`.
 *   Nothing happens when the mode is "off"; any mode other than "funnel" is
 *   treated as "serve".
 */
async function cleanupTailscaleExposure(config) {
  const { mode: requestedMode } = config.tailscale;
  if (requestedMode === "off") return;
  const mode = requestedMode === "funnel" ? "funnel" : "serve";
  await cleanupTailscaleExposureRoute({ mode, path: config.tailscale.path });
}
|
|
1490
|
+
//#endregion
|
|
1491
|
+
//#region extensions/voice-call/src/tunnel.ts
|
|
1492
|
+
/**
 * Start an ngrok tunnel to expose the local webhook server.
 *
 * Uses the ngrok CLI which must be installed: https://ngrok.com/download
 *
 * The public URL is taken from ngrok's JSON log output on stdout. Startup
 * fails (the promise rejects) on a 30s timeout, on an `ERR_NGROK` message on
 * stderr, on a spawn error, or when ngrok exits before reporting a URL. On
 * timeout and on a stderr-reported failure the child is terminated so it is
 * not left running.
 *
 * @param config `{ port, path, authToken?, domain? }`.
 * @returns Promise of `{ publicUrl, provider: "ngrok", stop }`, where
 *   `publicUrl` is the tunnel URL followed by `config.path` and `stop()`
 *   terminates ngrok, waiting at most 2s for it to close.
 *
 * @example
 * const tunnel = await startNgrokTunnel({ port: 3334, path: '/voice/webhook' });
 * console.log('Public URL:', tunnel.publicUrl);
 * // Later: await tunnel.stop();
 */
async function startNgrokTunnel(config) {
  if (config.authToken) await runNgrokCommand(["config", "add-authtoken", config.authToken]);
  const args = ["http", String(config.port), "--log", "stdout", "--log-format", "json"];
  if (config.domain) args.push("--domain", config.domain);
  return new Promise((resolve, reject) => {
    const proc = spawn("ngrok", args, { stdio: ["ignore", "pipe", "pipe"] });
    let settled = false;
    let publicUrl = null;
    let outputBuffer = "";
    let startupTimer;

    const hasExited = () => proc.exitCode !== null || proc.signalCode !== null;

    // Reject once; optionally terminate the child so a failed startup leaves no orphan.
    const fail = (error, killChild) => {
      if (settled) return;
      settled = true;
      clearTimeout(startupTimer);
      if (killChild && !hasExited()) proc.kill("SIGTERM");
      reject(error);
    };

    const stop = async () => {
      // Nothing to wait for when ngrok is already gone.
      if (hasExited()) return;
      proc.kill("SIGTERM");
      await new Promise((done) => {
        const fallback = setTimeout(done, 2e3);
        proc.once("close", () => {
          clearTimeout(fallback);
          done();
        });
      });
    };

    startupTimer = setTimeout(() => {
      fail(new Error("ngrok startup timed out (30s)"), true);
    }, 3e4);

    const processLine = (line) => {
      let log;
      try {
        log = JSON.parse(line);
      } catch {
        // Non-JSON output lines carry no tunnel information.
        return;
      }
      if (log === null || typeof log !== "object") return;
      if (log.msg === "started tunnel" && log.url) publicUrl = log.url;
      if (log.addr && log.url && !publicUrl) publicUrl = log.url;
      if (!publicUrl || settled) return;
      settled = true;
      clearTimeout(startupTimer);
      const fullUrl = publicUrl + config.path;
      console.log(`[voice-call] ngrok tunnel active: ${fullUrl}`);
      resolve({ publicUrl: fullUrl, provider: "ngrok", stop });
    };

    proc.stdout.on("data", (data) => {
      // Keep the trailing partial line buffered until its newline arrives.
      outputBuffer += data.toString();
      const lines = outputBuffer.split("\n");
      outputBuffer = lines.pop() || "";
      for (const line of lines) if (line.trim()) processLine(line);
    });
    proc.stderr.on("data", (data) => {
      const msg = data.toString();
      if (msg.includes("ERR_NGROK")) fail(new Error(`ngrok error: ${msg}`), true);
    });
    proc.on("error", (err) => {
      fail(new Error(`Failed to start ngrok: ${err.message}`), false);
    });
    proc.on("close", (code) => {
      fail(new Error(`ngrok exited unexpectedly with code ${code}`), false);
    });
  });
}
|
|
1589
|
+
/**
 * Run an ngrok command and wait for completion.
 *
 * @param args CLI arguments passed to `ngrok`.
 * @returns Promise of the command's stdout when it exits with code 0. Rejects
 *   with the spawn error, or with an error carrying stderr (or stdout when
 *   stderr is empty) for any other exit code.
 */
async function runNgrokCommand(args) {
  return new Promise((resolve, reject) => {
    const child = spawn("ngrok", args, { stdio: ["ignore", "pipe", "pipe"] });
    const outChunks = [];
    const errChunks = [];
    child.stdout.on("data", (chunk) => outChunks.push(chunk.toString()));
    child.stderr.on("data", (chunk) => errChunks.push(chunk.toString()));
    child.on("close", (exitCode) => {
      const stdout = outChunks.join("");
      if (exitCode === 0) {
        resolve(stdout);
        return;
      }
      reject(new Error(`ngrok command failed: ${errChunks.join("") || stdout}`));
    });
    child.on("error", reject);
  });
}
|
|
1614
|
+
/**
 * Start a Tailscale serve/funnel tunnel.
 *
 * @param config `{ mode, port, path }`; a leading "/" is added to `path` when missing.
 * @returns Promise of `{ publicUrl, provider: "tailscale-<mode>", stop }`.
 *   Rejects when the Tailscale DNS name is unavailable, when the CLI exits
 *   non-zero, on a spawn error, or after a 10s timeout (the child is then
 *   sent SIGKILL).
 */
async function startTailscaleTunnel(config) {
  const dnsName = await getTailscaleDnsName();
  if (!dnsName) throw new Error("Could not get Tailscale DNS name. Is Tailscale running?");
  const routePath = config.path.startsWith("/") ? config.path : `/${config.path}`;
  const cliArgs = [
    config.mode,
    "--bg",
    "--yes",
    "--set-path",
    routePath,
    `http://127.0.0.1:${config.port}${routePath}`,
  ];
  return new Promise((resolve, reject) => {
    const child = spawn("tailscale", cliArgs, { stdio: ["ignore", "pipe", "pipe"] });
    const killTimer = setTimeout(() => {
      child.kill("SIGKILL");
      reject(new Error(`Tailscale ${config.mode} timed out`));
    }, 1e4);
    child.on("close", (exitCode) => {
      clearTimeout(killTimer);
      if (exitCode !== 0) {
        reject(new Error(`Tailscale ${config.mode} failed with code ${exitCode}`));
        return;
      }
      const publicUrl = `https://${dnsName}${routePath}`;
      console.log(`[voice-call] Tailscale ${config.mode} active: ${publicUrl}`);
      resolve({
        publicUrl,
        provider: `tailscale-${config.mode}`,
        stop: async () => {
          await stopTailscaleTunnel(config.mode, routePath);
        },
      });
    });
    child.on("error", (err) => {
      clearTimeout(killTimer);
      reject(err);
    });
  });
}
|
|
1659
|
+
/**
 * Stop a Tailscale serve/funnel tunnel.
 *
 * Best-effort: the returned promise always resolves, whether the CLI
 * succeeds, fails, cannot be spawned at all, or has to be killed after 5s.
 *
 * @param mode Tailscale subcommand ("serve" or "funnel").
 * @param path Route path to turn off.
 * @returns Promise that resolves (with no value) once the attempt is over.
 */
async function stopTailscaleTunnel(mode, path) {
  return new Promise((resolve) => {
    const proc = spawn("tailscale", [mode, "off", path], { stdio: "ignore" });
    let timeout;
    const finish = () => {
      clearTimeout(timeout);
      resolve();
    };
    timeout = setTimeout(() => {
      proc.kill("SIGKILL");
      resolve();
    }, 5e3);
    // Without an 'error' listener a failed spawn (e.g. tailscale not on PATH)
    // would surface as an unhandled 'error' event and crash the process.
    proc.on("error", finish);
    proc.on("close", finish);
  });
}
|
|
1679
|
+
/**
 * Start a tunnel based on configuration.
 *
 * @param config `{ provider, port, path, ngrokAuthToken?, ngrokDomain? }`.
 * @returns The tunnel handle from the matching starter for "ngrok",
 *   "tailscale-serve" or "tailscale-funnel"; `null` for any other provider.
 */
async function startTunnel(config) {
  const { provider, port, path } = config;
  if (provider === "ngrok") {
    return startNgrokTunnel({
      port,
      path,
      authToken: config.ngrokAuthToken,
      domain: config.ngrokDomain,
    });
  }
  if (provider === "tailscale-serve") return startTailscaleTunnel({ mode: "serve", port, path });
  if (provider === "tailscale-funnel") return startTailscaleTunnel({ mode: "funnel", port, path });
  return null;
}
|
|
1703
|
+
//#endregion
|
|
1704
|
+
//#region extensions/voice-call/src/webhook-exposure.ts
|
|
1705
|
+
/**
 * Tell whether a telephony provider needs a publicly reachable webhook.
 *
 * @param providerName Provider identifier.
 * @returns `true` for "twilio", "telnyx" and "plivo"; `false` otherwise.
 */
function providerRequiresPublicWebhook(providerName) {
  const hostedProviders = ["twilio", "telnyx", "plivo"];
  return hostedProviders.includes(providerName);
}
|
|
1708
|
+
/**
 * Tell whether a hostname is considered local-only for webhook delivery.
 *
 * @param hostname Hostname or IP taken from a webhook URL.
 * @returns Whatever `isBlockedHostnameOrIp` reports for the hostname.
 */
function isLocalOnlyWebhookHost(hostname) {
  const blocked = isBlockedHostnameOrIp(hostname);
  return blocked;
}
|
|
1711
|
+
/**
 * Tell whether a webhook URL points at a local-only host.
 *
 * @param webhookUrl Absolute webhook URL.
 * @returns The local-only verdict for the URL's hostname; `false` when the URL
 *   cannot be parsed or the check throws.
 */
function isProviderUnreachableWebhookUrl(webhookUrl) {
  try {
    const { hostname } = new URL(webhookUrl);
    return isLocalOnlyWebhookHost(hostname);
  } catch {
    return false;
  }
}
|
|
1718
|
+
/**
 * Summarize whether the configured webhook can be reached by the provider.
 *
 * Checks, in order: the mock provider (always fine), an explicit `publicUrl`
 * (rejected when it points at a local/private host), a tunnel provider other
 * than "none", and a Tailscale mode other than "off".
 *
 * @param config Voice-call config; reads `provider`, `publicUrl`,
 *   `tunnel.provider` and `tailscale.mode`.
 * @returns `{ ok, configured, message }`.
 */
function resolveWebhookExposureStatus(config) {
  const status = (ok, configured, message) => ({ ok, configured, message });

  if (config.provider === "mock") {
    return status(true, true, "Mock provider does not need a public webhook");
  }
  if (config.publicUrl) {
    return isProviderUnreachableWebhookUrl(config.publicUrl)
      ? status(
          false,
          true,
          `Public webhook URL is local/private and cannot be reached by ${config.provider ?? "the provider"}: ${config.publicUrl}`,
        )
      : status(true, true, `Public webhook URL configured: ${config.publicUrl}`);
  }
  const tunnelProvider = config.tunnel?.provider;
  if (tunnelProvider && tunnelProvider !== "none") {
    return status(true, true, "Webhook exposure configured through tunnel");
  }
  const tailscaleMode = config.tailscale?.mode;
  if (tailscaleMode && tailscaleMode !== "off") {
    return status(true, true, "Webhook exposure configured through Tailscale");
  }
  return status(
    false,
    false,
    "Set publicUrl or configure tunnel/tailscale so the provider can reach webhooks",
  );
}
|
|
1752
|
+
//#endregion
|
|
1753
|
+
//#region extensions/voice-call/src/media-stream.ts
|
|
1754
|
+
/** Default time (ms) a socket may stay connected before its stream has started. */
const DEFAULT_PRE_START_TIMEOUT_MS = 5000;
/** Default cap on sockets that are connected but have not started a stream. */
const DEFAULT_MAX_PENDING_CONNECTIONS = 32;
/** Default cap on not-yet-started sockets per client IP. */
const DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP = 4;
/** Default cap on total media stream connections, including in-flight upgrades. */
const DEFAULT_MAX_CONNECTIONS = 128;
/** Largest inbound WebSocket message accepted (64 KiB). */
const MAX_INBOUND_MESSAGE_BYTES = 1024 * 64;
/** Send-buffer size (1 MiB) above which a stream is closed for backpressure. */
const MAX_WS_BUFFERED_BYTES = 1024 ** 2;
/** Maximum characters of a WebSocket close reason written to the log. */
const CLOSE_REASON_LOG_MAX_CHARS = 120;
|
|
1761
|
+
/**
 * Make untrusted text safe for a single log line.
 *
 * Control characters become spaces, whitespace runs collapse to one space and
 * the result is trimmed; text longer than `maxChars` is cut and suffixed with "...".
 *
 * @param value Text to sanitize.
 * @param maxChars Maximum number of characters kept before truncation.
 * @returns The sanitized, possibly truncated text.
 */
function sanitizeLogText(value, maxChars) {
  const withoutControls = value.replace(/\p{Cc}/gu, " ");
  const collapsed = withoutControls.replace(/\s+/g, " ").trim();
  return collapsed.length > maxChars ? `${collapsed.slice(0, maxChars)}...` : collapsed;
}
|
|
1766
|
+
/**
 * Convert a WebSocket message payload into a single Buffer.
 *
 * @param data A Buffer (returned as-is), an array of Buffers (concatenated) or
 *   any other value accepted by `Buffer.from`.
 * @returns The payload as one Buffer.
 */
function normalizeWsMessageData(data) {
  if (Buffer.isBuffer(data)) return data;
  return Array.isArray(data) ? Buffer.concat(data) : Buffer.from(data);
}
|
|
1771
|
+
/**
|
|
1772
|
+
* Manages WebSocket connections for Twilio media streams.
|
|
1773
|
+
*/
|
|
1774
|
+
var MediaStreamHandler = class {
|
|
1775
|
+
constructor(config) {
|
|
1776
|
+
this.wss = null;
|
|
1777
|
+
this.sessions = /* @__PURE__ */ new Map();
|
|
1778
|
+
this.pendingConnections = /* @__PURE__ */ new Map();
|
|
1779
|
+
this.pendingByIp = /* @__PURE__ */ new Map();
|
|
1780
|
+
this.inflightUpgrades = 0;
|
|
1781
|
+
this.ttsQueues = /* @__PURE__ */ new Map();
|
|
1782
|
+
this.ttsPlaying = /* @__PURE__ */ new Map();
|
|
1783
|
+
this.ttsActiveControllers = /* @__PURE__ */ new Map();
|
|
1784
|
+
this.config = config;
|
|
1785
|
+
this.preStartTimeoutMs = config.preStartTimeoutMs ?? DEFAULT_PRE_START_TIMEOUT_MS;
|
|
1786
|
+
this.maxPendingConnections = config.maxPendingConnections ?? DEFAULT_MAX_PENDING_CONNECTIONS;
|
|
1787
|
+
this.maxPendingConnectionsPerIp = config.maxPendingConnectionsPerIp ?? DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP;
|
|
1788
|
+
this.maxConnections = config.maxConnections ?? DEFAULT_MAX_CONNECTIONS;
|
|
1789
|
+
}
|
|
1790
|
+
/**
|
|
1791
|
+
* Handle WebSocket upgrade for media stream connections.
|
|
1792
|
+
*/
|
|
1793
|
+
handleUpgrade(request, socket, head) {
|
|
1794
|
+
if (!this.wss) {
|
|
1795
|
+
this.wss = new WebSocketServer({
|
|
1796
|
+
noServer: true,
|
|
1797
|
+
maxPayload: MAX_INBOUND_MESSAGE_BYTES
|
|
1798
|
+
});
|
|
1799
|
+
this.wss.on("connection", (ws, req) => this.handleConnection(ws, req));
|
|
1800
|
+
}
|
|
1801
|
+
if (this.getCurrentConnectionCount() >= this.maxConnections) {
|
|
1802
|
+
this.rejectUpgrade(socket, 503, "Too many media stream connections");
|
|
1803
|
+
return;
|
|
1804
|
+
}
|
|
1805
|
+
this.inflightUpgrades += 1;
|
|
1806
|
+
let released = false;
|
|
1807
|
+
const releaseUpgradeReservation = () => {
|
|
1808
|
+
if (released) return;
|
|
1809
|
+
released = true;
|
|
1810
|
+
this.inflightUpgrades = Math.max(0, this.inflightUpgrades - 1);
|
|
1811
|
+
};
|
|
1812
|
+
const handleUpgradeAbort = () => {
|
|
1813
|
+
socket.removeListener("error", handleUpgradeAbort);
|
|
1814
|
+
socket.removeListener("close", handleUpgradeAbort);
|
|
1815
|
+
releaseUpgradeReservation();
|
|
1816
|
+
};
|
|
1817
|
+
socket.once("error", handleUpgradeAbort);
|
|
1818
|
+
socket.once("close", handleUpgradeAbort);
|
|
1819
|
+
try {
|
|
1820
|
+
this.wss.handleUpgrade(request, socket, head, (ws) => {
|
|
1821
|
+
socket.removeListener("error", handleUpgradeAbort);
|
|
1822
|
+
socket.removeListener("close", handleUpgradeAbort);
|
|
1823
|
+
releaseUpgradeReservation();
|
|
1824
|
+
this.wss?.emit("connection", ws, request);
|
|
1825
|
+
});
|
|
1826
|
+
} catch (error) {
|
|
1827
|
+
socket.removeListener("error", handleUpgradeAbort);
|
|
1828
|
+
socket.removeListener("close", handleUpgradeAbort);
|
|
1829
|
+
releaseUpgradeReservation();
|
|
1830
|
+
throw error;
|
|
1831
|
+
}
|
|
1832
|
+
}
|
|
1833
|
+
/**
|
|
1834
|
+
* Handle new WebSocket connection from Twilio.
|
|
1835
|
+
*/
|
|
1836
|
+
async handleConnection(ws, _request) {
|
|
1837
|
+
let session = null;
|
|
1838
|
+
const streamToken = this.getStreamToken(_request);
|
|
1839
|
+
const ip = this.getClientIp(_request);
|
|
1840
|
+
if (!this.registerPendingConnection(ws, ip)) {
|
|
1841
|
+
ws.close(1013, "Too many pending media stream connections");
|
|
1842
|
+
return;
|
|
1843
|
+
}
|
|
1844
|
+
ws.on("message", async (data) => {
|
|
1845
|
+
try {
|
|
1846
|
+
const raw = normalizeWsMessageData(data);
|
|
1847
|
+
const message = JSON.parse(raw.toString("utf8"));
|
|
1848
|
+
switch (message.event) {
|
|
1849
|
+
case "connected":
|
|
1850
|
+
console.log("[MediaStream] Twilio connected");
|
|
1851
|
+
break;
|
|
1852
|
+
case "start":
|
|
1853
|
+
session = this.handleStart(ws, message, streamToken);
|
|
1854
|
+
if (session) this.clearPendingConnection(ws);
|
|
1855
|
+
break;
|
|
1856
|
+
case "media":
|
|
1857
|
+
if (session && message.media?.payload) {
|
|
1858
|
+
const audioBuffer = Buffer.from(message.media.payload, "base64");
|
|
1859
|
+
session.sttSession.sendAudio(audioBuffer);
|
|
1860
|
+
}
|
|
1861
|
+
break;
|
|
1862
|
+
case "stop":
|
|
1863
|
+
if (session) {
|
|
1864
|
+
this.handleStop(session);
|
|
1865
|
+
session = null;
|
|
1866
|
+
}
|
|
1867
|
+
break;
|
|
1868
|
+
case "clear":
|
|
1869
|
+
case "mark": break;
|
|
1870
|
+
}
|
|
1871
|
+
} catch (error) {
|
|
1872
|
+
console.error("[MediaStream] Error processing message:", error);
|
|
1873
|
+
}
|
|
1874
|
+
});
|
|
1875
|
+
ws.on("close", (code, reason) => {
|
|
1876
|
+
const reasonText = sanitizeLogText(Buffer.isBuffer(reason) ? reason.toString("utf8") : String(reason || ""), CLOSE_REASON_LOG_MAX_CHARS);
|
|
1877
|
+
console.log(`[MediaStream] WebSocket closed (code: ${code}, reason: ${reasonText || "none"})`);
|
|
1878
|
+
this.clearPendingConnection(ws);
|
|
1879
|
+
if (session) this.handleStop(session);
|
|
1880
|
+
});
|
|
1881
|
+
ws.on("error", (error) => {
|
|
1882
|
+
console.error("[MediaStream] WebSocket error:", error);
|
|
1883
|
+
});
|
|
1884
|
+
}
|
|
1885
|
+
/**
|
|
1886
|
+
* Handle stream start event.
|
|
1887
|
+
*/
|
|
1888
|
+
handleStart(ws, message, streamToken) {
|
|
1889
|
+
const streamSid = message.streamSid || "";
|
|
1890
|
+
const callSid = message.start?.callSid || "";
|
|
1891
|
+
const effectiveToken = message.start?.customParameters?.token ?? streamToken;
|
|
1892
|
+
console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
|
|
1893
|
+
if (!callSid) {
|
|
1894
|
+
console.warn("[MediaStream] Missing callSid; closing stream");
|
|
1895
|
+
ws.close(1008, "Missing callSid");
|
|
1896
|
+
return null;
|
|
1897
|
+
}
|
|
1898
|
+
if (this.config.shouldAcceptStream && !this.config.shouldAcceptStream({
|
|
1899
|
+
callId: callSid,
|
|
1900
|
+
streamSid,
|
|
1901
|
+
token: effectiveToken
|
|
1902
|
+
})) {
|
|
1903
|
+
console.warn(`[MediaStream] Rejecting stream for unknown call: ${callSid}`);
|
|
1904
|
+
ws.close(1008, "Unknown call");
|
|
1905
|
+
return null;
|
|
1906
|
+
}
|
|
1907
|
+
const session = {
|
|
1908
|
+
callId: callSid,
|
|
1909
|
+
streamSid,
|
|
1910
|
+
ws,
|
|
1911
|
+
sttSession: this.config.transcriptionProvider.createSession({
|
|
1912
|
+
providerConfig: this.config.providerConfig,
|
|
1913
|
+
onPartial: (partial) => {
|
|
1914
|
+
this.config.onPartialTranscript?.(callSid, partial);
|
|
1915
|
+
},
|
|
1916
|
+
onTranscript: (transcript) => {
|
|
1917
|
+
this.config.onTranscript?.(callSid, transcript);
|
|
1918
|
+
},
|
|
1919
|
+
onSpeechStart: () => {
|
|
1920
|
+
this.config.onSpeechStart?.(callSid);
|
|
1921
|
+
},
|
|
1922
|
+
onError: (error) => {
|
|
1923
|
+
console.warn("[MediaStream] Transcription session error:", error.message);
|
|
1924
|
+
}
|
|
1925
|
+
})
|
|
1926
|
+
};
|
|
1927
|
+
this.sessions.set(streamSid, session);
|
|
1928
|
+
this.config.onConnect?.(callSid, streamSid);
|
|
1929
|
+
this.connectTranscriptionAndNotify(session);
|
|
1930
|
+
return session;
|
|
1931
|
+
}
|
|
1932
|
+
async connectTranscriptionAndNotify(session) {
|
|
1933
|
+
try {
|
|
1934
|
+
await session.sttSession.connect();
|
|
1935
|
+
} catch (error) {
|
|
1936
|
+
console.warn("[MediaStream] STT connection failed; closing media stream:", error instanceof Error ? error.message : String(error));
|
|
1937
|
+
if (this.sessions.get(session.streamSid) === session && session.ws.readyState === WebSocket$1.OPEN) session.ws.close(1011, "STT connection failed");
|
|
1938
|
+
else session.sttSession.close();
|
|
1939
|
+
return;
|
|
1940
|
+
}
|
|
1941
|
+
if (this.sessions.get(session.streamSid) !== session || session.ws.readyState !== WebSocket$1.OPEN) {
|
|
1942
|
+
session.sttSession.close();
|
|
1943
|
+
return;
|
|
1944
|
+
}
|
|
1945
|
+
this.config.onTranscriptionReady?.(session.callId, session.streamSid);
|
|
1946
|
+
}
|
|
1947
|
+
/**
|
|
1948
|
+
* Handle stream stop event.
|
|
1949
|
+
*/
|
|
1950
|
+
handleStop(session) {
|
|
1951
|
+
console.log(`[MediaStream] Stream stopped: ${session.streamSid}`);
|
|
1952
|
+
this.clearTtsState(session.streamSid);
|
|
1953
|
+
session.sttSession.close();
|
|
1954
|
+
this.sessions.delete(session.streamSid);
|
|
1955
|
+
this.config.onDisconnect?.(session.callId, session.streamSid);
|
|
1956
|
+
}
|
|
1957
|
+
getStreamToken(request) {
|
|
1958
|
+
if (!request.url || !request.headers.host) return;
|
|
1959
|
+
try {
|
|
1960
|
+
return new URL(request.url, `http://${request.headers.host}`).searchParams.get("token") ?? void 0;
|
|
1961
|
+
} catch {
|
|
1962
|
+
return;
|
|
1963
|
+
}
|
|
1964
|
+
}
|
|
1965
|
+
getClientIp(request) {
|
|
1966
|
+
const resolvedIp = this.config.resolveClientIp?.(request)?.trim();
|
|
1967
|
+
if (resolvedIp) return resolvedIp;
|
|
1968
|
+
return request.socket.remoteAddress || "unknown";
|
|
1969
|
+
}
|
|
1970
|
+
getCurrentConnectionCount() {
|
|
1971
|
+
return this.wss ? this.wss.clients.size + this.inflightUpgrades : this.inflightUpgrades;
|
|
1972
|
+
}
|
|
1973
|
+
registerPendingConnection(ws, ip) {
|
|
1974
|
+
if (this.pendingConnections.size >= this.maxPendingConnections) {
|
|
1975
|
+
console.warn("[MediaStream] Rejecting connection: pending connection limit reached");
|
|
1976
|
+
return false;
|
|
1977
|
+
}
|
|
1978
|
+
const pendingForIp = this.pendingByIp.get(ip) ?? 0;
|
|
1979
|
+
if (pendingForIp >= this.maxPendingConnectionsPerIp) {
|
|
1980
|
+
console.warn(`[MediaStream] Rejecting connection: pending per-IP limit reached (${ip})`);
|
|
1981
|
+
return false;
|
|
1982
|
+
}
|
|
1983
|
+
const timeout = setTimeout(() => {
|
|
1984
|
+
if (!this.pendingConnections.has(ws)) return;
|
|
1985
|
+
console.warn(`[MediaStream] Closing pre-start idle connection after ${this.preStartTimeoutMs}ms (${ip})`);
|
|
1986
|
+
ws.close(1008, "Start timeout");
|
|
1987
|
+
}, this.preStartTimeoutMs);
|
|
1988
|
+
timeout.unref?.();
|
|
1989
|
+
this.pendingConnections.set(ws, {
|
|
1990
|
+
ip,
|
|
1991
|
+
timeout
|
|
1992
|
+
});
|
|
1993
|
+
this.pendingByIp.set(ip, pendingForIp + 1);
|
|
1994
|
+
return true;
|
|
1995
|
+
}
|
|
1996
|
+
clearPendingConnection(ws) {
|
|
1997
|
+
const pending = this.pendingConnections.get(ws);
|
|
1998
|
+
if (!pending) return;
|
|
1999
|
+
clearTimeout(pending.timeout);
|
|
2000
|
+
this.pendingConnections.delete(ws);
|
|
2001
|
+
const current = this.pendingByIp.get(pending.ip) ?? 0;
|
|
2002
|
+
if (current <= 1) {
|
|
2003
|
+
this.pendingByIp.delete(pending.ip);
|
|
2004
|
+
return;
|
|
2005
|
+
}
|
|
2006
|
+
this.pendingByIp.set(pending.ip, current - 1);
|
|
2007
|
+
}
|
|
2008
|
+
rejectUpgrade(socket, statusCode, message) {
|
|
2009
|
+
const statusText = statusCode === 429 ? "Too Many Requests" : "Service Unavailable";
|
|
2010
|
+
const body = `${message}\n`;
|
|
2011
|
+
socket.write(`HTTP/1.1 ${statusCode} ${statusText}\r\nConnection: close\r
|
|
2012
|
+
Content-Type: text/plain; charset=utf-8\r
|
|
2013
|
+
Content-Length: ${Buffer.byteLength(body)}\r\n\r
|
|
2014
|
+
` + body);
|
|
2015
|
+
socket.destroy();
|
|
2016
|
+
}
|
|
2017
|
+
/**
|
|
2018
|
+
* Get an active session with an open WebSocket, or undefined if unavailable.
|
|
2019
|
+
*/
|
|
2020
|
+
getOpenSession(streamSid) {
|
|
2021
|
+
const session = this.sessions.get(streamSid);
|
|
2022
|
+
return session?.ws.readyState === WebSocket$1.OPEN ? session : void 0;
|
|
2023
|
+
}
|
|
2024
|
+
/**
|
|
2025
|
+
* Send a message to a stream's WebSocket if available.
|
|
2026
|
+
*/
|
|
2027
|
+
sendToStream(streamSid, message) {
|
|
2028
|
+
const session = this.sessions.get(streamSid);
|
|
2029
|
+
if (!session) return {
|
|
2030
|
+
sent: false,
|
|
2031
|
+
bufferedBeforeBytes: 0,
|
|
2032
|
+
bufferedAfterBytes: 0
|
|
2033
|
+
};
|
|
2034
|
+
const readyState = session.ws.readyState;
|
|
2035
|
+
const bufferedBeforeBytes = session.ws.bufferedAmount;
|
|
2036
|
+
if (readyState !== WebSocket$1.OPEN) return {
|
|
2037
|
+
sent: false,
|
|
2038
|
+
readyState,
|
|
2039
|
+
bufferedBeforeBytes,
|
|
2040
|
+
bufferedAfterBytes: session.ws.bufferedAmount
|
|
2041
|
+
};
|
|
2042
|
+
if (bufferedBeforeBytes > MAX_WS_BUFFERED_BYTES) {
|
|
2043
|
+
try {
|
|
2044
|
+
session.ws.close(1013, "Backpressure: send buffer exceeded");
|
|
2045
|
+
} catch {}
|
|
2046
|
+
return {
|
|
2047
|
+
sent: false,
|
|
2048
|
+
readyState,
|
|
2049
|
+
bufferedBeforeBytes,
|
|
2050
|
+
bufferedAfterBytes: session.ws.bufferedAmount
|
|
2051
|
+
};
|
|
2052
|
+
}
|
|
2053
|
+
try {
|
|
2054
|
+
session.ws.send(JSON.stringify(message));
|
|
2055
|
+
const bufferedAfterBytes = session.ws.bufferedAmount;
|
|
2056
|
+
if (bufferedAfterBytes > MAX_WS_BUFFERED_BYTES) {
|
|
2057
|
+
try {
|
|
2058
|
+
session.ws.close(1013, "Backpressure: send buffer exceeded");
|
|
2059
|
+
} catch {}
|
|
2060
|
+
return {
|
|
2061
|
+
sent: false,
|
|
2062
|
+
readyState,
|
|
2063
|
+
bufferedBeforeBytes,
|
|
2064
|
+
bufferedAfterBytes
|
|
2065
|
+
};
|
|
2066
|
+
}
|
|
2067
|
+
return {
|
|
2068
|
+
sent: true,
|
|
2069
|
+
readyState,
|
|
2070
|
+
bufferedBeforeBytes,
|
|
2071
|
+
bufferedAfterBytes
|
|
2072
|
+
};
|
|
2073
|
+
} catch {
|
|
2074
|
+
return {
|
|
2075
|
+
sent: false,
|
|
2076
|
+
readyState,
|
|
2077
|
+
bufferedBeforeBytes,
|
|
2078
|
+
bufferedAfterBytes: session.ws.bufferedAmount
|
|
2079
|
+
};
|
|
2080
|
+
}
|
|
2081
|
+
}
|
|
2082
|
+
/**
|
|
2083
|
+
* Send audio to a specific stream (for TTS playback).
|
|
2084
|
+
* Audio should be mu-law encoded at 8kHz mono.
|
|
2085
|
+
*/
|
|
2086
|
+
sendAudio(streamSid, muLawAudio) {
|
|
2087
|
+
return this.sendToStream(streamSid, {
|
|
2088
|
+
event: "media",
|
|
2089
|
+
streamSid,
|
|
2090
|
+
media: { payload: muLawAudio.toString("base64") }
|
|
2091
|
+
});
|
|
2092
|
+
}
|
|
2093
|
+
/**
|
|
2094
|
+
* Send a mark event to track audio playback position.
|
|
2095
|
+
*/
|
|
2096
|
+
sendMark(streamSid, name) {
|
|
2097
|
+
return this.sendToStream(streamSid, {
|
|
2098
|
+
event: "mark",
|
|
2099
|
+
streamSid,
|
|
2100
|
+
mark: { name }
|
|
2101
|
+
});
|
|
2102
|
+
}
|
|
2103
|
+
/**
|
|
2104
|
+
* Clear audio buffer (interrupt playback).
|
|
2105
|
+
*/
|
|
2106
|
+
clearAudio(streamSid) {
|
|
2107
|
+
return this.sendToStream(streamSid, {
|
|
2108
|
+
event: "clear",
|
|
2109
|
+
streamSid
|
|
2110
|
+
});
|
|
2111
|
+
}
|
|
2112
|
+
/**
|
|
2113
|
+
* Queue a TTS operation for sequential playback.
|
|
2114
|
+
* Only one TTS operation plays at a time per stream to prevent overlap.
|
|
2115
|
+
*/
|
|
2116
|
+
async queueTts(streamSid, playFn) {
|
|
2117
|
+
const queue = this.getTtsQueue(streamSid);
|
|
2118
|
+
let resolveEntry;
|
|
2119
|
+
let rejectEntry;
|
|
2120
|
+
const promise = new Promise((resolve, reject) => {
|
|
2121
|
+
resolveEntry = resolve;
|
|
2122
|
+
rejectEntry = reject;
|
|
2123
|
+
});
|
|
2124
|
+
queue.push({
|
|
2125
|
+
playFn,
|
|
2126
|
+
controller: new AbortController(),
|
|
2127
|
+
resolve: resolveEntry,
|
|
2128
|
+
reject: rejectEntry
|
|
2129
|
+
});
|
|
2130
|
+
if (!this.ttsPlaying.get(streamSid)) this.processQueue(streamSid);
|
|
2131
|
+
return promise;
|
|
2132
|
+
}
|
|
2133
|
+
/**
|
|
2134
|
+
* Clear TTS queue and interrupt current playback (barge-in).
|
|
2135
|
+
*/
|
|
2136
|
+
clearTtsQueue(streamSid, _reason = "unspecified") {
|
|
2137
|
+
const queue = this.getTtsQueue(streamSid);
|
|
2138
|
+
this.resolveQueuedTtsEntries(queue);
|
|
2139
|
+
this.ttsActiveControllers.get(streamSid)?.abort();
|
|
2140
|
+
this.clearAudio(streamSid);
|
|
2141
|
+
}
|
|
2142
|
+
/**
|
|
2143
|
+
* Get active session by call ID.
|
|
2144
|
+
*/
|
|
2145
|
+
getSessionByCallId(callId) {
|
|
2146
|
+
return [...this.sessions.values()].find((session) => session.callId === callId);
|
|
2147
|
+
}
|
|
2148
|
+
/**
|
|
2149
|
+
* Close all sessions.
|
|
2150
|
+
*/
|
|
2151
|
+
closeAll() {
|
|
2152
|
+
for (const session of this.sessions.values()) {
|
|
2153
|
+
this.clearTtsState(session.streamSid);
|
|
2154
|
+
session.sttSession.close();
|
|
2155
|
+
session.ws.close();
|
|
2156
|
+
}
|
|
2157
|
+
this.sessions.clear();
|
|
2158
|
+
}
|
|
2159
|
+
getTtsQueue(streamSid) {
|
|
2160
|
+
const existing = this.ttsQueues.get(streamSid);
|
|
2161
|
+
if (existing) return existing;
|
|
2162
|
+
const queue = [];
|
|
2163
|
+
this.ttsQueues.set(streamSid, queue);
|
|
2164
|
+
return queue;
|
|
2165
|
+
}
|
|
2166
|
+
/**
|
|
2167
|
+
* Process the TTS queue for a stream.
|
|
2168
|
+
* Uses iterative approach to avoid stack accumulation from recursion.
|
|
2169
|
+
*/
|
|
2170
|
+
async processQueue(streamSid) {
|
|
2171
|
+
this.ttsPlaying.set(streamSid, true);
|
|
2172
|
+
while (true) {
|
|
2173
|
+
const queue = this.ttsQueues.get(streamSid);
|
|
2174
|
+
if (!queue || queue.length === 0) {
|
|
2175
|
+
this.ttsPlaying.set(streamSid, false);
|
|
2176
|
+
this.ttsActiveControllers.delete(streamSid);
|
|
2177
|
+
return;
|
|
2178
|
+
}
|
|
2179
|
+
const entry = queue.shift();
|
|
2180
|
+
this.ttsActiveControllers.set(streamSid, entry.controller);
|
|
2181
|
+
try {
|
|
2182
|
+
await entry.playFn(entry.controller.signal);
|
|
2183
|
+
entry.resolve();
|
|
2184
|
+
} catch (error) {
|
|
2185
|
+
if (entry.controller.signal.aborted) entry.resolve();
|
|
2186
|
+
else {
|
|
2187
|
+
console.error("[MediaStream] TTS playback error:", error);
|
|
2188
|
+
entry.reject(error);
|
|
2189
|
+
}
|
|
2190
|
+
} finally {
|
|
2191
|
+
if (this.ttsActiveControllers.get(streamSid) === entry.controller) this.ttsActiveControllers.delete(streamSid);
|
|
2192
|
+
}
|
|
2193
|
+
}
|
|
2194
|
+
}
|
|
2195
|
+
clearTtsState(streamSid) {
|
|
2196
|
+
const queue = this.ttsQueues.get(streamSid);
|
|
2197
|
+
if (queue) this.resolveQueuedTtsEntries(queue);
|
|
2198
|
+
this.ttsActiveControllers.get(streamSid)?.abort();
|
|
2199
|
+
this.ttsActiveControllers.delete(streamSid);
|
|
2200
|
+
this.ttsPlaying.delete(streamSid);
|
|
2201
|
+
this.ttsQueues.delete(streamSid);
|
|
2202
|
+
}
|
|
2203
|
+
resolveQueuedTtsEntries(queue) {
|
|
2204
|
+
const pending = queue.splice(0);
|
|
2205
|
+
for (const entry of pending) {
|
|
2206
|
+
entry.controller.abort();
|
|
2207
|
+
entry.resolve();
|
|
2208
|
+
}
|
|
2209
|
+
}
|
|
2210
|
+
};
|
|
2211
|
+
//#endregion
|
|
2212
|
+
//#region extensions/voice-call/src/webhook/stale-call-reaper.ts
|
|
2213
|
+
// How often the reaper scans the active calls, in milliseconds.
const CHECK_INTERVAL_MS = 30000;
/**
 * Start a periodic sweep that ends calls which were never answered and have
 * been active for longer than the configured age.
 *
 * @param params - `{ manager, staleCallReaperSeconds }`. The manager must
 *   provide `getActiveCalls()` and `endCall(callId)` (returning a promise).
 * @returns A function that stops the sweep, or `null` when the configured age
 *   is missing, zero or negative (reaper disabled).
 */
function startStaleCallReaper(params) {
  const maxAgeSeconds = params.staleCallReaperSeconds;
  const disabled = !maxAgeSeconds || maxAgeSeconds <= 0;
  if (disabled) return null;
  const maxAgeMs = maxAgeSeconds * 1e3;
  const reapStaleCalls = () => {
    const now = Date.now();
    for (const call of params.manager.getActiveCalls()) {
      // Answered or already-terminal calls are never reaped.
      const eligible = !call.answeredAt && !TerminalStates.has(call.state);
      if (!eligible) continue;
      const age = now - call.startedAt;
      if (!(age > maxAgeMs)) continue;
      console.log(`[voice-call] Reaping stale call ${call.callId} (age: ${Math.round(age / 1e3)}s, state: ${call.state})`);
      params.manager.endCall(call.callId).catch((err) => {
        console.warn(`[voice-call] Reaper failed to end call ${call.callId}:`, err);
      });
    }
  };
  const interval = setInterval(reapStaleCalls, CHECK_INTERVAL_MS);
  const stopReaper = () => {
    clearInterval(interval);
  };
  return stopReaper;
}
|
|
2235
|
+
//#endregion
|
|
2236
|
+
//#region extensions/voice-call/src/webhook.ts
|
|
2237
|
+
// Body-read limits applied to webhook requests (taken from the pre-auth defaults).
const {
  maxBytes: MAX_WEBHOOK_BODY_BYTES,
  timeoutMs: WEBHOOK_BODY_TIMEOUT_MS
} = WEBHOOK_BODY_READ_DEFAULTS.preAuth;
// In-flight limiter key shared by all requests whose socket reports no remote address.
const MISSING_REMOTE_ADDRESS_IN_FLIGHT_KEY = "__voice_call_no_remote__";
// Delay between a media stream disconnect and the automatic hang-up of its call.
const STREAM_DISCONNECT_HANGUP_GRACE_MS = 2000;
// Maximum number of characters of a transcript written to the log.
const TRANSCRIPT_LOG_MAX_CHARS = 200;
// Memoized dynamic-import promises; assigned on first use by the loaders below.
let realtimeTranscriptionRuntimePromise;
let responseGeneratorModulePromise;
|
|
2244
|
+
/**
 * Lazily import the realtime transcription runtime chunk.
 * The import promise is cached, so the module is requested at most once.
 *
 * @returns Promise for the imported module namespace.
 */
function loadRealtimeTranscriptionRuntime() {
  if (realtimeTranscriptionRuntimePromise == null) {
    realtimeTranscriptionRuntimePromise = import("./realtime-transcription.runtime-B2h70y2W.js");
  }
  return realtimeTranscriptionRuntimePromise;
}
|
|
2248
|
+
/**
 * Lazily import the response generator chunk.
 * The import promise is cached, so the module is requested at most once.
 *
 * @returns Promise for the imported module namespace.
 */
function loadResponseGeneratorModule() {
  if (responseGeneratorModulePromise == null) {
    responseGeneratorModulePromise = import("./response-generator-BrcmwDZU.js");
  }
  return responseGeneratorModulePromise;
}
|
|
2252
|
+
/**
 * Make a transcript safe and compact for a single log line.
 *
 * Control characters become spaces, whitespace runs collapse to one space,
 * the result is trimmed, and anything longer than TRANSCRIPT_LOG_MAX_CHARS
 * UTF-16 units is cut and suffixed with "...".
 *
 * @param value - Transcript text.
 * @returns The sanitized, possibly truncated text.
 */
function sanitizeTranscriptForLog(value) {
  const sanitized = value.replace(/\p{Cc}/gu, " ").replace(/\s+/g, " ").trim();
  if (sanitized.length <= TRANSCRIPT_LOG_MAX_CHARS) return sanitized;
  let end = TRANSCRIPT_LOG_MAX_CHARS;
  // Do not cut between the two halves of a surrogate pair: if the last kept
  // unit is a high surrogate, drop it so the log never contains a lone one.
  const lastKeptUnit = sanitized.charCodeAt(end - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) end -= 1;
  return `${sanitized.slice(0, end)}...`;
}
|
|
2257
|
+
/**
 * Build an absolute URL object from a request target and Host header value.
 *
 * @param requestUrl - Request target; "/" is used when null/undefined.
 * @param requestHost - Host header value; `fallbackHost` is used when null/undefined.
 * @param fallbackHost - Host used when no Host header is present.
 * @returns The parsed URL (always with the "http:" scheme).
 * @throws When the combination does not form a valid URL.
 */
function buildRequestUrl(requestUrl, requestHost, fallbackHost = "localhost") {
  const host = requestHost ?? fallbackHost;
  const target = requestUrl ?? "/";
  const base = `http://${host}`;
  return new URL$1(target, base);
}
|
|
2260
|
+
/**
 * Normalize an IP address string for comparison.
 *
 * Trims whitespace, strips one pair of surrounding square brackets,
 * lower-cases the result, and unwraps IPv4-mapped IPv6 addresses
 * ("::ffff:a.b.c.d" becomes "a.b.c.d").
 *
 * @param value - Raw address; may be null/undefined.
 * @returns The normalized address, or `undefined` for empty/missing input.
 */
function normalizeProxyIp(value) {
  const mappedPrefix = "::ffff:";
  let candidate = value?.trim();
  if (!candidate) return undefined;
  if (candidate.startsWith("[") && candidate.endsWith("]")) {
    candidate = candidate.slice(1, -1);
  }
  candidate = candidate.toLowerCase();
  if (!candidate.startsWith(mappedPrefix)) return candidate;
  const embedded = candidate.slice(mappedPrefix.length);
  const looksLikeIpv4 = /^\d{1,3}(?:\.\d{1,3}){3}$/.test(embedded);
  return looksLikeIpv4 ? embedded : candidate;
}
|
|
2270
|
+
/**
 * Determine the client IP reported by forwarding headers.
 *
 * With an X-Forwarded-For list: when no trusted proxies are configured the
 * left-most hop is returned; otherwise the list is walked right-to-left and
 * the first hop that is not a trusted proxy is returned (the left-most hop
 * if every hop is trusted). Without a usable list, X-Real-IP is used.
 *
 * @param request - Incoming request; only `headers` is read.
 * @param trustedProxyIPs - Addresses of proxies whose hops should be skipped.
 * @returns The resolved client IP string, or `undefined` if no header provides one.
 */
function resolveForwardedClientIp(request, trustedProxyIPs) {
  const trusted = new Set();
  for (const proxyIp of trustedProxyIPs) {
    const normalizedProxy = normalizeProxyIp(proxyIp);
    if (normalizedProxy) trusted.add(normalizedProxy);
  }
  const forwardedFor = getHeader(request.headers, "x-forwarded-for");
  const hops = forwardedFor ? forwardedFor.split(",").map((part) => part.trim()).filter(Boolean) : [];
  if (hops.length > 0) {
    const [leftMost] = hops;
    if (trusted.size === 0) return leftMost;
    const isTrustedHop = (hop) => trusted.has(normalizeProxyIp(hop) ?? "");
    const nearestUntrusted = [...hops].reverse().find((hop) => !isTrustedHop(hop));
    return nearestUntrusted ?? leftMost;
  }
  const realIp = getHeader(request.headers, "x-real-ip")?.trim();
  return realIp || void 0;
}
|
|
2286
|
+
/**
 * Convert a provider's parsed webhook result into the response payload
 * shape used by the webhook server.
 *
 * @param parsed - Parsed result; `statusCode`, `providerResponseHeaders`
 *   and `providerResponseBody` are read.
 * @returns `{ statusCode, headers, body }`, defaulting to 200 and "OK"
 *   when the status or body is null/undefined.
 */
function normalizeWebhookResponse(parsed) {
  const statusCode = parsed.statusCode ?? 200;
  const headers = parsed.providerResponseHeaders;
  const body = parsed.providerResponseBody ?? "OK";
  return { statusCode, headers, body };
}
|
|
2293
|
+
/**
 * Build the webhook response that tells the provider to reject the call.
 *
 * @returns A fresh `{ statusCode, headers, body }` payload carrying a TwiML
 *   `<Reject>` document with HTTP status 200.
 */
function buildRealtimeRejectedTwiML() {
  const rejectDocument = '<?xml version="1.0" encoding="UTF-8"?><Response><Reject reason="rejected" /></Response>';
  const headers = { "Content-Type": "text/xml" };
  return { statusCode: 200, headers, body: rejectDocument };
}
|
|
2300
|
+
/**
 * HTTP server for receiving voice call webhooks from providers.
 * Supports WebSocket upgrades for media streams when streaming is enabled.
 */
var VoiceCallWebhookServer = class {
  /**
   * @param config - Voice-call configuration; stored after `normalizeVoiceCallConfig`.
   * @param manager - Call manager used to look up calls and process events.
   * @param provider - Telephony provider (its `name` selects provider-specific paths).
   * @param coreConfig - Optional core configuration; required for auto-responses.
   * @param fullConfig - Optional full configuration; preferred over `coreConfig`
   *   when resolving the transcription provider.
   * @param agentRuntime - Optional agent runtime; required for auto-responses.
   * @param logger - Optional logger; defaults to the matching `console` methods.
   */
  constructor(config, manager, provider, coreConfig, fullConfig, agentRuntime, logger) {
    this.server = null;
    this.listeningUrl = null;
    this.startPromise = null;
    this.stopStaleCallReaper = null;
    this.webhookInFlightLimiter = createWebhookInFlightLimiter();
    this.mediaStreamHandler = null;
    // providerCallId -> timer for the delayed hang-up scheduled on stream disconnect.
    this.pendingDisconnectHangups = /* @__PURE__ */ new Map();
    this.realtimeHandler = null;
    this.config = normalizeVoiceCallConfig(config);
    this.manager = manager;
    this.provider = provider;
    this.coreConfig = coreConfig ?? null;
    this.fullConfig = fullConfig ?? null;
    this.agentRuntime = agentRuntime ?? null;
    this.logger = logger ?? {
      info: console.log,
      warn: console.warn,
      error: console.error,
      debug: console.debug
    };
  }
  /**
   * Get the media stream handler (for wiring to provider).
   */
  getMediaStreamHandler() {
    return this.mediaStreamHandler;
  }
  /**
   * Get the realtime voice handler, or `null` when none has been set.
   */
  getRealtimeHandler() {
    return this.realtimeHandler;
  }
  /**
   * Ask the realtime handler to speak on a call.
   *
   * @returns The handler's result, or `{ success: false, error }` when no
   *   realtime handler is configured.
   */
  speakRealtime(callId, instructions) {
    if (!this.realtimeHandler) return {
      success: false,
      error: "Realtime voice handler is not configured"
    };
    return this.realtimeHandler.speak(callId, instructions);
  }
  /**
   * Install the realtime voice handler.
   */
  setRealtimeHandler(handler) {
    this.realtimeHandler = handler;
  }
  /**
   * Cancel the delayed hang-up scheduled for a provider call, if any.
   */
  clearPendingDisconnectHangup(providerCallId) {
    const existing = this.pendingDisconnectHangups.get(providerCallId);
    if (!existing) return;
    clearTimeout(existing);
    this.pendingDisconnectHangups.delete(providerCallId);
  }
  /**
   * Resolve the client IP for a media stream upgrade request.
   *
   * Forwarding headers are only honored when `trustForwardingHeaders` is set
   * and the socket's remote address is one of the configured trusted proxies;
   * otherwise the socket's remote address is returned.
   */
  resolveMediaStreamClientIp(request) {
    const remoteIp = request.socket.remoteAddress ?? void 0;
    const trustedProxyIPs = this.config.webhookSecurity.trustedProxyIPs.filter(Boolean);
    const normalizedTrustedProxyIps = new Set(trustedProxyIPs.map((ip) => normalizeProxyIp(ip)).filter((ip) => Boolean(ip)));
    const normalizedRemoteIp = normalizeProxyIp(remoteIp);
    const fromTrustedProxy = normalizedTrustedProxyIps.size > 0 && normalizedRemoteIp !== void 0 && normalizedTrustedProxyIps.has(normalizedRemoteIp);
    if (this.config.webhookSecurity.trustForwardingHeaders && fromTrustedProxy) {
      const forwardedIp = resolveForwardedClientIp(request, trustedProxyIPs);
      if (forwardedIp) return forwardedIp;
    }
    return remoteIp;
  }
  /**
   * Whether barge-in should be ignored for this call: true only for an
   * outbound call in the "speaking" state, in conversation mode (the default
   * when no mode is set), that has a non-empty initial message.
   */
  shouldSuppressBargeInForInitialMessage(call) {
    if (!call || call.direction !== "outbound") return false;
    if (call.state !== "speaking") return false;
    if ((call.metadata?.mode ?? "conversation") !== "conversation") return false;
    return (normalizeOptionalString(call.metadata?.initialMessage) ?? "").length > 0;
  }
  /**
   * Initialize media streaming with the selected realtime transcription provider.
   *
   * When no usable provider can be resolved a warning is logged and
   * `mediaStreamHandler` stays unset.
   */
  async initializeMediaStreaming() {
    const streaming = this.config.streaming;
    const pluginConfig = this.fullConfig ?? this.coreConfig;
    const { getRealtimeTranscriptionProvider, listRealtimeTranscriptionProviders } = await loadRealtimeTranscriptionRuntime();
    const resolution = resolveConfiguredCapabilityProvider({
      configuredProviderId: streaming.provider,
      providerConfigs: streaming.providers,
      cfg: pluginConfig,
      cfgForResolve: pluginConfig ?? {},
      getConfiguredProvider: (providerId) => getRealtimeTranscriptionProvider(providerId, pluginConfig),
      listProviders: () => listRealtimeTranscriptionProviders(pluginConfig),
      resolveProviderConfig: ({ provider, cfg, rawConfig }) => provider.resolveConfig?.({
        cfg,
        rawConfig
      }) ?? rawConfig,
      isProviderConfigured: ({ provider, cfg, providerConfig }) => provider.isConfigured({
        cfg,
        providerConfig
      })
    });
    if (!resolution.ok && resolution.code === "missing-configured-provider") {
      console.warn(`[voice-call] Streaming enabled but realtime transcription provider "${resolution.configuredProviderId}" is not registered`);
      return;
    }
    if (!resolution.ok && resolution.code === "no-registered-provider") {
      console.warn("[voice-call] Streaming enabled but no realtime transcription provider is registered");
      return;
    }
    if (!resolution.ok) {
      console.warn(`[voice-call] Streaming enabled but provider "${resolution.provider?.id}" is not configured`);
      return;
    }
    const streamConfig = {
      transcriptionProvider: resolution.provider,
      providerConfig: resolution.providerConfig,
      preStartTimeoutMs: streaming.preStartTimeoutMs,
      maxPendingConnections: streaming.maxPendingConnections,
      maxPendingConnectionsPerIp: streaming.maxPendingConnectionsPerIp,
      maxConnections: streaming.maxConnections,
      resolveClientIp: (request) => this.resolveMediaStreamClientIp(request),
      // Accept a stream only for a known call; Twilio streams must also carry a valid token.
      shouldAcceptStream: ({ callId, token }) => {
        if (!this.manager.getCallByProviderCallId(callId)) return false;
        if (this.provider.name === "twilio") {
          if (!this.provider.isValidStreamToken(callId, token)) {
            console.warn(`[voice-call] Rejecting media stream: invalid token for ${callId}`);
            return false;
          }
        }
        return true;
      },
      // Final transcript: interrupt TTS (Twilio), record a call.speech event and,
      // for inbound or conversation-mode calls, trigger the auto-response.
      onTranscript: (providerCallId, transcript) => {
        const safeTranscript = sanitizeTranscriptForLog(transcript);
        console.log(`[voice-call] Transcript for ${providerCallId}: ${safeTranscript} (chars=${transcript.length})`);
        const call = this.manager.getCallByProviderCallId(providerCallId);
        if (!call) {
          console.warn(`[voice-call] No active call found for provider ID: ${providerCallId}`);
          return;
        }
        if (this.shouldSuppressBargeInForInitialMessage(call)) {
          console.log(`[voice-call] Ignoring barge transcript while initial message is still playing (${providerCallId})`);
          return;
        }
        if (this.provider.name === "twilio") this.provider.clearTtsQueue(providerCallId);
        const event = {
          id: `stream-transcript-${Date.now()}`,
          type: "call.speech",
          callId: call.callId,
          providerCallId,
          timestamp: Date.now(),
          transcript,
          isFinal: true
        };
        this.manager.processEvent(event);
        const callMode = call.metadata?.mode;
        if (call.direction === "inbound" || callMode === "conversation") this.handleInboundResponse(call.callId, transcript).catch((err) => {
          console.warn(`[voice-call] Failed to auto-respond:`, err);
        });
      },
      // Caller started speaking: interrupt TTS (Twilio only) unless the initial message is protected.
      onSpeechStart: (providerCallId) => {
        if (this.provider.name !== "twilio") return;
        const call = this.manager.getCallByProviderCallId(providerCallId);
        if (this.shouldSuppressBargeInForInitialMessage(call)) return;
        this.provider.clearTtsQueue(providerCallId);
      },
      onPartialTranscript: (callId, partial) => {
        const safePartial = sanitizeTranscriptForLog(partial);
        console.log(`[voice-call] Partial for ${callId}: ${safePartial} (chars=${partial.length})`);
      },
      // A (re)connected stream cancels any hang-up scheduled by an earlier disconnect.
      onConnect: (callId, streamSid) => {
        console.log(`[voice-call] Media stream connected: ${callId} -> ${streamSid}`);
        this.clearPendingDisconnectHangup(callId);
        if (this.provider.name === "twilio") this.provider.registerCallStream(callId, streamSid);
      },
      onTranscriptionReady: (callId) => {
        this.manager.speakInitialMessage(callId).catch((err) => {
          console.warn(`[voice-call] Failed to speak initial message:`, err);
        });
      },
      // On disconnect, wait for the grace period; end the call only if it still
      // exists and (for Twilio) no stream has been registered again meanwhile.
      onDisconnect: (callId, streamSid) => {
        console.log(`[voice-call] Media stream disconnected: ${callId} (${streamSid})`);
        if (this.provider.name === "twilio") this.provider.unregisterCallStream(callId, streamSid);
        this.clearPendingDisconnectHangup(callId);
        const timer = setTimeout(() => {
          this.pendingDisconnectHangups.delete(callId);
          const disconnectedCall = this.manager.getCallByProviderCallId(callId);
          if (!disconnectedCall) return;
          if (this.provider.name === "twilio") {
            if (this.provider.hasRegisteredStream(callId)) return;
          }
          console.log(`[voice-call] Auto-ending call ${disconnectedCall.callId} after stream disconnect grace`);
          this.manager.endCall(disconnectedCall.callId).catch((err) => {
            console.warn(`[voice-call] Failed to auto-end call ${disconnectedCall.callId}:`, err);
          });
        }, STREAM_DISCONNECT_HANGUP_GRACE_MS);
        timer.unref?.();
        this.pendingDisconnectHangups.set(callId, timer);
      }
    };
    this.mediaStreamHandler = new MediaStreamHandler(streamConfig);
    console.log("[voice-call] Media streaming initialized");
  }
  /**
   * Start the webhook server.
   * Idempotent: returns immediately if the server is already listening.
   *
   * @returns Promise resolving to the URL the webhook is served on.
   */
  async start() {
    const { port, bind, path: webhookPath } = this.config.serve;
    const streamPath = this.config.streaming.streamPath;
    if (this.server?.listening) return this.listeningUrl ?? this.resolveListeningUrl(bind, webhookPath);
    if (this.config.streaming.enabled && !this.mediaStreamHandler) await this.initializeMediaStreaming();
    if (this.startPromise) return this.startPromise;
    this.startPromise = new Promise((resolve, reject) => {
      this.server = http.createServer((req, res) => {
        this.handleRequest(req, res, webhookPath).catch((err) => {
          console.error("[voice-call] Webhook error:", err);
          res.statusCode = 500;
          res.end("Internal Server Error");
        });
      });
      // Route WebSocket upgrades: realtime handler first, then the media
      // stream path; anything else is dropped.
      if (this.realtimeHandler || this.mediaStreamHandler) this.server.on("upgrade", (request, socket, head) => {
        if (this.realtimeHandler && this.isRealtimeWebSocketUpgrade(request)) {
          this.realtimeHandler.handleWebSocketUpgrade(request, socket, head);
          return;
        }
        if (this.getUpgradePathname(request) === streamPath && this.mediaStreamHandler) this.mediaStreamHandler?.handleUpgrade(request, socket, head);
        else socket.destroy();
      });
      this.server.on("error", (err) => {
        this.server = null;
        this.listeningUrl = null;
        this.startPromise = null;
        reject(err);
      });
      this.server.listen(port, bind, () => {
        const url = this.resolveListeningUrl(bind, webhookPath);
        this.listeningUrl = url;
        this.startPromise = null;
        this.logger.info(`[voice-call] Webhook server listening on ${url}`);
        if (this.mediaStreamHandler) {
          const address = this.server?.address();
          const actualPort = address && typeof address === "object" ? address.port : this.config.serve.port;
          this.logger.info(`[voice-call] Media stream WebSocket on ws://${bind}:${actualPort}${streamPath}`);
        }
        resolve(url);
        this.stopStaleCallReaper = startStaleCallReaper({
          manager: this.manager,
          staleCallReaperSeconds: this.config.staleCallReaperSeconds
        });
      });
    });
    return this.startPromise;
  }
  /**
   * Stop the webhook server.
   *
   * Cancels pending disconnect hang-ups, clears the in-flight limiter, stops
   * the stale-call reaper and closes the HTTP server if one exists.
   */
  async stop() {
    for (const timer of this.pendingDisconnectHangups.values()) clearTimeout(timer);
    this.pendingDisconnectHangups.clear();
    this.webhookInFlightLimiter.clear();
    this.startPromise = null;
    if (this.stopStaleCallReaper) {
      this.stopStaleCallReaper();
      this.stopStaleCallReaper = null;
    }
    return new Promise((resolve) => {
      if (this.server) this.server.close(() => {
        this.server = null;
        this.listeningUrl = null;
        resolve();
      });
      else {
        this.listeningUrl = null;
        resolve();
      }
    });
  }
  /**
   * Build the webhook URL for the address the server is bound to, falling
   * back to the configured bind host and port when no bound address is
   * available. IPv6 literals are wrapped in brackets in both cases so the
   * result is always a well-formed URL.
   */
  resolveListeningUrl(bind, webhookPath) {
    const formatHost = (host) => typeof host === "string" && host.includes(":") && !host.startsWith("[") ? `[${host}]` : host;
    const address = this.server?.address();
    if (address && typeof address === "object") {
      const host = address.address && address.address.length > 0 ? address.address : bind;
      return `http://${formatHost(host)}:${address.port}${webhookPath}`;
    }
    return `http://${formatHost(bind)}:${this.config.serve.port}${webhookPath}`;
  }
  /**
   * Pathname of an upgrade request, or `null` when its URL cannot be parsed.
   */
  getUpgradePathname(request) {
    try {
      return buildRequestUrl(request.url, request.headers.host).pathname;
    } catch {
      return null;
    }
  }
  /**
   * Canonical form of a path for comparison: trimmed, with a leading slash
   * and without a single trailing slash (the root path stays "/").
   */
  normalizeWebhookPathForMatch(pathname) {
    const trimmed = pathname.trim();
    if (!trimmed) return "/";
    const prefixed = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
    if (prefixed === "/") return prefixed;
    return prefixed.endsWith("/") ? prefixed.slice(0, -1) : prefixed;
  }
  /**
   * Whether the request path and the configured webhook path are equal after normalization.
   */
  isWebhookPathMatch(requestPath, configuredPath) {
    return this.normalizeWebhookPathForMatch(requestPath) === this.normalizeWebhookPathForMatch(configuredPath);
  }
  /**
   * Handle incoming HTTP request.
   */
  async handleRequest(req, res, webhookPath) {
    const payload = await this.runWebhookPipeline(req, webhookPath);
    this.writeWebhookResponse(res, payload);
  }
  /**
   * Run a webhook request through routing, pre-auth checks, body reading,
   * provider verification and event processing.
   *
   * @returns The `{ statusCode, headers?, body }` payload to send.
   */
  async runWebhookPipeline(req, webhookPath) {
    let url;
    try {
      url = buildRequestUrl(req.url, req.headers.host);
    } catch {
      // A malformed Host header or request target is a client error, not a
      // server fault; answer it directly instead of letting the parse error
      // propagate (the upgrade path guards the same call).
      console.warn("[voice-call] Webhook rejected: malformed request URL or Host header");
      return {
        statusCode: 400,
        body: "Bad Request"
      };
    }
    // The template literal below spans several lines; its continuation lines
    // are part of the response body and must keep their exact leading whitespace.
    if (url.pathname === "/voice/hold-music") return {
      statusCode: 200,
      headers: { "Content-Type": "text/xml" },
      body: `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say voice="alice">All agents are currently busy. Please hold.</Say>
<Play loop="0">https://s3.amazonaws.com/com.twilio.music.classical/BusyStrings.mp3</Play>
</Response>`
    };
    if (!this.isWebhookPathMatch(url.pathname, webhookPath)) return {
      statusCode: 404,
      body: "Not Found"
    };
    if (req.method !== "POST") return {
      statusCode: 405,
      body: "Method Not Allowed"
    };
    // Cheap header check before any body bytes are read.
    const headerGate = this.verifyPreAuthWebhookHeaders(req.headers);
    if (!headerGate.ok) {
      console.warn(`[voice-call] Webhook rejected before body read: ${headerGate.reason}`);
      return {
        statusCode: 401,
        body: "Unauthorized"
      };
    }
    const remoteAddress = req.socket.remoteAddress;
    if (!remoteAddress) console.warn(`[voice-call] Webhook accepted with no remote address; using shared fallback in-flight key`);
    const inFlightKey = remoteAddress || MISSING_REMOTE_ADDRESS_IN_FLIGHT_KEY;
    if (!this.webhookInFlightLimiter.tryAcquire(inFlightKey)) {
      console.warn(`[voice-call] Webhook rejected before body read: too many in-flight requests`);
      return {
        statusCode: 429,
        body: "Too Many Requests"
      };
    }
    try {
      let body = "";
      try {
        body = await this.readBody(req, MAX_WEBHOOK_BODY_BYTES, WEBHOOK_BODY_TIMEOUT_MS);
      } catch (err) {
        if (isRequestBodyLimitError(err, "PAYLOAD_TOO_LARGE")) return {
          statusCode: 413,
          body: "Payload Too Large"
        };
        if (isRequestBodyLimitError(err, "REQUEST_BODY_TIMEOUT")) return {
          statusCode: 408,
          body: requestBodyErrorToText("REQUEST_BODY_TIMEOUT")
        };
        throw err;
      }
      const ctx = {
        headers: req.headers,
        rawBody: body,
        url: url.toString(),
        method: "POST",
        query: Object.fromEntries(url.searchParams),
        remoteAddress: req.socket.remoteAddress ?? void 0
      };
      const verification = this.provider.verifyWebhook(ctx);
      if (!verification.ok) {
        console.warn(`[voice-call] Webhook verification failed: ${verification.reason}`);
        return {
          statusCode: 401,
          body: "Unauthorized"
        };
      }
      if (!verification.verifiedRequestKey) {
        console.warn("[voice-call] Webhook verification succeeded without request identity key");
        return {
          statusCode: 401,
          body: "Unauthorized"
        };
      }
      const initialTwiML = this.provider.consumeInitialTwiML?.(ctx);
      if (initialTwiML !== void 0 && initialTwiML !== null) {
        const params = new URLSearchParams(ctx.rawBody);
        console.log(`[voice-call] Serving provider initial TwiML before realtime handling (callSid=${params.get("CallSid") ?? "unknown"}, direction=${params.get("Direction") ?? "unknown"})`);
        return {
          statusCode: 200,
          headers: { "Content-Type": "application/xml" },
          body: initialTwiML
        };
      }
      const realtimeParams = this.getRealtimeTwimlParams(ctx);
      if (realtimeParams) {
        const direction = realtimeParams.get("Direction");
        if ((!direction || direction === "inbound") && !this.shouldAcceptRealtimeInboundRequest(realtimeParams)) {
          console.log("[voice-call] Realtime inbound call rejected before stream setup");
          return buildRealtimeRejectedTwiML();
        }
        console.log(`[voice-call] Serving realtime TwiML for Twilio call ${realtimeParams.get("CallSid") ?? "unknown"} (direction=${direction ?? "unknown"})`);
        return this.realtimeHandler.buildTwiMLPayload(req, realtimeParams);
      }
      const parsed = this.provider.parseWebhookEvent(ctx, { verifiedRequestKey: verification.verifiedRequestKey });
      // Replayed requests still get the provider response, but trigger no side effects.
      if (verification.isReplay) console.warn("[voice-call] Replay detected; skipping event side effects");
      else this.processParsedEvents(parsed.events);
      return normalizeWebhookResponse(parsed);
    } finally {
      this.webhookInFlightLimiter.release(inFlightKey);
    }
  }
  /**
   * Check that the provider's signature headers are present before the body
   * is read. Only presence is checked here; always passes when signature
   * verification is disabled or the provider is not one of the listed ones.
   *
   * @returns `{ ok: true }` or `{ ok: false, reason }`.
   */
  verifyPreAuthWebhookHeaders(headers) {
    if (this.config.skipSignatureVerification) return { ok: true };
    switch (this.provider.name) {
      case "telnyx": {
        const signature = getHeader(headers, "telnyx-signature-ed25519");
        const timestamp = getHeader(headers, "telnyx-timestamp");
        if (signature && timestamp) return { ok: true };
        return {
          ok: false,
          reason: "missing Telnyx signature or timestamp header"
        };
      }
      case "twilio":
        if (getHeader(headers, "x-twilio-signature")) return { ok: true };
        return {
          ok: false,
          reason: "missing X-Twilio-Signature header"
        };
      case "plivo": {
        const hasV3 = Boolean(getHeader(headers, "x-plivo-signature-v3")) && Boolean(getHeader(headers, "x-plivo-signature-v3-nonce"));
        const hasV2 = Boolean(getHeader(headers, "x-plivo-signature-v2")) && Boolean(getHeader(headers, "x-plivo-signature-v2-nonce"));
        if (hasV3 || hasV2) return { ok: true };
        return {
          ok: false,
          reason: "missing Plivo signature headers"
        };
      }
      default: return { ok: true };
    }
  }
  /**
   * Whether an upgrade request targets the realtime handler, i.e. its
   * pathname starts with the handler's stream path pattern.
   */
  isRealtimeWebSocketUpgrade(req) {
    try {
      const pathname = buildRequestUrl(req.url, req.headers.host).pathname;
      const pattern = this.realtimeHandler?.getStreamPathPattern();
      return Boolean(pattern && pathname.startsWith(pattern));
    } catch {
      return false;
    }
  }
  /**
   * Decide whether a verified webhook should be answered with realtime TwiML.
   *
   * @returns The parsed form parameters when a realtime handler is set, the
   *   provider is Twilio, the direction is absent/inbound/outbound*, the
   *   request is not a status callback or terminal status, and it carries no
   *   speech result or digits; otherwise `null`.
   */
  getRealtimeTwimlParams(ctx) {
    if (!this.realtimeHandler || this.provider.name !== "twilio") return null;
    const params = new URLSearchParams(ctx.rawBody);
    const direction = params.get("Direction");
    if (!(!direction || direction === "inbound" || direction.startsWith("outbound"))) return null;
    if (ctx.query?.type === "status") return null;
    const callStatus = params.get("CallStatus");
    if (callStatus && isProviderStatusTerminal(callStatus)) return null;
    return !params.get("SpeechResult") && !params.get("Digits") ? params : null;
  }
  /**
   * Apply the inbound policy to a realtime inbound request: "open" accepts,
   * "allowlist" and "pairing" accept allow-listed callers, anything else rejects.
   */
  shouldAcceptRealtimeInboundRequest(params) {
    switch (this.config.inboundPolicy) {
      case "open": return true;
      case "allowlist":
      case "pairing": return isAllowlistedCaller(normalizePhoneNumber(params.get("From") ?? void 0), this.config.allowFrom);
      default: return false;
    }
  }
  /**
   * Hand each parsed event to the manager; a failure on one event is logged
   * and does not stop the remaining events.
   */
  processParsedEvents(events) {
    for (const event of events) try {
      this.manager.processEvent(event);
    } catch (err) {
      console.error(`[voice-call] Error processing event ${event.type}:`, err);
    }
  }
  /**
   * Write a pipeline payload (status, optional headers, body) to the response.
   */
  writeWebhookResponse(res, payload) {
    res.statusCode = payload.statusCode;
    if (payload.headers) for (const [key, value] of Object.entries(payload.headers)) res.setHeader(key, value);
    res.end(payload.body);
  }
  /**
   * Read request body as string with timeout protection.
   */
  readBody(req, maxBytes, timeoutMs = WEBHOOK_BODY_TIMEOUT_MS) {
    return readRequestBodyWithLimit(req, {
      maxBytes,
      timeoutMs
    });
  }
  /**
   * Handle auto-response for inbound calls using the agent system.
   * Supports tool calling for richer voice interactions.
   *
   * Does nothing (beyond logging) when the call is unknown or when the core
   * config or agent runtime is missing. Errors are logged, not thrown.
   */
  async handleInboundResponse(callId, userMessage) {
    // Transcript text comes from the caller; log it in the same sanitized
    // form as the other transcript logs (no control characters, bounded length).
    console.log(`[voice-call] Auto-responding to inbound call ${callId}: "${sanitizeTranscriptForLog(String(userMessage))}"`);
    const call = this.manager.getCall(callId);
    if (!call) {
      console.warn(`[voice-call] Call ${callId} not found for auto-response`);
      return;
    }
    if (!this.coreConfig) {
      console.warn("[voice-call] Core config missing; skipping auto-response");
      return;
    }
    if (!this.agentRuntime) {
      console.warn("[voice-call] Agent runtime missing; skipping auto-response");
      return;
    }
    try {
      const { generateVoiceResponse } = await loadResponseGeneratorModule();
      // Per-number overrides are looked up by the route key stored on the call,
      // falling back to the dialed number.
      const numberRouteKey = typeof call.metadata?.numberRouteKey === "string" ? call.metadata.numberRouteKey : call.to;
      const effectiveConfig = resolveVoiceCallEffectiveConfig(this.config, numberRouteKey).config;
      const result = await generateVoiceResponse({
        voiceConfig: effectiveConfig,
        coreConfig: this.coreConfig,
        agentRuntime: this.agentRuntime,
        callId,
        sessionKey: call.sessionKey,
        from: call.from,
        transcript: call.transcript,
        userMessage
      });
      if (result.error) {
        console.error(`[voice-call] Response generation error: ${result.error}`);
        return;
      }
      if (result.text) {
        console.log(`[voice-call] AI response: "${sanitizeTranscriptForLog(String(result.text))}"`);
        await this.manager.speak(callId, result.text);
      }
    } catch (err) {
      console.error(`[voice-call] Auto-response error:`, err);
    }
  }
};
|
|
2828
|
+
//#endregion
|
|
2829
|
+
//#region extensions/voice-call/src/runtime.ts
|
|
2830
|
+
/**
 * Extra system prompt passed to the consult agent when a realtime call asks for help.
 * Built from five sentences separated by single spaces.
 */
const REALTIME_VOICE_CONSULT_SYSTEM_PROMPT =
	"You are a behind-the-scenes consultant for a live phone voice agent. " +
	"Prioritize a fast, speakable answer over exhaustive investigation. " +
	"For tool-backed status checks, prefer one or two bounded read-only queries before answering. " +
	"Do not print secret values or dump environment variables; only check whether required configuration is present. " +
	"Be accurate, brief, and speakable.";
|
|
2837
|
+
// Cached dynamic-import promises, one per lazily loaded chunk.
// Each stays undefined until its loader runs for the first time; afterwards the
// same promise object is handed back on every call.
let telnyxProviderPromise;
let twilioProviderPromise;
let plivoProviderPromise;
let mockProviderPromise;
let realtimeVoiceRuntimePromise;
let realtimeHandlerPromise;

/** Import the Telnyx provider chunk on first use and reuse that promise afterwards. */
function loadTelnyxProvider() {
	return (telnyxProviderPromise ??= import("./telnyx-jjBE8boz.js"));
}

/** Import the Twilio provider chunk on first use and reuse that promise afterwards. */
function loadTwilioProvider() {
	return (twilioProviderPromise ??= import("./twilio-1OqbcXLL.js"));
}

/** Import the Plivo provider chunk on first use and reuse that promise afterwards. */
function loadPlivoProvider() {
	return (plivoProviderPromise ??= import("./plivo-B-a7KFoT.js"));
}

/** Import the mock provider chunk on first use and reuse that promise afterwards. */
function loadMockProvider() {
	return (mockProviderPromise ??= import("./mock-CeKvfVEd.js"));
}

/** Import the realtime voice runtime chunk on first use and reuse that promise afterwards. */
function loadRealtimeVoiceRuntime() {
	return (realtimeVoiceRuntimePromise ??= import("./realtime-voice.runtime-Bkh4nvLn.js"));
}

/** Import the realtime call handler chunk on first use and reuse that promise afterwards. */
function loadRealtimeHandler() {
	return (realtimeHandlerPromise ??= import("./realtime-handler-B63CIDP2.js"));
}
|
|
2867
|
+
/**
 * Pick the session key to use when consulting the agent about a call.
 *
 * An existing `call.sessionKey` wins. Otherwise the key is derived by
 * resolveVoiceCallSessionKey from the call's config, its id, and the remote phone
 * number (`to` for outbound calls, `from` for every other direction).
 *
 * @param call - Call record; reads `sessionKey`, `direction`, `to`, `from`, `config`, `callId`.
 * @returns The stored session key, or the value produced by resolveVoiceCallSessionKey.
 */
function resolveVoiceCallConsultSessionKey(call) {
	const { sessionKey, direction, to, from, config, callId } = call;
	if (sessionKey) return sessionKey;
	return resolveVoiceCallSessionKey({
		config,
		callId,
		phone: direction === "outbound" ? to : from
	});
}
|
|
2876
|
+
/**
 * Convert a call transcript into the role/text turns handed to the consult agent.
 *
 * Entries spoken by "bot" become "assistant" turns; every other speaker becomes "user".
 * A trimmed, non-empty partial user transcript from the handler context is appended as
 * a final "user" turn unless the last turn already carries exactly that text.
 *
 * @param call - Call record; `transcript` may be missing or null.
 * @param context - Optional handler context; only `partialUserTranscript` is read.
 * @returns {Array<{role: string, text: *}>} Newly built list of turns.
 */
function mapVoiceCallConsultTranscript(call, context) {
	const toConsultTurn = ({ speaker, text }) => ({
		role: speaker === "bot" ? "assistant" : "user",
		text
	});
	const turns = (call.transcript ?? []).map(toConsultTurn);
	const pendingUserText = context?.partialUserTranscript?.trim();
	if (!pendingUserText) return turns;
	// Avoid repeating the partial when it is already the latest recorded turn.
	const latestTurn = turns.at(-1);
	if (latestTurn?.text !== pendingUserText) {
		turns.push({
			role: "user",
			text: pendingUserText
		});
	}
	return turns;
}
|
|
2888
|
+
/**
 * Create the teardown controller for the resources a voice-call runtime owns.
 *
 * `stop` releases, in order: the tunnel (when one was registered through
 * `setTunnelResult`), the Tailscale exposure for `params.config`, and the webhook
 * server. It runs at most once; later calls return immediately.
 *
 * Every teardown step is attempted even when an earlier one fails. Since `stop` is
 * one-shot, aborting on the first failure would leave the remaining resources (for
 * example the webhook server) running with no way to retry their cleanup.
 *
 * @param params - Object with `config` (passed to cleanupTailscaleExposure) and
 *   `webhookServer` (its `stop()` is awaited).
 * @returns {{setTunnelResult: Function, stop: Function}} `setTunnelResult(result)` records
 *   the tunnel to stop later; `stop(opts)` performs the teardown. With
 *   `opts.suppressErrors` true all step failures are ignored; otherwise the first
 *   failure is rethrown after every step has been attempted.
 */
function createRuntimeResourceLifecycle(params) {
	let tunnelResult = null;
	let stopped = false;
	return {
		setTunnelResult: (result) => {
			tunnelResult = result;
		},
		stop: async (opts) => {
			if (stopped) return;
			stopped = true;
			const suppressErrors = opts?.suppressErrors ?? false;
			const teardownSteps = [
				async () => {
					if (tunnelResult) await tunnelResult.stop();
				},
				async () => {
					await cleanupTailscaleExposure(params.config);
				},
				async () => {
					await params.webhookServer.stop();
				}
			];
			const failures = [];
			for (const step of teardownSteps) {
				try {
					await step();
				} catch (stepError) {
					// Keep going: the remaining resources still have to be released.
					failures.push(stepError);
				}
			}
			// The earliest failure is reported, matching what callers saw before; later ones are dropped.
			if (!suppressErrors && failures.length > 0) throw failures[0];
		}
	};
}
|
|
2918
|
+
/**
 * Report whether a bind address is one of the recognized loopback spellings.
 *
 * Only the exact strings "127.0.0.1", "::1" and "localhost" qualify; matching is
 * case-sensitive and a missing or empty value is not loopback.
 *
 * @param bind - Bind address to test; may be undefined, null or empty.
 * @returns {boolean} true only for an exact match with a recognized loopback address.
 */
function isLoopbackBind(bind) {
	const loopbackAddresses = ["127.0.0.1", "::1", "localhost"];
	return loopbackAddresses.includes(bind);
}
|
|
2922
|
+
/**
 * Instantiate the telephony provider selected by `config.provider`.
 *
 * The matching provider chunk is loaded lazily, then constructed with its credentials
 * and options taken from the config. Supported values: "telnyx", "twilio", "plivo", "mock".
 *
 * @param config - Resolved voice-call configuration.
 * @returns {Promise<object>} The constructed provider instance.
 * @throws {Error} When `config.provider` is not one of the supported values.
 */
async function resolveProvider(config) {
	// The ngrok free-tier bypass applies only when tunnelling through ngrok to a loopback bind
	// and the config opts in explicitly.
	const tunnelsThroughNgrok = config.tunnel?.provider === "ngrok";
	const allowNgrokFreeTierLoopbackBypass = tunnelsThroughNgrok && isLoopbackBind(config.serve?.bind) && (config.tunnel?.allowNgrokFreeTierLoopbackBypass ?? false);
	const providerName = config.provider;

	if (providerName === "telnyx") {
		const { TelnyxProvider } = await loadTelnyxProvider();
		const credentials = {
			apiKey: config.telnyx?.apiKey,
			connectionId: config.telnyx?.connectionId,
			publicKey: config.telnyx?.publicKey
		};
		return new TelnyxProvider(credentials, { skipVerification: config.skipSignatureVerification });
	}

	if (providerName === "twilio") {
		const { TwilioProvider } = await loadTwilioProvider();
		const credentials = {
			accountSid: config.twilio?.accountSid,
			authToken: resolveTwilioAuthToken(config)
		};
		const options = {
			allowNgrokFreeTierLoopbackBypass,
			publicUrl: config.publicUrl,
			skipVerification: config.skipSignatureVerification,
			// The stream path is only passed on when streaming is enabled.
			streamPath: config.streaming?.enabled ? config.streaming.streamPath : void 0,
			webhookSecurity: config.webhookSecurity
		};
		return new TwilioProvider(credentials, options);
	}

	if (providerName === "plivo") {
		const { PlivoProvider } = await loadPlivoProvider();
		const credentials = {
			authId: config.plivo?.authId,
			authToken: config.plivo?.authToken
		};
		const options = {
			publicUrl: config.publicUrl,
			skipVerification: config.skipSignatureVerification,
			// Milliseconds to whole seconds, never below one second.
			ringTimeoutSec: Math.max(1, Math.floor(config.ringTimeoutMs / 1e3)),
			webhookSecurity: config.webhookSecurity
		};
		return new PlivoProvider(credentials, options);
	}

	if (providerName === "mock") {
		const { MockProvider } = await loadMockProvider();
		return new MockProvider();
	}

	throw new Error(`Unsupported voice-call provider: ${String(config.provider)}`);
}
|
|
2965
|
+
/**
 * Resolve the realtime voice provider described by the voice-call configuration.
 *
 * Loads the realtime voice runtime lazily and delegates to its
 * resolveConfiguredRealtimeVoiceProvider with the configured provider id, the
 * per-provider configs and the full application config.
 *
 * @param params - Object with `config` (its `realtime.provider` and `realtime.providers`
 *   are read) and `fullConfig` (forwarded as `cfg`).
 * @returns {Promise<*>} Whatever resolveConfiguredRealtimeVoiceProvider returns.
 */
async function resolveRealtimeProvider(params) {
	const { resolveConfiguredRealtimeVoiceProvider: resolveConfigured } = await loadRealtimeVoiceRuntime();
	const { provider: configuredProviderId, providers: providerConfigs } = params.config.realtime;
	return resolveConfigured({
		configuredProviderId,
		providerConfigs,
		cfg: params.fullConfig
	});
}
|
|
2973
|
+
/**
 * Build and start the voice-call runtime.
 *
 * Steps, in order:
 *  1. Resolve and validate the configuration (throws when disabled or invalid).
 *  2. Create the telephony provider, the call manager and the webhook server.
 *  3. When realtime voice is enabled, create the realtime handler, register the agent
 *     consult tool (unless the tool policy is "none") and attach it to the webhook server.
 *  4. Start the webhook server, then try to obtain a public URL: the configured
 *     `publicUrl`, else a tunnel, else Tailscale exposure.
 *  5. Wire the public URL, telephony TTS and the media stream handler into the provider
 *     where applicable, then initialize the call manager.
 *
 * If anything after the webhook server start fails, the resources are torn down with
 * errors suppressed and the original error is rethrown.
 *
 * @param params - Object with `config` (raw voice-call config), `coreConfig`, `fullConfig`,
 *   `agentRuntime`, `ttsRuntime` and an optional `logger` (console methods are used when absent).
 * @returns {Promise<object>} `{ config, provider, manager, webhookServer, webhookUrl, publicUrl, stop }`.
 * @throws {Error} When the plugin is disabled, the provider config is invalid, or the provider
 *   requires a public webhook and only a local-only URL is available.
 */
async function createVoiceCallRuntime(params) {
	const { config: rawConfig, coreConfig, fullConfig, agentRuntime, ttsRuntime, logger } = params;
	const log = logger ?? {
		info: console.log,
		warn: console.warn,
		error: console.error,
		debug: console.debug
	};
	const config = resolveVoiceCallConfig(rawConfig);
	const cfg = fullConfig ?? coreConfig;

	if (!config.enabled) {
		throw new Error("Voice call disabled. Enable the plugin entry in config.");
	}
	if (config.skipSignatureVerification) {
		log.warn("[voice-call] SECURITY WARNING: skipSignatureVerification=true disables webhook signature verification (development only). Do not use in production.");
	}
	const providerValidation = validateProviderConfig(config);
	if (!providerValidation.valid) {
		throw new Error(`Invalid voice-call config: ${providerValidation.errors.join("; ")}`);
	}

	const provider = await resolveProvider(config);
	const manager = new CallManager(config);
	let realtimeProvider = null;
	if (config.realtime.enabled) {
		realtimeProvider = await resolveRealtimeProvider({
			config,
			fullConfig: cfg
		});
	}
	const webhookServer = new VoiceCallWebhookServer(config, manager, provider, coreConfig, cfg, agentRuntime, log);

	if (realtimeProvider) {
		// Answers a consult tool call raised by the realtime model during a live call.
		const answerConsultToolCall = async (args, callId, handlerContext) => {
			const call = manager.getCall(callId);
			if (!call) return { error: `Call "${callId}" not found` };
			// Per-number overrides are keyed by the route key on the call metadata, else the dialed number.
			const metadataRouteKey = call.metadata?.numberRouteKey;
			const routeKey = typeof metadataRouteKey === "string" ? metadataRouteKey : call.to;
			const effectiveConfig = resolveVoiceCallEffectiveConfig(config, routeKey).config;
			const agentId = effectiveConfig.agentId ?? "main";
			const sessionKey = resolveVoiceCallConsultSessionKey({
				...call,
				config: effectiveConfig
			});
			// The fast-context path may answer directly; only fall through to a full agent consult otherwise.
			const fastContext = await resolveRealtimeFastContextConsult({
				cfg,
				agentId,
				sessionKey,
				config: effectiveConfig.realtime.fastContext,
				args,
				logger: log
			});
			if (fastContext.handled) return fastContext.result;
			const { provider: agentProvider, model } = resolveVoiceResponseModel({
				voiceConfig: effectiveConfig,
				agentRuntime
			});
			const thinkLevel = agentRuntime.resolveThinkingDefault({
				cfg,
				provider: agentProvider,
				model
			});
			return await consultRealtimeVoiceAgent({
				cfg,
				agentRuntime,
				logger: log,
				agentId,
				sessionKey,
				messageProvider: "voice",
				lane: "voice",
				runIdPrefix: `voice-realtime-consult:${callId}`,
				args,
				transcript: mapVoiceCallConsultTranscript(call, handlerContext),
				surface: "a live phone call",
				userLabel: "Caller",
				assistantLabel: "Agent",
				questionSourceLabel: "caller",
				provider: agentProvider,
				model,
				thinkLevel,
				timeoutMs: effectiveConfig.responseTimeoutMs,
				toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(effectiveConfig.realtime.toolPolicy),
				extraSystemPrompt: REALTIME_VOICE_CONSULT_SYSTEM_PROMPT
			});
		};

		const { RealtimeCallHandler } = await loadRealtimeHandler();
		const realtimeOptions = {
			...config.realtime,
			tools: resolveRealtimeVoiceAgentConsultTools(config.realtime.toolPolicy, config.realtime.tools)
		};
		const realtimeHandler = new RealtimeCallHandler(realtimeOptions, manager, provider, realtimeProvider.provider, realtimeProvider.providerConfig, config.serve.path);
		if (config.realtime.toolPolicy !== "none") {
			realtimeHandler.registerToolHandler(REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, answerConsultToolCall);
		}
		webhookServer.setRealtimeHandler(realtimeHandler);
	}

	const lifecycle = createRuntimeResourceLifecycle({
		config,
		webhookServer
	});
	const localUrl = await webhookServer.start();
	try {
		let publicUrl = config.publicUrl ?? null;

		// A tunnel is attempted only without a configured public URL; its failure is logged, not fatal.
		if (!publicUrl && config.tunnel?.provider && config.tunnel.provider !== "none") {
			try {
				const tunnel = await startTunnel({
					provider: config.tunnel.provider,
					port: config.serve.port,
					path: config.serve.path,
					ngrokAuthToken: config.tunnel.ngrokAuthToken,
					ngrokDomain: config.tunnel.ngrokDomain
				});
				lifecycle.setTunnelResult(tunnel);
				publicUrl = tunnel?.publicUrl ?? null;
			} catch (tunnelError) {
				log.error(`[voice-call] Tunnel setup failed: ${formatErrorMessage(tunnelError)}`);
			}
		}
		if (!publicUrl && config.tailscale?.mode !== "off") {
			publicUrl = await setupTailscaleExposure(config);
		}

		const webhookUrl = publicUrl ?? localUrl;
		if (providerRequiresPublicWebhook(provider.name) && isProviderUnreachableWebhookUrl(webhookUrl)) {
			throw new Error(`[voice-call] ${provider.name} requires a publicly reachable webhook URL. Refusing to use local-only webhook ${webhookUrl}. Set plugins.entries.voice-call.config.publicUrl or enable tunnel/tailscale exposure.`);
		}

		const isTwilio = provider.name === "twilio";
		if (publicUrl) {
			if (isTwilio) provider.setPublicUrl(publicUrl);
			if (realtimeProvider) webhookServer.getRealtimeHandler()?.setPublicUrl(publicUrl);
		}

		if (isTwilio && config.streaming?.enabled) {
			const twilioProvider = provider;
			if (ttsRuntime?.textToSpeechTelephony) {
				// TTS setup is best-effort: a failure downgrades to a warning.
				try {
					const ttsProvider = createTelephonyTtsProvider({
						coreConfig,
						ttsOverride: config.tts,
						runtime: ttsRuntime,
						logger: log
					});
					twilioProvider.setTTSProvider(ttsProvider);
					log.info("[voice-call] Telephony TTS provider configured");
				} catch (ttsError) {
					log.warn(`[voice-call] Failed to initialize telephony TTS: ${formatErrorMessage(ttsError)}`);
				}
			} else {
				log.warn("[voice-call] Telephony TTS unavailable; streaming TTS disabled");
			}
			const mediaHandler = webhookServer.getMediaStreamHandler();
			if (mediaHandler) {
				twilioProvider.setMediaStreamHandler(mediaHandler);
				log.info("[voice-call] Media stream handler wired to provider");
			}
		}

		if (realtimeProvider) {
			log.info(`[voice-call] Realtime voice provider: ${realtimeProvider.provider.id}`);
		}
		await manager.initialize(provider, webhookUrl);
		const stop = async () => {
			return await lifecycle.stop();
		};
		log.info("[voice-call] Runtime initialized");
		log.info(`[voice-call] Webhook URL: ${webhookUrl}`);
		// NOTE(review): webhookUrl equals publicUrl whenever publicUrl is truthy, so this
		// branch looks unreachable — confirm whether the public URL was meant to be logged.
		if (publicUrl && publicUrl !== webhookUrl) {
			log.info(`[voice-call] Public URL: ${publicUrl}`);
		}
		return {
			config,
			provider,
			manager,
			webhookServer,
			webhookUrl,
			publicUrl,
			stop
		};
	} catch (startupError) {
		// The webhook server is already running here, so release everything before failing.
		await lifecycle.stop({ suppressErrors: true });
		throw startupError;
	}
}
|
|
3118
|
+
//#endregion
|
|
3119
|
+
export { setupTailscaleExposureRoute as a, getTailscaleSelfInfo as i, resolveWebhookExposureStatus as n, TELEPHONY_DEFAULT_TTS_TIMEOUT_MS as o, cleanupTailscaleExposureRoute as r, resolveUserPath as s, createVoiceCallRuntime as t };
|